Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • sccs/docs.it4i.cz
  • soj0018/docs.it4i.cz
  • lszustak/docs.it4i.cz
  • jarosjir/docs.it4i.cz
  • strakpe/docs.it4i.cz
  • beranekj/docs.it4i.cz
  • tab0039/docs.it4i.cz
  • davidciz/docs.it4i.cz
  • gui0013/docs.it4i.cz
  • mrazek/docs.it4i.cz
  • lriha/docs.it4i.cz
  • it4i-vhapla/docs.it4i.cz
  • hol0598/docs.it4i.cz
  • sccs/docs-it-4-i-cz-fumadocs
  • siw019/docs-it-4-i-cz-fumadocs
15 results
Show changes
Showing
with 1317 additions and 0 deletions
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include <stdio.h>
#include "shmem.h"
#if !defined(OSHMEM_SPEC_VERSION) || OSHMEM_SPEC_VERSION < 10200
#error This application uses API 1.2 and up
#endif
/*
 * OpenSHMEM "hello world": every PE prints its own index, the total
 * number of PEs and the library name/version reported by the
 * implementation, then shuts the library down.
 */
int main(int argc, char* argv[])
{
    char vendor[SHMEM_MAX_NAME_LEN];
    int ver_major, ver_minor;
    int me, npes;

    shmem_init();

    /* Identity of this PE within the job. */
    me = shmem_my_pe();
    npes = shmem_n_pes();

    /* Library identification as reported by the implementation. */
    shmem_info_get_version(&ver_major, &ver_minor);
    shmem_info_get_name(vendor);

    printf("Hello, world, I am %d of %d: %s (version: %d.%d)\n",
           me, npes, vendor, ver_major, ver_minor);

    shmem_finalize();
    return 0;
}
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include <iostream>
#include "shmem.h"
#if !defined(OSHMEM_SPEC_VERSION) || OSHMEM_SPEC_VERSION < 10200
#error This application uses API 1.2 and up
#endif
int main(int argc, char* argv[])
{
int proc, nproc;
char name[SHMEM_MAX_NAME_LEN];
int major, minor;
shmem_init();
nproc = shmem_n_pes();
proc = shmem_my_pe();
shmem_info_get_name(name);
shmem_info_get_version(&major, &minor);
std::cout << "Hello, world, I am " << proc << " of " << nproc << ": " << name
<< " (version: " << major << "." << minor << ")" << std::endl;
shmem_finalize();
return 0;
}
!
! Copyright (c) 2014 Mellanox Technologies, Inc.
! All rights reserved.
! Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved.
! $COPYRIGHT$
!
! Additional copyrights may follow
!
! $HEADER$
!
program hello_oshmem
    implicit none
    include 'shmem.fh'

    ! Result types of the OpenSHMEM query functions called below.
    integer :: shmem_my_pe, shmem_n_pes

    integer :: proc, nproc
    integer :: major, minor, len
    character(len=SHMEM_MAX_NAME_LEN) :: name

    call shmem_init()

    ! Who am I, and how many PEs take part in the job?
    nproc = shmem_n_pes()
    proc = shmem_my_pe()

    ! Query the library identification; only the version numbers are
    ! printed, the name is fetched but not written out.
    call shmem_info_get_name(name)
    call shmem_info_get_version(major, minor)

    write(*, '("Hello, world, I am ", i2, " of ", i2, ": (version: ", i0, ".", i0, ")")') &
        proc, nproc, major, minor

    call shmem_finalize()
end program hello_oshmem
!
! Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
! University Research and Technology
! Corporation. All rights reserved.
! Copyright (c) 2004-2005 The Regents of the University of California.
! All rights reserved.
! Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved.
! $COPYRIGHT$
!
! Sample MPI "hello world" application using the Fortran mpi module
! bindings.
!
program main
    use mpi
    implicit none

    ! rank/size : position of this process in MPI_COMM_WORLD and its size
    ! len       : number of characters MPI wrote into "version"
    integer :: ierr, rank, size, len
    character(len=MPI_MAX_LIBRARY_VERSION_STRING) :: version

    call MPI_INIT(ierr)
    call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
    call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
    call MPI_GET_LIBRARY_VERSION(version, len, ierr)

    ! Print only the characters that were actually returned; writing the
    ! whole buffer would append the blank padding up to
    ! MPI_MAX_LIBRARY_VERSION_STRING characters to every line.
    write(*, '("Hello, world, I am ", i2, " of ", i2, ": ", a)') &
        rank, size, version(1:len)

    call MPI_FINALIZE(ierr)
end program main
! -*- f90 -*-
!
! Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
! University Research and Technology
! Corporation. All rights reserved.
! Copyright (c) 2004-2005 The Regents of the University of California.
! All rights reserved.
! Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved.
! Copyright (c) 2009-2012 Los Alamos National Security, LLC.
! All rights reserved.
! $COPYRIGHT$
!
! Sample MPI "hello world" application using the Fortran mpi_f08
! module bindings.
!
program main
    use mpi_f08
    implicit none

    ! rank/size : position of this process in MPI_COMM_WORLD and its size
    ! len       : number of characters MPI wrote into "version"
    integer :: rank, size, len
    character(len=MPI_MAX_LIBRARY_VERSION_STRING) :: version

    call MPI_INIT()
    call MPI_COMM_RANK(MPI_COMM_WORLD, rank)
    call MPI_COMM_SIZE(MPI_COMM_WORLD, size)
    call MPI_GET_LIBRARY_VERSION(version, len)

    ! Print only the characters that were actually returned; writing the
    ! whole buffer would append the blank padding up to
    ! MPI_MAX_LIBRARY_VERSION_STRING characters to every line.
    write(*, '("Hello, world, I am ", i2, " of ", i2, ": ", a)') &
        rank, size, version(1:len)

    call MPI_FINALIZE()
end program main
File added
/*
* Copyright (c) 2014-2016 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include <stdio.h>
#include <shmem.h>
/*
 * Each PE fetches one int from its right-hand neighbour (PE index + 1,
 * wrapping around to 0) with shmem_int_get, then waits at a barrier
 * before shutting down.
 */
int main (void)
{
    /* static storage: both objects are passed to shmem_int_get, the
       second one as the address read on the remote PE */
    static int fetched, exposed;
    int me, npes, neighbour;

    shmem_init();

    npes = shmem_n_pes();
    me = shmem_my_pe();
    neighbour = (me + 1) % npes;

    printf("Process %d gets message from %d (%d processes in ring)\n", me, neighbour, npes);
    shmem_int_get(&fetched, &exposed, 1, neighbour);

    shmem_barrier_all();
    printf("Process %d exiting\n", me);

    shmem_finalize();
    return 0;
}
/*
* Copyright (c) 2014-2016 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* reduce [0,1,2] + _my_pe() across 4 PEs with MAX()
*/
#include <stdio.h>
#include <string.h>
#include <shmem.h>
long pSync[_SHMEM_BCAST_SYNC_SIZE];
#define N 3
long src[N];
long dst[N];
long pWrk[_SHMEM_REDUCE_SYNC_SIZE];
int main(void)
{
int i;
int my_pe, num_pes;
for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i += 1) {
pSync[i] = _SHMEM_SYNC_VALUE;
}
shmem_init();
my_pe = shmem_my_pe();
num_pes = shmem_n_pes();
for (i = 0; i < N; i += 1) {
src[i] = my_pe + i;
}
shmem_barrier_all();
shmem_long_max_to_all(dst, src, N, 0, 0, num_pes, pWrk, pSync);
printf("%d/%d dst =", my_pe, num_pes);
for (i = 0; i < N; i+= 1) {
printf(" %ld", dst[i]);
}
printf("\n");
shmem_finalize();
return 0;
}
/*
* Copyright (c) 2014-2016 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* This sample allocates (shmem_malloc) symmetric memory (1 long integer),
* and then frees it. Success of allocation is not checked.
*
* Produces no output.
*/
#include <shmem.h>
/*
 * Allocate room for one long with shmem_malloc and release it again
 * with shmem_free.  The result of the allocation is deliberately not
 * inspected; the pointer is handed straight back to shmem_free.
 */
int main(void)
{
    long *x;

    shmem_init();

    x = (long *) shmem_malloc(sizeof(*x));
    shmem_free(x);

    shmem_finalize();

    /* Explicit exit status, matching the other examples. */
    return 0;
}
/*
* Copyright (c) 2014-2016 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* This program is an adaptation of examples found in the man pages
* of SGI’s SHMEM implementation.
*
* In this program, iput is used to select 5 elements from array source separated by
* a stride of 2 and write them to array target using a stride of 1.
*
* Given the array source = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }
* iput will select 5 elements from array source on PE 0, using a stride of 2:
*
* selected elements = { 1, 3, 5, 7, 9 }
*
* These elements will then be written to the array target on PE 1 using a stride of 1:
*
* target = { 1, 3, 5, 7, 9 }
*
*/
#include <stdio.h>
#include <shmem.h>
/*
 * PE 0 copies every second element of its local array "source"
 * (5 elements, source stride 2) into consecutive elements of "target"
 * on PE 1 (target stride 1); PE 1 then prints what it received.
 *
 * The example addresses PE 1 directly, so it needs at least two PEs.
 */
int main(void)
{
    short source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    static short target[10];
    int me, npes;

    shmem_init();
    me = shmem_my_pe();
    npes = shmem_n_pes();

    /* Without a PE 1 the put below would address a non-existent PE. */
    if (npes < 2) {
        if (me == 0) {
            fprintf(stderr, "This example requires at least 2 PEs\n");
        }
        shmem_finalize();
        return 1;
    }

    if (me == 0) {
        /* put 5 elements (every 2nd element of source) into target on PE 1 */
        shmem_short_iput(target, source, 1, 2, 5, 1);
    }

    shmem_barrier_all(); /* sync sender and receiver */

    if (me == 1) {
        printf("target on PE %d is %hd %hd %hd %hd %hd\n", me,
               target[0], target[1], target[2],
               target[3], target[4] );
    }

    shmem_barrier_all(); /* sync before exiting */
    shmem_finalize();
    return 0;
}
/*
* Copyright (c) 2014-2016 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include <stdio.h>
#include <shmem.h>
#define SIZE 16
/*
 * PE 0 fills a local array with 0..SIZE-1 and puts it into the static
 * array "target" of every other PE; after a barrier each of those PEs
 * prints the contents of its target.
 */
int main(int argc, char* argv[])
{
    short source[SIZE];          /* plain local array, used as put source */
    static short target[SIZE];   /* static array, used as remote destination */
    int k, pe;
    int me, npes;

    shmem_init();
    npes = shmem_n_pes();
    me = shmem_my_pe();

    if (0 == me) {
        /* fill the source array with its own indices */
        k = 0;
        while (k < SIZE) {
            source[k] = k;
            ++k;
        }

        /* put SIZE elements into target on every PE except PE 0 */
        pe = 1;
        while (pe < npes) {
            shmem_short_put(target, source, SIZE, pe);
            ++pe;
        }
    }

    shmem_barrier_all(); /* sync sender and receivers */

    if (0 != me) {
        printf("Target on PE %d is \t", me);
        k = 0;
        while (k < SIZE) {
            printf("%hd \t", target[k]);
            ++k;
        }
        printf("\n");
    }

    shmem_barrier_all(); /* sync before exiting */
    shmem_finalize();

    return 0;
}
/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
*
* Simple ring test program in C.
*/
#include <stdio.h>
#include "mpi.h"
/*
 * Ring test: rank 0 injects the value 10, every rank forwards what it
 * receives from its left neighbour to its right neighbour, and rank 0
 * decrements the value on each lap.  A rank leaves the loop right after
 * it has forwarded the value 0.
 */
int main(int argc, char *argv[])
{
    const int tag = 201;
    int my_rank, world_size;
    int left, right;
    int token;

    /* Start up MPI */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    /* Neighbours in the ring; the modulus makes the last rank wrap
       around to rank 0 (and rank 0 wrap back to the last rank). */
    right = (my_rank + 1) % world_size;
    left = (my_rank + world_size - 1) % world_size;

    /* Rank 0 decides how many laps are made and starts the ring. */
    if (my_rank == 0) {
        token = 10;

        printf("Process 0 sending %d to %d, tag %d (%d processes in ring)\n",
               token, right, tag, world_size);
        MPI_Send(&token, 1, MPI_INT, right, tag, MPI_COMM_WORLD);
        printf("Process 0 sent to %d\n", right);
    }

    /* Receive, let rank 0 count down, forward.  The value 0 is still
       forwarded before a rank stops, so every rank gets to see it. */
    do {
        MPI_Recv(&token, 1, MPI_INT, left, tag, MPI_COMM_WORLD,
                 MPI_STATUS_IGNORE);

        if (my_rank == 0) {
            token -= 1;
            printf("Process 0 decremented value: %d\n", token);
        }

        MPI_Send(&token, 1, MPI_INT, right, tag, MPI_COMM_WORLD);
    } while (token != 0);

    printf("Process %d exiting\n", my_rank);

    /* The last rank forwards the 0 once more to rank 0; that message
       has to be received before the program can exit. */
    if (my_rank == 0) {
        MPI_Recv(&token, 1, MPI_INT, left, tag, MPI_COMM_WORLD,
                 MPI_STATUS_IGNORE);
    }

    /* All done */
    MPI_Finalize();
    return 0;
}
//
// Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
// University Research and Technology
// Corporation. All rights reserved.
// Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
//
// Simple ring test program in C++.
//
// NOTE: The MPI C++ bindings were deprecated in MPI-2.2 and removed
// from the standard in MPI-3. Open MPI still provides C++ MPI
// bindings, but they are no longer built by default (and may be
// removed in a future version of Open MPI). You must
// --enable-mpi-cxx when configuring Open MPI to enable the MPI C++
// bindings.
//
#include "mpi.h"
#include <iostream>
//
// Ring test using the MPI C++ bindings: rank 0 injects the value 10,
// every rank forwards what it receives from its left neighbour to its
// right neighbour, and rank 0 decrements the value on each lap.  A rank
// leaves the loop right after it has forwarded the value 0.
//
int main(int argc, char *argv[])
{
    const int tag = 201;
    int my_rank, world_size;
    int left, right;
    int token;

    // Start up MPI
    MPI::Init();
    my_rank = MPI::COMM_WORLD.Get_rank();
    world_size = MPI::COMM_WORLD.Get_size();

    // Neighbours in the ring; the modulus makes the last rank wrap
    // around to rank 0 (and rank 0 wrap back to the last rank).
    right = (my_rank + 1) % world_size;
    left = (my_rank + world_size - 1) % world_size;

    // Rank 0 decides how many laps are made and starts the ring.
    if (my_rank == 0) {
        token = 10;

        std::cout << "Process 0 sending " << token << " to " << right
                  << ", tag " << tag << " (" << world_size
                  << " processes in ring)" << std::endl;
        MPI::COMM_WORLD.Send(&token, 1, MPI::INT, right, tag);
        std::cout << "Process 0 sent to " << right << std::endl;
    }

    // Receive, let rank 0 count down, forward.  The value 0 is still
    // forwarded before a rank stops, so every rank gets to see it.
    do {
        MPI::COMM_WORLD.Recv(&token, 1, MPI::INT, left, tag);

        if (my_rank == 0) {
            token -= 1;
            std::cout << "Process 0 decremented value: " << token
                      << std::endl;
        }

        MPI::COMM_WORLD.Send(&token, 1, MPI::INT, right, tag);
    } while (token != 0);

    std::cout << "Process " << my_rank << " exiting" << std::endl;

    // The last rank forwards the 0 once more to rank 0; that message
    // has to be received before the program can exit.
    if (my_rank == 0) {
        MPI::COMM_WORLD.Recv(&token, 1, MPI::INT, left, tag);
    }

    // All done
    MPI::Finalize();
    return 0;
}
C
C Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
C University Research and Technology
C Corporation. All rights reserved.
C Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved.
C $COPYRIGHT$
C
C Simple ring test program using the mpif.h Fortran bindings.
C
C Ring test: rank 0 starts with the value 10 and sends it to the next
C rank; every rank forwards what it receives, rank 0 decrements the
C value each time it comes back, and each rank leaves the loop right
C after it has forwarded the value 0.
program ring_f77
implicit none
include 'mpif.h'
C next / from are the ranks this process sends to / receives from.
integer rank, size, tag, next, from, message, ierr
C Start up MPI
call MPI_INIT(ierr)
call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
C Calculate the ranks of the next and the previous process in the
C ring. Use the modulus operator so that the last process "wraps
C around" to rank zero (and rank zero wraps back to the last rank).
tag = 201
next = mod((rank + 1), size)
from = mod((rank + size - 1), size)
C If we are the "master" process (i.e., MPI_COMM_WORLD rank 0), put
C the number of times to go around the ring in the message.
if (rank .eq. 0) then
message = 10
write(*, '("Process 0 sending ", i2, " to ", i2, " tag ",
& i3, " (", i2, " processes in ring)")')
& message, next, tag, size
call MPI_SEND(message, 1, MPI_INTEGER, next, tag,
& MPI_COMM_WORLD, ierr)
write(*, '("Process 0 sent to ", i2)')
& next
endif
C Pass the message around the ring. The exit mechanism works as
C follows: the message (a positive integer) is passed around the
C ring. Each time it passes rank 0, it is decremented. When each
C process receives a message containing a 0 value, it passes the
C message on to the next process and then quits. By passing the 0
C message first, every process gets the 0 message and can quit
C normally.
C Label 10 is the top of the receive/forward loop; label 20 is the
C statement reached once the value 0 has been forwarded.
10 call MPI_RECV(message, 1, MPI_INTEGER, from, tag,
& MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
if (rank .eq. 0) then
message = message - 1
write(*, '("Process 0 decremented value: ", i2)') message
endif
call MPI_SEND(message, 1, MPI_INTEGER, next, tag,
& MPI_COMM_WORLD, ierr)
if (message .eq. 0) then
write(*, '("Process ", i2, " exiting")') rank
goto 20
endif
goto 10
C The last process does one extra send to process 0, which needs to
C be received before the program can exit
20 if (rank .eq. 0) then
call MPI_RECV(message, 1, MPI_INTEGER, from, tag,
& MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
endif
C All done
call MPI_FINALIZE(ierr)
end
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include <shmem.h>
#include <stdio.h>
#if !defined(OSHMEM_SPEC_VERSION) || OSHMEM_SPEC_VERSION < 10200
#error This application uses API 1.2 and up
#endif
/*
 * OpenSHMEM ring test: PE 0 puts the value 10 into "rbuf" on the next
 * PE; every PE waits until its rbuf equals the value it expects,
 * forwards it to its neighbour, and counts its own copy down so that it
 * knows which value to wait for on the following lap.
 */
int main (int argc, char * argv[])
{
    /* static: this is the object remote PEs write into */
    static int rbuf = -1;
    int me, npes, right;
    int token = 10;

    shmem_init();
    npes = shmem_n_pes();
    me = shmem_my_pe();

    /* PE index of the right-hand neighbour; the modulus makes the last
       PE wrap around to PE 0. */
    right = (me + 1) % npes;

    if (0 == me) {
        printf("Process 0 puts message %d to %d (%d processes in ring)\n", token, right, npes);
        shmem_int_put(&rbuf, &token, 1, right);
    }

    /* PE 0 decrements the value before forwarding it; every other PE
       forwards the value unchanged and only afterwards lowers its local
       copy to the value expected on the next lap. */
    while (token > 0) {
        shmem_int_wait_until(&rbuf, SHMEM_CMP_EQ, token);

        if (0 == me) {
            token -= 1;
            printf("Process 0 decremented value: %d\n", token);
            shmem_int_put(&rbuf, &token, 1, right);
        } else {
            shmem_int_put(&rbuf, &token, 1, right);
            token -= 1;
        }
    }

    shmem_finalize();

    /* All done */
    printf("Process %d exiting\n", me);
    return 0;
}
!
! Copyright (c) 2014 Mellanox Technologies, Inc.
! All rights reserved.
! Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
! $COPYRIGHT$
!
! Additional copyrights may follow
!
! $HEADER$
!
program ring_oshmem
    implicit none
    include 'shmem.fh'

    ! rbuf is the object remote PEs write into; message is the local
    ! copy of the value travelling around the ring.
    integer*8, save :: rbuf
    integer*8 :: message
    integer :: proc, nproc, next
    ! Result types of the OpenSHMEM query functions called below.
    integer :: shmem_my_pe, shmem_n_pes

    rbuf = -1
    message = 10

    ! Use the same initialisation/query routines as the other OpenSHMEM
    ! examples instead of the deprecated start_pes/my_pe/num_pes.
    call shmem_init()
    proc = shmem_my_pe()
    nproc = shmem_n_pes()

    ! Calculate the PE number of the next process in the ring.  Use the
    ! modulus operator so that the last process "wraps around" to PE 0.
    next = mod((proc + 1), nproc)

    if (proc .eq. 0) then
        write(*, '("Process 0 sending ", i2, " to", i2, " (", i2, " processes in ring)")') &
            message, next, nproc
        call shmem_put8(rbuf, message, 1, next)
        write(*, '("Process 0 sent to ", i2)') next
    end if

    ! Pass the message around the ring.  Every PE waits until its rbuf
    ! holds the value it expects and forwards it.  PE 0 decrements the
    ! value before forwarding; the other PEs forward it unchanged and
    ! then lower their local copy to the value expected on the next lap.
    do while (message .gt. 0)
        call shmem_int8_wait_until(rbuf, SHMEM_CMP_EQ, message)

        if (proc .eq. 0) then
            message = message - 1
            write(*, '("Process 0 decremented value:", i2)') message
        end if

        call shmem_put8(rbuf, message, 1, next)

        if (proc .gt. 0) then
            message = message - 1
        end if
    end do

    ! All done
    write(*, '("Process", i2," exiting.")') proc

    ! Release the OpenSHMEM resources acquired by shmem_init.
    call shmem_finalize()
end program ring_oshmem
!
! Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
! University Research and Technology
! Corporation. All rights reserved.
! Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved.
! $COPYRIGHT$
!
! Simple ring test program using the Fortran mpi module bindings.
!
program ring
    use mpi
    implicit none

    ! my_rank/nranks : position in MPI_COMM_WORLD and its size
    ! right/left     : ranks this process sends to / receives from
    ! token          : the value travelling around the ring
    integer :: my_rank, nranks, tag, right, left, ierr, nelems, token

    ! Start up MPI
    call mpi_init(ierr)
    call mpi_comm_rank(MPI_COMM_WORLD, my_rank, ierr)
    call mpi_comm_size(MPI_COMM_WORLD, nranks, ierr)

    ! Neighbours in the ring; the modulus makes the last rank wrap
    ! around to rank zero (and rank zero wrap back to the last rank).
    tag = 201
    right = mod((my_rank + 1), nranks)
    left = mod((my_rank + nranks - 1), nranks)

    ! Rank 0 decides how many laps are made and starts the ring.
    if (my_rank == 0) then
        token = 10

        write(*, '("Process 0 sending ", i2, " to ", i2, " tag ", i3, " (", i2, " processes in ring)")') &
            token, right, tag, nranks
        call mpi_send(token, 1, MPI_INTEGER, right, tag, MPI_COMM_WORLD, ierr)
        write(*, '("Process 0 sent to ", i2)') right
    end if

    ! Receive, let rank 0 count down, forward.  The value 0 is still
    ! forwarded before a rank leaves the loop, so every rank sees it.
    nelems = 1
    do
        call mpi_recv(token, nelems, MPI_INTEGER, left, tag, MPI_COMM_WORLD, &
                      MPI_STATUS_IGNORE, ierr)

        if (my_rank == 0) then
            token = token - 1
            write(*, '("Process 0 decremented value: ", i2)') token
        end if

        call mpi_send(token, 1, MPI_INTEGER, right, tag, MPI_COMM_WORLD, ierr)

        if (token == 0) exit
    end do

    write(*, '("Process ", i2, " exiting")') my_rank

    ! The last rank forwards the 0 once more to rank 0; that message has
    ! to be received before the program can exit.
    if (my_rank == 0) then
        call mpi_recv(token, 1, MPI_INTEGER, left, tag, MPI_COMM_WORLD, &
                      MPI_STATUS_IGNORE, ierr)
    end if

    ! All done
    call mpi_finalize(ierr)
end program
! -*- f90 -*-
!
! Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
! University Research and Technology
! Corporation. All rights reserved.
! Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved.
! Copyright (c) 2009-2012 Los Alamos National Security, LLC.
! All rights reserved.
! $COPYRIGHT$
!
! Simple ring test program using the Fortran mpi_f08 module bindings.
!
program ring
    use mpi_f08
    implicit none

    ! my_rank/nranks : position in MPI_COMM_WORLD and its size
    ! right/left     : ranks this process sends to / receives from
    ! token          : the value travelling around the ring
    integer :: my_rank, nranks, tag, right, left, nelems, token

    ! Start up MPI
    call mpi_init()
    call mpi_comm_rank(MPI_COMM_WORLD, my_rank)
    call mpi_comm_size(MPI_COMM_WORLD, nranks)

    ! Neighbours in the ring; the modulus makes the last rank wrap
    ! around to rank zero (and rank zero wrap back to the last rank).
    tag = 201
    right = mod((my_rank + 1), nranks)
    left = mod((my_rank + nranks - 1), nranks)

    ! Rank 0 decides how many laps are made and starts the ring.
    if (my_rank == 0) then
        token = 10

        write(*, '("Process 0 sending ", i2, " to ", i2, " tag ", i3, " (", i2, " processes in ring)")') &
            token, right, tag, nranks
        call mpi_send(token, 1, MPI_INTEGER, right, tag, MPI_COMM_WORLD)
        write(*, '("Process 0 sent to ", i2)') right
    end if

    ! Receive, let rank 0 count down, forward.  The value 0 is still
    ! forwarded before a rank leaves the loop, so every rank sees it.
    nelems = 1
    do
        call mpi_recv(token, nelems, MPI_INTEGER, left, tag, MPI_COMM_WORLD, &
                      MPI_STATUS_IGNORE)

        if (my_rank == 0) then
            token = token - 1
            write(*, '("Process 0 decremented value: ", i2)') token
        end if

        call mpi_send(token, 1, MPI_INTEGER, right, tag, MPI_COMM_WORLD)

        if (token == 0) exit
    end do

    write(*, '("Process ", i2, " exiting")') my_rank

    ! The last rank forwards the 0 once more to rank 0; that message has
    ! to be received before the program can exit.
    if (my_rank == 0) then
        call mpi_recv(token, 1, MPI_INTEGER, left, tag, MPI_COMM_WORLD, &
                      MPI_STATUS_IGNORE)
    end if

    ! All done
    call mpi_finalize()
end program
/*
* Copyright (c) 2018 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
*
* Simple example usage of SPCs through MPI_T.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mpi.h"
/* Sends 'num_messages' messages of 'message_size' bytes from rank 0 to rank 1.
 * All messages are sent with blocking MPI_Send/MPI_Recv calls and with the
 * same tag in MPI_COMM_WORLD.  Ranks other than 0 and 1 only allocate and
 * free the buffer.
 *
 * A negative 'message_size' or a failed buffer allocation aborts the job.
 */
void message_exchange(int num_messages, int message_size)
{
    int i, rank;
    char *data;
    MPI_Status status;

    if(message_size < 0) {
        fprintf(stderr, "ERROR: message_size must be non-negative (got %d).\n", message_size);
        MPI_Abort(MPI_COMM_WORLD, -1);
    }

    /* Use calloc to initialize data to 0's.  Always request at least one
     * byte so that a NULL result unambiguously means "out of memory". */
    data = (char*)calloc(message_size > 0 ? (size_t)message_size : 1, sizeof(char));
    if(NULL == data) {
        fprintf(stderr, "ERROR: Couldn't allocate a %d byte message buffer.\n", message_size);
        MPI_Abort(MPI_COMM_WORLD, -1);
    }

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if(rank == 0) {
        for(i = 0; i < num_messages; i++)
            MPI_Send(data, message_size, MPI_BYTE, 1, 123, MPI_COMM_WORLD);
    } else if(rank == 1) {
        for(i = 0; i < num_messages; i++)
            MPI_Recv(data, message_size, MPI_BYTE, 0, 123, MPI_COMM_WORLD, &status);
    }

    free(data);
}
int main(int argc, char **argv)
{
int num_messages, message_size;
if(argc < 3) {
printf("Usage: mpirun -np 2 --mca mpi_spc_attach all --mca mpi_spc_dump_enabled true ./spc_example [num_messages] [message_size]\n");
return -1;
} else {
num_messages = atoi(argv[1]);
message_size = atoi(argv[2]);
}
int i, rank, size, provided, num, name_len, desc_len, verbosity, bind, var_class, readonly, continuous, atomic, count, index;
MPI_Datatype datatype;
MPI_T_enum enumtype;
MPI_Comm comm;
char name[256], description[256];
/* Counter names to be read by ranks 0 and 1 */
char *counter_names[] = {"runtime_spc_OMPI_BYTES_SENT_USER",
"runtime_spc_OMPI_BYTES_RECEIVED_USER" };
MPI_Init(NULL, NULL);
MPI_T_init_thread(MPI_THREAD_SINGLE, &provided);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if(size != 2) {
fprintf(stderr, "ERROR: This test should be run with two MPI processes.\n");
MPI_Abort(MPI_COMM_WORLD, -1);
}
/* Determine the MPI_T pvar indices for the OMPI_BYTES_SENT/RECIEVED_USER SPCs */
index = -1;
MPI_T_pvar_get_num(&num);
for(i = 0; i < num; i++) {
name_len = desc_len = 256;
PMPI_T_pvar_get_info(i, name, &name_len, &verbosity,
&var_class, &datatype, &enumtype, description, &desc_len, &bind,
&readonly, &continuous, &atomic);
if(strcmp(name, counter_names[rank]) == 0) {
index = i;
printf("[%d] %s -> %s\n", rank, name, description);
}
}
/* Make sure we found the counters */
if(index == -1) {
fprintf(stderr, "ERROR: Couldn't find the appropriate SPC counter in the MPI_T pvars.\n");
MPI_Abort(MPI_COMM_WORLD, -1);
}
int ret;
long long value;
MPI_T_pvar_session session;
MPI_T_pvar_handle handle;
/* Create the MPI_T sessions/handles for the counters and start the counters */
ret = MPI_T_pvar_session_create(&session);
ret = MPI_T_pvar_handle_alloc(session, index, NULL, &handle, &count);
ret = MPI_T_pvar_start(session, handle);
message_exchange(num_messages, message_size);
ret = MPI_T_pvar_read(session, handle, &value);
/* Print the counter values in order by rank */
for(i = 0; i < 2; i++) {
if(i == rank) {
printf("[%d] Value Read: %lld\n", rank, value);
fflush(stdout);
}
MPI_Barrier(MPI_COMM_WORLD);
}
/* Stop the MPI_T session, free the handle, and then free the session */
ret = MPI_T_pvar_stop(session, handle);
ret = MPI_T_pvar_handle_free(session, &handle);
ret = MPI_T_pvar_session_free(&session);
MPI_T_finalize();
MPI_Finalize();
return 0;
}
#!/usr/bin/env python
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch.autograd import Function
from torchvision import datasets, transforms
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import cudaq
from cudaq import spin
# GPU utilities
# List every target reported by cudaq.get_targets().
for tar in cudaq.get_targets():
    print(f'{tar.description} {tar.name} {tar.platform} {tar.simulator} {tar.num_qpus}')

cudaq.set_target("default")  # Select the CUDA Quantum target named "default"

# Query the CUDA runtime only when a device is present: current_device()
# and get_device_name() raise on CPU-only hosts, which would defeat the
# CPU fallback chosen for `device` below.  The results are printed because
# a bare expression statement in a script discards them.
if torch.cuda.is_available():
    print(f'CUDA devices: {torch.cuda.device_count()}, '
          f'current: {torch.cuda.current_device()}, '
          f'device 0: {torch.cuda.get_device_name(0)}')
else:
    print('CUDA is not available, falling back to the CPU')

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Training set: FashionMNIST restricted to the first `sample_count`
# images of label 0 followed by the first `sample_count` of label 1.
sample_count = 140

X_train = datasets.FashionMNIST(root="./data",
                                train=True,
                                download=True,
                                transform=transforms.Compose(
                                    [transforms.ToTensor()]))

# Leaving only labels 0 and 1
train_zero = np.where(X_train.targets == 0)[0][:sample_count]
train_one = np.where(X_train.targets == 1)[0][:sample_count]
idx = np.append(train_zero, train_one)

X_train.data = X_train.data[idx]
X_train.targets = X_train.targets[idx]

train_loader = torch.utils.data.DataLoader(X_train, batch_size=1, shuffle=True)

# Test set: same selection on the held-out split, with fewer samples.
sample_count = 70

X_test = datasets.FashionMNIST(root="./data",
                               train=False,
                               download=True,
                               transform=transforms.Compose(
                                   [transforms.ToTensor()]))

test_zero = np.where(X_test.targets == 0)[0][:sample_count]
test_one = np.where(X_test.targets == 1)[0][:sample_count]
idx = np.append(test_zero, test_one)

X_test.data = X_test.data[idx]
X_test.targets = X_test.targets[idx]

test_loader = torch.utils.data.DataLoader(X_test, batch_size=1, shuffle=True)
class QuantumCircuit:
    """Parameterised CUDA Quantum kernel plus a helper that evaluates it.

    The kernel applies ``h`` to the allocated qubits and then ``ry`` and
    ``rx`` on qubit 0, driven by the first two entries of the parameter
    list. ``run`` returns the value obtained by observing ``spin.z(0)``.
    """

    def __init__(self, qubit_count: int):
        """Build the kernel for ``qubit_count`` qubits."""
        kernel, thetas = cudaq.make_kernel(list)
        qubits = kernel.qalloc(qubit_count)

        kernel.h(qubits)

        # Variational gate parameters which are optimised during training
        kernel.ry(thetas[0], qubits[0])
        kernel.rx(thetas[1], qubits[0])

        self.kernel = kernel
        self.theta = thetas

    def run(self, thetas: torch.tensor) -> torch.tensor:
        """Evaluate the kernel for ``thetas`` and return the result as a
        tensor placed on the module-level ``device``."""
        observed = cudaq.observe(self.kernel, spin.z(0), thetas)
        return torch.tensor(observed.expectation_z(), device=device)
class QuantumFunction(Function):
    """Autograd bridge: forwards parameters to a circuit object and
    supplies the gradient by evaluating the circuit at shifted parameters."""

    @staticmethod
    def forward(ctx, thetas: torch.tensor, quantum_circuit,
                shift) -> torch.tensor:
        # Keep everything backward() needs on the context object.
        ctx.quantum_circuit = quantum_circuit
        ctx.shift = shift

        # Calculate exp_val
        result = ctx.quantum_circuit.run(thetas)
        ctx.save_for_backward(thetas, result)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        """For every parameter, evaluate the circuit at +shift and -shift
        and use half of the difference as that parameter's gradient."""
        thetas, _ = ctx.saved_tensors
        circuit = ctx.quantum_circuit
        param_count = len(thetas)

        gradients = torch.zeros(param_count, device=device)
        for position in range(param_count):
            plus = thetas.clone()
            plus[position] += ctx.shift
            minus = thetas.clone()
            minus[position] -= ctx.shift

            value_plus = circuit.run(plus)
            value_minus = circuit.run(minus)
            gradients[position] = 0.5 * (value_plus - value_minus)

        # Only `thetas` is differentiable; the circuit object and the
        # shift receive no gradient.
        return gradients * grad_output.float(), None, None
class QuantumLayer(nn.Module):
    """``nn.Module`` wrapper that owns a one-qubit ``QuantumCircuit`` and
    routes its input through ``QuantumFunction``."""

    def __init__(self, shift: torch.tensor):
        super().__init__()
        # 1 qubit quantum circuit
        self.quantum_circuit = QuantumCircuit(1)
        self.shift = shift

    def forward(self, input):
        return QuantumFunction.apply(input, self.quantum_circuit, self.shift)
class Net(nn.Module):
    """Two convolution layers and two linear layers feeding a
    ``QuantumLayer``; the output is the pair ``(x, 1 - x)``."""

    def __init__(self):
        super().__init__()

        # Neural network structure
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.dropout = nn.Dropout2d()
        self.fc1 = nn.Linear(256, 64)
        # Output a 2D tensor since we have 2 variational parameters in our
        # quantum circuit
        self.fc2 = nn.Linear(64, 2)
        # Input is the magnitude of the parameter shifts to calculate
        # gradients
        self.hybrid = QuantumLayer(torch.tensor(np.pi / 2))

    def forward(self, x):
        features = F.max_pool2d(F.relu(self.conv1(x)), 2)
        features = F.max_pool2d(F.relu(self.conv2(features)), 2)
        features = self.dropout(features)

        # Flatten everything into a single row.
        flat = features.view(1, -1)
        hidden = F.relu(self.fc1(flat))

        # Reshapes required to satisfy input dimensions to CUDAQ
        angles = self.fc2(hidden).reshape(-1)
        expectation = self.hybrid(angles).reshape(-1)

        return torch.cat((expectation, 1 - expectation), -1).unsqueeze(0)
# We move our model to the CUDA device to minimise data transfer between GPU and CPU
model = Net().to(device)
print(model)

optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_func = nn.NLLLoss().to(device)

epochs = 20
epoch_loss = []  # mean training loss of every epoch

model.train()
for epoch in range(epochs):
    batch_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):  # batch training
        optimizer.zero_grad()
        data, target = data.to(device), target.to(device)

        # Forward pass
        output = model(data).to(device)

        # Calculating loss
        loss = loss_func(output, target).to(device)

        # Backward pass
        loss.backward()

        # Optimize the weights
        optimizer.step()

        batch_loss += loss.item()

    # Average over the number of batches.  `batch_idx` is the zero-based
    # index of the last batch, so dividing by it would be off by one (and
    # would divide by zero for a single batch).
    epoch_loss.append(batch_loss / len(train_loader))

    print("Training [{:.0f}%]\tLoss: {:.4f}".format(
        100.0 * (epoch + 1) / epochs, epoch_loss[-1]))

plt.plot(epoch_loss)
plt.title("Hybrid NN Training Convergence")
plt.xlabel("Training Iterations")
plt.ylabel("Neg Log Likelihood Loss")

# Testing on the test set
test_loss = []  # kept apart from epoch_loss so the training curve stays clean
model.eval()
with torch.no_grad():
    correct = 0
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)

        output = model(data).to(device)

        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

        loss = loss_func(output, target)
        test_loss.append(loss.item())

    # `correct` counts samples, so normalise by the number of samples
    # rather than by the number of batches.
    print("Performance on test data:\n\tAccuracy: {:.1f}%".format(
        correct / len(test_loader.dataset) * 100))