Parallel Processing (CS 730)
Lecture 6: Message Passing using MPI
Jeremy R. Johnson
Parts of this lecture were derived from chapters 3-5 and 11 of Pacheco.
Introduction
Objective: To introduce distributed memory parallel programming using message passing, and the MPI standard for message passing.
Topics
–Introduction to MPI (hello.c, hello.f)
–Example problem (numeric integration)
–Collective communication
–Performance model
MPI: Message Passing Interface
Distributed memory model
–Single Program Multiple Data (SPMD)
–Communication using message passing (Send/Recv)
–Collective communication: Broadcast, Reduce (Allreduce), Gather (Allgather), Scatter, Alltoall
Benefits/Disadvantages
No new language is required
Portable
Good performance
Explicitly forces the programmer to deal with local/global access
Harder to program than shared memory – requires larger program/algorithm changes
Further Information
Textbook: Pacheco, Parallel Programming with MPI
Basic MPI Functions

int MPI_Init(
    int*     argc          /* in/out */,
    char***  argv          /* in/out */)

int MPI_Finalize(void)

int MPI_Comm_size(
    MPI_Comm communicator           /* in  */,
    int*     number_of_processors   /* out */)

int MPI_Comm_rank(
    MPI_Comm communicator   /* in  */,
    int*     my_rank        /* out */)
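A minimal sketch (not from the text) tying these four calls together; every name is standard MPI, only the printed message is made up:

#include <stdio.h>
#include "mpi.h"

int main(int argc, char* argv[]) {
    int p;        /* number of processes  */
    int my_rank;  /* rank of this process */

    MPI_Init(&argc, &argv);                   /* start up MPI */
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    printf("Process %d of %d\n", my_rank, p);

    MPI_Finalize();                           /* shut down MPI */
    return 0;
}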
Send
The message must be packaged in an envelope containing the destination, the size, an identifying tag, and the set of processors participating in the communication (the communicator).

int MPI_Send(
    void*        message       /* in */,
    int          count         /* in */,
    MPI_Datatype datatype      /* in */,
    int          dest          /* in */,
    int          tag           /* in */,
    MPI_Comm     communicator  /* in */)
Receive

int MPI_Recv(
    void*        message       /* out */,
    int          count         /* in  */,
    MPI_Datatype datatype      /* in  */,
    int          source        /* in  */,
    int          tag           /* in  */,
    MPI_Comm     communicator  /* in  */,
    MPI_Status*  status        /* out */)
Status
status->MPI_SOURCE
status->MPI_TAG
status->MPI_ERROR

int MPI_Get_count(
    MPI_Status*  status      /* in  */,
    MPI_Datatype datatype    /* in  */,
    int*         count_ptr   /* out */)
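A short sketch (assumed to run inside an initialized MPI program, on the receiving side) showing how the status object and MPI_Get_count are used together:

char buf[100];
MPI_Status status;
int count;

/* accept a message from any sender with any tag */
MPI_Recv(buf, 100, MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG,
         MPI_COMM_WORLD, &status);

/* how many MPI_CHARs actually arrived, and from whom */
MPI_Get_count(&status, MPI_CHAR, &count);
printf("received %d chars from process %d with tag %d\n",
       count, status.MPI_SOURCE, status.MPI_TAG);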
hello.c

#include <stdio.h>
#include <string.h>
#include "mpi.h"

int main(int argc, char* argv[]) {
    int my_rank;          /* rank of process            */
    int p;                /* number of processes        */
    int source;           /* rank of sender             */
    int dest;             /* rank of receiver           */
    int tag = 0;          /* tag for messages           */
    char message[100];    /* storage for message        */
    MPI_Status status;    /* return status for receive  */

    /* Start up MPI */
    MPI_Init(&argc, &argv);

    /* Find out process rank */
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    /* Find out number of processes */
    MPI_Comm_size(MPI_COMM_WORLD, &p);
hello.c (continued)

    if (my_rank != 0) {
        /* create message */
        sprintf(message, "Greetings from process %d!\n", my_rank);
        dest = 0;
        /* use strlen + 1 so that '\0' gets transmitted */
        MPI_Send(message, strlen(message)+1, MPI_CHAR,
                 dest, tag, MPI_COMM_WORLD);
    } else {
        for (source = 1; source < p; source++) {
            MPI_Recv(message, 100, MPI_CHAR, source, tag,
                     MPI_COMM_WORLD, &status);
            printf("%s\n", message);
        }
    }

    /* Shut down MPI */
    MPI_Finalize();
    return 0;
}
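With four processes, for example, process 0 prints "Greetings from process 1!", "Greetings from process 2!", and "Greetings from process 3!" in rank order; the other processes produce no output.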
IBM S80
An SMP with up to 24 processors (RS64 III processors)

Name: Goopi.coe.drexel.edu
Machine type: S80 12-way with 8 Gb RAM
Specifications:
–2 x 6-way 450 MHz RS64 III processor cards, 8 Mb L2 cache
–2 x 4096 Mb memory
–9 x 18.2 Gb Ultra SCSI hot-swappable hard disk drives

Name: bagha.coe.drexel.edu
Machine type: 44P, 4-way, with 2 Gb RAM
Specifications:
–2 x 2-way 375 MHz POWER3-II processors, 4 Mb L2 cache
–4 x 512 Mb SDRAM DIMMs
–2 x 9.1 Gb Ultra SCSI HDD
Compiling and Executing MPI Programs on the IBM S80
To compile a C program with MPI calls
–mpcc hello.c -o hello
To run an MPI program you must use the parallel operating environment (POE)
–Create a hostfile in your home directory with goopi.coe.drexel.edu listed twelve times
–Set the environment variable MP_HOSTFILE to point to this file
–You may also set the environment variable MP_PROCS to indicate the number of processes used; this can also be set on the command line
–poe file -procs PROCS
Compiling and Executing MPI Programs on Suns
To compile a C program with MPI calls
–mpicc -o foo foo.c
To run an MPI program you must use
–mpirun -np num_procs foo
A default host file is available
Broadcast

int MPI_Bcast(
    void*        message       /* in/out */,
    int          count         /* in */,
    MPI_Datatype datatype      /* in */,
    int          root          /* in */,
    MPI_Comm     communicator  /* in */)
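A sketch of a typical use (the variables a, b, n are borrowed from the numeric integration example and are assumptions here, as is running inside an initialized MPI program): process 0 reads the input and broadcasts it to every process in the communicator.

float a, b;   /* integration limits   */
int   n;      /* number of trapezoids */

if (my_rank == 0)
    scanf("%f %f %d", &a, &b, &n);   /* only the root reads input */

MPI_Bcast(&a, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
MPI_Bcast(&b, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
MPI_Bcast(&n, 1, MPI_INT,   0, MPI_COMM_WORLD);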
dot.c

#include <stdio.h>

float Serial_dot(
    float x[]  /* in */,
    float y[]  /* in */,
    int   n    /* in */) {

    int i;
    float sum = 0.0;

    for (i = 0; i < n; i++)
        sum = sum + x[i]*y[i];
    return sum;
}
Parallel Dot

float Parallel_dot(
    float local_x[]  /* in */,
    float local_y[]  /* in */,
    int   n_bar      /* in */) {

    float local_dot;
    float dot;

    local_dot = Serial_dot(local_x, local_y, n_bar);
    MPI_Reduce(&local_dot, &dot, 1, MPI_FLOAT,
               MPI_SUM, 0, MPI_COMM_WORLD);
    return dot;   /* meaningful only on process 0 */
}
Parallel All Dot

float Parallel_all_dot(
    float local_x[]  /* in */,
    float local_y[]  /* in */,
    int   n_bar      /* in */) {

    float local_dot;
    float dot;

    local_dot = Serial_dot(local_x, local_y, n_bar);
    MPI_Allreduce(&local_dot, &dot, 1, MPI_FLOAT,
                  MPI_SUM, MPI_COMM_WORLD);
    return dot;   /* every process gets the result */
}
Parallel Matrix-Vector Product

void Parallel_matrix_vector_product(
    LOCAL_MATRIX_T local_A,
    int            m,
    int            n,
    float          local_x[],
    float          global_x[],
    float          local_y[],
    int            local_m,
    int            local_n) {

    /* local_m = m/p, local_n = n/p */
    int i, j;

    MPI_Allgather(local_x, local_n, MPI_FLOAT,
                  global_x, local_n, MPI_FLOAT,
                  MPI_COMM_WORLD);
    for (i = 0; i < local_m; i++) {
        local_y[i] = 0.0;
        for (j = 0; j < n; j++)
            local_y[i] = local_y[i] + local_A[i][j]*global_x[j];
    }
}
Reduce

int MPI_Reduce(
    void*        operand       /* in  */,
    void*        result        /* out */,
    int          count         /* in  */,
    MPI_Datatype datatype      /* in  */,
    MPI_Op       operator      /* in  */,
    int          root          /* in  */,
    MPI_Comm     communicator  /* in  */)

Operators
–MPI_MAX, MPI_MIN, MPI_SUM, MPI_PROD, MPI_LAND, MPI_BAND, MPI_LOR, MPI_BOR, MPI_LXOR, MPI_BXOR, MPI_MAXLOC, MPI_MINLOC
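For example (a sketch, assumed to run inside an initialized MPI program), a global maximum can be computed the same way the dot product above computed a global sum:

float local_max;   /* computed by each process beforehand  */
float global_max;  /* defined only on the root afterwards  */

MPI_Reduce(&local_max, &global_max, 1, MPI_FLOAT,
           MPI_MAX, 0, MPI_COMM_WORLD);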
AllReduce

int MPI_Allreduce(
    void*        operand       /* in  */,
    void*        result        /* out */,
    int          count         /* in  */,
    MPI_Datatype datatype      /* in  */,
    MPI_Op       operator      /* in  */,
    MPI_Comm     communicator  /* in  */)

Operators
–MPI_MAX, MPI_MIN, MPI_SUM, MPI_PROD, MPI_LAND, MPI_BAND, MPI_LOR, MPI_BOR, MPI_LXOR, MPI_BXOR, MPI_MAXLOC, MPI_MINLOC
Gather

int MPI_Gather(
    void*        send_data     /* in  */,
    int          send_count    /* in  */,
    MPI_Datatype send_type     /* in  */,
    void*        recv_data     /* out */,
    int          recv_count    /* in  */,
    MPI_Datatype recv_type     /* in  */,
    int          root          /* in  */,
    MPI_Comm     communicator  /* in  */)

[Figure: blocks x0, x1, x2, x3 on processes 0-3 are collected onto the root process.]
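A sketch of gathering a block-distributed vector onto the root (LOCAL_N and N are assumed constants with N = p * LOCAL_N, inside an initialized MPI program):

float local_x[LOCAL_N];   /* this process's block         */
float global_x[N];        /* significant only on the root */

MPI_Gather(local_x,  LOCAL_N, MPI_FLOAT,
           global_x, LOCAL_N, MPI_FLOAT,
           0, MPI_COMM_WORLD);   /* recv_count is per process */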
Scatter

int MPI_Scatter(
    void*        send_data     /* in  */,
    int          send_count    /* in  */,
    MPI_Datatype send_type     /* in  */,
    void*        recv_data     /* out */,
    int          recv_count    /* in  */,
    MPI_Datatype recv_type     /* in  */,
    int          root          /* in  */,
    MPI_Comm     communicator  /* in  */)

[Figure: the root's blocks x0, x1, x2, x3 are distributed, one block to each of processes 0-3.]
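The mirror image of the gather sketch above (same assumed constants): the root hands each process its own block.

float global_x[N];        /* filled in on the root only */
float local_x[LOCAL_N];   /* each process's block       */

MPI_Scatter(global_x, LOCAL_N, MPI_FLOAT,
            local_x,  LOCAL_N, MPI_FLOAT,
            0, MPI_COMM_WORLD);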
AllGather

int MPI_Allgather(
    void*        send_data     /* in  */,
    int          send_count    /* in  */,
    MPI_Datatype send_type     /* in  */,
    void*        recv_data     /* out */,
    int          recv_count    /* in  */,
    MPI_Datatype recv_type     /* in  */,
    MPI_Comm     communicator  /* in  */)

[Figure: blocks x0, x1, x2, x3 from processes 0-3 are gathered onto every process.]
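A sketch (same assumed constants as above) of the call used in the matrix-vector product: every process ends up with the complete vector, so no root argument is needed.

float local_x[LOCAL_N];
float global_x[N];        /* valid on every process afterwards */

MPI_Allgather(local_x,  LOCAL_N, MPI_FLOAT,
              global_x, LOCAL_N, MPI_FLOAT,
              MPI_COMM_WORLD);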