Commit

Markdown for tutorials. More parallel shortcourse files.

kah3f committed Jan 12, 2024
1 parent 2852991 commit 6b1a7ca
Showing 96 changed files with 3,304 additions and 60 deletions.
Binary file modified content/courses/parallel-computing-introduction/codes/a.out
@@ -0,0 +1,77 @@
#include <iostream>
#include <iomanip>
#include <string>
#include <sstream>
#include <cstdlib>  // rand, srand
#include <ctime>    // time
#include <random>
#include <cmath>
#include <vector>
#include "mpi.h"


using namespace std;

int random_int(int n, int m) {
    //quick and stupid way to get an integer between n and m; using the full
    //C++ <random> machinery is better but more complex
    return n + rand() % (m - n + 1);
}

double do_work() {
    //hardcoded bounds for convenience
    int nsteps = random_int(10000, 30000);
    float result = 0.;
    for (int i = 0; i < nsteps; ++i) {
        result += i;
    }
    return result;
}

int main(int argc, char **argv) {

    int nprocs, rank;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    //Don't use rand and srand if good random numbers are needed
    unsigned int seed = (unsigned) time(NULL);
    //taken from some University course site
    unsigned int my_seed = (seed & 0xFFFFFFF0) | (rank + 1);
    srand(my_seed);

    vector<float> results(nprocs);

    int done = 0;
    float result;
    int sender;

    if (rank == 0) {
        for (int i = 1; i < nprocs; ++i) {
            MPI_Recv(   // TODO: receive one worker's result (MPI_ANY_SOURCE, MPI_ANY_TAG)
            sender=     // TODO: obtain the sender's rank from the status object
            results[sender] = result;
            done = 1;
            MPI_Send(&done, 1, MPI_INT, sender, 0, MPI_COMM_WORLD);
        }
    } else {
        for (int n = 1; n < nprocs; ++n) {
            if (rank == n) {
                result = do_work();
                MPI_Send(   // TODO: send this rank's result to the manager
                MPI_Recv(   // TODO: receive the "done" flag from the manager
            }
        }
    }

    float total = 0;
    if (rank == 0) {
        for (int i = 1; i < nprocs; ++i) {
            total += results[i];
        }
        cout << "The final result is " << total << "\n";
    }

    MPI_Finalize();

}
@@ -0,0 +1,169 @@
module Random
   implicit none
   ! Comment out one or the other when the module is incorporated into a code.

   ! Single precision
   integer, parameter :: rk = kind(1.0)

   ! Double precision
   !integer, parameter :: rk = kind(1.0d0)

contains

   subroutine get_random_seed(seed)
      ! To use this subroutine, seed must be declared allocatable in the
      ! calling unit.
      integer, dimension(:), allocatable, intent(out) :: seed
      integer :: isize

      call random_seed(size=isize)
      if (.not. allocated(seed)) allocate(seed(isize))
      call random_seed(get=seed)

   end subroutine get_random_seed

   subroutine set_random_seed(seed)
      ! Sets all elements of the seed array
      integer, optional, intent(in) :: seed

      integer :: isize
      integer, dimension(:), allocatable :: iseed
      integer, dimension(8) :: idate
      integer, parameter :: default_seed=2345678

      call get_random_seed(iseed)

      if ( .not. present(seed) ) then
         call date_and_time(values=idate)
         ! idate(8) contains millisecond
         if ( all(iseed .ne. 0) ) then
            iseed = iseed * (idate(8))
         else
            iseed = default_seed * (idate(8))
         endif
      else
         iseed = int(seed)
      endif

      call random_seed(put=iseed)

   end subroutine set_random_seed

   function urand(lb,ub,seed)
      ! Returns a uniformly-distributed random number in the range lb to ub.
      real(rk) :: urand
      real(rk), optional, intent(in) :: lb,ub
      real(rk), optional, intent(in) :: seed

      integer :: iseed
      real(rk) :: rnd
      real(rk) :: lower,upper

      if ( present(seed) ) then
         iseed = int(seed)
         call set_random_seed(iseed)
      endif

      if ( present(lb) ) then
         lower = lb
      else
         lower = 0.0_rk
      endif

      if ( present(ub) ) then
         upper = ub
      else
         upper = 1.0_rk
      endif

      call random_number(rnd)
      urand = lower + (upper-lower)*rnd

      return
   end function urand

   function randint(n,m)
      ! Returns a random integer between n and m.
      integer :: randint
      integer, intent(in) :: n,m

      randint = ceiling(urand(real(n-1,rk),real(m,rk)))

   end function randint

end module Random

program manager_worker
   use random
   use mpi
   implicit none

   integer :: i, n

   integer :: nprocs, rank, sender, ierr
   integer, dimension(MPI_STATUS_SIZE) :: status

   integer :: done=0
   real    :: my_result, total=0.
   real, dimension(:), allocatable :: results
   integer, dimension(:), allocatable :: seed

   interface
      function do_work() result(my_result)
         use random
         real :: my_result
      end function do_work
   end interface

   call MPI_Init(ierr)
   call MPI_Comm_size(MPI_COMM_WORLD, nprocs, ierr)
   call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)

   allocate(results(nprocs))
   results = 0.

   ! The topic of handling random-number generation in parallel programs is
   ! an issue in applied mathematics and beyond our scope here. We just
   ! use the rank to spread out the seeds somewhat.
   call get_random_seed(seed)
   seed = seed*(rank+1)
   call set_random_seed(seed(1))

   if (rank == 0) then
      do i=1,nprocs-1
         call MPI_Recv(    ! TODO: receive one worker's result (MPI_ANY_SOURCE, MPI_ANY_TAG)
         sender=           ! TODO: obtain the sender's rank from the status array
         results(sender) = my_result
         done = 1
         call MPI_Send(done,1,MPI_INTEGER,sender,0,MPI_COMM_WORLD,ierr)
      enddo
   else
      do n=1,nprocs-1
         if (rank == n) then
            my_result = do_work()
            call MPI_Send(my_result,1,MPI_REAL,0,rank,MPI_COMM_WORLD,ierr)
            call MPI_Recv(done,1,MPI_INTEGER,0,MPI_ANY_TAG,MPI_COMM_WORLD,status,ierr)
         endif
      enddo
   endif

   total = sum(results)
   if (rank == 0) then
      write(*,*) "The final result is",total
   endif

   call MPI_Finalize(ierr)

end program

function do_work() result(my_result)
   use random
   implicit none
   real    :: my_result
   integer :: i, nsteps

   !hardcoded bounds for convenience
   nsteps = randint(10000,30000)

   my_result = 0.
   do i=1,nsteps
      my_result = my_result + i
   enddo
end function
@@ -0,0 +1,47 @@
import sys
import numpy as np
from mpi4py import MPI

def do_work():
    nsteps=rng.integers(low=10000, high=30000)
    result=0
    for i in range(nsteps):
        result+=i
    return np.array([result],dtype='float')

comm=MPI.COMM_WORLD
nprocs=comm.Get_size()
rank=comm.Get_rank()

status=MPI.Status()

# The topic of handling random-number generation in parallel programs is
# an issue in applied mathematics and beyond our scope here. Called with no
# argument, default_rng() draws a fresh seed from the OS on each process,
# which spaces out the streams well enough for this exercise.
# rng is global
rng = np.random.default_rng()

done=np.array([0],dtype='intc')  # a C int, to match MPI.INT in the Send below
results=np.zeros(nprocs)

if rank==0:
    result=np.empty(1)
    for i in range(1,nprocs):
        comm.Recv(....    # TODO: receive one worker's result (use MPI.ANY_SOURCE, MPI.ANY_TAG)
        sender=           # TODO: obtain the sender's rank from the status object
        results[sender]=result
        done[:]=1
        comm.Send([done,MPI.INT],dest=sender)
else:
    for n in range(1,nprocs):
        if (rank==n):
            result=do_work()
            comm.Send(    # TODO: send this rank's result to the manager
            comm.Recv(    # TODO: receive the "done" flag from the manager

total=np.sum(results)
if rank==0:
print(f"The final result is {total:e}")

# mpi4py initializes MPI on import and finalizes it automatically at exit,
# so no explicit cleanup call is needed here.
@@ -107,6 +107,7 @@ Python
```python
MPI.Finalize()
```
For `mpi4py`, `Finalize` must be invoked only if `Init` was explicitly called.

`Finalize` must be the last routine after all other MPI library calls. It allows the system to free up MPI resources. It does not have to be the last executable statement, but no more MPI routines may be invoked after it.
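For instance, a minimal `mpi4py` program typically needs no explicit call at all. A sketch (the printout is just an illustration):
```python
from mpi4py import MPI   # MPI is initialized automatically on import

comm = MPI.COMM_WORLD
print(f"Hello from rank {comm.Get_rank()} of {comm.Get_size()}")

# No MPI.Finalize() needed here: mpi4py registers finalization to run
# automatically when the interpreter exits.
```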

@@ -0,0 +1,83 @@
---
title: "MPI Project Set 2"
toc: true
type: docs
weight: 65
menu:
parallel_programming:
parent: Distributed-Memory Programming
---

## Project 4

A "token ring" is a circular messaging system. Think of a relay, with a message or "baton" being passed from one runner to the next, but around a circle so that the last "runner" passes it back to the first. Write a program that implements this with MPI. Hint: separate the ranks into root and everybody else. You may wish to use MPI_ANY_TAG and MPI_ANY_SOURCE.

#### Example Solutions
{{< spoiler text="C++" >}}
{{< code-download file="/courses/parallel-computing-introduction/solns/ring.cxx" lang="c++" >}}
{{< /spoiler >}}
{{< spoiler text="Fortran" >}}
{{< code-download file="/courses/parallel-computing-introduction/solns/ring.f90" lang="fortran" >}}
{{< /spoiler >}}
{{< spoiler text="Python" >}}
{{< code-download file="/courses/parallel-computing-introduction/solns/ring.py" lang="python" >}}
{{< /spoiler >}}

## Project 5

Write a program in which all processes send a message to their left and receive from their right. Your program should handle all process counts appropriately.

1. Left end sends nothing, right end receives nothing.
2. Make the messages circular, i.e. 0 sends to np-1 and np-1 receives from 0.
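For variant 1, one approach (sketched with `mpi4py`; the buffer names are arbitrary) is to use `MPI_PROC_NULL` for the missing neighbors, which turns the end-point communications into no-ops, together with `Sendrecv` so each send is paired with its receive:
```python
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nprocs = comm.Get_size()

# neighbors for "send left, receive from right"; the ends talk to PROC_NULL
left  = rank - 1 if rank > 0 else MPI.PROC_NULL
right = rank + 1 if rank < nprocs - 1 else MPI.PROC_NULL

sendbuf = np.array([float(rank)])
recvbuf = np.zeros(1)

comm.Sendrecv(sendbuf, dest=left, recvbuf=recvbuf, source=right)

# recvbuf is untouched on the rightmost rank, whose source is PROC_NULL
print(f"Rank {rank} received {recvbuf[0]}")
```
For variant 2, replacing the two conditionals with `(rank-1) % nprocs` and `(rank+1) % nprocs` makes the pattern circular.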

#### Example Solutions
{{< spoiler text="C++" >}}
{{< code-download file="/courses/parallel-computing-introduction/solns/shift_1.cxx" lang="c++" >}}
{{< code-download file="/courses/parallel-computing-introduction/solns/shift_2.cxx" lang="c++" >}}
{{< /spoiler >}}
{{< spoiler text="Fortran" >}}
{{< code-download file="/courses/parallel-computing-introduction/solns/shift_1.f90" lang="fortran" >}}
{{< code-download file="/courses/parallel-computing-introduction/solns/shift_2.f90" lang="fortran" >}}
{{< /spoiler >}}
{{< spoiler text="Python" >}}
{{< code-download file="/courses/parallel-computing-introduction/solns/shift_1.py" lang="python" >}}
{{< code-download file="/courses/parallel-computing-introduction/solns/shift_2.py" lang="python" >}}
{{< /spoiler >}}

## Project 6

Now write a program in which all processes send a message to their left and receive from their right, then send a different message to the right and receive from the left. Your program should handle all process counts appropriately.

1. Left end sends only to the right, right end receives only from the left.
2. Make the messages circular, i.e. 0 sends to np-1 and np-1 receives from 0.

In this case, watch out for deadlock or unsafe patterns.
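A deadlock-free way to organize the exchanges (an `mpi4py` sketch with arbitrary buffer names, shown for the circular variant) is to pair each send with its matching receive through `Sendrecv`, so no process blocks waiting on a receive that has not been posted:
```python
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nprocs = comm.Get_size()

left  = (rank - 1) % nprocs
right = (rank + 1) % nprocs

to_left    = np.array([float(rank)])
to_right   = np.array([float(rank * 10)])
from_left  = np.zeros(1)
from_right = np.zeros(1)

# send left / receive from right, then send right / receive from left;
# Sendrecv posts each send together with its receive, which avoids deadlock
comm.Sendrecv(to_left,  dest=left,  recvbuf=from_right, source=right)
comm.Sendrecv(to_right, dest=right, recvbuf=from_left,  source=left)

print(f"Rank {rank}: {from_right[0]} from the right, {from_left[0]} from the left")
```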


## Project 7

A common pattern in parallel programming is the _manager-worker_ structure. A "manager" process distributes work to "worker" processes. Often the manager code is set off by an `if rank==0` block (the manager should nearly always be rank 0), while the other ranks execute the "worker" code. The manager can instead spawn distinct worker processes, but that requires the more advanced MPI `spawn` capability, so in this project we will use a single code. Usually the manager hands work to the workers, which return results; the manager then gives more work to whichever processes are ready, until everything is completed. A sketch of that general pattern follows. For this example, however, the workers will do only one round of work.
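The sketch below (an `mpi4py` illustration, not part of the course materials; the task count and the squared-number "work" are placeholders) shows how a manager can keep dispatching tasks to whichever worker reports back first:
```python
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nprocs = comm.Get_size()
status = MPI.Status()

ntasks = 4 * (nprocs - 1)   # arbitrary amount of work

if rank == 0:
    next_task = 0
    stop = np.array([-1], dtype='int')   # a negative task number means "stop"
    # prime each worker with one task
    for w in range(1, nprocs):
        comm.Send(np.array([next_task], dtype='int'), dest=w)
        next_task += 1
    finished = 0
    while finished < nprocs - 1:
        result = np.zeros(1)
        # take the result from whichever worker finishes first
        comm.Recv(result, source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
        worker = status.Get_source()
        if next_task < ntasks:
            comm.Send(np.array([next_task], dtype='int'), dest=worker)
            next_task += 1
        else:
            comm.Send(stop, dest=worker)
            finished += 1
else:
    task = np.zeros(1, dtype='int')
    while True:
        comm.Recv(task, source=0)
        if task[0] < 0:
            break
        result = np.array([float(task[0]) ** 2])   # stand-in for real work
        comm.Send(result, dest=0)
```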

Starting from the stub for your language, complete the send and receive calls.

#### Starting Codes
{{< spoiler text="C++" >}}
{{< code-download file="/courses/parallel-computing-introduction/codes/manager_worker_stub.cxx" lang="c++" >}}
{{< /spoiler >}}
{{< spoiler text="Fortran" >}}
{{< code-download file="/courses/parallel-computing-introduction/codes/manager_worker_stub.f90" lang="fortran" >}}
{{< /spoiler >}}
{{< spoiler text="Python" >}}
{{< code-download file="/courses/parallel-computing-introduction/solns/manager_worker.py" lang="python" >}}
{{< /spoiler >}}

#### Example Solutions
{{< spoiler text="C++" >}}
{{< code-download file="/courses/parallel-computing-introduction/solns/manager_worker.cxx" lang="c++" >}}
{{< /spoiler >}}
{{< spoiler text="Fortran" >}}
{{< code-download file="/courses/parallel-computing-introduction/solns/manager_worker.f90" lang="fortran" >}}
{{< /spoiler >}}
{{< spoiler text="Python" >}}
{{< code-download file="/courses/parallel-computing-introduction/solns/manager_worker.py" lang="python" >}}
{{< /spoiler >}}