From ad664941db3e8f806b1ce867a9d2416ffcfedc3d Mon Sep 17 00:00:00 2001 From: Paul Gannay Date: Tue, 7 Jan 2025 16:56:34 +0100 Subject: [PATCH 1/5] Corrects some typos --- courses/01_beginners/main.tex | 68 +++++++++++++++++------------------ 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/courses/01_beginners/main.tex b/courses/01_beginners/main.tex index 7beb9f8..3d39656 100644 --- a/courses/01_beginners/main.tex +++ b/courses/01_beginners/main.tex @@ -218,7 +218,7 @@ \section{Introduction} \item Operating system \end{itemize} \item Tens to hundred of cores in biggest processors - \item SIMD (Single Instruction Multiple Data) units for accelerating arithmetic operations + \item SIMD (Single Instruction Multiple Data) units to accelerate arithmetic operations \end{itemize} \end{column} \end{columns} @@ -299,7 +299,7 @@ \section{Introduction} \begin{frame}{Performance, portability and productivity} \begin{columns}[T] \begin{column}{0.45\linewidth} - What \emph{all developers} want + What \emph{every developer} wants \vspace{1em} @@ -509,7 +509,7 @@ \subsection{Compilation} % _____________________________________________________________________________ -\begin{frame}{Compiling Kokkos for a specific archiecture} +\begin{frame}{Compiling Kokkos for a specific architecture} \begin{itemize} \item Architecture CMake options \texttt{-DKokkos\_ARCH\_=ON} can be specified for best performance \item Replace \texttt{} with the architecture name of the target host and device @@ -853,7 +853,7 @@ \subsection{Compilation} \includegraphics[width=0.8\textwidth]{layout_right_left.png} \end{center} \begin{itemize} - \item Layout is the way to map multidimensional indices to the linear memory + \item Layout is the way multidimensional indices map to linear memory \item Kokkos provides an abstraction of the data layout \item The default layout of a View depends on the backend (Host or Device) \item Not covered in this talk @@ -917,8 +917,8 @@ \subsection{Compilation} 
\begin{itemize} \item Template argument \texttt{MemorySpace} to specify the memory space when creating a view \item Several ones available - \item Beginners usually does not set the memory space (we'll see why) - \item Using a backend-specific memory space possible, but breaks portability + \item Beginners usually do not set the memory space (we'll see why) + \item Using a backend-specific memory space is possible, but breaks portability \end{itemize} \end{column} \end{columns} @@ -1014,7 +1014,7 @@ \subsection{Compilation} \begin{column}{0.6\linewidth} \begin{minted}{C++} Kokkos::View - device_matrix(matrix", Nx, Ny); + device_matrix("matrix", Nx, Ny); auto host_matrix = Kokkos::create_mirror(device_matrix); @@ -1096,11 +1096,9 @@ \subsection{Parallel loops} % _____________________________________________________________________________ -\begin{frame}[fragile]{Comparison of Kokkos and OpenMP parallel loops} - \begin{columns}[T] +\begin{frame}[fragile]{Anatomy of a Kokkos parallel loop} + \begin{columns} \begin{column}{0.5\linewidth} - Kokkos version - \begin{minted}{C++} Kokkos::parallel_for( "my_loop", @@ -1110,31 +1108,25 @@ \subsection{Parallel loops} } ); \end{minted} - \begin{block}{Note} - Works on CPUs and GPUs depending on the compile backend - \end{block} \end{column} \begin{column}{0.5\linewidth} - OpenMP version - - \begin{minted}{C++} - #pragma omp parallel for - for (int i = 0; i < N; i++) { - A(i) = B(i) + C(i) * D(i); - } - \end{minted} - \begin{block}{Note} - Only works on CPUs (need \texttt{target} directive for GPUs) - \end{block} + \begin{itemize} + \item \mintinline{C++}{parallel_for} is the loop pattern + \item \mintinline{C++}{"my_loop"} is the name of the loop (bonus for debugging!) 
+ \item \mintinline{C++}{N} and \mintinline{C++}{int i} is the execution policy (can't be simpler) + \item \mintinline[breaklines]{C++}{KOKKOS_LAMBDA (int i) {/*...*/}} is the kernel + \end{itemize} \end{column} \end{columns} \end{frame} % _____________________________________________________________________________ -\begin{frame}[fragile]{Anatomy of a Kokkos parallel loop} - \begin{columns} +\begin{frame}[fragile]{Comparison of Kokkos and OpenMP parallel loops} + \begin{columns}[T] \begin{column}{0.5\linewidth} + Kokkos version + \begin{minted}{C++} Kokkos::parallel_for( "my_loop", @@ -1144,14 +1136,22 @@ \subsection{Parallel loops} } ); \end{minted} + \begin{block}{Note} + Works on CPUs and GPUs depending on the compile backend + \end{block} \end{column} \begin{column}{0.5\linewidth} - \begin{itemize} - \item \mintinline{C++}{parallel_for} is the loop pattern - \item \mintinline{C++}{"my_loop"} is the name of the loop (bonus for debugging!) - \item \mintinline{C++}{N} and \mintinline{C++}{int i} is the execution policy (can't be simpler) - \item \mintinline[breaklines]{C++}{KOKKOS_LAMBDA (int i) {/*...*/}} is the kernel - \end{itemize} + OpenMP version + + \begin{minted}{C++} + #pragma omp parallel for + for (int i = 0; i < N; i++) { + A(i) = B(i) + C(i) * D(i); + } + \end{minted} + \begin{block}{Note} + Only works on CPUs (need \texttt{target} directive for GPUs) + \end{block} \end{column} \end{columns} \end{frame} @@ -1346,7 +1346,7 @@ \subsection{Extending loop policies} \item Execution space to control where the loop is executed (CPU or GPU) \item Several ones available \item \texttt{Kokkos::DefaultExecutionSpace} and \texttt{Kokkos::DefaultHostExecutionSpace} are enough for beginners - \item Using a backend-specific execution space possible, but breaks portability + \item Using a backend-specific execution space is possible, but it breaks portability \end{itemize} \begin{center} \begin{tblr}[theme=kokkostable]{colspec=lll, row{2}={bg=lightmain}} From 
843881820fd26f5eb4b49d8df4d0a051d99ad4e2 Mon Sep 17 00:00:00 2001 From: Paul Gannay Date: Tue, 7 Jan 2025 16:56:34 +0100 Subject: [PATCH 2/5] Corrects some typos --- courses/01_beginners/main.tex | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/courses/01_beginners/main.tex b/courses/01_beginners/main.tex index 3d39656..0b8956c 100644 --- a/courses/01_beginners/main.tex +++ b/courses/01_beginners/main.tex @@ -696,12 +696,12 @@ \subsection{Compilation} \item No need to allocate or deallocate memory by hand \item Vendor-specific memory allocation is hidden \item Unified semantic and portable memory management (CPU and GPU) - \item Advanced capability (abstracted layout, subarray, multidimensionality, etc.) + \item Advanced capabilities (abstracted layout, subarray, multidimensionality, etc.) \end{itemize} \item Kokkos provides the View class \begin{itemize} \item View is an abstraction of the notion of container and multidimensional array - \item Bring portable Python numpy/Fortran-like syntax + \item Brings portable Python numpy/Fortran-like syntax \end{itemize} \end{itemize} \end{frame} @@ -773,7 +773,7 @@ \subsection{Compilation} \includegraphics[width=0.65\textwidth]{view_memory.png} \end{center} \begin{itemize} - \item A View data lives in a specific memory space (Host or Device), not both + \item A View's data lives only in a specific memory space (Host or Device), not both \item Suppose we created a view on the Host \end{itemize} \end{frame} @@ -917,8 +917,8 @@ \subsection{Compilation} \begin{itemize} \item Template argument \texttt{MemorySpace} to specify the memory space when creating a view \item Several ones available - \item Beginners usually do not set the memory space (we'll see why) - \item Using a backend-specific memory space is possible, but breaks portability + \item Using a backend-specific memory space is possible, but it breaks portability + \item Beginners usually do not need to set the memory space (we'll see why) 
\end{itemize} \end{column} \end{columns} @@ -942,7 +942,7 @@ \subsection{Compilation} \item A \highlight{mirror view} represents a view in a different memory space \item Similar to its original view (shape, layout, etc.) \item Created with \texttt{create\_mirror} - \item Used to create a host mirror of a device view + \item Used to create a mirror of a device view on the host \end{itemize} \end{column} \end{columns} From 84a185a8fa617688330c768949dadbe9c2d1b567 Mon Sep 17 00:00:00 2001 From: Paul Gannay Date: Thu, 9 Jan 2025 11:09:31 +0100 Subject: [PATCH 3/5] Apply request for changes --- courses/01_beginners/main.tex | 52 +++++++++++++++++------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/courses/01_beginners/main.tex b/courses/01_beginners/main.tex index 1631f6d..ff6ec1e 100644 --- a/courses/01_beginners/main.tex +++ b/courses/01_beginners/main.tex @@ -1110,32 +1110,6 @@ \subsection{Parallel loops} % _____________________________________________________________________________ -\begin{frame}[fragile]{Anatomy of a Kokkos parallel loop} - \begin{columns} - \begin{column}{0.5\linewidth} - \begin{minted}{C++} - Kokkos::parallel_for( - "my_loop", - N, - KOKKOS_LAMBDA (int i) { - A(i) = B(i) + C(i) * D(i); - } - ); - \end{minted} - \end{column} - \begin{column}{0.5\linewidth} - \begin{itemize} - \item \mintinline{C++}{parallel_for} is the loop pattern - \item \mintinline{C++}{"my_loop"} is the name of the loop (bonus for debugging!) 
- \item \mintinline{C++}{N} and \mintinline{C++}{int i} is the execution policy (can't be simpler) - \item \mintinline[breaklines]{C++}{KOKKOS_LAMBDA (int i) {/*...*/}} is the kernel - \end{itemize} - \end{column} - \end{columns} -\end{frame} - -% _____________________________________________________________________________ - \begin{frame}[fragile]{Comparison of Kokkos and OpenMP parallel loops} \begin{columns}[T] \begin{column}{0.5\linewidth} @@ -1176,6 +1150,32 @@ \subsection{Parallel loops} % _____________________________________________________________________________ +\begin{frame}[fragile]{Anatomy of a Kokkos parallel loop} + \begin{columns} + \begin{column}{0.5\linewidth} + \begin{minted}{C++} + Kokkos::parallel_for( + "my_loop", + N, + KOKKOS_LAMBDA (int i) { + A(i) = B(i) + C(i) * D(i); + } + ); + \end{minted} + \end{column} + \begin{column}{0.5\linewidth} + \begin{itemize} + \item \mintinline{C++}{parallel_for} is the loop pattern + \item \mintinline{C++}{"my_loop"} is the name of the loop (bonus for debugging!) 
+ \item \mintinline{C++}{N} and \mintinline{C++}{int i} is the execution policy (can't be simpler) + \item \mintinline[breaklines]{C++}{KOKKOS_LAMBDA (int i) {/*...*/}} is the kernel + \end{itemize} + \end{column} + \end{columns} +\end{frame} + +% _____________________________________________________________________________ + \begin{frame}[fragile]{Notion of C++ lambdas} \begin{columns} \begin{column}{0.5\linewidth} From fcca5783f909278060179bc3aa941bbd3743adf2 Mon Sep 17 00:00:00 2001 From: Paul Gannay Date: Thu, 9 Jan 2025 11:21:17 +0100 Subject: [PATCH 4/5] Swap column for openmp loop and Kokkos --- courses/01_beginners/main.tex | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/courses/01_beginners/main.tex b/courses/01_beginners/main.tex index ff6ec1e..3947e2c 100644 --- a/courses/01_beginners/main.tex +++ b/courses/01_beginners/main.tex @@ -1112,6 +1112,19 @@ \subsection{Parallel loops} \begin{frame}[fragile]{Comparison of Kokkos and OpenMP parallel loops} \begin{columns}[T] + \begin{column}{0.5\linewidth} + OpenMP version + + \begin{minted}{C++} + #pragma omp parallel for + for (int i = 0; i < N; i++) { + A(i) = B(i) + C(i) * D(i); + } + \end{minted} + \begin{block}{Note} + Only works on CPUs (need \texttt{target} directive for GPUs) + \end{block} + \end{column} \begin{column}{0.5\linewidth} Kokkos version @@ -1125,25 +1138,12 @@ \subsection{Parallel loops} ); \end{minted} \begin{block}{Note} - Works on CPUs and GPUs depending on the compile backend - \end{block} - \end{column} - \begin{column}{0.5\linewidth} - OpenMP version - - \begin{minted}{C++} - #pragma omp parallel for - for (int i = 0; i < N; i++) { - A(i) = B(i) + C(i) * D(i); - } - \end{minted} - \begin{block}{Note} - Only works on CPUs (need \texttt{target} directive for GPUs) + Works on CPUs and GPUs depending on the compiled backend \end{block} \end{column} \end{columns} - \vspace{1em} + \vspace{0.5em} \structure{Question:} What is this 
\texttt{KOKKOS\_LAMBDA} thingy? \end{frame} From 512ed6d8836c51cd49aea592602f35cf880c6f2f Mon Sep 17 00:00:00 2001 From: Paul Gannay Date: Thu, 9 Jan 2025 11:27:45 +0100 Subject: [PATCH 5/5] Move sentence that had been placed on the wrong slide by the previous commit --- courses/01_beginners/main.tex | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/courses/01_beginners/main.tex b/courses/01_beginners/main.tex index 3947e2c..e85e5f4 100644 --- a/courses/01_beginners/main.tex +++ b/courses/01_beginners/main.tex @@ -1142,10 +1142,6 @@ \subsection{Parallel loops} \end{block} \end{column} \end{columns} - - \vspace{0.5em} - - \structure{Question:} What is this \texttt{KOKKOS\_LAMBDA} thingy? \end{frame} % _____________________________________________________________________________ @@ -1172,6 +1168,10 @@ \subsection{Parallel loops} \end{itemize} \end{column} \end{columns} + + \vspace{1em} + + \structure{Question:} What is this \texttt{KOKKOS\_LAMBDA} thingy? \end{frame} % _____________________________________________________________________________