diff --git a/doc/UserManual.pdf b/doc/UserManual.pdf index b5eb655ba..4d0ba4a51 100644 Binary files a/doc/UserManual.pdf and b/doc/UserManual.pdf differ diff --git a/doc/UserManual.tex b/doc/UserManual.tex index 27b2d08e7..c19c947b5 100644 --- a/doc/UserManual.tex +++ b/doc/UserManual.tex @@ -9,7 +9,7 @@ \usepackage{longtable} \usepackage{amsmath} \usepackage{amsfonts} -\usepackage{subfigure} +\usepackage{subcaption} \usepackage{afterpage} \usepackage[calcwidth]{titlesec} \usepackage{verbatim} @@ -24,6 +24,7 @@ \usepackage{epstopdf} \usepackage{tcolorbox} \usepackage{amssymb} +\usepackage{pgfplots} \setmarginsrb{2cm}{2cm}{2cm}{2cm}{0cm}{0cm}{0cm}{0.5cm}%{left}{top}{right}{bottom}{headhgt}{} %\numberwithin{equation}{section} %Bibliography style: @@ -1979,6 +1980,52 @@ \section{Generate HORSES3D solution file from OpenFOAM result} \end{longtable} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\chapter{Performance} +\section{Hybrid MPI and OMP} +For large simulations, particularly cases with more than 5,000,000 degrees of freedom (NDOF) that run on a substantial number of CPU cores, it is advisable to balance the use of MPI and OMP. Figure~\ref{fig: OMP_MPI_sharedfaces} illustrates the relative speed-up achieved for varying ratios of shared faces in the partitioning and for different total numbers of cores. The hybrid setup reaches peak performance at approximately $\left(\frac{\mathit{nMPIFacesShared}}{\mathit{nMPIFaces}}\right)_{\max} \approx 0.4$--$0.6$, particularly when the total number of cores is high. Using this range as a reference point is recommended. In smaller cases, maximizing the number of OMP threads is recommended.
+\begin{figure}[htbp] % Relative speed-up of the hybrid MPI-OMP runs; a lone [h] is downgraded to [ht] with a warning
+\centering
+    \begin{tikzpicture}
+        \begin{axis}[
+            xlabel={$\left(\frac{\mathit{nMPIFacesShared}}{\mathit{nMPIFaces}}\right)_{\max}$}, % \mathit keeps each identifier typeset as one word
+            ylabel={Relative Speed-Up},
+            label style={font=\small},
+            xmin=0, xmax=1,
+            ymin=0.6, ymax=1.7,
+            axis lines=left,
+            axis line style={-stealth, line width=2pt}, % Arrow tip and thickness of the axis lines
+            xtick={0, 0.2, 0.4, 0.6, 0.8, 1.0},
+            ytick={0.5,1.0,1.5},
+            ylabel style={at={(ticklabel cs:0.3)}, anchor=south west, inner sep=0pt, yshift=-10mm}, % Position of the y-axis label
+            grid=both,
+            ymajorgrids=true, % Horizontal major grid lines
+            xmajorgrids=true, % Vertical major grid lines
+            major grid style={line width=0.2pt, gray!50}, % xcolor's base colour is spelled "gray"; "grey" is undefined
+            width=0.48\textwidth,
+            legend pos=south east
+        ]
+        % One curve per total core count; x = maximum shared-face ratio, y = relative speed-up
+        \addplot[blue, thick, mark=*] coordinates {(0.029829545,1.05811138) (0.111114264,1.179805616) (0.216105632,0.957074025) (0.390092879,0.947117469) (0.652849741,1)};
+        \addlegendentry{nCores = 32}
+
+        \addplot[red, dashed, thick, mark=x] coordinates {(0.111114264,1.31696086) (0.216105632,1.462915601) (0.390092879,1.600746269) (0.652849741,1.383870968) (0.984375,1)};
+        \addlegendentry{nCores = 64}
+
+        \addplot[black, dashdotted, thick, mark=o] coordinates {(0.216105632,1.237316725) (0.390092879,1.37962963) (0.652849741,1.281892728) (0.984375,1)};
+        \addlegendentry{nCores = 128}
+
+        % Frame around the plot area
+        \draw[line width=1pt] (current axis.south west) rectangle (current axis.north east); % Rectangle spanning the axis bounding box
+        \end{axis}
+    \end{tikzpicture}
+\caption{Relative speed-up of the hybrid MPI-OMP setup as a function of the maximum ratio of shared faces in the partitions. NS solver with $64\times64\times32$ elements and polynomial order $N=3$.}
+\label{fig: OMP_MPI_sharedfaces} % Must follow \caption so that \ref resolves to the figure number
+\end{figure}
+
+
+
 \clearpage