From f260967b3ded2f142b7ddca9d639dddbc7049eae Mon Sep 17 00:00:00 2001 From: James Gabbard Date: Fri, 8 Sep 2023 21:58:54 +0000 Subject: [PATCH 01/19] :sparkles: allow LGF with 2unb + 1spe --- src/Solver.cpp | 5 +++-- src/green_functions.cpp | 8 ++++---- test/src/convergence_test.cpp | 20 ++++++-------------- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/src/Solver.cpp b/src/Solver.cpp index 8fd4774..56badcd 100644 --- a/src/Solver.cpp +++ b/src/Solver.cpp @@ -1056,7 +1056,6 @@ void Solver::cmptGreenFunction_(Topology *topo[3], double *green, FFTW_plan_dim FLUPS_INFO(">> using Green function type %d on 3 dir unbounded", typeGreen_); cmpt_Green_3dirunbounded(topo[0], hfact, symstart, green, typeGreen_, kernelLength); } else if ((n_unbounded) == 2) { - FLUPS_CHECK(!(typeGreen_ == LGF_2 && nbr_spectral == 1), "You cannot use LGF with one spectral direction!!"); FLUPS_INFO(">> using Green function of type %d on 2 dir unbounded", typeGreen_); cmpt_Green_2dirunbounded(topo[0], hfact, kfact, koffset, symstart, green, typeGreen_, kernelLength); } else if ((n_unbounded) == 1) { @@ -1109,7 +1108,9 @@ void Solver::cmptGreenFunction_(Topology *topo[3], double *green, FFTW_plan_dim /** - Complete the Green function in 2dirunbounded regularized case: we rewrite on the whole domain * except the plane where k=0 in the spectral direction, as this was correctly computed. */ // No need to scale this as that part of the Green function has a volfact = 1 - if (ndim_ == 3 && nbr_spectral == 1 && (typeGreen_ == HEJ_2 || typeGreen_ == HEJ_4 || typeGreen_ == HEJ_6 || typeGreen_ == HEJ_8 || typeGreen_ == HEJ_10)) { + const bool is_hej = (typeGreen_ == HEJ_2 || typeGreen_ == HEJ_4 || typeGreen_ == HEJ_6 || typeGreen_ == HEJ_8 || typeGreen_ == HEJ_10); + const bool is_lgf = (typeGreen_ == LGF_2 || typeGreen_ == LGF_4 || typeGreen_ == LGF_6 || typeGreen_ == LGF_8); + if (ndim_ == 3 && nbr_spectral == 1 && (is_hej || is_lgf)) { int istart_cstm[3] = {0, 0, 0}; // global for (int ip = 0; ip < 3; ip++) { diff --git a/src/green_functions.cpp b/src/green_functions.cpp index b769bf3..210bbe0 100644 --- a/src/green_functions.cpp +++ b/src/green_functions.cpp @@ -276,7 +276,7 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const // caution: the value of G in k=r=0 is specified at the end of this routine break; case LGF_2: - FLUPS_CHECK(hfact[2] < 1.0e-14, "This LGF cannot be called in a 3D problem -> h[3] = %e",hfact[3]); + FLUPS_WARNING("LGF kernels in 2dirunbounded 1dirspectral entail an approximation in 3D."); FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs"); // read the LGF data and store it lgf_readfile_(LGF_2, 2, &GN, &Gdata); @@ -286,7 +286,7 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const Gr0 = &lgf_2_2unb0spe_; break; case LGF_4: - FLUPS_CHECK(hfact[2] < 1.0e-14, "This LGF cannot be called in a 3D problem -> h[3] = %e",hfact[3]); + FLUPS_WARNING("LGF kernels in 2dirunbounded 1dirspectral entail an approximation in 3D."); FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs"); lgf_readfile_(LGF_4, 2, &GN, &Gdata); G = &zero_; @@ -294,7 +294,7 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const Gr0 = &lgf_4_2unb0spe_; break; case LGF_6: - FLUPS_CHECK(hfact[2] < 1.0e-14, "This LGF cannot be called in a 3D problem -> h[3] = %e",hfact[3]); + FLUPS_WARNING("LGF kernels in 2dirunbounded 1dirspectral entail an approximation in 3D."); FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs"); lgf_readfile_(LGF_6, 2, &GN, &Gdata); G = &zero_; @@ -302,7 +302,7 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const Gr0 = &lgf_6_2unb0spe_; break; case LGF_8: - FLUPS_CHECK(hfact[2] < 1.0e-14, "This LGF cannot be called in a 3D problem -> h[3] = %e",hfact[3]); + FLUPS_WARNING("LGF kernels in 2dirunbounded 1dirspectral entail an approximation in 3D."); FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs"); lgf_readfile_(LGF_8, 2, &GN, &Gdata); G = &zero_; diff --git a/test/src/convergence_test.cpp b/test/src/convergence_test.cpp index c7038ce..2517dd6 100644 --- a/test/src/convergence_test.cpp +++ b/test/src/convergence_test.cpp @@ -165,20 +165,12 @@ TEST_P(ConvergenceTest, AllBoundaryConditions){ bool isZSpectral = (*(mybc_[2][0]) != UNB) && (*(mybc_[2][1]) != UNB); int n_spectral = isXSpectral + isYSpectral + isZSpectral; bool is_green_spectral = (n_spectral == 3) && (CHAT_2 == green_ || HEJ_0 == green_); // Chat2 and Hej0 have a spectral accuracy with spectral bc - bool is_green_invalid = (HEJ_0 == green_) && (n_spectral != 3); // Hej0 cannot handle unbounded bcs. - if (is2D) { - is_green_invalid |= (MEHR_4L == green_) || (MEHR_4F == green_); // MEHR_4 stencils are for 3D domains - is_green_invalid |= (MEHR_6L == green_) || (MEHR_6F == green_); // MEHR_6 stencils are for 3D domains - } else { - is_green_invalid |= (LGF_2 == green_) && (n_spectral == 1); // LGF_2 cannot handle one spectral direction - is_green_invalid |= (LGF_4 == green_) && (n_spectral == 1); // LGF_4 cannot handle one spectral direction - is_green_invalid |= (LGF_6 == green_) && (n_spectral == 1); // LGF_6 cannot handle one spectral direction - is_green_invalid |= (LGF_8 == green_) && (n_spectral == 1); // LGF_8 cannot handle one spectral direction - is_green_invalid |= (MEHR_4L == green_) && (n_spectral == 1); // MEHR_4L cannot handle one spectral direction - is_green_invalid |= (MEHR_6L == green_) && (n_spectral == 1); // MEHR_6L cannot handle one spectral direction - is_green_invalid |= (MEHR_4F == green_) && (n_spectral == 1); // MEHR_4F cannot handle one spectral direction - is_green_invalid |= (MEHR_6F == green_) && (n_spectral == 1); // MEHR_6F cannot handle one spectral direction - } + bool is_mehrstellen = (MEHR_4L == green_ || MEHR_4F == green_ || MEHR_6L == green_ || MEHR_6F == green_); + + bool is_green_invalid = false; + is_green_invalid |= (HEJ_0 == green_) && (n_spectral != 3); // Hej0 cannot handle unbounded bcs. + is_green_invalid |= ((!is2D) && is_mehrstellen && (n_spectral == 1)); // MEHR stencils cannot handle 3D with one spectral direction + is_green_invalid |= (is2D && is_mehrstellen); // MEHR stencils cannot handle 2D domains // ------------------------------------------------------------------------- // Perform the test From f7ccc15ef0cbd6b780dcac3f4afeb8f397d9104e Mon Sep 17 00:00:00 2001 From: James Gabbard Date: Wed, 13 Sep 2023 15:46:59 +0000 Subject: [PATCH 02/19] :bug: fixed isotropy check for LGFs --- src/green_functions.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/green_functions.cpp b/src/green_functions.cpp index 210bbe0..7656d0e 100644 --- a/src/green_functions.cpp +++ b/src/green_functions.cpp @@ -223,6 +223,11 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const //Implementation note: if you want to do Helmolz, you need Hankel functions (3rd order Bessel) which are not implemented in stdC. Consider the use of boost lib. //notice that bessel_k has been introduced in c++17 + // check for isotropy + const bool unb_dirs_are_isotropic = ((hfact[0] == 0.0) && (hfact[1] == hfact[2])) || + ((hfact[1] == 0.0) && (hfact[0] == hfact[2])) || + ((hfact[2] == 0.0) && (hfact[0] == hfact[1])); + GreenKernel G; // the Green kernel (general expression in the whole domain) GreenKernel Gk0; // the Green kernel (particular expression in k=0) GreenKernel Gr0; // the Green kernel (particular expression in r=0) @@ -277,7 +282,7 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const break; case LGF_2: FLUPS_WARNING("LGF kernels in 2dirunbounded 1dirspectral entail an approximation in 3D."); - FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs"); + FLUPS_CHECK(unb_dirs_are_isotropic, "the grid has to be isotropic to use the LGFs"); // read the LGF data and store it lgf_readfile_(LGF_2, 2, &GN, &Gdata); // associate the Green's function @@ -287,7 +292,7 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const break; case LGF_4: FLUPS_WARNING("LGF kernels in 2dirunbounded 1dirspectral entail an approximation in 3D."); - FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs"); + FLUPS_CHECK(unb_dirs_are_isotropic, "the grid has to be isotropic to use the LGFs"); lgf_readfile_(LGF_4, 2, &GN, &Gdata); G = &zero_; Gk0 = &lgf_4_2unb0spe_; @@ -295,7 +300,7 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const break; case LGF_6: FLUPS_WARNING("LGF kernels in 2dirunbounded 1dirspectral entail an approximation in 3D."); - FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs"); + FLUPS_CHECK(unb_dirs_are_isotropic, "the grid has to be isotropic to use the LGFs"); lgf_readfile_(LGF_6, 2, &GN, &Gdata); G = &zero_; Gk0 = &lgf_6_2unb0spe_; @@ -303,7 +308,7 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const break; case LGF_8: FLUPS_WARNING("LGF kernels in 2dirunbounded 1dirspectral entail an approximation in 3D."); - FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs"); + FLUPS_CHECK(unb_dirs_are_isotropic, "the grid has to be isotropic to use the LGFs"); lgf_readfile_(LGF_8, 2, &GN, &Gdata); G = &zero_; Gk0 = &lgf_8_2unb0spe_; From 82e30755872876cddceab4e66dac9bd3489d1772 Mon Sep 17 00:00:00 2001 From: James Gabbard Date: Mon, 4 Dec 2023 16:09:10 +0000 Subject: [PATCH 03/19] :construction: kernel/code for 2D MEHR --- kernel/MEHR_4F_2d_32.ker | Bin 0 -> 8192 bytes kernel/MEHR_4L6L_2d_32.ker | Bin 0 -> 8192 bytes kernel/MEHR_6F_2d_32.ker | Bin 0 -> 8192 bytes src/green_functions.hpp | 6 ++-- src/lgf_3unb0spe.hpp | 58 ++++++++++++++++++++++++++++++++++ test/src/convergence_test.cpp | 12 ++++--- 6 files changed, 69 insertions(+), 7 deletions(-) create mode 100644 kernel/MEHR_4F_2d_32.ker create mode 100644 kernel/MEHR_4L6L_2d_32.ker create mode 100644 kernel/MEHR_6F_2d_32.ker diff --git a/kernel/MEHR_4F_2d_32.ker b/kernel/MEHR_4F_2d_32.ker new file mode 100644 index 0000000000000000000000000000000000000000..8743a46f2de2ec545bd822ffa4be25a4ff40082e GIT binary patch literal 8192 zcmX|GcU+G982$5AOkyTkO*%?JiC=G=q6s068L^4V#N?Bzq+>&vUk%|y$ zyzjH>B#Glx_&v|(^Sj=Epa1UXzOVTmpdEiK)isCiU;EPKQ?C*VYIx`5zomk1INkTD zJX=Wt`Je2Y5~?V6_LAwge$}Lr8vjktrH1Ae7`Yh^tEJ~QmuA|P)zV(?gGZ};>WFPN zRGI#xj-3CgHjLOLBjNeWh97k@x~(}TXpw`QtomO1)qGJ-hdtTBtTICmZq7kHoEgf@ z`?IcpHbXXlm99>1Vd&`i8wpyIS^DT}$G3&BwBJDc!SQ=6$xEkJO>bq%wrF{#sVPUM z6RhfTy*awFq3iJaRE}PZ^8ebioTDdIH&U#-@RYGCQU8)DPs{$fl;P{n(;z3V$C)^u zX5ZN2zWNTYnEPp2&{+-Ba(bb4B)j19JDO9s%&5ZnBl-BM#9ftF(aLS-E1RRL>CKqz zLtXr8=#0|J;mb{GDRi%`!TL|NG-Gvv(&sI8)Oh3MpZxYZnsPL?sU}E9ErqvRgE<-f zaM+?ZdcK@`T3b2I%#l;WcD<>^e=+o`-E2_eN`{5h|`_J{%1-cWU{5hNHQw_oQ`x z!%@MyJst+iJVm5!uL?8c$z$a8pX1l?RC;8$+lqrcO-kwCqMFAm_Kmr><$ejP>QhOF zb1HV-js8TV4TF2S<$b29drQh~Uscn>_@m3Y)Ee69WS!))td_DjmY*tbttHE2lUL1+ zsiRNLxh1dFWK{d!@67Ee8QpoakT3f#qw#J7ZXR)#(~Y^dcL(LkY1AoF^XSb`+4KAD z25T7_J7RlK&ua`NuQeKy+|CfQIOBw!HA^AAQ^U4Juv9S3KVwHBOX1G78d%q((9qI8=Qwgb9Tmk?aAXqmu(YfjPx|_m8-JVgw4nEs$jG%kwH#>E zjXccLgV7#G``zUgcaD8y?zpN+FE1H?CW}~$z4yesZ?vjNS2M4nv~cahyZ_YC-QZ=j zM;xi8gp+5DHAmLb)uaIb_}n_u@R}68ajcA7v^0zC&d4arafn@rikx1?7Bmj`lhaz` zJ;mnFCP{Uk@hUPezuzv62nsF z(%Vjtp0PBJjm)rC7WbQet(WO^jx6tObb1lUQKH5Bu~V}-a!)lKe(57e-|lx@YTc73 z*;SX$gQoEGZ;FT3DleYQ($}QwB=9uh@)VUo;uZMDonzmaJ0{|ET#{N1#Wk;T)6T6S zeW#~&N)ENuqvFKN&Enlp>pIimg-0Fj9%5nSSzAX>K0HzDyi`VSW1{tT7RqSz_xHy# zN61Nk@2*V_QF1C+e=R_(S}yX5fm+UZ5$9daN5_XVlx*W^0B41i~@slC*+HF-z9Zm>yHK* zm0fgwzgXnGtUK~4Gc)8=lr_j?LYthz$J;a>vuDVv(6;JXB11d%Kc$SRW+=Q@T&wyp zmj1M@S^99Dc<(LC{lm_&B+U0!KTyNc^xN|*G_*J}>!wuexR9gz)tW&bQ5+>Lcp1Ml zk0Yas9-evi9NA{HmrT{-$@|TwTzy*+_svZXdjomO%*Z<&lFC#1uMI17pYaMl2d@F& zxO40qbMHLSGh1J;jx257H!KnHz9EaBJX_@Z%B1G2r(MLm|9iH|t@|=cKJ)qA0&O|X zi=1)QKTu9PHfkQ6DBit)*rU?wfeekm&{HdGJwu~U?y@e+VJNLr!Vih})Qvw?IZR?{ z$F)^A`UHz}w`n$xyeaBH^P$`aO)Tk$pXt(NBuA%{eN^>aIZ};cmzy8qXrH?ASl`DS zWoHk2Tm79Qm%GGY9?DaAr@n{3F5qd4;husAAv_t6Rtq#aFYbMw#m5z|d4-OF&%tZJ zH|`w!#@rQszWZzOb!3vC=h!?;MsNILg}EZ$9etyMEm~#d9uPfetjPCe(Zju&G&!}$ z(%reuawnM=g(HsKn%ayma?t-?($^8*}g9d$!yp zQAXd#zlaJEb?$2ZxKR%$%4t^w{XJL2`{(s9kLH%i$#j2JhbuY^9h#y2WQ54~CJ+9o z3=s9+Vwdi%hz5pwrgm1@J({K8+gg98FLzSiEQMHF8zVGDy z@5;V=#l0Wu@k`SCZRfjtoBR-c4|C~mA@Z{dmA-#O6rG*9xPaZN9ui2q?|y~B`3Ug6i^ zTcF?2G4MHf4fw{LW8avY&9#cs6ZL)JJASaa$a9TfTuv5DW{4{?=(%hML*Kf8oN@II zL#`g2L#l}P)5V3solRI8RGgFk(wikM&B}7wqn>3t3Vz>;5n>3xasgLm4Qms@kR zC{pdql`R~l7??FkdLP~r)KyjVdrp-zf~C9n_~RG#YYb0z*QeU6E$8X<2e;@i;XIZ3 znI~Sjz*A`Fm$viEL?2M!R?q*(D|{;a8hi`%8#)F)2d@F&xO40qbLZW;rRgz&q0VU) z!Vyv59W4ewUMBjxlzl>{)eJ)uABCI^&}C`W8Ixd1=N7MY9@$sKd$i-(X36Iye$>&3 z{gzTMeEX16LKS zoz7FfMxKGB_l_qcE=qT=x-{gM>G2d4q8r z$|xD$*-6II*sVuaNqDc$Y8bL{Hb<|t#)L{fN4H(sxBFQU_s#Lo3|@0obpJy9$X^^e zth_e2?+9_;5jM#Z-@A9~TK9N6PuDz4+fJnM)Ni<%%GM&DZZ*0ua{t0pT3es~>oo*L zE`dLXPlaEDZ-IV8$H3>{HQ*a}j(ub9kCS{JOZ@Jc_`|?n^tHyyAM}T8VM(*?Sb@ZI z)gzfetG6r-b&eM#f2Z4HV{d;mjv9+B_uW~;QH0L0@eA-_SAeId~2D#+_r|n7e9idS!_}OZg7-&n-=1>BQoag7YuL z{YNcGmvT);hY#kGzTdleX<7Laj_j8Dc{PV|^z@_5V8JEP-=40|lKlP9cS9p3yxV#C zYNZ(RBsdGo_ZEuto~uk%7We*U$NSTg&-ogC>ETe3?^fzXp7APh==YO1@al{NfWTW?uBdrx5dCA{p7OtO}Wi960 zg1m7O-bYSl*DuM9rJV~eLodn*ec~(SzOxK4bD8hAAiyM_HGf^CV|h_ zpX2Guih^De-XGrQuGcj1)b(NE&TMh-|M@TN^V(RTx3$?HofZj--U0a=ITU#exdi?k zJ{5iqz6JUX9Rr_(*MM)_IrfdYv$m{Ako??*V`p0D9OdY3SMNND-@o2&X_I`7%xB;} zDbLRv{AXm)D4uS|eA8JmpQl+K$ppgkrE25vL!5vmFvEeCu=Flm=LcO8?%f@kog*8ga##P6H)YJ(*||E+<`lZkm|v9qtYl;?EV|4vIh_tr;yY^aJr zGyYx`lrT!5nnwo>tn3B4-d^4v_0I4-F8R5q#|9M6P3LJ7y_mGU zP}INvo5o3g{^y|HuX=S7$Zy%%U!LN<8)lsz>}@5`1XG(PbvJ>6Q}K>kJ!MIJ*gfj@^&gYT1(ndboW4f+X?Y4;O!S zvknw!aO_>%#CY*P1g-0nctue3spu8auc2o`--6x&`5QSDc?`J({v19Peht0_`VAce zpM%$cZ`?Wdjk!C71YFd68LlYRQNUc7U(y0415k=1HN(R*f-{$R9x+&{*$Mx3IFO!KDNWnui80A0+nj~ zWmw}N(4da1qnkVhI{#mn@;4CzDK)gcjZPJ4j8*z3<-6kje>lE#c7>qm&(R~JPereY zehobn`WEyK$lu7J$YaPQ@aOQU@N4ib&~NA%_#C_jeB;isZ_It6`^z(N;{^Jd^Jd0P zJAvkQx^=O1tw61+LGt&z1=_mL#G>$&Ks&ywXh+=?Xh4^&&bqI~+_OB`IkH|*yc?iD zM~{p?6}=+*HS|pAThKcoe=K^or=$&@-WLLGOV4jU0+RhFk)F4xb9Y2HyhxhK_;H!E3-b?i~BZ+&=?y&IySE zRc-cq(Uv37vKyy0IzJO=&#|q0+iC@R^{AhAWt%|T)f~?!Y1Av;iSS;6cLVh2=#kN< zqE|$}hMoz13wj6SZ{$$qG2{~XbNE#FHTV|jH*^eq4qgMkap%}K=H5R;f4**!K-;?g zV61Bd8eO#Qz^eZQ>fLm)pSOBFWxd`%&V6LP;(ZM7M0hX3y8-%h^vLK_(JP`~L(hc1 z1-%3EH*zTQ7;*{xIeaSo8hi`%8#)F)2d@F&xO40qa|cCLltlj&$j4vvd5Ky*eYw>y zy_Z2fU2&+cv7T11co)R`7~YBSUV?W6^ylc2(WjzUM8Aff34IHC2jp+$P~68LlY zRQNUc7U(y0415k=1HN(R*f-{0|8v#?yW#Z|;T$6`oKjEM&*yYAUsSJn55>D6-pBAx zg!dA>8=yZ&kBmMQy(0QG^i1em&^sW1BZnf7A(z0P!>7Wp!M8xap=024@EY)qJIB5; z_e}MfH$oQHQ@F>dcyrHs#XB(GL-8(%_c6Q^;k^X!2I$YxBco46uZVsPJrnvC^bW}1 p$f3w%$R+US@Tu@?@Ga19=ot7Myas&Z&arRIZKKxEZCXe@{SS!{me&9P literal 0 HcmV?d00001 diff --git a/kernel/MEHR_4L6L_2d_32.ker b/kernel/MEHR_4L6L_2d_32.ker new file mode 100644 index 0000000000000000000000000000000000000000..380dfd93d89f114f93f4ec48fe83dfaec9856652 GIT binary patch literal 8192 zcmYLOcUX=68#gL1p`xs$Br0U26pFVHsYoRyD?|y2L=r`bQY0g9MMi~+GLn)zWM-G_ z);Z@{S((ZDJLbqlvXZdfLx zv&SMU?Y_#$wBAc=>nb@lkG7xCER)j_x9F7~&I+1xUH!tRTm{X(v+ESA#!$UuTX{zx zhFUW0E_FZ2(3U@0ndzSy`o?&6W6zrJeg&Q%;;k?H2ytMZiJ)@*2=1y7%bQU3A;Jk2OSqjNZs zr;N=5m-=4h33H#$9?@L-PZb4Z{m6{UeMZ?45#Q&UzoG$)iZyka8ak_5b53Jh9fdB8 zOx0OYPp=nwY+qpBKo^sYnwGt8AkDD$)DR-0+E9%xmw(F0XkV***;+Zxv8x{)&dF)+ zWKVT7R|Q$W^iZ3aFV27U@U@Ee4E-n1+q!=aLsKspT6pC$H26!K4~M=oA(kHX>bElG2TQ9%#=Z-(-Z1u9QC z>XlL)Y^=u9^@9Q*Wy#Y-E~|~jLY{W_G!K}ylc&R9ihgu1=9T-#+-GpJ_=GSU*x7=tRR&E^5{if81iXy^)~Ti zNPF#uE1k|V)W-GCob;azecTs%XNo;b%a2!`SQpJw;rb^x!>_S)?{{^4`X83wTDx$1 zqc~b#V!>Ffja5Av=c=$pZ|f<;RQt7Y zas!pbZ&CC#m66{@tHsF$GRp9?ShI47oO)dzyw~ZloNBF-K1HZ2NR0-E8Lv=~mwQJW z>pKc^`Mh~Rm;pn62|jko!3-&eEj*%gnV~-adJI{w#?soi27abaES)cpT^g~2rKuTn zL(1>4WEpdi?A65k&et4ZF^;1(B}M-C)^Id3e9u_hlN_b2yFViD6-NuTDi_#y;wjuB zr*j`0p3Da{@V?Mm?3;#RPXMsHgEW z&9hpZ8fZ~)X8KC;?hl=w{=Q<7jB+%dG%RV5(fxoRQ#cq9teK*(M4K zxBiEmV-*yieENcJoq~2P{p_4Sf}y54Rld77Gc?Wo*jnp*41M?X{?uNZrPSOwE8m$c zCH4NWDPccLcRbF9S5=AMV|K@!)fybF&$Ej?Jc*+Zy6GDdBRI)g%#h+bd z95viK_%fg?Pu>eyMPNiQP^#0@No&FN-{^h)q*MM)_IrfdYPwfmi zx>Urw$BVr&9nLk7j?1cPEyHDWc){*@Yd)>u_FIX%00NDn4_r| z6IK;Ba#VlH`Gu`6PX(8XF7|U2=M9+Zv2_hk!{p8>5g9zSo8mNI{|>L@bMPAQjXTG_ zF*h?}tV>ltonbfu(u-7IslJ<*3^(on8j>IC8taagkLrN9U&9uv&hdBi{{o zYwEs;_o124bjpY)pF?^nP1AY$Soz{+Sp-jgZEvkNKhD#mZGQE$9`Q;Y1D}J}fN$J6 z_Kmq6if+dD;ANC>^p(YjiE?^X=f2Ka#CzwHU$=~Ek<;tkxDyiJn`Wi|!yHji-o5it zE*}+iyU)_pz_AQzR9otL$1?P=TgDFQ?&IE!S}|OcrP-Zp43@jHlw9fkM>~%;=2^{**vZ9uDi@DmZ#|)vuIcWPb!}JukSwRmHG`G1D}J}fN$J6_KmrBjxaDB zzF$rU@*>7=5Or=>eao0D)(R?WvM_KF@$U9nFes>0(3bj^GpFpTLt|uBL zy+56sbu{`7L;tEyZ526~CATgn9}SkWbSxrjr*!umnsT@lu zj^>rw4LZJ&qlox&bKiW9mR9O9>*_@QbI|Ujroq$rhhH@%-n--&*Y?R0o;o{U&9U9h z)A1Ryy#XaWP2LdHKS0JSeGBv(ItD%muL0kZrvK@ zrI0|-soILvj zM;D*E@2Jw_DNQ&gZxV4|@~lnt^$?!)hN!KZcaW#1PQ5dQ#6ChBlW*_Ua(&`|ZKY>-`*AlDEqqDfxTf*v@xO6tFZR!T#+G zIZN8pzd1{Ik1&hT33A}*MNp3r$>#{siGCf9iF0o@x^Dc4qhkXr6HR}Ky7ySyNymhz zp0iU^CBC0Ewrj)n^`if`4E%B62v1D4m3ruH@psKR_XY2H3UN8qeTlZ9%q8&W@Tu@? z@Ga19=ot7Myas&Z&arRI9o4pHtiTi8RSQ^qXEM4NcqrT~0cGWCh z9idez`MVw}rGF%Ue;{mg*u{mS-gzH1mhfKFF-t$9lq1(~n=ZZMIBK<{xnA-)JCeQp zK8Sobrt{1_lF$7|ovO?dc&Zqo>GkfchyxAFT8ZyXP8h^L_|4NaExPd2P*CPEk6(3VV$}?eNMC2x$0&}>k8anUp2yL)QuPeU-!C(8jgavE<;w(}Lj!sGdbdxT zQa7G#j0UBviFls|%)_a>cj=%OKzrP{=~zUG25ha!(5 zm%yLHr^2tnw?Mz4W8ib}8t{!f$G$N)uR1C~=GLMTTeapxyW`X1E&RGsG?j-H%08*A`M{0^)A>m`4;{6>=P345NlkFp&o;eCs0 z&86Sv)nwilH78#xqt47mjU z96l9(4Za2X4IKlYgV%s>+&T7*xm8s6oaOs)G|0m^Me_5G2UmWT@|;|6o>xo?N2_Lj z(q3Q2kxs*saEa%#w-4_s>H7?|)*q!@8#rT;@lqe2nonM^DT);J{m;IeOEP&%P`}qv z!uu=BqJsK2JROM}x&EZMcV`VZtw+NJQXSOorKh`~>>ZH5kwcNkkW1jt;Zxz);9H>I z&@u2icn$c*onzmayUL+Mq~zzqobR)f_HZ;Irsagh?+3L4au%JWV)QuNmj z;z@m$k!~c#JM+qsUF)}pmNRHA_?zy z?^i4sI6BwP zXa4#kj=pbq7$@b}&%Rd&+-N20+BxU%lAnLv{*&j7$vm}Qw!!FRpy=!V4CyWPIT!7` z10-MTSXwew(sxB;$kUe3JnbpFo;0YNxc9NI+-gS(6u6?SjrL4|9BMZP-3k@I!{43> zeG7UAd=6d%zH#T+H|CyZP!ievo1mKF}Kfew_oCS)iv9LB|rb~tn9OLG0(iTGWL`5+~_082PB@$cpq-MQC%Q6K5+Hk zK?3DRS{T_m3G_Dlm+Ho)0<}IA`^0g(@c(}8Z_k9j1-%3EH*zTQ7;*{xIeaSo8hi`% z8#)F)2d@F&xO40qbH96}b6v{sQw(nvN&Vdz)6kugpX1+|mbe_{DLDP1`TA=-y`AD_ zD*5@xHO03z+X|G{zx8*2@!m7pRJ}kufugomzR{X5(0jvVTknkmO>CZDXPYJ{dqwnX z=$X*Bpm#w2Mh-Te3eer|OUaqineJ3-=i_occy!MXw&+du1Ya+E+mJzZVW#Csogbio|^H3CKW zpL5v1TcCF1ZfNd5Ehzg`^or=$&@-WLLGOV4jU0+RhFk)F4xb9Y2HyhxhK_;H!E3-b z?i~BZ-0zH@9qWCWr)|#a&r4p4J*NM~WM|RGC;XeHe^tbJ=3w*fS{4E=wtcX2_;i76 z(wEitTp`~7;$3~p;{;mnu{L>Nwy1kAW6~#I6Mz5q$mmniE23XR&xF1Oy#w+$awzf` za*5KP|MjWxYw#`5Z|E5K9J~g6hK_zUE>(Bk2fXo2Re-CMamL!hni4+g1S5-6$aZJ5JTLD`?9M@FBDUJ?Bo zdM5NO=pB&1kwcNkkW1jt;Zxz);9H>I&@u2icn$c*onzma+h^0tw8WvJpXo1*Eu13i z*_YABANvWkz3<}s=aB-1>&A|}mMZ>Fr5>?`0_|x!*INIPKqq%qd&M*g%69|w=jf5q zr=nLxzlNR(eG7UAd=6d%zH#T+H|Dml@OdHW`9#D1 zNzX-ot1{c_R-Yo?e?Y~U>N5hhZ7SQCe^;QZdo&ya7=iNYDso%?i2MKhUV?W6^ylc2 z(WjzUM8Aff34IHC2jp+$P~68LlYRQNUc7U(y0415k=1HN(R*f-`*vZ^>N>=(a7 z&coY3@&zKnRZHcLKqfZ9TYfYMR2<{k^W_hLWL+L)rfD}S--+;Ef_DS-=jf5qr=nLx zzlNR(eG7UAd=6d%zH#T+H|91dzwNAlTcEJ#FJ<=i z0xhjb+cx*R$PXr+v;(yo=~kwZ*#gr><@*@kiSS;6cLVh2=#kNpx@9j@Hu!5_{N=M-OXUn+59d#Q*j~jYhI)|6hit zaU+fVwYS!OY@_mB5btAnC&GIP-VM;7qen)cie3@@8hR%5E$AJPzmY?c$B;|l&*4+y z*Wg>A-_SAeId~2D#+_r|n0wptD^sToXe3)73)wZBMpB&|t!CxksC*B_yCB}j@J@vH z61*FrKSz&@J{7$p`Ze@S=v&Y`Ab%r=B99@Lz@NjX!mq)%K)<15;B)XA@QpjizA^Wv zP_45OZjIz)-e;GUf1~mp81JEY7sUG*-ih#Df_DS-=jf5qr=nLxzlNR(eG7UAd=6d%zH#T+H|E}v_FHXiL?itVQIk*h literal 0 HcmV?d00001 diff --git a/kernel/MEHR_6F_2d_32.ker b/kernel/MEHR_6F_2d_32.ker new file mode 100644 index 0000000000000000000000000000000000000000..370063835340eaa43108975dd6aabf202d178122 GIT binary patch literal 8192 zcmX|GcU+D89}kr+QASFcDL2v$w?RpvjAwH&uLTY9lM3wU(pBFyXo86 zw`8-Y)7()B71U~9dvp8BDwWCQrVKl*-xA5v+l&RD58Y+yk@1sG*3B%HtofdAYRZxI zROfbAmvPkh_-?DZWRCXdSawhAa zr>9-5SNwCESIljB;Of_Nor-DD^c{gdX|E|_mR+}SlQQbkBkP$BQ$ZJ^=8qe&y^6eI zxUSkOtI6U=p7Rou8hTUiGdZBVhWv9=C(43qDM9DaR%)rGppOH+Wx+DaG46CHoRg9L z!`XgDE^;b0&v&v*m(xu9!0-R-%#fyPSbD;IhCGZl+~!#Oo~`*q}pO=EeoJ8?qQ)SIW$nHQGN-NTc{-w%KG$mA9K#@rre%VI**-je%L zrHkR)%ZUlxI!r&alAPx}PjP%!Mg3>zcd1XVrs_P~>x<^p(2M5J4yDaCbnW@ULYLjO zG;4E;N3oiWQkZ$0GNWa*H#yU# zhoQY4`c-zn#!#TwyS2w!7|Pn$TJB)OQl+YJJ7gKMewzua=ErVKGySC<0{FPez;XJo!)hHQ#3Ao8Po{*9Dtm)&|ca>8{R&=`lN;y5& zRrC8RUrx^*hd74nF|;ehZejdthE7lYl&N`-q35ZIHesqP1*^2IHE?9;$k&Jmun;*|Ohd%!Dr4fw{LW8av& zewnpd8{Qpm^}5^fdJRSHw7JnVrk0$}H1=~$tEKoM#gx(iz&}cR=F3pT~R}oy^3$e**?ECEvZ?e>(s_sEn14{<)Bm1fta5N->2I?*3$Y;_W-|_Jrtv`6wVnr@TPnOu! zRDI#dqV;^^rJ+2%E_;9EqbpBG92VziMeJNb8aK`<10Kl538woQpzjz8#)F)2d@F&xO40qb9)2~{@cAQ>n}Og-+o%whW8$Idp@NX$w}|ti}*`A3{Bj5M&kP}(=#N!x6XKbbn82Y zzK>IG+3*KTQTNRn^!!<}RT?7QeXNPwqPrC={ks0yu0fNd-4|wG9P7x@FpYsmXV-JI zkGX03PdZ17=k?-3syMPV{MkcQji({%1rqN~HFr=cSt|102H%Sl_KEsmy)wb?4o}Rw zs^NaMyu!CYzoBE`bMPAQjXTG_G57GF&xdCl%V~0a&WCQHa=LqDzg}XNoPNFU&$juT zI6hIrdwvg(1#4nOp3^FPHuo7r_FZ)wZF;a&zo5^$HoiaL^~p&yiY5Q(?~>lzsJ`_v z`O1=`syuVsNRB)gH1lFdworU6{m^UW{#H!9$+Dg_pPt{=h5z8hi`%8#)F)2d@F&xO40qbAR#g zE9v{MhA59Vo*SB7+?r*<5W773fP1+39sE>;E4LZadl4w%eSNTty|M{Q*&M&IV3~+- z2hZhACt2Eg+@Wgo85K^(2R6)5Tbt%3HE-!lj`mhOI9 z&A=V4Ms4^Ga8~mW`7cnut9~<2N-1@R&!6XMW6U8Zmm;24U&#N!f9DlG6@CrA1^Nvg z1D}J}fN$KnV&DJG{Z=(9z`1eK$$(FZny)X+`I~7=}{D*?FzjWl6`Vo1}Ak z<=@)1;oa!Cwd8X)u;D*;%K9N(|!FH9Bp^B zk1=Q!^?qL`2h9;8-#xFF_`c-&mFnCuQSbOkKMtqx)X4nRbzL4$DQYD%7uEBWtn#Yw zQVl_oOW@DpQ{mU(TcF?2G4MHf4fw{LW8awj7dukock7lflCL#Ov0w%Tv9zy2RpL3n zXN=RNmn>;5O_ThcUXqRE?{EH_7u$>-$P z6?iv@J}*bhO!B#TeZCeM#qjj1(@ysf*F^uTT`Tc@z~A3?KKm*D?|$P}%V0r~$B;|l z&*4+y*Wg>A-_SAeId~2D#+_r|n7caf!I9^ySSqwSy2AZ9OJ&U6eP^GtqAv81a!p6g zAW7dtpN^hb>?Z1a(8rmdHi^1E=TINliyUe0?px@9j@Hu!5_{N=M-xcS#@|0WZW}dN;r-NHg6FfIy#JN3I=%Xx_#Ldqgr=pB&1kwcNkkW1jt;Zxz);9H>I&@u2icn$c*onzmadw$&($~bjxk=_KURo3M>|R;wiC!M?z)8cw7Bqp3~dGSp1AY%qs4-vZ$a;X{EZxnJce8X ze-57tzXsm|{f3T#&%tZJH|`w!#@wM?jYn99sBiZtNI6!&_nJWum3T56cuw;3zJ>2+ zxJ~1!u%GInv}HUs{}?azId^syO1?J2Awkl2kJ{9d=0={L#aYLV>?6>DV+UO;#|fnF ze5IX+yFfQwlU6=nCn$O*^eyNekiU^bk;jlr;LqVx;n(0>px@9j@Hu!5_{N=M-z9w`sPbW72?lH{cOe8-)!!0XaCgz#y2dAVw_cYUA;<-gb zQ}ow&704#;)#?Ky1-kaXvcZ!a1^OEOMtQxzKrW8^itTp@ihd0}6Z#hP4#?lgp~z#% zCGh9)sqky?Ezoc182B8#27Kerv2V=XxIab8?}EYa{%&$hwdCi*FSp)uKFL#kld^GG z4o}lZRZ4#T@b`pAJv#_wWjFkruc&WUrtP$sO%iBP+Uh!WFM+h(y)8Y~3sl~3e&vKj zLD4IsUqjD?z6HGl@;7oQ@)&Xn{5gCo{2F`<^cy+`J_oM>-?($^8*{(T^xnO8ljzqE zzLRwAj_L3IX54~VTb)ImA3T{M@q2cT>bcbe1WGYf?wn>OP~wuCj)~&EPxNZ_v{@_u z{~T$Tuuq_?p10K!E(?l26}=+*HS|pAThKcoeJc=|hj>dQNCc{*-uzSl|g@u~3(bnc5dZ`aNwTHREjLwcP8#<&X9 zSzcP+f2BY^-bOkPb_x^_WVO%mj6g?nf)c0Y2#OvVeJXlI^lRvu(6^v>K>kJ!MIJ*g zfj@^&gs;Ahmk| zrwe@qip$BZEZr#by}R1joTCEiSFPw9eN&)kTcVV7UkFrr`my`AIzjPnfc_jkGWt~X zis;wSGof!m?|}S`9Ev=KTmpX%p9;UG@Gbx8H*^eq4qgMkap%}K<{qiPL(+5e%fWj} zMSk-(XmzPRB+%a*e9T{76)4;{A~OAnKz@P3R6jIL&fc%Xdiadr~0)Gyl3cm*50{w=LfzQEfz&Gw3`^MawxoaGC^8`vaN~*D`7O10A zNc4j5qW@i^)O(qF9W^zN`eTtno#K5A??iYn!Mg$abM(mQQ_(A;UqjD?z6HGl@;7oQ z@)&Xn{5gCo{2F`<^cy+`J_oM>-?($^8*`6{%X_k;NyPtRx5v-b>d1Ni(c?Y!>&PSc ze1*+KasU6l3*vnY??iYn!Mg$abM(mQQ_(A;UqjD?z6HGl@;7oQ@)&Xn{5gCo{2F`< z^cy+`J_oM>-?($^8*}^Ale5F{I*RbLt;w;hBda^9D&uF?Dc(czE{OLryc6NQ1n&mu z&(R~JPereYehobn`WEyK$lu7J$YaPQ@aOQU@N4ib&~NA%_#C_jeB;isZ_KT=-1utb zj5^vo&}q+j-#W!RFy2G)E{OLryc6NQ1n&mu&(R~JPereYehobn`WEyK$lu7J$YaPQ j@aOQU@N4ib&~NA%_#C_jeB;isZ_Itw!c28yWF7q& info) { class ConvergenceTest : public testing::TestWithParam{ protected: // Default variable for all the tests - const int nproc_3d_[3] = {4, 4, 4}; - const int nproc_2d_[3] = {8, 8, 1}; + // const int nproc_3d_[3] = {4, 4, 4}; + // const int nproc_2d_[3] = {8, 8, 1}; + const int nproc_3d_[3] = {1, 1, 1}; + const int nproc_2d_[3] = {1, 1, 1}; const double L_[3] = {1., 1., 1.}; // Variable specific to a test_case @@ -165,12 +167,12 @@ TEST_P(ConvergenceTest, AllBoundaryConditions){ bool isZSpectral = (*(mybc_[2][0]) != UNB) && (*(mybc_[2][1]) != UNB); int n_spectral = isXSpectral + isYSpectral + isZSpectral; bool is_green_spectral = (n_spectral == 3) && (CHAT_2 == green_ || HEJ_0 == green_); // Chat2 and Hej0 have a spectral accuracy with spectral bc - bool is_mehrstellen = (MEHR_4L == green_ || MEHR_4F == green_ || MEHR_6L == green_ || MEHR_6F == green_); + // bool is_mehrstellen = (MEHR_4L == green_ || MEHR_4F == green_ || MEHR_6L == green_ || MEHR_6F == green_); bool is_green_invalid = false; is_green_invalid |= (HEJ_0 == green_) && (n_spectral != 3); // Hej0 cannot handle unbounded bcs. - is_green_invalid |= ((!is2D) && is_mehrstellen && (n_spectral == 1)); // MEHR stencils cannot handle 3D with one spectral direction - is_green_invalid |= (is2D && is_mehrstellen); // MEHR stencils cannot handle 2D domains + // is_green_invalid |= ((!is2D) && is_mehrstellen && (n_spectral == 1)); // MEHR stencils cannot handle 3D with one spectral direction + // is_green_invalid |= (is2D && is_mehrstellen); // MEHR stencils cannot handle 2D domains // ------------------------------------------------------------------------- // Perform the test From 61ac0eeb5595107843476bf38f0060f60b285350 Mon Sep 17 00:00:00 2001 From: James Gabbard Date: Mon, 4 Dec 2023 08:29:48 -0800 Subject: [PATCH 04/19] :construction: 2D MEHR test all passing, 2unb1spe failing --- src/green_functions.cpp | 28 ++++++++++++++++++--- src/green_kernels.hpp | 54 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 4 deletions(-) diff --git a/src/green_functions.cpp b/src/green_functions.cpp index 7656d0e..2a28f2f 100644 --- a/src/green_functions.cpp +++ b/src/green_functions.cpp @@ -315,16 +315,36 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const Gr0 = &lgf_8_2unb0spe_; break; case MEHR_4L: - FLUPS_CHECK(false, "MEHR4 kernel not available for 2D unbounded problems."); + FLUPS_WARNING("MEHR kernels in 2dirunbounded 1dirspectral entail an approximation in 3D."); + FLUPS_CHECK(unb_dirs_are_isotropic, "the grid has to be isotropic to use the MEHR kernels"); + lgf_readfile_(MEHR_4L, 2, &GN, &Gdata); + G = &zero_; + Gk0 = &mehr_4l6l_2unb0spe_; + Gr0 = &mehr_4l6l_2unb0spe_; break; case MEHR_6L: - FLUPS_CHECK(false, "MEHR6 kernel not available for 2D unbounded problems."); + FLUPS_WARNING("MEHR kernels in 2dirunbounded 1dirspectral entail an approximation in 3D."); + FLUPS_CHECK(unb_dirs_are_isotropic, "the grid has to be isotropic to use the MEHR kernels"); + lgf_readfile_(MEHR_6L, 2, &GN, &Gdata); + G = &zero_; + Gk0 = &mehr_4l6l_2unb0spe_; + Gr0 = &mehr_4l6l_2unb0spe_; break; case MEHR_4F: - FLUPS_CHECK(false, "MEHR4 kernel not available for 2D unbounded problems."); + FLUPS_WARNING("MEHR kernels in 2dirunbounded 1dirspectral entail an approximation in 3D."); + FLUPS_CHECK(unb_dirs_are_isotropic, "the grid has to be isotropic to use the MEHR kernels"); + lgf_readfile_(MEHR_4F, 2, &GN, &Gdata); + G = &zero_; + Gk0 = &mehr_4f_2unb0spe_; + Gr0 = &mehr_4f_2unb0spe_; break; case MEHR_6F: - FLUPS_CHECK(false, "MEHR6 kernel not available for 2D unbounded problems."); + FLUPS_WARNING("MEHR kernels in 2dirunbounded 1dirspectral entail an approximation in 3D."); + FLUPS_CHECK(unb_dirs_are_isotropic, "the grid has to be isotropic to use the MEHR kernels"); + lgf_readfile_(MEHR_6F, 2, &GN, &Gdata); + G = &zero_; + Gk0 = &mehr_6f_2unb0spe_; + Gr0 = &mehr_6f_2unb0spe_; break; default: FLUPS_CHECK(false, "Green Function type unknown."); diff --git a/src/green_kernels.hpp b/src/green_kernels.hpp index 29c3c3f..b4a31d2 100644 --- a/src/green_kernels.hpp +++ b/src/green_kernels.hpp @@ -296,6 +296,60 @@ static inline double mehr_6f_3unb0spe_(const void* params,const double* data) { } return -green/(h); } +static inline double mehr_4l6l_2unb0spe_(const void* params,const double* data) { + int ix = (int)((double*)params)[4]; + int iy = (int)((double*)params)[5]; + int iz = (int)((double*)params)[6]; + int N = (int)((double*)params)[7]; + int i1 = (ix == 0) ? iy : ix; // first nonzero index + int i2 = (ix == 0) ? iz : (iy == 0) ? iz : iy; // second nonzero index + + // if the point is close enough, it will be already precomputed + double green; + if (i1 < N && i2 < N) { + green = data[i1 + i2 * N]; + } else { // if not, we use the extrapolation + const double rho = sqrt(i1 * i1 + i2 * i2); + mehr_4l6l_2unb0spe_expansion(&green, i1, i2, rho); + } + return -green; +} +static inline double mehr_4f_2unb0spe_(const void* params,const double* data) { + int ix = (int)((double*)params)[4]; + int iy = (int)((double*)params)[5]; + int iz = (int)((double*)params)[6]; + int N = (int)((double*)params)[7]; + int i1 = (ix == 0) ? iy : ix; // first nonzero index + int i2 = (ix == 0) ? iz : (iy == 0) ? iz : iy; // second nonzero index + + // if the point is close enough, it will be already precomputed + double green; + if (i1 < N && i2 < N) { + green = data[i1 + i2 * N]; + } else { // if not, we use the extrapolation + const double rho = sqrt(i1 * i1 + i2 * i2); + mehr_4f_2unb0spe_expansion(&green, i1, i2, rho); + } + return -green; +} +static inline double mehr_6f_2unb0spe_(const void* params,const double* data) { + int ix = (int)((double*)params)[4]; + int iy = (int)((double*)params)[5]; + int iz = (int)((double*)params)[6]; + int N = (int)((double*)params)[7]; + int i1 = (ix == 0) ? iy : ix; // first nonzero index + int i2 = (ix == 0) ? iz : (iy == 0) ? iz : iy; // second nonzero index + + // if the point is close enough, it will be already precomputed + double green; + if (i1 < N && i2 < N) { + green = data[i1 + i2 * N]; + } else { // if not, we use the extrapolation + const double rho = sqrt(i1 * i1 + i2 * i2); + mehr_6f_2unb0spe_expansion(&green, i1, i2, rho); + } + return -green; +} /**@} */ From 06f523a8b114380c225c059f17cea39e8f645dd2 Mon Sep 17 00:00:00 2001 From: James Gabbard Date: Mon, 4 Dec 2023 08:40:52 -0800 Subject: [PATCH 05/19] :sparkles: 2unb1spe MEHR kernels --- src/Solver.cpp | 3 ++- test/src/convergence_test.cpp | 8 ++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/Solver.cpp b/src/Solver.cpp index 56badcd..25a7578 100644 --- a/src/Solver.cpp +++ b/src/Solver.cpp @@ -1110,7 +1110,8 @@ void Solver::cmptGreenFunction_(Topology *topo[3], double *green, FFTW_plan_dim // No need to scale this as that part of the Green function has a volfact = 1 const bool is_hej = (typeGreen_ == HEJ_2 || typeGreen_ == HEJ_4 || typeGreen_ == HEJ_6 || typeGreen_ == HEJ_8 || typeGreen_ == HEJ_10); const bool is_lgf = (typeGreen_ == LGF_2 || typeGreen_ == LGF_4 || typeGreen_ == LGF_6 || typeGreen_ == LGF_8); - if (ndim_ == 3 && nbr_spectral == 1 && (is_hej || is_lgf)) { + const bool is_meh = (typeGreen_ == MEHR_4L || typeGreen_ == MEHR_4F || typeGreen_ == MEHR_6L || typeGreen_ == MEHR_6F); + if (ndim_ == 3 && nbr_spectral == 1 && (is_hej || is_lgf || is_meh)) { int istart_cstm[3] = {0, 0, 0}; // global for (int ip = 0; ip < 3; ip++) { diff --git a/test/src/convergence_test.cpp b/test/src/convergence_test.cpp index 1385689..25b2006 100644 --- a/test/src/convergence_test.cpp +++ b/test/src/convergence_test.cpp @@ -166,13 +166,9 @@ TEST_P(ConvergenceTest, AllBoundaryConditions){ bool isYSpectral = (*(mybc_[1][0]) != UNB) && (*(mybc_[1][1]) != UNB); bool isZSpectral = (*(mybc_[2][0]) != UNB) && (*(mybc_[2][1]) != UNB); int n_spectral = isXSpectral + isYSpectral + isZSpectral; + bool is_green_spectral = (n_spectral == 3) && (CHAT_2 == green_ || HEJ_0 == green_); // Chat2 and Hej0 have a spectral accuracy with spectral bc - // bool is_mehrstellen = (MEHR_4L == green_ || MEHR_4F == green_ || MEHR_6L == green_ || MEHR_6F == green_); - - bool is_green_invalid = false; - is_green_invalid |= (HEJ_0 == green_) && (n_spectral != 3); // Hej0 cannot handle unbounded bcs. - // is_green_invalid |= ((!is2D) && is_mehrstellen && (n_spectral == 1)); // MEHR stencils cannot handle 3D with one spectral direction - // is_green_invalid |= (is2D && is_mehrstellen); // MEHR stencils cannot handle 2D domains + bool is_green_invalid = (HEJ_0 == green_) && (n_spectral != 3); // Hej0 cannot handle unbounded bcs. // ------------------------------------------------------------------------- // Perform the test From a33da054e3a50a80000c698e5aaede1cbe0d2a9a Mon Sep 17 00:00:00 2001 From: James Gabbard Date: Mon, 4 Dec 2023 08:43:55 -0800 Subject: [PATCH 06/19] :bug: revert convergence test size to normal --- test/src/convergence_test.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/test/src/convergence_test.cpp b/test/src/convergence_test.cpp index 25b2006..05dbe62 100644 --- a/test/src/convergence_test.cpp +++ b/test/src/convergence_test.cpp @@ -95,10 +95,8 @@ std::string TestNameGenerator(const ::testing::TestParamInfo& info) { class ConvergenceTest : public testing::TestWithParam{ protected: // Default variable for all the tests - // const int nproc_3d_[3] = {4, 4, 4}; - // const int nproc_2d_[3] = {8, 8, 1}; - const int nproc_3d_[3] = {1, 1, 1}; - const int nproc_2d_[3] = {1, 1, 1}; + const int nproc_3d_[3] = {4, 4, 4}; + const int nproc_2d_[3] = {8, 8, 1}; const double L_[3] = {1., 1., 1.}; // Variable specific to a test_case From f19cc5904bea8478069de5ca513ec1abf7d53e5e Mon Sep 17 00:00:00 2001 From: Gilles Poncelet Date: Wed, 6 Dec 2023 17:26:18 +0100 Subject: [PATCH 07/19] :white_check_mark: update references for validation test --- .../data_ref/validation_3d_CellCenter_333399_typeGreen=11.txt | 1 + .../data_ref/validation_3d_CellCenter_333399_typeGreen=12.txt | 1 + .../data_ref/validation_3d_CellCenter_333399_typeGreen=13.txt | 1 + .../data_ref/validation_3d_CellCenter_333399_typeGreen=14.txt | 1 + .../data_ref/validation_3d_CellCenter_401499_typeGreen=11.txt | 1 + .../data_ref/validation_3d_CellCenter_401499_typeGreen=12.txt | 1 + .../data_ref/validation_3d_CellCenter_401499_typeGreen=13.txt | 1 + .../data_ref/validation_3d_CellCenter_401499_typeGreen=14.txt | 1 + .../data_ref/validation_3d_NodeCenter_333399_typeGreen=11.txt | 1 + .../data_ref/validation_3d_NodeCenter_333399_typeGreen=12.txt | 1 + .../data_ref/validation_3d_NodeCenter_333399_typeGreen=13.txt | 1 + .../data_ref/validation_3d_NodeCenter_333399_typeGreen=14.txt | 1 + .../data_ref/validation_3d_NodeCenter_401499_typeGreen=11.txt | 1 + .../data_ref/validation_3d_NodeCenter_401499_typeGreen=12.txt | 1 + .../data_ref/validation_3d_NodeCenter_401499_typeGreen=13.txt | 1 + .../data_ref/validation_3d_NodeCenter_401499_typeGreen=14.txt | 1 + samples/validation/scripts/test_3D_kerns.py | 4 ---- 17 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=11.txt create mode 100644 samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=12.txt create mode 100644 samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=13.txt create mode 100644 samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=14.txt create mode 100644 samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=11.txt create mode 100644 samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=12.txt create mode 100644 samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=13.txt create mode 100644 samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=14.txt create mode 100644 samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=11.txt create mode 100644 samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=12.txt create mode 100644 samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=13.txt create mode 100644 samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=14.txt create mode 100644 samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=11.txt create mode 100644 samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=12.txt create mode 100644 samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=13.txt create mode 100644 samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=14.txt diff --git a/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=11.txt b/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=11.txt new file mode 100644 index 0000000..e856869 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=11.txt @@ -0,0 +1 @@ +16 3.311760673069e-02 6.001754483240e-02 diff --git a/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=12.txt b/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=12.txt new file mode 100644 index 0000000..e856869 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=12.txt @@ -0,0 +1 @@ +16 3.311760673069e-02 6.001754483240e-02 diff --git a/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=13.txt b/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=13.txt new file mode 100644 index 0000000..cb19938 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=13.txt @@ -0,0 +1 @@ +16 3.296578486463e-04 5.974240491287e-04 diff --git a/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=14.txt b/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=14.txt new file mode 100644 index 0000000..492d43f --- /dev/null +++ b/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=14.txt @@ -0,0 +1 @@ +16 4.420020480575e-05 8.010203741826e-05 diff --git a/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=11.txt b/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=11.txt new file mode 100644 index 0000000..2decaf8 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=11.txt @@ -0,0 +1 @@ +16 8.279381136966e-03 5.177451801366e-02 diff --git a/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=12.txt b/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=12.txt new file mode 100644 index 0000000..2decaf8 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=12.txt @@ -0,0 +1 @@ +16 8.279381136966e-03 5.177451801366e-02 diff --git a/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=13.txt b/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=13.txt new file mode 100644 index 0000000..9ac0bea --- /dev/null +++ b/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=13.txt @@ -0,0 +1 @@ +16 2.601533757485e-04 8.942064632086e-04 diff --git a/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=14.txt b/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=14.txt new file mode 100644 index 0000000..ced3cde --- /dev/null +++ b/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=14.txt @@ -0,0 +1 @@ +16 6.344010847012e-05 2.062116091302e-04 diff --git a/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=11.txt b/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=11.txt new file mode 100644 index 0000000..6f600b9 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=11.txt @@ -0,0 +1 @@ +17 3.311760673069e-02 6.623521346139e-02 diff --git a/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=12.txt b/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=12.txt new file mode 100644 index 0000000..6f600b9 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=12.txt @@ -0,0 +1 @@ +17 3.311760673069e-02 6.623521346139e-02 diff --git a/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=13.txt b/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=13.txt new file mode 100644 index 0000000..b05f12e --- /dev/null +++ b/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=13.txt @@ -0,0 +1 @@ +17 3.296578486463e-04 6.593156972932e-04 diff --git a/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=14.txt b/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=14.txt new file mode 100644 index 0000000..21100ec --- /dev/null +++ b/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=14.txt @@ -0,0 +1 @@ +17 4.420020480569e-05 8.840040961200e-05 diff --git a/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=11.txt b/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=11.txt new file mode 100644 index 0000000..f50098b --- /dev/null +++ b/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=11.txt @@ -0,0 +1 @@ +17 8.290370265556e-03 5.322438967995e-02 diff --git a/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=12.txt b/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=12.txt new file mode 100644 index 0000000..f50098b --- /dev/null +++ b/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=12.txt @@ -0,0 +1 @@ +17 8.290370265556e-03 5.322438967995e-02 diff --git a/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=13.txt b/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=13.txt new file mode 100644 index 0000000..b2d4500 --- /dev/null +++ b/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=13.txt @@ -0,0 +1 @@ +17 2.785479427274e-04 1.315704724925e-03 diff --git a/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=14.txt b/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=14.txt new file mode 100644 index 0000000..cbce81c --- /dev/null +++ b/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=14.txt @@ -0,0 +1 @@ +17 7.436135343861e-05 4.195883266850e-04 diff --git a/samples/validation/scripts/test_3D_kerns.py b/samples/validation/scripts/test_3D_kerns.py index 5c90e36..66321ef 100644 --- a/samples/validation/scripts/test_3D_kerns.py +++ b/samples/validation/scripts/test_3D_kerns.py @@ -59,10 +59,6 @@ print("skip kernel 7 and 2dirunbounded... unsupported error due to inherent approximation.") continue - if (kern in ['11','12','13','14'] and bcs[4:6] == ["9","9"]) : - print("Mehrstellen stencils not implemented for 2D problems") - continue - if(bcs[4:6] == ["9","9"]): str_bcs = str(bcs[0]) + "," + str(bcs[1])+ "," + str(bcs[2])+ "," + str(bcs[3])+ "," + str(bcs[4])+ "," + str(bcs[5]) r = subprocess.run(["mpirun"] + ["-np"] + ["2"] + ["./flups_validation_nb"] + ["--np=1,2,1"] + ["--kernel="+str(kern)] + ["--center=" + str(centerType)] + ["--res=16,16,1"] + ["--nres=1"] + ["--bc="+str_bcs], capture_output=True) From 2c095c04a5da9166b60ed664203277318a3c7e89 Mon Sep 17 00:00:00 2001 From: Gilles Poncelet Date: Thu, 7 Dec 2023 16:57:08 +0100 Subject: [PATCH 08/19] :white_check_mark: further subdivide the tests to avoid running out of time --- .gitlab-ci/flups-lm3.yml | 40 +++++++++++++++++------------------- test/run/test_convergence.sh | 2 +- test/run/test_workflow.sh | 20 ++++++++++-------- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/.gitlab-ci/flups-lm3.yml b/.gitlab-ci/flups-lm3.yml index f654471..790a168 100644 --- a/.gitlab-ci/flups-lm3.yml +++ b/.gitlab-ci/flups-lm3.yml @@ -111,20 +111,20 @@ check result lm3: start_in: 1200 minutes artifacts: paths: - - report_test_a2a_cell.xml - - report_test_nb_cell.xml - - report_test_isr_cell.xml - - report_test_a2a_node.xml - - report_test_nb_node.xml - - report_test_isr_node.xml + - report_test_a2a_cell_*.xml + - report_test_nb_cell_*.xml + - report_test_isr_cell_*.xml + - report_test_a2a_node_*.xml + - report_test_nb_node_*.xml + - report_test_isr_node_*.xml reports: junit: - - report_test_a2a_cell.xml - - report_test_nb_cell.xml - - report_test_isr_cell.xml - - report_test_a2a_node.xml - - report_test_nb_node.xml - - report_test_isr_node.xml + - report_test_a2a_cell_*.xml + - report_test_nb_cell_*.xml + - report_test_isr_cell_*.xml + - report_test_a2a_node_*.xml + - report_test_nb_node_*.xml + - report_test_isr_node_*.xml when: always needs: [run lm3] dependencies: @@ -137,15 +137,13 @@ check result lm3: - if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_nb_*") ]]; then echo "At least one of the nb test did not execute till the end."; exit 1; fi - if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_isr_*") ]]; then echo "At least one of the isr test did not execute till the end."; exit 1; fi - scp lm3:${PATH2RUN}/*.xml . - - test -f report_test_a2a_cell.xml && if [[ $(grep " Date: Thu, 7 Dec 2023 18:06:03 +0100 Subject: [PATCH 09/19] :white_check_mark: more clarity and automation in the lm3 test validation --- .gitlab-ci/flups-lm3.yml | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/.gitlab-ci/flups-lm3.yml b/.gitlab-ci/flups-lm3.yml index 790a168..1acb359 100644 --- a/.gitlab-ci/flups-lm3.yml +++ b/.gitlab-ci/flups-lm3.yml @@ -137,13 +137,18 @@ check result lm3: - if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_nb_*") ]]; then echo "At least one of the nb test did not execute till the end."; exit 1; fi - if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_isr_*") ]]; then echo "At least one of the isr test did not execute till the end."; exit 1; fi - scp lm3:${PATH2RUN}/*.xml . - - if [[ $(ls -dq report_test_* | wc -l) -ne 90 ]]; then echo "At least one of the test did not execute till the end."; exit 1; fi - - if [[ $(grep " Date: Thu, 7 Dec 2023 18:18:24 +0100 Subject: [PATCH 10/19] :white_check_mark: check everything before exiting (and more clarity) --- .gitlab-ci/flups-lm3.yml | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci/flups-lm3.yml b/.gitlab-ci/flups-lm3.yml index 1acb359..3df4143 100644 --- a/.gitlab-ci/flups-lm3.yml +++ b/.gitlab-ci/flups-lm3.yml @@ -133,22 +133,42 @@ check result lm3: - echo -e "\e[0Ksection_start:`date +%s`:my_first_section[collapsed=true]\r\e[0KWaiting for job completion" - while [[ $(ssh lm3 "squeue -u vortexbot | grep vortex") ]]; do echo "Waiting for job completion:"; echo "$(ssh lm3 squeue -u vortexbot )"; sleep 2m; done - echo -e "\e[0Ksection_end:`date +%s`:my_first_section\r\e[0K" - - if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_a2a_*") ]]; then echo "At least one of the a2a test did not execute till the end."; exit 1; fi - - if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_nb_*") ]]; then echo "At least one of the nb test did not execute till the end."; exit 1; fi - - if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_isr_*") ]]; then echo "At least one of the isr test did not execute till the end."; exit 1; fi + - | + unfinished_tests=() + + for comm_type in a2a nb isr; do + for center_type in cell node; do + for kernel_type in chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr4l; do + if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_${comm_type}_${center_type}_${kernel_type}") ]]; then + echo "Test ${comm_type}_${center_type}_${kernel_type} did not execute till the end" + unfinished_tests+=("${comm_type}_${center_type}_${kernel_type}") + fi + done + done + done + + if [[ ${#unfinished_tests[@]} -gt 0 ]]; then + exit 1 + fi - scp lm3:${PATH2RUN}/*.xml . - | + failed_tests=() + for comm_type in a2a nb isr; do for center_type in cell node; do for kernel_type in chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr4l; do test -f report_test_${comm_type}_${center_type}_${kernel_type}.xml if [[ $(grep " Date: Fri, 8 Dec 2023 14:40:51 +0100 Subject: [PATCH 11/19] :bug: :white_check_mark: add wildcard to account for the SLURM jobid at the end of the stdout file --- .gitlab-ci/flups-lm3.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci/flups-lm3.yml b/.gitlab-ci/flups-lm3.yml index 3df4143..f1a6937 100644 --- a/.gitlab-ci/flups-lm3.yml +++ b/.gitlab-ci/flups-lm3.yml @@ -139,7 +139,7 @@ check result lm3: for comm_type in a2a nb isr; do for center_type in cell node; do for kernel_type in chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr4l; do - if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_${comm_type}_${center_type}_${kernel_type}") ]]; then + if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_${comm_type}_${center_type}_${kernel_type}_*") ]]; then echo "Test ${comm_type}_${center_type}_${kernel_type} did not execute till the end" unfinished_tests+=("${comm_type}_${center_type}_${kernel_type}") fi From 6454496796621bc47ec95540efc4c24685aaa924 Mon Sep 17 00:00:00 2001 From: Gilles Poncelet Date: Mon, 11 Dec 2023 10:07:50 +0100 Subject: [PATCH 12/19] :bug: :white_check_mark: increase job time by 30min & try to fix the artifact upload --- .gitlab-ci/flups-lm3.yml | 14 ++------------ test/run/test_convergence.sh | 2 +- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/.gitlab-ci/flups-lm3.yml b/.gitlab-ci/flups-lm3.yml index f1a6937..3c18232 100644 --- a/.gitlab-ci/flups-lm3.yml +++ b/.gitlab-ci/flups-lm3.yml @@ -111,20 +111,10 @@ check result lm3: start_in: 1200 minutes artifacts: paths: - - report_test_a2a_cell_*.xml - - report_test_nb_cell_*.xml - - report_test_isr_cell_*.xml - - report_test_a2a_node_*.xml - - report_test_nb_node_*.xml - - report_test_isr_node_*.xml + - ./*.xml reports: junit: - - report_test_a2a_cell_*.xml - - report_test_nb_cell_*.xml - - report_test_isr_cell_*.xml - - report_test_a2a_node_*.xml - - report_test_nb_node_*.xml - - report_test_isr_node_*.xml + - ./*.xml when: always needs: [run lm3] dependencies: diff --git a/test/run/test_convergence.sh b/test/run/test_convergence.sh index 3285717..354b725 100755 --- a/test/run/test_convergence.sh +++ b/test/run/test_convergence.sh @@ -1,7 +1,7 @@ #!/bin/bash # Submission script for Lemaitre3 #SBATCH --job-name=flups_auto_test -#SBATCH --time=02:00:00 # hh:mm:ss +#SBATCH --time=02:30:00 # hh:mm:ss # #SBATCH --ntasks=64 #SBATCH --mem-per-cpu=4000 # megabytes From ff01de69df17bf32217b4fac6982dec07ec0a7ed Mon Sep 17 00:00:00 2001 From: Gilles Poncelet Date: Mon, 11 Dec 2023 16:49:27 +0100 Subject: [PATCH 13/19] :bug: :white_check_mark: error in the kernel list... mehr4l -> mehr6f --- .gitlab-ci/flups-lm3.yml | 4 ++-- test/run/test_workflow.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci/flups-lm3.yml b/.gitlab-ci/flups-lm3.yml index 3c18232..d3a0388 100644 --- a/.gitlab-ci/flups-lm3.yml +++ b/.gitlab-ci/flups-lm3.yml @@ -128,7 +128,7 @@ check result lm3: for comm_type in a2a nb isr; do for center_type in cell node; do - for kernel_type in chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr4l; do + for kernel_type in chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr6f; do if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_${comm_type}_${center_type}_${kernel_type}_*") ]]; then echo "Test ${comm_type}_${center_type}_${kernel_type} did not execute till the end" unfinished_tests+=("${comm_type}_${center_type}_${kernel_type}") @@ -146,7 +146,7 @@ check result lm3: for comm_type in a2a nb isr; do for center_type in cell node; do - for kernel_type in chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr4l; do + for kernel_type in chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr6f; do test -f report_test_${comm_type}_${center_type}_${kernel_type}.xml if [[ $(grep " Date: Mon, 11 Mar 2024 15:08:19 +0100 Subject: [PATCH 14/19] :tada: Introducing a function to avoid freeing fftw --- src/Solver.cpp | 4 ---- src/Solver.hpp | 3 +++ src/flups.cpp | 15 +++++++++++++++ src/flups.h | 9 +++++++++ 4 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/Solver.cpp b/src/Solver.cpp index 25a7578..a680d7a 100644 --- a/src/Solver.cpp +++ b/src/Solver.cpp @@ -523,11 +523,7 @@ Solver::~Solver() { // fftw_export_wisdom_to_filename(FLUPS_WISDOM_PATH); //#endif -#if FLUPS_OPENMP - fftw_cleanup_threads(); -#endif - fftw_cleanup(); // m_profStopi(prof_, "Clean up"); //------------------------------------------------------------------------- END_FUNC; diff --git a/src/Solver.hpp b/src/Solver.hpp index 1d9f9f4..c2955cc 100644 --- a/src/Solver.hpp +++ b/src/Solver.hpp @@ -52,6 +52,9 @@ #include "metis.h" #endif + + + /** * @brief The Poisson solver * diff --git a/src/flups.cpp b/src/flups.cpp index 4484fa9..0d63ae3 100644 --- a/src/flups.cpp +++ b/src/flups.cpp @@ -133,9 +133,24 @@ Solver* flups_init_timed(Topology* t, BoundaryType* bc[3][2], const double h[3], // destroy the solver void flups_cleanup(Solver* s) { + flups_cleanup_ext(s, false); +} + +void flups_cleanup_ext(Solver* s, const bool cleanup_fftw) { delete s; + if (cleanup_fftw){ + flups_cleanup_fftw(); + } } +void flups_cleanup_fftw(){ +#if FLUPS_OPENMP + fftw_cleanup_threads(); +#endif + + fftw_cleanup(); +}; + // setup the solver void flups_set_greenType(Solver* s, const GreenType type) { s->set_GreenType(type); diff --git a/src/flups.h b/src/flups.h index df4c04a..61ffa0d 100644 --- a/src/flups.h +++ b/src/flups.h @@ -401,6 +401,15 @@ FLUPS_Solver* flups_init_timed(FLUPS_Topology* t, FLUPS_BoundaryType* bc[3][2], */ void flups_cleanup(FLUPS_Solver* s); +/** + * @brief must be called before execution terminates as it frees the memory used by the solver + * + * @param s + */ +void flups_cleanup_ext(FLUPS_Solver* s, const bool cleanup_fftw); + +void flups_cleanup_fftw(); + /** * @brief sets the type of the Green's function used by the solver * From c36f0cc92bebccfaa3078594956d55a08c6ed2fe Mon Sep 17 00:00:00 2001 From: pbalty Date: Tue, 30 Apr 2024 12:21:53 +0200 Subject: [PATCH 15/19] :construction: update the samples directory --- samples/compareACCFFT/main.cpp | 57 ++- samples/compareP3DFFT++/main_compare++.cpp | 447 +++++++++--------- samples/compareP3DFFT/main_compare.cpp | 300 ++++++------ samples/simpleTest/main_simple_test.cpp | 134 +++--- .../solve_advanced_C/main_solve_advanced.c | 173 +++---- samples/solve_vtube/src/vtube.cpp | 190 ++++---- samples/validation/src/validation_3d.cpp | 134 +++--- src/flups.cpp | 2 + src/flups.h | 2 + src/flups_interface.h | 2 +- 10 files changed, 713 insertions(+), 728 deletions(-) diff --git a/samples/compareACCFFT/main.cpp b/samples/compareACCFFT/main.cpp index ed18280..82e71b8 100644 --- a/samples/compareACCFFT/main.cpp +++ b/samples/compareACCFFT/main.cpp @@ -2,10 +2,9 @@ #include #include "accfft.h" -#include "h3lpr/profiler.hpp" -#include "h3lpr/parser.hpp" #include "flups.h" - +#include "h3lpr/parser.hpp" +#include "h3lpr/profiler.hpp" int main(int argc, char *argv[]) { //------------------------------------------------------------------------- @@ -26,29 +25,28 @@ int main(int argc, char *argv[]) { // Get info from the command line //-------------------------------------------------------------------------- H3LPR::Parser parser(argc, (const char **)argv); - const auto arg_nglob = parser.GetValues("--nglob", "the global resolution, will be used for both ACCFFT and FLUPS", {64,64,64}); + const auto arg_nglob = parser.GetValues("--nglob", "the global resolution, will be used for both ACCFFT and FLUPS", {64, 64, 64}); const auto arg_nproc = parser.GetValues("--nproc", "the proc distribution, for FLUPS only", {1, 1, 1}); const auto arg_dom = parser.GetValues("--dom", "the size of the domain, must be compatible with nglob", {1.0, 1.0, 1.0}); const int n_iter = parser.GetValue("--niter", "the number of iterations to perform", 20); const int n_warm = parser.GetValue("--warm", "the number of iterations to perform when warming up", 1); - const bool profile = parser.GetFlag("--profile","forward the profiler to flups"); + const bool profile = parser.GetFlag("--profile", "forward the profiler to flups"); parser.Finalize(); //-------------------------------------------------------------------------- // Definition of the problem //-------------------------------------------------------------------------- - const int nglob[3] = {arg_nglob[0], arg_nglob[1], arg_nglob[2]}; - const int nproc[3] = {arg_nproc[0], arg_nproc[1], arg_nproc[2]}; - const double L[3] = {arg_dom[0], arg_dom[1], arg_dom[2]}; - + const int nglob[3] = {arg_nglob[0], arg_nglob[1], arg_nglob[2]}; + const int nproc[3] = {arg_nproc[0], arg_nproc[1], arg_nproc[2]}; + const double L[3] = {arg_dom[0], arg_dom[1], arg_dom[2]}; // get the grid spacing const double h[3] = {L[0] / nglob[0], L[1] / nglob[1], L[2] / nglob[2]}; // get the PER PER PER BC everywhere const FLUPS_CenterType center_type[3] = {CELL_CENTER, CELL_CENTER, CELL_CENTER}; - //const FLUPS_CenterType center_type[3] = {NODE_CENTER, NODE_CENTER, NODE_CENTER}; - FLUPS_BoundaryType *mybc[3][2]; + // const FLUPS_CenterType center_type[3] = {NODE_CENTER, NODE_CENTER, NODE_CENTER}; + FLUPS_BoundaryType *mybc[3][2]; for (int id = 0; id < 3; id++) { for (int is = 0; is < 2; is++) { mybc[id][is] = (FLUPS_BoundaryType *)flups_malloc(sizeof(int) * 1); @@ -56,7 +54,7 @@ int main(int argc, char *argv[]) { } } - //.......................................................................... + //.......................................................................... // Display flups_info(argc, argv); if (rank == 0) { @@ -70,11 +68,9 @@ int main(int argc, char *argv[]) { printf("--------------------------------------------------------------\n"); } - - //-------------------------------------------------------------------------- - std::string prof_name = "beatme_nglob" + std::to_string(nglob[0]) +"_"+ std::to_string(nglob[1]) + "_" + std::to_string(nglob[2]) + "_nrank" + std::to_string(comm_size); - H3LPR::Profiler prof(prof_name); + std::string prof_name = "beatme_nglob" + std::to_string(nglob[0]) + "_" + std::to_string(nglob[1]) + "_" + std::to_string(nglob[2]) + "_nrank" + std::to_string(comm_size); + H3LPR::Profiler prof(prof_name); //-------------------------------------------------------------------------- /** - Initialize FLUPS */ @@ -82,17 +78,17 @@ int main(int argc, char *argv[]) { if (rank == 0) printf("Initialization of FLUPS\n"); // create a real topology - FLUPS_Profiler* flups_prof = (profile)? (FLUPS_Profiler*) &prof : nullptr; - FLUPS_Topology *topoTmp = flups_topo_new(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm); - FLUPS_Solver *mysolver = flups_init_timed(topoTmp, mybc, h, L, NOD, center_type, flups_prof); + FLUPS_Profiler *flups_prof = (profile) ? (FLUPS_Profiler *)&prof : nullptr; + FLUPS_Topology *topoTmp = flups_topo_new(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm); + FLUPS_Solver *mysolver = flups_init_timed(topoTmp, mybc, h, L, NOD, center_type, flups_prof); // set the CHAT2 green type (even if it's not used) flups_set_greenType(mysolver, CHAT_2); flups_setup(mysolver, true); - double *solFLU = flups_get_innerBuffer(mysolver); + double *solFLU = flups_get_innerBuffer(mysolver); // to fill the data we use the inner topo - const Topology *topoIn =flups_get_innerTopo_physical(mysolver); + const Topology *topoIn = flups_get_innerTopo_physical(mysolver); // instruct the solver to skip the first ST flups_skip_firstSwitchtopo(mysolver); @@ -110,18 +106,18 @@ int main(int argc, char *argv[]) { //.......................................................................... // set some straightforward data - int start_id[3]; + int start_id[3]; flups_topo_get_istartGlob(topoIn, start_id); int topo_nmem[3] = {flups_topo_get_nmem(topoIn, 0), flups_topo_get_nmem(topoIn, 1), flups_topo_get_nmem(topoIn, 2)}; // set a simple expression double val = 0.0; - for (int i2 = 0; i2 < flups_topo_get_nloc(topoIn, 2); ++i2){ - for(int i1 = 0; i1 < flups_topo_get_nloc(topoIn, 1); ++ i1){ - for(int i0 = 0; i0 < flups_topo_get_nloc(topoIn, 0); ++i0){ - //double x = 2.0 * M_PI / nglob[0] * (i0 + topoIn->cmpt_start_id(0)); - //double y = 2.0 * M_PI / nglob[1] * (i1 + topoIn->cmpt_start_id(1)); - //double z = 2.0 * M_PI / nglob[2] * (i2 + topoIn->cmpt_start_id(2)); + for (int i2 = 0; i2 < flups_topo_get_nloc(topoIn, 2); ++i2) { + for (int i1 = 0; i1 < flups_topo_get_nloc(topoIn, 1); ++i1) { + for (int i0 = 0; i0 < flups_topo_get_nloc(topoIn, 0); ++i0) { + // double x = 2.0 * M_PI / nglob[0] * (i0 + topoIn->cmpt_start_id(0)); + // double y = 2.0 * M_PI / nglob[1] * (i1 + topoIn->cmpt_start_id(1)); + // double z = 2.0 * M_PI / nglob[2] * (i2 + topoIn->cmpt_start_id(2)); double x = 2.0 * M_PI / nglob[0] * (i0 + start_id[0]); double y = 2.0 * M_PI / nglob[1] * (i1 + start_id[1]); double z = 2.0 * M_PI / nglob[2] * (i2 + start_id[2]); @@ -142,9 +138,9 @@ int main(int argc, char *argv[]) { accfft_create_comm(MPI_COMM_WORLD, c_dims, &c_comm); // let ACCFFT decide on the topology choice, pencil in Z, as always - int isize[3], osize[3], istart[3], ostart[3]; + int isize[3], osize[3], istart[3], ostart[3]; - int n_acc[3] = {nglob[2],nglob[1],nglob[0]}; + int n_acc[3] = {nglob[2], nglob[1], nglob[0]}; size_t alloc_max = accfft_local_size_dft_r2c(n_acc, isize, istart, osize, ostart, c_comm); double *data_acc = (double *)accfft_alloc(alloc_max); @@ -273,6 +269,7 @@ int main(int argc, char *argv[]) { } } + flups_cleanup_fftw(); MPI_Finalize(); } diff --git a/samples/compareP3DFFT++/main_compare++.cpp b/samples/compareP3DFFT++/main_compare++.cpp index 0c51f69..1d3eff4 100644 --- a/samples/compareP3DFFT++/main_compare++.cpp +++ b/samples/compareP3DFFT++/main_compare++.cpp @@ -5,32 +5,32 @@ #include "p3dfft.h" void print_res(p3dfft::complex_double *A, int *sdims, int *gstart); -void print_res(double *A, const Topology* topo); +void print_res(double *A, const Topology *topo); /** * @brief Compare P3DFFT++ and FLUPS on the same FFT 3D - * + * * This code will time both libraries on a give FFT 3D: the full periodic/DFT. * Notice that P3D requires the data to be already in pencils, which * is not the case of FLUPS. To do a fair comparison, the time required - * by FLUPS to switch from the initial topology to the inner pencil topology + * by FLUPS to switch from the initial topology to the inner pencil topology * (Switch0) must be substracted from the total time of the solve. - * + * * Alternatively, we could have chosen to do an ODD-ODD/DST2 in the x direction. - * As a result, FLUPS would automatically skip Switch0. However, we could not + * As a result, FLUPS would automatically skip Switch0. However, we could not * manage to do that case with P3DFFT... - * - * @param argc - * @param argv - * @return int + * + * @param argc + * @param argv + * @return int */ int main(int argc, char *argv[]) { //------------------------------------------------------------------------- // Default values //------------------------------------------------------------------------- - int n_iter = 10; - int res[3] = {16,16,16}; - int nproc2D[2] = {1,2}; + int n_iter = 10; + int res[3] = {16, 16, 16}; + int nproc2D[2] = {1, 2}; //------------------------------------------------------------------------- // Parse arguments @@ -43,114 +43,114 @@ int main(int argc, char *argv[]) { printf(" --niter, -ni Ni : Ni is the number of times we call the same 3D FFT (for statistics on the profiler) \n"); return 0; } else if ((arg == "-np") || (arg == "--nprocs")) { - for (int j = 0; j<2;j++){ - if (i + j + 1 < argc) { // Make sure we aren't at the end of argv! - nproc2D[j] = atoi(argv[i+j+1]); - if(nproc2D[j]<1){ + for (int j = 0; j < 2; j++) { + if (i + j + 1 < argc) { // Make sure we aren't at the end of argv! + nproc2D[j] = atoi(argv[i + j + 1]); + if (nproc2D[j] < 1) { fprintf(stderr, "nprocs must be >0\n"); return 1; } - } else { //Missing argument + } else { // Missing argument fprintf(stderr, "missing argument in --nprocs\n"); return 1; - } + } } - i+=2; - } else if ((arg == "-res")|| (arg== "--resolution") ) { - for (int j = 0; j<3;j++){ - if (i + j + 1 < argc) { // Make sure we aren't at the end of argv! - res[j] = atoi(argv[i+j+1]); - if(res[j]<=0.0){ + i += 2; + } else if ((arg == "-res") || (arg == "--resolution")) { + for (int j = 0; j < 3; j++) { + if (i + j + 1 < argc) { // Make sure we aren't at the end of argv! + res[j] = atoi(argv[i + j + 1]); + if (res[j] <= 0.0) { fprintf(stderr, "res must be >0\n"); return 1; } - } else { //Missing argument + } else { // Missing argument fprintf(stderr, "missing argument in -res\n"); return 1; - } + } } - i+=3; - } else if ((arg == "-ni")|| (arg== "--niter") ) { - if (i + 1 < argc) { // Make sure we aren't at the end of argv! - n_iter = atoi(argv[i+1]); - if(n_iter<1){ + i += 3; + } else if ((arg == "-ni") || (arg == "--niter")) { + if (i + 1 < argc) { // Make sure we aren't at the end of argv! + n_iter = atoi(argv[i + 1]); + if (n_iter < 1) { fprintf(stderr, "niter must be >0\n"); return 1; } - } else { //Missing argument + } else { // Missing argument fprintf(stderr, "missing --niter\n"); return 1; - } + } i++; - // } else if ((arg == "-bc")|| (arg== "--boundary-conditions") ) { - // for (int j = 0; j<6;j++){ - // if (i + j + 1 < argc) { // Make sure we aren't at the end of argv! - // bcdef[j/2][j%2] = (BoundaryType) atoi(argv[i+j+1]); - // } else { //Missing argument - // fprintf(stderr, "missing argument in --boundary-conditions\n"); - // return 1; - // } - // } - // i+=6; + // } else if ((arg == "-bc")|| (arg== "--boundary-conditions") ) { + // for (int j = 0; j<6;j++){ + // if (i + j + 1 < argc) { // Make sure we aren't at the end of argv! + // bcdef[j/2][j%2] = (BoundaryType) atoi(argv[i+j+1]); + // } else { //Missing argument + // fprintf(stderr, "missing argument in --boundary-conditions\n"); + // return 1; + // } + // } + // i+=6; } } - + //------------------------------------------------------------------------- // Initialize MPI //------------------------------------------------------------------------- - int rank, comm_size; + int rank, comm_size; MPI_Comm comm = MPI_COMM_WORLD; int provided; // set MPI_THREAD_FUNNELED or MPI_THREAD_SERIALIZED int requested = MPI_THREAD_FUNNELED; MPI_Init_thread(&argc, &argv, requested, &provided); - if(provided < requested){ + if (provided < requested) { FLUPS_ERROR("The MPI-provided thread behavior does not match"); } - + MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &comm_size); //------------------------------------------------------------------------- - //Definition of the problem + // Definition of the problem //------------------------------------------------------------------------- - const int nglob[3] = {res[0], res[1], res[2]}; - const int nproc[3] = {1, nproc2D[0], nproc2D[1]}; //nproc[0] has to be 1 //CAUTION FOR THIS: nproc[1]<=nproc[2] !!! - const double L[3] = {1., 1., 1.};; + const int nglob[3] = {res[0], res[1], res[2]}; + const int nproc[3] = {1, nproc2D[0], nproc2D[1]}; // nproc[0] has to be 1 //CAUTION FOR THIS: nproc[1]<=nproc[2] !!! + const double L[3] = {1., 1., 1.}; + ; const double h[3] = {L[0] / nglob[0], L[1] / nglob[1], L[2] / nglob[2]}; - FLUPS_BoundaryType* mybc[3][2]; - for(int id=0; id<3; id++){ - for(int is=0; is<2; is++){ - mybc[id][is] = (FLUPS_BoundaryType*) flups_malloc(sizeof(int)*1); + FLUPS_BoundaryType *mybc[3][2]; + for (int id = 0; id < 3; id++) { + for (int is = 0; is < 2; is++) { + mybc[id][is] = (FLUPS_BoundaryType *)flups_malloc(sizeof(int) * 1); mybc[id][is][0] = PER; } } - if(comm_size!=nproc[0]*nproc[1]*nproc[2]) + if (comm_size != nproc[0] * nproc[1] * nproc[2]) FLUPS_ERROR("Invalid number of procs"); - //------------------------------------------------------------------------- /** - Initialize FLUPS */ //------------------------------------------------------------------------- // create a real topology - int FLUnmemIn[3],FLUnmemOUT[3]; - Topology *topoIn = new Topology(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm); - const int nprocOut[3] = {1, 2, 1}; - const int nglobOut[3] = {17, 32, 64}; + int FLUnmemIn[3], FLUnmemOUT[3]; + Topology *topoIn = new Topology(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm); + const int nprocOut[3] = {1, 2, 1}; + const int nglobOut[3] = {17, 32, 64}; const FLUPS_CenterType center_type[3] = {CELL_CENTER, CELL_CENTER, CELL_CENTER}; - + // prepare profiling #ifdef SKIP_P3D - std::string FLUPSprof = "only_FLUPS_res" + std::to_string((int)(nglob[0]/L[0])) + "_nrank" + std::to_string(comm_size)+"_nthread" + std::to_string(omp_get_max_threads()); + std::string FLUPSprof = "only_FLUPS_res" + std::to_string((int)(nglob[0] / L[0])) + "_nrank" + std::to_string(comm_size) + "_nthread" + std::to_string(omp_get_max_threads()); #else - std::string FLUPSprof = "compare_FLUPS_res" + std::to_string((int)(nglob[0]/L[0])) + "_nrank" + std::to_string(comm_size)+"_nthread" + std::to_string(omp_get_max_threads()); + std::string FLUPSprof = "compare_FLUPS_res" + std::to_string((int)(nglob[0] / L[0])) + "_nrank" + std::to_string(comm_size) + "_nthread" + std::to_string(omp_get_max_threads()); #endif - Profiler* FLUprof = new Profiler(FLUPSprof); + Profiler *FLUprof = new Profiler(FLUPSprof); // solver creation and init Solver *mysolver = new Solver(topoIn, mybc, h, L, NOD, center_type, FLUprof); @@ -162,13 +162,13 @@ int main(int argc, char *argv[]) { MPI_Comm_rank(comm, &rank); // retrieveing internal info from the solver - const Topology *topoSpec = mysolver->get_innerTopo_spectral(); + const Topology *topoSpec = mysolver->get_innerTopo_spectral(); for (int i = 0; i < 3; i++) { FLUnmemIn[i] = topoIn->nmem(i); FLUnmemOUT[i] = topoSpec->nmem(i); } - + int istartGloOut[3], istartGlo[3]; topoIn->get_istart_glob(istartGlo); topoSpec->get_istart_glob(istartGloOut); @@ -181,143 +181,146 @@ int main(int argc, char *argv[]) { //------------------------------------------------------------------------- #ifndef SKIP_P3D int conf; - int dims[2] = {nproc[1],nproc[2]}; - int proc_order[3] = {0, 1, 2}; - + int dims[2] = {nproc[1], nproc[2]}; + int proc_order[3] = {0, 1, 2}; + if (rank == 0) printf("Using processor grid %d x %d with pencils in x\n", dims[0], dims[1]); - std::string P3DFFTprof = "compare_P3DFFT_res" + std::to_string((int)(nglob[0]/L[0])) + "_nrank" + std::to_string(comm_size)+"_nthread" + std::to_string(omp_get_max_threads()); - Profiler* P3Dprof = new Profiler(P3DFFTprof); + std::string P3DFFTprof = "compare_P3DFFT_res" + std::to_string((int)(nglob[0] / L[0])) + "_nrank" + std::to_string(comm_size) + "_nthread" + std::to_string(omp_get_max_threads()); + Profiler *P3Dprof = new Profiler(P3DFFTprof); P3Dprof->create("init", "root"); P3Dprof->create("FFTandSwitch", "root"); /* Initialize P3DFFT */ P3Dprof->start("init"); - if(rank==0) - printf("Initilizing P3D...\n"); fflush(stdout); + if (rank == 0) + printf("Initilizing P3D...\n"); + fflush(stdout); p3dfft::setup(); - if(rank==0) - printf("...set up.\n"); fflush(stdout); + if (rank == 0) + printf("...set up.\n"); + fflush(stdout); // Define the transform types for these two transforms - int type_ids1[3] = {p3dfft::R2CFFT_D,p3dfft::CFFT_FORWARD_D ,p3dfft::CFFT_FORWARD_D }; - int type_ids2[3] = {p3dfft::C2RFFT_D,p3dfft::CFFT_BACKWARD_D,p3dfft::CFFT_BACKWARD_D}; - //Unfortunately, P3D segfaults when you do a DST2_REAL_D followed by a R2CFFT_D !! - p3dfft::trans_type3D type_rcc(type_ids1); + int type_ids1[3] = {p3dfft::R2CFFT_D, p3dfft::CFFT_FORWARD_D, p3dfft::CFFT_FORWARD_D}; + int type_ids2[3] = {p3dfft::C2RFFT_D, p3dfft::CFFT_BACKWARD_D, p3dfft::CFFT_BACKWARD_D}; + // Unfortunately, P3D segfaults when you do a DST2_REAL_D followed by a R2CFFT_D !! + p3dfft::trans_type3D type_rcc(type_ids1); p3dfft::trans_type3D type_ccr(type_ids2); // input grid - int gdimsIN[3] = {topoIn->nglob(0), topoIn->nglob(1), topoIn->nglob(2)}; - int mem_orderIN[3] = {0, 1, 2}; - int nprocIN[3] = {nproc[0], nproc[1], nproc[2]}; - p3dfft::grid gridIN(gdimsIN,-1,nprocIN,proc_order,mem_orderIN,comm); + int gdimsIN[3] = {topoIn->nglob(0), topoIn->nglob(1), topoIn->nglob(2)}; + int mem_orderIN[3] = {0, 1, 2}; + int nprocIN[3] = {nproc[0], nproc[1], nproc[2]}; + p3dfft::grid gridIN(gdimsIN, -1, nprocIN, proc_order, mem_orderIN, comm); - if(rank==0) - printf("...input grid.\n"); fflush(stdout); + if (rank == 0) + printf("...input grid.\n"); + fflush(stdout); // ouput grid int gdimsOUT[3]; - for(int i=0; i < 3;i++) + for (int i = 0; i < 3; i++) gdimsOUT[i] = gdimsIN[i]; - gdimsOUT[0] = gdimsOUT[0]/2+1; - int mem_orderOUT[3] = {2, 1, 0}; //blindly mimicking samples, anything else produces a segfault anyway... - int nprocOUT[3] = {dims[0],dims[1],1}; - p3dfft::grid gridOUT(gdimsOUT,0,nprocOUT,proc_order,mem_orderOUT,comm); + gdimsOUT[0] = gdimsOUT[0] / 2 + 1; + int mem_orderOUT[3] = {2, 1, 0}; // blindly mimicking samples, anything else produces a segfault anyway... + int nprocOUT[3] = {dims[0], dims[1], 1}; + p3dfft::grid gridOUT(gdimsOUT, 0, nprocOUT, proc_order, mem_orderOUT, comm); - if(rank==0) - printf("...output grid.\n"); fflush(stdout); + if (rank == 0) + printf("...output grid.\n"); + fflush(stdout); // Set up 3D transforms, including stages and plans, for forward trans. - p3dfft::transform3D trans_f(gridIN,gridOUT,&type_rcc); + p3dfft::transform3D trans_f(gridIN, gridOUT, &type_rcc); // Set up 3D transforms, including stages and plans, for backward trans. - p3dfft::transform3D trans_b(gridOUT,gridIN,&type_ccr); - if(rank==0) - printf("...transforms.\n"); fflush(stdout); + p3dfft::transform3D trans_b(gridOUT, gridIN, &type_ccr); + if (rank == 0) + printf("...transforms.\n"); + fflush(stdout); P3Dprof->stop("init"); - if(rank==0) - printf("Done with P3D init.\n"); fflush(stdout); + if (rank == 0) + printf("Done with P3D init.\n"); + fflush(stdout); p3dfft::timers.init(); - // Find local dimensions in storage order, and also the starting position of the local array in the global array - - //input grid - SIZE IN DOUBLEs - int P3DnlocIN[3],glob_startIN[3]; - for(int i=0;i<3;i++) { - P3DnlocIN[mem_orderIN[i]] = gridIN.ldims[i]; + + // input grid - SIZE IN DOUBLEs + int P3DnlocIN[3], glob_startIN[3]; + for (int i = 0; i < 3; i++) { + P3DnlocIN[mem_orderIN[i]] = gridIN.ldims[i]; glob_startIN[mem_orderIN[i]] = gridIN.glob_start[i]; } - int P3DmemsizeIN = P3DnlocIN[0]*P3DnlocIN[1]*P3DnlocIN[2]; + int P3DmemsizeIN = P3DnlocIN[0] * P3DnlocIN[1] * P3DnlocIN[2]; - //output grid - SIZE IN COMPLEXes - int P3DnlocOUT[3],glob_startOUT[3]; - for(int i=0;i<3;i++) { - P3DnlocOUT[mem_orderOUT[i]] = gridOUT.ldims[i]; + // output grid - SIZE IN COMPLEXes + int P3DnlocOUT[3], glob_startOUT[3]; + for (int i = 0; i < 3; i++) { + P3DnlocOUT[mem_orderOUT[i]] = gridOUT.ldims[i]; glob_startOUT[mem_orderOUT[i]] = gridOUT.glob_start[i]; } - int P3DmemsizeOUT = P3DnlocOUT[0]*P3DnlocOUT[1]*P3DnlocOUT[2]; + int P3DmemsizeOUT = P3DnlocOUT[0] * P3DnlocOUT[1] * P3DnlocOUT[2]; #endif - // //------------------------------------------------------------------------- // /** - allocate rhs and solution */ // //------------------------------------------------------------------------- - if(rank == 0) { - printf("[FLUPS] topo IN glob : %d %d %d \n",topoIn->nglob(0),topoIn->nglob(1),topoIn->nglob(2)); - printf("[FLUPS] topo IN loc : %d*%d*%d = %d (check: %d %d %d)\n",topoIn->nmem(0),topoIn->nmem(1),topoIn->nmem(2),topoIn->memsize(),topoIn->nloc(0),topoIn->nloc(1),topoIn->nloc(2)); - printf("[FLUPS] topo OUT glob : %d %d %d \n",topoSpec->nglob(0),topoSpec->nglob(1),topoSpec->nglob(2)); - printf("[FLUPS] topo OUT loc : nmem: %d*%d*%d nf:%d (nloc: %d %d %d) \n",topoSpec->nmem(0),topoSpec->nmem(1),topoSpec->nmem(2),topoSpec->nf(),topoSpec->nloc(0),topoSpec->nloc(1),topoSpec->nloc(2)); + if (rank == 0) { + printf("[FLUPS] topo IN glob : %d %d %d \n", topoIn->nglob(0), topoIn->nglob(1), topoIn->nglob(2)); + printf("[FLUPS] topo IN loc : %d*%d*%d = %d (check: %d %d %d)\n", topoIn->nmem(0), topoIn->nmem(1), topoIn->nmem(2), topoIn->memsize(), topoIn->nloc(0), topoIn->nloc(1), topoIn->nloc(2)); + printf("[FLUPS] topo OUT glob : %d %d %d \n", topoSpec->nglob(0), topoSpec->nglob(1), topoSpec->nglob(2)); + printf("[FLUPS] topo OUT loc : nmem: %d*%d*%d nf:%d (nloc: %d %d %d) \n", topoSpec->nmem(0), topoSpec->nmem(1), topoSpec->nmem(2), topoSpec->nf(), topoSpec->nloc(0), topoSpec->nloc(1), topoSpec->nloc(2)); #ifndef SKIP_P3D - printf("[P3DFFT++] topo IN glob : %d %d %d \n",gdimsIN[0],gdimsIN[1],gdimsIN[2]); - printf("[P3DFFT++] topo IN loc : %d %d %d (is: %d %d %d) \n",P3DnlocIN[0],P3DnlocIN[1],P3DnlocIN[2],glob_startIN[0],glob_startIN[1],glob_startIN[2]); - printf("[P3DFFT++] topo OUT glob : %d %d %d \n",gdimsOUT[0],gdimsOUT[1],gdimsOUT[2]); - printf("[P3DFFT++] topo OUT loc : %d %d %d (is: %d %d %d) \n",P3DnlocOUT[0],P3DnlocOUT[1],P3DnlocOUT[2],glob_startOUT[0],glob_startOUT[1],glob_startOUT[2]); + printf("[P3DFFT++] topo IN glob : %d %d %d \n", gdimsIN[0], gdimsIN[1], gdimsIN[2]); + printf("[P3DFFT++] topo IN loc : %d %d %d (is: %d %d %d) \n", P3DnlocIN[0], P3DnlocIN[1], P3DnlocIN[2], glob_startIN[0], glob_startIN[1], glob_startIN[2]); + printf("[P3DFFT++] topo OUT glob : %d %d %d \n", gdimsOUT[0], gdimsOUT[1], gdimsOUT[2]); + printf("[P3DFFT++] topo OUT loc : %d %d %d (is: %d %d %d) \n", P3DnlocOUT[0], P3DnlocOUT[1], P3DnlocOUT[2], glob_startOUT[0], glob_startOUT[1], glob_startOUT[2]); #endif - printf("I am going to allocate FLUPS: %d (inside FLUPS: %d)\n",FLUmemsizeIN,FLUmemsizeOUT); -#ifndef SKIP_P3D - printf(" P3D: %d (out %d C) \n",P3DmemsizeIN,P3DmemsizeOUT); + printf("I am going to allocate FLUPS: %d (inside FLUPS: %d)\n", FLUmemsizeIN, FLUmemsizeOUT); +#ifndef SKIP_P3D + printf(" P3D: %d (out %d C) \n", P3DmemsizeIN, P3DmemsizeOUT); #endif } - - - double *rhsFLU = (double *)fftw_malloc(sizeof(double) * FLUmemsizeIN); - //solFLU allocated by sflups setup with the larger size - std::memset(rhsFLU, 0, sizeof(double ) * FLUmemsizeIN); - std::memset(solFLU, 0, sizeof(double ) * FLUmemsizeOUT); - -#ifndef SKIP_P3D - double *rhsP3D = (double *)fftw_malloc(sizeof(double) * P3DmemsizeIN); - p3dfft::complex_double *solP3D = (p3dfft::complex_double *)fftw_malloc(sizeof(p3dfft::complex_double) * P3DmemsizeOUT); - std::memset(rhsP3D, 0, sizeof(double ) * P3DmemsizeIN); + + double *rhsFLU = (double *)fftw_malloc(sizeof(double) * FLUmemsizeIN); + // solFLU allocated by sflups setup with the larger size + std::memset(rhsFLU, 0, sizeof(double) * FLUmemsizeIN); + std::memset(solFLU, 0, sizeof(double) * FLUmemsizeOUT); + +#ifndef SKIP_P3D + double *rhsP3D = (double *)fftw_malloc(sizeof(double) * P3DmemsizeIN); + p3dfft::complex_double *solP3D = (p3dfft::complex_double *)fftw_malloc(sizeof(p3dfft::complex_double) * P3DmemsizeOUT); + std::memset(rhsP3D, 0, sizeof(double) * P3DmemsizeIN); std::memset(solP3D, 0, sizeof(p3dfft::complex_double) * P3DmemsizeOUT); #endif - + // printf("istart: %d %d %d =? %d %d %d(P3D)\n",istartGlo[0],istartGlo[1],istartGlo[2],glob_startIN[0],glob_startIN[1],glob_startIN[2]); - double f = 1; //frequency of the wave + double f = 1; // frequency of the wave for (int i2 = 0; i2 < topoIn->nloc(2); i2++) { for (int i1 = 0; i1 < topoIn->nloc(1); i1++) { for (int i0 = 0; i0 < topoIn->nloc(0); i0++) { - double x = (istartGlo[0] + i0 ) * h[0]; //+ 0.5 - double y = (istartGlo[1] + i1 ) * h[1]; //+ 0.5 - double z = (istartGlo[2] + i2 ) * h[2]; //+ 0.5 + double x = (istartGlo[0] + i0) * h[0]; //+ 0.5 + double y = (istartGlo[1] + i1) * h[1]; //+ 0.5 + double z = (istartGlo[2] + i2) * h[2]; //+ 0.5 - // double xF = (istartGlo[0] + i0 + .5 ) * h[0]; + // double xF = (istartGlo[0] + i0 + .5 ) * h[0]; size_t id; - id = localIndex(0, i0, i1, i2, 0, FLUnmemIn, 1, 0); + id = localIndex(0, i0, i1, i2, 0, FLUnmemIn, 1, 0); rhsFLU[id] = sin((c_2pi / L[0] * f) * x) * sin((c_2pi / L[1] * f) * y) * sin((c_2pi / L[2] * f) * z); -#ifndef SKIP_P3D - //p3d does not care of the size you allocate, juste fill the first isize elements - id = localIndex(0, i0, i1, i2, 0, P3DnlocIN, 1, 0); +#ifndef SKIP_P3D + // p3d does not care of the size you allocate, juste fill the first isize elements + id = localIndex(0, i0, i1, i2, 0, P3DnlocIN, 1, 0); rhsP3D[id] = sin((c_2pi / L[0] * f) * x) * sin((c_2pi / L[1] * f) * y) * sin((c_2pi / L[2] * f) * z); #endif } @@ -327,114 +330,112 @@ int main(int argc, char *argv[]) { // //------------------------------------------------------------------------- // /** - Proceed to the solve */ // //------------------------------------------------------------------------- - - - double factor = 1.0/(nglob[0]* nglob[1]*nglob[2]); - //Warmup - // trans_f.exec(rhsP3D,solP3D,false); - - for (int iter=0;iterstart("FFTandSwitch"); - trans_f.exec(rhsP3D,solP3D,false); // Execute forward real-to-complex FFT + trans_f.exec(rhsP3D, solP3D, false); // Execute forward real-to-complex FFT P3Dprof->stop("FFTandSwitch"); // #define PRINT_RES #ifdef PRINT_RES /* normalize */ - for(int id = 0; idstart("solve"); - mysolver->do_FFT(solFLU,FLUPS_FORWARD); + mysolver->do_FFT(solFLU, FLUPS_FORWARD); FLUprof->stop("solve"); #ifdef PRINT_RES /* normalize*/ - for(int id = 0; id=1. This is because - // FLUPS would require to do the backward transform before doing a new forward transform (or a reset - // function, which we choose not to implement). + // Note: if we do several FFT in a raw, the results are wrong with FLUPS for iter>=1. This is because + // FLUPS would require to do the backward transform before doing a new forward transform (or a reset + // function, which we choose not to implement). } // //------------------------------------------------------------------------- // /** - get timings */ // //------------------------------------------------------------------------- - + // --- FLUPS ------- FLUprof->disp("solve"); - delete(FLUprof); + delete (FLUprof); -#ifndef SKIP_P3D +#ifndef SKIP_P3D // --- P3DFFT ------- #ifdef P3DMODIF double times[8][3]; - p3dfft::timers.get(times,comm); - string P3DNames[8] = {"Reorder_trans","Reorder_out","Reorder_in","Trans_exec","Packsend","Packsend_trans","Unpackrecv","Alltoall"}; + p3dfft::timers.get(times, comm); + string P3DNames[8] = {"Reorder_trans", "Reorder_out", "Reorder_in", "Trans_exec", "Packsend", "Packsend_trans", "Unpackrecv", "Alltoall"}; #endif - double timeRef = P3Dprof->get_timeAcc("FFTandSwitch"); - double timeInit = P3Dprof->get_timeAcc("init"); + double timeRef = P3Dprof->get_timeAcc("FFTandSwitch"); + double timeInit = P3Dprof->get_timeAcc("init"); P3Dprof->disp("FFTandSwitch"); -#ifdef P3DMODIF - if(rank == 0) { +#ifdef P3DMODIF + if (rank == 0) { // printf("===================================================================================================================================================\n"); // printf(" TIMER P3DFFT %s\n",P3DFFTprof.c_str()); // printf("%25s| %-13s\t%-13s\t%-13s\t%-13s\t%-13s\t%-13s\t%-13s\t%-13s\t%-13s\n","-NAME- ", "-% global-", "-% local-", "-Total time-", "-Self time-", "-time/call-", "-Min time-", "-Max time-","-Mean cnt-","-(MB/s)-"); string filename = "prof/" + P3DFFTprof + "_time.csv"; - FILE* file = fopen(filename.c_str(), "w+"); - - printf("%-25.25s| %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", "init", 100*timeInit/timeRef, 100*timeInit/(timeRef+timeInit), timeInit, 0., timeInit/n_iter, 0., 0.,(double) n_iter,0.); - fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", "init", 100*timeInit/timeRef, 100*timeInit/(timeRef+timeInit), timeInit, 0., timeInit/n_iter, 0., 0.,(double) n_iter,0); - printf("%-25.25s| %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", "FFTandSwitch", 100., 100*timeRef/(timeRef+timeInit), timeRef, 0., timeRef/n_iter, 0., 0., (double) n_iter,0.); - fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", "FFTandSwitch", 100., 100*timeRef/(timeRef+timeInit), timeRef, 0., timeRef/n_iter, 0., 0., (double) n_iter,0.); - for(int i=0; i<8; i++){ + FILE *file = fopen(filename.c_str(), "w+"); + + printf("%-25.25s| %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", "init", 100 * timeInit / timeRef, 100 * timeInit / (timeRef + timeInit), timeInit, 0., timeInit / n_iter, 0., 0., (double)n_iter, 0.); + fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", "init", 100 * timeInit / timeRef, 100 * timeInit / (timeRef + timeInit), timeInit, 0., timeInit / n_iter, 0., 0., (double)n_iter, 0); + printf("%-25.25s| %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", "FFTandSwitch", 100., 100 * timeRef / (timeRef + timeInit), timeRef, 0., timeRef / n_iter, 0., 0., (double)n_iter, 0.); + fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", "FFTandSwitch", 100., 100 * timeRef / (timeRef + timeInit), timeRef, 0., timeRef / n_iter, 0., 0., (double)n_iter, 0.); + for (int i = 0; i < 8; i++) { int j = i; - printf("%-25.25s| %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", ("-- "+P3DNames[j]).c_str(), 100.*times[j][0]/timeRef, 100*times[j][0]/timeRef, times[j][0], 0., times[j][0]/n_iter, times[j][1]/n_iter, times[j][2]/n_iter, (double) n_iter,0.); - fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", ("-- "+P3DNames[j]).c_str(), 100.*times[j][0]/timeRef, 100*times[j][0]/timeRef, times[j][0], 0., times[j][0]/n_iter, times[j][1]/n_iter, times[j][2]/n_iter, (double) n_iter,0.); + printf("%-25.25s| %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", ("-- " + P3DNames[j]).c_str(), 100. * times[j][0] / timeRef, 100 * times[j][0] / timeRef, times[j][0], 0., times[j][0] / n_iter, times[j][1] / n_iter, times[j][2] / n_iter, (double)n_iter, 0.); + fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", ("-- " + P3DNames[j]).c_str(), 100. * times[j][0] / timeRef, 100 * times[j][0] / timeRef, times[j][0], 0., times[j][0] / n_iter, times[j][1] / n_iter, times[j][2] / n_iter, (double)n_iter, 0.); } fclose(file); - } + } #else p3dfft::timers.print(comm); #endif - delete(P3Dprof); + delete (P3Dprof); #endif - if(rank==0) + if (rank == 0) printf("Done ! Now let's clean...\n"); fflush(stdout); -#ifndef SKIP_P3D +#ifndef SKIP_P3D fftw_free(solP3D); fftw_free(rhsP3D); #endif @@ -443,36 +444,36 @@ int main(int argc, char *argv[]) { topoIn->~Topology(); topoSpec->~Topology(); mysolver->~Solver(); -#ifndef SKIP_P3D +#ifndef SKIP_P3D p3dfft::cleanup(); #endif + flups_cleanup_fftw(); MPI_Finalize(); } -void print_res(double *A, const Topology* topo) { - const int ax0 = topo->axis(); - const int ax1 = (ax0 + 1) % 3; - const int ax2 = (ax0 + 2) % 3; - const int nf = 2;//topo->nf() - int nmem[3]; - - for(int i=0;i<3;i++){ +void print_res(double *A, const Topology *topo) { + const int ax0 = topo->axis(); + const int ax1 = (ax0 + 1) % 3; + const int ax2 = (ax0 + 2) % 3; + const int nf = 2; // topo->nf() + int nmem[3]; + + for (int i = 0; i < 3; i++) { nmem[i] = topo->nmem(i); } int gstart[3]; topo->get_istart_glob(gstart); - if (topo->isComplex()){ + if (topo->isComplex()) { for (int i2 = 0; i2 < topo->nloc(ax2); i2++) { for (int i1 = 0; i1 < topo->nloc(ax1); i1++) { - //local indexes start + // local indexes start const size_t id = localIndex(ax0, 0, i1, i2, ax0, nmem, 2, 0); for (int i0 = 0; i0 < topo->nloc(ax0); i0++) { - - if (std::fabs(A[id+i0*2]) + std::fabs(A[id+i0*2 + 1]) > 1.25e-4) { - printf("FLU(%d %d %d) %lg +i1* %lg\n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id+i0*2], A[id+i0*2 + 1]); + if (std::fabs(A[id + i0 * 2]) + std::fabs(A[id + i0 * 2 + 1]) > 1.25e-4) { + printf("FLU(%d %d %d) %lg +i1* %lg\n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id + i0 * 2], A[id + i0 * 2 + 1]); } } } @@ -480,12 +481,11 @@ void print_res(double *A, const Topology* topo) { } else { for (int i2 = 0; i2 < topo->nloc(ax2); i2++) { for (int i1 = 0; i1 < topo->nloc(ax1); i1++) { - //local indexes start + // local indexes start const size_t id = localIndex(ax0, 0, i1, i2, ax0, nmem, 1, 0); for (int i0 = 0; i0 < topo->nloc(ax0); i0++) { - - if (std::fabs(A[id+i0]) > 1.25e-4) { - printf("FLU(%d %d %d) %lg \n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id+i0]); + if (std::fabs(A[id + i0]) > 1.25e-4) { + printf("FLU(%d %d %d) %lg \n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id + i0]); } } } @@ -493,18 +493,17 @@ void print_res(double *A, const Topology* topo) { } } -void print_res(p3dfft::complex_double *A,int *sdims,int *gstart) -{ - int x,y,z; - p3dfft::complex_double *p; - int imo[3],i,j; - p = A; - - for(z=0;z < sdims[2];z++) - for(y=0;y < sdims[1];y++) - for(x=0;x < sdims[0];x++) { - if(std::fabs(p->real())+std::fabs(p->imag()) > 1.25e-4) - printf("P3D(%d %d %d) %lg +i1* %lg\n",x+gstart[0],y+gstart[1],z+gstart[2],p->real(),p->imag()); - p++; - } -} \ No newline at end of file +void print_res(p3dfft::complex_double *A, int *sdims, int *gstart) { + int x, y, z; + p3dfft::complex_double *p; + int imo[3], i, j; + p = A; + + for (z = 0; z < sdims[2]; z++) + for (y = 0; y < sdims[1]; y++) + for (x = 0; x < sdims[0]; x++) { + if (std::fabs(p->real()) + std::fabs(p->imag()) > 1.25e-4) + printf("P3D(%d %d %d) %lg +i1* %lg\n", x + gstart[0], y + gstart[1], z + gstart[2], p->real(), p->imag()); + p++; + } +} diff --git a/samples/compareP3DFFT/main_compare.cpp b/samples/compareP3DFFT/main_compare.cpp index eb8c57d..4735e1f 100644 --- a/samples/compareP3DFFT/main_compare.cpp +++ b/samples/compareP3DFFT/main_compare.cpp @@ -4,65 +4,62 @@ #include "Solver.hpp" #include "p3dfft.h" -void print_all_P3D(double *A,long int nar); -void print_res(double *A, const Topology* topo); +void print_all_P3D(double *A, long int nar); +void print_res(double *A, const Topology *topo); int main(int argc, char *argv[]) { - //------------------------------------------------------------------------- // Initialize MPI //------------------------------------------------------------------------- - int rank, comm_size; + int rank, comm_size; MPI_Comm comm = MPI_COMM_WORLD; int provided; // set MPI_THREAD_FUNNELED or MPI_THREAD_SERIALIZED int requested = MPI_THREAD_FUNNELED; MPI_Init_thread(&argc, &argv, requested, &provided); - if(provided < requested){ + if (provided < requested) { FLUPS_ERROR("The MPI-provided thread behavior does not match"); } - + MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &comm_size); //------------------------------------------------------------------------- - //Definition of the problem + // Definition of the problem //------------------------------------------------------------------------- - const int nglob[3] = {16, 16, 16}; - const int nproc[3] = {1, 1, 2}; //nproc[0] has to be 1 //CAUTION FOR THIS: nproc[1]<=nproc[2] !!! - const double L[3] = {1., 1., 1.}; + const int nglob[3] = {16, 16, 16}; + const int nproc[3] = {1, 1, 2}; // nproc[0] has to be 1 //CAUTION FOR THIS: nproc[1]<=nproc[2] !!! + const double L[3] = {1., 1., 1.}; const double h[3] = {L[0] / nglob[0], L[1] / nglob[1], L[2] / nglob[2]}; - + const FLUPS_CenterType center_type[3] = {CELL_CENTER, CELL_CENTER, CELL_CENTER}; - FLUPS_BoundaryType* mybc[3][2]; - for(int id=0; id<3; id++){ - for(int is=0; is<2; is++){ - mybc[id][is] = (FLUPS_BoundaryType*) flups_malloc(sizeof(int)*1); + FLUPS_BoundaryType *mybc[3][2]; + for (int id = 0; id < 3; id++) { + for (int is = 0; is < 2; is++) { + mybc[id][is] = (FLUPS_BoundaryType *)flups_malloc(sizeof(int) * 1); mybc[id][is][0] = PER; } } - unsigned char op_f[]="fft", op_b[]="tff"; + unsigned char op_f[] = "fft", op_b[] = "tff"; const int n_iter = 100; - - //------------------------------------------------------------------------- /** - Initialize FLUPS */ //------------------------------------------------------------------------- // create a real topology - int FLUnmemIn[3],FLUnmemOUT[3]; - FLUPS_Topology *topoIn = new FLUPS_Topology(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm); - const int nprocOut[3] = {1, 2, 1}; - const int nglobOut[3] = {17, 32, 64}; - - std::string FLUPSprof = "compare_FLUPS_res" + std::to_string((int)(nglob[0]/L[0])) + "_nrank" + std::to_string(comm_size)+"_nthread" + std::to_string(omp_get_max_threads()); - Profiler* FLUprof = new Profiler(FLUPSprof); + int FLUnmemIn[3], FLUnmemOUT[3]; + FLUPS_Topology *topoIn = new FLUPS_Topology(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm); + const int nprocOut[3] = {1, 2, 1}; + const int nglobOut[3] = {17, 32, 64}; + + std::string FLUPSprof = "compare_FLUPS_res" + std::to_string((int)(nglob[0] / L[0])) + "_nrank" + std::to_string(comm_size) + "_nthread" + std::to_string(omp_get_max_threads()); + Profiler *FLUprof = new Profiler(FLUPSprof); FLUPS_Solver *mysolver = new FLUPS_Solver(topoIn, mybc, h, L, NOD, center_type, FLUprof); @@ -73,13 +70,13 @@ int main(int argc, char *argv[]) { comm = flups_topo_get_comm(topoIn); MPI_Comm_rank(comm, &rank); - const Topology *topoSpec = mysolver->get_innerTopo_spectral(); + const Topology *topoSpec = mysolver->get_innerTopo_spectral(); for (int i = 0; i < 3; i++) { FLUnmemIn[i] = topoIn->nmem(i); FLUnmemOUT[i] = topoSpec->nmem(i); } - + int istartGloOut[3], istartGlo[3]; topoIn->get_istart_glob(istartGlo); topoSpec->get_istart_glob(istartGloOut); @@ -92,17 +89,17 @@ int main(int argc, char *argv[]) { //------------------------------------------------------------------------- int conf; - int dims[2] = {nproc[1],nproc[2]}; + int dims[2] = {nproc[1], nproc[2]}; int P3Dmemsize[3]; - int istart[3],isize[3],iend[3]; - int fstart[3],fsize[3],fend[3]; - int tstart[3],tsize[3],tend[3]; + int istart[3], isize[3], iend[3]; + int fstart[3], fsize[3], fend[3]; + int tstart[3], tsize[3], tend[3]; if (rank == 0) printf("Using processor grid %d x %d with pencils in x\n", dims[0], dims[1]); - std::string P3DFFTprof = "compare_P3DFFT_res" + std::to_string((int)(nglob[0]/L[0])) + "_nrank" + std::to_string(comm_size)+"_nthread" + std::to_string(omp_get_max_threads()); - Profiler* P3Dprof = new Profiler(P3DFFTprof); + std::string P3DFFTprof = "compare_P3DFFT_res" + std::to_string((int)(nglob[0] / L[0])) + "_nrank" + std::to_string(comm_size) + "_nthread" + std::to_string(omp_get_max_threads()); + Profiler *P3Dprof = new Profiler(P3DFFTprof); P3Dprof->create("init", "root"); P3Dprof->create("FFTandSwitch", "root"); @@ -111,76 +108,72 @@ int main(int argc, char *argv[]) { P3Dprof->start("init"); Cp3dfft_setup(dims, nglob[0], nglob[1], nglob[2], MPI_Comm_c2f(comm), nglob[0], nglob[1], nglob[2], 1, P3Dmemsize); P3Dprof->stop("init"); - + /* Get dimensions for input array - real numbers, X-pencil shape. Note that we are following the Fortran ordering, i.e. the dimension with stride-1 is X. */ - //really? well x seems to be the last dimension... + // really? well x seems to be the last dimension... conf = 1; Cp3dfft_get_dims(istart, iend, isize, conf); - + /* Get dimensions for output array - complex numbers, Z-pencil shape. Stride-1 dimension could be X or Z, depending on how the library was compiled (stride1 option) */ conf = 2; Cp3dfft_get_dims(fstart, fend, fsize, conf); - + /* Get what you should allocate in place, in double, per proc. Large enough for padding */ conf = 3; Cp3dfft_get_dims(tstart, tend, tsize, conf); - - // //------------------------------------------------------------------------- // /** - allocate rhs and solution */ // //------------------------------------------------------------------------- - printf("[FLUPS] topo IN glob : %d %d %d \n",topoIn->nglob(0),topoIn->nglob(1),topoIn->nglob(2)); - printf("[FLUPS] topo IN loc : %d*%d*%d = %d (check: %d %d %d)\n",topoIn->nmem(0),topoIn->nmem(1),topoIn->nmem(2),topoIn->memsize(),topoIn->nloc(0),topoIn->nloc(1),topoIn->nloc(2)); - printf("[FLUPS] topo OUT glob : %d %d %d \n",topoSpec->nglob(0),topoSpec->nglob(1),topoSpec->nglob(2)); - printf("[FLUPS] topo OUT loc : nmem: %d*%d*%d nf:%d (nloc: %d %d %d) \n",topoSpec->nmem(0),topoSpec->nmem(1),topoSpec->nmem(2),topoSpec->nf(),topoSpec->nloc(0),topoSpec->nloc(1),topoSpec->nloc(2)); - - printf("[P3DFFT] topo 0 loc : %d %d %d - (is: %d %d %d ie: %d %d %d size: %d %d %d) \n",P3Dmemsize[0],P3Dmemsize[1],P3Dmemsize[2],istart[0],istart[1],istart[2], iend[0],iend[1],iend[2], isize[0],isize[1],isize[2]); - printf("[P3DFFT] topo 2 loc : %d %d %d - (is: %d %d %d ie: %d %d %d) \n",fsize[0],fsize[1],fsize[2], fstart[0],fstart[1],fstart[2], fend[0],fend[1],fend[2]); - printf("[P3DFFT] for alloc: %d %d %d =? %d %d %d \n",P3Dmemsize[0],P3Dmemsize[1],P3Dmemsize[2], tsize[0],tsize[1],tsize[2]); + printf("[FLUPS] topo IN glob : %d %d %d \n", topoIn->nglob(0), topoIn->nglob(1), topoIn->nglob(2)); + printf("[FLUPS] topo IN loc : %d*%d*%d = %d (check: %d %d %d)\n", topoIn->nmem(0), topoIn->nmem(1), topoIn->nmem(2), topoIn->memsize(), topoIn->nloc(0), topoIn->nloc(1), topoIn->nloc(2)); + printf("[FLUPS] topo OUT glob : %d %d %d \n", topoSpec->nglob(0), topoSpec->nglob(1), topoSpec->nglob(2)); + printf("[FLUPS] topo OUT loc : nmem: %d*%d*%d nf:%d (nloc: %d %d %d) \n", topoSpec->nmem(0), topoSpec->nmem(1), topoSpec->nmem(2), topoSpec->nf(), topoSpec->nloc(0), topoSpec->nloc(1), topoSpec->nloc(2)); + + printf("[P3DFFT] topo 0 loc : %d %d %d - (is: %d %d %d ie: %d %d %d size: %d %d %d) \n", P3Dmemsize[0], P3Dmemsize[1], P3Dmemsize[2], istart[0], istart[1], istart[2], iend[0], iend[1], iend[2], isize[0], isize[1], isize[2]); + printf("[P3DFFT] topo 2 loc : %d %d %d - (is: %d %d %d ie: %d %d %d) \n", fsize[0], fsize[1], fsize[2], fstart[0], fstart[1], fstart[2], fend[0], fend[1], fend[2]); + printf("[P3DFFT] for alloc: %d %d %d =? %d %d %d \n", P3Dmemsize[0], P3Dmemsize[1], P3Dmemsize[2], tsize[0], tsize[1], tsize[2]); // => P3Dmemsize = tsize - // int nm = isize[0]*isize[1]*isize[2]; // printf("isize: %d %d %d - fsize: %d %d %d\n", isize[0],isize[1],isize[2], fsize[0],fsize[1],fsize[2]); // if(nm < fsize[0]*fsize[1]*fsize[2]*2){ //switch to complex? // nm = fsize[0]*fsize[1]*fsize[2]*2; // } - int nm = tsize[0]*tsize[1]*tsize[2]; //this is in double - printf("I am going to allocate FLUPS: %d (out %d R) , P3D: %d (out %d C) \n",FLUmemsizeIN,FLUmemsizeOUT,nm, tsize[0]*tsize[1]*tsize[2]); - - - double *rhsFLU = (double *)fftw_malloc(sizeof(double) * FLUmemsizeIN); + int nm = tsize[0] * tsize[1] * tsize[2]; // this is in double + printf("I am going to allocate FLUPS: %d (out %d R) , P3D: %d (out %d C) \n", FLUmemsizeIN, FLUmemsizeOUT, nm, tsize[0] * tsize[1] * tsize[2]); + + double *rhsFLU = (double *)fftw_malloc(sizeof(double) * FLUmemsizeIN); // solFLU allocated by setup - double *rhsP3D = (double *)fftw_malloc(sizeof(double) * nm); - double *solP3D = (double *)fftw_malloc(sizeof(double) * nm); + double *rhsP3D = (double *)fftw_malloc(sizeof(double) * nm); + double *solP3D = (double *)fftw_malloc(sizeof(double) * nm); - std::memset(rhsFLU, 0, sizeof(double ) * FLUmemsizeIN); - std::memset(solFLU, 0, sizeof(double ) * FLUmemsizeOUT); - std::memset(rhsP3D, 0, sizeof(double ) * nm); - std::memset(solP3D, 0, sizeof(double ) * nm); - - printf("istart: %d %d %d =? %d %d %d(P3D)\n",istartGlo[0],istartGlo[1],istartGlo[2],istart[0],istart[1],istart[2]); + std::memset(rhsFLU, 0, sizeof(double) * FLUmemsizeIN); + std::memset(solFLU, 0, sizeof(double) * FLUmemsizeOUT); + std::memset(rhsP3D, 0, sizeof(double) * nm); + std::memset(solP3D, 0, sizeof(double) * nm); + + printf("istart: %d %d %d =? %d %d %d(P3D)\n", istartGlo[0], istartGlo[1], istartGlo[2], istart[0], istart[1], istart[2]); double f = 1; for (int i2 = 0; i2 < topoIn->nloc(2); i2++) { for (int i1 = 0; i1 < topoIn->nloc(1); i1++) { for (int i0 = 0; i0 < topoIn->nloc(0); i0++) { - double x = (istartGlo[0] + i0 ) * h[0]; //+ 0.5 - double y = (istartGlo[1] + i1 ) * h[1]; //+ 0.5 - double z = (istartGlo[2] + i2 ) * h[2]; //+ 0.5 + double x = (istartGlo[0] + i0) * h[0]; //+ 0.5 + double y = (istartGlo[1] + i1) * h[1]; //+ 0.5 + double z = (istartGlo[2] + i2) * h[2]; //+ 0.5 size_t id; id = localIndex(0, i0, i1, i2, 0, FLUnmemIn, 1, 0); rhsFLU[id] = sin((c_2pi / L[0] * f) * x) * sin((c_2pi / L[1] * f) * y) * sin((c_2pi / L[2] * f) * z); - - //p3d does not care of the size you allocate, juste fill the first isize elements - id = localIndex(0, i0, i1, i2, 0, isize, 1, 0); + + // p3d does not care of the size you allocate, juste fill the first isize elements + id = localIndex(0, i0, i1, i2, 0, isize, 1, 0); rhsP3D[id] = sin((c_2pi / L[0] * f) * x) * sin((c_2pi / L[1] * f) * y) * sin((c_2pi / L[2] * f) * z); } } @@ -189,72 +182,70 @@ int main(int argc, char *argv[]) { // //------------------------------------------------------------------------- // /** - Proceed to the solve */ // //------------------------------------------------------------------------- - - int Ntot = fsize[0]*fsize[1]; - Ntot *= fsize[2]*2; //the number of real corresponding to the complex numbers - - double factor = 1.0/(nglob[0]* nglob[1]*nglob[2]); - - for (int iter=0;iterstart("FFTandSwitch"); - Cp3dfft_ftran_r2c(rhsP3D,solP3D,op_f); + Cp3dfft_ftran_r2c(rhsP3D, solP3D, op_f); P3Dprof->stop("FFTandSwitch"); -//#define PRINT_RES +// #define PRINT_RES #ifdef PRINT_RES /* normallize */ - for(int id = 0; iddo_FFT(solFLU,FLUPS_FORWARD); + std::memcpy(solFLU, rhsFLU, sizeof(double) * FLUmemsizeIN); + mysolver->do_FFT(solFLU, FLUPS_FORWARD); #ifdef PRINT_RES /* normalize*/ - for(int id = 0; id=1. This is because - // FLUPS would require to do the backward transform before doing a new forward transform (or a reset - // function, which we choose not to implement). - + // Note: if we do several FFT in a raw, the results are wrong with FLUPS for iter>=1. This is because + // FLUPS would require to do the backward transform before doing a new forward transform (or a reset + // function, which we choose not to implement). } // //------------------------------------------------------------------------- // /** - get timings */ // //------------------------------------------------------------------------- - + // --- FLUPS ------- FLUprof->disp("solve"); - delete(FLUprof); + delete (FLUprof); // --- P3DFFT ------- double timers[12], gtmean[12], gtmax[12], gtmin[12]; get_timers(timers); - MPI_Reduce(&timers,>mean,12,MPI_DOUBLE,MPI_SUM,0,comm); - MPI_Reduce(&timers,>max ,12,MPI_DOUBLE,MPI_MAX,0,comm); - MPI_Reduce(&timers,>min ,12,MPI_DOUBLE,MPI_MIN,0,comm); + MPI_Reduce(&timers, >mean, 12, MPI_DOUBLE, MPI_SUM, 0, comm); + MPI_Reduce(&timers, >max, 12, MPI_DOUBLE, MPI_MAX, 0, comm); + MPI_Reduce(&timers, >min, 12, MPI_DOUBLE, MPI_MIN, 0, comm); - for (int i=0;i < 12;i++) { - gtmean[i] = gtmean[i]/ ((double) comm_size); + for (int i = 0; i < 12; i++) { + gtmean[i] = gtmean[i] / ((double)comm_size); } double timeRef = P3Dprof->get_timeAcc("FFTandSwitch"); @@ -262,38 +253,37 @@ int main(int argc, char *argv[]) { string P3DNames[12] = {"All2All(v) 1", "All2All(v) 2", "?", "?", "fft 1", "reorder 1", "fft 2", "reorder+fft 3", "?", "?", "?", "?"}; int order[12] = {5, 1, 6, 7, 2, 8, 3, 4, 9, 10, 11, 12}; - P3Dprof->disp("FFTandSwitch"); - if(rank == 0) { + if (rank == 0) { // printf("===================================================================================================================================================\n"); // printf(" TIMER P3DFFT %s\n",P3DFFTprof.c_str()); // printf("%25s| %-13s\t%-13s\t%-13s\t%-13s\t%-13s\t%-13s\t%-13s\t%-13s\t%-13s\n","-NAME- ", "-% global-", "-% local-", "-Total time-", "-Self time-", "-time/call-", "-Min time-", "-Max time-","-Mean cnt-","-(MB/s)-"); string filename = "prof/" + P3DFFTprof + "_time.csv"; - FILE* file = fopen(filename.c_str(), "w+"); - - printf("%-25.25s| %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", "init", 100*timeInit/timeRef, 100*timeInit/(timeRef+timeInit), timeInit, 0., timeInit/n_iter, 0., 0.,(double) n_iter,0.); - fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", "init", 100*timeInit/timeRef, 100*timeInit/(timeRef+timeInit), timeInit, 0., timeInit/n_iter, 0., 0.,(double) n_iter,0); - printf("%-25.25s| %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", "FFTandSwitch", 100., 100*timeRef/(timeRef+timeInit), timeRef, 0., timeRef/n_iter, 0., 0., (double) n_iter,0.); - fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", "FFTandSwitch", 100., 100*timeRef/(timeRef+timeInit), timeRef, 0., timeRef/n_iter, 0., 0., (double) n_iter,0.); - for(int i=0; i<6; i++){ - int j = order[i]-1; - printf("%-25.25s| %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", ("-- "+P3DNames[j]).c_str(), 100.*gtmean[j]/timeRef, 100*gtmean[j]/timeRef, gtmean[j], 0., gtmean[j]/n_iter, gtmin[j]/n_iter, gtmax[j]/n_iter, (double) n_iter,0.); - fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", ("-- "+P3DNames[j]).c_str(), 100.*gtmean[j]/timeRef, 100*gtmean[j]/timeRef, gtmean[j], 0., gtmean[j]/n_iter, gtmin[j]/n_iter, gtmax[j]/n_iter, (double) n_iter,0.); + FILE *file = fopen(filename.c_str(), "w+"); + + printf("%-25.25s| %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", "init", 100 * timeInit / timeRef, 100 * timeInit / (timeRef + timeInit), timeInit, 0., timeInit / n_iter, 0., 0., (double)n_iter, 0.); + fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", "init", 100 * timeInit / timeRef, 100 * timeInit / (timeRef + timeInit), timeInit, 0., timeInit / n_iter, 0., 0., (double)n_iter, 0); + printf("%-25.25s| %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", "FFTandSwitch", 100., 100 * timeRef / (timeRef + timeInit), timeRef, 0., timeRef / n_iter, 0., 0., (double)n_iter, 0.); + fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", "FFTandSwitch", 100., 100 * timeRef / (timeRef + timeInit), timeRef, 0., timeRef / n_iter, 0., 0., (double)n_iter, 0.); + for (int i = 0; i < 6; i++) { + int j = order[i] - 1; + printf("%-25.25s| %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", ("-- " + P3DNames[j]).c_str(), 100. * gtmean[j] / timeRef, 100 * gtmean[j] / timeRef, gtmean[j], 0., gtmean[j] / n_iter, gtmin[j] / n_iter, gtmax[j] / n_iter, (double)n_iter, 0.); + fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", ("-- " + P3DNames[j]).c_str(), 100. * gtmean[j] / timeRef, 100 * gtmean[j] / timeRef, gtmean[j], 0., gtmean[j] / n_iter, gtmin[j] / n_iter, gtmax[j] / n_iter, (double)n_iter, 0.); } fclose(file); // -- backup of all timers -- filename = "prof/" + P3DFFTprof + "_backup.csv"; - file = fopen(filename.c_str(), "w+"); - for(int i=0;i < 12;i++) { - fprintf(file,"timer[%d] (avg/max/min): %lE %lE %lE\n",i+1,gtmean[i],gtmax[i],gtmin[i]); + file = fopen(filename.c_str(), "w+"); + for (int i = 0; i < 12; i++) { + fprintf(file, "timer[%d] (avg/max/min): %lE %lE %lE\n", i + 1, gtmean[i], gtmax[i], gtmin[i]); } fclose(file); - } - delete(P3Dprof); + } + delete (P3Dprof); - if(rank==0) + if (rank == 0) printf("Done ! Now let's clean...\n"); fflush(stdout); @@ -303,65 +293,62 @@ int main(int argc, char *argv[]) { topoIn->~Topology(); topoSpec->~Topology(); - + mysolver->~Solver(); - - for(int id=0; id<3; id++){ - for(int is=0; is<2; is++){ - mybc[id][is] = (FLUPS_BoundaryType*) flups_malloc(sizeof(int)*1); + + for (int id = 0; id < 3; id++) { + for (int is = 0; is < 2; is++) { + mybc[id][is] = (FLUPS_BoundaryType *)flups_malloc(sizeof(int) * 1); flups_free(mybc[id][is]); } } Cp3dfft_clean(); + flups_cleanup_fftw(); MPI_Finalize(); } +void print_all_P3D(double *A, long int nar) { + int x, y, z, conf, Fstart[3], Fsize[3], Fend[3]; + long int i; - -void print_all_P3D(double *A,long int nar) -{ - int x,y,z,conf,Fstart[3],Fsize[3],Fend[3]; - long int i; - - conf = 2; - Cp3dfft_get_dims(Fstart,Fend,Fsize,conf); - /* - Fsize[0] *= 2; - Fstart[0] = (Fstart[0]-1)*2; - */ - for(i=0;i < nar;i+=2) - if(fabs(A[i]) + fabs(A[i+1]) > 1.25e-4) { - z = i/(2*Fsize[0]*Fsize[1]); - y = i/(2*Fsize[0]) - z*Fsize[1]; - x = i/2-z*Fsize[0]*Fsize[1] - y*Fsize[0]; - printf("P3D(%d,%d,%d) %.16lg %.16lg\n",x+Fstart[0],y+Fstart[1],z+Fstart[2],A[i],A[i+1]); - } + conf = 2; + Cp3dfft_get_dims(Fstart, Fend, Fsize, conf); + /* + Fsize[0] *= 2; + Fstart[0] = (Fstart[0]-1)*2; + */ + for (i = 0; i < nar; i += 2) + if (fabs(A[i]) + fabs(A[i + 1]) > 1.25e-4) { + z = i / (2 * Fsize[0] * Fsize[1]); + y = i / (2 * Fsize[0]) - z * Fsize[1]; + x = i / 2 - z * Fsize[0] * Fsize[1] - y * Fsize[0]; + printf("P3D(%d,%d,%d) %.16lg %.16lg\n", x + Fstart[0], y + Fstart[1], z + Fstart[2], A[i], A[i + 1]); + } } -void print_res(double *A, const Topology* topo) { - const int ax0 = topo->axis(); - const int ax1 = (ax0 + 1) % 3; - const int ax2 = (ax0 + 2) % 3; - const int nf = 2;//topo->nf() - int nmem[3]; - - for(int i=0;i<3;i++){ +void print_res(double *A, const Topology *topo) { + const int ax0 = topo->axis(); + const int ax1 = (ax0 + 1) % 3; + const int ax2 = (ax0 + 2) % 3; + const int nf = 2; // topo->nf() + int nmem[3]; + + for (int i = 0; i < 3; i++) { nmem[i] = topo->nmem(i); } int gstart[3]; topo->get_istart_glob(gstart); - if (topo->isComplex()){ + if (topo->isComplex()) { for (int i2 = 0; i2 < topo->nloc(ax2); i2++) { for (int i1 = 0; i1 < topo->nloc(ax1); i1++) { - //local indexes start + // local indexes start const size_t id = localIndex(ax0, 0, i1, i2, ax0, nmem, 2, 0); for (int i0 = 0; i0 < topo->nloc(ax0); i0++) { - - if (std::fabs(A[id+i0*2]) + std::fabs(A[id+i0*2 + 1]) > 1.25e-4) { - printf("FLU(%d %d %d) %lg +i1* %lg\n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id+i0*2], A[id+i0*2 + 1]); + if (std::fabs(A[id + i0 * 2]) + std::fabs(A[id + i0 * 2 + 1]) > 1.25e-4) { + printf("FLU(%d %d %d) %lg +i1* %lg\n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id + i0 * 2], A[id + i0 * 2 + 1]); } } } @@ -369,15 +356,14 @@ void print_res(double *A, const Topology* topo) { } else { for (int i2 = 0; i2 < topo->nloc(ax2); i2++) { for (int i1 = 0; i1 < topo->nloc(ax1); i1++) { - //local indexes start + // local indexes start const size_t id = localIndex(ax0, 0, i1, i2, ax0, nmem, 1, 0); for (int i0 = 0; i0 < topo->nloc(ax0); i0++) { - - if (std::fabs(A[id+i0]) > 1.25e-4) { - printf("FLU(%d %d %d) %lg \n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id+i0]); + if (std::fabs(A[id + i0]) > 1.25e-4) { + printf("FLU(%d %d %d) %lg \n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id + i0]); } } } } } -} \ No newline at end of file +} diff --git a/samples/simpleTest/main_simple_test.cpp b/samples/simpleTest/main_simple_test.cpp index 2d2936b..220d4ab 100644 --- a/samples/simpleTest/main_simple_test.cpp +++ b/samples/simpleTest/main_simple_test.cpp @@ -3,19 +3,18 @@ #include #include -#include "mpi.h" -#include "h3lpr/profiler.hpp" -#include "h3lpr/macros.hpp" #include "flups.h" +#include "h3lpr/macros.hpp" +#include "h3lpr/profiler.hpp" +#include "mpi.h" #define m_log(format, ...) m_log_def("test", format, ##__VA_ARGS__) -void print_res(double *A, const FLUPS_Topology* topo, std::string filename); +void print_res(double *A, const FLUPS_Topology *topo, std::string filename); -int main(int argc, char *argv[]) { - +int main(int argc, char *argv[]) { MPI_Init(&argc, &argv); - int rank, comm_size; + int rank, comm_size; MPI_Comm comm = MPI_COMM_WORLD; MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &comm_size); @@ -24,14 +23,14 @@ int main(int argc, char *argv[]) { //------------------------------------------------------------------------- // Definition of the problem //------------------------------------------------------------------------- - bool is_node = true; - const int nglob[3] = {9, 9, 9}; - const int nproc[3] = {1, 1, comm_size}; - const double L[3] = {1., 1., 1.}; - const double h[3] = {L[0] / (nglob[0] - is_node), L[1] / (nglob[1] - is_node), L[2] / (nglob[2] - is_node)}; - int nsolve = 1; - - FLUPS_CenterType center_type[3]; + bool is_node = true; + const int nglob[3] = {9, 9, 9}; + const int nproc[3] = {1, 1, comm_size}; + const double L[3] = {1., 1., 1.}; + const double h[3] = {L[0] / (nglob[0] - is_node), L[1] / (nglob[1] - is_node), L[2] / (nglob[2] - is_node)}; + int nsolve = 1; + + FLUPS_CenterType center_type[3]; FLUPS_BoundaryType *mybc[3][2]; for (int id = 0; id < 3; id++) { center_type[id] = is_node ? NODE_CENTER : CELL_CENTER; @@ -45,19 +44,19 @@ int main(int argc, char *argv[]) { /** - Initialize FLUPS */ //------------------------------------------------------------------------- // create a real topology - FLUPS_Topology *topo = flups_topo_new(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm); + FLUPS_Topology *topo = flups_topo_new(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm); - // Profiler creation - FLUPS_Profiler* prof = flups_profiler_new(); + // Profiler creation + FLUPS_Profiler *prof = flups_profiler_new(); // Solver creation and init FLUPS_Solver *mysolver = flups_init_timed(topo, mybc, h, L, NOD, center_type, prof); - flups_set_greenType(mysolver,CHAT_2); + flups_set_greenType(mysolver, CHAT_2); flups_setup(mysolver, true); // recompute the communicator and the rank comm = flups_topo_get_comm(topo); - MPI_Comm_rank(comm,&rank); + MPI_Comm_rank(comm, &rank); //------------------------------------------------------------------------- /** - allocate rhs and solution */ @@ -75,17 +74,17 @@ int main(int argc, char *argv[]) { const int ax1 = (ax0 + 1) % 3; const int ax2 = (ax0 + 2) % 3; - const double k = 2.0 * M_PI ; - auto fun = [k](double pos[3]) -> double { return cos(k * pos[0]); }; - auto d2fundx = [k](double pos[3]) -> double { return (-(k*k)*cos(k * pos[0])); }; - - const double shift = is_node? 0.0 : 0.5; - for (int i2 = 0; i2 < flups_topo_get_nloc(topo,ax2) ; i2++) { - for (int i1 = 0; i1 < flups_topo_get_nloc(topo,ax1) ; i1++) { - for (int i0 = 0; i0 < flups_topo_get_nloc(topo,ax0); i0++) { - double pos[3] ={(istart[ax0] + i0 + shift) * h[ax0], (istart[ax1] + i1 + shift) * h[ax1], (istart[ax2] + i2 + shift) * h[ax2]}; - const size_t id = flups_locID(0, i0, i1, i2, 0, 0, nmem, 1); - rhs[id] = d2fundx(pos); + const double k = 2.0 * M_PI; + auto fun = [k](double pos[3]) -> double { return cos(k * pos[0]); }; + auto d2fundx = [k](double pos[3]) -> double { return (-(k * k) * cos(k * pos[0])); }; + + const double shift = is_node ? 0.0 : 0.5; + for (int i2 = 0; i2 < flups_topo_get_nloc(topo, ax2); i2++) { + for (int i1 = 0; i1 < flups_topo_get_nloc(topo, ax1); i1++) { + for (int i0 = 0; i0 < flups_topo_get_nloc(topo, ax0); i0++) { + double pos[3] = {(istart[ax0] + i0 + shift) * h[ax0], (istart[ax1] + i1 + shift) * h[ax1], (istart[ax2] + i2 + shift) * h[ax2]}; + const size_t id = flups_locID(0, i0, i1, i2, 0, 0, nmem, 1); + rhs[id] = d2fundx(pos); // rhs[id] = (istart[ax0] + i0) + nmem[0]*((istart[ax1] + i1) + nglob[1]*(i2 + istart[ax2])); //d2fundx(pos); } } @@ -99,48 +98,47 @@ int main(int argc, char *argv[]) { flups_solve(mysolver, field, rhs, STD); } - //------------------------------------------------------------------------- /** - Compute the error */ //------------------------------------------------------------------------- double err = std::numeric_limits::lowest(); - bool is_last_topo[3]; - for(int id = 0; id < 3; id++){ - is_last_topo[id] = (istart[id] + flups_topo_get_nloc(topo,(flups_topo_get_axis(topo) + id)%3) == nglob[id]); + bool is_last_topo[3]; + for (int id = 0; id < 3; id++) { + is_last_topo[id] = (istart[id] + flups_topo_get_nloc(topo, (flups_topo_get_axis(topo) + id) % 3) == nglob[id]); } - if((0 == flups_topo_get_nloc(topo,ax2) - is_last_topo[2]) || (0 == flups_topo_get_nloc(topo,ax1) - is_last_topo[1]) || (0 == flups_topo_get_nloc(topo,ax1) - is_last_topo[0])){ + if ((0 == flups_topo_get_nloc(topo, ax2) - is_last_topo[2]) || (0 == flups_topo_get_nloc(topo, ax1) - is_last_topo[1]) || (0 == flups_topo_get_nloc(topo, ax1) - is_last_topo[0])) { printf("[test - %d] You don't have enough points to fill in all the procs - my error (rank %d) is set to 0 by default \n ", rank, rank); err = 0.0; } - for (int i2 = 0; i2 < flups_topo_get_nloc(topo,ax2) - is_last_topo[2] ; i2++) { - for (int i1 = 0; i1 < flups_topo_get_nloc(topo,ax1) - is_last_topo[1] ; i1++) { - for (int i0 = 0; i0 < flups_topo_get_nloc(topo,ax0) - is_last_topo[0]; i0++) { - double pos[3] ={(istart[ax0] + i0 + shift) * h[ax0], (istart[ax1] + i1 + shift) * h[ax1], (istart[ax2] + i2 + shift) * h[ax2]}; - const size_t id = flups_locID(0, i0, i1, i2, 0, 0, nmem, 1); + for (int i2 = 0; i2 < flups_topo_get_nloc(topo, ax2) - is_last_topo[2]; i2++) { + for (int i1 = 0; i1 < flups_topo_get_nloc(topo, ax1) - is_last_topo[1]; i1++) { + for (int i0 = 0; i0 < flups_topo_get_nloc(topo, ax0) - is_last_topo[0]; i0++) { + double pos[3] = {(istart[ax0] + i0 + shift) * h[ax0], (istart[ax1] + i1 + shift) * h[ax1], (istart[ax2] + i2 + shift) * h[ax2]}; + const size_t id = flups_locID(0, i0, i1, i2, 0, 0, nmem, 1); // Gaussian err = std::max(err, abs(field[id] - fun(pos))); } } } - //m_log("Printing results"); - //printf("[test %d] Error computation - end %d %d %d ", rank,flups_topo_get_nloc(topo,ax0) - is_last_topo[0], flups_topo_get_nloc(topo,ax1) - is_last_topo[1], flups_topo_get_nloc(topo,ax2) - is_last_topo[2] ); - //std::string arg_name = (argc == 2) ? argv[1] : "default"; - //print_res(field, topo, "data/sol_" + arg_name) ; + // m_log("Printing results"); + // printf("[test %d] Error computation - end %d %d %d ", rank,flups_topo_get_nloc(topo,ax0) - is_last_topo[0], flups_topo_get_nloc(topo,ax1) - is_last_topo[1], flups_topo_get_nloc(topo,ax2) - is_last_topo[2] ); + // std::string arg_name = (argc == 2) ? argv[1] : "default"; + // print_res(field, topo, "data/sol_" + arg_name) ; // -------------------------------------------------------------------------- - // Local error + // Local error printf("[test - %d] my error is %e \n", rank, err); // -------------------------------------------------------------------------- - // Global error + // Global error double err_glob = 0.0; MPI_Allreduce(&err, &err_glob, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); m_log("--------------- Global Error ---------------"); m_log("The global error == %e", err_glob); - + flups_profiler_disp(prof); //------------------------------------------------------------------------- @@ -150,6 +148,7 @@ int main(int argc, char *argv[]) { flups_free(field); flups_topo_free(topo); flups_cleanup(mysolver); + flups_cleanup_fftw(); flups_profiler_free(prof); for (int id = 0; id < 3; id++) { @@ -161,30 +160,33 @@ int main(int argc, char *argv[]) { MPI_Finalize(); } -void print_res(double *A, const FLUPS_Topology* topo, std::string filename) { +void print_res(double *A, const FLUPS_Topology *topo, std::string filename) { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - FILE *fptr; + FILE *fptr; std::string name = filename + "_" + std::to_string(rank) + ".txt"; - fptr = fopen(name.c_str(), "w"); + fptr = fopen(name.c_str(), "w"); - if(fptr == NULL){ printf("Error!"); exit(1); } + if (fptr == NULL) { + printf("Error!"); + exit(1); + } const int ax0 = flups_topo_get_axis(topo); const int ax1 = (ax0 + 1) % 3; const int ax2 = (ax0 + 2) % 3; - int nmem[3] = {flups_topo_get_nmem(topo,0), flups_topo_get_nmem(topo,1), flups_topo_get_nmem(topo,2)}; - + int nmem[3] = {flups_topo_get_nmem(topo, 0), flups_topo_get_nmem(topo, 1), flups_topo_get_nmem(topo, 2)}; + // int gstart[3]; // flups_topo_get_istartGlob(topo,gstart); - if (flups_topo_get_isComplex(topo)){ - for (int i2 = 0; i2 < flups_topo_get_nloc(topo,ax2); i2++) { - for (int i1 = 0; i1 < flups_topo_get_nloc(topo,ax1); i1++) { - //local indexes start - const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmem,2); - for (int i0 = 0; i0 < flups_topo_get_nloc(topo,ax0); i0++) { + if (flups_topo_get_isComplex(topo)) { + for (int i2 = 0; i2 < flups_topo_get_nloc(topo, ax2); i2++) { + for (int i1 = 0; i1 < flups_topo_get_nloc(topo, ax1); i1++) { + // local indexes start + const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmem, 2); + for (int i0 = 0; i0 < flups_topo_get_nloc(topo, ax0); i0++) { // printf("(%d %d %d) %lg +i1 * %lg\n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id + i0 * 2], A[id + i0 * 2 + 1]); fprintf(fptr, "\t (%e, i1 * %e)", A[id + i0 * 2], A[id + i0 * 2 + 1]); } @@ -192,13 +194,13 @@ void print_res(double *A, const FLUPS_Topology* topo, std::string filename) { } fprintf(fptr, "\n \n"); } - + } else { - for (int i2 = 0; i2 < flups_topo_get_nloc(topo,ax2); i2++) { - for (int i1 = 0; i1 < flups_topo_get_nloc(topo,ax1); i1++) { - //local indexes start - const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmem,1); - for (int i0 = 0; i0 < flups_topo_get_nloc(topo,ax0); i0++) { + for (int i2 = 0; i2 < flups_topo_get_nloc(topo, ax2); i2++) { + for (int i1 = 0; i1 < flups_topo_get_nloc(topo, ax1); i1++) { + // local indexes start + const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmem, 1); + for (int i0 = 0; i0 < flups_topo_get_nloc(topo, ax0); i0++) { // printf("(%d %d %d) %lg \n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id + i0]); fprintf(fptr, "\t %e", A[id + i0]); } diff --git a/samples/solve_advanced_C/main_solve_advanced.c b/samples/solve_advanced_C/main_solve_advanced.c index 76cfacf..254b849 100644 --- a/samples/solve_advanced_C/main_solve_advanced.c +++ b/samples/solve_advanced_C/main_solve_advanced.c @@ -3,90 +3,90 @@ #include #include -#include "mpi.h" #include "flups.h" +#include "mpi.h" -void print_res(double *A, const FLUPS_Topology* topo); +void print_res(double *A, const FLUPS_Topology *topo); #define PRINT_RES -int main(int argc, char *argv[]) { +int main(int argc, char *argv[]) { printf("Starting...\n"); //------------------------------------------------------------------------- // Initialize MPI //------------------------------------------------------------------------- - int rank, comm_size; + int rank, comm_size; MPI_Comm comm = MPI_COMM_WORLD; int provided; // set MPI_THREAD_FUNNELED or MPI_THREAD_SERIALIZED int requested = MPI_THREAD_FUNNELED; MPI_Init_thread(&argc, &argv, requested, &provided); - if(provided < requested){ + if (provided < requested) { printf("Invalid number of procs\n"); - MPI_Abort(comm,1); + MPI_Abort(comm, 1); } - + MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &comm_size); //------------------------------------------------------------------------- - //Definition of the problem + // Definition of the problem //------------------------------------------------------------------------- - const int nglob[3] = {64, 64, 64}; - const int nproc[3] = {1, 1, 2}; - const double L[3] = {1., 1., 1.};; + const int nglob[3] = {64, 64, 64}; + const int nproc[3] = {1, 1, 2}; + const double L[3] = {1., 1., 1.}; + ; const double h[3] = {L[0] / nglob[0], L[1] / nglob[1], L[2] / nglob[2]}; const FLUPS_CenterType center_type[3] = {CELL_CENTER, CELL_CENTER, CELL_CENTER}; - FLUPS_BoundaryType* mybc[3][2]; - for(int id=0; id<3; id++){ - for(int is=0; is<2; is++){ - mybc[id][is] = (FLUPS_BoundaryType*) flups_malloc(sizeof(int)*1); + FLUPS_BoundaryType *mybc[3][2]; + for (int id = 0; id < 3; id++) { + for (int is = 0; is < 2; is++) { + mybc[id][is] = (FLUPS_BoundaryType *)flups_malloc(sizeof(int) * 1); mybc[id][is][0] = PER; } } - if(comm_size!=nproc[0]*nproc[1]*nproc[2]){ + if (comm_size != nproc[0] * nproc[1] * nproc[2]) { printf("Invalid number of procs\n"); - MPI_Abort(comm,1); + MPI_Abort(comm, 1); } - //------------------------------------------------------------------------- /** - Initialize FLUPS */ //------------------------------------------------------------------------- // create a real topology - FLUPS_Topology *topoIn = flups_topo_new(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm); + FLUPS_Topology *topoIn = flups_topo_new(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm); // solver creation and init FLUPS_Solver *mysolver = flups_init(topoIn, mybc, h, L, NOD, center_type); - flups_set_greenType(mysolver,CHAT_2); + flups_set_greenType(mysolver, CHAT_2); flups_setup(mysolver, true); - double* solFLU = flups_get_innerBuffer(mysolver); + double *solFLU = flups_get_innerBuffer(mysolver); // recompute the communicator and the rank comm = flups_topo_get_comm(topoIn); - MPI_Comm_rank(comm,&rank); + MPI_Comm_rank(comm, &rank); // retrieveing internal info from the solver const FLUPS_Topology *topoSpec = flups_get_innerTopo_spectral(mysolver); int nmemIn[3]; int istartIn[3], istartSpec[3]; - int nmemIN[3],nmemSpec[3]; + int nmemIN[3], nmemSpec[3]; int memsizeIN = flups_topo_get_memsize(topoIn); for (int i = 0; i < 3; i++) { nmemIn[i] = flups_topo_get_nmem(topoIn, i); nmemSpec[i] = flups_topo_get_nmem(topoSpec, i); } - flups_topo_get_istartGlob(topoIn,istartIn); - flups_topo_get_istartGlob(topoSpec,istartSpec); - + flups_topo_get_istartGlob(topoIn, istartIn); + flups_topo_get_istartGlob(topoSpec, istartSpec); + // //------------------------------------------------------------------------- // /** - allocate rhs and solution */ // //------------------------------------------------------------------------- @@ -100,27 +100,27 @@ int main(int argc, char *argv[]) { fflush(stdout); } - double *rhsFLU = (double *)flups_malloc(sizeof(double) * memsizeIN); - memset(rhsFLU, 0, sizeof(double ) * memsizeIN); - // memset(solFLU, 0, sizeof(double ) * FLUmemsizeOUT); - - double f = 1; //frequency of the wave - const double c_2pi = 2.0*3.141592653589793; + double *rhsFLU = (double *)flups_malloc(sizeof(double) * memsizeIN); + memset(rhsFLU, 0, sizeof(double) * memsizeIN); + // memset(solFLU, 0, sizeof(double ) * FLUmemsizeOUT); - for (int i2 = 0; i2 < flups_topo_get_nloc(topoIn,2); i2++) { - for (int i1 = 0; i1 < flups_topo_get_nloc(topoIn,1); i1++) { - for (int i0 = 0; i0 < flups_topo_get_nloc(topoIn,0); i0++) { - double x = (istartIn[0] + i0 + 0.5) * h[0]; - double y = (istartIn[1] + i1 + 0.5) * h[1]; - double z = (istartIn[2] + i2 + 0.5) * h[2]; + double f = 1; // frequency of the wave + const double c_2pi = 2.0 * 3.141592653589793; + + for (int i2 = 0; i2 < flups_topo_get_nloc(topoIn, 2); i2++) { + for (int i1 = 0; i1 < flups_topo_get_nloc(topoIn, 1); i1++) { + for (int i0 = 0; i0 < flups_topo_get_nloc(topoIn, 0); i0++) { + double x = (istartIn[0] + i0 + 0.5) * h[0]; + double y = (istartIn[1] + i1 + 0.5) * h[1]; + double z = (istartIn[2] + i2 + 0.5) * h[2]; size_t id; - id = flups_locID(0, i0, i1, i2, 0, 0, nmemIn, 1); + id = flups_locID(0, i0, i1, i2, 0, 0, nmemIn, 1); rhsFLU[id] = sin((c_2pi / L[0] * f) * x) * sin((c_2pi / L[1] * f) * y) * sin((c_2pi / L[2] * f) * z); } } } - memcpy(solFLU, rhsFLU, sizeof(double ) * memsizeIN); + memcpy(solFLU, rhsFLU, sizeof(double) * memsizeIN); // //------------------------------------------------------------------------- // /** - Skip the copy of data @@ -128,9 +128,9 @@ int main(int argc, char *argv[]) { // By using the advanced features of the API, we skip the copy which is done // in flups_solve: data are copied from the location the user allocated himself, // to an inner memory reserved by flups, and ensuring proper alignment. - // In this example, we have directly filled that "inner memory" (solFlu) with + // In this example, we have directly filled that "inner memory" (solFlu) with // the initial condition - + // flups_do_copy(mysolver,topoIn,solFLU,FLUPS_FORWARD); // //------------------------------------------------------------------------- @@ -148,26 +148,26 @@ int main(int argc, char *argv[]) { // //------------------------------------------------------------------------- double kfact[3], koffset[3], symstart[3]; - flups_get_spectralInfo(mysolver,kfact,koffset,symstart); + flups_get_spectralInfo(mysolver, kfact, koffset, symstart); if (rank == 0) { - printf("%lf %lf %lf %lf %lf %lf %lf %lf %lf \n",kfact[0],kfact[1],kfact[2],koffset[0],koffset[1],koffset[2],symstart[0],symstart[1],symstart[2]); + printf("%lf %lf %lf %lf %lf %lf %lf %lf %lf \n", kfact[0], kfact[1], kfact[2], koffset[0], koffset[1], koffset[2], symstart[0], symstart[1], symstart[2]); } - const int ax0 = flups_topo_get_axis(topoSpec); - const int ax1 = (ax0 + 1) % 3; - const int ax2 = (ax0 + 2) % 3; - const int nf = 2; //topoSpec is full spectral, there are two doubles per element - + const int ax0 = flups_topo_get_axis(topoSpec); + const int ax1 = (ax0 + 1) % 3; + const int ax2 = (ax0 + 2) % 3; + const int nf = 2; // topoSpec is full spectral, there are two doubles per element + // int nmemSpec[3]; // for(int i=0;i<3;i++){ // nmemSpec[i] = flups_topo_get_nmem(topoSpec,i); // } - - for (int i2 = 0; i2 < flups_topo_get_nloc(topoSpec,ax2); i2++) { - for (int i1 = 0; i1 < flups_topo_get_nloc(topoSpec,ax1); i1++) { - //local indexes start - const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmemSpec,nf); - for (int i0 = 0; i0 < flups_topo_get_nloc(topoSpec,ax0); i0++) { + + for (int i2 = 0; i2 < flups_topo_get_nloc(topoSpec, ax2); i2++) { + for (int i1 = 0; i1 < flups_topo_get_nloc(topoSpec, ax1); i1++) { + // local indexes start + const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmemSpec, nf); + for (int i0 = 0; i0 < flups_topo_get_nloc(topoSpec, ax0); i0++) { int is[3]; flups_symID(ax0, i0, i1, i2, istartSpec, symstart, 0, is); @@ -176,10 +176,10 @@ int main(int argc, char *argv[]) { const double k1 = (is[ax1] + koffset[ax1]) * kfact[ax1]; const double k2 = (is[ax2] + koffset[ax2]) * kfact[ax2]; - const double ksqr = k0 * k0 + k1 * k1 + k2 * k2; + const double ksqr = k0 * k0 + k1 * k1 + k2 * k2; - solFLU[id + i0 * nf] *= exp(-ksqr); //REAL part - solFLU[id + i0 * nf + 1] *= exp(-ksqr); //COMPLEX part + solFLU[id + i0 * nf] *= exp(-ksqr); // REAL part + solFLU[id + i0 * nf + 1] *= exp(-ksqr); // COMPLEX part } } } @@ -204,7 +204,7 @@ int main(int argc, char *argv[]) { // #define PRINT_RES #ifdef PRINT_RES /* normalize*/ - print_res(solFLU,topoIn); + print_res(solFLU, topoIn); #endif flups_hdf5_dump(topoIn, "result", solFLU); @@ -212,58 +212,59 @@ int main(int argc, char *argv[]) { // //------------------------------------------------------------------------- // /** - CLEAN */ // //------------------------------------------------------------------------- - - if(rank==0) + + if (rank == 0) printf("Done ! Now let's clean...\n"); fflush(stdout); - + flups_free(rhsFLU); flups_topo_free(topoIn); flups_cleanup(mysolver); + flups_cleanup_fftw(); - for(int id=0; id<3; id++){ - for(int is=0; is<2; is++){ + for (int id = 0; id < 3; id++) { + for (int is = 0; is < 2; is++) { flups_free(mybc[id][is]); } } - + MPI_Finalize(); } -void print_res(double *A, const FLUPS_Topology* topo) { - const int ax0 = flups_topo_get_axis(topo); - const int ax1 = (ax0 + 1) % 3; - const int ax2 = (ax0 + 2) % 3; - int nmem[3]; - - for(int i=0;i<3;i++){ - nmem[i] = flups_topo_get_nmem(topo,i); +void print_res(double *A, const FLUPS_Topology *topo) { + const int ax0 = flups_topo_get_axis(topo); + const int ax1 = (ax0 + 1) % 3; + const int ax2 = (ax0 + 2) % 3; + int nmem[3]; + + for (int i = 0; i < 3; i++) { + nmem[i] = flups_topo_get_nmem(topo, i); } int gstart[3]; - flups_topo_get_istartGlob(topo,gstart); + flups_topo_get_istartGlob(topo, gstart); - if (flups_topo_get_isComplex(topo)){ - for (int i2 = 0; i2 < flups_topo_get_nloc(topo,ax2); i2++) { + if (flups_topo_get_isComplex(topo)) { + for (int i2 = 0; i2 < flups_topo_get_nloc(topo, ax2); i2++) { printf("(z = %d) \n", i2 + gstart[ax2]); - for (int i1 = 0; i1 < flups_topo_get_nloc(topo,ax1); i1++) { - //local indexes start - const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmem,2); - for (int i0 = 0; i0 < flups_topo_get_nloc(topo,ax0); i0++) { + for (int i1 = 0; i1 < flups_topo_get_nloc(topo, ax1); i1++) { + // local indexes start + const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmem, 2); + for (int i0 = 0; i0 < flups_topo_get_nloc(topo, ax0); i0++) { printf("(%1.3lg + i1* %1.3lg) \t", A[id + i0 * 2], A[id + i0 * 2 + 1]); } - printf("\n"); + printf("\n"); } printf("\n"); } } else { - for (int i2 = 0; i2 < flups_topo_get_nloc(topo,ax2); i2++) { + for (int i2 = 0; i2 < flups_topo_get_nloc(topo, ax2); i2++) { printf("(z = %d) \n", i2 + gstart[ax2]); - for (int i1 = 0; i1 < flups_topo_get_nloc(topo,ax1); i1++) { - //local indexes start - const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmem,1); - for (int i0 = 0; i0 < flups_topo_get_nloc(topo,ax0); i0++) { + for (int i1 = 0; i1 < flups_topo_get_nloc(topo, ax1); i1++) { + // local indexes start + const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmem, 1); + for (int i0 = 0; i0 < flups_topo_get_nloc(topo, ax0); i0++) { printf(" %lg \t", A[id + i0]); } printf("\n"); diff --git a/samples/solve_vtube/src/vtube.cpp b/samples/solve_vtube/src/vtube.cpp index 30f8845..d34315d 100644 --- a/samples/solve_vtube/src/vtube.cpp +++ b/samples/solve_vtube/src/vtube.cpp @@ -2,32 +2,33 @@ * @file vtube.cpp * @author Thomas Gillis and Denis-Gabriel Caprace * @copyright Copyright © UCLouvain 2020 - * + * * FLUPS is a Fourier-based Library of Unbounded Poisson Solvers. - * + * * Copyright <2020> - * + * * List of the contributors to the development of FLUPS, Description and complete License: see LICENSE and NOTICE files. - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * + * */ #include "vtube.hpp" + #include "omp.h" -template <> -double gexpint<1>(const double z){ +template <> +double gexpint<1>(const double z) { // for real values E1(x) = -Ei(-x): // according to // https://en.cppreference.com/w/cpp/numeric/special_functions/expint and @@ -35,9 +36,9 @@ double gexpint<1>(const double z){ return (-std::expint(-z)); } -double vort_inf(const double r, const double sigma){ +double vort_inf(const double r, const double sigma) { // Gregoire winckelmans 2004 encyclopedia - const double rho1 = r / sigma; + const double rho1 = r / sigma; return 1.0 / (c_2pi * sigma * sigma) * exp(-rho1 * rho1 * 0.5); } double vel_inf(const double r, const double sigma) { @@ -47,10 +48,10 @@ double vel_inf(const double r, const double sigma) { return (r < eps) ? 0 : 1.0 / (c_2pi * r) * (1.0 - exp(-rho1 * rho1 * 0.5)); } -double vort_compact(const double r, const double R){ +double vort_compact(const double r, const double R) { // Wincklemans 2015, hand-written notes (cfr vortexbib) - const double rho2 = r / (R); - const double oo_rad2 = 1.0/(R*R); + const double rho2 = r / (R); + const double oo_rad2 = 1.0 / (R * R); return (rho2 >= 1.0) ? (0.0) : (1.0 / c_2pi * (2.0 * oo_rad2) * (1.0 / gexpint<2>(1.0)) * exp(-1.0 / (1.0 - rho2 * rho2))); } double vel_compact(const double r, const double R) { @@ -86,9 +87,9 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int const double fact = (double)(!is_cell); // Ensure that 64 points in cell-centred mode is equivalent 64 points in node-centred mode - const int nglob[3] = {myCase.nglob[0] + (int)fact, - myCase.nglob[1] + (int)fact, - myCase.nglob[2] + (int)fact}; + const int nglob[3] = {myCase.nglob[0] + (int)fact, + myCase.nglob[1] + (int)fact, + myCase.nglob[2] + (int)fact}; // h definition depends on if we are cell- of node-centred const double h[3] = {L[0] / (nglob[0] - fact), @@ -97,13 +98,13 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int FLUPS_CenterType center_type[3]; for (int i = 0; i < 3; i++) { - center_type[i] = myCase.center[i]; + center_type[i] = myCase.center[i]; } - FLUPS_BoundaryType* mybc[3][2]; - for(int id=0; id<3; id++){ - for(int is=0; is<2; is++){ - mybc[id][is] = (FLUPS_BoundaryType*) flups_malloc(sizeof(int)*lda); + FLUPS_BoundaryType *mybc[3][2]; + for (int id = 0; id < 3; id++) { + for (int is = 0; is < 2; is++) { + mybc[id][is] = (FLUPS_BoundaryType *)flups_malloc(sizeof(int) * lda); } } @@ -123,7 +124,7 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int //------------------------------------------------------------------------- std::string name = "tube_" + std::to_string((int)(nglob[0] / L[0])) + "_nrank" + std::to_string(comm_size) + "_nthread" + std::to_string(omp_get_max_threads()); FLUPS_Profiler *prof = flups_profiler_new_n(name.c_str()); - FLUPS_Solver * mysolver; + FLUPS_Solver *mysolver; mysolver = flups_init_timed(topo, mybc, h, L, order, center_type, prof); flups_set_greenType(mysolver, typeGreen); @@ -167,8 +168,8 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int double *rhs1 = rhs + flups_locID(ax0, 0, 0, 0, 1, ax0, nmem, 1); double *rhs2 = rhs + flups_locID(ax0, 0, 0, 0, 2, ax0, nmem, 1); double *sol0 = sol + flups_locID(ax0, 0, 0, 0, 0, ax0, nmem, 1); - double* sol1 = sol + flups_locID(ax0, 0, 0, 0, 1, ax0, nmem, 1); - double* sol2 = sol + flups_locID(ax0, 0, 0, 0, 2, ax0, nmem, 1); + double *sol1 = sol + flups_locID(ax0, 0, 0, 0, 1, ax0, nmem, 1); + double *sol2 = sol + flups_locID(ax0, 0, 0, 0, 2, ax0, nmem, 1); int dir0 = (vdir + 1) % 3; int dir1 = (vdir + 2) % 3; @@ -317,53 +318,53 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int sol2[id] += 0.0 * myCase.ysign * myCase.xsign; } } - // } else if (cross) { - // const double lpos[3] = {pos[0] - (myCase.xcntr * L[0]), - // pos[1] - (myCase.ycntr * L[1]), - // pos[2] - (myCase.zcntr * L[2])}; - // double *lrhs[3] = {rhs0, rhs1, rhs2}; - // double *lsol[3] = {sol0, sol1, sol2}; - - // for (int i = 0; i < 3; ++i) { - // lrhs[(i + 0) % 3][id] = 0.0; - // lrhs[(i + 1) % 3][id] = 0.0; - // lrhs[(i + 2) % 3][id] = 0.0; - // lsol[(i + 0) % 3][id] = 0.0; - // lsol[(i + 1) % 3][id] = 0.0; - // lsol[(i + 2) % 3][id] = 0.0; - // } - - // for (int i = 0; i < 3; ++i) { - // const double x = lpos[(i + 1) % 3]; - // const double y = lpos[(i + 2) % 3]; - // // tube in z - // const double theta = std::atan2(y, x); // get the angle in the x-y plane - // const double r = sqrt(x * x + y * y); - // const double vort = (compact) ? vort_compact(r, R) : vort_inf(r, sigma); - // const double vel = (compact) ? vel_compact(r, R) : vel_inf(r, sigma); - // lrhs[(i + 0) % 3][id] += -vort; - // lrhs[(i + 1) % 3][id] += 0.0; - // lrhs[(i + 2) % 3][id] += 0.0; - // lsol[(i + 0) % 3][id] += 0.0; - // lsol[(i + 1) % 3][id] += -sin(theta) * vel; - // lsol[(i + 2) % 3][id] += +cos(theta) * vel; - // } - // } else if (gauss) { - // const double x = pos[0] - (myCase.xcntr * L[0]); - // const double y = pos[1] - (myCase.ycntr * L[1]); - // const double z = pos[2] - (myCase.zcntr * L[2]); - // // tube in z - // const double r = sqrt(x * x + y * y + z*z); - // const double rho = r/sigma; - // const double vort = 1.0/(4.0*M_PI*pow(sigma,3)) * sqrt(2.0/M_PI) * exp(-0.5 * rho * rho) ; - // const double vel = 1.0; - // lrhs[(i + 0) % 3][id] += -vort; - // lrhs[(i + 1) % 3][id] += 0.0; - // lrhs[(i + 2) % 3][id] += 0.0; - // lsol[(i + 0) % 3][id] += 0.0; - // lsol[(i + 1) % 3][id] += -sin(theta) * vel; - // lsol[(i + 2) % 3][id] += +cos(theta) * vel; - // } + // } else if (cross) { + // const double lpos[3] = {pos[0] - (myCase.xcntr * L[0]), + // pos[1] - (myCase.ycntr * L[1]), + // pos[2] - (myCase.zcntr * L[2])}; + // double *lrhs[3] = {rhs0, rhs1, rhs2}; + // double *lsol[3] = {sol0, sol1, sol2}; + + // for (int i = 0; i < 3; ++i) { + // lrhs[(i + 0) % 3][id] = 0.0; + // lrhs[(i + 1) % 3][id] = 0.0; + // lrhs[(i + 2) % 3][id] = 0.0; + // lsol[(i + 0) % 3][id] = 0.0; + // lsol[(i + 1) % 3][id] = 0.0; + // lsol[(i + 2) % 3][id] = 0.0; + // } + + // for (int i = 0; i < 3; ++i) { + // const double x = lpos[(i + 1) % 3]; + // const double y = lpos[(i + 2) % 3]; + // // tube in z + // const double theta = std::atan2(y, x); // get the angle in the x-y plane + // const double r = sqrt(x * x + y * y); + // const double vort = (compact) ? vort_compact(r, R) : vort_inf(r, sigma); + // const double vel = (compact) ? vel_compact(r, R) : vel_inf(r, sigma); + // lrhs[(i + 0) % 3][id] += -vort; + // lrhs[(i + 1) % 3][id] += 0.0; + // lrhs[(i + 2) % 3][id] += 0.0; + // lsol[(i + 0) % 3][id] += 0.0; + // lsol[(i + 1) % 3][id] += -sin(theta) * vel; + // lsol[(i + 2) % 3][id] += +cos(theta) * vel; + // } + // } else if (gauss) { + // const double x = pos[0] - (myCase.xcntr * L[0]); + // const double y = pos[1] - (myCase.ycntr * L[1]); + // const double z = pos[2] - (myCase.zcntr * L[2]); + // // tube in z + // const double r = sqrt(x * x + y * y + z*z); + // const double rho = r/sigma; + // const double vort = 1.0/(4.0*M_PI*pow(sigma,3)) * sqrt(2.0/M_PI) * exp(-0.5 * rho * rho) ; + // const double vel = 1.0; + // lrhs[(i + 0) % 3][id] += -vort; + // lrhs[(i + 1) % 3][id] += 0.0; + // lrhs[(i + 2) % 3][id] += 0.0; + // lsol[(i + 0) % 3][id] += 0.0; + // lsol[(i + 1) % 3][id] += -sin(theta) * vel; + // lsol[(i + 2) % 3][id] += +cos(theta) * vel; + // } } else if (ring) { //--------------------------------------------------------------- // WARNING @@ -428,13 +429,13 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int } } else if (periodic) { { - const double x = pos[0]; - const double y = pos[1]; - const double z = pos[2]; + const double x = pos[0]; + const double y = pos[1]; + const double z = pos[2]; const double k[3] = {2.0 * M_PI, 2.0 * M_PI, 2.0 * M_PI}; - rhs0[id] = cos(k[1] * y) / k[1]; - rhs1[id] = cos(k[2] * z) / k[2]; - rhs2[id] = cos(k[0] * x) / k[0]; + rhs0[id] = cos(k[1] * y) / k[1]; + rhs1[id] = cos(k[2] * z) / k[2]; + rhs2[id] = cos(k[0] * x) / k[0]; // rot_x = sin(k[2] * z) -> sol = -1/k[2] * sin(k[2] * z) // rot_y = sin(k[0] * x) -> sol = -1/k[0] * sin(k[0] * x) // rot_z = sin(k[1] * y) -> sol = -1/k[1] * sin(k[1] * y) @@ -466,15 +467,15 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int const double z = pos[2]; const double k[3] = {1.5 * M_PI, 1.5 * M_PI, 1.5 * M_PI}; - rhs0[id] = 0.0; //sin(k[1] * y) / k[1]; + rhs0[id] = 0.0; // sin(k[1] * y) / k[1]; rhs1[id] = sin(k[2] * z) / k[2]; - rhs2[id] = 0.0; //sin(k[0] * x) / k[0]; + rhs2[id] = 0.0; // sin(k[0] * x) / k[0]; // rot_x = cos(k[2] * z) -> sol = -1/k[2]^2 * cos(k[2] * z) // rot_y = cos(k[0] * x) -> sol = -1/k[0]^2 * cos(k[0] * x) // rot_z = cos(k[1] * y) -> sol = -1/k[1]^2 * cos(k[1] * y) sol0[id] = -1.0 / (k[2] * k[2]) * cos(k[2] * z); - sol1[id] = 0.0; //-1.0 / (k[0] * k[0]) * cos(k[0] * x); - sol2[id] = 0.0; //-1.0 / (k[1] * k[1]) * cos(k[1] * y); + sol1[id] = 0.0; //-1.0 / (k[0] * k[0]) * cos(k[0] * x); + sol2[id] = 0.0; //-1.0 / (k[1] * k[1]) * cos(k[1] * y); } } } @@ -497,11 +498,9 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int flups_solve(mysolver, field, rhs, ROT); } - flups_profiler_disp(prof); flups_profiler_free(prof); - #ifdef DUMP_DBG // write the source term and the solution sprintf(msg, "sol_%d%d%d%d%d%d_%dx%dx%d", mybc[0][0][0], mybc[0][1][0], mybc[1][0][0], mybc[1][1][0], mybc[2][0][0], mybc[2][1][0], nglob[0], nglob[1], nglob[2]); @@ -521,7 +520,7 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int std::memset(err2, 0, sizeof(double) * lda); std::memset(erri, 0, sizeof(double) * lda); - //determine the volume associated to a mesh + // determine the volume associated to a mesh double vol = 1.0; for (int id = 0; id < 3; id++) { if (mybc[id][0][0] != NONE && mybc[id][1][0] != NONE) { @@ -535,23 +534,23 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int const int ax2 = (ax0 + 2) % 3; const int nmem[3] = {flups_topo_get_nmem(topo, 0), flups_topo_get_nmem(topo, 1), flups_topo_get_nmem(topo, 2)}; for (int lia = 0; lia < lda; lia++) { - // Check if flups is able to compute the last point of the domain. If at least one of the direction is periodic, + // Check if flups is able to compute the last point of the domain. If at least one of the direction is periodic, // and we are in an node centred framework, then the last point in each direction may be influenced - const bool last_point_valid = !(!is_cell && ( mybc[lia][0][0] == PER || mybc[lia][0][1] == PER || mybc[lia][0][2] == PER )); - + const bool last_point_valid = !(!is_cell && (mybc[lia][0][0] == PER || mybc[lia][0][1] == PER || mybc[lia][0][2] == PER)); + // If the last point is not valid, remove it from the error computation const int iend[3] = {flups_topo_get_nloc(topo, ax0), flups_topo_get_nloc(topo, ax1), flups_topo_get_nloc(topo, ax2)}; - + for (int i2 = 0; i2 < iend[2]; i2++) { for (int i1 = 0; i1 < iend[1]; i1++) { for (int i0 = 0; i0 < iend[0]; i0++) { const size_t id = flups_locID(ax0, i0, i1, i2, lia, ax0, nmem, 1); const double err = sol[id] - field[id]; - lerri[lia] = max(lerri[lia], fabs(err)); + lerri[lia] = max(lerri[lia], fabs(err)); lerr2[lia] += (err * err) * vol; - error[id] = err; + error[id] = err; } } } @@ -571,9 +570,9 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int #endif char filename[512]; - string folder = "./data"; - string ct_name = is_cell ? "CellCenter" : "NodeCenter"; - sprintf(filename, "%s/%s_%s_%d_%d%d%d_typeGreen=%d.txt", folder.c_str(), __func__, ct_name.c_str(), type, vdir, (int) myCase.xsign, (int)myCase.ysign, typeGreen); + string folder = "./data"; + string ct_name = is_cell ? "CellCenter" : "NodeCenter"; + sprintf(filename, "%s/%s_%s_%d_%d%d%d_typeGreen=%d.txt", folder.c_str(), __func__, ct_name.c_str(), type, vdir, (int)myCase.xsign, (int)myCase.ysign, typeGreen); if (rank == 0) { struct stat st = {0}; @@ -604,12 +603,13 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int free(lerri); free(err2); free(erri); - + flups_free(sol); flups_free(rhs); flups_free(field); flups_free(error); flups_cleanup(mysolver); + flups_cleanup_fftw(); flups_topo_free(topo); for (int id = 0; id < 3; id++) { diff --git a/samples/validation/src/validation_3d.cpp b/samples/validation/src/validation_3d.cpp index e32ebc5..059c93e 100644 --- a/samples/validation/src/validation_3d.cpp +++ b/samples/validation/src/validation_3d.cpp @@ -2,32 +2,32 @@ * @file validation_3d.cpp * @author Thomas Gillis and Denis-Gabriel Caprace * @copyright Copyright © UCLouvain 2020 - * + * * FLUPS is a Fourier-based Library of Unbounded Poisson Solvers. - * + * * Copyright <2020> - * + * * List of the contributors to the development of FLUPS, Description and complete License: see LICENSE and NOTICE files. - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * + * */ -#include -#include - #include "validation_3d.hpp" +#include +#include + #include "omp.h" using namespace std; @@ -45,31 +45,31 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co * @param nSolve number of times we call the same solver (for timing) */ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int lda, const int nSolve, const std::string output_dir) { - int rank, comm_size; + int rank, comm_size; MPI_Comm comm = MPI_COMM_WORLD; MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &comm_size); - const int * nproc = myCase.nproc; - const double *L = myCase.L; + const int *nproc = myCase.nproc; + const double *L = myCase.L; - const bool is_cell = myCase.center_type[0] == CELL_CENTER; - const double fact = (double) (!is_cell); + const bool is_cell = myCase.center_type[0] == CELL_CENTER; + const double fact = (double)(!is_cell); - const int nglob[3] = {myCase.nglob[0] + (int)fact, myCase.nglob[1] + (int)fact, + const int nglob[3] = {myCase.nglob[0] + (int)fact, myCase.nglob[1] + (int)fact, (myCase.nglob[2] == 1) ? 1 : myCase.nglob[2] + (int)fact}; - const double h[3] = {L[0] / (nglob[0] - fact), L[1] / (nglob[1] - fact), + const double h[3] = {L[0] / (nglob[0] - fact), L[1] / (nglob[1] - fact), nglob[2] == 1 ? 1 : L[2] / (nglob[2] - fact)}; FLUPS_CenterType center_type[3]; for (int i = 0; i < 3; i++) { - center_type[i] = myCase.center_type[i]; + center_type[i] = myCase.center_type[i]; } - FLUPS_BoundaryType* mybc[3][2]; - for(int id=0; id<3; id++){ - for(int is=0; is<2; is++){ - mybc[id][is] = (FLUPS_BoundaryType*) flups_malloc(sizeof(int)*lda); + FLUPS_BoundaryType *mybc[3][2]; + for (int id = 0; id < 3; id++) { + for (int is = 0; is < 2; is++) { + mybc[id][is] = (FLUPS_BoundaryType *)flups_malloc(sizeof(int) * lda); } } @@ -100,9 +100,9 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co //------------------------------------------------------------------------- std::string name = "validation_nx" + std::to_string((int)(nglob[0])) + "_ny" + std::to_string((int)(nglob[1])) + "_nz" + std::to_string((int)(nglob[2])) + "_nrank" + std::to_string(comm_size) + "_nthread" + std::to_string(omp_get_max_threads()); FLUPS_Profiler *prof = flups_profiler_new_n(name.c_str()); - + m_profStart(prof, "Validation--Init-Flups"); - FLUPS_Solver * mysolver; + FLUPS_Solver *mysolver; mysolver = flups_init_timed(topo, mybc, h, L, NOD, center_type, prof); flups_set_greenType(mysolver, typeGreen); @@ -113,7 +113,7 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co MPI_Comm_rank(comm, &rank); m_profStop(prof, "Validation--Init-Flups"); - //flups_switchtopo_info(mysolver); + // flups_switchtopo_info(mysolver); //------------------------------------------------------------------------- /** - allocate rhs and solution */ @@ -145,19 +145,19 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co */ for (int lia = 0; lia < lda; lia++) { for (int j2 = -1; j2 < 2; j2++) { - if (j2 != 0 && mybc[2][(j2 + 1) / 2] == UNB) continue; //skip unbounded dirs + if (j2 != 0 && mybc[2][(j2 + 1) / 2] == UNB) continue; // skip unbounded dirs for (int j1 = -1; j1 < 2; j1++) { - if (j1 != 0 && mybc[1][(j1 + 1) / 2] == UNB) continue; //skip unbounded dirs + if (j1 != 0 && mybc[1][(j1 + 1) / 2] == UNB) continue; // skip unbounded dirs for (int j0 = -1; j0 < 2; j0++) { - if (j0 != 0 && mybc[0][(j0 + 1) / 2] == UNB) continue; //skip unbounded dirs + if (j0 != 0 && mybc[0][(j0 + 1) / 2] == UNB) continue; // skip unbounded dirs double sign = 1.0; double centerPos[3]; - double orig[3] = {j0 * L[0], j1 * L[1], j2 * L[2]}; //inner left corner of the current block i'm looking at + double orig[3] = {j0 * L[0], j1 * L[1], j2 * L[2]}; // inner left corner of the current block i'm looking at - sign *= j0 == 0 ? 1.0 : 1 - 2 * (mybc[0][(j0 + 1) / 2] == ODD); //multiply by -1 if the symm is odd - sign *= j1 == 0 ? 1.0 : 1 - 2 * (mybc[1][(j1 + 1) / 2] == ODD); //multiply by -1 if the symm is odd - sign *= j2 == 0 ? 1.0 : 1 - 2 * (mybc[2][(j2 + 1) / 2] == ODD); //multiply by -1 if the symm is odd + sign *= j0 == 0 ? 1.0 : 1 - 2 * (mybc[0][(j0 + 1) / 2] == ODD); // multiply by -1 if the symm is odd + sign *= j1 == 0 ? 1.0 : 1 - 2 * (mybc[1][(j1 + 1) / 2] == ODD); // multiply by -1 if the symm is odd + sign *= j2 == 0 ? 1.0 : 1 - 2 * (mybc[2][(j2 + 1) / 2] == ODD); // multiply by -1 if the symm is odd centerPos[0] = orig[0] + ((j0 != 0) && (mybc[0][(j0 + 1) / 2] != PER) ? (1.0 - center[0]) * L[0] : (center[0] * L[0])); centerPos[1] = orig[1] + ((j1 != 0) && (mybc[1][(j1 + 1) / 2] != PER) ? (1.0 - center[1]) * L[1] : (center[1] * L[1])); @@ -297,13 +297,13 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co } } - //USE THE FOLLOWING TO TEST THE K=0 PART OF THE 1DIRUNBOUNDED KERNEL - // manuRHS[0] = &fZero; - // manuSol[0] = &fCst; - // manuRHS[1] = &fZero; - // manuSol[1] = &fCst; - // manuRHS[2] = &d2dx2_fUnbSpietz; - // manuSol[2] = &fUnbSpietz; + // USE THE FOLLOWING TO TEST THE K=0 PART OF THE 1DIRUNBOUNDED KERNEL + // manuRHS[0] = &fZero; + // manuSol[0] = &fCst; + // manuRHS[1] = &fZero; + // manuSol[1] = &fCst; + // manuRHS[2] = &d2dx2_fUnbSpietz; + // manuSol[2] = &fUnbSpietz; { // Obtaining the reference sol and rhs @@ -317,32 +317,32 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co for (int i2 = 0; i2 < flups_topo_get_nloc(topo, ax2); i2++) { for (int i1 = 0; i1 < flups_topo_get_nloc(topo, ax1); i1++) { for (int i0 = 0; i0 < flups_topo_get_nloc(topo, ax0); i0++) { - const size_t id = flups_locID(ax0, i0, i1, i2, lia, ax0, nmem, 1); - const double shift = is_cell ? 0.5: 0.0; - const double x[3] = {(istart[ax0] + i0 + shift) * h[ax0], - (istart[ax1] + i1 + shift) * h[ax1], - (istart[ax2] + i2 + shift) * h[ax2]}; + const size_t id = flups_locID(ax0, i0, i1, i2, lia, ax0, nmem, 1); + const double shift = is_cell ? 0.5 : 0.0; + const double x[3] = {(istart[ax0] + i0 + shift) * h[ax0], + (istart[ax1] + i1 + shift) * h[ax1], + (istart[ax2] + i2 + shift) * h[ax2]}; // const double y[3] = {0.,(x[1]-.5)/params.sigma[1],(x[2]-.5)/params.sigma[2] }; // const double rsq = y[1]*y[1] + y[2]*y[2] ; //((x[1]-.5)*(x[1]-.5)+(x[2]-.5)*(x[2]-.5)) / .25; // const double r = sqrt(rsq); - //CST(z): - // sol[id] = fabs(rsq)>=1. ? 0.0 : exp(c_C * (1. - 1. / (1. - rsq))) ; - // rhs[id] = fabs(rsq) >= 1. ? 0.0 : 4.*c_C* exp(c_C * (1. - 1. / (1. - rsq))) / (pow(rsq - 1., 4) * params.sigma[id] * params.sigma[id]) * \ + // CST(z): + // sol[id] = fabs(rsq)>=1. ? 0.0 : exp(c_C * (1. - 1. / (1. - rsq))) ; + // rhs[id] = fab s(rsq) >= 1. ? 0.0 : 4.*c_C* exp(c_C * (1. - 1. / (1. - rsq))) / (pow(rsq - 1., 4) * params.sigma[id] * params.sigma[id]) * \ // (c_C * rsq - 1. + pow(y[1],4) + pow(y[2],4) + 2.* y[1]*y[1]*y[2]*y[2]) ; - //SIN(z): - // sol[id] = fabs(rsq) >= 1. ? 0.0 : exp(c_C * (1. - 1. / (1. - rsq))) * (sin(2. * M_PI * x[0] / L[0])); - // rhs[id] = fabs(rsq) >= 1. ? 0.0 : 4.*c_C* exp(c_C * (1. - 1. / (1. - rsq))) / (pow(rsq - 1., 4) * params.sigma[id] * params.sigma[id]) * \ + // SIN(z): + // sol[id] = fabs(rsq) >= 1. ? 0.0 : exp(c_C * (1. - 1. / (1. - rsq))) * (sin(2. * M_PI * x[0] / L[0])); + // rhs[id] = fab s(rsq) >= 1. ? 0.0 : 4.*c_C* exp(c_C * (1. - 1. / (1. - rsq))) / (pow(rsq - 1., 4) * params.sigma[id] * params.sigma[id]) * \ // (c_C * rsq - 1. + pow(y[1],4) + pow(y[2],4) + 2.* y[1]*y[1]*y[2]*y[2]) * (sin(2.*M_PI*x[0] /L[0])) ; - // rhs[id] += fabs(rsq) >= 1. ? 0.0 : -sin(2*M_PI*x[0] /L[0]) * (2. * M_PI / L[0])* (2. * M_PI / L[0]) * exp(c_C * (1. - 1. / (1. - rsq))) ; + // rhs[id] += fabs(rsq) >= 1. ? 0.0 : -sin(2*M_PI*x[0] /L[0]) * (2. * M_PI / L[0])* (2. * M_PI / L[0]) * exp(c_C * (1. - 1. / (1. - rsq))) ; - //CST(z)+sin(z): - // sol[id] = fabs(rsq) >= 1. ? 0.0 : exp(c_C * (1. - 1. / (1. - rsq))) * (1. + sin(2. * M_PI * x[0] / L[0])); - // rhs[id] = fabs(rsq) >= 1. ? 0.0 : 4.*c_C* exp(c_C * (1. - 1. / (1. - rsq))) / (pow(rsq - 1., 4) * params.sigma[id] * params.sigma[id]) * \ + // CST(z)+sin(z): + // sol[id] = fabs(rsq) >= 1. ? 0.0 : exp(c_C * (1. - 1. / (1. - rsq))) * (1. + sin(2. * M_PI * x[0] / L[0])); + // rhs[id] = fab s(rsq) >= 1. ? 0.0 : 4.*c_C* exp(c_C * (1. - 1. / (1. - rsq))) / (pow(rsq - 1., 4) * params.sigma[id] * params.sigma[id]) * \ // (c_C * rsq - 1. + pow(y[1],4) + pow(y[2],4) + 2.* y[1]*y[1]*y[2]*y[2]) * (1.+sin(2.*M_PI*x[0] /L[0])) ; - // rhs[id] += fabs(rsq) >= 1. ? 0.0 : -sin(2*M_PI*x[0] /L[0]) * (2. * M_PI / L[0])* (2. * M_PI / L[0]) * exp(c_C * (1. - 1. / (1. - rsq))) ; + // rhs[id] += fabs(rsq) >= 1. ? 0.0 : -sin(2*M_PI*x[0] /L[0]) * (2. * M_PI / L[0])* (2. * M_PI / L[0]) * exp(c_C * (1. - 1. / (1. - rsq))) ; for (int dir = 0; dir < 3; dir++) { const int dir2 = (dir + 1) % 3; @@ -360,7 +360,6 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co #endif m_profStop(prof, "Validation--Init-RHS"); - #ifdef DUMP_DBG char msg[512]; // write the source term and the solution @@ -370,17 +369,16 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co flups_hdf5_dump(topo, msg, sol); #endif - //------------------------------------------------------------------------- /** - Warm up */ //------------------------------------------------------------------------- for (int is = 0; is < 5; is++) { MPI_Barrier(MPI_COMM_WORLD); m_profStart(prof, "Validation--Warm-up"); - flups_solve(mysolver, field, rhs,STD); + flups_solve(mysolver, field, rhs, STD); m_profStop(prof, "Validation--Warm-up"); MPI_Barrier(MPI_COMM_WORLD); - } + } //------------------------------------------------------------------------- /** - solve the equations */ @@ -388,14 +386,13 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co for (int is = 0; is < nSolve; is++) { MPI_Barrier(MPI_COMM_WORLD); m_profStart(prof, "Validation--Solve"); - flups_solve(mysolver, field, rhs,STD); + flups_solve(mysolver, field, rhs, STD); m_profStop(prof, "Validation--Solve"); MPI_Barrier(MPI_COMM_WORLD); } flups_profiler_disp(prof); - #ifdef DUMP_DBG // write the source term and the solution sprintf(msg, "sol_%d%d%d%d%d%d_%dx%dx%d", mybc[0][0][0], mybc[0][1][0], mybc[1][0][0], mybc[1][1][0], mybc[2][0][0], mybc[2][1][0], nglob[0], nglob[1], nglob[2]); @@ -415,7 +412,7 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co std::memset(err2, 0, sizeof(double) * lda); std::memset(erri, 0, sizeof(double) * lda); - //determine the volume associated to a mesh + // determine the volume associated to a mesh double vol = 1.0; for (int id = 0; id < 3; id++) { if (mybc[id][0][0] != NONE && mybc[id][1][0] != NONE) { @@ -449,12 +446,10 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co err2[i] = sqrt(err2[i]); } - - - string folder = output_dir + "/data"; - string ct_name = is_cell ? "CellCenter" : "NodeCenter"; + string folder = output_dir + "/data"; + string ct_name = is_cell ? "CellCenter" : "NodeCenter"; char filename[PATH_MAX]; - sprintf(filename, "%s/%s_%s_%d%d%d%d%d%d_typeGreen=%d.txt", folder.c_str(), __func__, ct_name.c_str(), mybc[0][0][0], mybc[0][1][0], mybc[1][0][0], mybc[1][1][0], mybc[2][0][0], mybc[2][1][0], typeGreen); + sprintf(filename, "%s/%s_%s_%d%d%d%d%d%d_typeGreen=%d.txt", folder.c_str(), __func__, ct_name.c_str(), mybc[0][0][0], mybc[0][1][0], mybc[1][0][0], mybc[1][1][0], mybc[2][0][0], mybc[2][1][0], typeGreen); if (rank == 0) { struct stat st_output = {0}; @@ -462,9 +457,9 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co if (stat(output_dir.c_str(), &st_output) == -1) { mkdir(output_dir.c_str(), 0770); } - + if (stat(folder.c_str(), &st_folder) == -1) { - mkdir(folder.c_str(), 0770); + mkdir(folder.c_str(), 0770); } FILE *myfile = fopen(filename, "a+"); @@ -498,6 +493,7 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co flups_free(rhs); flups_free(field); flups_cleanup(mysolver); + flups_cleanup_fftw(); flups_profiler_free(prof); flups_topo_free(topo); diff --git a/src/flups.cpp b/src/flups.cpp index 0d63ae3..7352fac 100644 --- a/src/flups.cpp +++ b/src/flups.cpp @@ -136,6 +136,7 @@ void flups_cleanup(Solver* s) { flups_cleanup_ext(s, false); } +// Destroy the solver and cleanup fftw void flups_cleanup_ext(Solver* s, const bool cleanup_fftw) { delete s; if (cleanup_fftw){ @@ -143,6 +144,7 @@ void flups_cleanup_ext(Solver* s, const bool cleanup_fftw) { } } +// cleanup all the structures related to fftw void flups_cleanup_fftw(){ #if FLUPS_OPENMP fftw_cleanup_threads(); diff --git a/src/flups.h b/src/flups.h index 61ffa0d..d3dc73a 100644 --- a/src/flups.h +++ b/src/flups.h @@ -410,6 +410,8 @@ void flups_cleanup_ext(FLUPS_Solver* s, const bool cleanup_fftw); void flups_cleanup_fftw(); +void flups_fftw_init(); + /** * @brief sets the type of the Green's function used by the solver * diff --git a/src/flups_interface.h b/src/flups_interface.h index 3642344..46b1ea4 100644 --- a/src/flups_interface.h +++ b/src/flups_interface.h @@ -24,7 +24,7 @@ * @brief Memory alignment in bytes. * */ -#define FLUPS_ALIGNMENT 16 +#define FLUPS_ALIGNMENT 8 //============================================================================== /** From f7c18eeb494e968ed78f4f821592abfb1980781f Mon Sep 17 00:00:00 2001 From: pbalty Date: Tue, 30 Apr 2024 12:28:19 +0200 Subject: [PATCH 16/19] :memo: Update the ReadMe --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fa04950..b33c02f 100644 --- a/README.md +++ b/README.md @@ -224,7 +224,8 @@ flups_solve(mysolver,rhs, rhs); Then, destroy the solver and the created topology ``` -flups_cleanup(mysolver); +flups_cleanup(mysolver); // destroy the solver +flups_cleanup_fftw(); // cleanup the fftw stuff flups_topo_free(topo); for (int id = 0; id < 3; id++) { for (int is = 0; is < 2; is++) { @@ -319,4 +320,4 @@ The _daily test_ is a smaller, in-house, test suite, that can be executed on a d ### Resources - [CI testing](doc/CI.md) -- [real to real FFT](doc/Mode_Correction.md) \ No newline at end of file +- [real to real FFT](doc/Mode_Correction.md) From 988dfcd3c8ea61a7304ad9b60616632c720a005c Mon Sep 17 00:00:00 2001 From: pbalty Date: Tue, 30 Apr 2024 13:30:53 +0200 Subject: [PATCH 17/19] :art: Working on the naming of the new functions --- src/flups.cpp | 20 +++++++++----------- src/flups.h | 15 ++++++++++++--- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/src/flups.cpp b/src/flups.cpp index 7352fac..033a4a3 100644 --- a/src/flups.cpp +++ b/src/flups.cpp @@ -30,16 +30,16 @@ #include // std::remove +#include "FFTW_plan_dim.hpp" #include "Solver.hpp" #include "Topology.hpp" -#include "FFTW_plan_dim.hpp" #include "defines.hpp" #include "h3lpr/profiler.hpp" extern "C" { void* flups_malloc(size_t size) { - return m_calloc(size); + return m_calloc(size); } void flups_free(void* data) { @@ -133,20 +133,18 @@ Solver* flups_init_timed(Topology* t, BoundaryType* bc[3][2], const double h[3], // destroy the solver void flups_cleanup(Solver* s) { - flups_cleanup_ext(s, false); + flups_cleanup_all(s); } // Destroy the solver and cleanup fftw -void flups_cleanup_ext(Solver* s, const bool cleanup_fftw) { +void flups_cleanup_all(Solver* s) { delete s; - if (cleanup_fftw){ - flups_cleanup_fftw(); - } + flups_cleanup_fftw(); } // cleanup all the structures related to fftw -void flups_cleanup_fftw(){ -#if FLUPS_OPENMP +void flups_cleanup_fftw() { +#if FLUPS_OPENMP fftw_cleanup_threads(); #endif @@ -181,7 +179,7 @@ void flups_set_alpha(Solver* s, const double alpha) { s->set_alpha(alpha); } -double* flups_get_innerBuffer(FLUPS_Solver* s){ +double* flups_get_innerBuffer(FLUPS_Solver* s) { return s->get_innerBuffer(); } @@ -193,7 +191,7 @@ Topology* flups_get_innerTopo_spectral(Solver* s) { return s->get_innerTopo_spectral(); } -void flups_skip_firstSwitchtopo(Solver* s){ +void flups_skip_firstSwitchtopo(Solver* s) { s->skip_firstSwitchtopo(); } diff --git a/src/flups.h b/src/flups.h index d3dc73a..6861d68 100644 --- a/src/flups.h +++ b/src/flups.h @@ -402,15 +402,24 @@ FLUPS_Solver* flups_init_timed(FLUPS_Topology* t, FLUPS_BoundaryType* bc[3][2], void flups_cleanup(FLUPS_Solver* s); /** - * @brief must be called before execution terminates as it frees the memory used by the solver + * @brief Free the memomry of the solver and the data employed by fftw + * + * @warning this function free all the data of fftw. If you use multiple solver + * you should not call this function * * @param s */ -void flups_cleanup_ext(FLUPS_Solver* s, const bool cleanup_fftw); +void flups_cleanup_all(FLUPS_Solver* s); +/** + * @brief Free data employed by fftw + * + * @warning this function free all the data of fftw. If you use multiple solver, this function + * should be called after all the solver have freed + * + */ void flups_cleanup_fftw(); -void flups_fftw_init(); /** * @brief sets the type of the Green's function used by the solver From f1fffe050fbda8b7adfbd4b1858d59a2930528c4 Mon Sep 17 00:00:00 2001 From: pbalty Date: Tue, 30 Apr 2024 15:07:16 +0200 Subject: [PATCH 18/19] :bug: :white_check_mark: Fix the test --- src/Solver.cpp | 2 +- src/flups_interface.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Solver.cpp b/src/Solver.cpp index a680d7a..01a9257 100644 --- a/src/Solver.cpp +++ b/src/Solver.cpp @@ -66,7 +66,7 @@ Solver::Solver(Topology *topo, BoundaryType *rhsbc[3][2], const double h[3], con if (fftw_alignment_of(&(data[i])) == 0) { // if we are above the minimum requirement, generate an error if (i > alignSize) { - FLUPS_CHECK(false, "The FLUPS alignement has to be a multiple integer of the FFTW alignement, please change the constant variable FLUPS_ALIGNMENT into file flups.h accordingly: FFTW=%d vs FLUPS=%d", fftwalignment_, FLUPS_ALIGNMENT); + FLUPS_CHECK(false, "The FLUPS alignement has to be a multiple integer of the FFTW alignement, please change the constant variable FLUPS_ALIGNMENT into file flups_interface.h accordingly: FFTW=%d vs FLUPS=%d", fftwalignment_, FLUPS_ALIGNMENT); } // else, just stop and advise the user to change break; diff --git a/src/flups_interface.h b/src/flups_interface.h index 46b1ea4..3642344 100644 --- a/src/flups_interface.h +++ b/src/flups_interface.h @@ -24,7 +24,7 @@ * @brief Memory alignment in bytes. * */ -#define FLUPS_ALIGNMENT 8 +#define FLUPS_ALIGNMENT 16 //============================================================================== /** From bf3e8afbc7cd1318875ef9add6f610c74dc1502d Mon Sep 17 00:00:00 2001 From: pbalty Date: Fri, 3 May 2024 09:56:42 +0200 Subject: [PATCH 19/19] :memo: Add credits to James for the new kernels --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b32f707..c1d4962 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,13 @@ For the list of all the contributors to the development of FLUPS, description an FLUPS' design, implementation, and performances are described in two papers. If you use FLUPS, please cite them in your publications: -- [Balty et al.](https://arxiv.org/abs/2211.07777), **FLUPS - a flexible and performant massively parallel Fourier transform library**, submitted, 2022 +- [Balty et al.](https://arxiv.org/abs/2211.07777), **FLUPS - a flexible and performant massively parallel Fourier transform library**, IEEE Transactions on Parallel and Distributed Systems, 2023 - [Caprace et al.](https://arxiv.org/abs/2006.09300), **FLUPS - A Fourier-based Library of Unbounded Poisson Solvers**, SIAM Journal on Scientific Computing, 2021 +The high-order Lattice Green's functions (LGF and MEHR) available in FLUPS are described in a third paper. If you use those kernels, please cite the related paper in your publications: +- [Gabbard et al.](https://arxiv.org/abs/2309.13503), **Lattice Green’s Functions for High-Order Finite Difference Stencils**, SIAM Journal on Numerical Analysis, 2024 + ## Why should you use FLUPS? - You can solve the Poisson on rectangular and uniform distributed grids; - You can use either cell-centred or node-centred data layout;