From f260967b3ded2f142b7ddca9d639dddbc7049eae Mon Sep 17 00:00:00 2001
From: James Gabbard <jgabbard@mit.edu>
Date: Fri, 8 Sep 2023 21:58:54 +0000
Subject: [PATCH 01/19] :sparkles: allow LGF with 2unb + 1spe

---
 src/Solver.cpp                |  5 +++--
 src/green_functions.cpp       |  8 ++++----
 test/src/convergence_test.cpp | 20 ++++++--------------
 3 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/src/Solver.cpp b/src/Solver.cpp
index 8fd4774..56badcd 100644
--- a/src/Solver.cpp
+++ b/src/Solver.cpp
@@ -1056,7 +1056,6 @@ void Solver::cmptGreenFunction_(Topology *topo[3], double *green, FFTW_plan_dim
         FLUPS_INFO(">> using Green function type %d on 3 dir unbounded", typeGreen_);
         cmpt_Green_3dirunbounded(topo[0], hfact, symstart, green, typeGreen_, kernelLength);
     } else if ((n_unbounded) == 2) {
-        FLUPS_CHECK(!(typeGreen_ == LGF_2 && nbr_spectral == 1), "You cannot use LGF with one spectral direction!!");
         FLUPS_INFO(">> using Green function of type %d on 2 dir unbounded", typeGreen_);
         cmpt_Green_2dirunbounded(topo[0], hfact, kfact, koffset, symstart, green, typeGreen_, kernelLength);
     } else if ((n_unbounded) == 1) {
@@ -1109,7 +1108,9 @@ void Solver::cmptGreenFunction_(Topology *topo[3], double *green, FFTW_plan_dim
     /** - Complete the Green function in 2dirunbounded regularized case: we rewrite on the whole domain
      *      except the plane where k=0 in the spectral direction, as this was correctly computed. */
     // No need to scale this as that part of the Green function has a volfact = 1
-    if (ndim_ == 3 && nbr_spectral == 1 && (typeGreen_ == HEJ_2 || typeGreen_ == HEJ_4 || typeGreen_ == HEJ_6 || typeGreen_ == HEJ_8 || typeGreen_ == HEJ_10)) {
+    const bool is_hej = (typeGreen_ == HEJ_2 || typeGreen_ == HEJ_4 || typeGreen_ == HEJ_6 || typeGreen_ == HEJ_8 || typeGreen_ == HEJ_10);
+    const bool is_lgf = (typeGreen_ == LGF_2 || typeGreen_ == LGF_4 || typeGreen_ == LGF_6 || typeGreen_ == LGF_8);
+    if (ndim_ == 3 && nbr_spectral == 1 && (is_hej || is_lgf)) {
         int istart_cstm[3] = {0, 0, 0};  // global
 
         for (int ip = 0; ip < 3; ip++) {
diff --git a/src/green_functions.cpp b/src/green_functions.cpp
index b769bf3..210bbe0 100644
--- a/src/green_functions.cpp
+++ b/src/green_functions.cpp
@@ -276,7 +276,7 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const
             // caution: the value of G in k=r=0 is specified at the end of this routine
             break;
         case LGF_2:
-            FLUPS_CHECK(hfact[2] < 1.0e-14, "This LGF cannot be called in a 3D problem -> h[3] = %e",hfact[3]);
+            FLUPS_WARNING("LGF kernels in 2dirunbounded 1dirspectral entail an approximation in 3D.");
             FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs");
             // read the LGF data and store it
             lgf_readfile_(LGF_2, 2, &GN, &Gdata);
@@ -286,7 +286,7 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const
             Gr0 = &lgf_2_2unb0spe_;
             break;
         case LGF_4:
-            FLUPS_CHECK(hfact[2] < 1.0e-14, "This LGF cannot be called in a 3D problem -> h[3] = %e",hfact[3]);
+            FLUPS_WARNING("LGF kernels in 2dirunbounded 1dirspectral entail an approximation in 3D.");
             FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs");
             lgf_readfile_(LGF_4, 2, &GN, &Gdata);
             G   = &zero_;
@@ -294,7 +294,7 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const
             Gr0 = &lgf_4_2unb0spe_;
             break;
         case LGF_6:
-            FLUPS_CHECK(hfact[2] < 1.0e-14, "This LGF cannot be called in a 3D problem -> h[3] = %e",hfact[3]);
+            FLUPS_WARNING("LGF kernels in 2dirunbounded 1dirspectral entail an approximation in 3D.");
             FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs");
             lgf_readfile_(LGF_6, 2, &GN, &Gdata);
             G   = &zero_;
@@ -302,7 +302,7 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const
             Gr0 = &lgf_6_2unb0spe_;
             break;
         case LGF_8:
-            FLUPS_CHECK(hfact[2] < 1.0e-14, "This LGF cannot be called in a 3D problem -> h[3] = %e",hfact[3]);
+            FLUPS_WARNING("LGF kernels in 2dirunbounded 1dirspectral entail an approximation in 3D.");
             FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs");
             lgf_readfile_(LGF_8, 2, &GN, &Gdata);
             G   = &zero_;
diff --git a/test/src/convergence_test.cpp b/test/src/convergence_test.cpp
index c7038ce..2517dd6 100644
--- a/test/src/convergence_test.cpp
+++ b/test/src/convergence_test.cpp
@@ -165,20 +165,12 @@ TEST_P(ConvergenceTest, AllBoundaryConditions){
     bool isZSpectral = (*(mybc_[2][0]) != UNB) && (*(mybc_[2][1]) != UNB);
     int  n_spectral  = isXSpectral + isYSpectral + isZSpectral;
     bool is_green_spectral = (n_spectral == 3) && (CHAT_2 == green_ || HEJ_0 == green_); // Chat2 and Hej0 have a spectral accuracy with spectral bc
-    bool is_green_invalid = (HEJ_0 == green_) && (n_spectral != 3); // Hej0 cannot handle unbounded bcs.
-    if (is2D) {
-        is_green_invalid |= (MEHR_4L == green_) || (MEHR_4F == green_); // MEHR_4 stencils are for 3D domains
-        is_green_invalid |= (MEHR_6L == green_) || (MEHR_6F == green_); // MEHR_6 stencils are for 3D domains
-    } else {
-        is_green_invalid |= (LGF_2 == green_) && (n_spectral == 1); // LGF_2 cannot handle one spectral direction
-        is_green_invalid |= (LGF_4 == green_) && (n_spectral == 1); // LGF_4 cannot handle one spectral direction
-        is_green_invalid |= (LGF_6 == green_) && (n_spectral == 1); // LGF_6 cannot handle one spectral direction
-        is_green_invalid |= (LGF_8 == green_) && (n_spectral == 1); // LGF_8 cannot handle one spectral direction
-        is_green_invalid |= (MEHR_4L == green_) && (n_spectral == 1); // MEHR_4L cannot handle one spectral direction
-        is_green_invalid |= (MEHR_6L == green_) && (n_spectral == 1); // MEHR_6L cannot handle one spectral direction
-        is_green_invalid |= (MEHR_4F == green_) && (n_spectral == 1); // MEHR_4F cannot handle one spectral direction
-        is_green_invalid |= (MEHR_6F == green_) && (n_spectral == 1); // MEHR_6F cannot handle one spectral direction
-    }
+    bool is_mehrstellen = (MEHR_4L == green_ || MEHR_4F == green_ || MEHR_6L == green_ || MEHR_6F == green_);
+    
+    bool is_green_invalid = false;
+    is_green_invalid |= (HEJ_0 == green_) && (n_spectral != 3); // Hej0 cannot handle unbounded bcs.
+    is_green_invalid |= ((!is2D) && is_mehrstellen && (n_spectral == 1)); // MEHR stencils cannot handle 3D with one spectral direction
+    is_green_invalid |= (is2D && is_mehrstellen); // MEHR stencils cannot handle 2D domains
 
     //  -------------------------------------------------------------------------
     // Perform the test

From f7ccc15ef0cbd6b780dcac3f4afeb8f397d9104e Mon Sep 17 00:00:00 2001
From: James Gabbard <jgabbard@mit.edu>
Date: Wed, 13 Sep 2023 15:46:59 +0000
Subject: [PATCH 02/19] :bug: fixed isotropy check for LGFs

---
 src/green_functions.cpp | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/green_functions.cpp b/src/green_functions.cpp
index 210bbe0..7656d0e 100644
--- a/src/green_functions.cpp
+++ b/src/green_functions.cpp
@@ -223,6 +223,11 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const
     //Implementation note: if you want to do Helmolz, you need Hankel functions (3rd order Bessel) which are not implemented in stdC. Consider the use of boost lib.
     //notice that bessel_k has been introduced in c++17
 
+    // check for isotropy
+    const bool unb_dirs_are_isotropic = ((hfact[0] == 0.0) && (hfact[1] == hfact[2])) ||
+                                        ((hfact[1] == 0.0) && (hfact[0] == hfact[2])) ||
+                                        ((hfact[2] == 0.0) && (hfact[0] == hfact[1]));
+
     GreenKernel G;    // the Green kernel (general expression in the whole domain)
     GreenKernel Gk0;  // the Green kernel (particular expression in k=0)
     GreenKernel Gr0;  // the Green kernel (particular expression in r=0)
@@ -277,7 +282,7 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const
             break;
         case LGF_2:
             FLUPS_WARNING("LGF kernels in 2dirunbounded 1dirspectral entail an approximation in 3D.");
-            FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs");
+            FLUPS_CHECK(unb_dirs_are_isotropic, "the grid has to be isotropic to use the LGFs");
             // read the LGF data and store it
             lgf_readfile_(LGF_2, 2, &GN, &Gdata);
             // associate the Green's function
@@ -287,7 +292,7 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const
             break;
         case LGF_4:
             FLUPS_WARNING("LGF kernels in 2dirunbounded 1dirspectral entail an approximation in 3D.");
-            FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs");
+            FLUPS_CHECK(unb_dirs_are_isotropic, "the grid has to be isotropic to use the LGFs");
             lgf_readfile_(LGF_4, 2, &GN, &Gdata);
             G   = &zero_;
             Gk0 = &lgf_4_2unb0spe_;
@@ -295,7 +300,7 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const
             break;
         case LGF_6:
             FLUPS_WARNING("LGF kernels in 2dirunbounded 1dirspectral entail an approximation in 3D.");
-            FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs");
+            FLUPS_CHECK(unb_dirs_are_isotropic, "the grid has to be isotropic to use the LGFs");
             lgf_readfile_(LGF_6, 2, &GN, &Gdata);
             G   = &zero_;
             Gk0 = &lgf_6_2unb0spe_;
@@ -303,7 +308,7 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const
             break;
         case LGF_8:
             FLUPS_WARNING("LGF kernels in 2dirunbounded 1dirspectral entail an approximation in 3D.");
-            FLUPS_CHECK(hfact[0] == hfact[1], "the grid has to be isotropic to use the LGFs");
+            FLUPS_CHECK(unb_dirs_are_isotropic, "the grid has to be isotropic to use the LGFs");
             lgf_readfile_(LGF_8, 2, &GN, &Gdata);
             G   = &zero_;
             Gk0 = &lgf_8_2unb0spe_;

From 82e30755872876cddceab4e66dac9bd3489d1772 Mon Sep 17 00:00:00 2001
From: James Gabbard <jgabbard@mit.edu>
Date: Mon, 4 Dec 2023 16:09:10 +0000
Subject: [PATCH 03/19] :construction: kernel/code for 2D MEHR

---
 kernel/MEHR_4F_2d_32.ker      | Bin 0 -> 8192 bytes
 kernel/MEHR_4L6L_2d_32.ker    | Bin 0 -> 8192 bytes
 kernel/MEHR_6F_2d_32.ker      | Bin 0 -> 8192 bytes
 src/green_functions.hpp       |   6 ++--
 src/lgf_3unb0spe.hpp          |  58 ++++++++++++++++++++++++++++++++++
 test/src/convergence_test.cpp |  12 ++++---
 6 files changed, 69 insertions(+), 7 deletions(-)
 create mode 100644 kernel/MEHR_4F_2d_32.ker
 create mode 100644 kernel/MEHR_4L6L_2d_32.ker
 create mode 100644 kernel/MEHR_6F_2d_32.ker

diff --git a/kernel/MEHR_4F_2d_32.ker b/kernel/MEHR_4F_2d_32.ker
new file mode 100644
index 0000000000000000000000000000000000000000..8743a46f2de2ec545bd822ffa4be25a4ff40082e
GIT binary patch
literal 8192
zcmX|GcU+G98<r7Al*mY>2$5AOkyTkO*%?JiC=G=q6s068L^4V#N?Bzq+>&vUk%|y$
zyzjH>B#Glx_&v|(^Sj=Epa1UXzOVTmpdEiK)isCiU;EPKQ?C*VYIx`5zomk1INkTD
zJX=Wt`Je2Y5~?V6_LAwge$}Lr8vjktrH1Ae7`Yh^tEJ~QmuA|P)zV(?gGZ};>WFPN
zRGI#xj-3CgHjLOLBjNeWh97k@x~(}TXpw`QtomO1)qGJ-hdtTBtTICmZq7kHoEgf@
z`?IcpHbXXlm99>1Vd&`i8wpyIS^DT}$G3&BwBJDc!SQ=6$xEkJO>bq%wrF{#sVPUM
z6RhfTy*awFq3iJaRE}PZ^8ebioTDdIH&U#-@RYGCQU8)DPs{$fl;P{n(;z3V$C)^u
zX5ZN2zWNTYnEPp2&{+-Ba(bb4B)j19JDO9s%&5ZnBl-BM#9ftF(aLS-E1RRL>CKqz
zLtXr8=#0|J;mb{GDRi%`!TL|NG-Gvv(&sI8)Oh3MpZxYZnsPL?sU}E9ErqvRgE<-f
zaM+?ZdcK@`T3b2I%#l;WcD<>^e=+o`-E2_eN`{<Oi))r%WJu4-@^Z=#hN69cPvxhu
zG;7eAq@-Oe6<&Jypf|B}$aLkx<89*nm9yV$G~?)>5h|`_J{%1-cWU{5hNHQw_oQ`x
z!%@MyJst+iJVm5!uL?8c$z$a8pX1l?RC;8$+lqrcO-kwCqMFAm_Kmr><$ejP>QhOF
zb1HV-js8TV4TF2S<$b29drQh~Uscn>_@m3Y)Ee69WS!))td_DjmY*tbttHE2lUL1+
zsiRNLxh1dFWK{d!@67Ee8QpoakT3f#qw#J7ZXR)#(~Y^dcL(LkY1AoF^XSb`+4KAD
z25T7_J7RlK&ua`NuQeKy+|CfQIOBw!HA^AAQ^U4Juv9S3KVwHBOX1G78<YRA#P-ZD
z(V4_iuwYVZ>d%q((9qI8=Qwgb9Tmk?aAXqmu(YfjPx|_m8-JVgw4nEs$jG%kwH#>E
zjXccLgV7#G``zUgcaD8y?zpN+FE1H?CW}~$z4yesZ?vjNS2M4nv~cahyZ_YC-QZ=j
zM;xi8gp+5DHAmLb)uaIb_}n_u@R}68ajcA7v^0zC&d4arafn@rikx1?7Bmj`lhaz`
zJ;mnF<aAc4`Pnu-hSC=v`4Jz$P?+IwkG}UAs+6a+1uL_3>CP{Uk@hUPezuzv62nsF
z(%Vjtp0PBJjm)rC7WbQet(WO^jx6tObb1lUQKH5Bu~V}-a!)lKe(57e-|lx@YTc73
z*;SX$gQoEGZ;FT3DleYQ($}QwB=9uh@)VUo;uZMDonzmaJ0{|ET#{N1#Wk;T)6T6S
zeW#~&N)ENuqvFKN&Enlp>pIimg-0Fj9%5nSSzAX>K0HzDyi`VSW1{tT7RqSz_xHy#
zN61Nk@2*V_QF1C+e=R_(S}yX5fm+UZ5$9daN5_XVl<gU9JMjfWSDN1!|E0lFuugv8
z)k|60IPqWi*hH4B7nHh}m$MY`E+t{J8b@oc)0Q)HIQpy}?-LioQDM;bD{FH(Qtva7
zn_I`xxogcOUVV7_Rup#aofS`yf^y&9^yBGJ<k?D{lRQQ4XL}BN#4C6W_{N=M-<W%%
z?Ae1r5${i%j)tjUucdgMFAIJf*HN#}?s|^db>x*W^0B41i~@slC*+HF-z9Zm>yHK*
zm0fgwzgXnGtUK~4Gc)8=lr_j?LYthz$J;a>vuDVv(6;JXB11d%Kc$SRW+=Q@T&wyp
zmj1M@S^99Dc<(LC{lm_&B+U0!KTyNc^xN|*G_*J}>!wuexR9gz)tW&bQ5+>Lcp1Ml
zk0Yas9-evi9NA{HmrT{-$@|TwTzy*+_svZXdjomO%*Z<&lFC#1uMI17pYaMl2d@F&
zxO40qbMHLSGh1J;jx257H!KnHz9EaBJX_@Z%B1G2r(MLm|9iH|t@|=cKJ)qA0&O|X
zi=1)QKTu9PHfkQ6DBit)*rU?wfeekm&{HdGJwu~U?y@e+VJNLr!Vih})Qvw?IZR?{
z$F)^A`UHz}w`n$xyeaBH^P$`aO)Tk$pXt(NBuA%{eN^>aIZ};cmzy8qXrH?ASl`DS
zWoHk2Tm79Qm%GGY9?DaAr@n{3F5qd4;husAAv_t6Rtq#aFYbMw#m5z|d4-OF&%tZJ
zH|`w!#@rQszWZzOb!3vC=h!?;MsNILg}EZ$9etyMEm~#d9uPfetjPCe(Zju&G&!}$
z(%reuaw<NWcGr6vLwS>nM=g(HsKn%ayma?t<kx+Td$APS?5XWLpCuzM<EKU(OPh;&
z7%eDbsf{($vgyc?Zl)e1@t%d@G@lk<j?z<0OeNj3w-&;6OF44#d+L8ti6{RT2I?2a
z@sy%re#FFuC!?gwb<3i7I{di5n^CUF1CF=fJ%7(D^cy+`J_oM>-?($^8*}g9d$!yp
zQAXd#zlaJEb?$2ZxKR%$%4t^w{XJL2`{(s9kLH%i$#j2JhbuY^9h#y2WQ54~CJ+9o
z3=s9+Vwdi%hz5pwrgm1@J({K8+gg9<ZD2|Dp3foa?w=R;al2Q=(%{x@R!x1yy}L$d
zn%Q$yb?xNv^PwE&hcaVU=Ww*i&7x*owfH}R?Yk+f@iZ;frH90O1?vp|l&|Nh=y6)+
z)Ohj!H!H_`<@2;p`ORRjI$q&hpx@9j@Hu!5_{N=M-<Z2w?*nNMjpX!!W(`mcmQ(Af
zT&3g(a%zs%jpan2Q@$i2K2gMbc+HM$o8lPiGHi&7^DBlfKR>8FLzSiEQMHF8zVGDy
z@5;V=#l0Wu@k`SCZRfjtoBR-c4|C~m<S34u_n8lp?%wY&L9g#&j;z1OSZ*ue==sy3
zhh3UDs@uLUx?D$`OX*b2R}uFqMJ>A@Z{dmA-#O6rG*9xPaZN9ui2q?|y~B`3Ug6i^
zTcF?2G4MHf4fw{LW8avY&9#cs6ZL)JJASaa$a9TfTuv5DW{4{?=(%hML*Kf8oN@II
zL#`g2L#l}P)5V3solRI8RGgFk(wikM&B}<M>7wqn>3t3Vz>;5n>3xasgLm4Qms@kR
zC{pdql`R~l7??FkdLP~r)KyjVdrp-zf~C9n_~RG#YYb0z*QeU6E$8X<2e;@i;XIZ3
znI~Sjz*A`Fm$viEL?2M!R?q*(D|{;a8hi`%8#)F)2d@F&xO40qbLZW;rRgz&q0VU)
z!Vyv59W4ewUMBjxlzl>{)eJ)uABCI^&}C`W8Ixd1=N7MY9@$sKd$i-(X36Iye$>&3
z{g<WwlVhIFG8Fwy-O<aL&K!jf*_d?efT;V<{^upWpZ423_z%kw*Jr>zTMeEX16LKS
zoz7FfMxKGB_l_qcE=qT=x-{gM<o5!$a;H94^Hjp@e43*yDEv8mD*PIJ3-lX020jO`
z0pGZD>>G2d4q8<GCX=DQF2(OOs~E~MF<;VM)c089%e~gwvUJdXj7z)d@7FAiV6$>r
z$|xD$*-6II*sVuaNqDc$Y8bL{Hb<|t#)L{fN4H(sxBFQU_s#Lo3|@0obpJy9$X^^e
zth_e2?+9_;5jM#Z-@A9~TK9N6PuDz4+fJnM)Ni<%%GM&DZZ*0ua{t0pT3es~>oo*L
zE`dLXPlaEDZ-IV8$H3>{HQ*a}j(ub9kCS{JOZ@Jc_`|?n^tHyyAM}T8VM(*?Sb@ZI
z)gzfetG6r-b&eM#f2Z4HV{d;mjv9+B_uW~;QH0L0@e<yLZ#Xg{<Q_+ryi;K#C*r_+
zC@cA#5t&9_O(Nf|tr?ss`P?P$H{Onk74JU5b$R18o_5KVswKXk(P`g-SHF3(?ECIk
z`w&5q$B;|l&*4+y*Wg>A-_SAeId~2D#+_r|n7e9idS!_}OZg7-&n-=1>BQoag7YuL
z{YNcGmvT);hY#kGzTdleX<7Laj_j8Dc{PV|^z@_5V8JEP-=40|lKlP9cS9p3yxV#C
zYNZ(RBsdGo_ZEuto~uk%7We*U$NSTg&-ogC>ETe3?^fzXp7<m1<QJDer?;9wF}h8w
zR+|Wl9Ev=KTmpX%p9;SQ-va%Hj)Bj?Yrr?|9Q(%HI&nU^lAkw~g&valeQ2-Aep?1}
zWTU-Lb)AFg=Le|A3tKs=vC~*0>APh==YO1@al{NfWTW?uBdrx5dCA{p7OtO}Wi960
zg1m7O-bYS<wMoi%{x;X$ldp?9A5(MKK-~Mqsb^zb+eJOx?3Ot|PoUjBu0D;h78Lm#
zITU#exdi?kJ{5iqz6JUX9Rr_(*MM)_IrfdYOZVJ+$q(YF-SXQN$<JHOR#q_+^IW%%
zzr7-li@Nrm(ck_+{J!c>l*DuM9rJV~eLodn*ec~(SzOxK4bD8hAAiyM_HGf^CV|h_
zpX2Guih^De-XGrQuGcj1)b(NE&TMh-|M@TN^V(RTx3$?HofZj--U0a=ITU#exdi?k
zJ{5iqz6JUX9Rr_(*MM)_IrfdYv$m{Ako??*V`p0D9OdY3SMNND-@o2&X_I`7%xB;}
zDbLRv{AXm)D4uS|eA8JmpQl+K$p<BU@B8;+gWTgh4cy)_L&~)$I<1fUS|$4558)rC
zDG5aLit;7A2QGi*VK`I7eZb_(ryhc$Z$a;X{EZxnJce8Xe-57tzXsm|{f3T#&%tZJ
zH|`w!#@re6QUkW%=4fD=i=&id`&p})KIy<y>pgkrE25vL!5vmFvEeCu=Fl<O-aIMA
z9M_lnoSiD!8zo=s`l;VyN#7@gwN|#a@HF{We8Q-H0x^!=ojy+x$k%dR7mcOj|CpW=
z@b^|h(KDfMLGOV4jU0+RhFk)F4xb9Y2HyhxhK_;H!E3-b?i~BZ+~JLD;(q?-NJw<m
zn>m=LcO8?%f@kog*8ga##P6H)YJ(*||E+<`lZkm|v9qtYl;?EV|4vIh_tr;yY^aJr
zGyYx`lrT!5nnwo>tn3B4-d^4<bb~-%Um7ZGqXk93hMoz13wj6SZ$%FMe;z|Ffj@^&
zg<pejfqp~Bz~|sK;2U?2ePix<zs*Xd{BEK9;-S>v_0I4-F8R5q#|9M6P3LJ7y_mGU
zP}INvo5o3g{^y|HuX=S7$Zy%%U!LN<8)lsz>}@5`1XG(PbvJ>6Q}<gg4;3ghL#Uma
zBq(}C^lRvu(6^v>K>kJ!MIJ*gfj@^&g<pejfqp~Bz~|sK;2U?2ePixQBhwRsVImH!
z_GU=BHu}{zU8%oOd7K(MSH!v5p`Y;*zniPh%L&jDXwj|>YT1(ndboW4f+X?Y4;O!S
zvknw!aO_>%#CY*P1g-0nctue3spu8auc2o`--6x&`5QSDc?`J({v19Peht0_`VAce
zpM%$cZ`?Wdjk!C71YFd<FZP$|EuZr%MW1W)?+FLd$43WF81_)axqY%poVuw%zq(ol
z8ZQv&khgQ4rk_B=nz{~ov|qf3E_;p{W(xHCxKXN2p`hrI(WjzUM8Aff34IHC2jp+$
zP~<V>68LlYRQNUc7U(y0415k=1HN(R*f-{$R9x+&{*$Mx3IFO!KDNWnui80A0+nj~
zWmw}N(4da1qnkVhI{#mn@;4CzDK)gcjZPJ4j8*z3<-6kje>lE#c7>qm&(R~JPereY
zehobn`WEyK$lu7J$YaPQ@aOQU@N4ib&~NA%_#C_jeB;isZ_It6`^z(N;{^Jd^Jd0P
zJAvkQx^=O1tw61+LGt&z1=_mL#G>$&Ks&ywXh+=?Xh4^&&bqI~+_OB`IkH|*yc?iD
zM~{p?6}=+*HS|pAThKcoe<Oz?k0F=9pTnoZufexKzoBE`bMPAQjXTG_F?Xq_P$lX4
z)kjVV??rx_p6}vRb6g;PyxsJ-R|R@Kry(roxv2YI4fbA)K$CX6UTOU!DBer(Zh-zA
zJu>=K^or=$&@-WLLGOV4jU0+RhFk)F4xb9Y2HyhxhK_;H!E3-b?i~BZ+&=?y&IySE
zRc-cq(Uv37vKyy0IzJO=&#|q0+iC@R^{AhAWt%|T)f~?!Y1Av;iSS;6cLVh2=#kN<
zqE|$}hMoz13wj6SZ{$$qG2{~XbNE#FHTV|jH*^eq4qgMkap%}K=H5R;f4**!K-;?g
zV61Bd8eO#Qz^eZQ>fLm)pSOBFWxd`%&V6LP;(ZM7M0hX3y8-%h^vLK_(JP`~L(hc1
z1-%3EH*zTQ7;*{xIeaSo8hi`%8#)F)2d@F&xO40qa|cCLltlj&$j4vvd5Ky*eYw>y
zy_Z2fU2&+cv7T11co)R`7~YBSUV?W6^ylc2(WjzUM8Aff34IHC2jp+$P~<V>68LlY
zRQNUc7U(y0415k=1HN(R*f-{0|8v#?yW#Z|;T$6`oKjEM&*yYAUsSJn55>D6-pBAx
zg!dA>8=yZ&kBmMQy(0QG^i1em&^sW1BZnf7A(z0P!>7Wp!M8xap=024@EY)qJIB5;
z_e}MfH$oQHQ@F>dcyrHs#XB(GL-8(%_c6Q^;k^X!2I$YxBco46uZVsPJrnvC^bW}1
p$f3w%$R+US@Tu@?@Ga19=ot7Myas&Z&arRIZKKxEZCXe@{SS!{me&9P

literal 0
HcmV?d00001

diff --git a/kernel/MEHR_4L6L_2d_32.ker b/kernel/MEHR_4L6L_2d_32.ker
new file mode 100644
index 0000000000000000000000000000000000000000..380dfd93d89f114f93f4ec48fe83dfaec9856652
GIT binary patch
literal 8192
zcmYLOcUX=68#gL1p`xs$Br0U26pFVHsYoRyD?|y2L=r`bQY0g9MMi~+GLn)zWM-G_
z);Z@{S((ZDJ<oOdoZnyPI)C2JeShZn;hfn!jAJs!#bv8_v<Y}f?z6@mUlsL~qQ-a(
zJCXg8rrhuP<Nm%H+I;Z$@X!@?WIVe(a^&oKG96y}!?J$^WqEwmoL<>LbqlvXZdfLx
zv&SMU?Y_#$wBAc=>nb@lkG7xCER)j_x9F7~&I+1xUH!tRTm{X(v+ESA#!$UuTX{zx
zhFUW0E_FZ2(3U@0ndzSy`o?<Q)g8l<QE-LM&j^;Z%r44PN?Gdfee~zJ7MAJ@E6OY^
zIC2lY&|DD6QQSK_>&6W6zrJeg&Q%;;k?H2ytMZiJ)@*2=1y7%bQU3A;Jk2OSqjNZs
zr;N=5m-=4h33H#$9?@L-PZb4Z{m6{UeMZ?45#Q&UzoG$)iZyka8ak_5b53Jh9fdB8
zOx0OYPp=nwY+qpBKo^sYnwGt8AkDD$)DR-0+E9%xmw(F0XkV***;+Zxv8x{)&dF)+
zWKVT7R|Q$W^iZ3aFV27U@U@Ee4E-n1+q!=aLsKspT6pC$H26!K4~M=o<T%H0HE$!%
zUEc0i+9sABRPK;>A(kHX>bElG2TQ9%#=Z-(<jDGfVMygNj{1Z<sC_-m(d>-Z1u9QC
z>XlL)Y^=u9^@9Q*Wy#Y-E~|~jLY{W_G!K}ylc&R9ihgu1=9T-#+-<h%TCUZ6N#XM?
zlAdjQO??vl%dLxRDZ{$wG^Yo3WI4K(+1reII(otL`+TnkvX1)o`Dsf7U7gyYdfE;d
z&D9#6QK=y(%M|P8i?MRjYojJqeUa1PI<>GpJ_=GSU*x7=tRR&E^5{if81iXy^)~Ti
zNPF#uE1k|V)W-GCob;azecTs%XNo;b%a2!`SQpJw;rb^x!>_S)?{{^4`X83wTDx$1
zqc~b#V!>Ff<S2K~9$D8+j)u*$+{ZlS$n$MbZRI~aeed^b{ckIt9Iwya8RN&3_NIj<
zF-bg4YJEGk+a+Fk=h!#qzI$YCRGZ<oRA^|VpCaD<v}WF>ja5Av=c=$pZ|f<;RQt7Y
zas!pbZ&CC#m66{@tHsF$GRp9?ShI47oO)dzyw~ZloNBF-K1HZ2NR0-E8Lv=~mwQJW
z>pKc^`Mh~Rm;pn62|jko!3-&eEj*%gnV~-adJI{w#?soi27abaES)cpT^g~2rKuTn
zL(1>4WEpdi?A65k&et4ZF^;1(B}M-C)^Id3e9u_hlN_b2yFViD6-NuTDi_#y;wjuB
zr*j`0p3Da<GnyN~Q%Rx6alO4fbxj=CWexF4eB;isZ_J&x>{@V?Mm?3;#RPXMsHgEW
z&9hpZ8fZ~)X8KC;?hl=w{=Q<7jB+%dG%RV5(fxoRQ<a%=%It7^!G>#cq9teK*(M4K
zxBiEmV-*yieENcJoq~2P{p_4Sf}y54Rld77Gc?Wo*jnp*41M?X{?uNZrPSOwE8m$c
zCH4NWDPccLcRbF9S5=AMV|K@!)fybF&$Ej?Jc*+Zy6GDdBRI<I9aQR9z>)g%#h+bd
z95viK_%fg?Pu>e<m!8@2l$idlrf>yMPNiQP^#0@No&FN-{^h)q*MM)_IrfdYPwfmi
zx>Urw$BVr&9nLk7j?1cPEyHDW<NC3$PA6sL9^4~uTyGKQp(+=O#k)^B_44l5H{y4=
zYxh**z0_h;*9ln)vT~G79r;5+4%%j&_d7Av-|pA1JNp?rA9T6Ytd1ey?YvHx{vzH}
zRaahJ%u+(jw5?H@EDh6OjN<EAGAWAXv~|V1ueE5J>c){*@Yd)>u_FIX%00NDn4_r|
z6IK;Ba#VlH`Gu`6PX(8XF7|U2=M9+Zv2_hk!{p8>5g9zSo8mNI{|>L@bMPAQjXTG_
zF*h?}tV<sQ8L0?Y?c7AXo7_7j8!z&`uU^4~^x1MsoqETj@UooLGTeNp_g2t{N9RLU
zu2E3pZpRSm?$=G8cDJq<L!TZS8Jt)m^8T0#m&$yGyoc7RWQe@CzsI<D&ZAiRR(83(
zYZyyCUcCHiTF8>>ltonbfu(u-7IslJ<*3^(on8j>IC8taagkLrN9U&9uv&hdBi{{o
zYwEs;_o124bjpY)pF?^nP1AY$Soz{+Sp-jgZEvkNKhD#mZGQE$9`Q;Y1D}J}fN$J6
z_Kmq6if+dD;ANC>^p(YjiE?^X=f2Ka#CzwHU$=~Ek<;tkxDyiJn`Wi|!yHji-o5it
zE*}+iyU)_pz_AQzR9otL$1?P=TgDFQ?&IE!S}|OcrP-Zp43@jHlw9fkM>~<F+-8N<
z^xG_j@7~gHavP48RIPp|@m~5-_mI!aIr>%<Wh3d{rkjRw`j16EXuD}`Zfl-uV&-(u
z8OoD>;=2^{**vZ9uDi@DmZ#|)vuIcWPb!}JukSwRmHG`G1D}J}fN$J6_KmrBjxaDB
zzF$rU@*>7=5Or=>eao0D)(R?WvM_KF@$U9nFes>0(3bj^GpF<z((K+fS>pTLt|uBL
zy+56sbu{`7L;tEyZ526~CATgn9}SkWbSxrjr*!u<f%(3rH7r$H<WBgY!%>mnsT@lu
zj^>rw4LZJ&qlox&bKiW9mR9O9>*_@QbI|Ujroq$rhhH@%-n--&*Y?R0o;o{U&9U9h
z)A1Ryy#XaWP2LdHKS0JSeGBv(ItD%muL0k<bL<;)8*11mUm2#L?yD}1?-Zt>ZrvK@
zr<EzFZ&_?NPV_kw_R4G|yt~cK4PBMUka<OU-K+-;h5P3HvG2@Mhn&k165sdU_F%fs
zR*~=0Uwo1DK5~oa67#PtHJn;f5;KUS9?_|Nq`S|m4m8wB;z)n;E&Fv>I0|-soILvj
zM;D*E@2Jw_DNQ&gZxV4|@~lnt^$?!)hN!KZcaW#1PQ5<ezrj;K-3!wU-|<Sn2Hyhx
zhK_;H!E3-b?i~BZ+_&c0Kauo3aa8$kiRV((b_~2ShN0I!=Pt|)7xmpFzwMce3|a16
zsVCumae&E86?2ws%b&;H4`gY8zt8ICqbyas^iphoA^IP!=Y<mA+vz)*SJ`vq;AQjX
zbO=X&Lq7|W-p9pzwv+tcRpw2YboWEI9NqTIjHfd;51ci;c`|S^i+{ga<hk1Z2Xb;m
zzvF+`$+c3{1-D;s`R^kB|N2z;HTV|jH*^eq4qgMkap%}K=Kk}dU&g|b3_Us5vQ^Ue
zzE!=}NdE5g_h04n8HR?`TwNWk&yrpIM|Vl*6pVEz9TD$ur)RyEe9jH~lPU?{SW<VL
zeQDxA5%<@!v$B0SdVligz6<djRcF3FCh`5261%m3SdQ+TX(@5k7I7_acg1fUPd}?Z
z8%uisw8=P6y8GTUm;RRg9(T4~`l~vgii<AY%2yMV{v19Peht0_`VAcepM%$cZ`?Wd
zjk#;bgvUL}W~h~>dQ#6ChBlW*_Ua(&`|ZKY>-`*AlDEqqDfxTf*v@xO6tFZR!T#+G
zIZN8pzd1{Ik1&hT33A}*MNp3r$>#{siGCf9iF0o@x^Dc4qhkXr6HR}Ky7ySyNymhz
zp0iU^CBC0Ewrj)n^`if`4E%B62v1D4m3ruH@psKR_XY2H3UN8qeTlZ9%q8&W@Tu@?
z@Ga19=ot7Myas&Z&arRI9o4pHti<mFY<qo@d~NaM>Ti8RSQ^qXEM4NcqrT~0cGWCh
z9idez`MVw}rGF%Ue;{mg*u{mS-gzH1mhfKFF-t$9lq1(~n=ZZMIBK<{xnA-)JCeQp
zK8Sobrt{1_lF$7|ovO?dc&Zqo>GkfchyxAFT8ZyXP8h^L_|4NaExPd2P*CPE<P!LE
z_*D2c_!j6lbPRkBUIV^y=h!#qe&63E@8L?8W+cW$%sj+WM8U9(<M%|I8#=^Fx#sk_
z^9GW>k6(3VV$}?eNMC2x$0&}>k8anUp2yL)QuPeU-!C(8jgavE<;w(}Lj!sGdbdxT
zQa7G#j0UBviF<dy)+tT$ITyPBS7s#goz7m>ls|%)_a>cj=%OKzrP{=~zUG25ha!(5
zm%yLHr^2tnw?Mz4W8ib}8t{!f$G$N)uR1<a^7DnKz6VSE?i#7*5z?2V(f<WE`#Ezo
z>C~=GLMTTeapxyW`X1E&RGsG?j-H%08*A`M{0^)A>m`4;{6>=P345NlkFp&o;eCs0
z&<ZKvtyng0Uivwn7Ml&(X)NyDZTZglmY+QN55AY#!$6>86Sv)nwilH78#xqt47mjU
z96l9(4Za2X4IKlYgV%s>+&T7*xm8s6oaOs)G|0m^Me_5G2UmWT@|;|6o>xo?N2_Lj
z(q3Q2kxs*saEa%#w-4_s>H7?|)*q!@8#rT;@lqe2nonM^DT);J{m;IeOEP&%P`}qv
z!uu=BqJsK2JROM}x&EZMcV`VZtw+NJQXSOorKh`~>>ZH5kwcNkkW1jt;Zxz);9H>I
z&@u2icn$c*onzmayUL+Mq~zzqobR)f_HZ;Irsagh?+3<RdMo*w>L4au%JWV)QuNmj
z;z@m$k<Khvo(}FDzD?5iirC(L3sQLU$a<V1<=W?VJ>!~c#JM+qsUF)}pmNRHA_?zy
z?^i4sI6<IqCexnZTqG#_7W59t-^iiJW5^}&=kTfUYw#`5Z|E5K9J~g6<Ib^f%>BwP
zXa4#kj=pbq7$@b}&%Rd&+-N20+BxU%lAnLv{*&j7$vm}Qw!!FRpy=!V4CyWPIT!7`
z10-MTSXwew(sxB;$kUe3JnbpFo;0YNxc9NI+-gS(6u6?SjrL4|9BMZP-3k@I!{43>
zeG7UA<Zt9q<T2zD_;dJF_%-+z=r?o>d=6d%zH#T+H|CyZP!ievo1<s(M%okl^7K3K
zT4>mKF}Kfew_oCS)iv9LB|rb~tn9OLG0(iTGWL`5+~_082PB@$cpq-MQC%Q6K5+Hk
zK?3DRS{T_m3G_Dlm+Ho)0<}IA`^0g(@c(}8Z_k9j1-%3EH*zTQ7;*{xIeaSo8hi`%
z8#)F)2d@F&xO40qbH96}b6v{sQw(nvN&Vdz)6kugpX1+|mbe_{DLDP1`TA=-y`AD_
zD*5@xHO03z+X|G{zx8*2@!m7pRJ}kufugomzR{X5(0jvVTknkmO>CZDXPYJ{dqwnX
z=$X*Bpm#w2Mh-<DLoR_ohfjrHgKvR;L&w19;5FbIcaD8y?#aJ8?phPYQ)wsLgOaZK
z&2861>Te3eer|OUaqineJ3-=i_occy!MXw&+du1Ya+E+mJzZVW#Csogbio|^H3CKW
zpL5v1TcCF1ZfNd5Ehzg`^or=$&@-WLLGOV4jU0+RhFk)F4xb9Y2HyhxhK_;H!E3-b
z?i~BZ-0zH@9qWCWr)|#a&r4p4J*NM~WM|RGC;XeHe^tbJ=3w*fS{4E=wtcX2_;i76
z(wEitTp`~7;$3~p;{;mnu{L>Nwy1kAW6~#I6Mz5q$mmniE23XR&xF1Oy#w+$awzf`
za*5KP|MjWxYw#`5Z|E5K9J~g6<Ib^f%)QPtda+is*lYAQ=_UDC@0y(M`NIV2)@4%v
zdS`(${B>hK_zUE>(Bk2fXo2Re-CMamL!hni4+g1S5-6$aZJ5JTLD`?9M@FBDUJ?Bo
zdM5NO=pB&1kwcNkkW1jt;Zxz);9H>I&@u2icn$c*onzma+h^0tw8WvJpXo1*Eu13i
z*_YABANvWkz3<}s=aB-1>&A|}mMZ>Fr5>?`0_|x!*INIPKqq%qd&M*g%69|w=jf5q
zr=nLxzlNR(eG7UA<Zt9q<T2zD_;dJF_%-+z=r?o>d=6d%zH#T+H|Dml@OdHW`9#D1
zNzX-ot1{c_R-Yo?e?Y~U>N5hhZ7SQCe^;QZdo&ya7=iNYDso%?i2MKhUV?W6^ylc2
z(WjzUM8Aff34IHC2jp+$P~<V>68LlYRQNUc7U(y0415k=1HN(R*f-`*vZ^>N>=(a7
z&coY3@&zKnRZHcLKqfZ9TYfYMR2<{k^W_hLWL+L)rfD}S--+;Ef_DS-=jf5qr=nLx
zzlNR(eG7UA<Zt9q<T2zD_;dJF_%-+z=r?o>d=6d%zH#T+H|91dzwNAlTcEJ#FJ<=i
z0xhjb+cx*R$PXr+v;(yo=~kwZ*#gr><@*@kiSS;6cLVh2=#kN<qE|$}hMoz13wj6S
zZ{$$qG2{}ZKmY4f;n(0>px@9j@Hu!5_{N=M-<W&1{>OXUn+59d#Q*j~jYhI)|6hit
zaU+fVwYS!OY@_mB5btAnC&GIP-VM;7qen)cie3@@8hR%5E$AJPzmY?c$B;|l&*4+y
z*Wg>A-_SAeId~2D#+_r|n0wptD^sToXe3)73)wZBMpB&|t!CxksC*B_yCB}j@J@vH
z61*FrKSz&@J{7$p`Ze@S=v&Y`Ab%r=B99@Lz@NjX!mq)%K)<15;B)XA@QpjizA^Wv
zP_45OZjIz)-e;GUf1~mp81JEY7sUG*-ih#Df_DS-=jf5qr=nLxzlNR(eG7UA<Zt9q
n<T2zD_;dJF_%-+z=r?o>d=6d%zH#T+H|E}v_FHXiL?itVQIk*h

literal 0
HcmV?d00001

diff --git a/kernel/MEHR_6F_2d_32.ker b/kernel/MEHR_6F_2d_32.ker
new file mode 100644
index 0000000000000000000000000000000000000000..370063835340eaa43108975dd6aabf202d178122
GIT binary patch
literal 8192
zcmX|GcU+D89}kr+QASFcDL2v$w?Rpv<rb1;bR{Yy6m45sB|=7$$jHi0E}xQ6KSBf1
z?wn_ZLP(0=)8oF+U*~oH{5+q}dwvfED5b?Urc;Qg>jAwH&uLTY9lM3wU(pBFyXo86
zw`8-Y)7()B71U~9dvp8BDw<vRWTMr)YP$P%{5kVsHPo+A?a9od8j9cEJ4&{^mWrR%
zH`siwrS<iO#ZhZyq%kY|X;ZC?48mNBXF17fsgrrT#tU+Ck1AzYRfdd`Bh<QkG8Fw{
zensCjhO~y}XB}%~$my<W%>WCQrVKl*-xA5v+l&RD58Y+yk@1sG*3B%HtofdAYRZxI
zROfbAmvPkh_-?DZWRCXdSa<AE%#rKI;Hx$&Jk6c{?9Yp)Jh|#VyuM-)PfNax>whAa
zr>9-5SNwCESIljB;Of_Nor-DD^c{gdX|E|_mR+}SlQQbkBkP$BQ$ZJ^=8qe&y^6eI
zxUSkOtI6U=p7Rou8hTUiGdZBVhWv9=C(43qDM9DaR%)rGppOH+Wx+DaG46CHoRg9L
z!`XgDE^;b0&v&v*m(xu9!0-R-%#fyPSbD;IhCGZl+~!<h$g0w{OY&ESx+g8(&Reqd
zIBaog;s%zSewBafMJ#RB=oWG42TNV8ESdtwa+DChFQRBUN6x{4-M^mTXzAm28OkLb
zbrm`U>#Oo~`*q}pO=EeoJ8?qQ)SIW$nHQGN-NTc{-w%KG$mA9K#@rre%VI**-je%L
zrHkR)%ZUlxI!r&alAPx}PjP%!Mg3>zcd1XVrs_P~>x<^p(2M5J4yDaCbnW@ULYLjO
zG;4E;N3oiWQkZ$0GNWa*H#yU#<co}+XDL70?<uECr`E*kX3FV(arU*vy%-A1@K`^>
zhoQY4`c-zn#!#TwyS2w!7|Pn$TJB)OQl+YJJ7g<M*L=s6gy*ny{Dn%wv0p5ew!iaH
z$BZN0-D5wPuHvZsa(|(BDo2OcYa}wS#P6cBSyt4EC(9<A4L`^8lo>KMewz<Z4Gvi&
zx9#O=!SH)Yeg5SYcaD8yZuJ8tP1(kkw5Tc1V~2S6dY<luE#s=GPuwNfTkossPFd0=
z{rDOhJ>ua=ErVKGySC<0{FPez;XJo!)hHQ#3Ao8Po{*9Dtm)&|ca>8{R&=`lN;y5&
zRrC8RUrx^*hd74nF|;ehZejdthE7lYl&N`-q35ZIHesqP1*^2IHE?9<x?^%k<ZhOl
zE6g`O$QQpy@Yy^YRgM;TCyp|;;%Js}=T%SFaumIy(sn``N8uB#%r2I3RA0DzsZDpD
zJ|^e)8feMW_J^HDEb!w=^UJ35ItO?v-_l)kE%6F`<Ib^f%-!hoByXKsHJRu6ZRvQW
zn*26i&idw9LoGuVyS3f@!u+kP|F^i7GL&ZKEUl@fu35|fRCbq9zvAJM5jiqS@XYOS
zc7&V;c^)fwik6FbNGlppCE}p{d7tz@8G3)rE_mN&hWc|FJO6shP{+(+9lB_+6!5C2
z-9PT)-Iqu0ib-JUT;u6&#l<Z3HA%V_sK$|s{A}!rsT{d|?zkm3lA|$WlyiKpaOC1O
z<-2n&N0ZLgR{Hf8@!jQA*6T?;ovfKsb8{t6K>;$k&Jmun;*|Ohd%!Dr4fw{LW8av&
zewnpd8{Qpm^}5^fdJRSHw7JnVrk0$}H1=~$tEKoM#g<muGCCCea7)|Wn?{_;{Q6Et
z50`noY~#K8H%r`YPs{1{pI&pv{g6}mly_PQjtn((=eqnafuU@ND|tp$3=LiVMz7Z}
zmgrz*==~)k{!jMXvniFO(5lOS?5bvIRhWOh#sH3#6YRfDpTW^48~KjK(HzCDTXr%c
zlcSSUv)5(Tan$OhS}|b&PbO@~yF=`Gx_`vlJ!&ma*Ye*ViA?6ns6*-^-F#lb=ioKq
z8+VR<V{Y3YO$OR}wKTDk&2PiIQs<1%ZG8XY+vux_^JLUo&`@#Xo{T2FTr<X1TTUM~
zb(^<pt(;N~cSv_1nRp|wN{gWl9wP^zTguQ_lj$>x(iz&}cR=F3pT~R}oy^3$e<aKA
z9m>**?ECEvZ?e>(s_sEn14{<)Bm1fta5N->2I?*3$Y;_W-|_Jrtv`6wVnr@TPnOu!
zRDI#dqV;^^rJ+2%E_;9EqbpBG92VziMe<ZJxmTpgS)SC2*455^!7FqOd=6d%zH#T+
zH|AFNE%olpi+8`Ud49c}j4qFIleOV}z=Z5UX3a7hYwPCU#`iUgM_0>J<W%je;qUxO
z#Jz`3?6QdrdB^P4@Q7w;a;vFy_kkl;t{l^orCX(`gIBnSJU_o<d(BvuGUs)+aLr?h
zJzX+<T6>Nb8a<YHuWrts;KmgkZJB;R(!Gk|sxi8S99=6M6?UN=PmVsN{m+jU^{;;X
zQImP%p2JL6&WYw}uV>K`<10Kl538woQpzjz8#)F)2d@F&xO40qb9)<a3NlWRQHR#@
z%r>2~{@cAQ>n}Og-+o%whW8$Idp@NX$w}|ti}*`A3{Bj5M&kP}(=#N!x6XKbbn82Y
zzK>IG+3*KTQTNRn^!!<}RT?7QeXNPwqPrC={ks0yu0fNd-4|wG9P7x@FpYsmXV-JI
zkGX03PdZ17=k?-3syMPV{MkcQji({%1rqN~HFr=cSt|102H%Sl_KEsmy)wb?4o}Rw
zs^NaMyu!CYzoBE`bMPAQjXTG_G57GF&xdCl%V~0a&WCQHa=LqDzg}XNoPNFU&$juT
zI6hIrdwvg(1#4nOp3^FPHuo7r_FZ)wZF;a&zo5^$HoiaL^~p&yiY5Q(?~>lzsJ`_v
z`O1=`syuVsNRB)g<w$oQpsqSZb1z4a?p>H1lFd<dm~8*N29A<qHpCR`@MP7aO#V^C
z{m0W5+j4_=N>worU6{m^UW{#H!9$+Dg_pPt{=h5z8hi`%8#)F)2d@F&xO40qbAR#g
zE9v{MhA59Vo*SB7+?r*<5W773fP1+39sE>;E4LZadl4w%eSNTty|M{Q*&M&IV3~+-
z2hZhACt2Eg+@Wgo8<v91&q{nBG{Moh*oGseCat>5K^(2R6)5Tbt%3HE-!lj`mhOI9
z&A=V4Ms4^Ga8~mW`7cnut9~<2N-1@R&!6XMW6U8Zmm;24U&#N!f9DlG6@CrA1^Nvg
z1D}J}fN$KnV&DJG{Z=(9z<V4+O@>`1eK$$(FZny)X+`I~7=}{D*?FzjWl6`Vo1}Ak
z<=@)1;oa!Cwd8X)u;D*<e-r&qO3f`hLykg~BTk<2<f!pU!TwvjME<`PAo2Z=@80Wv
zu_FI(+J47ggQt-e@+==Ko?cGxCh7fz$$rw^o9pFCes9geuw!LaJZYb@%u81l6#g7O
z6@CrA1^Nvg1D}J}fN$J6_Kmr<{~hjIaz^C4b{;|fE5zT~Dw4h@J&h{xu@`av?<vXO
zr&rnCJ9mYpv#YgaZZei0D!-HPZX7+LXTTJWrfBz+e9rN(y^Fe>;%K9N(|!FH9Bp^B
zk1=Q!^?qL`2h9;8-#xFF_`c-&mFnCuQSbOkKMtqx)X4nRbzL4$DQYD%7uEBWtn#Yw
zQVl_oOW@DpQ{mU(TcF?2G4MHf4fw{LW8awj7dukock7lflCL#Ov0w%Tv9zy2RpL3n
zXN=RNmn>;5O_ThcUXqRE?{EH_7<k)T^z+K?CA^na_STNP%h8_LoZBBbj!c>u$>-$P
z6?iv@J}*bhO!B#TeZCeM#qjj1(@ysf*F^uTT`Tc@z~A3?KKm*D?|$P}%V0r~$B;|l
z&*4+y*Wg>A-_SAeId~2D#+_r|n7caf!I9^ySSqwSy2AZ9OJ&U6eP^GtqAv81a!p6g
zAW7dtpN^hb>?Z1a(8rmdHi^1E=TINliyUe0?<e_tKeOLDr!&8x*KtFhvhV+?baw_%
z?2Ic%RmHuludA1Q&dH6rG*slf6#eB1zXXvN45m%#r6v#;ue9JF6G4$fk;jlr;LqVx
z;n(0>px@9j@Hu!5_{N=M-<bPShKc0o2cCE!@q2l|dHbM29BpNf*84bdlw)KVBCO-6
z?!q2P-+enJzMY-VQSJ;?bG^?Tnb!XHcS*S==I3n0Ju7vU@cy;#l9caOgp|)ecAclP
zr2et`qRx+X%-q@B!qewBBUAh73G_l={oz&{L6N_aLy^ajOW@DpQ{mU(TcF?2G4MHf
z4fw{LW8auNx!~qmejrCJS5qWEZ}HYt%5z%9GhDYF;wWTxQ;)DLjtaIcka+Irt7b{x
zZ?xYi<=Wg+>xcS#@|0WZW}dN;r-NHg<t|O->6FfIy#JN3I=%Xx_#Ldqgr<pm*VWq6
z^Til}Zchn%Gkd0>=pB&1kwcNkkW1jt;Zxz);9H>I&@u2icn$c*onzmadw$&($<Jk;
zpHek-KS%RMEtU8^S*x4mYqTnrr959%8ZaPuBv0YflGNwA@H9TfOw#v*Cyo!ia)_sG
zTS>~bjxk=_KURo3M>|R;wiC!M?z)8cw7Bqp3~dGSp1AY%qs4-vZ$a;X{EZxnJce8X
ze-57tzXsm|{f3T#&%tZJH|`w!#@wM?jYn99sBiZtNI6!&_nJWum3T56cuw;3zJ>2+
zxJ~1!u%GInv}HUs{}?azId^syO1?J2Awkl2kJ{9d=0={L#aYLV>?6>DV+UO;#|fnF
ze5IX+yFfQwlU6=nCn$O*^eyNekiU^bk;jlr;LqVx;n(0>px@9j@Hu!5_{N=M-<Ufv
z&^Em3CrA2*)@rtcc+&M>z9w`sPbW72?lH{cOe8-)!!0XaCgz#y2dAVw_cYUA;<-gb
zQ}ow&704#;)#?Ky1-kaXvcZ!a1^OEOMtQxzKrW8^itTp@ihd0}6Z#hP4#?lgp~z#%
zCGh9)sqky?Ezoc182B8#27Kerv2V=XxIab8?}EYa{%&$hwdCi*FSp)uKFL#kld^GG
z4o}lZRZ4#T@b`pAJv#_wWjFkruc&WUrtP$sO%iBP+Uh!WFM+h(y)8Y~3sl~3e&vKj
zLD4IsUqjD?z6HGl@;7oQ@)&Xn{5gCo{2F`<^cy+`J_oM>-?($^8*{(T^xnO8ljzqE
zzLRwAj_L3IX54~VTb)ImA3T{M@q2cT>bcbe1WGYf?wn>OP~wuCj)~&EPxNZ_v{@_u
z{~T$Tuuq_?p10K!E(?l26}=+*HS|pAThKcoe<Oz?k0F=9pTnoZufexKzoBE`bMPAQ
zjXTG_G4~3Uf)m>Jc=|hj>dQNCc{*-uzSl|g@u~3(bnc5dZ`aNwTHREjLwcP8#<&X9
zSzcP+f2BY^-bOkPb_x^_WVO%mj6g?nf)c0Y2#OvVeJXlI^lRvu(6^v>K>kJ!MIJ*g
zfj@^&g<pejfqp~Bz~|sK;2U?2ePixFzBPHPH;K7saEat&EgJS{rW=cXwsPT+YA1nu
zfA`ta;49+W^I}2CR+0bh%?fuU3l!Mif2Hcb0$Cp~3z_mt{Qa*#M~{p?6}=+*HS|pA
zThKcoe<Oz?k0F=9pTnoZufexKzoBE`bMPAQjXTG_F}Hmm$0M<$1uAp<WO>s;Ahmk|
zrwe@qip$BZEZr#by}R1joTCEiSFPw9eN&)kTcVV7UkFrr`my`AIzjPnfc_jkGWt~X
zis;wSGof!m?|}S`9Ev=KTmpX%p9;UG@Gbx8H*^eq4qgMkap%}K<{qiPL(+5e%fWj}
zMSk-(XmzPRB+%a*e9T{76)4;{A~OAnKz@P3R6j<b!yO--Z~i4H-b?Uqfc_jkGWt~X
zis;wSGof!m?|}S`9Ev=KTmpX%p9;SQ-va%Hj)Bj?Yrr?|9Q#(xJ<Bp#NDz75)9dk%
zbP@MQeyS<w3*;rY*!-hL^!r7Y{oei%NGbf;sYH!B#XAw+OYm-h{v16r`c(9a=-1FQ
zp>IL&fc%Xdiadr~0)Gyl3cm*50{w=LfzQEfz&Gw3`^MawxoaGC^8`vaN~*D`7O10A
zNc4j5qW@i^)O(qF9W^zN`eTtno#K5A??iYn!Mg$abM(mQQ_(A;UqjD?z6HGl@;7oQ
z@)&Xn{5gCo{2F`<^cy+`J_oM>-?($^8*`6{%X_k;NyPtRx5v-b>d1Ni(c?Y!>&PSc
ze1*+KasU6l3*vnY??iYn!Mg$abM(mQQ_(A;UqjD?z6HGl@;7oQ@)&Xn{5gCo{2F`<
z^cy+`J_oM>-?($^8*}^Ale5F{I*RbLt;w;hBda^9D&uF?Dc(czE{OLryc6NQ1n&mu
z&(R~JPereYehobn`WEyK$lu7J$YaPQ@aOQU@N4ib&~NA%_#C_jeB;isZ_KT=-1utb
zj5^vo&}q+j-#W!RFy2G)E{OLryc6NQ1n&mu&(R~JPereYehobn`WEyK$lu7J$YaPQ
j@aOQU@N4ib&~NA%_#C_jeB;isZ_Itw!c28yWF7q<dUbM+

literal 0
HcmV?d00001

diff --git a/src/green_functions.hpp b/src/green_functions.hpp
index c970cfa..0dbcf86 100644
--- a/src/green_functions.hpp
+++ b/src/green_functions.hpp
@@ -80,8 +80,10 @@ static void lgf_readfile_(GreenType typeGreen, const int greendim, int* N, doubl
     } else if (greendim == 2) {
         (*N) = 32;
         datasize = (*N) * (*N);
-        if (MEHR_4L == typeGreen || MEHR_6L == typeGreen || MEHR_4F == typeGreen || MEHR_6F == typeGreen) {
-            FLUPS_CHECK(false, "MEHR kernels not implemented in 2D");
+        if (MEHR_4L == typeGreen || MEHR_6L == typeGreen) {
+            sprintf(lgfname, "%s/MEHR_4L6L_2d_%d.ker", path, (*N));
+        } else if (MEHR_4F == typeGreen || MEHR_6F == typeGreen) {
+            sprintf(lgfname, "%s/MEHR_%dF_2d_%d.ker", path, order, (*N));
         } else {
             sprintf(lgfname, "%s/LGF_%d_2d_%d.ker", path, order, (*N));
         }
diff --git a/src/lgf_3unb0spe.hpp b/src/lgf_3unb0spe.hpp
index 3f0e9b0..c626d16 100644
--- a/src/lgf_3unb0spe.hpp
+++ b/src/lgf_3unb0spe.hpp
@@ -314,4 +314,62 @@ void mehr_6f_3unb0spe_expansion(double* G, const double n1, const double n2, con
   G[0] = (-0.018229166666666668 * n1_12 + -0.018229166666666668 * n2_12 + -0.018229166666666668 * n3_12 + -0.5625 * (n1 * n1) * n2_10 + 8.088541666666666 * n1_6 * n2_6 + 3.5 * n1_4 * n2_8 + 8.088541666666666 * n1_6 * n3_6 + -0.5625 * (n1 * n1) * n3_10 + 3.5 * n1_8 * n2_4 + 3.5 * n1_8 * n3_4 + -0.5625 * n1_10 * (n2 * n2) + 3.5 * n1_4 * n3_8 + -0.5625 * n1_10 * (n3 * n3) + 3.5 * n2_4 * n3_8 + 3.5 * n2_8 * n3_4 + 8.088541666666666 * n2_6 * n3_6 + -0.5625 * n2_10 * (n3 * n3) + -0.5625 * (n2 * n2) * n3_10 + -15.875 * n1_6 * n2_4 * (n3 * n3) + -9.953125 * n1_8 * (n2 * n2) * (n3 * n3) + -15.875 * n1_4 * (n2 * n2) * n3_6 + -15.875 * n1_6 * (n2 * n2) * n3_4 + -33.1640625 * n1_4 * n2_4 * n3_4 + -15.875 * n1_4 * n2_6 * (n3 * n3) + -9.953125 * (n1 * n1) * (n2 * n2) * n3_8 + -15.875 * (n1 * n1) * n2_6 * n3_4 + -15.875 * (n1 * n1) * n2_4 * n3_6 + -9.953125 * (n1 * n1) * n2_8 * (n3 * n3)) / (M_PI * pow(n, 19)) + (-0.67265625 * n1_16 + -0.67265625 * n2_16 + -0.67265625 * n3_16 + -27.74296875 * n1_10 * n2_6 + 7.79296875 * (n1 * n1) * n2_14 + -65.1328125 * n1_8 * n2_8 + 13.2890625 * n1_4 * n2_12 + 13.2890625 * n1_12 * n3_4 + -65.1328125 * n2_8 * n3_8 + -27.74296875 * n1_6 * n3_10 + -65.1328125 * n1_8 * n3_8 + 7.79296875 * n1_14 * (n3 * n3) + 13.2890625 * n1_12 * n2_4 + -27.74296875 * n2_10 * n3_6 + -27.74296875 * n2_6 * n3_10 + 7.79296875 * (n1 * n1) * n3_14 + 13.2890625 * n1_4 * n3_12 + 13.2890625 * n2_4 * n3_12 + 7.79296875 * (n2 * n2) * n3_14 + -27.74296875 * n1_6 * n2_10 + -27.74296875 * n1_10 * n3_6 + 7.79296875 * n1_14 * (n2 * n2) + 13.2890625 * n2_12 * n3_4 + 7.79296875 * n2_14 * (n3 * n3) + -98.109375 * (n1 * n1) * n2_8 * n3_6 + 13.9453125 * (n1 * n1) * n2_12 * (n3 * n3) + 13.9453125 * n1_12 * (n2 * n2) * (n3 * n3) + -27.64453125 * (n1 * n1) * n2_4 * n3_10 + 86.54296875 * n1_4 * n2_8 * n3_4 + -98.109375 * n1_8 * (n2 * n2) * n3_6 + -98.109375 * n1_8 * n2_6 * (n3 * n3) + 13.9453125 * (n1 * n1) * (n2 * n2) * n3_12 + -27.64453125 * n1_10 * n2_4 * (n3 * n3) + -27.64453125 * (n1 * n1) * n2_10 * n3_4 + 254.953125 * n1_6 * n2_4 * n3_6 + -98.109375 * n1_6 * n2_8 * (n3 * n3) + -98.109375 * n1_6 * (n2 * n2) * n3_8 + -27.64453125 * n1_4 * n2_10 * (n3 * n3) + 86.54296875 * n1_4 * n2_4 * n3_8 + 86.54296875 * n1_8 * n2_4 * n3_4 + 254.953125 * n1_6 * n2_6 * n3_4 + 254.953125 * n1_4 * n2_6 * n3_6 + -27.64453125 * n1_4 * (n2 * n2) * n3_10 + -27.64453125 * n1_10 * (n2 * n2) * n3_4 + -98.109375 * (n1 * n1) * n2_6 * n3_8) / (M_PI * pow(n, 25)) + 1 / (12.566370614359172 * n * 1);
 }
 
+void mehr_4l6l_2unb0spe_expansion(double* G, const double n1, const double n2, const double n) {
+  const double n1_4 = pow(n1, 4);
+  const double n1_6 = pow(n1, 6);
+  const double n1_8 = pow(n1, 8);
+  const double n1_10 = pow(n1, 10);
+  const double n1_12 = pow(n1, 12);
+  const double n1_14 = pow(n1, 14);
+  const double n1_16 = pow(n1, 16);
+  const double n2_4 = pow(n2, 4);
+  const double n2_6 = pow(n2, 6);
+  const double n2_8 = pow(n2, 8);
+  const double n2_10 = pow(n2, 10);
+  const double n2_12 = pow(n2, 12);
+  const double n2_14 = pow(n2, 14);
+  const double n2_16 = pow(n2, 16);
+  G[0] = (0.02976190476190476 * n1_12 + -0.7738095238095238 * n1_10 * (n2 * n2) + 0.44642857142857145 * n1_8 * n2_4 + 2.5 * n1_6 * n2_6 + 0.44642857142857145 * n1_4 * n2_8 + -0.7738095238095238 * (n1 * n1) * n2_10 + 0.02976190476190476 * n2_12) / (M_PI * pow(n, 18)) + (0.175 * n1_16 + -4.2 * n1_14 * (n2 * n2) + -6.3 * n1_12 * n2_4 + 15.4 * n1_10 * n2_6 + 34.65 * n1_8 * n2_8 + 15.4 * n1_6 * n2_10 + -6.3 * n1_4 * n2_12 + -4.2 * (n1 * n1) * n2_14 + 0.175 * n2_16) / (M_PI * pow(n, 24)) + (0.016666666666666666 * n1_8 + -0.06666666666666667 * n1_6 * (n2 * n2) + -0.16666666666666666 * n1_4 * n2_4 + -0.06666666666666667 * (n1 * n1) * n2_6 + 0.016666666666666666 * n2_8) / (M_PI * pow(n, 12)) + -0.15915494309189535 * (1.6169364357414509 + log(n));
+}
+
+void mehr_4f_2unb0spe_expansion(double* G, const double n1, const double n2, const double n) {
+  const double n1_4 = pow(n1, 4);
+  const double n1_6 = pow(n1, 6);
+  const double n1_8 = pow(n1, 8);
+  const double n1_10 = pow(n1, 10);
+  const double n1_12 = pow(n1, 12);
+  const double n1_14 = pow(n1, 14);
+  const double n1_16 = pow(n1, 16);
+  const double n1_18 = pow(n1, 18);
+  const double n1_20 = pow(n1, 20);
+  const double n2_4 = pow(n2, 4);
+  const double n2_6 = pow(n2, 6);
+  const double n2_8 = pow(n2, 8);
+  const double n2_10 = pow(n2, 10);
+  const double n2_12 = pow(n2, 12);
+  const double n2_14 = pow(n2, 14);
+  const double n2_16 = pow(n2, 16);
+  const double n2_18 = pow(n2, 18);
+  const double n2_20 = pow(n2, 20);
+  G[0] = (0.32083333333333336 * n1_16 + -7.7 * n1_14 * (n2 * n2) + -11.55 * n1_12 * n2_4 + 28.233333333333334 * n1_10 * n2_6 + 63.525 * n1_8 * n2_8 + 28.233333333333334 * n1_6 * n2_10 + -11.55 * n1_4 * n2_12 + -7.7 * (n1 * n1) * n2_14 + 0.32083333333333336 * n2_16) / (M_PI * pow(n, 24)) + (3.712121212121212 * n1_20 + -230.15151515151516 * n1_18 * (n2 * n2) + 879.7727272727273 * n1_16 * n2_4 + 2464.848484848485 * n1_14 * n2_6 + -1833.7878787878788 * n1_12 * n2_8 + -6370.0 * n1_10 * n2_10 + -1833.7878787878788 * n1_8 * n2_12 + 2464.848484848485 * n1_6 * n2_14 + 879.7727272727273 * n1_4 * n2_16 + -230.15151515151516 * (n1 * n1) * n2_18 + 3.712121212121212 * n2_20) / (M_PI * pow(n, 30)) + (0.02976190476190476 * n1_12 + -0.7738095238095238 * n1_10 * (n2 * n2) + 0.44642857142857145 * n1_8 * n2_4 + 2.5 * n1_6 * n2_6 + 0.44642857142857145 * n1_4 * n2_8 + -0.7738095238095238 * (n1 * n1) * n2_10 + 0.02976190476190476 * n2_12) / (M_PI * pow(n, 18)) + (0.058333333333333334 * n1_8 + -0.23333333333333334 * n1_6 * (n2 * n2) + -0.5833333333333334 * n1_4 * n2_4 + -0.23333333333333334 * (n1 * n1) * n2_6 + 0.058333333333333334 * n2_8) / (M_PI * pow(n, 12)) + -0.15915494309189535 * (1.6169364357414509 + log(n));
+}
+
+void mehr_6f_2unb0spe_expansion(double* G, const double n1, const double n2, const double n) {
+  const double n1_4 = pow(n1, 4);
+  const double n1_6 = pow(n1, 6);
+  const double n1_8 = pow(n1, 8);
+  const double n1_10 = pow(n1, 10);
+  const double n1_12 = pow(n1, 12);
+  const double n1_14 = pow(n1, 14);
+  const double n1_16 = pow(n1, 16);
+  const double n2_4 = pow(n2, 4);
+  const double n2_6 = pow(n2, 6);
+  const double n2_8 = pow(n2, 8);
+  const double n2_10 = pow(n2, 10);
+  const double n2_12 = pow(n2, 12);
+  const double n2_14 = pow(n2, 14);
+  const double n2_16 = pow(n2, 16);
+  G[0] = (0.02976190476190476 * n1_12 + -0.7738095238095238 * n1_10 * (n2 * n2) + 0.44642857142857145 * n1_8 * n2_4 + 2.5 * n1_6 * n2_6 + 0.44642857142857145 * n1_4 * n2_8 + -0.7738095238095238 * (n1 * n1) * n2_10 + 0.02976190476190476 * n2_12) / (M_PI * pow(n, 18)) + (-0.175 * n1_16 + 4.2 * n1_14 * (n2 * n2) + 6.3 * n1_12 * n2_4 + -15.4 * n1_10 * n2_6 + -34.65 * n1_8 * n2_8 + -15.4 * n1_6 * n2_10 + 6.3 * n1_4 * n2_12 + 4.2 * (n1 * n1) * n2_14 + -0.175 * n2_16) / (M_PI * pow(n, 24)) + -0.15915494309189535 * (1.6169364357414509 + log(n));
+}
+
 #endif
\ No newline at end of file
diff --git a/test/src/convergence_test.cpp b/test/src/convergence_test.cpp
index 2517dd6..1385689 100644
--- a/test/src/convergence_test.cpp
+++ b/test/src/convergence_test.cpp
@@ -95,8 +95,10 @@ std::string TestNameGenerator(const ::testing::TestParamInfo<ParamType>& info) {
 class ConvergenceTest : public testing::TestWithParam<ParamType>{
 protected:
     // Default variable for all the tests
-    const int    nproc_3d_[3] = {4, 4, 4};
-    const int    nproc_2d_[3] = {8, 8, 1};
+    // const int    nproc_3d_[3] = {4, 4, 4};
+    // const int    nproc_2d_[3] = {8, 8, 1};
+    const int    nproc_3d_[3] = {1, 1, 1};
+    const int    nproc_2d_[3] = {1, 1, 1};
     const double L_[3]        = {1., 1., 1.};
 
     // Variable specific to a test_case
@@ -165,12 +167,12 @@ TEST_P(ConvergenceTest, AllBoundaryConditions){
     bool isZSpectral = (*(mybc_[2][0]) != UNB) && (*(mybc_[2][1]) != UNB);
     int  n_spectral  = isXSpectral + isYSpectral + isZSpectral;
     bool is_green_spectral = (n_spectral == 3) && (CHAT_2 == green_ || HEJ_0 == green_); // Chat2 and Hej0 have a spectral accuracy with spectral bc
-    bool is_mehrstellen = (MEHR_4L == green_ || MEHR_4F == green_ || MEHR_6L == green_ || MEHR_6F == green_);
+    // bool is_mehrstellen = (MEHR_4L == green_ || MEHR_4F == green_ || MEHR_6L == green_ || MEHR_6F == green_);
     
     bool is_green_invalid = false;
     is_green_invalid |= (HEJ_0 == green_) && (n_spectral != 3); // Hej0 cannot handle unbounded bcs.
-    is_green_invalid |= ((!is2D) && is_mehrstellen && (n_spectral == 1)); // MEHR stencils cannot handle 3D with one spectral direction
-    is_green_invalid |= (is2D && is_mehrstellen); // MEHR stencils cannot handle 2D domains
+    // is_green_invalid |= ((!is2D) && is_mehrstellen && (n_spectral == 1)); // MEHR stencils cannot handle 3D with one spectral direction
+    // is_green_invalid |= (is2D && is_mehrstellen); // MEHR stencils cannot handle 2D domains
 
     //  -------------------------------------------------------------------------
     // Perform the test

From 61ac0eeb5595107843476bf38f0060f60b285350 Mon Sep 17 00:00:00 2001
From: James Gabbard <jgabbard@mit.edu>
Date: Mon, 4 Dec 2023 08:29:48 -0800
Subject: [PATCH 04/19] :construction: 2D MEHR test all passing, 2unb1spe
 failing

---
 src/green_functions.cpp | 28 ++++++++++++++++++---
 src/green_kernels.hpp   | 54 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 4 deletions(-)

diff --git a/src/green_functions.cpp b/src/green_functions.cpp
index 7656d0e..2a28f2f 100644
--- a/src/green_functions.cpp
+++ b/src/green_functions.cpp
@@ -315,16 +315,36 @@ void cmpt_Green_2dirunbounded(const Topology *topo, const double hfact[3], const
             Gr0 = &lgf_8_2unb0spe_;
             break;
         case MEHR_4L:
-            FLUPS_CHECK(false, "MEHR4 kernel not available for 2D unbounded problems.");
+            FLUPS_WARNING("MEHR kernels in 2dirunbounded 1dirspectral entail an approximation in 3D.");
+            FLUPS_CHECK(unb_dirs_are_isotropic, "the grid has to be isotropic to use the MEHR kernels");
+            lgf_readfile_(MEHR_4L, 2, &GN, &Gdata);
+            G   = &zero_;
+            Gk0 = &mehr_4l6l_2unb0spe_;
+            Gr0 = &mehr_4l6l_2unb0spe_;
             break;
         case MEHR_6L:
-            FLUPS_CHECK(false, "MEHR6 kernel not available for 2D unbounded problems.");
+            FLUPS_WARNING("MEHR kernels in 2dirunbounded 1dirspectral entail an approximation in 3D.");
+            FLUPS_CHECK(unb_dirs_are_isotropic, "the grid has to be isotropic to use the MEHR kernels");
+            lgf_readfile_(MEHR_6L, 2, &GN, &Gdata);
+            G   = &zero_;
+            Gk0 = &mehr_4l6l_2unb0spe_;
+            Gr0 = &mehr_4l6l_2unb0spe_;
             break;
         case MEHR_4F:
-            FLUPS_CHECK(false, "MEHR4 kernel not available for 2D unbounded problems.");
+            FLUPS_WARNING("MEHR kernels in 2dirunbounded 1dirspectral entail an approximation in 3D.");
+            FLUPS_CHECK(unb_dirs_are_isotropic, "the grid has to be isotropic to use the MEHR kernels");
+            lgf_readfile_(MEHR_4F, 2, &GN, &Gdata);
+            G   = &zero_;
+            Gk0 = &mehr_4f_2unb0spe_;
+            Gr0 = &mehr_4f_2unb0spe_;
             break;
         case MEHR_6F:
-            FLUPS_CHECK(false, "MEHR6 kernel not available for 2D unbounded problems.");
+            FLUPS_WARNING("MEHR kernels in 2dirunbounded 1dirspectral entail an approximation in 3D.");
+            FLUPS_CHECK(unb_dirs_are_isotropic, "the grid has to be isotropic to use the MEHR kernels");
+            lgf_readfile_(MEHR_6F, 2, &GN, &Gdata);
+            G   = &zero_;
+            Gk0 = &mehr_6f_2unb0spe_;
+            Gr0 = &mehr_6f_2unb0spe_;
             break;
         default:
             FLUPS_CHECK(false, "Green Function type unknown.");
diff --git a/src/green_kernels.hpp b/src/green_kernels.hpp
index 29c3c3f..b4a31d2 100644
--- a/src/green_kernels.hpp
+++ b/src/green_kernels.hpp
@@ -296,6 +296,60 @@ static inline double mehr_6f_3unb0spe_(const void* params,const double* data) {
     }
     return -green/(h);
 }
+static inline double mehr_4l6l_2unb0spe_(const void* params,const double* data) {
+    int    ix = (int)((double*)params)[4];
+    int    iy = (int)((double*)params)[5];
+    int    iz = (int)((double*)params)[6];
+    int    N  = (int)((double*)params)[7];
+    int    i1 = (ix == 0) ? iy : ix; // first nonzero index
+    int    i2 = (ix == 0) ? iz : (iy == 0) ? iz : iy; // second nonzero index
+
+    // if the point is close enough, it will be already precomputed
+    double green;
+    if (i1 < N && i2 < N) {
+        green = data[i1 + i2 * N];
+    } else {  // if not, we use the extrapolation
+        const double rho = sqrt(i1 * i1 + i2 * i2);
+        mehr_4l6l_2unb0spe_expansion(&green, i1, i2, rho);
+    }
+    return -green;
+}
+static inline double mehr_4f_2unb0spe_(const void* params,const double* data) {
+    int    ix = (int)((double*)params)[4];
+    int    iy = (int)((double*)params)[5];
+    int    iz = (int)((double*)params)[6];
+    int    N  = (int)((double*)params)[7];
+    int    i1 = (ix == 0) ? iy : ix; // first nonzero index
+    int    i2 = (ix == 0) ? iz : (iy == 0) ? iz : iy; // second nonzero index
+
+    // if the point is close enough, it will be already precomputed
+    double green;
+    if (i1 < N && i2 < N) {
+        green = data[i1 + i2 * N];
+    } else {  // if not, we use the extrapolation
+        const double rho = sqrt(i1 * i1 + i2 * i2);
+        mehr_4f_2unb0spe_expansion(&green, i1, i2, rho);
+    }
+    return -green;
+}
+static inline double mehr_6f_2unb0spe_(const void* params,const double* data) {
+    int    ix = (int)((double*)params)[4];
+    int    iy = (int)((double*)params)[5];
+    int    iz = (int)((double*)params)[6];
+    int    N  = (int)((double*)params)[7];
+    int    i1 = (ix == 0) ? iy : ix; // first nonzero index
+    int    i2 = (ix == 0) ? iz : (iy == 0) ? iz : iy; // second nonzero index
+
+    // if the point is close enough, it will be already precomputed
+    double green;
+    if (i1 < N && i2 < N) {
+        green = data[i1 + i2 * N];
+    } else {  // if not, we use the extrapolation
+        const double rho = sqrt(i1 * i1 + i2 * i2);
+        mehr_6f_2unb0spe_expansion(&green, i1, i2, rho);
+    }
+    return -green;
+}
 /**@} */
 
 

From 06f523a8b114380c225c059f17cea39e8f645dd2 Mon Sep 17 00:00:00 2001
From: James Gabbard <jgabbard@mit.edu>
Date: Mon, 4 Dec 2023 08:40:52 -0800
Subject: [PATCH 05/19] :sparkles: 2unb1spe MEHR kernels

---
 src/Solver.cpp                | 3 ++-
 test/src/convergence_test.cpp | 8 ++------
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/Solver.cpp b/src/Solver.cpp
index 56badcd..25a7578 100644
--- a/src/Solver.cpp
+++ b/src/Solver.cpp
@@ -1110,7 +1110,8 @@ void Solver::cmptGreenFunction_(Topology *topo[3], double *green, FFTW_plan_dim
     // No need to scale this as that part of the Green function has a volfact = 1
     const bool is_hej = (typeGreen_ == HEJ_2 || typeGreen_ == HEJ_4 || typeGreen_ == HEJ_6 || typeGreen_ == HEJ_8 || typeGreen_ == HEJ_10);
     const bool is_lgf = (typeGreen_ == LGF_2 || typeGreen_ == LGF_4 || typeGreen_ == LGF_6 || typeGreen_ == LGF_8);
-    if (ndim_ == 3 && nbr_spectral == 1 && (is_hej || is_lgf)) {
+    const bool is_meh = (typeGreen_ == MEHR_4L || typeGreen_ == MEHR_4F || typeGreen_ == MEHR_6L || typeGreen_ == MEHR_6F);
+    if (ndim_ == 3 && nbr_spectral == 1 && (is_hej || is_lgf || is_meh)) {
         int istart_cstm[3] = {0, 0, 0};  // global
 
         for (int ip = 0; ip < 3; ip++) {
diff --git a/test/src/convergence_test.cpp b/test/src/convergence_test.cpp
index 1385689..25b2006 100644
--- a/test/src/convergence_test.cpp
+++ b/test/src/convergence_test.cpp
@@ -166,13 +166,9 @@ TEST_P(ConvergenceTest, AllBoundaryConditions){
     bool isYSpectral = (*(mybc_[1][0]) != UNB) && (*(mybc_[1][1]) != UNB);
     bool isZSpectral = (*(mybc_[2][0]) != UNB) && (*(mybc_[2][1]) != UNB);
     int  n_spectral  = isXSpectral + isYSpectral + isZSpectral;
+
     bool is_green_spectral = (n_spectral == 3) && (CHAT_2 == green_ || HEJ_0 == green_); // Chat2 and Hej0 have a spectral accuracy with spectral bc
-    // bool is_mehrstellen = (MEHR_4L == green_ || MEHR_4F == green_ || MEHR_6L == green_ || MEHR_6F == green_);
-    
-    bool is_green_invalid = false;
-    is_green_invalid |= (HEJ_0 == green_) && (n_spectral != 3); // Hej0 cannot handle unbounded bcs.
-    // is_green_invalid |= ((!is2D) && is_mehrstellen && (n_spectral == 1)); // MEHR stencils cannot handle 3D with one spectral direction
-    // is_green_invalid |= (is2D && is_mehrstellen); // MEHR stencils cannot handle 2D domains
+    bool is_green_invalid  = (HEJ_0 == green_) && (n_spectral != 3); // Hej0 cannot handle unbounded bcs.
 
     //  -------------------------------------------------------------------------
     // Perform the test

From a33da054e3a50a80000c698e5aaede1cbe0d2a9a Mon Sep 17 00:00:00 2001
From: James Gabbard <jgabbard@mit.edu>
Date: Mon, 4 Dec 2023 08:43:55 -0800
Subject: [PATCH 06/19] :bug: revert convergence test size to normal

---
 test/src/convergence_test.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/test/src/convergence_test.cpp b/test/src/convergence_test.cpp
index 25b2006..05dbe62 100644
--- a/test/src/convergence_test.cpp
+++ b/test/src/convergence_test.cpp
@@ -95,10 +95,8 @@ std::string TestNameGenerator(const ::testing::TestParamInfo<ParamType>& info) {
 class ConvergenceTest : public testing::TestWithParam<ParamType>{
 protected:
     // Default variable for all the tests
-    // const int    nproc_3d_[3] = {4, 4, 4};
-    // const int    nproc_2d_[3] = {8, 8, 1};
-    const int    nproc_3d_[3] = {1, 1, 1};
-    const int    nproc_2d_[3] = {1, 1, 1};
+    const int    nproc_3d_[3] = {4, 4, 4};
+    const int    nproc_2d_[3] = {8, 8, 1};
     const double L_[3]        = {1., 1., 1.};
 
     // Variable specific to a test_case

From f19cc5904bea8478069de5ca513ec1abf7d53e5e Mon Sep 17 00:00:00 2001
From: Gilles Poncelet <gilles.poncelet@uclouvain.be>
Date: Wed, 6 Dec 2023 17:26:18 +0100
Subject: [PATCH 07/19] :white_check_mark: update references for validation
 test

---
 .../data_ref/validation_3d_CellCenter_333399_typeGreen=11.txt | 1 +
 .../data_ref/validation_3d_CellCenter_333399_typeGreen=12.txt | 1 +
 .../data_ref/validation_3d_CellCenter_333399_typeGreen=13.txt | 1 +
 .../data_ref/validation_3d_CellCenter_333399_typeGreen=14.txt | 1 +
 .../data_ref/validation_3d_CellCenter_401499_typeGreen=11.txt | 1 +
 .../data_ref/validation_3d_CellCenter_401499_typeGreen=12.txt | 1 +
 .../data_ref/validation_3d_CellCenter_401499_typeGreen=13.txt | 1 +
 .../data_ref/validation_3d_CellCenter_401499_typeGreen=14.txt | 1 +
 .../data_ref/validation_3d_NodeCenter_333399_typeGreen=11.txt | 1 +
 .../data_ref/validation_3d_NodeCenter_333399_typeGreen=12.txt | 1 +
 .../data_ref/validation_3d_NodeCenter_333399_typeGreen=13.txt | 1 +
 .../data_ref/validation_3d_NodeCenter_333399_typeGreen=14.txt | 1 +
 .../data_ref/validation_3d_NodeCenter_401499_typeGreen=11.txt | 1 +
 .../data_ref/validation_3d_NodeCenter_401499_typeGreen=12.txt | 1 +
 .../data_ref/validation_3d_NodeCenter_401499_typeGreen=13.txt | 1 +
 .../data_ref/validation_3d_NodeCenter_401499_typeGreen=14.txt | 1 +
 samples/validation/scripts/test_3D_kerns.py                   | 4 ----
 17 files changed, 16 insertions(+), 4 deletions(-)
 create mode 100644 samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=11.txt
 create mode 100644 samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=12.txt
 create mode 100644 samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=13.txt
 create mode 100644 samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=14.txt
 create mode 100644 samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=11.txt
 create mode 100644 samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=12.txt
 create mode 100644 samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=13.txt
 create mode 100644 samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=14.txt
 create mode 100644 samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=11.txt
 create mode 100644 samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=12.txt
 create mode 100644 samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=13.txt
 create mode 100644 samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=14.txt
 create mode 100644 samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=11.txt
 create mode 100644 samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=12.txt
 create mode 100644 samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=13.txt
 create mode 100644 samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=14.txt

diff --git a/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=11.txt b/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=11.txt
new file mode 100644
index 0000000..e856869
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=11.txt
@@ -0,0 +1 @@
+16 3.311760673069e-02 6.001754483240e-02 
diff --git a/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=12.txt b/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=12.txt
new file mode 100644
index 0000000..e856869
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=12.txt
@@ -0,0 +1 @@
+16 3.311760673069e-02 6.001754483240e-02 
diff --git a/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=13.txt b/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=13.txt
new file mode 100644
index 0000000..cb19938
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=13.txt
@@ -0,0 +1 @@
+16 3.296578486463e-04 5.974240491287e-04 
diff --git a/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=14.txt b/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=14.txt
new file mode 100644
index 0000000..492d43f
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_CellCenter_333399_typeGreen=14.txt
@@ -0,0 +1 @@
+16 4.420020480575e-05 8.010203741826e-05 
diff --git a/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=11.txt b/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=11.txt
new file mode 100644
index 0000000..2decaf8
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=11.txt
@@ -0,0 +1 @@
+16 8.279381136966e-03 5.177451801366e-02 
diff --git a/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=12.txt b/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=12.txt
new file mode 100644
index 0000000..2decaf8
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=12.txt
@@ -0,0 +1 @@
+16 8.279381136966e-03 5.177451801366e-02 
diff --git a/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=13.txt b/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=13.txt
new file mode 100644
index 0000000..9ac0bea
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=13.txt
@@ -0,0 +1 @@
+16 2.601533757485e-04 8.942064632086e-04 
diff --git a/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=14.txt b/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=14.txt
new file mode 100644
index 0000000..ced3cde
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_CellCenter_401499_typeGreen=14.txt
@@ -0,0 +1 @@
+16 6.344010847012e-05 2.062116091302e-04 
diff --git a/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=11.txt b/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=11.txt
new file mode 100644
index 0000000..6f600b9
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=11.txt
@@ -0,0 +1 @@
+17 3.311760673069e-02 6.623521346139e-02 
diff --git a/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=12.txt b/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=12.txt
new file mode 100644
index 0000000..6f600b9
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=12.txt
@@ -0,0 +1 @@
+17 3.311760673069e-02 6.623521346139e-02 
diff --git a/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=13.txt b/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=13.txt
new file mode 100644
index 0000000..b05f12e
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=13.txt
@@ -0,0 +1 @@
+17 3.296578486463e-04 6.593156972932e-04 
diff --git a/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=14.txt b/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=14.txt
new file mode 100644
index 0000000..21100ec
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_NodeCenter_333399_typeGreen=14.txt
@@ -0,0 +1 @@
+17 4.420020480569e-05 8.840040961200e-05 
diff --git a/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=11.txt b/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=11.txt
new file mode 100644
index 0000000..f50098b
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=11.txt
@@ -0,0 +1 @@
+17 8.290370265556e-03 5.322438967995e-02 
diff --git a/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=12.txt b/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=12.txt
new file mode 100644
index 0000000..f50098b
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=12.txt
@@ -0,0 +1 @@
+17 8.290370265556e-03 5.322438967995e-02 
diff --git a/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=13.txt b/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=13.txt
new file mode 100644
index 0000000..b2d4500
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=13.txt
@@ -0,0 +1 @@
+17 2.785479427274e-04 1.315704724925e-03 
diff --git a/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=14.txt b/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=14.txt
new file mode 100644
index 0000000..cbce81c
--- /dev/null
+++ b/samples/validation/data_ref/validation_3d_NodeCenter_401499_typeGreen=14.txt
@@ -0,0 +1 @@
+17 7.436135343861e-05 4.195883266850e-04 
diff --git a/samples/validation/scripts/test_3D_kerns.py b/samples/validation/scripts/test_3D_kerns.py
index 5c90e36..66321ef 100644
--- a/samples/validation/scripts/test_3D_kerns.py
+++ b/samples/validation/scripts/test_3D_kerns.py
@@ -59,10 +59,6 @@
             print("skip kernel 7 and 2dirunbounded... unsupported error due to inherent approximation.")
             continue
 
-        if (kern in ['11','12','13','14'] and bcs[4:6] == ["9","9"]) :
-            print("Mehrstellen stencils not implemented for 2D problems")
-            continue
-
         if(bcs[4:6] == ["9","9"]):
             str_bcs = str(bcs[0]) + "," + str(bcs[1])+ "," + str(bcs[2])+ "," + str(bcs[3])+ "," + str(bcs[4])+ "," + str(bcs[5]) 
             r = subprocess.run(["mpirun"] + ["-np"] + ["2"] + ["./flups_validation_nb"] + ["--np=1,2,1"] + ["--kernel="+str(kern)] + ["--center=" + str(centerType)] + ["--res=16,16,1"] + ["--nres=1"] + ["--bc="+str_bcs], capture_output=True)

From 2c095c04a5da9166b60ed664203277318a3c7e89 Mon Sep 17 00:00:00 2001
From: Gilles Poncelet <gilles.poncelet@uclouvain.be>
Date: Thu, 7 Dec 2023 16:57:08 +0100
Subject: [PATCH 08/19] :white_check_mark: further subdivide the tests to avoid
 running out of time

---
 .gitlab-ci/flups-lm3.yml     | 40 +++++++++++++++++-------------------
 test/run/test_convergence.sh |  2 +-
 test/run/test_workflow.sh    | 20 ++++++++++--------
 3 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/.gitlab-ci/flups-lm3.yml b/.gitlab-ci/flups-lm3.yml
index f654471..790a168 100644
--- a/.gitlab-ci/flups-lm3.yml
+++ b/.gitlab-ci/flups-lm3.yml
@@ -111,20 +111,20 @@ check result lm3:
      start_in: 1200 minutes 
   artifacts: 
     paths: 
-      - report_test_a2a_cell.xml
-      - report_test_nb_cell.xml
-      - report_test_isr_cell.xml
-      - report_test_a2a_node.xml
-      - report_test_nb_node.xml
-      - report_test_isr_node.xml
+      - report_test_a2a_cell_*.xml
+      - report_test_nb_cell_*.xml
+      - report_test_isr_cell_*.xml
+      - report_test_a2a_node_*.xml
+      - report_test_nb_node_*.xml
+      - report_test_isr_node_*.xml
     reports:
         junit: 
-          - report_test_a2a_cell.xml
-          - report_test_nb_cell.xml
-          - report_test_isr_cell.xml
-          - report_test_a2a_node.xml
-          - report_test_nb_node.xml
-          - report_test_isr_node.xml
+          - report_test_a2a_cell_*.xml
+          - report_test_nb_cell_*.xml
+          - report_test_isr_cell_*.xml
+          - report_test_a2a_node_*.xml
+          - report_test_nb_node_*.xml
+          - report_test_isr_node_*.xml
     when: always
   needs: [run lm3]
   dependencies:
@@ -137,15 +137,13 @@ check result lm3:
     - if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_nb_*") ]]; then echo "At least one of the nb test did not execute till the end."; exit 1; fi
     - if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_isr_*") ]]; then echo "At least one of the isr test did not execute till the end."; exit 1; fi
     - scp lm3:${PATH2RUN}/*.xml .
-    - test -f report_test_a2a_cell.xml && if [[ $(grep "<failure" report_test_a2a_cell.xml) ]]; then echo "At least one of the a2a cell centred tests has failed "; exit 1; fi 
-    - test -f report_test_a2a_node.xml && if [[ $(grep "<failure" report_test_a2a_node.xml) ]]; then echo "At least one of the a2a node centred tests has failed "; exit 1; fi 
-    - test -f report_test_nb_cell.xml && if [[ $(grep "<failure" report_test_nb_cell.xml) ]]; then echo "At least one of the nb cell centred tests has failed "; exit 1; fi
-    - test -f report_test_nb_node.xml && if [[ $(grep "<failure" report_test_nb_node.xml) ]]; then echo "At least one of the nb node centred tests has failed "; exit 1; fi
-    - test -f report_test_isr_cell.xml && if [[ $(grep "<failure" report_test_isr_cell.xml) ]]; then echo "At least one of the cell centred isr tests has failed "; exit 1; fi 
-    - test -f report_test_isr_node.xml && if [[ $(grep "<failure" report_test_isr_node.xml) ]]; then echo "At least one of the node centred isr tests has failed "; exit 1; fi 
-    # - if [[ $(grep "<failure" Node_*.xml) ]]; then echo "At least one of the node tests has failed "; exit 1; fi 
-    # - if [[ $(grep "<failure" Cell_*.xml) ]]; then echo "At least one of the cell tests has failed "; exit 1; fi
-    # - test -f cell_report.xml && grep -L "<failure" cell_report.xml   
+    - if [[ $(ls -dq report_test_* | wc -l) -ne 90 ]]; then echo "At least one of the test did not execute till the end."; exit 1; fi
+    - if [[ $(grep "<failure" report_test_a2a_cell_*.xml) ]]; then echo "At least one of the a2a cell centred tests has failed "; exit 1; fi
+    - if [[ $(grep "<failure" report_test_a2a_node_*.xml) ]]; then echo "At least one of the a2a node centred tests has failed "; exit 1; fi
+    - if [[ $(grep "<failure" report_test_nb_cell_*.xml) ]]; then echo "At least one of the nb cell centred tests has failed "; exit 1; fi
+    - if [[ $(grep "<failure" report_test_nb_node_*.xml) ]]; then echo "At least one of the nb node centred tests has failed "; exit 1; fi
+    - if [[ $(grep "<failure" report_test_isr_cell_*.xml) ]]; then echo "At least one of the cell centred isr tests has failed "; exit 1; fi
+    - if [[ $(grep "<failure" report_test_isr_node_*.xml) ]]; then echo "At least one of the node centred isr tests has failed "; exit 1; fi
   retry:                              # Retry -- Relaunch the Job
     max: 2                            # Limited to 2 (because of gitlab)
     when:                             # If the failure is not known or if it's a timeout failure
diff --git a/test/run/test_convergence.sh b/test/run/test_convergence.sh
index 8de0da1..3285717 100755
--- a/test/run/test_convergence.sh
+++ b/test/run/test_convergence.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # Submission script for Lemaitre3
 #SBATCH --job-name=flups_auto_test
-#SBATCH --time=16:00:00 # hh:mm:ss
+#SBATCH --time=02:00:00 # hh:mm:ss
 #
 #SBATCH --ntasks=64
 #SBATCH --mem-per-cpu=4000 # megabytes
diff --git a/test/run/test_workflow.sh b/test/run/test_workflow.sh
index d23419b..a615303 100755
--- a/test/run/test_workflow.sh
+++ b/test/run/test_workflow.sh
@@ -1,19 +1,21 @@
 #!/bin/sh
-# KERNELS='CHAT2 LGF2 HEJ2 HEJ4 HEJ6 HEJ8 HEJ10 HEJ0'
 CENTERS='node cell'
 VERSIONS='nb a2a isr'
+KERNELS='chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr4l'
 
 export LIBDIR=$1
 
 for version in ${VERSIONS}; do
     for center in ${CENTERS}; do
-        export EXEC=flups_test_${version}
-        export REPORT=xml:report_test_${version}_${center}.xml
-        export FILE_OUT=std_out_${version}_${center}
-        export TESTS=AllTest/ConvergenceTest.AllBoundaryConditions/${center}
-        export LIBPATH=${LIBDIR}
-        echo "Submitting job with command: sbatch --export=LIBPATH=${LIBDIR},REPORT=${REPORT},FILE_OUT=${FILE_OUT},TESTS=${TESTS},EXEC=${EXEC_FLUPS} test_convergence.sh"
-        sbatch test_convergence.sh
-        echo "   "
+        for kernel in ${KERNELS}; do
+            export EXEC=flups_test_${version}
+            export REPORT=xml:report_test_${version}_${center}_${kernel}.xml
+            export FILE_OUT=std_out_${version}_${center}_${kernel}
+            export TESTS=AllTest/ConvergenceTest.AllBoundaryConditions/${center}_${kernel}
+            export LIBPATH=${LIBDIR}
+            echo "Submitting job with command: sbatch --export=LIBPATH=${LIBDIR},REPORT=${REPORT},FILE_OUT=${FILE_OUT},TESTS=${TESTS},EXEC=${EXEC_FLUPS} test_convergence.sh"
+            sbatch test_convergence.sh
+            echo "   "
+        done
     done
 done
\ No newline at end of file

From 2ef9474b5027b95c88817b3540c05922612acfca Mon Sep 17 00:00:00 2001
From: Gilles Poncelet <gilles.poncelet@uclouvain.be>
Date: Thu, 7 Dec 2023 18:06:03 +0100
Subject: [PATCH 09/19] :white_check_mark: more clarity and automation in the
 lm3 test validation

---
 .gitlab-ci/flups-lm3.yml | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/.gitlab-ci/flups-lm3.yml b/.gitlab-ci/flups-lm3.yml
index 790a168..1acb359 100644
--- a/.gitlab-ci/flups-lm3.yml
+++ b/.gitlab-ci/flups-lm3.yml
@@ -137,13 +137,18 @@ check result lm3:
     - if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_nb_*") ]]; then echo "At least one of the nb test did not execute till the end."; exit 1; fi
     - if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_isr_*") ]]; then echo "At least one of the isr test did not execute till the end."; exit 1; fi
     - scp lm3:${PATH2RUN}/*.xml .
-    - if [[ $(ls -dq report_test_* | wc -l) -ne 90 ]]; then echo "At least one of the test did not execute till the end."; exit 1; fi
-    - if [[ $(grep "<failure" report_test_a2a_cell_*.xml) ]]; then echo "At least one of the a2a cell centred tests has failed "; exit 1; fi
-    - if [[ $(grep "<failure" report_test_a2a_node_*.xml) ]]; then echo "At least one of the a2a node centred tests has failed "; exit 1; fi
-    - if [[ $(grep "<failure" report_test_nb_cell_*.xml) ]]; then echo "At least one of the nb cell centred tests has failed "; exit 1; fi
-    - if [[ $(grep "<failure" report_test_nb_node_*.xml) ]]; then echo "At least one of the nb node centred tests has failed "; exit 1; fi
-    - if [[ $(grep "<failure" report_test_isr_cell_*.xml) ]]; then echo "At least one of the cell centred isr tests has failed "; exit 1; fi
-    - if [[ $(grep "<failure" report_test_isr_node_*.xml) ]]; then echo "At least one of the node centred isr tests has failed "; exit 1; fi
+    - |
+      for comm_type in a2a nb isr; do
+        for center_type in cell node; do
+          for kernel_type in chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr4l; do
+            test -f report_test_${comm_type}_${center_type}_${kernel_type}.xml
+            if [[ $(grep "<failure" report_test_${comm_type}_${center_type}_${kernel_type}.xml) ]]; then
+              echo "Test ${comm_type}_${center_type}_${kernel_type} has failed"
+              exit 1
+            fi
+          done
+        done
+      done
   retry:                              # Retry -- Relaunch the Job
     max: 2                            # Limited to 2 (because of gitlab)
     when:                             # If the failure is not known or if it's a timeout failure

From 7a2f5257522b25cb0f112158991f9a10b96a17c1 Mon Sep 17 00:00:00 2001
From: Gilles Poncelet <gilles.poncelet@uclouvain.be>
Date: Thu, 7 Dec 2023 18:18:24 +0100
Subject: [PATCH 10/19] :white_check_mark: check everything before exiting (and
 more clarity)

---
 .gitlab-ci/flups-lm3.yml | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/.gitlab-ci/flups-lm3.yml b/.gitlab-ci/flups-lm3.yml
index 1acb359..3df4143 100644
--- a/.gitlab-ci/flups-lm3.yml
+++ b/.gitlab-ci/flups-lm3.yml
@@ -133,22 +133,42 @@ check result lm3:
     - echo -e "\e[0Ksection_start:`date +%s`:my_first_section[collapsed=true]\r\e[0KWaiting for job completion"
     - while [[ $(ssh lm3 "squeue -u vortexbot | grep vortex") ]]; do echo "Waiting for job completion:"; echo "$(ssh lm3 squeue -u vortexbot )"; sleep 2m; done
     - echo -e "\e[0Ksection_end:`date +%s`:my_first_section\r\e[0K"
-    - if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_a2a_*") ]]; then echo "At least one of the a2a test did not execute till the end."; exit 1; fi
-    - if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_nb_*") ]]; then echo "At least one of the nb test did not execute till the end."; exit 1; fi
-    - if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_isr_*") ]]; then echo "At least one of the isr test did not execute till the end."; exit 1; fi
+    - |
+      unfinished_tests=()
+
+      for comm_type in a2a nb isr; do
+        for center_type in cell node; do
+          for kernel_type in chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr4l; do
+            if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_${comm_type}_${center_type}_${kernel_type}") ]]; then
+              echo "Test ${comm_type}_${center_type}_${kernel_type} did not execute till the end"
+              unfinished_tests+=("${comm_type}_${center_type}_${kernel_type}")
+            fi
+          done
+        done
+      done
+
+      if [[ ${#unfinished_tests[@]} -gt 0 ]]; then
+        exit 1
+      fi
     - scp lm3:${PATH2RUN}/*.xml .
     - |
+      failed_tests=()
+
       for comm_type in a2a nb isr; do
         for center_type in cell node; do
           for kernel_type in chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr4l; do
             test -f report_test_${comm_type}_${center_type}_${kernel_type}.xml
             if [[ $(grep "<failure" report_test_${comm_type}_${center_type}_${kernel_type}.xml) ]]; then
               echo "Test ${comm_type}_${center_type}_${kernel_type} has failed"
-              exit 1
+              failed_tests+=("${comm_type}_${center_type}_${kernel_type}")
             fi
           done
         done
       done
+
+      if [[ ${#failed_tests[@]} -gt 0 ]]; then
+        exit 1
+      fi
   retry:                              # Retry -- Relaunch the Job
     max: 2                            # Limited to 2 (because of gitlab)
     when:                             # If the failure is not known or if it's a timeout failure

From bab5d8a2fef56d72e634d0e279b51e3045981feb Mon Sep 17 00:00:00 2001
From: Gilles Poncelet <gilles.poncelet@uclouvain.be>
Date: Fri, 8 Dec 2023 14:40:51 +0100
Subject: [PATCH 11/19] :bug: :white_check_mark: add wildcard to account for
 the SLURM jobid at the end of the stdout file

---
 .gitlab-ci/flups-lm3.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitlab-ci/flups-lm3.yml b/.gitlab-ci/flups-lm3.yml
index 3df4143..f1a6937 100644
--- a/.gitlab-ci/flups-lm3.yml
+++ b/.gitlab-ci/flups-lm3.yml
@@ -139,7 +139,7 @@ check result lm3:
       for comm_type in a2a nb isr; do
         for center_type in cell node; do
           for kernel_type in chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr4l; do
-            if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_${comm_type}_${center_type}_${kernel_type}") ]]; then
+            if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_${comm_type}_${center_type}_${kernel_type}_*") ]]; then
               echo "Test ${comm_type}_${center_type}_${kernel_type} did not execute till the end"
               unfinished_tests+=("${comm_type}_${center_type}_${kernel_type}")
             fi

From 6454496796621bc47ec95540efc4c24685aaa924 Mon Sep 17 00:00:00 2001
From: Gilles Poncelet <gilles.poncelet@uclouvain.be>
Date: Mon, 11 Dec 2023 10:07:50 +0100
Subject: [PATCH 12/19] :bug: :white_check_mark: increase job time by 30min &
 try to fix the artifact upload

---
 .gitlab-ci/flups-lm3.yml     | 14 ++------------
 test/run/test_convergence.sh |  2 +-
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/.gitlab-ci/flups-lm3.yml b/.gitlab-ci/flups-lm3.yml
index f1a6937..3c18232 100644
--- a/.gitlab-ci/flups-lm3.yml
+++ b/.gitlab-ci/flups-lm3.yml
@@ -111,20 +111,10 @@ check result lm3:
      start_in: 1200 minutes 
   artifacts: 
     paths: 
-      - report_test_a2a_cell_*.xml
-      - report_test_nb_cell_*.xml
-      - report_test_isr_cell_*.xml
-      - report_test_a2a_node_*.xml
-      - report_test_nb_node_*.xml
-      - report_test_isr_node_*.xml
+      - ./*.xml
     reports:
         junit: 
-          - report_test_a2a_cell_*.xml
-          - report_test_nb_cell_*.xml
-          - report_test_isr_cell_*.xml
-          - report_test_a2a_node_*.xml
-          - report_test_nb_node_*.xml
-          - report_test_isr_node_*.xml
+          - ./*.xml
     when: always
   needs: [run lm3]
   dependencies:
diff --git a/test/run/test_convergence.sh b/test/run/test_convergence.sh
index 3285717..354b725 100755
--- a/test/run/test_convergence.sh
+++ b/test/run/test_convergence.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # Submission script for Lemaitre3
 #SBATCH --job-name=flups_auto_test
-#SBATCH --time=02:00:00 # hh:mm:ss
+#SBATCH --time=02:30:00 # hh:mm:ss
 #
 #SBATCH --ntasks=64
 #SBATCH --mem-per-cpu=4000 # megabytes

From ff01de69df17bf32217b4fac6982dec07ec0a7ed Mon Sep 17 00:00:00 2001
From: Gilles Poncelet <gilles.poncelet@uclouvain.be>
Date: Mon, 11 Dec 2023 16:49:27 +0100
Subject: [PATCH 13/19] :bug: :white_check_mark: error in the kernel list...
 mehr4l -> mehr6f

---
 .gitlab-ci/flups-lm3.yml  | 4 ++--
 test/run/test_workflow.sh | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.gitlab-ci/flups-lm3.yml b/.gitlab-ci/flups-lm3.yml
index 3c18232..d3a0388 100644
--- a/.gitlab-ci/flups-lm3.yml
+++ b/.gitlab-ci/flups-lm3.yml
@@ -128,7 +128,7 @@ check result lm3:
 
       for comm_type in a2a nb isr; do
         for center_type in cell node; do
-          for kernel_type in chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr4l; do
+          for kernel_type in chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr6f; do
             if [[ ! $(ssh lm3 "grep 'Tests are over ' ${PATH2RUN}/std_out_${comm_type}_${center_type}_${kernel_type}_*") ]]; then
               echo "Test ${comm_type}_${center_type}_${kernel_type} did not execute till the end"
               unfinished_tests+=("${comm_type}_${center_type}_${kernel_type}")
@@ -146,7 +146,7 @@ check result lm3:
 
       for comm_type in a2a nb isr; do
         for center_type in cell node; do
-          for kernel_type in chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr4l; do
+          for kernel_type in chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr6f; do
             test -f report_test_${comm_type}_${center_type}_${kernel_type}.xml
             if [[ $(grep "<failure" report_test_${comm_type}_${center_type}_${kernel_type}.xml) ]]; then
               echo "Test ${comm_type}_${center_type}_${kernel_type} has failed"
diff --git a/test/run/test_workflow.sh b/test/run/test_workflow.sh
index a615303..ef41c0c 100755
--- a/test/run/test_workflow.sh
+++ b/test/run/test_workflow.sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 CENTERS='node cell'
 VERSIONS='nb a2a isr'
-KERNELS='chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr4l'
+KERNELS='chat2 lgf2 lgf4 lgf6 lgf8 hej2 hej4 hej6 hej8 hej10 hej0 mehr4l mehr6l mehr4f mehr6f'
 
 export LIBDIR=$1
 
@@ -13,7 +13,7 @@ for version in ${VERSIONS}; do
             export FILE_OUT=std_out_${version}_${center}_${kernel}
             export TESTS=AllTest/ConvergenceTest.AllBoundaryConditions/${center}_${kernel}
             export LIBPATH=${LIBDIR}
-            echo "Submitting job with command: sbatch --export=LIBPATH=${LIBDIR},REPORT=${REPORT},FILE_OUT=${FILE_OUT},TESTS=${TESTS},EXEC=${EXEC_FLUPS} test_convergence.sh"
+            echo "Submitting job with command: sbatch --export=LIBPATH=${LIBDIR},REPORT=${REPORT},FILE_OUT=${FILE_OUT},TESTS=${TESTS},EXEC=${EXEC} test_convergence.sh"
             sbatch test_convergence.sh
             echo "   "
         done

From 84a43c30f5c00b273572c4f9f8c7002934e79024 Mon Sep 17 00:00:00 2001
From: pbalty <pierre.balty@uclouvain.be>
Date: Mon, 11 Mar 2024 15:08:19 +0100
Subject: [PATCH 14/19] :tada: Introducing a function to avoid freeing fftw

---
 src/Solver.cpp |  4 ----
 src/Solver.hpp |  3 +++
 src/flups.cpp  | 15 +++++++++++++++
 src/flups.h    |  9 +++++++++
 4 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/Solver.cpp b/src/Solver.cpp
index 25a7578..a680d7a 100644
--- a/src/Solver.cpp
+++ b/src/Solver.cpp
@@ -523,11 +523,7 @@ Solver::~Solver() {
     //    fftw_export_wisdom_to_filename(FLUPS_WISDOM_PATH);
     //#endif
 
-#if FLUPS_OPENMP    
-    fftw_cleanup_threads();
-#endif
 
-    fftw_cleanup();
     // m_profStopi(prof_, "Clean up");
     //-------------------------------------------------------------------------
     END_FUNC;
diff --git a/src/Solver.hpp b/src/Solver.hpp
index 1d9f9f4..c2955cc 100644
--- a/src/Solver.hpp
+++ b/src/Solver.hpp
@@ -52,6 +52,9 @@
 #include "metis.h"
 #endif
 
+
+
+
 /**
  * @brief The Poisson solver
  *
diff --git a/src/flups.cpp b/src/flups.cpp
index 4484fa9..0d63ae3 100644
--- a/src/flups.cpp
+++ b/src/flups.cpp
@@ -133,9 +133,24 @@ Solver* flups_init_timed(Topology* t, BoundaryType* bc[3][2], const double h[3],
 
 // destroy the solver
 void flups_cleanup(Solver* s) {
+    flups_cleanup_ext(s, false);
+}
+
+void flups_cleanup_ext(Solver* s, const bool cleanup_fftw) {
     delete s;
+    if (cleanup_fftw){
+        flups_cleanup_fftw();
+    }
 }
 
+void flups_cleanup_fftw(){
+#if FLUPS_OPENMP    
+    fftw_cleanup_threads();
+#endif
+
+    fftw_cleanup();
+};
+
 // setup the solver
 void flups_set_greenType(Solver* s, const GreenType type) {
     s->set_GreenType(type);
diff --git a/src/flups.h b/src/flups.h
index df4c04a..61ffa0d 100644
--- a/src/flups.h
+++ b/src/flups.h
@@ -401,6 +401,15 @@ FLUPS_Solver* flups_init_timed(FLUPS_Topology* t, FLUPS_BoundaryType* bc[3][2],
  */
 void flups_cleanup(FLUPS_Solver* s);
 
+/**
+ * @brief must be called before execution terminates as it frees the memory used by the solver
+ *
+ * @param s
+ */
+void flups_cleanup_ext(FLUPS_Solver* s, const bool cleanup_fftw);
+
+void flups_cleanup_fftw(); 
+
 /**
  * @brief sets the type of the Green's function used by the solver
  *

From c36f0cc92bebccfaa3078594956d55a08c6ed2fe Mon Sep 17 00:00:00 2001
From: pbalty <pierre.balty@uclouvain.be>
Date: Tue, 30 Apr 2024 12:21:53 +0200
Subject: [PATCH 15/19] :construction: update the samples directory

---
 samples/compareACCFFT/main.cpp                |  57 ++-
 samples/compareP3DFFT++/main_compare++.cpp    | 447 +++++++++---------
 samples/compareP3DFFT/main_compare.cpp        | 300 ++++++------
 samples/simpleTest/main_simple_test.cpp       | 134 +++---
 .../solve_advanced_C/main_solve_advanced.c    | 173 +++----
 samples/solve_vtube/src/vtube.cpp             | 190 ++++----
 samples/validation/src/validation_3d.cpp      | 134 +++---
 src/flups.cpp                                 |   2 +
 src/flups.h                                   |   2 +
 src/flups_interface.h                         |   2 +-
 10 files changed, 713 insertions(+), 728 deletions(-)

diff --git a/samples/compareACCFFT/main.cpp b/samples/compareACCFFT/main.cpp
index ed18280..82e71b8 100644
--- a/samples/compareACCFFT/main.cpp
+++ b/samples/compareACCFFT/main.cpp
@@ -2,10 +2,9 @@
 #include <iostream>
 
 #include "accfft.h"
-#include "h3lpr/profiler.hpp"
-#include "h3lpr/parser.hpp"
 #include "flups.h"
-
+#include "h3lpr/parser.hpp"
+#include "h3lpr/profiler.hpp"
 
 int main(int argc, char *argv[]) {
     //-------------------------------------------------------------------------
@@ -26,29 +25,28 @@ int main(int argc, char *argv[]) {
     // Get info from the command line
     //--------------------------------------------------------------------------
     H3LPR::Parser parser(argc, (const char **)argv);
-    const auto    arg_nglob = parser.GetValues<int, 3>("--nglob", "the global resolution, will be used for both ACCFFT and FLUPS", {64,64,64});
+    const auto    arg_nglob = parser.GetValues<int, 3>("--nglob", "the global resolution, will be used for both ACCFFT and FLUPS", {64, 64, 64});
     const auto    arg_nproc = parser.GetValues<int, 3>("--nproc", "the proc distribution, for FLUPS only", {1, 1, 1});
     const auto    arg_dom   = parser.GetValues<double, 3>("--dom", "the size of the domain, must be compatible with nglob", {1.0, 1.0, 1.0});
     const int     n_iter    = parser.GetValue<int>("--niter", "the number of iterations to perform", 20);
     const int     n_warm    = parser.GetValue<int>("--warm", "the number of iterations to perform when warming up", 1);
-    const bool profile = parser.GetFlag("--profile","forward the profiler to flups");
+    const bool    profile   = parser.GetFlag("--profile", "forward the profiler to flups");
     parser.Finalize();
 
     //--------------------------------------------------------------------------
     // Definition of the problem
     //--------------------------------------------------------------------------
-    const int nglob[3] = {arg_nglob[0], arg_nglob[1], arg_nglob[2]};
-    const int nproc[3] = {arg_nproc[0], arg_nproc[1], arg_nproc[2]};
-    const double L[3]  = {arg_dom[0], arg_dom[1], arg_dom[2]};
-    
+    const int    nglob[3] = {arg_nglob[0], arg_nglob[1], arg_nglob[2]};
+    const int    nproc[3] = {arg_nproc[0], arg_nproc[1], arg_nproc[2]};
+    const double L[3]     = {arg_dom[0], arg_dom[1], arg_dom[2]};
 
     // get the grid spacing
     const double h[3] = {L[0] / nglob[0], L[1] / nglob[1], L[2] / nglob[2]};
 
     // get the PER PER PER BC everywhere
     const FLUPS_CenterType center_type[3] = {CELL_CENTER, CELL_CENTER, CELL_CENTER};
-    //const FLUPS_CenterType center_type[3] = {NODE_CENTER, NODE_CENTER, NODE_CENTER};
-    FLUPS_BoundaryType    *mybc[3][2];
+    // const FLUPS_CenterType center_type[3] = {NODE_CENTER, NODE_CENTER, NODE_CENTER};
+    FLUPS_BoundaryType *mybc[3][2];
     for (int id = 0; id < 3; id++) {
         for (int is = 0; is < 2; is++) {
             mybc[id][is]    = (FLUPS_BoundaryType *)flups_malloc(sizeof(int) * 1);
@@ -56,7 +54,7 @@ int main(int argc, char *argv[]) {
         }
     }
 
-     //..........................................................................
+    //..........................................................................
     // Display
     flups_info(argc, argv);
     if (rank == 0) {
@@ -70,11 +68,9 @@ int main(int argc, char *argv[]) {
         printf("--------------------------------------------------------------\n");
     }
 
-
-
     //--------------------------------------------------------------------------
-    std::string prof_name = "beatme_nglob" + std::to_string(nglob[0]) +"_"+ std::to_string(nglob[1]) + "_" + std::to_string(nglob[2]) + "_nrank" + std::to_string(comm_size);
-    H3LPR::Profiler    prof(prof_name);
+    std::string     prof_name = "beatme_nglob" + std::to_string(nglob[0]) + "_" + std::to_string(nglob[1]) + "_" + std::to_string(nglob[2]) + "_nrank" + std::to_string(comm_size);
+    H3LPR::Profiler prof(prof_name);
 
     //--------------------------------------------------------------------------
     /** - Initialize FLUPS */
@@ -82,17 +78,17 @@ int main(int argc, char *argv[]) {
     if (rank == 0) printf("Initialization of FLUPS\n");
 
     // create a real topology
-    FLUPS_Profiler* flups_prof = (profile)? (FLUPS_Profiler*) &prof : nullptr;
-    FLUPS_Topology *topoTmp = flups_topo_new(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm);
-    FLUPS_Solver  *mysolver = flups_init_timed(topoTmp, mybc, h, L, NOD, center_type, flups_prof);
+    FLUPS_Profiler *flups_prof = (profile) ? (FLUPS_Profiler *)&prof : nullptr;
+    FLUPS_Topology *topoTmp    = flups_topo_new(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm);
+    FLUPS_Solver   *mysolver   = flups_init_timed(topoTmp, mybc, h, L, NOD, center_type, flups_prof);
 
     // set the CHAT2 green type (even if it's not used)
     flups_set_greenType(mysolver, CHAT_2);
     flups_setup(mysolver, true);
-    double *solFLU = flups_get_innerBuffer(mysolver);  
+    double *solFLU = flups_get_innerBuffer(mysolver);
 
     // to fill the data we use the inner topo
-    const Topology *topoIn =flups_get_innerTopo_physical(mysolver);
+    const Topology *topoIn = flups_get_innerTopo_physical(mysolver);
     // instruct the solver to skip the first ST
     flups_skip_firstSwitchtopo(mysolver);
 
@@ -110,18 +106,18 @@ int main(int argc, char *argv[]) {
 
     //..........................................................................
     // set some straightforward data
-    int start_id[3]; 
+    int start_id[3];
     flups_topo_get_istartGlob(topoIn, start_id);
     int topo_nmem[3] = {flups_topo_get_nmem(topoIn, 0), flups_topo_get_nmem(topoIn, 1), flups_topo_get_nmem(topoIn, 2)};
 
     // set a simple expression
     double val = 0.0;
-    for (int i2 = 0; i2 < flups_topo_get_nloc(topoIn, 2); ++i2){
-	for(int i1 = 0; i1 < flups_topo_get_nloc(topoIn, 1); ++ i1){
-	      for(int i0 = 0; i0 < flups_topo_get_nloc(topoIn, 0); ++i0){
-                //double x   = 2.0 * M_PI / nglob[0] * (i0 + topoIn->cmpt_start_id(0));
-                //double y   = 2.0 * M_PI / nglob[1] * (i1 + topoIn->cmpt_start_id(1));
-                //double z   = 2.0 * M_PI / nglob[2] * (i2 + topoIn->cmpt_start_id(2));
+    for (int i2 = 0; i2 < flups_topo_get_nloc(topoIn, 2); ++i2) {
+        for (int i1 = 0; i1 < flups_topo_get_nloc(topoIn, 1); ++i1) {
+            for (int i0 = 0; i0 < flups_topo_get_nloc(topoIn, 0); ++i0) {
+                // double x   = 2.0 * M_PI / nglob[0] * (i0 + topoIn->cmpt_start_id(0));
+                // double y   = 2.0 * M_PI / nglob[1] * (i1 + topoIn->cmpt_start_id(1));
+                // double z   = 2.0 * M_PI / nglob[2] * (i2 + topoIn->cmpt_start_id(2));
                 double x   = 2.0 * M_PI / nglob[0] * (i0 + start_id[0]);
                 double y   = 2.0 * M_PI / nglob[1] * (i1 + start_id[1]);
                 double z   = 2.0 * M_PI / nglob[2] * (i2 + start_id[2]);
@@ -142,9 +138,9 @@ int main(int argc, char *argv[]) {
     accfft_create_comm(MPI_COMM_WORLD, c_dims, &c_comm);
 
     // let ACCFFT decide on the topology choice, pencil in Z, as always
-    int    isize[3], osize[3], istart[3], ostart[3];
+    int isize[3], osize[3], istart[3], ostart[3];
 
-    int n_acc[3] = {nglob[2],nglob[1],nglob[0]};
+    int    n_acc[3]  = {nglob[2], nglob[1], nglob[0]};
     size_t alloc_max = accfft_local_size_dft_r2c(n_acc, isize, istart, osize, ostart, c_comm);
 
     double *data_acc = (double *)accfft_alloc(alloc_max);
@@ -273,6 +269,7 @@ int main(int argc, char *argv[]) {
         }
     }
 
+    flups_cleanup_fftw();
     MPI_Finalize();
 }
 
diff --git a/samples/compareP3DFFT++/main_compare++.cpp b/samples/compareP3DFFT++/main_compare++.cpp
index 0c51f69..1d3eff4 100644
--- a/samples/compareP3DFFT++/main_compare++.cpp
+++ b/samples/compareP3DFFT++/main_compare++.cpp
@@ -5,32 +5,32 @@
 #include "p3dfft.h"
 
 void print_res(p3dfft::complex_double *A, int *sdims, int *gstart);
-void print_res(double *A, const Topology* topo);
+void print_res(double *A, const Topology *topo);
 
 /**
  * @brief Compare P3DFFT++ and FLUPS on the same FFT 3D
- * 
+ *
  * This code will time both libraries on a give FFT 3D: the full periodic/DFT.
  * Notice that P3D requires the data to be already in pencils, which
  * is not the case of FLUPS. To do a fair comparison, the time required
- * by FLUPS to switch from the initial topology to the inner pencil topology 
+ * by FLUPS to switch from the initial topology to the inner pencil topology
  * (Switch0) must be substracted from the total time of the solve.
- * 
+ *
  * Alternatively, we could have chosen to do an ODD-ODD/DST2 in the x direction.
- * As a result, FLUPS would automatically skip Switch0. However, we could not 
+ * As a result, FLUPS would automatically skip Switch0. However, we could not
  * manage to do that case with P3DFFT...
- * 
- * @param argc 
- * @param argv 
- * @return int 
+ *
+ * @param argc
+ * @param argv
+ * @return int
  */
 int main(int argc, char *argv[]) {
     //-------------------------------------------------------------------------
     // Default values
     //-------------------------------------------------------------------------
-    int n_iter = 10;
-    int res[3] = {16,16,16};
-    int nproc2D[2] = {1,2};
+    int n_iter     = 10;
+    int res[3]     = {16, 16, 16};
+    int nproc2D[2] = {1, 2};
 
     //-------------------------------------------------------------------------
     // Parse arguments
@@ -43,114 +43,114 @@ int main(int argc, char *argv[]) {
             printf(" --niter, -ni Ni :              Ni is the number of times we call the same 3D FFT (for statistics on the profiler) \n");
             return 0;
         } else if ((arg == "-np") || (arg == "--nprocs")) {
-            for (int j = 0; j<2;j++){
-                if (i + j + 1 < argc) { // Make sure we aren't at the end of argv!
-                    nproc2D[j] = atoi(argv[i+j+1]); 
-                    if(nproc2D[j]<1){
+            for (int j = 0; j < 2; j++) {
+                if (i + j + 1 < argc) {  // Make sure we aren't at the end of argv!
+                    nproc2D[j] = atoi(argv[i + j + 1]);
+                    if (nproc2D[j] < 1) {
                         fprintf(stderr, "nprocs must be >0\n");
                         return 1;
                     }
-                } else { //Missing argument
+                } else {  // Missing argument
                     fprintf(stderr, "missing argument in --nprocs\n");
                     return 1;
-                }  
+                }
             }
-            i+=2;
-        }  else if ((arg == "-res")|| (arg== "--resolution") ) {
-            for (int j = 0; j<3;j++){
-                if (i + j + 1 < argc) { // Make sure we aren't at the end of argv!
-                    res[j] = atoi(argv[i+j+1]); 
-                    if(res[j]<=0.0){
+            i += 2;
+        } else if ((arg == "-res") || (arg == "--resolution")) {
+            for (int j = 0; j < 3; j++) {
+                if (i + j + 1 < argc) {  // Make sure we aren't at the end of argv!
+                    res[j] = atoi(argv[i + j + 1]);
+                    if (res[j] <= 0.0) {
                         fprintf(stderr, "res must be >0\n");
                         return 1;
                     }
-                } else { //Missing argument
+                } else {  // Missing argument
                     fprintf(stderr, "missing argument in -res\n");
                     return 1;
-                }  
+                }
             }
-            i+=3;
-        }  else if ((arg == "-ni")|| (arg== "--niter") ) {
-            if (i + 1 < argc) { // Make sure we aren't at the end of argv!
-                n_iter = atoi(argv[i+1]); 
-                if(n_iter<1){
+            i += 3;
+        } else if ((arg == "-ni") || (arg == "--niter")) {
+            if (i + 1 < argc) {  // Make sure we aren't at the end of argv!
+                n_iter = atoi(argv[i + 1]);
+                if (n_iter < 1) {
                     fprintf(stderr, "niter must be >0\n");
                     return 1;
                 }
-            } else { //Missing argument
+            } else {  // Missing argument
                 fprintf(stderr, "missing --niter\n");
                 return 1;
-            }  
+            }
             i++;
-        // } else if ((arg == "-bc")|| (arg== "--boundary-conditions") ) {
-        //     for (int j = 0; j<6;j++){
-        //         if (i + j + 1 < argc) { // Make sure we aren't at the end of argv!
-        //             bcdef[j/2][j%2] = (BoundaryType) atoi(argv[i+j+1]); 
-        //         } else { //Missing argument
-        //             fprintf(stderr, "missing argument in --boundary-conditions\n");
-        //             return 1;
-        //         }  
-        //     }
-        //     i+=6;
+            // } else if ((arg == "-bc")|| (arg== "--boundary-conditions") ) {
+            //     for (int j = 0; j<6;j++){
+            //         if (i + j + 1 < argc) { // Make sure we aren't at the end of argv!
+            //             bcdef[j/2][j%2] = (BoundaryType) atoi(argv[i+j+1]);
+            //         } else { //Missing argument
+            //             fprintf(stderr, "missing argument in --boundary-conditions\n");
+            //             return 1;
+            //         }
+            //     }
+            //     i+=6;
         }
     }
-    
+
     //-------------------------------------------------------------------------
     // Initialize MPI
     //-------------------------------------------------------------------------
-    int rank, comm_size;
+    int      rank, comm_size;
     MPI_Comm comm = MPI_COMM_WORLD;
 
     int provided;
     // set MPI_THREAD_FUNNELED or MPI_THREAD_SERIALIZED
     int requested = MPI_THREAD_FUNNELED;
     MPI_Init_thread(&argc, &argv, requested, &provided);
-    if(provided < requested){
+    if (provided < requested) {
         FLUPS_ERROR("The MPI-provided thread behavior does not match");
     }
-   
+
     MPI_Comm_rank(comm, &rank);
     MPI_Comm_size(comm, &comm_size);
 
     //-------------------------------------------------------------------------
-    //Definition of the problem
+    // Definition of the problem
     //-------------------------------------------------------------------------
-    const int     nglob[3] = {res[0], res[1], res[2]};
-    const int     nproc[3] = {1, nproc2D[0], nproc2D[1]}; //nproc[0] has to be 1 //CAUTION FOR THIS: nproc[1]<=nproc[2] !!!
-    const double  L[3]     = {1., 1., 1.};;
+    const int    nglob[3] = {res[0], res[1], res[2]};
+    const int    nproc[3] = {1, nproc2D[0], nproc2D[1]};  // nproc[0] has to be 1 //CAUTION FOR THIS: nproc[1]<=nproc[2] !!!
+    const double L[3]     = {1., 1., 1.};
+    ;
 
     const double h[3] = {L[0] / nglob[0], L[1] / nglob[1], L[2] / nglob[2]};
 
-    FLUPS_BoundaryType* mybc[3][2];
-    for(int id=0; id<3; id++){
-        for(int is=0; is<2; is++){
-            mybc[id][is] = (FLUPS_BoundaryType*) flups_malloc(sizeof(int)*1);
+    FLUPS_BoundaryType *mybc[3][2];
+    for (int id = 0; id < 3; id++) {
+        for (int is = 0; is < 2; is++) {
+            mybc[id][is]    = (FLUPS_BoundaryType *)flups_malloc(sizeof(int) * 1);
             mybc[id][is][0] = PER;
         }
     }
 
-    if(comm_size!=nproc[0]*nproc[1]*nproc[2])
+    if (comm_size != nproc[0] * nproc[1] * nproc[2])
         FLUPS_ERROR("Invalid number of procs");
 
-
     //-------------------------------------------------------------------------
     /** - Initialize FLUPS */
     //-------------------------------------------------------------------------
 
     // create a real topology
-    int FLUnmemIn[3],FLUnmemOUT[3];
-    Topology *topoIn      = new Topology(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm);
-    const int  nprocOut[3] = {1, 2, 1};
-    const int  nglobOut[3] = {17, 32, 64};
+    int                    FLUnmemIn[3], FLUnmemOUT[3];
+    Topology              *topoIn         = new Topology(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm);
+    const int              nprocOut[3]    = {1, 2, 1};
+    const int              nglobOut[3]    = {17, 32, 64};
     const FLUPS_CenterType center_type[3] = {CELL_CENTER, CELL_CENTER, CELL_CENTER};
-    
+
     // prepare profiling
 #ifdef SKIP_P3D
-    std::string FLUPSprof = "only_FLUPS_res" + std::to_string((int)(nglob[0]/L[0])) + "_nrank" + std::to_string(comm_size)+"_nthread" + std::to_string(omp_get_max_threads());
+    std::string FLUPSprof = "only_FLUPS_res" + std::to_string((int)(nglob[0] / L[0])) + "_nrank" + std::to_string(comm_size) + "_nthread" + std::to_string(omp_get_max_threads());
 #else
-    std::string FLUPSprof = "compare_FLUPS_res" + std::to_string((int)(nglob[0]/L[0])) + "_nrank" + std::to_string(comm_size)+"_nthread" + std::to_string(omp_get_max_threads());
+    std::string FLUPSprof = "compare_FLUPS_res" + std::to_string((int)(nglob[0] / L[0])) + "_nrank" + std::to_string(comm_size) + "_nthread" + std::to_string(omp_get_max_threads());
 #endif
-    Profiler* FLUprof = new Profiler(FLUPSprof);
+    Profiler *FLUprof = new Profiler(FLUPSprof);
 
     // solver creation and init
     Solver *mysolver = new Solver(topoIn, mybc, h, L, NOD, center_type, FLUprof);
@@ -162,13 +162,13 @@ int main(int argc, char *argv[]) {
     MPI_Comm_rank(comm, &rank);
 
     // retrieveing internal info from the solver
-    const Topology *topoSpec    = mysolver->get_innerTopo_spectral();
+    const Topology *topoSpec = mysolver->get_innerTopo_spectral();
 
     for (int i = 0; i < 3; i++) {
         FLUnmemIn[i]  = topoIn->nmem(i);
         FLUnmemOUT[i] = topoSpec->nmem(i);
     }
-    
+
     int istartGloOut[3], istartGlo[3];
     topoIn->get_istart_glob(istartGlo);
     topoSpec->get_istart_glob(istartGloOut);
@@ -181,143 +181,146 @@ int main(int argc, char *argv[]) {
     //-------------------------------------------------------------------------
 #ifndef SKIP_P3D
     int conf;
-    int dims[2] = {nproc[1],nproc[2]};
-    int  proc_order[3] = {0, 1, 2};
-    
+    int dims[2]       = {nproc[1], nproc[2]};
+    int proc_order[3] = {0, 1, 2};
+
     if (rank == 0)
         printf("Using processor grid %d x %d with pencils in x\n", dims[0], dims[1]);
 
-    std::string P3DFFTprof = "compare_P3DFFT_res" + std::to_string((int)(nglob[0]/L[0])) + "_nrank" + std::to_string(comm_size)+"_nthread" + std::to_string(omp_get_max_threads());
-    Profiler* P3Dprof = new Profiler(P3DFFTprof);
+    std::string P3DFFTprof = "compare_P3DFFT_res" + std::to_string((int)(nglob[0] / L[0])) + "_nrank" + std::to_string(comm_size) + "_nthread" + std::to_string(omp_get_max_threads());
+    Profiler   *P3Dprof    = new Profiler(P3DFFTprof);
 
     P3Dprof->create("init", "root");
     P3Dprof->create("FFTandSwitch", "root");
 
     /* Initialize P3DFFT */
     P3Dprof->start("init");
-    if(rank==0)
-        printf("Initilizing P3D...\n");    fflush(stdout);
+    if (rank == 0)
+        printf("Initilizing P3D...\n");
+    fflush(stdout);
     p3dfft::setup();
-    if(rank==0)
-        printf("...set up.\n");    fflush(stdout);
+    if (rank == 0)
+        printf("...set up.\n");
+    fflush(stdout);
 
     // Define the transform types for these two transforms
-    int type_ids1[3] = {p3dfft::R2CFFT_D,p3dfft::CFFT_FORWARD_D ,p3dfft::CFFT_FORWARD_D };
-    int type_ids2[3] = {p3dfft::C2RFFT_D,p3dfft::CFFT_BACKWARD_D,p3dfft::CFFT_BACKWARD_D};
-    //Unfortunately, P3D segfaults when you do a DST2_REAL_D followed by a R2CFFT_D !!
-    p3dfft::trans_type3D type_rcc(type_ids1); 
+    int type_ids1[3] = {p3dfft::R2CFFT_D, p3dfft::CFFT_FORWARD_D, p3dfft::CFFT_FORWARD_D};
+    int type_ids2[3] = {p3dfft::C2RFFT_D, p3dfft::CFFT_BACKWARD_D, p3dfft::CFFT_BACKWARD_D};
+    // Unfortunately, P3D segfaults when you do a DST2_REAL_D followed by a R2CFFT_D !!
+    p3dfft::trans_type3D type_rcc(type_ids1);
     p3dfft::trans_type3D type_ccr(type_ids2);
 
     // input grid
-    int  gdimsIN[3]      = {topoIn->nglob(0), topoIn->nglob(1), topoIn->nglob(2)};
-    int  mem_orderIN[3]  = {0, 1, 2};
-    int  nprocIN[3]      = {nproc[0], nproc[1], nproc[2]};
-    p3dfft::grid gridIN(gdimsIN,-1,nprocIN,proc_order,mem_orderIN,comm); 
+    int          gdimsIN[3]     = {topoIn->nglob(0), topoIn->nglob(1), topoIn->nglob(2)};
+    int          mem_orderIN[3] = {0, 1, 2};
+    int          nprocIN[3]     = {nproc[0], nproc[1], nproc[2]};
+    p3dfft::grid gridIN(gdimsIN, -1, nprocIN, proc_order, mem_orderIN, comm);
 
-    if(rank==0)
-        printf("...input grid.\n");    fflush(stdout);
+    if (rank == 0)
+        printf("...input grid.\n");
+    fflush(stdout);
 
     // ouput grid
     int gdimsOUT[3];
-    for(int i=0; i < 3;i++) 
+    for (int i = 0; i < 3; i++)
         gdimsOUT[i] = gdimsIN[i];
-    gdimsOUT[0] = gdimsOUT[0]/2+1;
-    int mem_orderOUT[3]  = {2, 1, 0}; //blindly mimicking samples, anything else produces a segfault anyway...
-    int nprocOUT[3]      = {dims[0],dims[1],1};
-    p3dfft::grid gridOUT(gdimsOUT,0,nprocOUT,proc_order,mem_orderOUT,comm); 
+    gdimsOUT[0]                  = gdimsOUT[0] / 2 + 1;
+    int          mem_orderOUT[3] = {2, 1, 0};  // blindly mimicking samples, anything else produces a segfault anyway...
+    int          nprocOUT[3]     = {dims[0], dims[1], 1};
+    p3dfft::grid gridOUT(gdimsOUT, 0, nprocOUT, proc_order, mem_orderOUT, comm);
 
-    if(rank==0)
-        printf("...output grid.\n");    fflush(stdout);
+    if (rank == 0)
+        printf("...output grid.\n");
+    fflush(stdout);
 
     // Set up 3D transforms, including stages and plans, for forward trans.
-    p3dfft::transform3D<double,p3dfft::complex_double> trans_f(gridIN,gridOUT,&type_rcc);
+    p3dfft::transform3D<double, p3dfft::complex_double> trans_f(gridIN, gridOUT, &type_rcc);
     // Set up 3D transforms, including stages and plans, for backward trans.
-    p3dfft::transform3D<p3dfft::complex_double,double> trans_b(gridOUT,gridIN,&type_ccr);
-    if(rank==0)
-        printf("...transforms.\n");    fflush(stdout);
+    p3dfft::transform3D<p3dfft::complex_double, double> trans_b(gridOUT, gridIN, &type_ccr);
+    if (rank == 0)
+        printf("...transforms.\n");
+    fflush(stdout);
 
     P3Dprof->stop("init");
-    if(rank==0)
-        printf("Done with P3D init.\n");    fflush(stdout);
+    if (rank == 0)
+        printf("Done with P3D init.\n");
+    fflush(stdout);
 
     p3dfft::timers.init();
 
-
     // Find local dimensions in storage order, and also the starting position of the local array in the global array
-  
-    //input grid - SIZE IN DOUBLEs
-    int P3DnlocIN[3],glob_startIN[3];
-    for(int i=0;i<3;i++) {
-        P3DnlocIN[mem_orderIN[i]] = gridIN.ldims[i];
+
+    // input grid - SIZE IN DOUBLEs
+    int P3DnlocIN[3], glob_startIN[3];
+    for (int i = 0; i < 3; i++) {
+        P3DnlocIN[mem_orderIN[i]]    = gridIN.ldims[i];
         glob_startIN[mem_orderIN[i]] = gridIN.glob_start[i];
     }
-    int P3DmemsizeIN = P3DnlocIN[0]*P3DnlocIN[1]*P3DnlocIN[2];
+    int P3DmemsizeIN = P3DnlocIN[0] * P3DnlocIN[1] * P3DnlocIN[2];
 
-    //output grid - SIZE IN COMPLEXes
-    int P3DnlocOUT[3],glob_startOUT[3];
-    for(int i=0;i<3;i++) {
-        P3DnlocOUT[mem_orderOUT[i]] = gridOUT.ldims[i];
+    // output grid - SIZE IN COMPLEXes
+    int P3DnlocOUT[3], glob_startOUT[3];
+    for (int i = 0; i < 3; i++) {
+        P3DnlocOUT[mem_orderOUT[i]]    = gridOUT.ldims[i];
         glob_startOUT[mem_orderOUT[i]] = gridOUT.glob_start[i];
     }
-    int P3DmemsizeOUT = P3DnlocOUT[0]*P3DnlocOUT[1]*P3DnlocOUT[2];
+    int P3DmemsizeOUT = P3DnlocOUT[0] * P3DnlocOUT[1] * P3DnlocOUT[2];
 #endif
-    
 
     // //-------------------------------------------------------------------------
     // /** - allocate rhs and solution */
     // //-------------------------------------------------------------------------
-    if(rank == 0) {
-        printf("[FLUPS] topo IN glob : %d %d %d \n",topoIn->nglob(0),topoIn->nglob(1),topoIn->nglob(2));
-        printf("[FLUPS] topo IN loc : %d*%d*%d = %d (check: %d %d %d)\n",topoIn->nmem(0),topoIn->nmem(1),topoIn->nmem(2),topoIn->memsize(),topoIn->nloc(0),topoIn->nloc(1),topoIn->nloc(2));
-        printf("[FLUPS] topo OUT glob : %d %d %d \n",topoSpec->nglob(0),topoSpec->nglob(1),topoSpec->nglob(2));
-        printf("[FLUPS] topo OUT loc  : nmem: %d*%d*%d nf:%d (nloc: %d %d %d)  \n",topoSpec->nmem(0),topoSpec->nmem(1),topoSpec->nmem(2),topoSpec->nf(),topoSpec->nloc(0),topoSpec->nloc(1),topoSpec->nloc(2));
+    if (rank == 0) {
+        printf("[FLUPS] topo IN glob : %d %d %d \n", topoIn->nglob(0), topoIn->nglob(1), topoIn->nglob(2));
+        printf("[FLUPS] topo IN loc : %d*%d*%d = %d (check: %d %d %d)\n", topoIn->nmem(0), topoIn->nmem(1), topoIn->nmem(2), topoIn->memsize(), topoIn->nloc(0), topoIn->nloc(1), topoIn->nloc(2));
+        printf("[FLUPS] topo OUT glob : %d %d %d \n", topoSpec->nglob(0), topoSpec->nglob(1), topoSpec->nglob(2));
+        printf("[FLUPS] topo OUT loc  : nmem: %d*%d*%d nf:%d (nloc: %d %d %d)  \n", topoSpec->nmem(0), topoSpec->nmem(1), topoSpec->nmem(2), topoSpec->nf(), topoSpec->nloc(0), topoSpec->nloc(1), topoSpec->nloc(2));
 
 #ifndef SKIP_P3D
-        printf("[P3DFFT++] topo IN glob  : %d %d %d  \n",gdimsIN[0],gdimsIN[1],gdimsIN[2]);
-        printf("[P3DFFT++] topo IN loc   : %d %d %d (is: %d %d %d) \n",P3DnlocIN[0],P3DnlocIN[1],P3DnlocIN[2],glob_startIN[0],glob_startIN[1],glob_startIN[2]);
-        printf("[P3DFFT++] topo OUT glob : %d %d %d  \n",gdimsOUT[0],gdimsOUT[1],gdimsOUT[2]);
-        printf("[P3DFFT++] topo OUT loc  : %d %d %d (is: %d %d %d) \n",P3DnlocOUT[0],P3DnlocOUT[1],P3DnlocOUT[2],glob_startOUT[0],glob_startOUT[1],glob_startOUT[2]);
+        printf("[P3DFFT++] topo IN glob  : %d %d %d  \n", gdimsIN[0], gdimsIN[1], gdimsIN[2]);
+        printf("[P3DFFT++] topo IN loc   : %d %d %d (is: %d %d %d) \n", P3DnlocIN[0], P3DnlocIN[1], P3DnlocIN[2], glob_startIN[0], glob_startIN[1], glob_startIN[2]);
+        printf("[P3DFFT++] topo OUT glob : %d %d %d  \n", gdimsOUT[0], gdimsOUT[1], gdimsOUT[2]);
+        printf("[P3DFFT++] topo OUT loc  : %d %d %d (is: %d %d %d) \n", P3DnlocOUT[0], P3DnlocOUT[1], P3DnlocOUT[2], glob_startOUT[0], glob_startOUT[1], glob_startOUT[2]);
 #endif
 
-        printf("I am going to allocate FLUPS: %d (inside FLUPS: %d)\n",FLUmemsizeIN,FLUmemsizeOUT);
-#ifndef SKIP_P3D    
-        printf("                        P3D: %d (out %d C) \n",P3DmemsizeIN,P3DmemsizeOUT);
+        printf("I am going to allocate FLUPS: %d (inside FLUPS: %d)\n", FLUmemsizeIN, FLUmemsizeOUT);
+#ifndef SKIP_P3D
+        printf("                        P3D: %d (out %d C) \n", P3DmemsizeIN, P3DmemsizeOUT);
 #endif
     }
-    
- 
-    double *rhsFLU   = (double *)fftw_malloc(sizeof(double) * FLUmemsizeIN);
-    //solFLU   allocated by sflups setup with the larger size
-    std::memset(rhsFLU, 0, sizeof(double ) * FLUmemsizeIN);
-    std::memset(solFLU, 0, sizeof(double ) * FLUmemsizeOUT); 
-
-#ifndef SKIP_P3D 
-    double *rhsP3D   = (double *)fftw_malloc(sizeof(double) * P3DmemsizeIN);
-    p3dfft::complex_double *solP3D   = (p3dfft::complex_double *)fftw_malloc(sizeof(p3dfft::complex_double) * P3DmemsizeOUT);
-    std::memset(rhsP3D, 0, sizeof(double ) * P3DmemsizeIN);
+
+    double *rhsFLU = (double *)fftw_malloc(sizeof(double) * FLUmemsizeIN);
+    // solFLU   allocated by sflups setup with the larger size
+    std::memset(rhsFLU, 0, sizeof(double) * FLUmemsizeIN);
+    std::memset(solFLU, 0, sizeof(double) * FLUmemsizeOUT);
+
+#ifndef SKIP_P3D
+    double                 *rhsP3D = (double *)fftw_malloc(sizeof(double) * P3DmemsizeIN);
+    p3dfft::complex_double *solP3D = (p3dfft::complex_double *)fftw_malloc(sizeof(p3dfft::complex_double) * P3DmemsizeOUT);
+    std::memset(rhsP3D, 0, sizeof(double) * P3DmemsizeIN);
     std::memset(solP3D, 0, sizeof(p3dfft::complex_double) * P3DmemsizeOUT);
 #endif
-    
+
     // printf("istart: %d %d %d =? %d %d %d(P3D)\n",istartGlo[0],istartGlo[1],istartGlo[2],glob_startIN[0],glob_startIN[1],glob_startIN[2]);
 
-    double f = 1; //frequency of the wave
+    double f = 1;  // frequency of the wave
 
     for (int i2 = 0; i2 < topoIn->nloc(2); i2++) {
         for (int i1 = 0; i1 < topoIn->nloc(1); i1++) {
             for (int i0 = 0; i0 < topoIn->nloc(0); i0++) {
-                double       x    = (istartGlo[0] + i0 ) * h[0]; //+ 0.5
-                double       y    = (istartGlo[1] + i1 ) * h[1]; //+ 0.5
-                double       z    = (istartGlo[2] + i2 ) * h[2]; //+ 0.5
+                double x = (istartGlo[0] + i0) * h[0];  //+ 0.5
+                double y = (istartGlo[1] + i1) * h[1];  //+ 0.5
+                double z = (istartGlo[2] + i2) * h[2];  //+ 0.5
 
-                // double       xF   = (istartGlo[0] + i0 + .5 ) * h[0]; 
+                // double       xF   = (istartGlo[0] + i0 + .5 ) * h[0];
 
                 size_t id;
-                id    = localIndex(0, i0, i1, i2, 0, FLUnmemIn, 1, 0);
+                id         = localIndex(0, i0, i1, i2, 0, FLUnmemIn, 1, 0);
                 rhsFLU[id] = sin((c_2pi / L[0] * f) * x) * sin((c_2pi / L[1] * f) * y) * sin((c_2pi / L[2] * f) * z);
 
-#ifndef SKIP_P3D                 
-                //p3d does not care of the size you allocate, juste fill the first isize elements
-                id =  localIndex(0, i0, i1, i2, 0, P3DnlocIN, 1, 0);
+#ifndef SKIP_P3D
+                // p3d does not care of the size you allocate, juste fill the first isize elements
+                id         = localIndex(0, i0, i1, i2, 0, P3DnlocIN, 1, 0);
                 rhsP3D[id] = sin((c_2pi / L[0] * f) * x) * sin((c_2pi / L[1] * f) * y) * sin((c_2pi / L[2] * f) * z);
 #endif
             }
@@ -327,114 +330,112 @@ int main(int argc, char *argv[]) {
     // //-------------------------------------------------------------------------
     // /** - Proceed to the solve */
     // //-------------------------------------------------------------------------
-    
-       
-    double factor = 1.0/(nglob[0]* nglob[1]*nglob[2]);
-    //Warmup
-    // trans_f.exec(rhsP3D,solP3D,false);
-
-    for (int iter=0;iter<n_iter;iter++){
-        
+
+    double factor = 1.0 / (nglob[0] * nglob[1] * nglob[2]);
+    // Warmup
+    //  trans_f.exec(rhsP3D,solP3D,false);
+
+    for (int iter = 0; iter < n_iter; iter++) {
         MPI_Barrier(comm);
 
-#ifndef SKIP_P3D  
+#ifndef SKIP_P3D
         // ------------------P3D---------------:
-        if(rank==0){
-            printf("Iter %i P3D\n",iter);
+        if (rank == 0) {
+            printf("Iter %i P3D\n", iter);
         }
         fflush(stdout);
 
         P3Dprof->start("FFTandSwitch");
-        trans_f.exec(rhsP3D,solP3D,false);  // Execute forward real-to-complex FFT
+        trans_f.exec(rhsP3D, solP3D, false);  // Execute forward real-to-complex FFT
         P3Dprof->stop("FFTandSwitch");
 
 // #define PRINT_RES
 #ifdef PRINT_RES
         /* normalize */
-        for(int id = 0; id<P3DmemsizeOUT; id++){
+        for (int id = 0; id < P3DmemsizeOUT; id++) {
             solP3D[id] *= factor;
         }
-        print_res(solP3D,P3DnlocOUT,glob_startOUT);
+        print_res(solP3D, P3DnlocOUT, glob_startOUT);
 #endif
 #endif
 
         // ------------------FLUPS---------------:
         MPI_Barrier(comm);
-        if(rank==0){
-            printf("Iter %i FLUPS\n",iter);
+        if (rank == 0) {
+            printf("Iter %i FLUPS\n", iter);
         }
         fflush(stdout);
 
-        //reinit sol to prepare for inplace 3D FFT
-        std::memcpy(solFLU, rhsFLU, sizeof(double ) * FLUmemsizeIN);
+        // reinit sol to prepare for inplace 3D FFT
+        std::memcpy(solFLU, rhsFLU, sizeof(double) * FLUmemsizeIN);
 
         FLUprof->start("solve");
-        mysolver->do_FFT(solFLU,FLUPS_FORWARD);
+        mysolver->do_FFT(solFLU, FLUPS_FORWARD);
         FLUprof->stop("solve");
 
 #ifdef PRINT_RES
         /* normalize*/
-        for(int id = 0; id<FLUmemsizeOUT; id++){
+        for (int id = 0; id < FLUmemsizeOUT; id++) {
             solFLU[id] *= factor;
         }
-        print_res(solFLU,topoSpec);
+        print_res(solFLU, topoSpec);
 #endif
-        //Note: if we do several FFT in a raw, the results are wrong with FLUPS for iter>=1. This is because
-        // FLUPS would require to do the backward transform before doing a new forward transform (or a reset
-        // function, which we choose not to implement).
+        // Note: if we do several FFT in a raw, the results are wrong with FLUPS for iter>=1. This is because
+        //  FLUPS would require to do the backward transform before doing a new forward transform (or a reset
+        //  function, which we choose not to implement).
     }
 
     // //-------------------------------------------------------------------------
     // /** - get timings */
     // //-------------------------------------------------------------------------
-    
+
     // --- FLUPS -------
     FLUprof->disp("solve");
-    delete(FLUprof);
+    delete (FLUprof);
 
-#ifndef SKIP_P3D  
+#ifndef SKIP_P3D
     // --- P3DFFT -------
 #ifdef P3DMODIF
     double times[8][3];
-    p3dfft::timers.get(times,comm);
-    string P3DNames[8] = {"Reorder_trans","Reorder_out","Reorder_in","Trans_exec","Packsend","Packsend_trans","Unpackrecv","Alltoall"};
+    p3dfft::timers.get(times, comm);
+    string P3DNames[8] = {"Reorder_trans", "Reorder_out", "Reorder_in", "Trans_exec", "Packsend", "Packsend_trans", "Unpackrecv", "Alltoall"};
 #endif
 
-    double timeRef      = P3Dprof->get_timeAcc("FFTandSwitch");
-    double timeInit     = P3Dprof->get_timeAcc("init");
+    double timeRef  = P3Dprof->get_timeAcc("FFTandSwitch");
+    double timeInit = P3Dprof->get_timeAcc("init");
 
     P3Dprof->disp("FFTandSwitch");
-#ifdef P3DMODIF    
-    if(rank == 0) {
+#ifdef P3DMODIF
+    if (rank == 0) {
         // printf("===================================================================================================================================================\n");
         // printf("          TIMER P3DFFT %s\n",P3DFFTprof.c_str());
         // printf("%25s|  %-13s\t%-13s\t%-13s\t%-13s\t%-13s\t%-13s\t%-13s\t%-13s\t%-13s\n","-NAME-    ", "-% global-", "-% local-", "-Total time-", "-Self time-", "-time/call-", "-Min time-", "-Max time-","-Mean cnt-","-(MB/s)-");
 
         string filename = "prof/" + P3DFFTprof + "_time.csv";
-        FILE* file      = fopen(filename.c_str(), "w+");
-    
-        printf("%-25.25s|  %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", "init", 100*timeInit/timeRef, 100*timeInit/(timeRef+timeInit), timeInit, 0., timeInit/n_iter, 0., 0.,(double) n_iter,0.);
-        fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", "init", 100*timeInit/timeRef, 100*timeInit/(timeRef+timeInit), timeInit, 0., timeInit/n_iter, 0., 0.,(double) n_iter,0);
-        printf("%-25.25s|  %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", "FFTandSwitch", 100., 100*timeRef/(timeRef+timeInit), timeRef, 0., timeRef/n_iter, 0., 0., (double) n_iter,0.);
-        fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", "FFTandSwitch", 100., 100*timeRef/(timeRef+timeInit), timeRef, 0., timeRef/n_iter, 0., 0., (double) n_iter,0.);
-        for(int i=0; i<8; i++){
+        FILE  *file     = fopen(filename.c_str(), "w+");
+
+        printf("%-25.25s|  %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", "init", 100 * timeInit / timeRef, 100 * timeInit / (timeRef + timeInit), timeInit, 0., timeInit / n_iter, 0., 0., (double)n_iter, 0.);
+        fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", "init", 100 * timeInit / timeRef, 100 * timeInit / (timeRef + timeInit), timeInit, 0., timeInit / n_iter, 0., 0., (double)n_iter, 0);
+        printf("%-25.25s|  %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", "FFTandSwitch", 100., 100 * timeRef / (timeRef + timeInit), timeRef, 0., timeRef / n_iter, 0., 0., (double)n_iter, 0.);
+        fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", "FFTandSwitch", 100., 100 * timeRef / (timeRef + timeInit), timeRef, 0., timeRef / n_iter, 0., 0., (double)n_iter, 0.);
+        for (int i = 0; i < 8; i++) {
             int j = i;
-            printf("%-25.25s|  %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", ("-- "+P3DNames[j]).c_str(), 100.*times[j][0]/timeRef, 100*times[j][0]/timeRef, times[j][0], 0., times[j][0]/n_iter, times[j][1]/n_iter, times[j][2]/n_iter, (double) n_iter,0.);
-            fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", ("-- "+P3DNames[j]).c_str(), 100.*times[j][0]/timeRef, 100*times[j][0]/timeRef, times[j][0], 0., times[j][0]/n_iter, times[j][1]/n_iter, times[j][2]/n_iter, (double) n_iter,0.);
+            printf("%-25.25s|  %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", ("-- " + P3DNames[j]).c_str(), 100. * times[j][0] / timeRef, 100 * times[j][0] / timeRef, times[j][0], 0., times[j][0] / n_iter, times[j][1] / n_iter, times[j][2] / n_iter, (double)n_iter, 0.);
+            fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", ("-- " + P3DNames[j]).c_str(), 100. * times[j][0] / timeRef, 100 * times[j][0] / timeRef, times[j][0], 0., times[j][0] / n_iter, times[j][1] / n_iter, times[j][2] / n_iter, (double)n_iter, 0.);
         }
         fclose(file);
-    }   
+    }
 #else
     p3dfft::timers.print(comm);
 #endif
-    delete(P3Dprof);
+    delete (P3Dprof);
 #endif
 
-    if(rank==0)
+    if (rank == 0)
         printf("Done ! Now let's clean...\n");
     fflush(stdout);
 
-#ifndef SKIP_P3D      
+#ifndef SKIP_P3D
     fftw_free(solP3D);
     fftw_free(rhsP3D);
 #endif
@@ -443,36 +444,36 @@ int main(int argc, char *argv[]) {
     topoIn->~Topology();
     topoSpec->~Topology();
     mysolver->~Solver();
-#ifndef SKIP_P3D      
+#ifndef SKIP_P3D
     p3dfft::cleanup();
 #endif
 
+    flups_cleanup_fftw();
     MPI_Finalize();
 }
 
-void print_res(double *A, const Topology* topo) {
-    const int ax0     = topo->axis();
-    const int ax1     = (ax0 + 1) % 3;
-    const int ax2     = (ax0 + 2) % 3;
-    const int nf = 2;//topo->nf()
-    int nmem[3];
-    
-    for(int i=0;i<3;i++){
+void print_res(double *A, const Topology *topo) {
+    const int ax0 = topo->axis();
+    const int ax1 = (ax0 + 1) % 3;
+    const int ax2 = (ax0 + 2) % 3;
+    const int nf  = 2;  // topo->nf()
+    int       nmem[3];
+
+    for (int i = 0; i < 3; i++) {
         nmem[i] = topo->nmem(i);
     }
 
     int gstart[3];
     topo->get_istart_glob(gstart);
 
-    if (topo->isComplex()){
+    if (topo->isComplex()) {
         for (int i2 = 0; i2 < topo->nloc(ax2); i2++) {
             for (int i1 = 0; i1 < topo->nloc(ax1); i1++) {
-                //local indexes start
+                // local indexes start
                 const size_t id = localIndex(ax0, 0, i1, i2, ax0, nmem, 2, 0);
                 for (int i0 = 0; i0 < topo->nloc(ax0); i0++) {
-                    
-                    if (std::fabs(A[id+i0*2]) + std::fabs(A[id+i0*2 + 1]) > 1.25e-4) {
-                        printf("FLU(%d %d %d) %lg +i1* %lg\n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id+i0*2], A[id+i0*2 + 1]);
+                    if (std::fabs(A[id + i0 * 2]) + std::fabs(A[id + i0 * 2 + 1]) > 1.25e-4) {
+                        printf("FLU(%d %d %d) %lg +i1* %lg\n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id + i0 * 2], A[id + i0 * 2 + 1]);
                     }
                 }
             }
@@ -480,12 +481,11 @@ void print_res(double *A, const Topology* topo) {
     } else {
         for (int i2 = 0; i2 < topo->nloc(ax2); i2++) {
             for (int i1 = 0; i1 < topo->nloc(ax1); i1++) {
-                //local indexes start
+                // local indexes start
                 const size_t id = localIndex(ax0, 0, i1, i2, ax0, nmem, 1, 0);
                 for (int i0 = 0; i0 < topo->nloc(ax0); i0++) {
-                    
-                    if (std::fabs(A[id+i0]) > 1.25e-4) {
-                        printf("FLU(%d %d %d) %lg \n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id+i0]);
+                    if (std::fabs(A[id + i0]) > 1.25e-4) {
+                        printf("FLU(%d %d %d) %lg \n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id + i0]);
                     }
                 }
             }
@@ -493,18 +493,17 @@ void print_res(double *A, const Topology* topo) {
     }
 }
 
-void print_res(p3dfft::complex_double *A,int *sdims,int *gstart)
-{
-  int x,y,z;
-  p3dfft::complex_double *p;
-  int imo[3],i,j;
-  p = A;
-
-  for(z=0;z < sdims[2];z++)
-    for(y=0;y < sdims[1];y++)
-      for(x=0;x < sdims[0];x++) {
-        if(std::fabs(p->real())+std::fabs(p->imag()) > 1.25e-4) 
-            printf("P3D(%d %d %d) %lg +i1* %lg\n",x+gstart[0],y+gstart[1],z+gstart[2],p->real(),p->imag());
-        p++;
-      }
-}
\ No newline at end of file
+void print_res(p3dfft::complex_double *A, int *sdims, int *gstart) {
+    int                     x, y, z;
+    p3dfft::complex_double *p;
+    int                     imo[3], i, j;
+    p = A;
+
+    for (z = 0; z < sdims[2]; z++)
+        for (y = 0; y < sdims[1]; y++)
+            for (x = 0; x < sdims[0]; x++) {
+                if (std::fabs(p->real()) + std::fabs(p->imag()) > 1.25e-4)
+                    printf("P3D(%d %d %d) %lg +i1* %lg\n", x + gstart[0], y + gstart[1], z + gstart[2], p->real(), p->imag());
+                p++;
+            }
+}
diff --git a/samples/compareP3DFFT/main_compare.cpp b/samples/compareP3DFFT/main_compare.cpp
index eb8c57d..4735e1f 100644
--- a/samples/compareP3DFFT/main_compare.cpp
+++ b/samples/compareP3DFFT/main_compare.cpp
@@ -4,65 +4,62 @@
 #include "Solver.hpp"
 #include "p3dfft.h"
 
-void print_all_P3D(double *A,long int nar);
-void print_res(double *A, const Topology* topo);
+void print_all_P3D(double *A, long int nar);
+void print_res(double *A, const Topology *topo);
 
 int main(int argc, char *argv[]) {
-    
     //-------------------------------------------------------------------------
     // Initialize MPI
     //-------------------------------------------------------------------------
-    int rank, comm_size;
+    int      rank, comm_size;
     MPI_Comm comm = MPI_COMM_WORLD;
 
     int provided;
     // set MPI_THREAD_FUNNELED or MPI_THREAD_SERIALIZED
     int requested = MPI_THREAD_FUNNELED;
     MPI_Init_thread(&argc, &argv, requested, &provided);
-    if(provided < requested){
+    if (provided < requested) {
         FLUPS_ERROR("The MPI-provided thread behavior does not match");
     }
-   
+
     MPI_Comm_rank(comm, &rank);
     MPI_Comm_size(comm, &comm_size);
 
     //-------------------------------------------------------------------------
-    //Definition of the problem
+    // Definition of the problem
     //-------------------------------------------------------------------------
-    const int     nglob[3] = {16, 16, 16};
-    const int     nproc[3] = {1, 1, 2}; //nproc[0] has to be 1 //CAUTION FOR THIS: nproc[1]<=nproc[2] !!!
-    const double  L[3]     = {1., 1., 1.};
+    const int    nglob[3] = {16, 16, 16};
+    const int    nproc[3] = {1, 1, 2};  // nproc[0] has to be 1 //CAUTION FOR THIS: nproc[1]<=nproc[2] !!!
+    const double L[3]     = {1., 1., 1.};
 
     const double h[3] = {L[0] / nglob[0], L[1] / nglob[1], L[2] / nglob[2]};
-    
+
     const FLUPS_CenterType center_type[3] = {CELL_CENTER, CELL_CENTER, CELL_CENTER};
 
-    FLUPS_BoundaryType* mybc[3][2];
-    for(int id=0; id<3; id++){
-        for(int is=0; is<2; is++){
-            mybc[id][is] = (FLUPS_BoundaryType*) flups_malloc(sizeof(int)*1);
+    FLUPS_BoundaryType *mybc[3][2];
+    for (int id = 0; id < 3; id++) {
+        for (int is = 0; is < 2; is++) {
+            mybc[id][is]    = (FLUPS_BoundaryType *)flups_malloc(sizeof(int) * 1);
             mybc[id][is][0] = PER;
         }
     }
 
-    unsigned char op_f[]="fft", op_b[]="tff";
+    unsigned char op_f[] = "fft", op_b[] = "tff";
 
     const int n_iter = 100;
 
-
-
     //-------------------------------------------------------------------------
     /** - Initialize FLUPS */
     //-------------------------------------------------------------------------
 
     // create a real topology
-    int FLUnmemIn[3],FLUnmemOUT[3];
-    FLUPS_Topology *topoIn = new FLUPS_Topology(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm);
-    const int  nprocOut[3] = {1, 2, 1};
-    const int  nglobOut[3] = {17, 32, 64};
-    
-    std::string FLUPSprof = "compare_FLUPS_res" + std::to_string((int)(nglob[0]/L[0])) + "_nrank" + std::to_string(comm_size)+"_nthread" + std::to_string(omp_get_max_threads());
-    Profiler* FLUprof = new Profiler(FLUPSprof);
+    int             FLUnmemIn[3], FLUnmemOUT[3];
+    FLUPS_Topology *topoIn      = new FLUPS_Topology(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm);
+    const int       nprocOut[3] = {1, 2, 1};
+    const int       nglobOut[3] = {17, 32, 64};
+
+    std::string FLUPSprof = "compare_FLUPS_res" + std::to_string((int)(nglob[0] / L[0])) + "_nrank" + std::to_string(comm_size) + "_nthread" + std::to_string(omp_get_max_threads());
+    Profiler   *FLUprof   = new Profiler(FLUPSprof);
 
     FLUPS_Solver *mysolver = new FLUPS_Solver(topoIn, mybc, h, L, NOD, center_type, FLUprof);
 
@@ -73,13 +70,13 @@ int main(int argc, char *argv[]) {
     comm = flups_topo_get_comm(topoIn);
     MPI_Comm_rank(comm, &rank);
 
-    const Topology *topoSpec    = mysolver->get_innerTopo_spectral();
+    const Topology *topoSpec = mysolver->get_innerTopo_spectral();
 
     for (int i = 0; i < 3; i++) {
         FLUnmemIn[i]  = topoIn->nmem(i);
         FLUnmemOUT[i] = topoSpec->nmem(i);
     }
-    
+
     int istartGloOut[3], istartGlo[3];
     topoIn->get_istart_glob(istartGlo);
     topoSpec->get_istart_glob(istartGloOut);
@@ -92,17 +89,17 @@ int main(int argc, char *argv[]) {
     //-------------------------------------------------------------------------
 
     int conf;
-    int dims[2] = {nproc[1],nproc[2]};
+    int dims[2] = {nproc[1], nproc[2]};
     int P3Dmemsize[3];
-    int istart[3],isize[3],iend[3];
-    int fstart[3],fsize[3],fend[3];
-    int tstart[3],tsize[3],tend[3];
+    int istart[3], isize[3], iend[3];
+    int fstart[3], fsize[3], fend[3];
+    int tstart[3], tsize[3], tend[3];
 
     if (rank == 0)
         printf("Using processor grid %d x %d with pencils in x\n", dims[0], dims[1]);
 
-    std::string P3DFFTprof = "compare_P3DFFT_res" + std::to_string((int)(nglob[0]/L[0])) + "_nrank" + std::to_string(comm_size)+"_nthread" + std::to_string(omp_get_max_threads());
-    Profiler* P3Dprof = new Profiler(P3DFFTprof);
+    std::string P3DFFTprof = "compare_P3DFFT_res" + std::to_string((int)(nglob[0] / L[0])) + "_nrank" + std::to_string(comm_size) + "_nthread" + std::to_string(omp_get_max_threads());
+    Profiler   *P3Dprof    = new Profiler(P3DFFTprof);
 
     P3Dprof->create("init", "root");
     P3Dprof->create("FFTandSwitch", "root");
@@ -111,76 +108,72 @@ int main(int argc, char *argv[]) {
     P3Dprof->start("init");
     Cp3dfft_setup(dims, nglob[0], nglob[1], nglob[2], MPI_Comm_c2f(comm), nglob[0], nglob[1], nglob[2], 1, P3Dmemsize);
     P3Dprof->stop("init");
-    
+
     /* Get dimensions for input array - real numbers, X-pencil shape.
       Note that we are following the Fortran ordering, i.e.
       the dimension  with stride-1 is X. */
-    //really? well x seems to be the last dimension...
+    // really? well x seems to be the last dimension...
     conf = 1;
     Cp3dfft_get_dims(istart, iend, isize, conf);
-    
+
     /* Get dimensions for output array - complex numbers, Z-pencil shape.
       Stride-1 dimension could be X or Z, depending on how the library
       was compiled (stride1 option) */
     conf = 2;
     Cp3dfft_get_dims(fstart, fend, fsize, conf);
-    
+
     /* Get what you should allocate in place, in double, per proc. Large enough for padding */
     conf = 3;
     Cp3dfft_get_dims(tstart, tend, tsize, conf);
 
-
-
     // //-------------------------------------------------------------------------
     // /** - allocate rhs and solution */
     // //-------------------------------------------------------------------------
-    printf("[FLUPS] topo IN glob : %d %d %d \n",topoIn->nglob(0),topoIn->nglob(1),topoIn->nglob(2));
-    printf("[FLUPS] topo IN loc : %d*%d*%d = %d (check: %d %d %d)\n",topoIn->nmem(0),topoIn->nmem(1),topoIn->nmem(2),topoIn->memsize(),topoIn->nloc(0),topoIn->nloc(1),topoIn->nloc(2));
-    printf("[FLUPS] topo OUT glob : %d %d %d \n",topoSpec->nglob(0),topoSpec->nglob(1),topoSpec->nglob(2));
-    printf("[FLUPS] topo OUT loc  : nmem: %d*%d*%d nf:%d (nloc: %d %d %d)  \n",topoSpec->nmem(0),topoSpec->nmem(1),topoSpec->nmem(2),topoSpec->nf(),topoSpec->nloc(0),topoSpec->nloc(1),topoSpec->nloc(2));
-
-    printf("[P3DFFT] topo 0 loc : %d %d %d  - (is: %d %d %d ie: %d %d %d size: %d %d %d) \n",P3Dmemsize[0],P3Dmemsize[1],P3Dmemsize[2],istart[0],istart[1],istart[2], iend[0],iend[1],iend[2], isize[0],isize[1],isize[2]);
-    printf("[P3DFFT] topo 2 loc : %d %d %d  - (is: %d %d %d ie: %d %d %d) \n",fsize[0],fsize[1],fsize[2], fstart[0],fstart[1],fstart[2], fend[0],fend[1],fend[2]);
-    printf("[P3DFFT] for alloc: %d %d %d  =? %d %d %d \n",P3Dmemsize[0],P3Dmemsize[1],P3Dmemsize[2], tsize[0],tsize[1],tsize[2]);
+    printf("[FLUPS] topo IN glob : %d %d %d \n", topoIn->nglob(0), topoIn->nglob(1), topoIn->nglob(2));
+    printf("[FLUPS] topo IN loc : %d*%d*%d = %d (check: %d %d %d)\n", topoIn->nmem(0), topoIn->nmem(1), topoIn->nmem(2), topoIn->memsize(), topoIn->nloc(0), topoIn->nloc(1), topoIn->nloc(2));
+    printf("[FLUPS] topo OUT glob : %d %d %d \n", topoSpec->nglob(0), topoSpec->nglob(1), topoSpec->nglob(2));
+    printf("[FLUPS] topo OUT loc  : nmem: %d*%d*%d nf:%d (nloc: %d %d %d)  \n", topoSpec->nmem(0), topoSpec->nmem(1), topoSpec->nmem(2), topoSpec->nf(), topoSpec->nloc(0), topoSpec->nloc(1), topoSpec->nloc(2));
+
+    printf("[P3DFFT] topo 0 loc : %d %d %d  - (is: %d %d %d ie: %d %d %d size: %d %d %d) \n", P3Dmemsize[0], P3Dmemsize[1], P3Dmemsize[2], istart[0], istart[1], istart[2], iend[0], iend[1], iend[2], isize[0], isize[1], isize[2]);
+    printf("[P3DFFT] topo 2 loc : %d %d %d  - (is: %d %d %d ie: %d %d %d) \n", fsize[0], fsize[1], fsize[2], fstart[0], fstart[1], fstart[2], fend[0], fend[1], fend[2]);
+    printf("[P3DFFT] for alloc: %d %d %d  =? %d %d %d \n", P3Dmemsize[0], P3Dmemsize[1], P3Dmemsize[2], tsize[0], tsize[1], tsize[2]);
     // => P3Dmemsize = tsize
 
-
     // int nm = isize[0]*isize[1]*isize[2];
     // printf("isize: %d %d %d - fsize: %d %d %d\n", isize[0],isize[1],isize[2], fsize[0],fsize[1],fsize[2]);
     // if(nm < fsize[0]*fsize[1]*fsize[2]*2){ //switch to complex?
     //     nm = fsize[0]*fsize[1]*fsize[2]*2;
     // }
-    int nm = tsize[0]*tsize[1]*tsize[2]; //this is in double
-    printf("I am going to allocate FLUPS: %d (out %d R) , P3D: %d (out %d C) \n",FLUmemsizeIN,FLUmemsizeOUT,nm, tsize[0]*tsize[1]*tsize[2]);
-    
- 
-    double *rhsFLU   = (double *)fftw_malloc(sizeof(double) * FLUmemsizeIN);
+    int nm = tsize[0] * tsize[1] * tsize[2];  // this is in double
+    printf("I am going to allocate FLUPS: %d (out %d R) , P3D: %d (out %d C) \n", FLUmemsizeIN, FLUmemsizeOUT, nm, tsize[0] * tsize[1] * tsize[2]);
+
+    double *rhsFLU = (double *)fftw_malloc(sizeof(double) * FLUmemsizeIN);
     // solFLU   allocated by setup
-    double *rhsP3D   = (double *)fftw_malloc(sizeof(double) * nm);
-    double *solP3D   = (double *)fftw_malloc(sizeof(double) * nm);
+    double *rhsP3D = (double *)fftw_malloc(sizeof(double) * nm);
+    double *solP3D = (double *)fftw_malloc(sizeof(double) * nm);
 
-    std::memset(rhsFLU, 0, sizeof(double ) * FLUmemsizeIN);
-    std::memset(solFLU, 0, sizeof(double ) * FLUmemsizeOUT); 
-    std::memset(rhsP3D, 0, sizeof(double ) * nm);
-    std::memset(solP3D, 0, sizeof(double ) * nm);
-    
-    printf("istart: %d %d %d =? %d %d %d(P3D)\n",istartGlo[0],istartGlo[1],istartGlo[2],istart[0],istart[1],istart[2]);
+    std::memset(rhsFLU, 0, sizeof(double) * FLUmemsizeIN);
+    std::memset(solFLU, 0, sizeof(double) * FLUmemsizeOUT);
+    std::memset(rhsP3D, 0, sizeof(double) * nm);
+    std::memset(solP3D, 0, sizeof(double) * nm);
+
+    printf("istart: %d %d %d =? %d %d %d(P3D)\n", istartGlo[0], istartGlo[1], istartGlo[2], istart[0], istart[1], istart[2]);
 
     double f = 1;
 
     for (int i2 = 0; i2 < topoIn->nloc(2); i2++) {
         for (int i1 = 0; i1 < topoIn->nloc(1); i1++) {
             for (int i0 = 0; i0 < topoIn->nloc(0); i0++) {
-                double       x    = (istartGlo[0] + i0 ) * h[0]; //+ 0.5
-                double       y    = (istartGlo[1] + i1 ) * h[1]; //+ 0.5
-                double       z    = (istartGlo[2] + i2 ) * h[2]; //+ 0.5
+                double x = (istartGlo[0] + i0) * h[0];  //+ 0.5
+                double y = (istartGlo[1] + i1) * h[1];  //+ 0.5
+                double z = (istartGlo[2] + i2) * h[2];  //+ 0.5
 
                 size_t id;
                 id         = localIndex(0, i0, i1, i2, 0, FLUnmemIn, 1, 0);
                 rhsFLU[id] = sin((c_2pi / L[0] * f) * x) * sin((c_2pi / L[1] * f) * y) * sin((c_2pi / L[2] * f) * z);
-                
-                //p3d does not care of the size you allocate, juste fill the first isize elements
-                id =  localIndex(0, i0, i1, i2, 0, isize, 1, 0);
+
+                // p3d does not care of the size you allocate, juste fill the first isize elements
+                id         = localIndex(0, i0, i1, i2, 0, isize, 1, 0);
                 rhsP3D[id] = sin((c_2pi / L[0] * f) * x) * sin((c_2pi / L[1] * f) * y) * sin((c_2pi / L[2] * f) * z);
             }
         }
@@ -189,72 +182,70 @@ int main(int argc, char *argv[]) {
     // //-------------------------------------------------------------------------
     // /** - Proceed to the solve */
     // //-------------------------------------------------------------------------
-    
-    int Ntot = fsize[0]*fsize[1];
-    Ntot *= fsize[2]*2; //the number of real corresponding to the complex numbers
-    
-    double factor = 1.0/(nglob[0]* nglob[1]*nglob[2]);
-
-    for (int iter=0;iter<n_iter;iter++){
-        
+
+    int Ntot = fsize[0] * fsize[1];
+    Ntot *= fsize[2] * 2;  // the number of real corresponding to the complex numbers
+
+    double factor = 1.0 / (nglob[0] * nglob[1] * nglob[2]);
+
+    for (int iter = 0; iter < n_iter; iter++) {
         MPI_Barrier(comm);
-        if(rank==0){
-            printf("Iter %i\n",iter);
+        if (rank == 0) {
+            printf("Iter %i\n", iter);
         }
         fflush(stdout);
 
         // ------------------P3D---------------:
 
         P3Dprof->start("FFTandSwitch");
-        Cp3dfft_ftran_r2c(rhsP3D,solP3D,op_f);
+        Cp3dfft_ftran_r2c(rhsP3D, solP3D, op_f);
         P3Dprof->stop("FFTandSwitch");
 
-//#define PRINT_RES
+// #define PRINT_RES
 #ifdef PRINT_RES
         /* normallize */
-        for(int id = 0; id<Ntot; id++){
+        for (int id = 0; id < Ntot; id++) {
             solP3D[id] *= factor;
-        }   
-        print_all_P3D(solP3D,Ntot);
+        }
+        print_all_P3D(solP3D, Ntot);
 #endif
 
         // ------------------FLUPS---------------:
         MPI_Barrier(comm);
 
-        std::memcpy(solFLU, rhsFLU, sizeof(double ) * FLUmemsizeIN);
-        mysolver->do_FFT(solFLU,FLUPS_FORWARD);
+        std::memcpy(solFLU, rhsFLU, sizeof(double) * FLUmemsizeIN);
+        mysolver->do_FFT(solFLU, FLUPS_FORWARD);
 
 #ifdef PRINT_RES
         /* normalize*/
-        for(int id = 0; id<FLUmemsizeOUT; id++){
+        for (int id = 0; id < FLUmemsizeOUT; id++) {
             solFLU[id] *= factor;
         }
-        print_res(solFLU,topoSpec);
+        print_res(solFLU, topoSpec);
 #endif
-        //Note: if we do several FFT in a raw, the results are wrong with FLUPS for iter>=1. This is because
-        // FLUPS would require to do the backward transform before doing a new forward transform (or a reset
-        // function, which we choose not to implement).
-
+        // Note: if we do several FFT in a raw, the results are wrong with FLUPS for iter>=1. This is because
+        //  FLUPS would require to do the backward transform before doing a new forward transform (or a reset
+        //  function, which we choose not to implement).
     }
 
     // //-------------------------------------------------------------------------
     // /** - get timings */
     // //-------------------------------------------------------------------------
-    
+
     // --- FLUPS -------
     FLUprof->disp("solve");
-    delete(FLUprof);
+    delete (FLUprof);
 
     // --- P3DFFT -------
     double timers[12], gtmean[12], gtmax[12], gtmin[12];
 
     get_timers(timers);
-    MPI_Reduce(&timers,&gtmean,12,MPI_DOUBLE,MPI_SUM,0,comm);
-    MPI_Reduce(&timers,&gtmax ,12,MPI_DOUBLE,MPI_MAX,0,comm);
-    MPI_Reduce(&timers,&gtmin ,12,MPI_DOUBLE,MPI_MIN,0,comm);
+    MPI_Reduce(&timers, &gtmean, 12, MPI_DOUBLE, MPI_SUM, 0, comm);
+    MPI_Reduce(&timers, &gtmax, 12, MPI_DOUBLE, MPI_MAX, 0, comm);
+    MPI_Reduce(&timers, &gtmin, 12, MPI_DOUBLE, MPI_MIN, 0, comm);
 
-    for (int i=0;i < 12;i++) {
-        gtmean[i] = gtmean[i]/ ((double) comm_size);
+    for (int i = 0; i < 12; i++) {
+        gtmean[i] = gtmean[i] / ((double)comm_size);
     }
 
     double timeRef      = P3Dprof->get_timeAcc("FFTandSwitch");
@@ -262,38 +253,37 @@ int main(int argc, char *argv[]) {
     string P3DNames[12] = {"All2All(v) 1", "All2All(v) 2", "?", "?", "fft 1", "reorder 1", "fft 2", "reorder+fft 3", "?", "?", "?", "?"};
     int    order[12]    = {5, 1, 6, 7, 2, 8, 3, 4, 9, 10, 11, 12};
 
-
     P3Dprof->disp("FFTandSwitch");
-    if(rank == 0) {
+    if (rank == 0) {
         // printf("===================================================================================================================================================\n");
         // printf("          TIMER P3DFFT %s\n",P3DFFTprof.c_str());
         // printf("%25s|  %-13s\t%-13s\t%-13s\t%-13s\t%-13s\t%-13s\t%-13s\t%-13s\t%-13s\n","-NAME-    ", "-% global-", "-% local-", "-Total time-", "-Self time-", "-time/call-", "-Min time-", "-Max time-","-Mean cnt-","-(MB/s)-");
 
         string filename = "prof/" + P3DFFTprof + "_time.csv";
-        FILE* file      = fopen(filename.c_str(), "w+");
-    
-        printf("%-25.25s|  %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", "init", 100*timeInit/timeRef, 100*timeInit/(timeRef+timeInit), timeInit, 0., timeInit/n_iter, 0., 0.,(double) n_iter,0.);
-        fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", "init", 100*timeInit/timeRef, 100*timeInit/(timeRef+timeInit), timeInit, 0., timeInit/n_iter, 0., 0.,(double) n_iter,0);
-        printf("%-25.25s|  %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", "FFTandSwitch", 100., 100*timeRef/(timeRef+timeInit), timeRef, 0., timeRef/n_iter, 0., 0., (double) n_iter,0.);
-        fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", "FFTandSwitch", 100., 100*timeRef/(timeRef+timeInit), timeRef, 0., timeRef/n_iter, 0., 0., (double) n_iter,0.);
-        for(int i=0; i<6; i++){
-            int j = order[i]-1;
-            printf("%-25.25s|  %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", ("-- "+P3DNames[j]).c_str(), 100.*gtmean[j]/timeRef, 100*gtmean[j]/timeRef, gtmean[j], 0., gtmean[j]/n_iter, gtmin[j]/n_iter, gtmax[j]/n_iter, (double) n_iter,0.);
-            fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", ("-- "+P3DNames[j]).c_str(), 100.*gtmean[j]/timeRef, 100*gtmean[j]/timeRef, gtmean[j], 0., gtmean[j]/n_iter, gtmin[j]/n_iter, gtmax[j]/n_iter, (double) n_iter,0.);
+        FILE  *file     = fopen(filename.c_str(), "w+");
+
+        printf("%-25.25s|  %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", "init", 100 * timeInit / timeRef, 100 * timeInit / (timeRef + timeInit), timeInit, 0., timeInit / n_iter, 0., 0., (double)n_iter, 0.);
+        fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", "init", 100 * timeInit / timeRef, 100 * timeInit / (timeRef + timeInit), timeInit, 0., timeInit / n_iter, 0., 0., (double)n_iter, 0);
+        printf("%-25.25s|  %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", "FFTandSwitch", 100., 100 * timeRef / (timeRef + timeInit), timeRef, 0., timeRef / n_iter, 0., 0., (double)n_iter, 0.);
+        fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", "FFTandSwitch", 100., 100 * timeRef / (timeRef + timeInit), timeRef, 0., timeRef / n_iter, 0., 0., (double)n_iter, 0.);
+        for (int i = 0; i < 6; i++) {
+            int j = order[i] - 1;
+            printf("%-25.25s|  %9.4f\t%9.4f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%9.6f\t%09.1f\t%9.2f\n", ("-- " + P3DNames[j]).c_str(), 100. * gtmean[j] / timeRef, 100 * gtmean[j] / timeRef, gtmean[j], 0., gtmean[j] / n_iter, gtmin[j] / n_iter, gtmax[j] / n_iter, (double)n_iter, 0.);
+            fprintf(file, "%s;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.6f;%09.0f;%09.2f\n", ("-- " + P3DNames[j]).c_str(), 100. * gtmean[j] / timeRef, 100 * gtmean[j] / timeRef, gtmean[j], 0., gtmean[j] / n_iter, gtmin[j] / n_iter, gtmax[j] / n_iter, (double)n_iter, 0.);
         }
         fclose(file);
 
         // -- backup of all timers --
         filename = "prof/" + P3DFFTprof + "_backup.csv";
-        file            = fopen(filename.c_str(), "w+");
-        for(int i=0;i < 12;i++) {
-            fprintf(file,"timer[%d] (avg/max/min): %lE %lE %lE\n",i+1,gtmean[i],gtmax[i],gtmin[i]);
+        file     = fopen(filename.c_str(), "w+");
+        for (int i = 0; i < 12; i++) {
+            fprintf(file, "timer[%d] (avg/max/min): %lE %lE %lE\n", i + 1, gtmean[i], gtmax[i], gtmin[i]);
         }
         fclose(file);
-    }   
-    delete(P3Dprof);
+    }
+    delete (P3Dprof);
 
-    if(rank==0)
+    if (rank == 0)
         printf("Done ! Now let's clean...\n");
     fflush(stdout);
 
@@ -303,65 +293,62 @@ int main(int argc, char *argv[]) {
 
     topoIn->~Topology();
     topoSpec->~Topology();
-    
+
     mysolver->~Solver();
-    
-    for(int id=0; id<3; id++){
-        for(int is=0; is<2; is++){
-            mybc[id][is] = (FLUPS_BoundaryType*) flups_malloc(sizeof(int)*1);
+
+    for (int id = 0; id < 3; id++) {
+        for (int is = 0; is < 2; is++) {
+            mybc[id][is] = (FLUPS_BoundaryType *)flups_malloc(sizeof(int) * 1);
             flups_free(mybc[id][is]);
         }
     }
     Cp3dfft_clean();
+    flups_cleanup_fftw();
 
     MPI_Finalize();
 }
 
+void print_all_P3D(double *A, long int nar) {
+    int      x, y, z, conf, Fstart[3], Fsize[3], Fend[3];
+    long int i;
 
-
-void print_all_P3D(double *A,long int nar)
-{
-  int x,y,z,conf,Fstart[3],Fsize[3],Fend[3];
-  long int i;
-
-  conf = 2;
-  Cp3dfft_get_dims(Fstart,Fend,Fsize,conf);
-  /*
-  Fsize[0] *= 2;
-  Fstart[0] = (Fstart[0]-1)*2;
-  */
-  for(i=0;i < nar;i+=2)
-    if(fabs(A[i]) + fabs(A[i+1]) > 1.25e-4) {
-      z = i/(2*Fsize[0]*Fsize[1]);
-      y = i/(2*Fsize[0]) - z*Fsize[1];
-      x = i/2-z*Fsize[0]*Fsize[1] - y*Fsize[0];
-      printf("P3D(%d,%d,%d) %.16lg %.16lg\n",x+Fstart[0],y+Fstart[1],z+Fstart[2],A[i],A[i+1]);
-    }
+    conf = 2;
+    Cp3dfft_get_dims(Fstart, Fend, Fsize, conf);
+    /*
+    Fsize[0] *= 2;
+    Fstart[0] = (Fstart[0]-1)*2;
+    */
+    for (i = 0; i < nar; i += 2)
+        if (fabs(A[i]) + fabs(A[i + 1]) > 1.25e-4) {
+            z = i / (2 * Fsize[0] * Fsize[1]);
+            y = i / (2 * Fsize[0]) - z * Fsize[1];
+            x = i / 2 - z * Fsize[0] * Fsize[1] - y * Fsize[0];
+            printf("P3D(%d,%d,%d) %.16lg %.16lg\n", x + Fstart[0], y + Fstart[1], z + Fstart[2], A[i], A[i + 1]);
+        }
 }
 
-void print_res(double *A, const Topology* topo) {
-    const int ax0     = topo->axis();
-    const int ax1     = (ax0 + 1) % 3;
-    const int ax2     = (ax0 + 2) % 3;
-    const int nf = 2;//topo->nf()
-    int nmem[3];
-    
-    for(int i=0;i<3;i++){
+void print_res(double *A, const Topology *topo) {
+    const int ax0 = topo->axis();
+    const int ax1 = (ax0 + 1) % 3;
+    const int ax2 = (ax0 + 2) % 3;
+    const int nf  = 2;  // topo->nf()
+    int       nmem[3];
+
+    for (int i = 0; i < 3; i++) {
         nmem[i] = topo->nmem(i);
     }
 
     int gstart[3];
     topo->get_istart_glob(gstart);
 
-    if (topo->isComplex()){
+    if (topo->isComplex()) {
         for (int i2 = 0; i2 < topo->nloc(ax2); i2++) {
             for (int i1 = 0; i1 < topo->nloc(ax1); i1++) {
-                //local indexes start
+                // local indexes start
                 const size_t id = localIndex(ax0, 0, i1, i2, ax0, nmem, 2, 0);
                 for (int i0 = 0; i0 < topo->nloc(ax0); i0++) {
-                    
-                    if (std::fabs(A[id+i0*2]) + std::fabs(A[id+i0*2 + 1]) > 1.25e-4) {
-                        printf("FLU(%d %d %d) %lg +i1* %lg\n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id+i0*2], A[id+i0*2 + 1]);
+                    if (std::fabs(A[id + i0 * 2]) + std::fabs(A[id + i0 * 2 + 1]) > 1.25e-4) {
+                        printf("FLU(%d %d %d) %lg +i1* %lg\n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id + i0 * 2], A[id + i0 * 2 + 1]);
                     }
                 }
             }
@@ -369,15 +356,14 @@ void print_res(double *A, const Topology* topo) {
     } else {
         for (int i2 = 0; i2 < topo->nloc(ax2); i2++) {
             for (int i1 = 0; i1 < topo->nloc(ax1); i1++) {
-                //local indexes start
+                // local indexes start
                 const size_t id = localIndex(ax0, 0, i1, i2, ax0, nmem, 1, 0);
                 for (int i0 = 0; i0 < topo->nloc(ax0); i0++) {
-                    
-                    if (std::fabs(A[id+i0]) > 1.25e-4) {
-                        printf("FLU(%d %d %d) %lg \n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id+i0]);
+                    if (std::fabs(A[id + i0]) > 1.25e-4) {
+                        printf("FLU(%d %d %d) %lg \n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id + i0]);
                     }
                 }
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git a/samples/simpleTest/main_simple_test.cpp b/samples/simpleTest/main_simple_test.cpp
index 2d2936b..220d4ab 100644
--- a/samples/simpleTest/main_simple_test.cpp
+++ b/samples/simpleTest/main_simple_test.cpp
@@ -3,19 +3,18 @@
 #include <stdio.h>
 #include <string.h>
 
-#include "mpi.h"
-#include "h3lpr/profiler.hpp"
-#include "h3lpr/macros.hpp"
 #include "flups.h"
+#include "h3lpr/macros.hpp"
+#include "h3lpr/profiler.hpp"
+#include "mpi.h"
 
 #define m_log(format, ...) m_log_def("test", format, ##__VA_ARGS__)
 
-void print_res(double *A, const FLUPS_Topology* topo, std::string filename);
+void print_res(double *A, const FLUPS_Topology *topo, std::string filename);
 
-int main(int argc, char *argv[])  {    
-    
+int main(int argc, char *argv[]) {
     MPI_Init(&argc, &argv);
-    int rank, comm_size;
+    int      rank, comm_size;
     MPI_Comm comm = MPI_COMM_WORLD;
     MPI_Comm_rank(comm, &rank);
     MPI_Comm_size(comm, &comm_size);
@@ -24,14 +23,14 @@ int main(int argc, char *argv[])  {
     //-------------------------------------------------------------------------
     // Definition of the problem
     //-------------------------------------------------------------------------
-    bool is_node = true;
-    const int nglob[3] = {9, 9, 9};
-    const int nproc[3] = {1, 1, comm_size};
-    const double L[3] = {1., 1., 1.};
-    const double h[3] = {L[0] / (nglob[0] - is_node), L[1] / (nglob[1] - is_node), L[2] / (nglob[2] - is_node)};  
-    int nsolve = 1;
-
-    FLUPS_CenterType center_type[3]; 
+    bool         is_node  = true;
+    const int    nglob[3] = {9, 9, 9};
+    const int    nproc[3] = {1, 1, comm_size};
+    const double L[3]     = {1., 1., 1.};
+    const double h[3]     = {L[0] / (nglob[0] - is_node), L[1] / (nglob[1] - is_node), L[2] / (nglob[2] - is_node)};
+    int          nsolve   = 1;
+
+    FLUPS_CenterType    center_type[3];
     FLUPS_BoundaryType *mybc[3][2];
     for (int id = 0; id < 3; id++) {
         center_type[id] = is_node ? NODE_CENTER : CELL_CENTER;
@@ -45,19 +44,19 @@ int main(int argc, char *argv[])  {
     /** - Initialize FLUPS */
     //-------------------------------------------------------------------------
     // create a real topology
-    FLUPS_Topology *topo      = flups_topo_new(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm);
+    FLUPS_Topology *topo = flups_topo_new(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm);
 
-    // Profiler creation 
-    FLUPS_Profiler* prof = flups_profiler_new();
+    // Profiler creation
+    FLUPS_Profiler *prof = flups_profiler_new();
 
     // Solver creation and init
     FLUPS_Solver *mysolver = flups_init_timed(topo, mybc, h, L, NOD, center_type, prof);
-    flups_set_greenType(mysolver,CHAT_2);
+    flups_set_greenType(mysolver, CHAT_2);
     flups_setup(mysolver, true);
 
     // recompute the communicator and the rank
     comm = flups_topo_get_comm(topo);
-    MPI_Comm_rank(comm,&rank);
+    MPI_Comm_rank(comm, &rank);
 
     //-------------------------------------------------------------------------
     /** - allocate rhs and solution */
@@ -75,17 +74,17 @@ int main(int argc, char *argv[])  {
     const int ax1     = (ax0 + 1) % 3;
     const int ax2     = (ax0 + 2) % 3;
 
-    const double k = 2.0 * M_PI ;
-    auto fun = [k](double pos[3]) -> double { return cos(k * pos[0]); };
-    auto d2fundx = [k](double pos[3]) -> double { return (-(k*k)*cos(k * pos[0])); };
-
-    const double shift = is_node? 0.0 : 0.5;
-    for (int i2 = 0; i2 < flups_topo_get_nloc(topo,ax2) ; i2++) {
-        for (int i1 = 0; i1 < flups_topo_get_nloc(topo,ax1) ; i1++) {
-            for (int i0 = 0; i0 < flups_topo_get_nloc(topo,ax0); i0++) {
-                double pos[3] ={(istart[ax0] + i0 + shift) * h[ax0], (istart[ax1] + i1 + shift) * h[ax1], (istart[ax2] + i2 + shift) * h[ax2]};
-                const size_t id    = flups_locID(0, i0, i1, i2, 0, 0, nmem, 1);
-                rhs[id] = d2fundx(pos);
+    const double k       = 2.0 * M_PI;
+    auto         fun     = [k](double pos[3]) -> double { return cos(k * pos[0]); };
+    auto         d2fundx = [k](double pos[3]) -> double { return (-(k * k) * cos(k * pos[0])); };
+
+    const double shift = is_node ? 0.0 : 0.5;
+    for (int i2 = 0; i2 < flups_topo_get_nloc(topo, ax2); i2++) {
+        for (int i1 = 0; i1 < flups_topo_get_nloc(topo, ax1); i1++) {
+            for (int i0 = 0; i0 < flups_topo_get_nloc(topo, ax0); i0++) {
+                double       pos[3] = {(istart[ax0] + i0 + shift) * h[ax0], (istart[ax1] + i1 + shift) * h[ax1], (istart[ax2] + i2 + shift) * h[ax2]};
+                const size_t id     = flups_locID(0, i0, i1, i2, 0, 0, nmem, 1);
+                rhs[id]             = d2fundx(pos);
                 // rhs[id] = (istart[ax0] + i0) + nmem[0]*((istart[ax1] + i1) + nglob[1]*(i2 + istart[ax2])); //d2fundx(pos);
             }
         }
@@ -99,48 +98,47 @@ int main(int argc, char *argv[])  {
         flups_solve(mysolver, field, rhs, STD);
     }
 
-
     //-------------------------------------------------------------------------
     /** - Compute the error */
     //-------------------------------------------------------------------------
     double err = std::numeric_limits<double>::lowest();
-    bool is_last_topo[3]; 
-    for(int id = 0; id < 3; id++){
-        is_last_topo[id] = (istart[id] + flups_topo_get_nloc(topo,(flups_topo_get_axis(topo) + id)%3) == nglob[id]);
+    bool   is_last_topo[3];
+    for (int id = 0; id < 3; id++) {
+        is_last_topo[id] = (istart[id] + flups_topo_get_nloc(topo, (flups_topo_get_axis(topo) + id) % 3) == nglob[id]);
     }
 
-    if((0 == flups_topo_get_nloc(topo,ax2) - is_last_topo[2]) || (0 == flups_topo_get_nloc(topo,ax1) - is_last_topo[1]) || (0 == flups_topo_get_nloc(topo,ax1) - is_last_topo[0])){
+    if ((0 == flups_topo_get_nloc(topo, ax2) - is_last_topo[2]) || (0 == flups_topo_get_nloc(topo, ax1) - is_last_topo[1]) || (0 == flups_topo_get_nloc(topo, ax1) - is_last_topo[0])) {
         printf("[test - %d] You don't have enough points to fill in all the procs - my error (rank %d) is set to 0 by default \n ", rank, rank);
         err = 0.0;
     }
 
-    for (int i2 = 0; i2 < flups_topo_get_nloc(topo,ax2) - is_last_topo[2] ; i2++) {
-        for (int i1 = 0; i1 < flups_topo_get_nloc(topo,ax1) - is_last_topo[1] ; i1++) {
-            for (int i0 = 0; i0 < flups_topo_get_nloc(topo,ax0) - is_last_topo[0]; i0++) {
-                double pos[3] ={(istart[ax0] + i0 + shift) * h[ax0], (istart[ax1] + i1 + shift) * h[ax1], (istart[ax2] + i2 + shift) * h[ax2]};
-                const size_t id    = flups_locID(0, i0, i1, i2, 0, 0, nmem, 1);
+    for (int i2 = 0; i2 < flups_topo_get_nloc(topo, ax2) - is_last_topo[2]; i2++) {
+        for (int i1 = 0; i1 < flups_topo_get_nloc(topo, ax1) - is_last_topo[1]; i1++) {
+            for (int i0 = 0; i0 < flups_topo_get_nloc(topo, ax0) - is_last_topo[0]; i0++) {
+                double       pos[3] = {(istart[ax0] + i0 + shift) * h[ax0], (istart[ax1] + i1 + shift) * h[ax1], (istart[ax2] + i2 + shift) * h[ax2]};
+                const size_t id     = flups_locID(0, i0, i1, i2, 0, 0, nmem, 1);
                 // Gaussian
                 err = std::max(err, abs(field[id] - fun(pos)));
             }
         }
     }
 
-    //m_log("Printing results");
-    //printf("[test %d] Error computation - end %d %d %d ", rank,flups_topo_get_nloc(topo,ax0) - is_last_topo[0], flups_topo_get_nloc(topo,ax1) - is_last_topo[1], flups_topo_get_nloc(topo,ax2) - is_last_topo[2] );
-    //std::string arg_name = (argc == 2) ? argv[1] : "default";
-    //print_res(field, topo, "data/sol_" + arg_name) ;
+    // m_log("Printing results");
+    // printf("[test %d] Error computation - end %d %d %d ", rank,flups_topo_get_nloc(topo,ax0) - is_last_topo[0], flups_topo_get_nloc(topo,ax1) - is_last_topo[1], flups_topo_get_nloc(topo,ax2) - is_last_topo[2] );
+    // std::string arg_name = (argc == 2) ? argv[1] : "default";
+    // print_res(field, topo, "data/sol_" + arg_name) ;
 
     // --------------------------------------------------------------------------
-    // Local error 
+    // Local error
     printf("[test - %d] my error is %e \n", rank, err);
 
     // --------------------------------------------------------------------------
-    // Global error 
+    // Global error
     double err_glob = 0.0;
     MPI_Allreduce(&err, &err_glob, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
     m_log("--------------- Global Error ---------------");
     m_log("The global error == %e", err_glob);
-    
+
     flups_profiler_disp(prof);
 
     //-------------------------------------------------------------------------
@@ -150,6 +148,7 @@ int main(int argc, char *argv[])  {
     flups_free(field);
     flups_topo_free(topo);
     flups_cleanup(mysolver);
+    flups_cleanup_fftw();
     flups_profiler_free(prof);
 
     for (int id = 0; id < 3; id++) {
@@ -161,30 +160,33 @@ int main(int argc, char *argv[])  {
     MPI_Finalize();
 }
 
-void print_res(double *A, const FLUPS_Topology* topo, std::string filename) {
+void print_res(double *A, const FLUPS_Topology *topo, std::string filename) {
     int rank;
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 
-    FILE *fptr; 
+    FILE       *fptr;
     std::string name = filename + "_" + std::to_string(rank) + ".txt";
-    fptr = fopen(name.c_str(), "w");
+    fptr             = fopen(name.c_str(), "w");
 
-    if(fptr == NULL){ printf("Error!");   exit(1); }
+    if (fptr == NULL) {
+        printf("Error!");
+        exit(1);
+    }
 
     const int ax0     = flups_topo_get_axis(topo);
     const int ax1     = (ax0 + 1) % 3;
     const int ax2     = (ax0 + 2) % 3;
-    int nmem[3] = {flups_topo_get_nmem(topo,0), flups_topo_get_nmem(topo,1), flups_topo_get_nmem(topo,2)};
-    
+    int       nmem[3] = {flups_topo_get_nmem(topo, 0), flups_topo_get_nmem(topo, 1), flups_topo_get_nmem(topo, 2)};
+
     // int gstart[3];
     // flups_topo_get_istartGlob(topo,gstart);
 
-    if (flups_topo_get_isComplex(topo)){
-        for (int i2 = 0; i2 < flups_topo_get_nloc(topo,ax2); i2++) {
-            for (int i1 = 0; i1 < flups_topo_get_nloc(topo,ax1); i1++) {
-                //local indexes start
-                const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmem,2);
-                for (int i0 = 0; i0 < flups_topo_get_nloc(topo,ax0); i0++) {
+    if (flups_topo_get_isComplex(topo)) {
+        for (int i2 = 0; i2 < flups_topo_get_nloc(topo, ax2); i2++) {
+            for (int i1 = 0; i1 < flups_topo_get_nloc(topo, ax1); i1++) {
+                // local indexes start
+                const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmem, 2);
+                for (int i0 = 0; i0 < flups_topo_get_nloc(topo, ax0); i0++) {
                     // printf("(%d %d %d) %lg +i1 * %lg\n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id + i0 * 2], A[id + i0 * 2 + 1]);
                     fprintf(fptr, "\t (%e, i1 * %e)", A[id + i0 * 2], A[id + i0 * 2 + 1]);
                 }
@@ -192,13 +194,13 @@ void print_res(double *A, const FLUPS_Topology* topo, std::string filename) {
             }
             fprintf(fptr, "\n \n");
         }
-    
+
     } else {
-        for (int i2 = 0; i2 < flups_topo_get_nloc(topo,ax2); i2++) {
-            for (int i1 = 0; i1 < flups_topo_get_nloc(topo,ax1); i1++) {
-                //local indexes start
-                const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmem,1);
-                for (int i0 = 0; i0 < flups_topo_get_nloc(topo,ax0); i0++) {
+        for (int i2 = 0; i2 < flups_topo_get_nloc(topo, ax2); i2++) {
+            for (int i1 = 0; i1 < flups_topo_get_nloc(topo, ax1); i1++) {
+                // local indexes start
+                const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmem, 1);
+                for (int i0 = 0; i0 < flups_topo_get_nloc(topo, ax0); i0++) {
                     // printf("(%d %d %d) %lg \n", i0 + gstart[ax0], i1 + gstart[ax1], i2 + gstart[ax2], A[id + i0]);
                     fprintf(fptr, "\t %e", A[id + i0]);
                 }
diff --git a/samples/solve_advanced_C/main_solve_advanced.c b/samples/solve_advanced_C/main_solve_advanced.c
index 76cfacf..254b849 100644
--- a/samples/solve_advanced_C/main_solve_advanced.c
+++ b/samples/solve_advanced_C/main_solve_advanced.c
@@ -3,90 +3,90 @@
 #include <stdio.h>
 #include <string.h>
 
-#include "mpi.h"
 #include "flups.h"
+#include "mpi.h"
 
-void print_res(double *A, const FLUPS_Topology* topo);
+void print_res(double *A, const FLUPS_Topology *topo);
 
 #define PRINT_RES
 
-int main(int argc, char *argv[]) {    
+int main(int argc, char *argv[]) {
     printf("Starting...\n");
     //-------------------------------------------------------------------------
     // Initialize MPI
     //-------------------------------------------------------------------------
-    int rank, comm_size;
+    int      rank, comm_size;
     MPI_Comm comm = MPI_COMM_WORLD;
 
     int provided;
     // set MPI_THREAD_FUNNELED or MPI_THREAD_SERIALIZED
     int requested = MPI_THREAD_FUNNELED;
     MPI_Init_thread(&argc, &argv, requested, &provided);
-    if(provided < requested){
+    if (provided < requested) {
         printf("Invalid number of procs\n");
-        MPI_Abort(comm,1);
+        MPI_Abort(comm, 1);
     }
-   
+
     MPI_Comm_rank(comm, &rank);
     MPI_Comm_size(comm, &comm_size);
 
     //-------------------------------------------------------------------------
-    //Definition of the problem
+    // Definition of the problem
     //-------------------------------------------------------------------------
-    const int     nglob[3] = {64, 64, 64};
-    const int     nproc[3] = {1, 1, 2};
-    const double  L[3]     = {1., 1., 1.};;
+    const int    nglob[3] = {64, 64, 64};
+    const int    nproc[3] = {1, 1, 2};
+    const double L[3]     = {1., 1., 1.};
+    ;
 
     const double h[3] = {L[0] / nglob[0], L[1] / nglob[1], L[2] / nglob[2]};
 
     const FLUPS_CenterType center_type[3] = {CELL_CENTER, CELL_CENTER, CELL_CENTER};
 
-    FLUPS_BoundaryType* mybc[3][2];
-    for(int id=0; id<3; id++){
-        for(int is=0; is<2; is++){
-            mybc[id][is] = (FLUPS_BoundaryType*) flups_malloc(sizeof(int)*1);
+    FLUPS_BoundaryType *mybc[3][2];
+    for (int id = 0; id < 3; id++) {
+        for (int is = 0; is < 2; is++) {
+            mybc[id][is]    = (FLUPS_BoundaryType *)flups_malloc(sizeof(int) * 1);
             mybc[id][is][0] = PER;
         }
     }
 
-    if(comm_size!=nproc[0]*nproc[1]*nproc[2]){
+    if (comm_size != nproc[0] * nproc[1] * nproc[2]) {
         printf("Invalid number of procs\n");
-        MPI_Abort(comm,1);
+        MPI_Abort(comm, 1);
     }
 
-
     //-------------------------------------------------------------------------
     /** - Initialize FLUPS */
     //-------------------------------------------------------------------------
 
     // create a real topology
-    FLUPS_Topology *topoIn      = flups_topo_new(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm);
+    FLUPS_Topology *topoIn = flups_topo_new(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm);
 
     // solver creation and init
     FLUPS_Solver *mysolver = flups_init(topoIn, mybc, h, L, NOD, center_type);
-    flups_set_greenType(mysolver,CHAT_2);
+    flups_set_greenType(mysolver, CHAT_2);
     flups_setup(mysolver, true);
-    double* solFLU = flups_get_innerBuffer(mysolver);
+    double *solFLU = flups_get_innerBuffer(mysolver);
 
     // recompute the communicator and the rank
     comm = flups_topo_get_comm(topoIn);
-    MPI_Comm_rank(comm,&rank);
+    MPI_Comm_rank(comm, &rank);
 
     // retrieveing internal info from the solver
     const FLUPS_Topology *topoSpec = flups_get_innerTopo_spectral(mysolver);
 
     int nmemIn[3];
     int istartIn[3], istartSpec[3];
-    int nmemIN[3],nmemSpec[3];
+    int nmemIN[3], nmemSpec[3];
 
     int memsizeIN = flups_topo_get_memsize(topoIn);
     for (int i = 0; i < 3; i++) {
         nmemIn[i]   = flups_topo_get_nmem(topoIn, i);
         nmemSpec[i] = flups_topo_get_nmem(topoSpec, i);
     }
-    flups_topo_get_istartGlob(topoIn,istartIn);
-    flups_topo_get_istartGlob(topoSpec,istartSpec);
-    
+    flups_topo_get_istartGlob(topoIn, istartIn);
+    flups_topo_get_istartGlob(topoSpec, istartSpec);
+
     // //-------------------------------------------------------------------------
     // /** - allocate rhs and solution */
     // //-------------------------------------------------------------------------
@@ -100,27 +100,27 @@ int main(int argc, char *argv[]) {
         fflush(stdout);
     }
 
-    double *rhsFLU   = (double *)flups_malloc(sizeof(double) * memsizeIN);
-    memset(rhsFLU, 0, sizeof(double ) * memsizeIN);
-    // memset(solFLU, 0, sizeof(double ) * FLUmemsizeOUT); 
-    
-    double f = 1; //frequency of the wave
-    const double c_2pi = 2.0*3.141592653589793;
+    double *rhsFLU = (double *)flups_malloc(sizeof(double) * memsizeIN);
+    memset(rhsFLU, 0, sizeof(double) * memsizeIN);
+    // memset(solFLU, 0, sizeof(double ) * FLUmemsizeOUT);
 
-    for (int i2 = 0; i2 < flups_topo_get_nloc(topoIn,2); i2++) {
-        for (int i1 = 0; i1 < flups_topo_get_nloc(topoIn,1); i1++) {
-            for (int i0 = 0; i0 < flups_topo_get_nloc(topoIn,0); i0++) {
-                double       x    = (istartIn[0] + i0 + 0.5) * h[0];
-                double       y    = (istartIn[1] + i1 + 0.5) * h[1];
-                double       z    = (istartIn[2] + i2 + 0.5) * h[2];
+    double       f     = 1;  // frequency of the wave
+    const double c_2pi = 2.0 * 3.141592653589793;
+
+    for (int i2 = 0; i2 < flups_topo_get_nloc(topoIn, 2); i2++) {
+        for (int i1 = 0; i1 < flups_topo_get_nloc(topoIn, 1); i1++) {
+            for (int i0 = 0; i0 < flups_topo_get_nloc(topoIn, 0); i0++) {
+                double x = (istartIn[0] + i0 + 0.5) * h[0];
+                double y = (istartIn[1] + i1 + 0.5) * h[1];
+                double z = (istartIn[2] + i2 + 0.5) * h[2];
 
                 size_t id;
-                id    = flups_locID(0, i0, i1, i2, 0, 0, nmemIn, 1);
+                id         = flups_locID(0, i0, i1, i2, 0, 0, nmemIn, 1);
                 rhsFLU[id] = sin((c_2pi / L[0] * f) * x) * sin((c_2pi / L[1] * f) * y) * sin((c_2pi / L[2] * f) * z);
             }
         }
     }
-    memcpy(solFLU, rhsFLU, sizeof(double ) * memsizeIN);
+    memcpy(solFLU, rhsFLU, sizeof(double) * memsizeIN);
 
     // //-------------------------------------------------------------------------
     // /** - Skip the copy of data
@@ -128,9 +128,9 @@ int main(int argc, char *argv[]) {
     // By using the advanced features of the API, we skip the copy which is done
     // in flups_solve: data are copied from the location the user allocated himself,
     // to an inner memory reserved by flups, and ensuring proper alignment.
-    // In this example, we have directly filled that "inner memory" (solFlu) with 
+    // In this example, we have directly filled that "inner memory" (solFlu) with
     // the initial condition
-    
+
     // flups_do_copy(mysolver,topoIn,solFLU,FLUPS_FORWARD);
 
     // //-------------------------------------------------------------------------
@@ -148,26 +148,26 @@ int main(int argc, char *argv[]) {
     // //-------------------------------------------------------------------------
 
     double kfact[3], koffset[3], symstart[3];
-    flups_get_spectralInfo(mysolver,kfact,koffset,symstart);
+    flups_get_spectralInfo(mysolver, kfact, koffset, symstart);
     if (rank == 0) {
-        printf("%lf %lf %lf  %lf %lf %lf  %lf %lf %lf  \n",kfact[0],kfact[1],kfact[2],koffset[0],koffset[1],koffset[2],symstart[0],symstart[1],symstart[2]);
+        printf("%lf %lf %lf  %lf %lf %lf  %lf %lf %lf  \n", kfact[0], kfact[1], kfact[2], koffset[0], koffset[1], koffset[2], symstart[0], symstart[1], symstart[2]);
     }
 
-    const int ax0     = flups_topo_get_axis(topoSpec);
-    const int ax1     = (ax0 + 1) % 3;
-    const int ax2     = (ax0 + 2) % 3;
-    const int nf      = 2; //topoSpec is full spectral, there are two doubles per element  
-    
+    const int ax0 = flups_topo_get_axis(topoSpec);
+    const int ax1 = (ax0 + 1) % 3;
+    const int ax2 = (ax0 + 2) % 3;
+    const int nf  = 2;  // topoSpec is full spectral, there are two doubles per element
+
     // int nmemSpec[3];
     // for(int i=0;i<3;i++){
     //     nmemSpec[i] = flups_topo_get_nmem(topoSpec,i);
     // }
-        
-    for (int i2 = 0; i2 < flups_topo_get_nloc(topoSpec,ax2); i2++) {
-        for (int i1 = 0; i1 < flups_topo_get_nloc(topoSpec,ax1); i1++) {
-            //local indexes start
-            const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmemSpec,nf);
-            for (int i0 = 0; i0 < flups_topo_get_nloc(topoSpec,ax0); i0++) {
+
+    for (int i2 = 0; i2 < flups_topo_get_nloc(topoSpec, ax2); i2++) {
+        for (int i1 = 0; i1 < flups_topo_get_nloc(topoSpec, ax1); i1++) {
+            // local indexes start
+            const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmemSpec, nf);
+            for (int i0 = 0; i0 < flups_topo_get_nloc(topoSpec, ax0); i0++) {
                 int is[3];
                 flups_symID(ax0, i0, i1, i2, istartSpec, symstart, 0, is);
 
@@ -176,10 +176,10 @@ int main(int argc, char *argv[]) {
                 const double k1 = (is[ax1] + koffset[ax1]) * kfact[ax1];
                 const double k2 = (is[ax2] + koffset[ax2]) * kfact[ax2];
 
-                const double ksqr   = k0 * k0 + k1 * k1 + k2 * k2;
+                const double ksqr = k0 * k0 + k1 * k1 + k2 * k2;
 
-                solFLU[id + i0 * nf] *= exp(-ksqr); //REAL part
-                solFLU[id + i0 * nf + 1] *= exp(-ksqr); //COMPLEX part
+                solFLU[id + i0 * nf] *= exp(-ksqr);      // REAL part
+                solFLU[id + i0 * nf + 1] *= exp(-ksqr);  // COMPLEX part
             }
         }
     }
@@ -204,7 +204,7 @@ int main(int argc, char *argv[]) {
 // #define PRINT_RES
 #ifdef PRINT_RES
     /* normalize*/
-    print_res(solFLU,topoIn);
+    print_res(solFLU, topoIn);
 #endif
 
     flups_hdf5_dump(topoIn, "result", solFLU);
@@ -212,58 +212,59 @@ int main(int argc, char *argv[]) {
     // //-------------------------------------------------------------------------
     // /** - CLEAN */
     // //-------------------------------------------------------------------------
-    
-    if(rank==0)
+
+    if (rank == 0)
         printf("Done ! Now let's clean...\n");
     fflush(stdout);
-    
+
     flups_free(rhsFLU);
 
     flups_topo_free(topoIn);
     flups_cleanup(mysolver);
+    flups_cleanup_fftw();
 
-    for(int id=0; id<3; id++){
-        for(int is=0; is<2; is++){
+    for (int id = 0; id < 3; id++) {
+        for (int is = 0; is < 2; is++) {
             flups_free(mybc[id][is]);
         }
     }
-    
+
     MPI_Finalize();
 }
 
-void print_res(double *A, const FLUPS_Topology* topo) {
-    const int ax0     = flups_topo_get_axis(topo);
-    const int ax1     = (ax0 + 1) % 3;
-    const int ax2     = (ax0 + 2) % 3;
-    int nmem[3];
-    
-    for(int i=0;i<3;i++){
-        nmem[i] = flups_topo_get_nmem(topo,i);
+void print_res(double *A, const FLUPS_Topology *topo) {
+    const int ax0 = flups_topo_get_axis(topo);
+    const int ax1 = (ax0 + 1) % 3;
+    const int ax2 = (ax0 + 2) % 3;
+    int       nmem[3];
+
+    for (int i = 0; i < 3; i++) {
+        nmem[i] = flups_topo_get_nmem(topo, i);
     }
 
     int gstart[3];
-    flups_topo_get_istartGlob(topo,gstart);
+    flups_topo_get_istartGlob(topo, gstart);
 
-    if (flups_topo_get_isComplex(topo)){
-        for (int i2 = 0; i2 < flups_topo_get_nloc(topo,ax2); i2++) {
+    if (flups_topo_get_isComplex(topo)) {
+        for (int i2 = 0; i2 < flups_topo_get_nloc(topo, ax2); i2++) {
             printf("(z = %d) \n", i2 + gstart[ax2]);
-            for (int i1 = 0; i1 < flups_topo_get_nloc(topo,ax1); i1++) {
-                //local indexes start
-                const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmem,2);
-                for (int i0 = 0; i0 < flups_topo_get_nloc(topo,ax0); i0++) {
+            for (int i1 = 0; i1 < flups_topo_get_nloc(topo, ax1); i1++) {
+                // local indexes start
+                const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmem, 2);
+                for (int i0 = 0; i0 < flups_topo_get_nloc(topo, ax0); i0++) {
                     printf("(%1.3lg + i1* %1.3lg) \t", A[id + i0 * 2], A[id + i0 * 2 + 1]);
                 }
-                printf("\n");   
+                printf("\n");
             }
             printf("\n");
         }
     } else {
-        for (int i2 = 0; i2 < flups_topo_get_nloc(topo,ax2); i2++) {
+        for (int i2 = 0; i2 < flups_topo_get_nloc(topo, ax2); i2++) {
             printf("(z = %d) \n", i2 + gstart[ax2]);
-            for (int i1 = 0; i1 < flups_topo_get_nloc(topo,ax1); i1++) {
-                //local indexes start
-                const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmem,1);
-                for (int i0 = 0; i0 < flups_topo_get_nloc(topo,ax0); i0++) {
+            for (int i1 = 0; i1 < flups_topo_get_nloc(topo, ax1); i1++) {
+                // local indexes start
+                const size_t id = flups_locID(ax0, 0, i1, i2, 0, ax0, nmem, 1);
+                for (int i0 = 0; i0 < flups_topo_get_nloc(topo, ax0); i0++) {
                     printf(" %lg \t", A[id + i0]);
                 }
                 printf("\n");
diff --git a/samples/solve_vtube/src/vtube.cpp b/samples/solve_vtube/src/vtube.cpp
index 30f8845..d34315d 100644
--- a/samples/solve_vtube/src/vtube.cpp
+++ b/samples/solve_vtube/src/vtube.cpp
@@ -2,32 +2,33 @@
  * @file vtube.cpp
  * @author Thomas Gillis and Denis-Gabriel Caprace
  * @copyright Copyright © UCLouvain 2020
- * 
+ *
  * FLUPS is a Fourier-based Library of Unbounded Poisson Solvers.
- * 
+ *
  * Copyright <2020> <Université catholique de Louvain (UCLouvain), Belgique>
- * 
+ *
  * List of the contributors to the development of FLUPS, Description and complete License: see LICENSE and NOTICE files.
- * 
+ *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
- * 
+ *
  *  http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- * 
+ *
  */
 
 #include "vtube.hpp"
+
 #include "omp.h"
 
-template <> 
-double gexpint<1>(const double z){
+template <>
+double gexpint<1>(const double z) {
     // for real values E1(x) = -Ei(-x):
     // according to
     // https://en.cppreference.com/w/cpp/numeric/special_functions/expint and
@@ -35,9 +36,9 @@ double gexpint<1>(const double z){
     return (-std::expint(-z));
 }
 
-double vort_inf(const double r, const double sigma){
+double vort_inf(const double r, const double sigma) {
     // Gregoire winckelmans 2004 encyclopedia
-    const double rho1  = r / sigma;
+    const double rho1 = r / sigma;
     return 1.0 / (c_2pi * sigma * sigma) * exp(-rho1 * rho1 * 0.5);
 }
 double vel_inf(const double r, const double sigma) {
@@ -47,10 +48,10 @@ double vel_inf(const double r, const double sigma) {
     return (r < eps) ? 0 : 1.0 / (c_2pi * r) * (1.0 - exp(-rho1 * rho1 * 0.5));
 }
 
-double vort_compact(const double r, const double R){
+double vort_compact(const double r, const double R) {
     // Wincklemans 2015, hand-written notes (cfr vortexbib)
-    const double rho2  = r / (R);
-    const double oo_rad2 = 1.0/(R*R);
+    const double rho2    = r / (R);
+    const double oo_rad2 = 1.0 / (R * R);
     return (rho2 >= 1.0) ? (0.0) : (1.0 / c_2pi * (2.0 * oo_rad2) * (1.0 / gexpint<2>(1.0)) * exp(-1.0 / (1.0 - rho2 * rho2)));
 }
 double vel_compact(const double r, const double R) {
@@ -86,9 +87,9 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int
     const double fact    = (double)(!is_cell);
 
     // Ensure that 64 points in cell-centred mode is equivalent 64 points in node-centred mode
-    const int    nglob[3] = {myCase.nglob[0] + (int)fact,
-                             myCase.nglob[1] + (int)fact,
-                             myCase.nglob[2] + (int)fact};
+    const int nglob[3] = {myCase.nglob[0] + (int)fact,
+                          myCase.nglob[1] + (int)fact,
+                          myCase.nglob[2] + (int)fact};
 
     // h definition depends on if we are cell- of node-centred
     const double h[3] = {L[0] / (nglob[0] - fact),
@@ -97,13 +98,13 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int
 
     FLUPS_CenterType center_type[3];
     for (int i = 0; i < 3; i++) {
-            center_type[i] = myCase.center[i];
+        center_type[i] = myCase.center[i];
     }
 
-    FLUPS_BoundaryType* mybc[3][2];
-    for(int id=0; id<3; id++){
-        for(int is=0; is<2; is++){
-            mybc[id][is] = (FLUPS_BoundaryType*) flups_malloc(sizeof(int)*lda);
+    FLUPS_BoundaryType *mybc[3][2];
+    for (int id = 0; id < 3; id++) {
+        for (int is = 0; is < 2; is++) {
+            mybc[id][is] = (FLUPS_BoundaryType *)flups_malloc(sizeof(int) * lda);
         }
     }
 
@@ -123,7 +124,7 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int
     //-------------------------------------------------------------------------
     std::string     name = "tube_" + std::to_string((int)(nglob[0] / L[0])) + "_nrank" + std::to_string(comm_size) + "_nthread" + std::to_string(omp_get_max_threads());
     FLUPS_Profiler *prof = flups_profiler_new_n(name.c_str());
-    FLUPS_Solver *  mysolver;
+    FLUPS_Solver   *mysolver;
     mysolver = flups_init_timed(topo, mybc, h, L, order, center_type, prof);
 
     flups_set_greenType(mysolver, typeGreen);
@@ -167,8 +168,8 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int
         double *rhs1 = rhs + flups_locID(ax0, 0, 0, 0, 1, ax0, nmem, 1);
         double *rhs2 = rhs + flups_locID(ax0, 0, 0, 0, 2, ax0, nmem, 1);
         double *sol0 = sol + flups_locID(ax0, 0, 0, 0, 0, ax0, nmem, 1);
-        double* sol1 = sol + flups_locID(ax0, 0, 0, 0, 1, ax0, nmem, 1);
-        double* sol2 = sol + flups_locID(ax0, 0, 0, 0, 2, ax0, nmem, 1);
+        double *sol1 = sol + flups_locID(ax0, 0, 0, 0, 1, ax0, nmem, 1);
+        double *sol2 = sol + flups_locID(ax0, 0, 0, 0, 2, ax0, nmem, 1);
 
         int dir0 = (vdir + 1) % 3;
         int dir1 = (vdir + 2) % 3;
@@ -317,53 +318,53 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int
                                 sol2[id] += 0.0 * myCase.ysign * myCase.xsign;
                             }
                         }
-                    // } else if (cross) {
-                    //     const double lpos[3] = {pos[0] - (myCase.xcntr * L[0]),
-                    //                             pos[1] - (myCase.ycntr * L[1]),
-                    //                             pos[2] - (myCase.zcntr * L[2])};
-                    //     double      *lrhs[3] = {rhs0, rhs1, rhs2};
-                    //     double      *lsol[3] = {sol0, sol1, sol2};
-
-                    //     for (int i = 0; i < 3; ++i) {
-                    //         lrhs[(i + 0) % 3][id] = 0.0;
-                    //         lrhs[(i + 1) % 3][id] = 0.0;
-                    //         lrhs[(i + 2) % 3][id] = 0.0;
-                    //         lsol[(i + 0) % 3][id] = 0.0;
-                    //         lsol[(i + 1) % 3][id] = 0.0;
-                    //         lsol[(i + 2) % 3][id] = 0.0;
-                    //     }
-
-                    //     for (int i = 0; i < 3; ++i) {
-                    //         const double x = lpos[(i + 1) % 3];
-                    //         const double y = lpos[(i + 2) % 3];
-                    //         // tube in z
-                    //         const double theta = std::atan2(y, x);  // get the angle in the x-y plane
-                    //         const double r     = sqrt(x * x + y * y);
-                    //         const double vort  = (compact) ? vort_compact(r, R) : vort_inf(r, sigma);
-                    //         const double vel   = (compact) ? vel_compact(r, R) : vel_inf(r, sigma);
-                    //         lrhs[(i + 0) % 3][id] += -vort;
-                    //         lrhs[(i + 1) % 3][id] += 0.0;
-                    //         lrhs[(i + 2) % 3][id] += 0.0;
-                    //         lsol[(i + 0) % 3][id] += 0.0;
-                    //         lsol[(i + 1) % 3][id] += -sin(theta) * vel;
-                    //         lsol[(i + 2) % 3][id] += +cos(theta) * vel;
-                    //     }
-                    // } else if (gauss) {
-                    //     const double x = pos[0] - (myCase.xcntr * L[0]);
-                    //     const double y = pos[1] - (myCase.ycntr * L[1]);
-                    //     const double z = pos[2] - (myCase.zcntr * L[2]);
-                    //     // tube in z
-                    //     const double r     = sqrt(x * x + y * y + z*z);
-                    //     const double rho = r/sigma;
-                    //     const double vort  = 1.0/(4.0*M_PI*pow(sigma,3)) * sqrt(2.0/M_PI) * exp(-0.5 * rho * rho) ;
-                    //     const double vel   = 1.0;
-                    //     lrhs[(i + 0) % 3][id] += -vort;
-                    //     lrhs[(i + 1) % 3][id] += 0.0;
-                    //     lrhs[(i + 2) % 3][id] += 0.0;
-                    //     lsol[(i + 0) % 3][id] += 0.0;
-                    //     lsol[(i + 1) % 3][id] += -sin(theta) * vel;
-                    //     lsol[(i + 2) % 3][id] += +cos(theta) * vel;
-                    // }
+                        // } else if (cross) {
+                        //     const double lpos[3] = {pos[0] - (myCase.xcntr * L[0]),
+                        //                             pos[1] - (myCase.ycntr * L[1]),
+                        //                             pos[2] - (myCase.zcntr * L[2])};
+                        //     double      *lrhs[3] = {rhs0, rhs1, rhs2};
+                        //     double      *lsol[3] = {sol0, sol1, sol2};
+
+                        //     for (int i = 0; i < 3; ++i) {
+                        //         lrhs[(i + 0) % 3][id] = 0.0;
+                        //         lrhs[(i + 1) % 3][id] = 0.0;
+                        //         lrhs[(i + 2) % 3][id] = 0.0;
+                        //         lsol[(i + 0) % 3][id] = 0.0;
+                        //         lsol[(i + 1) % 3][id] = 0.0;
+                        //         lsol[(i + 2) % 3][id] = 0.0;
+                        //     }
+
+                        //     for (int i = 0; i < 3; ++i) {
+                        //         const double x = lpos[(i + 1) % 3];
+                        //         const double y = lpos[(i + 2) % 3];
+                        //         // tube in z
+                        //         const double theta = std::atan2(y, x);  // get the angle in the x-y plane
+                        //         const double r     = sqrt(x * x + y * y);
+                        //         const double vort  = (compact) ? vort_compact(r, R) : vort_inf(r, sigma);
+                        //         const double vel   = (compact) ? vel_compact(r, R) : vel_inf(r, sigma);
+                        //         lrhs[(i + 0) % 3][id] += -vort;
+                        //         lrhs[(i + 1) % 3][id] += 0.0;
+                        //         lrhs[(i + 2) % 3][id] += 0.0;
+                        //         lsol[(i + 0) % 3][id] += 0.0;
+                        //         lsol[(i + 1) % 3][id] += -sin(theta) * vel;
+                        //         lsol[(i + 2) % 3][id] += +cos(theta) * vel;
+                        //     }
+                        // } else if (gauss) {
+                        //     const double x = pos[0] - (myCase.xcntr * L[0]);
+                        //     const double y = pos[1] - (myCase.ycntr * L[1]);
+                        //     const double z = pos[2] - (myCase.zcntr * L[2]);
+                        //     // tube in z
+                        //     const double r     = sqrt(x * x + y * y + z*z);
+                        //     const double rho = r/sigma;
+                        //     const double vort  = 1.0/(4.0*M_PI*pow(sigma,3)) * sqrt(2.0/M_PI) * exp(-0.5 * rho * rho) ;
+                        //     const double vel   = 1.0;
+                        //     lrhs[(i + 0) % 3][id] += -vort;
+                        //     lrhs[(i + 1) % 3][id] += 0.0;
+                        //     lrhs[(i + 2) % 3][id] += 0.0;
+                        //     lsol[(i + 0) % 3][id] += 0.0;
+                        //     lsol[(i + 1) % 3][id] += -sin(theta) * vel;
+                        //     lsol[(i + 2) % 3][id] += +cos(theta) * vel;
+                        // }
                     } else if (ring) {
                         //---------------------------------------------------------------
                         //                      WARNING
@@ -428,13 +429,13 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int
                         }
                     } else if (periodic) {
                         {
-                            const double x = pos[0];
-                            const double y = pos[1];
-                            const double z = pos[2];
+                            const double x    = pos[0];
+                            const double y    = pos[1];
+                            const double z    = pos[2];
                             const double k[3] = {2.0 * M_PI, 2.0 * M_PI, 2.0 * M_PI};
-                            rhs0[id] = cos(k[1] * y) / k[1];
-                            rhs1[id] = cos(k[2] * z) / k[2];
-                            rhs2[id] = cos(k[0] * x) / k[0];
+                            rhs0[id]          = cos(k[1] * y) / k[1];
+                            rhs1[id]          = cos(k[2] * z) / k[2];
+                            rhs2[id]          = cos(k[0] * x) / k[0];
                             // rot_x = sin(k[2] * z) -> sol = -1/k[2] * sin(k[2] * z)
                             // rot_y = sin(k[0] * x) -> sol = -1/k[0] * sin(k[0] * x)
                             // rot_z = sin(k[1] * y) -> sol = -1/k[1] * sin(k[1] * y)
@@ -466,15 +467,15 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int
                             const double z    = pos[2];
                             const double k[3] = {1.5 * M_PI, 1.5 * M_PI, 1.5 * M_PI};
 
-                            rhs0[id] = 0.0; //sin(k[1] * y) / k[1];
+                            rhs0[id] = 0.0;  // sin(k[1] * y) / k[1];
                             rhs1[id] = sin(k[2] * z) / k[2];
-                            rhs2[id] = 0.0; //sin(k[0] * x) / k[0];
+                            rhs2[id] = 0.0;  // sin(k[0] * x) / k[0];
                             // rot_x = cos(k[2] * z) -> sol = -1/k[2]^2 * cos(k[2] * z)
                             // rot_y = cos(k[0] * x) -> sol = -1/k[0]^2 * cos(k[0] * x)
                             // rot_z = cos(k[1] * y) -> sol = -1/k[1]^2 * cos(k[1] * y)
                             sol0[id] = -1.0 / (k[2] * k[2]) * cos(k[2] * z);
-                            sol1[id] = 0.0; //-1.0 / (k[0] * k[0]) * cos(k[0] * x);
-                            sol2[id] = 0.0; //-1.0 / (k[1] * k[1]) * cos(k[1] * y);
+                            sol1[id] = 0.0;  //-1.0 / (k[0] * k[0]) * cos(k[0] * x);
+                            sol2[id] = 0.0;  //-1.0 / (k[1] * k[1]) * cos(k[1] * y);
                         }
                     }
                 }
@@ -497,11 +498,9 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int
         flups_solve(mysolver, field, rhs, ROT);
     }
 
-
     flups_profiler_disp(prof);
     flups_profiler_free(prof);
 
-
 #ifdef DUMP_DBG
     // write the source term and the solution
     sprintf(msg, "sol_%d%d%d%d%d%d_%dx%dx%d", mybc[0][0][0], mybc[0][1][0], mybc[1][0][0], mybc[1][1][0], mybc[2][0][0], mybc[2][1][0], nglob[0], nglob[1], nglob[2]);
@@ -521,7 +520,7 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int
     std::memset(err2, 0, sizeof(double) * lda);
     std::memset(erri, 0, sizeof(double) * lda);
 
-    //determine the volume associated to a mesh
+    // determine the volume associated to a mesh
     double vol = 1.0;
     for (int id = 0; id < 3; id++) {
         if (mybc[id][0][0] != NONE && mybc[id][1][0] != NONE) {
@@ -535,23 +534,23 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int
         const int ax2     = (ax0 + 2) % 3;
         const int nmem[3] = {flups_topo_get_nmem(topo, 0), flups_topo_get_nmem(topo, 1), flups_topo_get_nmem(topo, 2)};
         for (int lia = 0; lia < lda; lia++) {
-            // Check if flups is able to compute the last point of the domain. If at least one of the direction is periodic, 
+            // Check if flups is able to compute the last point of the domain. If at least one of the direction is periodic,
             // and we are in an node centred framework, then the last point in each direction may be influenced
-            const bool last_point_valid = !(!is_cell && ( mybc[lia][0][0] == PER || mybc[lia][0][1] == PER || mybc[lia][0][2] == PER ));
-            
+            const bool last_point_valid = !(!is_cell && (mybc[lia][0][0] == PER || mybc[lia][0][1] == PER || mybc[lia][0][2] == PER));
+
             // If the last point is not valid, remove it from the error computation
             const int iend[3] = {flups_topo_get_nloc(topo, ax0),
                                  flups_topo_get_nloc(topo, ax1),
                                  flups_topo_get_nloc(topo, ax2)};
-            
+
             for (int i2 = 0; i2 < iend[2]; i2++) {
                 for (int i1 = 0; i1 < iend[1]; i1++) {
                     for (int i0 = 0; i0 < iend[0]; i0++) {
                         const size_t id  = flups_locID(ax0, i0, i1, i2, lia, ax0, nmem, 1);
                         const double err = sol[id] - field[id];
-                        lerri[lia] = max(lerri[lia], fabs(err));
+                        lerri[lia]       = max(lerri[lia], fabs(err));
                         lerr2[lia] += (err * err) * vol;
-                        error[id]  = err; 
+                        error[id] = err;
                     }
                 }
             }
@@ -571,9 +570,9 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int
 #endif
 
     char   filename[512];
-    string folder = "./data";
-    string ct_name = is_cell ? "CellCenter" : "NodeCenter"; 
-    sprintf(filename, "%s/%s_%s_%d_%d%d%d_typeGreen=%d.txt", folder.c_str(), __func__, ct_name.c_str(), type, vdir, (int) myCase.xsign, (int)myCase.ysign, typeGreen);
+    string folder  = "./data";
+    string ct_name = is_cell ? "CellCenter" : "NodeCenter";
+    sprintf(filename, "%s/%s_%s_%d_%d%d%d_typeGreen=%d.txt", folder.c_str(), __func__, ct_name.c_str(), type, vdir, (int)myCase.xsign, (int)myCase.ysign, typeGreen);
 
     if (rank == 0) {
         struct stat st = {0};
@@ -604,12 +603,13 @@ void vtube(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int
     free(lerri);
     free(err2);
     free(erri);
-    
+
     flups_free(sol);
     flups_free(rhs);
     flups_free(field);
     flups_free(error);
     flups_cleanup(mysolver);
+    flups_cleanup_fftw();
     flups_topo_free(topo);
 
     for (int id = 0; id < 3; id++) {
diff --git a/samples/validation/src/validation_3d.cpp b/samples/validation/src/validation_3d.cpp
index e32ebc5..059c93e 100644
--- a/samples/validation/src/validation_3d.cpp
+++ b/samples/validation/src/validation_3d.cpp
@@ -2,32 +2,32 @@
  * @file validation_3d.cpp
  * @author Thomas Gillis and Denis-Gabriel Caprace
  * @copyright Copyright © UCLouvain 2020
- * 
+ *
  * FLUPS is a Fourier-based Library of Unbounded Poisson Solvers.
- * 
+ *
  * Copyright <2020> <Université catholique de Louvain (UCLouvain), Belgique>
- * 
+ *
  * List of the contributors to the development of FLUPS, Description and complete License: see LICENSE and NOTICE files.
- * 
+ *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
- * 
+ *
  *  http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- * 
+ *
  */
 
-#include <unistd.h>
-#include <limits.h>
-
 #include "validation_3d.hpp"
 
+#include <limits.h>
+#include <unistd.h>
+
 #include "omp.h"
 
 using namespace std;
@@ -45,31 +45,31 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co
  * @param nSolve number of times we call the same solver (for timing)
  */
 void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, const int lda, const int nSolve, const std::string output_dir) {
-    int rank, comm_size;
+    int      rank, comm_size;
     MPI_Comm comm = MPI_COMM_WORLD;
     MPI_Comm_rank(comm, &rank);
     MPI_Comm_size(comm, &comm_size);
 
-    const int *   nproc  = myCase.nproc;
-    const double *L      = myCase.L;
+    const int    *nproc = myCase.nproc;
+    const double *L     = myCase.L;
 
-    const bool is_cell = myCase.center_type[0] == CELL_CENTER; 
-    const double fact = (double) (!is_cell);
+    const bool   is_cell = myCase.center_type[0] == CELL_CENTER;
+    const double fact    = (double)(!is_cell);
 
-    const int nglob[3] = {myCase.nglob[0] + (int)fact, myCase.nglob[1] + (int)fact,
+    const int    nglob[3] = {myCase.nglob[0] + (int)fact, myCase.nglob[1] + (int)fact,
                           (myCase.nglob[2] == 1) ? 1 : myCase.nglob[2] + (int)fact};
-    const double h[3] = {L[0] / (nglob[0] - fact), L[1] / (nglob[1] - fact),
+    const double h[3]     = {L[0] / (nglob[0] - fact), L[1] / (nglob[1] - fact),
                          nglob[2] == 1 ? 1 : L[2] / (nglob[2] - fact)};
 
     FLUPS_CenterType center_type[3];
     for (int i = 0; i < 3; i++) {
-            center_type[i] = myCase.center_type[i];
+        center_type[i] = myCase.center_type[i];
     }
 
-    FLUPS_BoundaryType* mybc[3][2];
-    for(int id=0; id<3; id++){
-        for(int is=0; is<2; is++){
-            mybc[id][is] = (FLUPS_BoundaryType*) flups_malloc(sizeof(int)*lda);
+    FLUPS_BoundaryType *mybc[3][2];
+    for (int id = 0; id < 3; id++) {
+        for (int is = 0; is < 2; is++) {
+            mybc[id][is] = (FLUPS_BoundaryType *)flups_malloc(sizeof(int) * lda);
         }
     }
 
@@ -100,9 +100,9 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co
     //-------------------------------------------------------------------------
     std::string     name = "validation_nx" + std::to_string((int)(nglob[0])) + "_ny" + std::to_string((int)(nglob[1])) + "_nz" + std::to_string((int)(nglob[2])) + "_nrank" + std::to_string(comm_size) + "_nthread" + std::to_string(omp_get_max_threads());
     FLUPS_Profiler *prof = flups_profiler_new_n(name.c_str());
-    
+
     m_profStart(prof, "Validation--Init-Flups");
-    FLUPS_Solver *  mysolver;
+    FLUPS_Solver *mysolver;
     mysolver = flups_init_timed(topo, mybc, h, L, NOD, center_type, prof);
 
     flups_set_greenType(mysolver, typeGreen);
@@ -113,7 +113,7 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co
     MPI_Comm_rank(comm, &rank);
     m_profStop(prof, "Validation--Init-Flups");
 
-    //flups_switchtopo_info(mysolver);
+    // flups_switchtopo_info(mysolver);
 
     //-------------------------------------------------------------------------
     /** - allocate rhs and solution */
@@ -145,19 +145,19 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co
      */
     for (int lia = 0; lia < lda; lia++) {
         for (int j2 = -1; j2 < 2; j2++) {
-            if (j2 != 0 && mybc[2][(j2 + 1) / 2] == UNB) continue;  //skip unbounded dirs
+            if (j2 != 0 && mybc[2][(j2 + 1) / 2] == UNB) continue;  // skip unbounded dirs
             for (int j1 = -1; j1 < 2; j1++) {
-                if (j1 != 0 && mybc[1][(j1 + 1) / 2] == UNB) continue;  //skip unbounded dirs
+                if (j1 != 0 && mybc[1][(j1 + 1) / 2] == UNB) continue;  // skip unbounded dirs
                 for (int j0 = -1; j0 < 2; j0++) {
-                    if (j0 != 0 && mybc[0][(j0 + 1) / 2] == UNB) continue;  //skip unbounded dirs
+                    if (j0 != 0 && mybc[0][(j0 + 1) / 2] == UNB) continue;  // skip unbounded dirs
 
                     double sign = 1.0;
                     double centerPos[3];
-                    double orig[3] = {j0 * L[0], j1 * L[1], j2 * L[2]};  //inner left corner of the current block i'm looking at
+                    double orig[3] = {j0 * L[0], j1 * L[1], j2 * L[2]};  // inner left corner of the current block i'm looking at
 
-                    sign *= j0 == 0 ? 1.0 : 1 - 2 * (mybc[0][(j0 + 1) / 2] == ODD);  //multiply by -1 if the symm is odd
-                    sign *= j1 == 0 ? 1.0 : 1 - 2 * (mybc[1][(j1 + 1) / 2] == ODD);  //multiply by -1 if the symm is odd
-                    sign *= j2 == 0 ? 1.0 : 1 - 2 * (mybc[2][(j2 + 1) / 2] == ODD);  //multiply by -1 if the symm is odd
+                    sign *= j0 == 0 ? 1.0 : 1 - 2 * (mybc[0][(j0 + 1) / 2] == ODD);  // multiply by -1 if the symm is odd
+                    sign *= j1 == 0 ? 1.0 : 1 - 2 * (mybc[1][(j1 + 1) / 2] == ODD);  // multiply by -1 if the symm is odd
+                    sign *= j2 == 0 ? 1.0 : 1 - 2 * (mybc[2][(j2 + 1) / 2] == ODD);  // multiply by -1 if the symm is odd
 
                     centerPos[0] = orig[0] + ((j0 != 0) && (mybc[0][(j0 + 1) / 2] != PER) ? (1.0 - center[0]) * L[0] : (center[0] * L[0]));
                     centerPos[1] = orig[1] + ((j1 != 0) && (mybc[1][(j1 + 1) / 2] != PER) ? (1.0 - center[1]) * L[1] : (center[1] * L[1]));
@@ -297,13 +297,13 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co
             }
         }
 
-        //USE THE FOLLOWING TO TEST THE K=0 PART OF THE 1DIRUNBOUNDED KERNEL
-        // manuRHS[0] = &fZero;
-        // manuSol[0] = &fCst;
-        // manuRHS[1] = &fZero;
-        // manuSol[1] = &fCst;
-        // manuRHS[2] = &d2dx2_fUnbSpietz;
-        // manuSol[2] = &fUnbSpietz;
+        // USE THE FOLLOWING TO TEST THE K=0 PART OF THE 1DIRUNBOUNDED KERNEL
+        //  manuRHS[0] = &fZero;
+        //  manuSol[0] = &fCst;
+        //  manuRHS[1] = &fZero;
+        //  manuSol[1] = &fCst;
+        //  manuRHS[2] = &d2dx2_fUnbSpietz;
+        //  manuSol[2] = &fUnbSpietz;
 
         {
             // Obtaining the reference sol and rhs
@@ -317,32 +317,32 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co
             for (int i2 = 0; i2 < flups_topo_get_nloc(topo, ax2); i2++) {
                 for (int i1 = 0; i1 < flups_topo_get_nloc(topo, ax1); i1++) {
                     for (int i0 = 0; i0 < flups_topo_get_nloc(topo, ax0); i0++) {
-                        const size_t id   = flups_locID(ax0, i0, i1, i2, lia, ax0, nmem, 1);
-                        const double shift = is_cell ? 0.5: 0.0;
-                        const double x[3] = {(istart[ax0] + i0 + shift) * h[ax0],
-                                             (istart[ax1] + i1 + shift) * h[ax1],
-                                             (istart[ax2] + i2 + shift) * h[ax2]};
+                        const size_t id    = flups_locID(ax0, i0, i1, i2, lia, ax0, nmem, 1);
+                        const double shift = is_cell ? 0.5 : 0.0;
+                        const double x[3]  = {(istart[ax0] + i0 + shift) * h[ax0],
+                                              (istart[ax1] + i1 + shift) * h[ax1],
+                                              (istart[ax2] + i2 + shift) * h[ax2]};
 
                         // const double y[3] = {0.,(x[1]-.5)/params.sigma[1],(x[2]-.5)/params.sigma[2] };
                         // const double rsq = y[1]*y[1] + y[2]*y[2] ; //((x[1]-.5)*(x[1]-.5)+(x[2]-.5)*(x[2]-.5)) / .25;
                         // const double r = sqrt(rsq);
 
-                        //CST(z):
-                        // sol[id] = fabs(rsq)>=1. ?  0.0 : exp(c_C * (1. - 1. / (1. - rsq))) ;
-                        // rhs[id] = fabs(rsq) >= 1. ? 0.0 : 4.*c_C* exp(c_C * (1. - 1. / (1. - rsq))) / (pow(rsq - 1., 4) * params.sigma[id] * params.sigma[id]) * \
+                        // CST(z):
+                        //  sol[id] = fabs(rsq)>=1. ?  0.0 : exp(c_C * (1. - 1. / (1. - rsq))) ;
+                        //  rhs[id] = fab s(rsq) >= 1. ? 0.0 : 4.*c_C* exp(c_C * (1. - 1. / (1. - rsq))) / (pow(rsq - 1., 4) * params.sigma[id] * params.sigma[id]) * \
                         //              (c_C * rsq - 1. + pow(y[1],4) + pow(y[2],4) + 2.* y[1]*y[1]*y[2]*y[2]) ;
 
-                        //SIN(z):
-                        // sol[id] = fabs(rsq) >= 1. ? 0.0 : exp(c_C * (1. - 1. / (1. - rsq))) * (sin(2. * M_PI * x[0] / L[0]));
-                        // rhs[id] = fabs(rsq) >= 1. ? 0.0 : 4.*c_C* exp(c_C * (1. - 1. / (1. - rsq))) / (pow(rsq - 1., 4) * params.sigma[id] * params.sigma[id]) * \
+                        // SIN(z):
+                        //  sol[id] = fabs(rsq) >= 1. ? 0.0 : exp(c_C * (1. - 1. / (1. - rsq))) * (sin(2. * M_PI * x[0] / L[0]));
+                        //  rhs[id] = fab s(rsq) >= 1. ? 0.0 : 4.*c_C* exp(c_C * (1. - 1. / (1. - rsq))) / (pow(rsq - 1., 4) * params.sigma[id] * params.sigma[id]) * \
                         //              (c_C * rsq - 1. + pow(y[1],4) + pow(y[2],4) + 2.* y[1]*y[1]*y[2]*y[2]) * (sin(2.*M_PI*x[0] /L[0])) ;
-                        // rhs[id] += fabs(rsq) >= 1. ? 0.0 : -sin(2*M_PI*x[0] /L[0]) * (2. * M_PI / L[0])* (2. * M_PI / L[0])  * exp(c_C * (1. - 1. / (1. - rsq))) ;
+                        //  rhs[id] += fabs(rsq) >= 1. ? 0.0 : -sin(2*M_PI*x[0] /L[0]) * (2. * M_PI / L[0])* (2. * M_PI / L[0])  * exp(c_C * (1. - 1. / (1. - rsq))) ;
 
-                        //CST(z)+sin(z):
-                        // sol[id] = fabs(rsq) >= 1. ? 0.0 : exp(c_C * (1. - 1. / (1. - rsq))) * (1. + sin(2. * M_PI * x[0] / L[0]));
-                        // rhs[id] = fabs(rsq) >= 1. ? 0.0 : 4.*c_C* exp(c_C * (1. - 1. / (1. - rsq))) / (pow(rsq - 1., 4) * params.sigma[id] * params.sigma[id]) * \
+                        // CST(z)+sin(z):
+                        //  sol[id] = fabs(rsq) >= 1. ? 0.0 : exp(c_C * (1. - 1. / (1. - rsq))) * (1. + sin(2. * M_PI * x[0] / L[0]));
+                        //  rhs[id] = fab s(rsq) >= 1. ? 0.0 : 4.*c_C* exp(c_C * (1. - 1. / (1. - rsq))) / (pow(rsq - 1., 4) * params.sigma[id] * params.sigma[id]) * \
                         //              (c_C * rsq - 1. + pow(y[1],4) + pow(y[2],4) + 2.* y[1]*y[1]*y[2]*y[2]) * (1.+sin(2.*M_PI*x[0] /L[0])) ;
-                        // rhs[id] += fabs(rsq) >= 1. ? 0.0 : -sin(2*M_PI*x[0] /L[0]) * (2. * M_PI / L[0])* (2. * M_PI / L[0])  * exp(c_C * (1. - 1. / (1. - rsq))) ;
+                        //  rhs[id] += fabs(rsq) >= 1. ? 0.0 : -sin(2*M_PI*x[0] /L[0]) * (2. * M_PI / L[0])* (2. * M_PI / L[0])  * exp(c_C * (1. - 1. / (1. - rsq))) ;
 
                         for (int dir = 0; dir < 3; dir++) {
                             const int dir2 = (dir + 1) % 3;
@@ -360,7 +360,6 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co
 #endif
     m_profStop(prof, "Validation--Init-RHS");
 
-
 #ifdef DUMP_DBG
     char msg[512];
     // write the source term and the solution
@@ -370,17 +369,16 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co
     flups_hdf5_dump(topo, msg, sol);
 #endif
 
-
     //-------------------------------------------------------------------------
     /** - Warm up */
     //-------------------------------------------------------------------------
     for (int is = 0; is < 5; is++) {
         MPI_Barrier(MPI_COMM_WORLD);
         m_profStart(prof, "Validation--Warm-up");
-        flups_solve(mysolver, field, rhs,STD);
+        flups_solve(mysolver, field, rhs, STD);
         m_profStop(prof, "Validation--Warm-up");
         MPI_Barrier(MPI_COMM_WORLD);
-    }	
+    }
 
     //-------------------------------------------------------------------------
     /** - solve the equations */
@@ -388,14 +386,13 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co
     for (int is = 0; is < nSolve; is++) {
         MPI_Barrier(MPI_COMM_WORLD);
         m_profStart(prof, "Validation--Solve");
-        flups_solve(mysolver, field, rhs,STD);
+        flups_solve(mysolver, field, rhs, STD);
         m_profStop(prof, "Validation--Solve");
         MPI_Barrier(MPI_COMM_WORLD);
     }
 
     flups_profiler_disp(prof);
 
-
 #ifdef DUMP_DBG
     // write the source term and the solution
     sprintf(msg, "sol_%d%d%d%d%d%d_%dx%dx%d", mybc[0][0][0], mybc[0][1][0], mybc[1][0][0], mybc[1][1][0], mybc[2][0][0], mybc[2][1][0], nglob[0], nglob[1], nglob[2]);
@@ -415,7 +412,7 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co
     std::memset(err2, 0, sizeof(double) * lda);
     std::memset(erri, 0, sizeof(double) * lda);
 
-    //determine the volume associated to a mesh
+    // determine the volume associated to a mesh
     double vol = 1.0;
     for (int id = 0; id < 3; id++) {
         if (mybc[id][0][0] != NONE && mybc[id][1][0] != NONE) {
@@ -449,12 +446,10 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co
         err2[i] = sqrt(err2[i]);
     }
 
-    
-    
-    string folder = output_dir + "/data";
-    string ct_name = is_cell ? "CellCenter" : "NodeCenter"; 
+    string folder  = output_dir + "/data";
+    string ct_name = is_cell ? "CellCenter" : "NodeCenter";
     char   filename[PATH_MAX];
-    sprintf(filename, "%s/%s_%s_%d%d%d%d%d%d_typeGreen=%d.txt", folder.c_str(),  __func__, ct_name.c_str(),  mybc[0][0][0], mybc[0][1][0], mybc[1][0][0], mybc[1][1][0], mybc[2][0][0], mybc[2][1][0], typeGreen);
+    sprintf(filename, "%s/%s_%s_%d%d%d%d%d%d_typeGreen=%d.txt", folder.c_str(), __func__, ct_name.c_str(), mybc[0][0][0], mybc[0][1][0], mybc[1][0][0], mybc[1][1][0], mybc[2][0][0], mybc[2][1][0], typeGreen);
 
     if (rank == 0) {
         struct stat st_output = {0};
@@ -462,9 +457,9 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co
         if (stat(output_dir.c_str(), &st_output) == -1) {
             mkdir(output_dir.c_str(), 0770);
         }
-        
+
         if (stat(folder.c_str(), &st_folder) == -1) {
-                mkdir(folder.c_str(), 0770);
+            mkdir(folder.c_str(), 0770);
         }
 
         FILE *myfile = fopen(filename, "a+");
@@ -498,6 +493,7 @@ void validation_3d(const DomainDescr myCase, const FLUPS_GreenType typeGreen, co
     flups_free(rhs);
     flups_free(field);
     flups_cleanup(mysolver);
+    flups_cleanup_fftw();
     flups_profiler_free(prof);
     flups_topo_free(topo);
 
diff --git a/src/flups.cpp b/src/flups.cpp
index 0d63ae3..7352fac 100644
--- a/src/flups.cpp
+++ b/src/flups.cpp
@@ -136,6 +136,7 @@ void flups_cleanup(Solver* s) {
     flups_cleanup_ext(s, false);
 }
 
+// Destroy the solver and cleanup fftw
 void flups_cleanup_ext(Solver* s, const bool cleanup_fftw) {
     delete s;
     if (cleanup_fftw){
@@ -143,6 +144,7 @@ void flups_cleanup_ext(Solver* s, const bool cleanup_fftw) {
     }
 }
 
+// cleanup all the structures related to fftw
 void flups_cleanup_fftw(){
 #if FLUPS_OPENMP    
     fftw_cleanup_threads();
diff --git a/src/flups.h b/src/flups.h
index 61ffa0d..d3dc73a 100644
--- a/src/flups.h
+++ b/src/flups.h
@@ -410,6 +410,8 @@ void flups_cleanup_ext(FLUPS_Solver* s, const bool cleanup_fftw);
 
 void flups_cleanup_fftw(); 
 
+void flups_fftw_init();
+
 /**
  * @brief sets the type of the Green's function used by the solver
  *
diff --git a/src/flups_interface.h b/src/flups_interface.h
index 3642344..46b1ea4 100644
--- a/src/flups_interface.h
+++ b/src/flups_interface.h
@@ -24,7 +24,7 @@
  * @brief Memory alignment in bytes.
  *
  */
-#define FLUPS_ALIGNMENT 16
+#define FLUPS_ALIGNMENT 8
 
 //==============================================================================
 /**

From f7c18eeb494e968ed78f4f821592abfb1980781f Mon Sep 17 00:00:00 2001
From: pbalty <pierre.balty@uclouvain.be>
Date: Tue, 30 Apr 2024 12:28:19 +0200
Subject: [PATCH 16/19] :memo: Update the ReadMe

---
 README.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index fa04950..b33c02f 100644
--- a/README.md
+++ b/README.md
@@ -224,7 +224,8 @@ flups_solve(mysolver,rhs, rhs);
 
 Then, destroy the solver and the created topology
 ```
-flups_cleanup(mysolver);
+flups_cleanup(mysolver); // destroy the solver
+flups_cleanup_fftw();    // cleanup the fftw stuff
 flups_topo_free(topo);
 for (int id = 0; id < 3; id++) {
     for (int is = 0; is < 2; is++) {
@@ -319,4 +320,4 @@ The _daily test_ is a smaller, in-house, test suite, that can be executed on a d
 ### Resources
 
 - [CI testing](doc/CI.md)
-- [real to real FFT](doc/Mode_Correction.md)
\ No newline at end of file
+- [real to real FFT](doc/Mode_Correction.md)

From 988dfcd3c8ea61a7304ad9b60616632c720a005c Mon Sep 17 00:00:00 2001
From: pbalty <pierre.balty@uclouvain.be>
Date: Tue, 30 Apr 2024 13:30:53 +0200
Subject: [PATCH 17/19] :art: Working on the naming of the new functions

---
 src/flups.cpp | 20 +++++++++-----------
 src/flups.h   | 15 ++++++++++++---
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/src/flups.cpp b/src/flups.cpp
index 7352fac..033a4a3 100644
--- a/src/flups.cpp
+++ b/src/flups.cpp
@@ -30,16 +30,16 @@
 
 #include <algorithm>  // std::remove
 
+#include "FFTW_plan_dim.hpp"
 #include "Solver.hpp"
 #include "Topology.hpp"
-#include "FFTW_plan_dim.hpp"
 #include "defines.hpp"
 #include "h3lpr/profiler.hpp"
 
 extern "C" {
 
 void* flups_malloc(size_t size) {
-    return m_calloc(size); 
+    return m_calloc(size);
 }
 
 void flups_free(void* data) {
@@ -133,20 +133,18 @@ Solver* flups_init_timed(Topology* t, BoundaryType* bc[3][2], const double h[3],
 
 // destroy the solver
 void flups_cleanup(Solver* s) {
-    flups_cleanup_ext(s, false);
+    flups_cleanup_all(s);
 }
 
 // Destroy the solver and cleanup fftw
-void flups_cleanup_ext(Solver* s, const bool cleanup_fftw) {
+void flups_cleanup_all(Solver* s) {
     delete s;
-    if (cleanup_fftw){
-        flups_cleanup_fftw();
-    }
+    flups_cleanup_fftw();
 }
 
 // cleanup all the structures related to fftw
-void flups_cleanup_fftw(){
-#if FLUPS_OPENMP    
+void flups_cleanup_fftw() {
+#if FLUPS_OPENMP
     fftw_cleanup_threads();
 #endif
 
@@ -181,7 +179,7 @@ void flups_set_alpha(Solver* s, const double alpha) {
     s->set_alpha(alpha);
 }
 
-double* flups_get_innerBuffer(FLUPS_Solver* s){
+double* flups_get_innerBuffer(FLUPS_Solver* s) {
     return s->get_innerBuffer();
 }
 
@@ -193,7 +191,7 @@ Topology* flups_get_innerTopo_spectral(Solver* s) {
     return s->get_innerTopo_spectral();
 }
 
-void flups_skip_firstSwitchtopo(Solver* s){
+void flups_skip_firstSwitchtopo(Solver* s) {
     s->skip_firstSwitchtopo();
 }
 
diff --git a/src/flups.h b/src/flups.h
index d3dc73a..6861d68 100644
--- a/src/flups.h
+++ b/src/flups.h
@@ -402,15 +402,24 @@ FLUPS_Solver* flups_init_timed(FLUPS_Topology* t, FLUPS_BoundaryType* bc[3][2],
 void flups_cleanup(FLUPS_Solver* s);
 
 /**
- * @brief must be called before execution terminates as it frees the memory used by the solver
+ * @brief Free the memomry of the solver and the data employed by fftw
+ *
+ * @warning this function free all the data of fftw. If you use multiple solver
+ *        you should not call this function
  *
  * @param s
  */
-void flups_cleanup_ext(FLUPS_Solver* s, const bool cleanup_fftw);
+void flups_cleanup_all(FLUPS_Solver* s);
 
+/**
+ * @brief Free data employed by fftw
+ *
+ * @warning this function free all the data of fftw. If you use multiple solver, this function
+ * should be called after all the solver have freed
+ *
+ */
 void flups_cleanup_fftw(); 
 
-void flups_fftw_init();
 
 /**
  * @brief sets the type of the Green's function used by the solver

From f1fffe050fbda8b7adfbd4b1858d59a2930528c4 Mon Sep 17 00:00:00 2001
From: pbalty <pierre.balty@uclouvain.be>
Date: Tue, 30 Apr 2024 15:07:16 +0200
Subject: [PATCH 18/19] :bug: :white_check_mark: Fix the test

---
 src/Solver.cpp        | 2 +-
 src/flups_interface.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Solver.cpp b/src/Solver.cpp
index a680d7a..01a9257 100644
--- a/src/Solver.cpp
+++ b/src/Solver.cpp
@@ -66,7 +66,7 @@ Solver::Solver(Topology *topo, BoundaryType *rhsbc[3][2], const double h[3], con
         if (fftw_alignment_of(&(data[i])) == 0) {
             // if we are above the minimum requirement, generate an error
             if (i > alignSize) {
-                FLUPS_CHECK(false, "The FLUPS alignement has to be a multiple integer of the FFTW alignement, please change the constant variable FLUPS_ALIGNMENT into file flups.h accordingly: FFTW=%d vs FLUPS=%d", fftwalignment_, FLUPS_ALIGNMENT);
+                FLUPS_CHECK(false, "The FLUPS alignement has to be a multiple integer of the FFTW alignement, please change the constant variable FLUPS_ALIGNMENT into file flups_interface.h accordingly: FFTW=%d vs FLUPS=%d", fftwalignment_, FLUPS_ALIGNMENT);
             }
             // else, just stop and advise the user to change
             break;
diff --git a/src/flups_interface.h b/src/flups_interface.h
index 46b1ea4..3642344 100644
--- a/src/flups_interface.h
+++ b/src/flups_interface.h
@@ -24,7 +24,7 @@
  * @brief Memory alignment in bytes.
  *
  */
-#define FLUPS_ALIGNMENT 8
+#define FLUPS_ALIGNMENT 16
 
 //==============================================================================
 /**

From bf3e8afbc7cd1318875ef9add6f610c74dc1502d Mon Sep 17 00:00:00 2001
From: pbalty <pierre.balty@uclouvain.be>
Date: Fri, 3 May 2024 09:56:42 +0200
Subject: [PATCH 19/19] :memo: Add credits to James for the new kernels

---
 README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b32f707..c1d4962 100644
--- a/README.md
+++ b/README.md
@@ -16,10 +16,13 @@ For the list of all the contributors to the development of FLUPS, description an
 FLUPS' design, implementation, and performances are described in two papers.
 
 If you use FLUPS, please cite them in your publications:
-- [Balty et al.](https://arxiv.org/abs/2211.07777), **FLUPS - a flexible and performant massively parallel Fourier transform library**, submitted, 2022
+- [Balty et al.](https://arxiv.org/abs/2211.07777), **FLUPS - a flexible and performant massively parallel Fourier transform library**, IEEE Transactions on Parallel and Distributed Systems, 2023
 - [Caprace et al.](https://arxiv.org/abs/2006.09300), **FLUPS - A Fourier-based Library of Unbounded Poisson Solvers**, SIAM Journal on Scientific Computing, 2021
 
 
+The high-order Lattice Green's functions (LGF and MEHR) available in FLUPS are described in a third paper. If you use those kernels, please cite the related paper in your publications:
+- [Gabbard et al.](https://arxiv.org/abs/2309.13503), **Lattice Green’s Functions for High-Order Finite Difference Stencils**, SIAM Journal on Numerical Analysis, 2024
+
 ## Why should you use FLUPS?
 - You can solve the Poisson on rectangular and uniform distributed grids;
 - You can use either cell-centred or node-centred data layout;