Skip to content

Commit

Permalink
🔀 Merge the new green kernels
Browse files Browse the repository at this point in the history
  • Loading branch information
pbalty committed May 3, 2024
2 parents b28ece4 + 78c41a9 commit 3b46359
Show file tree
Hide file tree
Showing 36 changed files with 912 additions and 742 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,13 @@ For the list of all the contributors to the development of FLUPS, description an
FLUPS' design, implementation, and performance are described in two papers.

If you use FLUPS, please cite them in your publications:
- [Balty et al.](https://arxiv.org/abs/2211.07777), **FLUPS - a flexible and performant massively parallel Fourier transform library**, submitted, 2022
- [Balty et al.](https://arxiv.org/abs/2211.07777), **FLUPS - a flexible and performant massively parallel Fourier transform library**, IEEE Transactions on Parallel and Distributed Systems, 2023
- [Caprace et al.](https://arxiv.org/abs/2006.09300), **FLUPS - A Fourier-based Library of Unbounded Poisson Solvers**, SIAM Journal on Scientific Computing, 2021


The high-order Lattice Green's functions (LGF and MEHR) available in FLUPS are described in a third paper. If you use those kernels, please cite the related paper in your publications:
- [Gabbard et al.](https://arxiv.org/abs/2309.13503), **Lattice Green’s Functions for High-Order Finite Difference Stencils**, SIAM Journal on Numerical Analysis, 2024

## Why should you use FLUPS?
- You can solve the Poisson equation on rectangular and uniform distributed grids;
- You can use either cell-centred or node-centred data layout;
Expand Down Expand Up @@ -231,7 +234,8 @@ flups_solve(mysolver,rhs, rhs);

Then, destroy the solver and the created topology
```
flups_cleanup(mysolver);
flups_cleanup(mysolver); // destroy the solver
flups_cleanup_fftw(); // cleanup the fftw stuff
flups_topo_free(topo);
for (int id = 0; id < 3; id++) {
for (int is = 0; is < 2; is++) {
Expand Down
Binary file added kernel/MEHR_4F_2d_32.ker
Binary file not shown.
Binary file added kernel/MEHR_4L6L_2d_32.ker
Binary file not shown.
Binary file added kernel/MEHR_6F_2d_32.ker
Binary file not shown.
57 changes: 27 additions & 30 deletions samples/compareACCFFT/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@
#include <iostream>

#include "accfft.h"
#include "h3lpr/profiler.hpp"
#include "h3lpr/parser.hpp"
#include "flups.h"

#include "h3lpr/parser.hpp"
#include "h3lpr/profiler.hpp"

int main(int argc, char *argv[]) {
//-------------------------------------------------------------------------
Expand All @@ -31,37 +30,36 @@ int main(int argc, char *argv[]) {
// Get info from the command line
//--------------------------------------------------------------------------
H3LPR::Parser parser(argc, (const char **)argv);
const auto arg_nglob = parser.GetValues<int, 3>("--nglob", "the global resolution, will be used for both ACCFFT and FLUPS", {64,64,64});
const auto arg_nglob = parser.GetValues<int, 3>("--nglob", "the global resolution, will be used for both ACCFFT and FLUPS", {64, 64, 64});
const auto arg_nproc = parser.GetValues<int, 3>("--nproc", "the proc distribution, for FLUPS only", {1, 1, 1});
const auto arg_dom = parser.GetValues<double, 3>("--dom", "the size of the domain, must be compatible with nglob", {1.0, 1.0, 1.0});
const int n_iter = parser.GetValue<int>("--niter", "the number of iterations to perform", 20);
const int n_warm = parser.GetValue<int>("--warm", "the number of iterations to perform when warming up", 1);
const bool profile = parser.GetFlag("--profile","forward the profiler to flups");
const bool profile = parser.GetFlag("--profile", "forward the profiler to flups");
parser.Finalize();

//--------------------------------------------------------------------------
// Definition of the problem
//--------------------------------------------------------------------------
const int nglob[3] = {arg_nglob[0], arg_nglob[1], arg_nglob[2]};
const int nproc[3] = {arg_nproc[0], arg_nproc[1], arg_nproc[2]};
const double L[3] = {arg_dom[0], arg_dom[1], arg_dom[2]};

const int nglob[3] = {arg_nglob[0], arg_nglob[1], arg_nglob[2]};
const int nproc[3] = {arg_nproc[0], arg_nproc[1], arg_nproc[2]};
const double L[3] = {arg_dom[0], arg_dom[1], arg_dom[2]};

// get the grid spacing
const double h[3] = {L[0] / nglob[0], L[1] / nglob[1], L[2] / nglob[2]};

// get the PER PER PER BC everywhere
const FLUPS_CenterType center_type[3] = {CELL_CENTER, CELL_CENTER, CELL_CENTER};
//const FLUPS_CenterType center_type[3] = {NODE_CENTER, NODE_CENTER, NODE_CENTER};
FLUPS_BoundaryType *mybc[3][2];
// const FLUPS_CenterType center_type[3] = {NODE_CENTER, NODE_CENTER, NODE_CENTER};
FLUPS_BoundaryType *mybc[3][2];
for (int id = 0; id < 3; id++) {
for (int is = 0; is < 2; is++) {
mybc[id][is] = (FLUPS_BoundaryType *)flups_malloc(sizeof(int) * 1);
mybc[id][is][0] = PER;
}
}

//..........................................................................
//..........................................................................
// Display
flups_info(argc, argv);
if (rank == 0) {
Expand All @@ -75,29 +73,27 @@ int main(int argc, char *argv[]) {
printf("--------------------------------------------------------------\n");
}



//--------------------------------------------------------------------------
std::string prof_name = "beatme_nglob" + std::to_string(nglob[0]) +"_"+ std::to_string(nglob[1]) + "_" + std::to_string(nglob[2]) + "_nrank" + std::to_string(comm_size);
H3LPR::Profiler prof(prof_name);
std::string prof_name = "beatme_nglob" + std::to_string(nglob[0]) + "_" + std::to_string(nglob[1]) + "_" + std::to_string(nglob[2]) + "_nrank" + std::to_string(comm_size);
H3LPR::Profiler prof(prof_name);

//--------------------------------------------------------------------------
/** - Initialize FLUPS */
//--------------------------------------------------------------------------
if (rank == 0) printf("Initialization of FLUPS\n");

// create a real topology
FLUPS_Profiler* flups_prof = (profile)? (FLUPS_Profiler*) &prof : nullptr;
FLUPS_Topology *topoTmp = flups_topo_new(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm);
FLUPS_Solver *mysolver = flups_init_timed(topoTmp, mybc, h, L, NOD, center_type, flups_prof);
FLUPS_Profiler *flups_prof = (profile) ? (FLUPS_Profiler *)&prof : nullptr;
FLUPS_Topology *topoTmp = flups_topo_new(0, 1, nglob, nproc, false, NULL, FLUPS_ALIGNMENT, comm);
FLUPS_Solver *mysolver = flups_init_timed(topoTmp, mybc, h, L, NOD, center_type, flups_prof);

// set the CHAT2 green type (even if it's not used)
flups_set_greenType(mysolver, CHAT_2);
flups_setup(mysolver, true);
double *solFLU = flups_get_innerBuffer(mysolver);
double *solFLU = flups_get_innerBuffer(mysolver);

// to fill the data we use the inner topo
const Topology *topoIn =flups_get_innerTopo_physical(mysolver);
const Topology *topoIn = flups_get_innerTopo_physical(mysolver);
// instruct the solver to skip the first ST
flups_skip_firstSwitchtopo(mysolver);

Expand All @@ -115,18 +111,18 @@ int main(int argc, char *argv[]) {

//..........................................................................
// set some straightforward data
int start_id[3];
int start_id[3];
flups_topo_get_istartGlob(topoIn, start_id);
int topo_nmem[3] = {flups_topo_get_nmem(topoIn, 0), flups_topo_get_nmem(topoIn, 1), flups_topo_get_nmem(topoIn, 2)};

// set a simple expression
double val = 0.0;
for (int i2 = 0; i2 < flups_topo_get_nloc(topoIn, 2); ++i2){
for(int i1 = 0; i1 < flups_topo_get_nloc(topoIn, 1); ++ i1){
for(int i0 = 0; i0 < flups_topo_get_nloc(topoIn, 0); ++i0){
//double x = 2.0 * M_PI / nglob[0] * (i0 + topoIn->cmpt_start_id(0));
//double y = 2.0 * M_PI / nglob[1] * (i1 + topoIn->cmpt_start_id(1));
//double z = 2.0 * M_PI / nglob[2] * (i2 + topoIn->cmpt_start_id(2));
for (int i2 = 0; i2 < flups_topo_get_nloc(topoIn, 2); ++i2) {
for (int i1 = 0; i1 < flups_topo_get_nloc(topoIn, 1); ++i1) {
for (int i0 = 0; i0 < flups_topo_get_nloc(topoIn, 0); ++i0) {
// double x = 2.0 * M_PI / nglob[0] * (i0 + topoIn->cmpt_start_id(0));
// double y = 2.0 * M_PI / nglob[1] * (i1 + topoIn->cmpt_start_id(1));
// double z = 2.0 * M_PI / nglob[2] * (i2 + topoIn->cmpt_start_id(2));
double x = 2.0 * M_PI / nglob[0] * (i0 + start_id[0]);
double y = 2.0 * M_PI / nglob[1] * (i1 + start_id[1]);
double z = 2.0 * M_PI / nglob[2] * (i2 + start_id[2]);
Expand All @@ -147,9 +143,9 @@ int main(int argc, char *argv[]) {
accfft_create_comm(MPI_COMM_WORLD, c_dims, &c_comm);

// let ACCFFT decide on the topology choice, pencil in Z, as always
int isize[3], osize[3], istart[3], ostart[3];
int isize[3], osize[3], istart[3], ostart[3];

int n_acc[3] = {nglob[2],nglob[1],nglob[0]};
int n_acc[3] = {nglob[2], nglob[1], nglob[0]};
size_t alloc_max = accfft_local_size_dft_r2c(n_acc, isize, istart, osize, ostart, c_comm);

double *data_acc = (double *)accfft_alloc(alloc_max);
Expand Down Expand Up @@ -278,6 +274,7 @@ int main(int argc, char *argv[]) {
}
}

flups_cleanup_fftw();
MPI_Finalize();
}

Expand Down
Loading

0 comments on commit 3b46359

Please sign in to comment.