diff --git a/.gitignore b/.gitignore index ed98daa..f214f65 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,8 @@ prof .vscode doc/html doc/latex +*.d +*.in *.x *.xlsx *.log diff --git a/samples/compareP3DFFT++/main_compare++.cpp b/samples/compareP3DFFT++/main_compare++.cpp index 92e7f82..098954b 100644 --- a/samples/compareP3DFFT++/main_compare++.cpp +++ b/samples/compareP3DFFT++/main_compare++.cpp @@ -260,24 +260,24 @@ int main(int argc, char *argv[]) { // //------------------------------------------------------------------------- // /** - allocate rhs and solution */ // //------------------------------------------------------------------------- - - printf("[FLUPS] topo IN glob : %d %d %d \n",topoIn->nglob(0),topoIn->nglob(1),topoIn->nglob(2)); - printf("[FLUPS] topo IN loc : %d*%d*%d = %d (check: %d %d %d)\n",topoIn->nmem(0),topoIn->nmem(1),topoIn->nmem(2),topoIn->memsize(),topoIn->nloc(0),topoIn->nloc(1),topoIn->nloc(2)); - printf("[FLUPS] topo OUT glob : %d %d %d \n",topoSpec->nglob(0),topoSpec->nglob(1),topoSpec->nglob(2)); - printf("[FLUPS] topo OUT loc : nmem: %d*%d*%d nf:%d (nloc: %d %d %d) \n",topoSpec->nmem(0),topoSpec->nmem(1),topoSpec->nmem(2),topoSpec->nf(),topoSpec->nloc(0),topoSpec->nloc(1),topoSpec->nloc(2)); + if(rank == 0) { + printf("[FLUPS] topo IN glob : %d %d %d \n",topoIn->nglob(0),topoIn->nglob(1),topoIn->nglob(2)); + printf("[FLUPS] topo IN loc : %d*%d*%d = %d (check: %d %d %d)\n",topoIn->nmem(0),topoIn->nmem(1),topoIn->nmem(2),topoIn->memsize(),topoIn->nloc(0),topoIn->nloc(1),topoIn->nloc(2)); + printf("[FLUPS] topo OUT glob : %d %d %d \n",topoSpec->nglob(0),topoSpec->nglob(1),topoSpec->nglob(2)); + printf("[FLUPS] topo OUT loc : nmem: %d*%d*%d nf:%d (nloc: %d %d %d) \n",topoSpec->nmem(0),topoSpec->nmem(1),topoSpec->nmem(2),topoSpec->nf(),topoSpec->nloc(0),topoSpec->nloc(1),topoSpec->nloc(2)); #ifndef SKIP_P3D - printf("[P3DFFT++] topo IN glob : %d %d %d \n",gdimsIN[0],gdimsIN[1],gdimsIN[2]); - printf("[P3DFFT++] topo IN loc : %d %d %d (is: %d %d %d) \n",P3DnlocIN[0],P3DnlocIN[1],P3DnlocIN[2],glob_startIN[0],glob_startIN[1],glob_startIN[2]); - printf("[P3DFFT++] topo OUT glob : %d %d %d \n",gdimsOUT[0],gdimsOUT[1],gdimsOUT[2]); - printf("[P3DFFT++] topo OUT loc : %d %d %d (is: %d %d %d) \n",P3DnlocOUT[0],P3DnlocOUT[1],P3DnlocOUT[2],glob_startOUT[0],glob_startOUT[1],glob_startOUT[2]); + printf("[P3DFFT++] topo IN glob : %d %d %d \n",gdimsIN[0],gdimsIN[1],gdimsIN[2]); + printf("[P3DFFT++] topo IN loc : %d %d %d (is: %d %d %d) \n",P3DnlocIN[0],P3DnlocIN[1],P3DnlocIN[2],glob_startIN[0],glob_startIN[1],glob_startIN[2]); + printf("[P3DFFT++] topo OUT glob : %d %d %d \n",gdimsOUT[0],gdimsOUT[1],gdimsOUT[2]); + printf("[P3DFFT++] topo OUT loc : %d %d %d (is: %d %d %d) \n",P3DnlocOUT[0],P3DnlocOUT[1],P3DnlocOUT[2],glob_startOUT[0],glob_startOUT[1],glob_startOUT[2]); #endif - - printf("I am going to allocate FLUPS: %d (inside FLUPS: %d)\n",FLUmemsizeIN,FLUmemsizeOUT); + printf("I am going to allocate FLUPS: %d (inside FLUPS: %d)\n",FLUmemsizeIN,FLUmemsizeOUT); #ifndef SKIP_P3D - printf(" P3D: %d (out %d C) \n",P3DmemsizeIN,P3DmemsizeOUT); + printf(" P3D: %d (out %d C) \n",P3DmemsizeIN,P3DmemsizeOUT); #endif + } double *rhsFLU = (double *)fftw_malloc(sizeof(double) * FLUmemsizeIN); diff --git a/samples/compareP3DFFT++/run/zenobe_weakscaling_a2a.sh b/samples/compareP3DFFT++/run/zenobe_weakscaling_a2a.sh index 20411d7..236d498 100755 --- a/samples/compareP3DFFT++/run/zenobe_weakscaling_a2a.sh +++ b/samples/compareP3DFFT++/run/zenobe_weakscaling_a2a.sh @@ -9,7 +9,7 @@ VER=a2a EXE=flups_vs_p3dfft++_${VER} ######### WEAK -> increase the number of CPU and the size -SCRATCH=/SCRATCH/acad/examples/dcaprace/flupsVSp3dfft3_weak_$VER +SCRATCH=/SCRATCH/acad/examples/dcaprace/flupsVSp3dfft3_weak_${VER}_V3 # clean the validation dir # rm -rf ${SCRATCH} @@ -38,18 +38,22 @@ cd $SCRATCH # qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=64,MY_NTH=1, -l select=256:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh ##################### size = 128^3/proc ################################# -# cpu = 64 +## CANNOT DO cpu=96,192,384... due to P3D ! + +# cpu = 128 (-> actually allocating 144) # same on large -qsub -q large -v EXE=${EXE},MY_NY=8,MY_NZ=16,LX=4,LY=4,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=32:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh +qsub -q large -v EXE=${EXE},MY_NY=8,MY_NZ=16,LX=4,LY=4,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=6:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh -# cpu = 256 -qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=16,LX=4,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=64:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh +# cpu = 256 (->264) +qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=16,LX=4,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=11:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh -# cpu = 512 -qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=32,LX=8,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=128:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh +# cpu = 512 (->528) +qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=32,LX=8,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=22:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh -# cpu = 1024 -qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=256:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh +# cpu = 1024 (->1032) +qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=43:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh +# cpu = 2048 (->2064) +qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=64,LX=8,LY=16,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=86:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh #end of file diff --git a/samples/compareP3DFFT++/run/zenobe_weakscaling_nb.sh b/samples/compareP3DFFT++/run/zenobe_weakscaling_nb.sh index a56f1f0..c78305f 100755 --- a/samples/compareP3DFFT++/run/zenobe_weakscaling_nb.sh +++ b/samples/compareP3DFFT++/run/zenobe_weakscaling_nb.sh @@ -9,7 +9,7 @@ VER=nb EXE=flups_vs_p3dfft++_${VER} ######### WEAK -> increase the number of CPU and the size -SCRATCH=/SCRATCH/acad/examples/dcaprace/flupsVSp3dfft3_weak_$VER +SCRATCH=/SCRATCH/acad/examples/dcaprace/flupsVSp3dfft3_weak_${VER}_V3 # clean the validation dir # rm -rf ${SCRATCH} @@ -38,18 +38,22 @@ cd $SCRATCH # qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=64,MY_NTH=1, -l select=256:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh ##################### size = 128^3/proc ################################# -# cpu = 64 +## CANNOT DO cpu=96,192,384... due to P3D ! + +# cpu = 128 (-> actually allocating 144) # same on large -qsub -q large -v EXE=${EXE},MY_NY=8,MY_NZ=16,LX=4,LY=4,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=32:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh +qsub -q large -v EXE=${EXE},MY_NY=8,MY_NZ=16,LX=4,LY=4,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=6:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh -# cpu = 256 -qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=16,LX=4,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=64:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh +# cpu = 256 (->264) +qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=16,LX=4,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=11:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh -# cpu = 512 -qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=32,LX=8,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=128:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh +# cpu = 512 (->528) +qsub -q large -v EXE=${EXE},MY_NY=16,MY_NZ=32,LX=8,LY=8,LZ=8,MY_SIZE=128,MY_NTH=1, -l select=22:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh -# cpu = 1024 -qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=256:ncpus=4:mem=10500mb:mpiprocs=4:ompthreads=1 ./zenobe_kernel.sh +# cpu = 1024 (->1032) +qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=32,LX=8,LY=8,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=43:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh +# cpu = 2048 (->2064) +qsub -q large -v EXE=${EXE},MY_NY=32,MY_NZ=64,LX=8,LY=16,LZ=16,MY_SIZE=128,MY_NTH=1, -l select=86:ncpus=24:mem=63000mb:mpiprocs=24:ompthreads=1 ./zenobe_kernel.sh #end of file diff --git a/src/Solver.cpp b/src/Solver.cpp index 8ac6eb8..43e0d89 100644 --- a/src/Solver.cpp +++ b/src/Solver.cpp @@ -170,8 +170,9 @@ double* Solver::setup(const bool changeTopoComm) { /** - Precompute the communication graph */ //------------------------------------------------------------------------- // get the communication size - int worldsize; + int worldsize, rank; MPI_Comm_size(_topo_phys->get_comm(), &worldsize); + MPI_Comm_rank(_topo_phys->get_comm(), &rank); // initialize the sources, sources weights, destination and destination weights int* sources = (int*)flups_malloc(worldsize * sizeof(int)); @@ -218,11 +219,6 @@ double* Solver::setup(const bool changeTopoComm) { worldsize, dests, destsW, \ MPI_INFO_NULL, 1, &graph_comm); - flups_free(sources); - flups_free(sourcesW); - flups_free(dests); - flups_free(destsW); - #if defined(VERBOSE) && VERBOSE==2 int inD, outD, wei; MPI_Dist_graph_neighbors_count(graph_comm, &inD, &outD, &wei); @@ -258,15 +254,12 @@ double* Solver::setup(const bool changeTopoComm) { /** - if asked by the user, we overwrite the graph comm by a forced version (for test purpose) */ //------------------------------------------------------------------------- #ifdef DEV_SIMULATE_GRAPHCOMM - int rank; - MPI_Comm_rank(_topo_phys->get_comm(), &rank); - //switch indices by a random number: -#ifdef DEV_REORDER_SHIFT - int shift = DEV_REORDER_SHIFT; -#else - int shift = worldsize/2; -#endif + #ifdef DEV_REORDER_SHIFT + int shift = DEV_REORDER_SHIFT; + #else + int shift = worldsize/2; + #endif int* outRanks = (int*) flups_malloc(sizeof(int)*worldsize); if(rank == 0){ @@ -286,11 +279,19 @@ double* Solver::setup(const bool changeTopoComm) { flups_free(outRanks); #endif +//end simulate_graph + + #ifdef PROF + //writing reordering to console + int newrank; + MPI_Comm_rank(graph_comm, &newrank); + printf("[MPI ORDER] %i : %i \n", rank, newrank); + #endif #else //Use METIS to find a smart partition of the graph int *order = (int *)flups_malloc(sizeof(int) * worldsize); - reorder_metis(_topo_phys->get_comm(), sources, sourcesW, dests, destsW, order); + _reorder_metis(_topo_phys->get_comm(), sources, sourcesW, dests, destsW, order); // create a new comm based on the order given by metis MPI_Group group_in, group_out; MPI_Comm_group(_topo_phys->get_comm(), &group_in); //get the group of the current comm @@ -320,9 +321,9 @@ double* Solver::setup(const bool changeTopoComm) { _topo_phys->change_comm(graph_comm); } -#ifdef PERF_VERBOSE + #ifdef PERF_VERBOSE _topo_hat[0]->disp_rank(); -#endif + #endif #endif //REORDER_RANKS @@ -1471,3 +1472,288 @@ void Solver::dothemagic_rhs_complex_nmult3(double *data) { FLUPS_CHECK(false, "not implemented yet", LOCATION); END_FUNC; } + + +/** + * @brief reorder the MPI-ranks using metis + * + * @warning this functions assume an evenly distributed amount of procs on the nodes + * + * @param comm + * @param sources + * @param sourcesW + * @param dests + * @param destsW + * @param n_nodes + * @param order + */ +void Solver::_reorder_metis(MPI_Comm comm, int *sources, int *sourcesW, int *dests, int *destsW, int *order) { + int comm_size; + int comm_rank; + MPI_Comm_rank(comm, &comm_rank); + MPI_Comm_size(comm, &comm_size); + +#ifdef HAVE_METIS + + //------------------------------------------------------------------------- + /** - get the total number of nodes */ + //------------------------------------------------------------------------- + // create a group where everybody can create a shared memory region + MPI_Comm nodecomm; + MPI_Info mpinfo; + MPI_Info_create(&mpinfo); + MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, comm_rank, mpinfo, &nodecomm); + // we store the comm size + int local_nodesize; + MPI_Comm_size(nodecomm, &local_nodesize); + + // gather on proc 1 the number of proc per node + int *vec_nodesize = (int *)flups_malloc(sizeof(int) * comm_size); + MPI_Allgather(&local_nodesize, 1, MPI_INT, vec_nodesize, 1, MPI_INT, comm); + + // count the number of partitions we'll need: + int n_nodes = 0; + int id = 0; + while( id < comm_size){ + id += vec_nodesize[id]; + n_nodes++; + } + +#ifdef DEV_SIMULATE_GRAPHCOMM + //CHEATING: imposing that there will be 2 groups (there needs to be at least 4 procs) + n_nodes = 2; + for (int ip = 0; ip 0 && i != comm_rank) n_neighbours++; + } + // allocate the number of neighbours and their weights + int *neighbours = (int *)flups_malloc(sizeof(int) * n_neighbours); + int *weights = (int *)flups_malloc(sizeof(int) * n_neighbours); + n_neighbours = 0; + for (int i = 0; i < comm_size; ++i) { + if (sourcesW[i] + destsW[i] > 0 && i != comm_rank) { + neighbours[n_neighbours] = i; + weights[n_neighbours] = sourcesW[i] + destsW[i]; + n_neighbours++; + } + } + + //------------------------------------------------------------------------- + /** - build the graph on proc 0 and ask for partioning + * The graph structure follows metis rules: + * the edges (= id of the destination of the edges) starting from proc k are located + * from adj[xadj[k]] to adj[xadj[k+1]-1] + * Same structure is used for the weights with the ajdw + * */ + //------------------------------------------------------------------------- + if (comm_rank == 0) { + int *xadj = (int *)flups_malloc((comm_size + 1) * sizeof(int)); + int *nadj = (int *)flups_malloc((comm_size) * sizeof(int)); + + // get the number of neighbours from everybody + MPI_Gather(&n_neighbours, 1, MPI_INT, nadj, 1, MPI_INT, 0, comm); + // get the starting indexes of the neighbour description for everybody + xadj[0] = 0; + for (int i = 0; i < comm_size; ++i) { + xadj[i + 1] = xadj[i] + nadj[i]; + } + + // allocate the adjency list + weights and fill it with the neighbour list from everybody + int *adj = (int *)flups_malloc(xadj[comm_size] * sizeof(int)); + int *adjw = (int *)flups_malloc(xadj[comm_size] * sizeof(int)); + MPI_Gatherv(neighbours, n_neighbours, MPI_INT, adj, nadj, xadj, MPI_INT, 0, comm); + MPI_Gatherv(weights, n_neighbours, MPI_INT, adjw, nadj, xadj, MPI_INT, 0, comm); +#ifdef PROF + { + //writing graph to file, CSR format + string filename = "prof/graph.csr"; + FILE* file = fopen(filename.c_str(), "w+"); + if(file==NULL){FLUPS_ERROR("Could not create file in ./prof. Did you create the folder?",LOCATION);} + for(int i=0; i<=comm_size; i++){ + fprintf(file, "%d ",xadj[i]); + } + fprintf(file,"\n"); + for(int i=0; i 0; --ip) { + rids[ip] = rids[ip-1]; //offset by 1 + } + rids[0] = 0; + if(!succeed){ + FLUPS_INFO("METIS: attempt failed."); + }else{ + // assign the rank value and redistribute + for (int i = 0; i < comm_size; ++i) { + order[i] = rids[part[i]]++ ; + } + break; + } + } + // check that we did not reach max_iter + if(iter>=max_iter){ + FLUPS_WARNING("Failed to find a graph partitioning with the current allocation. I will not change the rank orderegin in the graph_comm!",LOCATION); + for (int i = 0; i < comm_size; ++i) { + order[i] = i; + } + } + + // result of the partitioning + #ifdef PART_OF_EQUAL_SIZE + FLUPS_INFO("I have partitioned the graph in %d chunks of size %d\n",n_nodes,comm_size/n_nodes); + #else + FLUPS_INFO("I have partitioned the graph in %d chunks.",n_nodes); + #endif +#ifdef PROF + //writing graph to file, CSR format + string filename = "prof/partitions.txt"; + FILE* file = fopen(filename.c_str(), "w+"); + #ifdef PART_OF_EQUAL_SIZE + fprintf(file,"%d partitions of size %d\n",n_nodes,comm_size/n_nodes); + #else + fprintf(file,"%d partitions of size:\n",n_nodes); + for(int i=0; i 0 && i != comm_rank) n_neighbours++; - } - // allocate the number of neighbours and their weights - int *neighbours = (int *)flups_malloc(sizeof(int) * n_neighbours); - int *weights = (int *)flups_malloc(sizeof(int) * n_neighbours); - n_neighbours = 0; - for (int i = 0; i < comm_size; ++i) { - if (sourcesW[i] + destsW[i] > 0 && i != comm_rank) { - neighbours[n_neighbours] = i; - weights[n_neighbours] = sourcesW[i] + destsW[i]; - n_neighbours++; - } - } - - //------------------------------------------------------------------------- - /** - build the graph on proc 0 and ask for partioning - * The graph structure follows metis rules: - * the edges (= id of the destination of the edges) starting from proc k are located - * from adj[xadj[k]] to adj[xadj[k+1]-1] - * Same structure is used for the weights with the ajdw - * */ - //------------------------------------------------------------------------- - if (comm_rank == 0) { - int *xadj = (int *)flups_malloc((comm_size + 1) * sizeof(int)); - int *nadj = (int *)flups_malloc((comm_size) * sizeof(int)); - - // get the number of neighbours from everybody - MPI_Gather(&n_neighbours, 1, MPI_INT, nadj, 1, MPI_INT, 0, comm); - // get the starting indexes of the neighbour description for everybody - xadj[0] = 0; - for (int i = 0; i < comm_size; ++i) { - xadj[i + 1] = xadj[i] + nadj[i]; - } - - // allocate the adjency list + weights and fill it with the neighbour list from everybody - int *adj = (int *)flups_malloc(xadj[comm_size] * sizeof(int)); - int *adjw = (int *)flups_malloc(xadj[comm_size] * sizeof(int)); - MPI_Gatherv(neighbours, n_neighbours, MPI_INT, adj, nadj, xadj, MPI_INT, 0, comm); - MPI_Gatherv(weights, n_neighbours, MPI_INT, adjw, nadj, xadj, MPI_INT, 0, comm); -#ifdef PROF - { - //writing graph to file, CSR format - string filename = "prof/graph.csr"; - FILE* file = fopen(filename.c_str(), "w+"); - if(file==NULL){FLUPS_ERROR("Could not create file in ./prof. Did you create the folder?",LOCATION);} - for(int i=0; i<=comm_size; i++){ - fprintf(file, "%d ",xadj[i]); - } - fprintf(file,"\n"); - for(int i=0; i 0; --ip) { - rids[ip] = rids[ip-1]; //offset by 1 - } - rids[0] = 0; - if(!succeed){ - FLUPS_INFO("METIS: attempt failed."); - }else{ - // assign the rank value and redistribute - for (int i = 0; i < comm_size; ++i) { - order[i] = rids[part[i]]++ ; - } - break; - } - } - // check that we did not reach max_iter - if(iter>=max_iter){ - FLUPS_WARNING("Failed to find a graph partitioning with the current allocation. I will not change the rank orderegin in the graph_comm!",LOCATION); - for (int i = 0; i < comm_size; ++i) { - order[i] = i; - } - } - - // result of the partitioning - #ifdef PART_OF_EQUAL_SIZE - FLUPS_INFO("I have partitioned the graph in %d chunks of size %d\n",n_nodes,comm_size/n_nodes); - #else - FLUPS_INFO("I have partitioned the graph in %d chunks.",n_nodes); - #endif -#ifdef PROF - //writing graph to file, CSR format - string filename = "prof/partitions.txt"; - FILE* file = fopen(filename.c_str(), "w+"); - #ifdef PART_OF_EQUAL_SIZE - fprintf(file,"%d partitions of size %d\n",n_nodes,comm_size/n_nodes); - #else - fprintf(file,"%d partitions of size:\n",n_nodes); - for(int i=0; i