-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
24 changed files
with
4,652 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,221 @@ | ||
/* | ||
* Copyright (c) 2003, 2007-14 Matteo Frigo | ||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology | ||
* | ||
* The following statement of license applies *only* to this header file, | ||
* and *not* to the other files distributed with FFTW or derived therefrom: | ||
* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions | ||
* are met: | ||
* | ||
* 1. Redistributions of source code must retain the above copyright | ||
* notice, this list of conditions and the following disclaimer. | ||
* | ||
* 2. Redistributions in binary form must reproduce the above copyright | ||
* notice, this list of conditions and the following disclaimer in the | ||
* documentation and/or other materials provided with the distribution. | ||
* | ||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS | ||
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | ||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE | ||
* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | ||
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
*/ | ||
|
||
/***************************** NOTE TO USERS ********************************* | ||
* | ||
* THIS IS A HEADER FILE, NOT A MANUAL | ||
* | ||
* If you want to know how to use FFTW, please read the manual, | ||
* online at http://www.fftw.org/doc/ and also included with FFTW. | ||
* For a quick start, see the manual's tutorial section. | ||
* | ||
* (Reading header files to learn how to use a library is a habit | ||
* stemming from code lacking a proper manual. Arguably, it's a | ||
* *bad* habit in most cases, because header files can contain | ||
* interfaces that are not part of the public, stable API.) | ||
* | ||
****************************************************************************/ | ||
|
||
#ifndef FFTW3_MPI_H | ||
#define FFTW3_MPI_H | ||
|
||
#include "fftw3.h" | ||
#include <mpi.h> | ||
|
||
#ifdef __cplusplus | ||
extern "C" | ||
{ | ||
#endif /* __cplusplus */ | ||
|
||
/* Per-dimension descriptor: the dimension's size plus its input and output
 * block values. FFTW_MPI_DEFINE_API typedefs this struct as XM(ddim) for each
 * precision; the tag name itself says it should not be referenced directly. */
struct fftw_mpi_ddim_do_not_use_me { | ||
ptrdiff_t n; /* dimension size */ | ||
ptrdiff_t ib; /* input block */ | ||
ptrdiff_t ob; /* output block */ | ||
}; | ||
|
||
/* | ||
huge second-order macro that defines prototypes for all API | ||
functions. We expand this macro for each supported precision | ||
XM: name-mangling macro (MPI) | ||
X: name-mangling macro (serial) | ||
R: real data type | ||
C: complex data type | ||
*/ | ||
|
||
/*
 * FFTW_MPI_DEFINE_API(XM, X, R, C): declares the MPI API for one precision.
 *   XM - name-mangling macro for MPI symbols
 *   X  - name-mangling macro for serial symbols
 *   R  - real data type
 *   C  - complex data type
 * The expansion introduces the XM(ddim) typedef and FFTW_EXTERN prototypes for:
 *   - init / cleanup
 *   - the local_size* family (each returns ptrdiff_t and takes ptrdiff_t*
 *     local_* arguments, presumably output parameters)
 *   - plan creation for transpose, dft, r2r, dft_r2c and dft_c2r
 *     (each returns X(plan))
 *   - gather_wisdom / broadcast_wisdom
 *   - execute_dft / execute_dft_r2c / execute_dft_c2r / execute_r2r
 */
#define FFTW_MPI_DEFINE_API(XM, X, R, C) \ | ||
\ | ||
typedef struct fftw_mpi_ddim_do_not_use_me XM(ddim); \ | ||
\ | ||
FFTW_EXTERN void XM(init)(void); \ | ||
FFTW_EXTERN void XM(cleanup)(void); \ | ||
\ | ||
FFTW_EXTERN ptrdiff_t XM(local_size_many_transposed) \ | ||
(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ | ||
ptrdiff_t block0, ptrdiff_t block1, MPI_Comm comm, \ | ||
ptrdiff_t *local_n0, ptrdiff_t *local_0_start, \ | ||
ptrdiff_t *local_n1, ptrdiff_t *local_1_start); \ | ||
FFTW_EXTERN ptrdiff_t XM(local_size_many) \ | ||
(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ | ||
ptrdiff_t block0, MPI_Comm comm, \ | ||
ptrdiff_t *local_n0, ptrdiff_t *local_0_start); \ | ||
FFTW_EXTERN ptrdiff_t XM(local_size_transposed) \ | ||
(int rnk, const ptrdiff_t *n, MPI_Comm comm, \ | ||
ptrdiff_t *local_n0, ptrdiff_t *local_0_start, \ | ||
ptrdiff_t *local_n1, ptrdiff_t *local_1_start); \ | ||
FFTW_EXTERN ptrdiff_t XM(local_size) \ | ||
(int rnk, const ptrdiff_t *n, MPI_Comm comm, \ | ||
ptrdiff_t *local_n0, ptrdiff_t *local_0_start); \ | ||
FFTW_EXTERN ptrdiff_t XM(local_size_many_1d)( \ | ||
ptrdiff_t n0, ptrdiff_t howmany, \ | ||
MPI_Comm comm, int sign, unsigned flags, \ | ||
ptrdiff_t *local_ni, ptrdiff_t *local_i_start, \ | ||
ptrdiff_t *local_no, ptrdiff_t *local_o_start); \ | ||
FFTW_EXTERN ptrdiff_t XM(local_size_1d)( \ | ||
ptrdiff_t n0, MPI_Comm comm, int sign, unsigned flags, \ | ||
ptrdiff_t *local_ni, ptrdiff_t *local_i_start, \ | ||
ptrdiff_t *local_no, ptrdiff_t *local_o_start); \ | ||
FFTW_EXTERN ptrdiff_t XM(local_size_2d)( \ | ||
ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm, \ | ||
ptrdiff_t *local_n0, ptrdiff_t *local_0_start); \ | ||
FFTW_EXTERN ptrdiff_t XM(local_size_2d_transposed)( \ | ||
ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm, \ | ||
ptrdiff_t *local_n0, ptrdiff_t *local_0_start, \ | ||
ptrdiff_t *local_n1, ptrdiff_t *local_1_start); \ | ||
FFTW_EXTERN ptrdiff_t XM(local_size_3d)( \ | ||
ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, MPI_Comm comm, \ | ||
ptrdiff_t *local_n0, ptrdiff_t *local_0_start); \ | ||
FFTW_EXTERN ptrdiff_t XM(local_size_3d_transposed)( \ | ||
ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, MPI_Comm comm, \ | ||
ptrdiff_t *local_n0, ptrdiff_t *local_0_start, \ | ||
ptrdiff_t *local_n1, ptrdiff_t *local_1_start); \ | ||
\ | ||
FFTW_EXTERN X(plan) XM(plan_many_transpose) \ | ||
(ptrdiff_t n0, ptrdiff_t n1, \ | ||
ptrdiff_t howmany, ptrdiff_t block0, ptrdiff_t block1, \ | ||
R *in, R *out, MPI_Comm comm, unsigned flags); \ | ||
FFTW_EXTERN X(plan) XM(plan_transpose) \ | ||
(ptrdiff_t n0, ptrdiff_t n1, \ | ||
R *in, R *out, MPI_Comm comm, unsigned flags); \ | ||
\ | ||
FFTW_EXTERN X(plan) XM(plan_many_dft) \ | ||
(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ | ||
ptrdiff_t block, ptrdiff_t tblock, C *in, C *out, \ | ||
MPI_Comm comm, int sign, unsigned flags); \ | ||
FFTW_EXTERN X(plan) XM(plan_dft) \ | ||
(int rnk, const ptrdiff_t *n, C *in, C *out, \ | ||
MPI_Comm comm, int sign, unsigned flags); \ | ||
FFTW_EXTERN X(plan) XM(plan_dft_1d) \ | ||
(ptrdiff_t n0, C *in, C *out, \ | ||
MPI_Comm comm, int sign, unsigned flags); \ | ||
FFTW_EXTERN X(plan) XM(plan_dft_2d) \ | ||
(ptrdiff_t n0, ptrdiff_t n1, C *in, C *out, \ | ||
MPI_Comm comm, int sign, unsigned flags); \ | ||
FFTW_EXTERN X(plan) XM(plan_dft_3d) \ | ||
(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, C *in, C *out, \ | ||
MPI_Comm comm, int sign, unsigned flags); \ | ||
\ | ||
FFTW_EXTERN X(plan) XM(plan_many_r2r) \ | ||
(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ | ||
ptrdiff_t iblock, ptrdiff_t oblock, R *in, R *out, \ | ||
MPI_Comm comm, const X(r2r_kind) *kind, unsigned flags); \ | ||
FFTW_EXTERN X(plan) XM(plan_r2r) \ | ||
(int rnk, const ptrdiff_t *n, R *in, R *out, \ | ||
MPI_Comm comm, const X(r2r_kind) *kind, unsigned flags); \ | ||
FFTW_EXTERN X(plan) XM(plan_r2r_2d) \ | ||
(ptrdiff_t n0, ptrdiff_t n1, R *in, R *out, MPI_Comm comm, \ | ||
X(r2r_kind) kind0, X(r2r_kind) kind1, unsigned flags); \ | ||
FFTW_EXTERN X(plan) XM(plan_r2r_3d) \ | ||
(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, \ | ||
R *in, R *out, MPI_Comm comm, X(r2r_kind) kind0, \ | ||
X(r2r_kind) kind1, X(r2r_kind) kind2, unsigned flags); \ | ||
\ | ||
FFTW_EXTERN X(plan) XM(plan_many_dft_r2c) \ | ||
(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ | ||
ptrdiff_t iblock, ptrdiff_t oblock, R *in, C *out, \ | ||
MPI_Comm comm, unsigned flags); \ | ||
FFTW_EXTERN X(plan) XM(plan_dft_r2c) \ | ||
(int rnk, const ptrdiff_t *n, R *in, C *out, \ | ||
MPI_Comm comm, unsigned flags); \ | ||
FFTW_EXTERN X(plan) XM(plan_dft_r2c_2d) \ | ||
(ptrdiff_t n0, ptrdiff_t n1, R *in, C *out, \ | ||
MPI_Comm comm, unsigned flags); \ | ||
FFTW_EXTERN X(plan) XM(plan_dft_r2c_3d) \ | ||
(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, R *in, C *out, \ | ||
MPI_Comm comm, unsigned flags); \ | ||
\ | ||
FFTW_EXTERN X(plan) XM(plan_many_dft_c2r) \ | ||
(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ | ||
ptrdiff_t iblock, ptrdiff_t oblock, C *in, R *out, \ | ||
MPI_Comm comm, unsigned flags); \ | ||
FFTW_EXTERN X(plan) XM(plan_dft_c2r) \ | ||
(int rnk, const ptrdiff_t *n, C *in, R *out, \ | ||
MPI_Comm comm, unsigned flags); \ | ||
FFTW_EXTERN X(plan) XM(plan_dft_c2r_2d) \ | ||
(ptrdiff_t n0, ptrdiff_t n1, C *in, R *out, \ | ||
MPI_Comm comm, unsigned flags); \ | ||
FFTW_EXTERN X(plan) XM(plan_dft_c2r_3d) \ | ||
(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, C *in, R *out, \ | ||
MPI_Comm comm, unsigned flags); \ | ||
\ | ||
FFTW_EXTERN void XM(gather_wisdom)(MPI_Comm comm_); \ | ||
FFTW_EXTERN void XM(broadcast_wisdom)(MPI_Comm comm_); \ | ||
\ | ||
FFTW_EXTERN void XM(execute_dft)(X(plan) p, C *in, C *out); \ | ||
FFTW_EXTERN void XM(execute_dft_r2c)(X(plan) p, R *in, C *out); \ | ||
FFTW_EXTERN void XM(execute_dft_c2r)(X(plan) p, C *in, R *out); \ | ||
FFTW_EXTERN void XM(execute_r2r)(X(plan) p, R *in, R *out); | ||
|
||
|
||
|
||
/* end of FFTW_MPI_DEFINE_API macro */ | ||
|
||
/* Name mangling for MPI symbols, one macro per precision: `name` is first
 * combined with the `mpi_` prefix via FFTW_CONCAT, and the result is handed to
 * the matching serial mangler. FFTW_CONCAT and FFTW_MANGLE_* are not defined in
 * this header (presumably they come from fftw3.h). */
#define FFTW_MPI_MANGLE_DOUBLE(name) FFTW_MANGLE_DOUBLE(FFTW_CONCAT(mpi_,name)) | ||
#define FFTW_MPI_MANGLE_FLOAT(name) FFTW_MANGLE_FLOAT(FFTW_CONCAT(mpi_,name)) | ||
#define FFTW_MPI_MANGLE_LONG_DOUBLE(name) FFTW_MANGLE_LONG_DOUBLE(FFTW_CONCAT(mpi_,name)) | ||
|
||
/* Expand the API declarations once per supported precision:
 * double, float, and long double (with the corresponding complex types). */
FFTW_MPI_DEFINE_API(FFTW_MPI_MANGLE_DOUBLE, FFTW_MANGLE_DOUBLE, double, fftw_complex) | ||
FFTW_MPI_DEFINE_API(FFTW_MPI_MANGLE_FLOAT, FFTW_MANGLE_FLOAT, float, fftwf_complex) | ||
FFTW_MPI_DEFINE_API(FFTW_MPI_MANGLE_LONG_DOUBLE, FFTW_MANGLE_LONG_DOUBLE, long double, fftwl_complex) | ||
|
||
/* Block value 0. NOTE(review): presumably passed as a block argument
 * (block0/block1/iblock/oblock) to request the library's default block size;
 * confirm against the FFTW manual. */
#define FFTW_MPI_DEFAULT_BLOCK (0) | ||
|
||
/* MPI-specific flags */ | ||
/* Each flag is a distinct single bit (bits 27 through 30) of an unsigned value,
 * so the flags can be combined with bitwise OR. NOTE(review): presumably they
 * are passed through the `unsigned flags` arguments of the plan functions. */
#define FFTW_MPI_SCRAMBLED_IN (1U << 27) | ||
#define FFTW_MPI_SCRAMBLED_OUT (1U << 28) | ||
#define FFTW_MPI_TRANSPOSED_IN (1U << 29) | ||
#define FFTW_MPI_TRANSPOSED_OUT (1U << 30) | ||
|
||
#ifdef __cplusplus | ||
} /* extern "C" */ | ||
#endif /* __cplusplus */ | ||
|
||
#endif /* FFTW3_MPI_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
/******************************************************************************* | ||
* Copyright 2010-2022 Intel Corporation. | ||
* | ||
* This software and the related documents are Intel copyrighted materials, and | ||
* your use of them is governed by the express license under which they were | ||
* provided to you (License). Unless the License provides otherwise, you may not | ||
* use, modify, copy, publish, distribute, disclose or transmit this software or | ||
* the related documents without Intel's prior written permission. | ||
* | ||
* This software and the related documents are provided as is, with no express | ||
* or implied warranties, other than those that are expressly stated in the | ||
* License. | ||
*******************************************************************************/ | ||
|
||
/* | ||
* | ||
* Definitions for MPI FFTW3 wrappers to Intel(R) oneAPI Math Kernel Library (Intel(R) oneMKL). | ||
* | ||
****************************************************************************** | ||
*/ | ||
|
||
#ifndef FFTW3_MPI_MKL_H | ||
#define FFTW3_MPI_MKL_H | ||
|
||
#include "fftw3-mpi.h" | ||
|
||
/* Compile-time precision selection. When MKL_SINGLE is defined, everything is
 * set up for single precision; otherwise double precision is used. Each branch
 * defines, consistently with one another:
 *   real_t / complex_t              - real and complex element types
 *   MPI_PREC / MKL_PREC             - precision constants for MPI and DFTI
 *   FFTW_MPI_MANGLE / FFTW_MANGLE   - name manglers for the chosen precision */
#if defined(MKL_SINGLE) | ||
typedef float real_t; | ||
typedef fftwf_complex complex_t; | ||
#define MPI_PREC MPI_FLOAT | ||
#define MKL_PREC DFTI_SINGLE | ||
#define FFTW_MPI_MANGLE(name) FFTW_MPI_MANGLE_FLOAT(name) | ||
#define FFTW_MANGLE(name) FFTW_MANGLE_FLOAT(name) | ||
#else | ||
typedef double real_t; | ||
typedef fftw_complex complex_t; | ||
#define MPI_PREC MPI_DOUBLE | ||
#define MKL_PREC DFTI_DOUBLE | ||
#define FFTW_MPI_MANGLE(name) FFTW_MPI_MANGLE_DOUBLE(name) | ||
#define FFTW_MANGLE(name) FFTW_MANGLE_DOUBLE(name) | ||
#endif | ||
|
||
#include "fftw3_mkl.h" | ||
#include "mkl_cdft.h" | ||
|
||
#define WANT_FAST_INPLACE_CLUSTER_FFT 1 | ||
/* If WANT_FAST_INPLACE_CLUSTER_FFT is set to 1, the FFTW3 MPI wrappers internally | ||
* allocate the additional memory (workspace) needed for fast in-place Intel(R) oneMKL CDFT. | ||
* Otherwise no additional memory is used, though performance will be | ||
* worse because of the many MPI communications. */ | ||
|
||
#endif /* FFTW3_MPI_MKL_H */ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
!******************************************************************************* | ||
! Copyright (c) 2003, 2007-11 Matteo Frigo | ||
! Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology | ||
! | ||
! This program is distributed with permission | ||
! | ||
!******************************************************************************* | ||
|
||
! Consecutive integer codes 0..10.
! NOTE(review): presumably the real-to-real transform kinds; confirm against fftw3.h.
INTEGER FFTW_R2HC | ||
PARAMETER (FFTW_R2HC=0) | ||
INTEGER FFTW_HC2R | ||
PARAMETER (FFTW_HC2R=1) | ||
INTEGER FFTW_DHT | ||
PARAMETER (FFTW_DHT=2) | ||
INTEGER FFTW_REDFT00 | ||
PARAMETER (FFTW_REDFT00=3) | ||
INTEGER FFTW_REDFT01 | ||
PARAMETER (FFTW_REDFT01=4) | ||
INTEGER FFTW_REDFT10 | ||
PARAMETER (FFTW_REDFT10=5) | ||
INTEGER FFTW_REDFT11 | ||
PARAMETER (FFTW_REDFT11=6) | ||
INTEGER FFTW_RODFT00 | ||
PARAMETER (FFTW_RODFT00=7) | ||
INTEGER FFTW_RODFT01 | ||
PARAMETER (FFTW_RODFT01=8) | ||
INTEGER FFTW_RODFT10 | ||
PARAMETER (FFTW_RODFT10=9) | ||
INTEGER FFTW_RODFT11 | ||
PARAMETER (FFTW_RODFT11=10) | ||
! Sign constants: FFTW_FORWARD is -1, FFTW_BACKWARD is +1.
INTEGER FFTW_FORWARD | ||
PARAMETER (FFTW_FORWARD=-1) | ||
INTEGER FFTW_BACKWARD | ||
PARAMETER (FFTW_BACKWARD=+1) | ||
! Flag values: FFTW_MEASURE is 0; every other value below is a distinct power
! of two. FFTW_WISDOM_ONLY (2**21) is listed out of numeric order.
INTEGER FFTW_MEASURE | ||
PARAMETER (FFTW_MEASURE=0) | ||
INTEGER FFTW_DESTROY_INPUT | ||
PARAMETER (FFTW_DESTROY_INPUT=1) | ||
INTEGER FFTW_UNALIGNED | ||
PARAMETER (FFTW_UNALIGNED=2) | ||
INTEGER FFTW_CONSERVE_MEMORY | ||
PARAMETER (FFTW_CONSERVE_MEMORY=4) | ||
INTEGER FFTW_EXHAUSTIVE | ||
PARAMETER (FFTW_EXHAUSTIVE=8) | ||
INTEGER FFTW_PRESERVE_INPUT | ||
PARAMETER (FFTW_PRESERVE_INPUT=16) | ||
INTEGER FFTW_PATIENT | ||
PARAMETER (FFTW_PATIENT=32) | ||
INTEGER FFTW_ESTIMATE | ||
PARAMETER (FFTW_ESTIMATE=64) | ||
INTEGER FFTW_WISDOM_ONLY | ||
PARAMETER (FFTW_WISDOM_ONLY=2097152) | ||
INTEGER FFTW_ESTIMATE_PATIENT | ||
PARAMETER (FFTW_ESTIMATE_PATIENT=128) | ||
INTEGER FFTW_BELIEVE_PCOST | ||
PARAMETER (FFTW_BELIEVE_PCOST=256) | ||
INTEGER FFTW_NO_DFT_R2HC | ||
PARAMETER (FFTW_NO_DFT_R2HC=512) | ||
INTEGER FFTW_NO_NONTHREADED | ||
PARAMETER (FFTW_NO_NONTHREADED=1024) | ||
INTEGER FFTW_NO_BUFFERING | ||
PARAMETER (FFTW_NO_BUFFERING=2048) | ||
INTEGER FFTW_NO_INDIRECT_OP | ||
PARAMETER (FFTW_NO_INDIRECT_OP=4096) | ||
INTEGER FFTW_ALLOW_LARGE_GENERIC | ||
PARAMETER (FFTW_ALLOW_LARGE_GENERIC=8192) | ||
INTEGER FFTW_NO_RANK_SPLITS | ||
PARAMETER (FFTW_NO_RANK_SPLITS=16384) | ||
INTEGER FFTW_NO_VRANK_SPLITS | ||
PARAMETER (FFTW_NO_VRANK_SPLITS=32768) | ||
INTEGER FFTW_NO_VRECURSE | ||
PARAMETER (FFTW_NO_VRECURSE=65536) | ||
INTEGER FFTW_NO_SIMD | ||
PARAMETER (FFTW_NO_SIMD=131072) | ||
INTEGER FFTW_NO_SLOW | ||
PARAMETER (FFTW_NO_SLOW=262144) | ||
INTEGER FFTW_NO_FIXED_RADIX_LARGE_N | ||
PARAMETER (FFTW_NO_FIXED_RADIX_LARGE_N=524288) | ||
INTEGER FFTW_ALLOW_PRUNING | ||
PARAMETER (FFTW_ALLOW_PRUNING=1048576) |
Oops, something went wrong.