diff --git a/Manifest.toml b/Manifest.toml index fd26162..1e616e5 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -2,7 +2,7 @@ julia_version = "1.10.4" manifest_format = "2.0" -project_hash = "6a0dbf49b9f181ea3bc8882243fdc78ded95cdb7" +project_hash = "d9ad1fa52dc8a8a415fe050e4fcf94ccb8acb79a" [[deps.Aqua]] deps = ["Compat", "Pkg", "Test"] @@ -88,6 +88,22 @@ uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +[[deps.LinearMaps]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "ee79c3208e55786de58f8dcccca098ced79f743f" +uuid = "7a12625a-238d-50fd-b39a-03d52299707e" +version = "3.11.3" + + [deps.LinearMaps.extensions] + LinearMapsChainRulesCoreExt = "ChainRulesCore" + LinearMapsSparseArraysExt = "SparseArrays" + LinearMapsStatisticsExt = "Statistics" + + [deps.LinearMaps.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + [[deps.Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" diff --git a/Project.toml b/Project.toml index 5f6ce18..4720318 100644 --- a/Project.toml +++ b/Project.toml @@ -7,11 +7,13 @@ version = "1.2.2" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" CpuId = "adafc99b-e345-5852-983c-f28acb93d879" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] Aqua = "0.5 - 0.8" CpuId = "0.3" LinearAlgebra = "1" +LinearMaps = "3" Test = "1" julia = "1" diff --git a/deps/build.jl b/deps/build.jl index 40f9b16..28a421d 100644 --- a/deps/build.jl +++ b/deps/build.jl @@ -3,12 +3,22 @@ using CpuId # file ending for OS ending = ".so" path = "/../src/lib/" +glibcversion = "" if Sys.iswindows() ending = ".dll" path = "\\..\\src\\lib\\" elseif Sys.isapple() ending = ".dylib" +else + glibcversion = "glibc2.40/" + if VersionNumber( + unsafe_string( + @ccall string(@__DIR__, path, "glibc-version.so").glibc_version()::Cstring + ), + ) < v"2.35" + glibcversion = "glibc2.22/" + end end if cpufeature(:AVX2) @@ -19,12 +29,12 @@ else flag = "SSE2/" end -lib_path_nfft = string(@__DIR__, path, flag, "libnfftjulia", ending) -lib_path_nfct = string(@__DIR__, path, flag, "libnfctjulia", ending) -lib_path_nfst = string(@__DIR__, path, flag, "libnfstjulia", ending) -lib_path_fastsum = string(@__DIR__, path, flag, "libfastsumjulia", ending) +lib_path_nfft = string(@__DIR__, path, flag, glibcversion, "libnfftjulia", ending) +lib_path_nfct = string(@__DIR__, path, flag, glibcversion, "libnfctjulia", ending) +lib_path_nfst = string(@__DIR__, path, flag, glibcversion, "libnfstjulia", ending) +lib_path_fastsum = string(@__DIR__, path, flag, glibcversion, "libfastsumjulia", ending) -println( lib_path_nfft ) +println(lib_path_nfft) chmod(lib_path_nfft, filemode(lib_path_nfft) | 0o755) chmod(lib_path_nfct, filemode(lib_path_nfct) | 0o755) diff --git a/docs/make.jl b/docs/make.jl index d3104b7..1f59c6e 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -7,11 +7,13 @@ makedocs( pages = [ "Home" => "index.md", "About" => "about.md", - "Transformations" => - ["NFFT" => "NFFT.md", "NFST" => "NFST.md", "NFCT" => "NFCT.md"], - "Applications" => - ["fastsum" => "fastsum.md",], - "Flags" => "Flags.md", + "Transformations" => [ + "NFFT" => "NFFT.md", + "NFST" => "NFST.md", + "NFCT" => "NFCT.md", + "NFMT" => "NFMT.md", + ], + "Applications" => ["fastsum" => "fastsum.md"], ], ) diff --git a/docs/src/NFMT.md b/docs/src/NFMT.md new file mode 100644 index 0000000..5f8d4f8 --- /dev/null +++ b/docs/src/NFMT.md @@ -0,0 +1,61 @@ +# [Nonequispaced Fast Cosine Transform (NFMT)](@id NFMT_site) + +```@meta + CurrentModule = NFFT3 +``` + +## NDMT and NFMT + +We consider for $\pmb{d}\in\{\exp,\cos,\mathrm{alg}\}^d$ the trigonometric polynomial + +$$f^{\pmb{d}}(\pmb{x}) \coloneqq \sum_{\pmb{k} \in I_{\pmb{N},\pmb{d}}^d} \hat{f}_{\pmb{k}}^{\pmb{d}} \, \phi_{\pmb{k}}^{\pmb{d}}(\pmb{x}), \quad \pmb{x} \in \mathbb{R}^d,$$ + +with + +$$\phi_{\pmb{k}}^{\pmb{d}}(\pmb{x})=\prod_{j=1}^d\begin{cases}1,&k_j=0\\\exp(2\pi\mathrm{i}k_jx_j),&d_j=\exp,\;k_j\neq0\\ +\sqrt{2}\cos(\pi k_jx_j),&d_j=\cos,\;k_j\neq0\\ +\sqrt{2}\cos(k_j\arccos(2x_j-1)),&d_j=\mathrm{alg},\;k_j\neq0\end{cases} $$ + +and multibandlimit $\pmb{N} \in (2\mathbb{N})^d$ and index set + +$$I_{\pmb{N},\pmb{d}}^d \coloneqq \overset{d}{\underset{j=1}{\vphantom{\mathop{\raisebox{-.5ex}{\hbox{\huge{$\times$}}}}}⨉}}\begin{cases}\Big\{-\frac{N_j}{2},-\frac{N_j}{2}+1,\ldots,\frac{N_j}{2}\Big\},&d_j=\exp\\\Big\{0,1,\ldots,\frac{N_j}{2}\Big\},&d_j\neq\exp\end{cases}.$$ + +The NDMT is the evaluation of + +$$f^{\pmb{d}}(\pmb{x}_j) \coloneqq \sum_{\pmb{k} \in I_{\pmb{N},\pmb{d}}^d} \hat{f}_{\pmb{k}}^{\pmb{d}} \, \phi_{\pmb{k}}^{\pmb{d}}(\pmb{x}_j)$$ + +at arbitrary nodes $\pmb{x}_j \in [0,1]^d$ for given coefficients $\hat{f}_{\pmb{k}}^{\pmb{d}} \in \mathbb{R}, \pmb{k} \in I_{\pmb{N},\pmb{d}}^d$. Similarly to the NDFT, the transposed NDMT is the evaluation of + +$$\hat{h}^{\pmb{d}}_{\pmb{k}} = \sum_{j=1}^M f^{\pmb{d}}_j \, \phi_{\pmb{k}}^{\pmb{d}}(\pmb{x}_j)$$ + +for the frequencies $\pmb{k} \in I_{\pmb{N},\pmb{d}}^d$ with given coefficients $f^{\pmb{d}}_j \in \mathbb{R}, j = 1,2,\ldots,M$. + +We modify the [NFFT](@ref NFFT_site) in order to derive a fast algorithm for the computation of the NDMT and transposed NDMT, obtaining the NFMT and its transposed counterpart. For details we refer to [[Potts, Schröter, 2024](#PottsSchröter2024)]. + +## Plan structure + +```@docs + NFMT{D} +``` + +## Functions + +```@docs + nfmt_trafo + nfmt_trafo_direct + nfmt_transposed + nfmt_transposed_direct + nfmt_finalize_plan + nfmt_init +``` + +## Literature + +```@raw html + +``` \ No newline at end of file diff --git a/docs/src/fastsum.md b/docs/src/fastsum.md index 62554de..f7efdb3 100644 --- a/docs/src/fastsum.md +++ b/docs/src/fastsum.md @@ -100,6 +100,46 @@ where the inner sum can be computed by an adjoint NFFT which is then followed by fastsum_trafo_exact ``` +## Supported kernel functions + +- `gaussian` +$${K}(x)=\exp\Big(-\frac{x^2}{c^2}\Big)$$ +- `multiquadratic` +$${K}(x)=\sqrt{x^2+c^2}$$ +- `inverse_multiquadratic` +$${K}(x)=\sqrt{\frac{1}{x^2+c^2}}$$ +- `logarithm` +$${K}(x)=\log(\vert x\vert)$$ +- `thinplate_spline` +$${K}(x)=x^2\log(\vert x\vert)$$ +- `one_over_square` +$${K}(x)=\frac{1}{x^2}$$ +- `one_over_modulus` +$${K}(x)=\frac{1}{\vert x\vert}$$ +- `one_over_x` +$${K}(x)=\frac{1}{x}$$ +- `one_over_multiquadric3` +$${K}(x)=\Big(\frac{1}{x^2+c^2}\Big)^\frac{3}{2}$$ +- `sinc_kernel` +$${K}(x)=\frac{\sin(cx)}{x}$$ +- `cosc` +$${K}(x)=\frac{\cos(cx)}{x}$$ +- `kcot` +$${K}(x)=\cot(cx)$$ +- `one_over_cube` +$${K}(x)=\frac{1}{x^3}$$ +- `log_sin` +$${K}(x)=\log(\vert\sin(cx)\vert)$$ +- `laplacian_rbf` +$${K}(x)=\exp\Big(-\frac{\vert x\vert}{c}\Big)$$ +- `der_laplacian_rbf` +$${K}(x)=\frac{\vert x\vert}{c}\exp\Big(-\frac{\vert x\vert}{c}\Big)$$ +- `xx_gaussian` +$${K}(x)=\frac{x^2}{c^2}\exp\Big(-\frac{x^2}{c^2}\Big)$$ +- `absx` +$${K}(x)=\vert x\vert$$ + + ## Literature ```@raw html diff --git a/src/NFCT.jl b/src/NFCT.jl index 07eade2..68e7201 100644 --- a/src/NFCT.jl +++ b/src/NFCT.jl @@ -26,13 +26,13 @@ for the frequencies ``\pmb{k} \in I_{\pmb{N},\mathrm{c}}^D`` with given coeffici * `M` - the number of nodes. * `n` - the oversampling ``(n_1, n_2, \ldots, n_D)`` per dimension. * `m` - the window size. A larger m results in more accuracy but also a higher computational cost. -* `f1` - the NFST flags. +* `f1` - the NFCT flags. * `f2` - the FFTW flags. * `init_done` - indicates if the plan is initialized. * `finalized` - indicates if the plan is finalized. * `x` - the nodes ``x_j \in [0,0.5]^D, \, j = 1, \ldots, M``. -* `f` - the values ``f^c(\pmb{x}_j)`` for the NFST or the coefficients ``f_j^c \in \mathbb{R}, j = 1, \ldots, M,`` for the transposed NFST. -* `fhat` - the Fourier coefficients ``\hat{f}_{\pmb{k}}^c \in \mathbb{R}`` for the NFST or the values ``\hat{h}_{\pmb{k}}^c, \pmb{k} \in I_{\pmb{N},\mathrm{c}}^D,`` for the adjoint NFFT. +* `f` - the values ``f^c(\pmb{x}_j)`` for the NFCT or the coefficients ``f_j^c \in \mathbb{R}, j = 1, \ldots, M,`` for the transposed NFCT. +* `fhat` - the Fourier coefficients ``\hat{f}_{\pmb{k}}^c \in \mathbb{R}`` for the NFCT or the values ``\hat{h}_{\pmb{k}}^c, \pmb{k} \in I_{\pmb{N},\mathrm{c}}^D,`` for the adjoint NFCT. * `plan` - plan (C pointer). # Constructor @@ -43,7 +43,7 @@ for the frequencies ``\pmb{k} \in I_{\pmb{N},\mathrm{c}}^D`` with given coeffici NFCT( N::NTuple{D,Int32}, M::Int32) where {D} # See also -[`NFFT`](@ref) +[`NFCT`](@ref) """ mutable struct NFCT{D} N::NTuple{D,Int32} # bandwidth tuple @@ -487,3 +487,158 @@ end function adjoint(P::NFCT{D}) where {D} return nfct_adjoint(P) end + +@doc raw""" + nfct_get_LinearMap(N::Vector{Int}, X::Array{Float64}; n, m::Integer, f1::UInt32, f2::UInt32)::LinearMap + +gives an linear map which computes the NFCT. + +# Input +* `N` - the multibandlimit ``(N_1, N_2, \ldots, N_D)`` of the trigonometric polynomial ``f``. +* `X` - the nodes X. +* `n` - the oversampling ``(n_1, n_2, \ldots, n_D)`` per dimension. +* `m` - the window size. A larger m results in more accuracy but also a higher computational cost. +* `f1` - the NFCT flags. +* `f2` - the FFTW flags. + +# See also +[`NFCT{D}`](@ref) +""" +function nfct_get_LinearMap( + N::Vector{Int}, + X::Array{Float64}; + n = undef, + m::Integer = 5, + f1::UInt32 = (size(X, 1) > 1 ? f1_default : f1_default_1d), + f2::UInt32 = f2_default, +)::LinearMap + if size(X, 1) == 1 + X = vec(X) + d = 1 + M = length(X) + else + (d, M) = size(X) + end + + if N == [] + return LinearMap{Float64}(fhat -> fill(fhat[1], M), f -> [sum(f)], M, 1) + end + + N = Tuple(N) + if n == undef + n = Tuple(2 * collect(N)) + end + + plan = NFCT(N, M, n, m, f1, f2) + plan.x = X + + function trafo(fhat::Vector{Float64})::Vector{Float64} + plan.fhat = fhat + nfct_trafo(plan) + return plan.f + end + + function adjoint(f::Vector{Float64})::Vector{Float64} + plan.f = f + nfct_adjoint(plan) + return plan.fhat + end + + N = prod(N) + return LinearMap{Float64}(trafo, adjoint, M, N) +end + +@doc raw""" + nfct_get_coefficient_vector(fhat::Array{Float64})::Vector{Float64} + +reshapes an coefficient array to an vector for multiplication with the linear map of the NFCT. + +# Input +* `fhat` - the Fourier coefficients. + +# See also +[`NFCT{D}`](@ref), [`nfct_get_LinearMap`](@ref) +""" +function nfct_get_coefficient_vector(fhat::Array{Float64})::Vector{Float64} + N = size(fhat) + return vec(permutedims(fhat, length(N):-1:1)) +end + +@doc raw""" + nfct_get_coefficient_array(fhat::Vector{Float64},P::NFCT{D})::Array{Float64} where {D} + +reshapes an coefficient vector returned from a linear map of the NFCT to an array. + +# Input +* `fhat` - the Fourier coefficients. +* `P` - a NFCT plan structure. + +# See also +[`NFCT{D}`](@ref), [`nfct_get_LinearMap`](@ref) +""" +function nfct_get_coefficient_array( + fhat::Vector{Float64}, + P::NFCT{D}, +)::Array{Float64} where {D} + return permutedims(reshape(fhat, reverse(P.N)), length(P.N):-1:1) +end + +@doc raw""" + nfct_get_coefficient_array(fhat::Vector{Float64},N::Vector{Int64})::Array{Float64} + +reshapes an coefficient vector returned from a linear map of the NFCT to an array. + +# Input +* `fhat` - the Fourier coefficients. +* `N` - the multibandlimit ``(N_1, N_2, \ldots, N_D)`` of the trigonometric polynomial ``f``. + +# See also +[`NFCT{D}`](@ref), [`nfct_get_LinearMap`](@ref) +""" +function nfct_get_coefficient_array(fhat::Vector{Float64}, N::Vector{Int64})::Array{Float64} + N = Tuple(N) + return permutedims(reshape(fhat, reverse(N)), length(N):-1:1) +end + +@doc raw""" + *(plan::NFCT{D}, fhat::Array{Float64})::Vector{Float64} + +This function defines the multiplication of an NFCT plan with an coefficient array. +""" +function Base.:*(plan::NFCT{D}, fhat::Array{Float64})::Vector{Float64} where {D} + if !isdefined(plan, :x) + error("x is not set.") + end + plan.fhat = nfct_get_coefficient_vector(fhat) + nfct_trafo(plan) + return plan.f +end + +struct Adjoint_NFCT{D} + plan::NFCT{D} +end + +Adjoint_NFCT{D}(plan::NFCT{D}) where {D} = plan + +@doc raw""" + adjoint(plan::NFCT{D})::Adjoint_NFCT{D} + +This function defines the adjoint operator for an NFCT. +""" +function Base.adjoint(plan::NFCT{D})::Adjoint_NFCT{D} where {D} + return Adjoint_NFCT(plan) +end + +@doc raw""" + *(plan::Adjoint_NFCT{D}, f::Vector{Float64})::Array{Float64} + +This function defines the multiplication of an adjoint NFCT plan with an vector of function values. +""" +function Base.:*(plan::Adjoint_NFCT{D}, f::Vector{Float64})::Array{Float64} where {D} + if !isdefined(plan.plan, :x) + error("x is not set.") + end + plan.plan.f = f + nfct_adjoint(plan.plan) + return nfct_get_coefficient_array(plan.plan.fhat, plan.plan) +end diff --git a/src/NFFT.jl b/src/NFFT.jl index eeae50a..c236d9e 100644 --- a/src/NFFT.jl +++ b/src/NFFT.jl @@ -102,9 +102,15 @@ mutable struct NFFT{D} end # additional constructor for easy use [NFFT((N,N),M) instead of NFFT{2}((N,N),M)] -function NFFT(N::NTuple{D,Integer}, M::Integer) where {D} +function NFFT( + N::NTuple{D,Integer}, + M::Integer; + m::Integer = Int32(default_window_cut_off), + f1::UInt32 = (D > 1 ? f1_default : f1_default_1d), + f2::UInt32 = f2_default, +) where {D} if any(x -> x <= 0, N) - throw(DomainError(N, "argument must be a positive integer")) + throw(DomainError(N, "argument must be a positive integer")) end # convert N to vector for passing it over to C @@ -146,7 +152,7 @@ end # finalizer @doc raw""" - nfft_finalize_plan(P) + nfft_finalize_plan(P::NFFT{D}) destroys a NFFT plan structure. @@ -173,7 +179,7 @@ end # allocate plan memory and init with D,N,M,n,m,f1,f2 @doc raw""" - nfft_init(P) + nfft_init(P::NFFT{D}) intialises the NFFT plan in C. This function does not have to be called by the user. @@ -346,7 +352,7 @@ end # nfft trafo direct [call with NFFT.trafo_direct outside module] @doc raw""" - nfft_trafo_direct(P) + nfft_trafo_direct(P::NFFT{D}) computes the NDFT via naive matrix-vector multiplication for provided nodes ``\pmb{x}_j, j =1,2,\dots,M,`` in `P.X` and coefficients ``\hat{f}_{\pmb{k}} \in \mathbb{C}, \pmb{k} \in I_{\pmb{N}}^D,`` in `P.fhat`. @@ -385,7 +391,7 @@ end # adjoint trafo direct [call with NFFT.adjoint_direct outside module] @doc raw""" - nfft_adjoint_direct(P) + nfft_adjoint_direct(P::NFFT{D}) computes the adjoint NDFT via naive matrix-vector multiplication for provided nodes ``\pmb{x}_j, j =1,2,\dots,M,`` in `P.X` and coefficients ``f_j \in \mathbb{C}, j =1,2,\dots,M,`` in `P.f`. @@ -421,7 +427,7 @@ end # nfft trafo [call with NFFT.trafo outside module] @doc raw""" - nfft_trafo(P) + nfft_trafo(P::NFFT{D}) computes the NDFT via the fast NFFT algorithm for provided nodes ``\pmb{x}_j, j =1,2,\dots,M,`` in `P.X` and coefficients ``\hat{f}_{\pmb{k}} \in \mathbb{C}, \pmb{k} \in I_{\pmb{N}}^D,`` in `P.fhat`. @@ -452,7 +458,7 @@ end # adjoint trafo [call with NFFT.adjoint outside module] @doc raw""" - nfft_adjoint(P) + nfft_adjoint(P::NFFT{D}) computes the adjoint NDFT via the fast adjoint NFFT algorithm for provided nodes ``\pmb{x}_j, j =1,2,\dots,M,`` in `P.X` and coefficients ``f_j \in \mathbb{C}, j =1,2,\dots,M,`` in `P.f`. @@ -480,4 +486,162 @@ end function adjoint(P::NFFT{D}) where {D} return nfft_adjoint(P) -end \ No newline at end of file +end + +@doc raw""" + nfft_get_LinearMap(N::Vector{Int}, X::Array{Float64}; n, m::Integer, f1::UInt32, f2::UInt32)::LinearMap + +gives an linear map which computes the NFFT. + +# Input +* `N` - the multibandlimit ``(N_1, N_2, \ldots, N_D)`` of the trigonometric polynomial ``f``. +* `X` - the nodes X. +* `n` - the oversampling ``(n_1, n_2, \ldots, n_D)`` per dimension. +* `m` - the window size. A larger m results in more accuracy but also a higher computational cost. +* `f1` - the NFFT flags. +* `f2` - the FFTW flags. + +# See also +[`NFFT{D}`](@ref) +""" +function nfft_get_LinearMap( + N::Vector{Int}, + X::Array{Float64}; + n = undef, + m::Integer = 5, + f1::UInt32 = (size(X, 1) > 1 ? f1_default : f1_default_1d), + f2::UInt32 = f2_default, +)::LinearMap + if size(X, 1) == 1 + X = vec(X) + d = 1 + M = length(X) + else + (d, M) = size(X) + end + + if N == [] + return LinearMap{ComplexF64}(fhat -> fill(fhat[1], M), f -> [sum(f)], M, 1) + end + + N = Tuple(N) + if n == undef + n = Tuple(2 * collect(N)) + end + + plan = NFFT(N, M, n, m, f1, f2) + plan.x = X + + function trafo(fhat::Vector{ComplexF64})::Vector{ComplexF64} + plan.fhat = fhat + nfft_trafo(plan) + return plan.f + end + + function adjoint(f::Vector{ComplexF64})::Vector{ComplexF64} + plan.f = f + nfft_adjoint(plan) + return plan.fhat + end + + N = prod(N) + return LinearMap{ComplexF64}(trafo, adjoint, M, N) +end + +@doc raw""" + nfft_get_coefficient_vector(fhat::Array{ComplexF64})::Vector{ComplexF64} + +reshapes an coefficient array to an vector for multiplication with the linear map of the NFFT. + +# Input +* `fhat` - the Fourier coefficients. + +# See also +[`NFFT{D}`](@ref), [`nfft_get_LinearMap`](@ref) +""" +function nfft_get_coefficient_vector(fhat::Array{ComplexF64})::Vector{ComplexF64} + N = size(fhat) + return vec(permutedims(fhat, length(N):-1:1)) +end + +@doc raw""" + nfft_get_coefficient_array(fhat::Vector{ComplexF64},P::NFFT{D})::Array{ComplexF64} where {D} + +reshapes an coefficient vector returned from a linear map of the NFFT to an array. + +# Input +* `fhat` - the Fourier coefficients. +* `P` - a NFFT plan structure. + +# See also +[`NFFT{D}`](@ref), [`nfft_get_LinearMap`](@ref) +""" +function nfft_get_coefficient_array( + fhat::Vector{ComplexF64}, + P::NFFT{D}, +)::Array{ComplexF64} where {D} + return permutedims(reshape(fhat, reverse(P.N)), length(P.N):-1:1) +end + +@doc raw""" + nfft_get_coefficient_array(fhat::Vector{ComplexF64},N::Vector{Int64})::Array{ComplexF64} + +reshapes an coefficient vector returned from a linear map of the NFFT to an array. + +# Input +* `fhat` - the Fourier coefficients. +* `N` - the multibandlimit ``(N_1, N_2, \ldots, N_D)`` of the trigonometric polynomial ``f``. + +# See also +[`NFFT{D}`](@ref), [`nfft_get_LinearMap`](@ref) +""" +function nfft_get_coefficient_array( + fhat::Vector{ComplexF64}, + N::Vector{Int64}, +)::Array{ComplexF64} + N = Tuple(N) + return permutedims(reshape(fhat, reverse(N)), length(N):-1:1) +end + +@doc raw""" + *(plan::NFFT{D}, fhat::Array{ComplexF64})::Vector{ComplexF64} + +This function defines the multiplication of an NFFT plan with an coefficient array. +""" +function Base.:*(plan::NFFT{D}, fhat::Array{ComplexF64})::Vector{ComplexF64} where {D} + if !isdefined(plan, :x) + error("x is not set.") + end + plan.fhat = nfft_get_coefficient_vector(fhat) + nfft_trafo(plan) + return plan.f +end + +struct Adjoint_NFFT{D} + plan::NFFT{D} +end + +Adjoint_NFFT{D}(plan::NFFT{D}) where {D} = plan + +@doc raw""" + adjoint(plan::NFFT{D})::Adjoint_NFFT{D} + +This function defines the adjoint operator for an NFFT. +""" +function Base.adjoint(plan::NFFT{D})::Adjoint_NFFT{D} where {D} + return Adjoint_NFFT(plan) +end + +@doc raw""" + *(plan::Adjoint_NFFT{D}, f::Vector{ComplexF64})::Array{ComplexF64} + +This function defines the multiplication of an adjoint NFFT plan with an vector of function values. +""" +function Base.:*(plan::Adjoint_NFFT{D}, f::Vector{ComplexF64})::Array{ComplexF64} where {D} + if !isdefined(plan.plan, :x) + error("x is not set.") + end + plan.plan.f = f + nfft_adjoint(plan.plan) + return nfft_get_coefficient_array(plan.plan.fhat, plan.plan) +end diff --git a/src/NFFT3.jl b/src/NFFT3.jl index 8f2899d..a72a59e 100644 --- a/src/NFFT3.jl +++ b/src/NFFT3.jl @@ -3,19 +3,29 @@ """ module NFFT3 - - using Aqua using CpuId +using LinearAlgebra +using LinearMaps ending = ".so" path = "/lib/" +glibcversion = "" if Sys.iswindows() ending = ".dll" path = "\\lib\\" elseif Sys.isapple() ending = ".dylib" +else + glibcversion = "glibc2.40/" + if VersionNumber( + unsafe_string( + @ccall string(@__DIR__, path, "glibc-version.so").glibc_version()::Cstring + ), + ) < v"2.35" + glibcversion = "glibc2.22/" + end end if cpufeature(:AVX2) @@ -26,10 +36,10 @@ else flag = "SSE2/" end -lib_path_nfft = string(@__DIR__, path, flag, "libnfftjulia", ending) -lib_path_nfct = string(@__DIR__, path, flag, "libnfctjulia", ending) -lib_path_nfst = string(@__DIR__, path, flag, "libnfstjulia", ending) -lib_path_fastsum = string(@__DIR__, path, flag, "libfastsumjulia", ending) +lib_path_nfft = string(@__DIR__, path, flag, glibcversion, "libnfftjulia", ending) +lib_path_nfct = string(@__DIR__, path, flag, glibcversion, "libnfctjulia", ending) +lib_path_nfst = string(@__DIR__, path, flag, glibcversion, "libnfstjulia", ending) +lib_path_fastsum = string(@__DIR__, path, flag, glibcversion, "libfastsumjulia", ending) include("NFFT.jl") include("NFCT.jl") @@ -69,6 +79,9 @@ export nfft_finalize_plan, nfft_adjoint, nfft_trafo_direct, nfft_adjoint_direct, + nfft_get_LinearMap, + nfft_get_coefficient_vector, + nfft_get_coefficient_array, nfct_finalize_plan, nfct_init, nfct_trafo, @@ -77,21 +90,29 @@ export nfft_finalize_plan, nfct_trafo_direct, nfct_adjoint_direct, nfct_transposed_direct, + nfct_get_LinearMap, + nfct_get_coefficient_vector, + nfct_get_coefficient_array, nfst_finalize_plan, nfst_init, nfst_trafo, nfst_adjoint, nfst_trafo_direct, nfst_adjoint_direct, + nfst_get_LinearMap, + nfst_get_coefficient_vector, + nfst_get_coefficient_array, + nfmt_finalize_plan, + nfmt_trafo, + nfmt_adjoint, + nfmt_get_LinearMap, + nfmt_get_coefficient_vector, + nfmt_get_coefficient_array, fastsum_finalize_plan, fastsum_init, fastsum_trafo, fastsum_trafo_exact, finalize_plan, - nfmt_finalize_plan, -# nfmt_init, - nfmt_trafo, - nfmt_adjoint, init, trafo, adjoint, diff --git a/src/NFMT.jl b/src/NFMT.jl index 20fac81..255fb44 100644 --- a/src/NFMT.jl +++ b/src/NFMT.jl @@ -1,5 +1,56 @@ using NFFT3 +# NFMT plan struct +@doc raw""" + NFMT{D} + +A NFMT (nonequispaced fast mixed transform) plan, where D is the dimension. + +The NFCT realizes a direct and fast computation of the discrete nonequispaced mixed transform. The aim is to compute + +$$f^{\pmb{d}}(\pmb{x}_j) \coloneqq \sum_{\pmb{k} \in I_{\pmb{N},\pmb{d}}^d} \hat{f}_{\pmb{k}}^{\pmb{d}} \, \phi_{\pmb{k}}^{\pmb{d}}(\pmb{x}_j)$$ + +with + +$$\phi_{\pmb{k}}^{\pmb{d}}(\pmb{x})=\prod_{j=1}^d\begin{cases}1,&k_j=0\\\exp(2\pi\mathrm{i}k_jx_j),&d_j=\exp,\;k_j\neq0\\ +\sqrt{2}\cos(\pi k_jx_j),&d_j=\cos,\;k_j\neq0\\ +\sqrt{2}\cos(k_j\arccos(2x_j-1)),&d_j=\mathrm{alg},\;k_j\neq0\end{cases}$$ + +for $\pmb{d}\in\{\exp,\cos,\mathrm{alg}\}^d$ at given arbitrary knots ``\pmb{x}_j \in [0,1]^D, j = 1, \cdots, M``, for coefficients ``\hat{f}^{c}_{\pmb{k}} \in \mathbb{R}``, + +$$\pmb{k}\inI_{\pmb{N},\pmb{d}}^d \coloneqq \overset{d}{\underset{j=1}{\vphantom{\mathop{\raisebox{-.5ex}{\hbox{\huge{$\times$}}}}}⨉}}\begin{cases}\Big\{-\frac{N_j}{2},-\frac{N_j}{2}+1,\ldots,\frac{N_j}{2}\Big\},&d_j=\exp\\\Big\{0,1,\ldots,\frac{N_j}{2}\Big\},&d_j\neq\exp\end{cases},$$ + +and a multibandlimit vector ``\pmb{N} \in (2\mathbb{N})^{D}``. The transposed problem reads as + +$$\hat{h}^{\pmb{d}}_{\pmb{k}} = \sum_{j=1}^M f^{\pmb{d}}_j \, \phi_{\pmb{k}}^{\pmb{d}}(\pmb{x}_j)$$ + +for the frequencies ``\pmb{k} \in I_{\pmb{N},\pmb{d}}^D`` with given coefficients ``f^{\pmb{d}}_j \in \mathbb{R}, j = 1,2,\ldots,M``. + +# Fields +* `basis_vect` - tuple with the bases (`exp`, `cos`, `alg`) for each dimension +* `NFFT_struct` - underlying [NFFT plan](@ref NFFT_site# Plan structure). + +# Constructor + NFMT{D}( basis_vect::NTuple{D,String}, P::NFFT{D}N::NTuple{D,Int32}) where {D} + + +> **WARNING**: Not for direct usage! + +# Additional Constructor + NFMT( N::NTuple{D,Int32}, M::Int32, n::NTuple{D,Int32}, m::Int32, f1::UInt32, f2::UInt32) where {D} + NFMT( N::NTuple{D,Int32}, M::Int32) where {D} + +with +* `N` - the multibandlimit ``(N_1, N_2, \ldots, N_D)`` of the trigonometric polynomial ``f^s``. +* `M` - the number of nodes. +* `n` - the oversampling ``(n_1, n_2, \ldots, n_D)`` per dimension. +* `m` - the window size. A larger m results in more accuracy but also a higher computational cost. +* `f1` - the NFST flags. +* `f2` - the FFTW flags. + +# See also +[NFMT`](@ref) +""" mutable struct NFMT{D} basis_vect::NTuple{D,String} # dimensions with cos NFFT_struct::NFFT{D} # structure for the NFFT @@ -183,11 +234,11 @@ function Base.getproperty(P::NFMT{D}, v::Symbol) where {D} pv[D] = 1 for i = D-1:-1:1 pv[i] = pv[i+1] * P.NFFT_struct.N[i+1] - if (BASES[P.basis_vect[i+1]]>0) - pv[i] ÷= 2; + if (BASES[P.basis_vect[i+1]] > 0) + pv[i] ÷= 2 end end - fhat = zeros(Complex, l) + fhat = zeros(ComplexF64, l) fhat_old = P.NFFT_struct.fhat for i = 1:p idx = 1 @@ -221,7 +272,7 @@ end @doc raw""" nfmt_trafo(P) -computes the NDFCT via the fast NFMT algorithm for provided nodes ``\pmb{x}_j, j =1,2,\dots,M,`` in `P.X` and coefficients ``\hat{f}_{\pmb{k}}^c \in \mathbb{R}, \pmb{k} \in I_{\pmb{N},\mathrm{c}}^D,`` in `P.fhat`. +computes the NDMT via the fast NFMT algorithm for provided nodes ``\pmb{x}_j, j =1,2,\dots,M,`` in `P.X` and coefficients ``\hat{f}_{\pmb{k}}^{\pmb{d}} \in \mathbb{R}, \pmb{k} \in I_{\pmb{N},\pmb{d}}^D,`` in `P.fhat`. # Input * `P` - a NFMT plan structure. @@ -241,7 +292,7 @@ end @doc raw""" nfmt_transposed(P) -computes the transposed NDFCT via the fast transposed NFMT algorithm for provided nodes ``\pmb{x}_j, j =1,2,\dots,M,`` in `P.X` and coefficients ``f_j^c \in \mathbb{R}, j =1,2,\dots,M,`` in `P.f`. +computes the transposed NDFCT via the fast transposed NFMT algorithm for provided nodes ``\pmb{x}_j, j =1,2,\dots,M,`` in `P.X` and coefficients ``f_j^{\pmb{d}} \in \mathbb{R}, j =1,2,\dots,M,`` in `P.f`. # Input * `P` - a NFMT plan structure. @@ -261,7 +312,7 @@ end @doc raw""" nfmt_trafo_direct(P) -computes the NDFCT via naive matrix-vector multiplication for provided nodes ``\pmb{x}_j, j =1,2,\dots,M,`` in `P.X` and coefficients ``\hat{f}_{\pmb{k}} \in \mathbb{C}, \pmb{k} \in I_{\pmb{N}}^D,`` in `P.fhat`. +computes the NDMT via naive matrix-vector multiplication for provided nodes ``\pmb{x}_j, j =1,2,\dots,M,`` in `P.X` and coefficients ``\hat{f}_{\pmb{k}} \in \mathbb{C}, \pmb{k} \in I_{\pmb{N}}^D,`` in `P.fhat`. # Input * `P` - a NFMT plan structure. @@ -295,4 +346,185 @@ end function adjoint_direct(P::NFMT{D}) where {D} return nfmt_adjoint_direct(P) -end \ No newline at end of file +end + +@doc raw""" + nfmt_get_LinearMap(N::Vector{Int}, X::Array{Float64}; n, m::Integer, f1::UInt32, f2::UInt32)::LinearMap + +gives an linear map which computes the NFMT. + +# Input +* `N` - the multibandlimit ``(N_1, N_2, \ldots, N_D)`` of the trigonometric polynomial ``f``. +* `X` - the nodes X. +* `n` - the oversampling ``(n_1, n_2, \ldots, n_D)`` per dimension. +* `m` - the window size. A larger m results in more accuracy but also a higher computational cost. +* `f1` - the NFFT flags. +* `f2` - the FFTW flags. + +# See also +[`NFMT{D}`](@ref) +""" +function nfmt_get_LinearMap( + basis_vect::NTuple{D,String}, + N::Vector{Int}, + X::Array{Float64}; + n = undef, + m::Integer = 5, + f1::UInt32 = (size(X, 1) > 1 ? f1_default : f1_default_1d), + f2::UInt32 = f2_default, +)::LinearMap where {D} + if size(X, 1) == 1 + X = vec(X) + d = 1 + M = length(X) + else + (d, M) = size(X) + end + + if N == [] + return LinearMap{ComplexF64}(fhat -> fill(fhat[1], M), f -> [sum(f)], M, 1) + end + + b = copy(N) + for (idx, s) in enumerate(basis_vect) + if (BASES[s] > 0) + b[idx] ÷= 2 + end + end + N = Tuple(N) + + if n == undef + n = Tuple(2 * collect(N)) + end + + plan = NFMT(basis_vect, N, M, n, m, f1, f2) + plan.x = X + + function trafo(fhat::Vector{ComplexF64})::Vector{ComplexF64} + plan.fhat = fhat + nfmt_trafo(plan) + return plan.f + end + + function adjoint(f::Vector{ComplexF64})::Vector{ComplexF64} + plan.f = f + nfmt_adjoint(plan) + return plan.fhat + end + + N = prod(b) + return LinearMap{ComplexF64}(trafo, adjoint, M, N) +end + +@doc raw""" + nfmt_get_coefficient_vector(fhat::Array{ComplexF64})::Vector{ComplexF64} + +reshapes an coefficient array to an vector for multiplication with the linear map of the NFMT. + +# Input +* `fhat` - the Fourier coefficients. + +# See also +[`NFMT{D}`](@ref), [`nfmt_get_LinearMap`](@ref) +""" +function nfmt_get_coefficient_vector(fhat::Array{ComplexF64})::Vector{ComplexF64} + N = size(fhat) + return vec(permutedims(fhat, length(N):-1:1)) +end + +@doc raw""" + nfmt_get_coefficient_array(fhat::Vector{ComplexF64},P::NFMT{D})::Array{ComplexF64} where {D} + +reshapes an coefficient vector returned from a linear map of the NFMT to an array. + +# Input +* `fhat` - the Fourier coefficients. +* `P` - a NFMT plan structure. + +# See also +[`NFMT{D}`](@ref), [`nfmt_get_LinearMap`](@ref) +""" +function nfmt_get_coefficient_array( + fhat::Vector{ComplexF64}, + P::NFMT{D}, +)::Array{ComplexF64} where {D} + b = copy([P.N...]) + for (idx, s) in enumerate(P.basis_vect) + if (BASES[s] > 0) + b[idx] ÷= 2 + end + end + N = Tuple(b) + return permutedims(reshape(fhat, reverse(N)), length(N):-1:1) +end + +@doc raw""" + nfmt_get_coefficient_array(fhat::Vector{ComplexF64},N::Vector{Int64})::Array{ComplexF64} + +reshapes an coefficient vector returned from a linear map of the NFMT to an array. + +# Input +* `fhat` - the Fourier coefficients. +* `N` - the multibandlimit ``(N_1, N_2, \ldots, N_D)`` of the trigonometric polynomial ``f``. +* `basis_vect` - tuple with the bases (`exp`, `cos`, `alg`) for each dimension + +# See also +[`NFMT{D}`](@ref), [`nfmt_get_LinearMap`](@ref) +""" +function nfmt_get_coefficient_array( + fhat::Vector{ComplexF64}, + N::Vector{Int64}, + basis_vect::NTuple{D,String}, +)::Array{ComplexF64} where {D} + b = copy(N) + for (idx, s) in enumerate(basis_vect) + if (BASES[s] > 0) + b[idx] ÷= 2 + end + end + N = Tuple(b) + return permutedims(reshape(fhat, reverse(N)), length(N):-1:1) +end + +@doc raw""" + *(plan::NFMT{D}, fhat::Array{ComplexF64})::Vector{ComplexF64} + +This function defines the multiplication of an NFMT plan with an coefficient array. +""" +function Base.:*(plan::NFMT{D}, fhat::Array{ComplexF64})::Vector{ComplexF64} where {D} + if !isdefined(plan.NFFT_struct, :x) + error("x is not set.") + end + plan.fhat = nfmt_get_coefficient_vector(fhat) + nfmt_trafo(plan) + return plan.f +end + +struct Adjoint_NFMT{D} + plan::NFMT{D} +end + +Adjoint_NFMT{D}(plan::NFMT{D}) where {D} = plan + +@doc raw""" + adjoint(plan::NFMT{D})::Adjoint_NFMT{D} + +This function defines the adjoint operator for an NFMT. +""" +function Base.adjoint(plan::NFMT{D})::Adjoint_NFMT{D} where {D} + return Adjoint_NFMT(plan) +end + +@doc raw""" + *(plan::Adjoint_NFMT{D}, f::Vector{ComplexF64})::Array{ComplexF64} + +This function defines the multiplication of an adjoint NFMT plan with an vector of function values. +""" +function Base.:*(plan::Adjoint_NFMT{D}, f::Vector{ComplexF64})::Array{ComplexF64} where {D} + if !isdefined(plan.plan.NFFT_struct, :x) + error("x is not set.") + end + plan.plan.f = f + nfmt_adjoint(plan.plan) + return nfmt_get_coefficient_array(plan.plan.fhat, plan.plan) +end diff --git a/src/NFST.jl b/src/NFST.jl index e02eb6c..09f8a26 100644 --- a/src/NFST.jl +++ b/src/NFST.jl @@ -32,7 +32,7 @@ for the frequencies ``\pmb{k} \in I_{\pmb{N},\mathrm{s}}^D`` with given coeffici * `finalized` - indicates if the plan is finalized. * `x` - the nodes ``x_j \in [0,0.5]^D, \, j = 1, \ldots, M``. * `f` - the values ``f^s(\pmb{x}_j)`` for the NFST or the coefficients ``f_j^s \in \mathbb{R}, j = 1, \ldots, M,`` for the transposed NFST. -* `fhat` - the Fourier coefficients ``\hat{f}_{\pmb{k}}^s \in \mathbb{R}`` for the NFST or the values ``\hat{h}_{\pmb{k}}^s, \pmb{k} \in I_{\pmb{N},\mathrm{s}}^D,`` for the adjoint NFFT. +* `fhat` - the Fourier coefficients ``\hat{f}_{\pmb{k}}^s \in \mathbb{R}`` for the NFST or the values ``\hat{h}_{\pmb{k}}^s, \pmb{k} \in I_{\pmb{N},\mathrm{s}}^D,`` for the adjoint NFST. * `plan` - plan (C pointer). # Constructor @@ -43,7 +43,7 @@ for the frequencies ``\pmb{k} \in I_{\pmb{N},\mathrm{s}}^D`` with given coeffici NFST( N::NTuple{D,Int32}, M::Int32) where {D} # See also -[`NFFT`](@ref) +[`NFST`](@ref) """ mutable struct NFST{D} N::NTuple{D,Int32} # bandwidth tuple @@ -474,3 +474,158 @@ function adjoint(P::NFST{D}) where {D} return nfst_adjoint(P) end +@doc raw""" + nfst_get_LinearMap(N::Vector{Int}, X::Array{Float64}; n, m::Integer, f1::UInt32, f2::UInt32)::LinearMap + +gives an linear map which computes the NFST. + +# Input +* `N` - the multibandlimit ``(N_1, N_2, \ldots, N_D)`` of the trigonometric polynomial ``f``. +* `X` - the nodes X. +* `n` - the oversampling ``(n_1, n_2, \ldots, n_D)`` per dimension. +* `m` - the window size. A larger m results in more accuracy but also a higher computational cost. +* `f1` - the NFST flags. +* `f2` - the FFTW flags. + +# See also +[`NFST{D}`](@ref) +""" +function nfst_get_LinearMap( + N::Vector{Int}, + X::Array{Float64}; + n = undef, + m::Integer = 5, + f1::UInt32 = (size(X, 1) > 1 ? f1_default : f1_default_1d), + f2::UInt32 = f2_default, +)::LinearMap + if size(X, 1) == 1 + X = vec(X) + d = 1 + M = length(X) + else + (d, M) = size(X) + end + + if N == [] + return LinearMap{Float64}(fhat -> fill(fhat[1], M), f -> [sum(f)], M, 1) + end + + N = Tuple(N) + if n == undef + n = Tuple(2 * collect(N)) + end + + plan = NFST(N, M, n, m, f1, f2) + plan.x = X + + function trafo(fhat::Vector{Float64})::Vector{Float64} + plan.fhat = fhat + nfst_trafo(plan) + return plan.f + end + + function adjoint(f::Vector{Float64})::Vector{Float64} + plan.f = f + nfst_adjoint(plan) + return plan.fhat + end + + N = prod(N .- 1) + return LinearMap{Float64}(trafo, adjoint, M, N) +end + +@doc raw""" + nfst_get_coefficient_vector(fhat::Array{Float64})::Vector{Float64} + +reshapes an coefficient array to an vector for multiplication with the linear map of the NFST. + +# Input +* `fhat` - the Fourier coefficients. + +# See also +[`NFST{D}`](@ref), [`nfst_get_LinearMap`](@ref) +""" +function nfst_get_coefficient_vector(fhat::Array{Float64})::Vector{Float64} + N = size(fhat) + return vec(permutedims(fhat, length(N):-1:1)) +end + +@doc raw""" + nfst_get_coefficient_array(fhat::Vector{Float64},P::NFST{D})::Array{Float64} where {D} + +reshapes an coefficient vector returned from a linear map of the NFST to an array. + +# Input +* `fhat` - the Fourier coefficients. +* `P` - a NFST plan structure. + +# See also +[`NFST{D}`](@ref), [`nfst_get_LinearMap`](@ref) +""" +function nfst_get_coefficient_array( + fhat::Vector{Float64}, + P::NFST{D}, +)::Array{Float64} where {D} + N = P.N .- 1 + return permutedims(reshape(fhat, reverse(N)), length(N):-1:1) +end + +@doc raw""" + nfst_get_coefficient_array(fhat::Vector{Float64},N::Vector{Int64})::Array{Float64} + +reshapes an coefficient vector returned from a linear map of the NFST to an array. + +# Input +* `fhat` - the Fourier coefficients. +* `N` - the multibandlimit ``(N_1, N_2, \ldots, N_D)`` of the trigonometric polynomial ``f``. + +# See also +[`NFST{D}`](@ref), [`nfst_get_LinearMap`](@ref) +""" +function nfst_get_coefficient_array(fhat::Vector{Float64}, N::Vector{Int64})::Array{Float64} + N = Tuple(N .- 1) + return permutedims(reshape(fhat, reverse(N)), length(N):-1:1) +end + +@doc raw""" + *(plan::NFST{D}, fhat::Array{Float64})::Vector{Float64} + +This function defines the multiplication of an NFST plan with an coefficient array. +""" +function Base.:*(plan::NFST{D}, fhat::Array{Float64})::Vector{Float64} where {D} + if !isdefined(plan, :x) + error("x is not set.") + end + plan.fhat = nfst_get_coefficient_vector(fhat) + nfst_trafo(plan) + return plan.f +end + +struct Adjoint_NFST{D} + plan::NFST{D} +end + +Adjoint_NFST{D}(plan::NFST{D}) where {D} = plan + +@doc raw""" + adjoint(plan::NFST{D})::Adjoint_NFST{D} + +This function defines the adjoint operator for an NFST. +""" +function Base.adjoint(plan::NFST{D})::Adjoint_NFST{D} where {D} + return Adjoint_NFST(plan) +end + +@doc raw""" + *(plan::Adjoint_NFST{D}, f::Vector{Float64})::Array{Float64} + +This function defines the multiplication of an adjoint NFST plan with an vector of function values. +""" +function Base.:*(plan::Adjoint_NFST{D}, f::Vector{Float64})::Array{Float64} where {D} + if !isdefined(plan.plan, :x) + error("x is not set.") + end + plan.plan.f = f + nfst_adjoint(plan.plan) + return nfst_get_coefficient_array(plan.plan.fhat, plan.plan) +end diff --git a/src/fastsum.jl b/src/fastsum.jl index 880d767..1aae2b1 100644 --- a/src/fastsum.jl +++ b/src/fastsum.jl @@ -20,7 +20,7 @@ nonsingular kernel function. The evaluation is done at ``M`` different points `` * `M` - number of target nodes. * `n` - expansion degree. * `p` - degree of smoothness. -* `kernel` - name of kernel function ``K``. +* `kernel` - name of kernel function ``K``, see [Supported kernel functions](# Supported kernel functions). * `c` - kernel parameters. * `eps_I` - inner boundary. * `eps_B` - outer boundary. @@ -31,8 +31,8 @@ nonsingular kernel function. The evaluation is done at ``M`` different points `` * `init_done` - bool for plan init. * `finalized` - bool for finalizer. * `flags` - flags. -* `x` - source nodes. -* `y` - target nodes. +* `x` - source nodes with $\lVert \pmb{x}_k \rVert_2 \leq \frac{1}{2} \ (\frac{1}{2} - \varepsilon_B)$. +* `y` - target nodes with $\lVert \pmb{y}_k \rVert_2 \leq \frac{1}{2} \ (\frac{1}{2} - \varepsilon_B)$. * `alpha` - source coefficients. * `f` - target evaluations. * `plan` - plan (C pointer). @@ -293,6 +293,9 @@ function Base.setproperty!(P::FASTSUM, v::Symbol, val) if size(val)[1] != P.N error("x has to be a Float64 vector of length N.") end + if maximum(val) > 0.25 - P.eps_B / 2 + error("the maximal distance for points x_k to 0 is 0.25-eps_B/2") + end else # => D >1 if typeof(val) != Array{Float64,2} error("x has to be a Float64 matrix.") @@ -300,6 +303,9 @@ function Base.setproperty!(P::FASTSUM, v::Symbol, val) if size(val)[1] != P.N || size(val)[2] != P.d error("x has to be a Float64 matrix of size N.") end + if maximum(norm.(eachrow(val))) > 0.25 - P.eps_B / 2 + error("the maximal distance for points x_k to 0 is 0.25-eps_B/2") + end end ptr = ccall( @@ -321,6 +327,9 @@ function Base.setproperty!(P::FASTSUM, v::Symbol, val) if size(val)[1] != P.M error("y has to be a Float64 vector of length M.") end + if maximum(val) > 0.25 - P.eps_B / 2 + error("the maximal distance for points y_k to 0 is 0.25-eps_B/2") + end else # => D > 1 if typeof(val) != Array{Float64,2} error("y has to be a Float64 matrix.") @@ -328,6 +337,9 @@ function Base.setproperty!(P::FASTSUM, v::Symbol, val) if size(val)[1] != P.M || size(val)[2] != P.d error("y has to be a Float64 matrix of size M.") end + if maximum(norm.(eachrow(val))) > 0.25 - P.eps_B / 2 + error("the maximal distance for points y_k to 0 is 0.25-eps_B/2") + end end ptr = ccall( diff --git a/src/lib/AVX/glibc2.22/libfastsumjulia.so b/src/lib/AVX/glibc2.22/libfastsumjulia.so new file mode 100644 index 0000000..175b053 Binary files /dev/null and b/src/lib/AVX/glibc2.22/libfastsumjulia.so differ diff --git a/src/lib/AVX/glibc2.22/libnfctjulia.so b/src/lib/AVX/glibc2.22/libnfctjulia.so new file mode 100644 index 0000000..547fd13 Binary files /dev/null and b/src/lib/AVX/glibc2.22/libnfctjulia.so differ diff --git a/src/lib/AVX/glibc2.22/libnfftjulia.so b/src/lib/AVX/glibc2.22/libnfftjulia.so new file mode 100644 index 0000000..114d075 Binary files /dev/null and b/src/lib/AVX/glibc2.22/libnfftjulia.so differ diff --git a/src/lib/AVX/glibc2.22/libnfstjulia.so b/src/lib/AVX/glibc2.22/libnfstjulia.so new file mode 100644 index 0000000..534f8e6 Binary files /dev/null and b/src/lib/AVX/glibc2.22/libnfstjulia.so differ diff --git a/src/lib/AVX/libfastsumjulia.so b/src/lib/AVX/glibc2.40/libfastsumjulia.so similarity index 100% rename from src/lib/AVX/libfastsumjulia.so rename to src/lib/AVX/glibc2.40/libfastsumjulia.so diff --git a/src/lib/AVX/libnfctjulia.so b/src/lib/AVX/glibc2.40/libnfctjulia.so similarity index 100% rename from src/lib/AVX/libnfctjulia.so rename to src/lib/AVX/glibc2.40/libnfctjulia.so diff --git a/src/lib/AVX/libnfftjulia.so b/src/lib/AVX/glibc2.40/libnfftjulia.so similarity index 100% rename from src/lib/AVX/libnfftjulia.so rename to src/lib/AVX/glibc2.40/libnfftjulia.so diff --git a/src/lib/AVX/libnfstjulia.so b/src/lib/AVX/glibc2.40/libnfstjulia.so similarity index 100% rename from src/lib/AVX/libnfstjulia.so rename to src/lib/AVX/glibc2.40/libnfstjulia.so diff --git a/src/lib/AVX2/glibc2.22/libfastsumjulia.so b/src/lib/AVX2/glibc2.22/libfastsumjulia.so new file mode 100644 index 0000000..bb78eee Binary files /dev/null and b/src/lib/AVX2/glibc2.22/libfastsumjulia.so differ diff --git a/src/lib/AVX2/glibc2.22/libnfctjulia.so b/src/lib/AVX2/glibc2.22/libnfctjulia.so new file mode 100644 index 0000000..ab9f0f9 Binary files /dev/null and b/src/lib/AVX2/glibc2.22/libnfctjulia.so differ diff --git a/src/lib/AVX2/glibc2.22/libnfftjulia.so b/src/lib/AVX2/glibc2.22/libnfftjulia.so new file mode 100644 index 0000000..cf3dc12 Binary files /dev/null and b/src/lib/AVX2/glibc2.22/libnfftjulia.so differ diff --git a/src/lib/AVX2/glibc2.22/libnfstjulia.so b/src/lib/AVX2/glibc2.22/libnfstjulia.so new file mode 100644 index 0000000..9f8cf42 Binary files /dev/null and b/src/lib/AVX2/glibc2.22/libnfstjulia.so differ diff --git a/src/lib/AVX2/libfastsumjulia.so b/src/lib/AVX2/glibc2.40/libfastsumjulia.so similarity index 100% rename from src/lib/AVX2/libfastsumjulia.so rename to src/lib/AVX2/glibc2.40/libfastsumjulia.so diff --git a/src/lib/AVX2/libnfctjulia.so b/src/lib/AVX2/glibc2.40/libnfctjulia.so similarity index 100% rename from src/lib/AVX2/libnfctjulia.so rename to src/lib/AVX2/glibc2.40/libnfctjulia.so diff --git a/src/lib/AVX2/libnfftjulia.so b/src/lib/AVX2/glibc2.40/libnfftjulia.so similarity index 100% rename from src/lib/AVX2/libnfftjulia.so rename to src/lib/AVX2/glibc2.40/libnfftjulia.so diff --git a/src/lib/AVX2/libnfstjulia.so b/src/lib/AVX2/glibc2.40/libnfstjulia.so similarity index 100% rename from src/lib/AVX2/libnfstjulia.so rename to src/lib/AVX2/glibc2.40/libnfstjulia.so diff --git a/src/lib/SSE2/glibc2.22/libfastsumjulia.so b/src/lib/SSE2/glibc2.22/libfastsumjulia.so new file mode 100644 index 0000000..a8fef7d Binary files /dev/null and b/src/lib/SSE2/glibc2.22/libfastsumjulia.so differ diff --git a/src/lib/SSE2/glibc2.22/libnfctjulia.so b/src/lib/SSE2/glibc2.22/libnfctjulia.so new file mode 100644 index 0000000..fd5fba2 Binary files /dev/null and b/src/lib/SSE2/glibc2.22/libnfctjulia.so differ diff --git a/src/lib/SSE2/glibc2.22/libnfftjulia.so b/src/lib/SSE2/glibc2.22/libnfftjulia.so new file mode 100644 index 0000000..f4c6882 Binary files /dev/null and b/src/lib/SSE2/glibc2.22/libnfftjulia.so differ diff --git a/src/lib/SSE2/glibc2.22/libnfstjulia.so b/src/lib/SSE2/glibc2.22/libnfstjulia.so new file mode 100644 index 0000000..db9bcbd Binary files /dev/null and b/src/lib/SSE2/glibc2.22/libnfstjulia.so differ diff --git a/src/lib/SSE2/libfastsumjulia.so b/src/lib/SSE2/glibc2.40/libfastsumjulia.so similarity index 100% rename from src/lib/SSE2/libfastsumjulia.so rename to src/lib/SSE2/glibc2.40/libfastsumjulia.so diff --git a/src/lib/SSE2/libnfctjulia.so b/src/lib/SSE2/glibc2.40/libnfctjulia.so similarity index 100% rename from src/lib/SSE2/libnfctjulia.so rename to src/lib/SSE2/glibc2.40/libnfctjulia.so diff --git a/src/lib/SSE2/libnfftjulia.so b/src/lib/SSE2/glibc2.40/libnfftjulia.so similarity index 100% rename from src/lib/SSE2/libnfftjulia.so rename to src/lib/SSE2/glibc2.40/libnfftjulia.so diff --git a/src/lib/SSE2/libnfstjulia.so b/src/lib/SSE2/glibc2.40/libnfstjulia.so similarity index 100% rename from src/lib/SSE2/libnfstjulia.so rename to src/lib/SSE2/glibc2.40/libnfstjulia.so diff --git a/src/lib/glibc-version.so b/src/lib/glibc-version.so new file mode 100644 index 0000000..44741a9 Binary files /dev/null and b/src/lib/glibc-version.so differ diff --git a/src/lib/src/glibc-version.c b/src/lib/src/glibc-version.c new file mode 100644 index 0000000..e1169e3 --- /dev/null +++ b/src/lib/src/glibc-version.c @@ -0,0 +1,4 @@ +#include +const char* glibc_version(){ + return(gnu_get_libc_version()); +} diff --git a/src/lib/src/readme.txt b/src/lib/src/readme.txt new file mode 100644 index 0000000..6a42301 --- /dev/null +++ b/src/lib/src/readme.txt @@ -0,0 +1,2 @@ +Compile with +gcc -fPIC -shared -o glibc-version.so glibc-version.c diff --git a/test/NFCT.jl b/test/NFCT.jl index 6c1ea56..5330705 100644 --- a/test/NFCT.jl +++ b/test/NFCT.jl @@ -8,6 +8,12 @@ p = NFCT(N, M) p.x = X p.fhat = fhat +f3 = p * nfct_get_coefficient_array(fhat, p) + +L = nfct_get_LinearMap(collect(N), X) + +f4 = L * fhat + NFFT3.nfct_trafo(p) f2 = p.f @@ -29,6 +35,24 @@ E_infty = norm(error_vector, Inf) / norm(fhat, 1) @test E_2 < 10^(-10) @test E_infty < 10^(-10) +error_vector = f1 - f3 +E_2 = norm(error_vector) / norm(f1) +E_infty = norm(error_vector, Inf) / norm(fhat, 1) + +@test E_2 < 10^(-10) +@test E_infty < 10^(-10) + +error_vector = f1 - f4 +E_2 = norm(error_vector) / norm(f1) +E_infty = norm(error_vector, Inf) / norm(fhat, 1) + +@test E_2 < 10^(-8) +@test E_infty < 10^(-8) + +f3 = nfct_get_coefficient_vector(p' * p.f) + +f4 = L' * p.f + NFFT3.nfct_adjoint(p) f2 = p.fhat @@ -41,6 +65,20 @@ E_infty = norm(error_vector, Inf) / norm(fhat, 1) @test E_2 < 10^(-10) @test E_infty < 10^(-10) +error_vector = f1 - f3 +E_2 = norm(error_vector) / norm(f1) +E_infty = norm(error_vector, Inf) / norm(fhat, 1) + +@test E_2 < 10^(-10) +@test E_infty < 10^(-10) + +error_vector = f1 - f4 +E_2 = norm(error_vector) / norm(f1) +E_infty = norm(error_vector, Inf) / norm(fhat, 1) + +@test E_2 < 10^(-8) +@test E_infty < 10^(-8) + # DomainError tests @test_throws DomainError NFCT((-1, 2), M) diff --git a/test/NFFT.jl b/test/NFFT.jl index 410293c..5d05b32 100644 --- a/test/NFFT.jl +++ b/test/NFFT.jl @@ -8,8 +8,13 @@ p = NFFT(N, M) p.x = X p.fhat = fhat +f3 = p * nfft_get_coefficient_array(fhat, p) -NFFT3.nfft_trafo(p) +L = nfft_get_LinearMap(collect(N), X) + +f4 = L * fhat + +NFFT3.trafo(p) f2 = p.f I = [[j; i; k] for k = -N[3]/2:N[3]/2-1, i = -N[2]/2:N[2]/2-1, j = -N[1]/2:N[1]/2-1] @@ -26,7 +31,25 @@ E_infty = norm(error_vector, Inf) / norm(fhat, 1) @test E_2 < 10^(-10) @test E_infty < 10^(-10) -NFFT3.nfft_adjoint(p) +error_vector = f1 - f3 +E_2 = norm(error_vector) / norm(f1) +E_infty = norm(error_vector, Inf) / norm(fhat, 1) + +@test E_2 < 10^(-10) +@test E_infty < 10^(-10) + +error_vector = f1 - f4 +E_2 = norm(error_vector) / norm(f1) +E_infty = norm(error_vector, Inf) / norm(fhat, 1) + +@test E_2 < 10^(-10) +@test E_infty < 10^(-10) + +f3 = nfft_get_coefficient_vector(p' * p.f) + +f4 = L' * p.f + +NFFT3.adjoint(p) f2 = p.fhat f1 = F' * p.f @@ -38,6 +61,20 @@ E_infty = norm(error_vector, Inf) / norm(fhat, 1) @test E_2 < 10^(-10) @test E_infty < 10^(-10) +error_vector = f1 - f3 +E_2 = norm(error_vector) / norm(f1) +E_infty = norm(error_vector, Inf) / norm(fhat, 1) + +@test E_2 < 10^(-10) +@test E_infty < 10^(-10) + +error_vector = f1 - f4 +E_2 = norm(error_vector) / norm(f1) +E_infty = norm(error_vector, Inf) / norm(fhat, 1) + +@test E_2 < 10^(-10) +@test E_infty < 10^(-10) + # Error tests @test_throws DomainError NFFT((-1, 2), M) @@ -56,3 +93,32 @@ E_infty = norm(error_vector, Inf) / norm(fhat, 1) @test_throws DomainError NFFT((-16, 8, 4), M, (18, 10, 6), Int32(8), f1_default, f2_default) +NFFT3.finalize_plan(p) + +N = (16,) +M = 100 + +p2 = NFFT(N, M) + +@test_throws "NFFT not initialized." NFFT3.finalize_plan(p2) + +X = rand(3, M) .- 0.5 + +@test_throws "type NFFT has no field X" p2.X = X + +N = (16,) +M = 100 + +p = NFFT(N, M) + +X = rand(M) .- 0.5 +fhat = rand(prod(N)) + im * rand(prod(N)) + +p = NFFT(N, M) +NFFT3.init(p) +p.x = X +p.fhat = fhat + +NFFT3.nfft_trafo(p) + +#@test_logs (:warn,"You can't modify the C pointer to the NFFT plan.") p.plan = p2.plan diff --git a/test/NFMT.jl b/test/NFMT.jl index 7eb171c..758a59d 100644 --- a/test/NFMT.jl +++ b/test/NFMT.jl @@ -65,6 +65,12 @@ p = NFMT(basis_vect, N, M) p.x = X p.fhat = fhat +f3 = p * nfmt_get_coefficient_array(fhat, p) + +L = nfmt_get_LinearMap(basis_vect, collect(N), X) + +f4 = L * fhat + #NFFT3.nfmt_trafo(p) nfmt_trafo(p) f2 = p.f @@ -80,6 +86,24 @@ E_infty = norm(error_vector, Inf) / norm(fhat, 1) @test E_2 < 10^(-10) @test E_infty < 10^(-10) +error_vector = f1 - f3 +E_2 = norm(error_vector) / norm(f1) +E_infty = norm(error_vector, Inf) / norm(fhat, 1) + +@test E_2 < 10^(-10) +@test E_infty < 10^(-10) + +error_vector = f1 - f4 +E_2 = norm(error_vector) / norm(f1) +E_infty = norm(error_vector, Inf) / norm(fhat, 1) + +@test E_2 < 10^(-10) +@test E_infty < 10^(-10) + +f3 = nfmt_get_coefficient_vector(p' * p.f) + +f4 = L' * p.f + #NFFT3.nfmt_adjoint(p) nfmt_adjoint(p) f2 = p.fhat @@ -93,4 +117,16 @@ E_infty = norm(error_vector, Inf) / norm(fhat, 1) @test E_2 < 10^(-10) @test E_infty < 10^(-10) +error_vector = f1 - f3 +E_2 = norm(error_vector) / norm(f1) +E_infty = norm(error_vector, Inf) / norm(fhat, 1) +@test E_2 < 10^(-10) +@test E_infty < 10^(-10) + +error_vector = f1 - f4 +E_2 = norm(error_vector) / norm(f1) +E_infty = norm(error_vector, Inf) / norm(fhat, 1) + +@test E_2 < 10^(-10) +@test E_infty < 10^(-10) diff --git a/test/NFST.jl b/test/NFST.jl index e02dba9..07916fa 100644 --- a/test/NFST.jl +++ b/test/NFST.jl @@ -1,9 +1,19 @@ +N = (16, 8, 4) +M = 10000 +X = 0.5 .* rand(3, M) + fhat = rand(prod(collect(N) .- 1)) p = NFST(N, M) p.x = X p.fhat = fhat +f3 = p * nfst_get_coefficient_array(fhat, p) + +L = nfst_get_LinearMap(collect(N), X) + +f4 = L * fhat + NFFT3.nfst_trafo(p) f2 = p.f @@ -25,6 +35,24 @@ E_infty = norm(error_vector, Inf) / norm(fhat, 1) @test E_2 < 10^(-10) @test E_infty < 10^(-10) +error_vector = f1 - f3 +E_2 = norm(error_vector) / norm(f1) +E_infty = norm(error_vector, Inf) / norm(fhat, 1) + +@test E_2 < 10^(-10) +@test E_infty < 10^(-10) + +error_vector = f1 - f4 +E_2 = norm(error_vector) / norm(f1) +E_infty = norm(error_vector, Inf) / norm(fhat, 1) + +@test E_2 < 10^(-8) +@test E_infty < 10^(-8) + +f3 = nfst_get_coefficient_vector(p' * p.f) + +f4 = L' * p.f + NFFT3.nfst_adjoint(p) f2 = p.fhat @@ -37,6 +65,19 @@ E_infty = norm(error_vector, Inf) / norm(fhat, 1) @test E_2 < 10^(-10) @test E_infty < 10^(-10) +error_vector = f1 - f3 +E_2 = norm(error_vector) / norm(f1) +E_infty = norm(error_vector, Inf) / norm(fhat, 1) + +@test E_2 < 10^(-10) +@test E_infty < 10^(-10) + +error_vector = f1 - f4 +E_2 = norm(error_vector) / norm(f1) +E_infty = norm(error_vector, Inf) / norm(fhat, 1) + +@test E_2 < 10^(-8) +@test E_infty < 10^(-8) # Error tests @test_throws DomainError NFST((-1, 2), M)