From 32e2ddd0f98fa3d8bce1e7e77f1346ce06555cbf Mon Sep 17 00:00:00 2001 From: Will Kimmerer Date: Fri, 10 Nov 2023 20:12:28 -0500 Subject: [PATCH] reintegrate old changes, drop numeric resolves --- examples/highlevel.jl | 29 ++- examples/pdrive.jl | 12 +- examples/pdrive_ABglobal.jl | 8 +- lib/common.jl | 6 +- src/drivers.jl | 65 ++++-- src/highlevel.jl | 4 +- src/lowlevel.jl | 4 +- src/matrixmarket.jl | 2 +- src/structs.jl | 409 +++++++++++++++++++++++------------- 9 files changed, 340 insertions(+), 199 deletions(-) diff --git a/examples/highlevel.jl b/examples/highlevel.jl index 0f0d62e..e46f067 100644 --- a/examples/highlevel.jl +++ b/examples/highlevel.jl @@ -9,7 +9,7 @@ using MatrixMarket using SparseBase using LinearAlgebra MPI.Init() -nprow, npcol, nrhs = 1, 1, 4 +nprow, npcol, nrhs = 2, 2, 4 root = 0 comm = MPI.COMM_WORLD grid = Grid{Int32}(nprow, npcol, comm) @@ -22,6 +22,11 @@ isroot = iam == root coo, b, xtrue = SuperLUDIST.mmread_and_generatesolution( Float64, Int32, nrhs, joinpath(@__DIR__, "add32.mtx"), grid; root ) + +# A second rhs and xtrue for testing different sized b. +b2 = [b;; b] +x2 = [xtrue;; xtrue] + csr = isroot ? convert(SparseBase.CSRStore, coo) : nothing chunksizes = isroot ? distribute_evenly(size(csr, 1), nprow * npcol) : nothing @@ -37,19 +42,25 @@ A = Communication.scatterstore!( b_local = b[A.first_row : A.first_row + localsize(A, 1) - 1, :] # shrink b xtrue_local = xtrue[A.first_row : A.first_row + localsize(A, 1) - 1, :] # shrink xtrue +F = lu!(A); -options = SuperLUDIST.Options() -stat = SuperLUDIST.LUStat{Int32}() -b1 = Matrix{Float64}(undef, localsize(A, 2), 0) -_, F = pgssvx!(A, b1; options, stat); +b_local = F \ b_local -b_local, F = pgssvx!(F, b_local); -GC.gc() if !(iam == root) || (nprow * npcol == 1) SuperLUDIST.inf_norm_error_dist(b_local, xtrue_local, grid) + SuperLUDIST.PStatPrint(F) # printing may be messy. end -SuperLUDIST.PStatPrint(options, stat, grid) + +b2_local = b2[A.first_row : A.first_row + localsize(A, 1) - 1, :] # shrink b2 +xtrue2_local = x2[A.first_row : A.first_row + localsize(A, 1) - 1, :] # shrink xtrue2 + +b2_local = F \ b2_local + +if !(iam == root) || (nprow * npcol == 1) + SuperLUDIST.inf_norm_error_dist(b2_local, xtrue2_local, grid) + SuperLUDIST.PStatPrint(F) #printing may be messy. +end + # @show iam b_local xtrue_local MPI.Finalize() - diff --git a/examples/pdrive.jl b/examples/pdrive.jl index f21bf79..3cd6d57 100644 --- a/examples/pdrive.jl +++ b/examples/pdrive.jl @@ -9,7 +9,7 @@ using MatrixMarket using SparseBase using LinearAlgebra MPI.Init() -nprow, npcol, nrhs = 1, 1, 2 +nprow, npcol, nrhs = 1, 1, 3 root = 0 comm = MPI.COMM_WORLD grid = Grid{Int32}(nprow, npcol, comm) @@ -44,17 +44,15 @@ xtrue_local = xtrue[A.first_row : A.first_row + localsize(A, 1) - 1, :] # shrink # for each user. # creating options and stat is optional, they will be created if not provided. -options = SuperLUDIST.Options() -stat = SuperLUDIST.LUStat{Int32}() + # b1 = Matrix{Float64}(undef, localsize(A, 2), 2) -b1 = rand(localsize(A, 1)) -_, F = pgssvx!(A, b1; options, stat); -@show F.options +b1 = rand(localsize(A, 1), 0) +_, F = pgssvx!(A, b1); b_local, F = pgssvx!(F, b_local); if !(iam == root) || (nprow * npcol == 1) SuperLUDIST.inf_norm_error_dist(b_local, xtrue_local, grid) end -SuperLUDIST.PStatPrint(options, stat, grid) +SuperLUDIST.PStatPrint(F) # @show iam b_local xtrue_local MPI.Finalize() diff --git a/examples/pdrive_ABglobal.jl b/examples/pdrive_ABglobal.jl index 37d032e..435febd 100644 --- a/examples/pdrive_ABglobal.jl +++ b/examples/pdrive_ABglobal.jl @@ -1,10 +1,10 @@ using MPI using SuperLUDIST -using SuperLUDIST: Grid, Options, LUStat, ScalePermStruct, +using SuperLUDIST: Grid, Options, LUStat, ScalePerm, ReplicatedSuperMatrix, pgssvx! using SuperLUDIST.Common using MatrixMarket -nprow, npcol, nrhs = (2, 2, 1) +nprow, npcol, nrhs = (1, 1, 1) root = 0 MPI.Init() comm = MPI.COMM_WORLD @@ -13,10 +13,10 @@ iam = grid.iam # This function handles broadcasting internally! A = MatrixMarket.mmread( - ReplicatedSuperMatrix{Float64, Int}, + SuperLUDIST.ReplicatedSuperMatrix{Float64, Int}, joinpath(@__DIR__, "add32.mtx"), grid -) +) ; # on single nodes this will help prevent oversubscription of threads. SuperLUDIST.superlu_set_num_threads(Int64, 2) diff --git a/lib/common.jl b/lib/common.jl index 5aad859..c682530 100644 --- a/lib/common.jl +++ b/lib/common.jl @@ -33,7 +33,7 @@ struct superlu_scope_t Iam::Cint end -struct gridinfo_t{I} +mutable struct gridinfo_t{I} comm::MPI_Comm rscp::superlu_scope_t cscp::superlu_scope_t @@ -42,7 +42,7 @@ struct gridinfo_t{I} npcol::I end -struct gridinfo3d_t{I} +mutable struct gridinfo3d_t{I} comm::MPI_Comm rscp::superlu_scope_t cscp::superlu_scope_t @@ -54,4 +54,4 @@ struct gridinfo3d_t{I} npdep::I rankorder::Cint end -end \ No newline at end of file +end diff --git a/src/drivers.jl b/src/drivers.jl index a012467..9ee942e 100644 --- a/src/drivers.jl +++ b/src/drivers.jl @@ -12,13 +12,21 @@ function pgssvx!( A::ReplicatedSuperMatrix{Tv, Ti}, b::VecOrMat{Tv}; options = Options(), - perm = ScalePermStruct{Tv, Ti}(size(A)...), - LU = LUStruct{Tv, Ti}(size(A, 2), A.grid), + perm = ScalePerm{Tv, Ti}(size(A)...), + LU = LUFactors{Tv, Ti}(size(A, 2), A.grid), stat = LUStat{Ti}(), berr = Vector{Tv}(undef, size(b, 2)) ) where {Tv, Ti} -return pgssvx!(SuperLUFactorization(A, options, nothing, perm, LU, stat, berr, b), b) - + return pgssvx!(SuperLUFactorization(A, options, nothing, perm, LU, stat, berr, b), b) +end +function pgssvx!( + A::ReplicatedSuperMatrix{Tv, Ti}; + options = Options(), + perm = ScalePerm{Tv, Ti}(size(A)...), + LU = LUFactors{Tv, Ti}(size(A, 2), A.grid), + stat = LUStat{Ti}() +) where {Tv, Ti} + return pgssvx!(SuperLUFactorization(A, options, nothing, perm, LU, stat, berr, b), b) end """ @@ -35,36 +43,47 @@ function pgssvx!( A::DistributedSuperMatrix{Tv, Ti}, b::VecOrMat{Tv}; options = Options(), - Solve = SOLVE{Tv, Ti}(options), - perm = ScalePermStruct{Tv, Ti}(size(A)...), - LU = LUStruct{Tv, Ti}(size(A, 2), A.grid), + Solve = SolveData{Tv, Ti}(options), + perm = ScalePerm{Tv, Ti}(size(A)...), + LU = LUFactors{Tv, Ti}(size(A, 2), A.grid), stat = LUStat{Ti}(), berr = Vector{Tv}(undef, size(b, 2)) ) where {Tv, Ti} return pgssvx!(SuperLUFactorization(A, options, Solve, perm, LU, stat, berr, b), b) end +function pgssvx!( + A::DistributedSuperMatrix{Tv, Ti}; + options = Options(), + Solve = SolveData{Tv, Ti}(options), + perm = ScalePerm{Tv, Ti}(size(A)...), + LU = LUFactors{Tv, Ti}(size(A, 2), A.grid), + stat = LUStat{Ti}(), +) where {Tv, Ti} + b = Matrix{Tv}(undef, SparseBase.Communication.localsize(A, 1), 0) + pgssvx!(A, b; options, Solve, perm, LU, stat) +end function pgssvx!(F::SuperLUFactorization{T, I, <:ReplicatedSuperMatrix{T, I}}, b::VecOrMat{T}) where {T, I} - (; mat, options, perm, lu, stat, berr) = F - b, _ = pgssvx_ABglobal!(options, mat, perm, b, lu, berr, stat) + (; mat, options, perm, factors, stat, berr) = F + b, _ = pgssvx_ABglobal!(options, mat, perm, b, factors, berr, stat) F.options.Fact = Common.FACTORED F.b = b return b, F end function pgssvx!(F::SuperLUFactorization{T, I, <:DistributedSuperMatrix{T, I}}, b::VecOrMat{T}) where {T, I} - (; mat, options, solve, perm, lu, stat, berr) = F + (; mat, options, solve, perm, factors, stat, berr) = F currentnrhs = size(F.b, 2) if currentnrhs != size(b, 2) - # F = pgstrs_prep!(F) + F = pgstrs_prep!(F) pgstrs_init!( F.solve, reverse(Communication.localsize(F.mat))..., size(b, 2), F.mat.first_row - 1, F.perm, - F.lu, F.mat.grid + F.factors, F.mat.grid ) end - b, _ = pgssvx_ABdist!(options, mat, perm, b, lu, solve, berr, stat) + b, _ = pgssvx_ABdist!(options, mat, perm, b, factors, solve, berr, stat) F.options.Fact = Common.FACTORED F.b = b return b, F @@ -77,9 +96,9 @@ L = Symbol(:SuperLU_, Symbol(I)) function pgssvx_ABglobal!( options, A::ReplicatedSuperMatrix{$T, $I}, - perm::ScalePermStruct{$T, $I}, + perm::ScalePerm{$T, $I}, b::Array{$T}, - LU::LUStruct{$T, $I}, + LU::LUFactors{$T, $I}, berr, stat::LUStat{$I} ) info = Ref{Int32}() @@ -94,10 +113,10 @@ end function pgssvx_ABdist!( options, A::DistributedSuperMatrix{$T, $I}, - perm::ScalePermStruct{$T, $I}, + perm::ScalePerm{$T, $I}, b::Array{$T}, - LU::LUStruct{$T, $I}, - Solve::SOLVE{$T, $I}, + LU::LUFactors{$T, $I}, + Solve::SolveData{$T, $I}, berr, stat::LUStat{$I} ) info = Ref{Int32}() @@ -118,10 +137,10 @@ function inf_norm_error_dist(x::Array{$T}, xtrue::Array{$T}, grid::Grid{$I}) ) end function pgstrs_init!( - solve::SOLVE{$T, $I}, + solve::SolveData{$T, $I}, n, m_local, nrhs, first_row, - scaleperm::ScalePermStruct{$T, $I}, - lu::LUStruct{$T, $I}, + scaleperm::ScalePerm{$T, $I}, + lu::LUFactors{$T, $I}, grid::Grid{$I} ) $L.$(Symbol(:p, prefixsymbol(T), :gstrs_init))( @@ -136,11 +155,11 @@ function pgstrs_prep!( ) if size(F.b, 2) != 0 gstrs = unsafe_load(F.solve.gstrs_comm) - @show gstrs $L.superlu_free_dist(gstrs.B_to_X_SendCnt) $L.superlu_free_dist(gstrs.X_to_B_SendCnt) $L.superlu_free_dist(gstrs.ptr_to_ibuf) - @show gstrs + else # there has been no gstrs_comm malloc'd, so do that. + end return F end diff --git a/src/highlevel.jl b/src/highlevel.jl index 604f67e..6aba827 100644 --- a/src/highlevel.jl +++ b/src/highlevel.jl @@ -1,8 +1,8 @@ function LinearAlgebra.lu!( - A::AbstractSuperMatrix{Tv, Ti}; + A::AbstractSuperMatrix{Tv, Ti}, + b_local = ones(Tv, Communication.localsize(A, 2), 1); kwargs... ) where {Tv, Ti} - b_local = Matrix{Tv}(undef, Communication.localsize(A, 2), 0) return pgssvx!(A, b_local; kwargs...)[2] # F, drop b_local. end diff --git a/src/lowlevel.jl b/src/lowlevel.jl index 1e6edb5..e5a9144 100644 --- a/src/lowlevel.jl +++ b/src/lowlevel.jl @@ -68,7 +68,7 @@ L = Symbol(String(:SuperLU_) * String(I)) function Destroy_LU(r::Base.RefValue{$(prefixname(T, :LUstruct_t)){$I}}, n, grid) $L.$(prefixname(T, :Destroy_LU))(n, grid, r) # why do I have to grid.grid here?! end - function LUstructInit(r::Base.RefValue{$(prefixname(T, :LUstruct_t)){$I}}, n, grid) + function LUstructInit(r::$(prefixname(T, :LUstruct_t)){$I}, n, grid) $L.$(prefixname(T, :LUstructInit))(n, r) return finalizer(r) do x # !MPI.Finalized() && Destroy_LU(x, n, grid) @@ -88,4 +88,4 @@ function FillRHS_dist!(b::Array, A::AbstractSuperMatrix, xtrue::Array; trans = false, nrhs = size(xtrue, 2), ldx = size(xtrue, 1), ldb = size(b, 1)) FillRHS_dist!(b, A, xtrue, trans, nrhs, ldx, ldb) return b -end \ No newline at end of file +end diff --git a/src/matrixmarket.jl b/src/matrixmarket.jl index ca31ddf..076c53d 100644 --- a/src/matrixmarket.jl +++ b/src/matrixmarket.jl @@ -71,7 +71,7 @@ A = MatrixMarket.mmread(ReplicatedSuperMatrix{Float64, Int32}, ) """ function MatrixMarket.mmread( ::Type{<:DistributedSuperMatrix{Tv, Ti}}, filename, grid::Grid{Ti}; - desymmetrize = true, root = 0, partitioner = distribute_evenly + desymmetrize = true, root = 0, partitioner = Communication.distribute_evenly ) where {Tv <: Union{Float32, Float64, ComplexF64}, Ti <: Union{Int32, Int64}} comm = grid.comm rank = MPI.Comm_rank(comm) diff --git a/src/structs.jl b/src/structs.jl index cc3cf4b..059903e 100644 --- a/src/structs.jl +++ b/src/structs.jl @@ -1,217 +1,330 @@ -struct Grid{I, G} - grid::Base.RefValue{G} +# Grid (gridinfo_t) functions: +############################## +# const Grid{I} = SuperLUDIST_Common.gridinfo_t{I} +# const Grid3D{I} = SuperLUDIST_Common.gridinfo3d_t{I} + +struct Grid{I} + comm::MPI.Comm + gridinfo::SuperLUDIST_Common.gridinfo_t{I} +end +struct Grid3D{I} + comm::MPI.Comm + gridinfo::SuperLUDIST_Common.gridinfo3d_t{I} +end + + + +for I ∈ (:Int32, :Int64) +L = Symbol(String(:SuperLU_) * String(I)) +libname = Symbol(:libsuperlu_dist_, I) +@eval begin +function gridmap!(g::Grid{$I}, nprow, npcol, usermap::Matrix{Int32}) + myrank = MPI.Comm_rank(g.comm) + color = myrank ÷ (nprow * npcol) + subcomm = MPI.Comm_split(g.comm, color, myrank) + $L.superlu_gridmap(subcomm, nprow, npcl, usermap, size(usermap, 1), g) + return g +end +function gridinit!(g::Grid{$I}, nprow, npcol) + $L.superlu_gridinit(g.comm, nprow, npcol, g) + return g +end +function Grid{$I}(nprow::Integer, npcol::Integer, comm = MPI.COMM_WORLD; batch = false, usermap = nothing) + !MPI.Initialized() && MPI.Init() + g = Grid{$I}( + comm, + SuperLUDIST_Common.gridinfo_t{$I}( + Base.unsafe_convert(MPI.MPI_Comm, comm), + superlu_scope_t(comm), + superlu_scope_t(comm), + convert(Cint, MPI.Comm_rank(comm)), nprow, npcol + ) + ) + if !batch + gridinit!(g, nprow, npcol) + else + usermap === nothing ? gridmap!(g, nprow, npcol) : gridmap!(g, nprow, npcol, usermap) + end + if g.iam == -1 || g.iam >= nprow * npcol + $L.superlu_gridexit(g) + return g + else + finalizer(g.gridinfo) do G + !MPI.Finalized() && $L.superlu_gridexit(G) + end + return g + end +end +# Base.unsafe_convert(T::Type{Ptr{SuperLUDIST_Common.gridinfo_t{$I}}}, g::Grid{$I}) = + # Base.unsafe_convert(T, g.grid) +Base.unsafe_convert(::Type{Ptr{SuperLUDIST_Common.gridinfo_t{$I}}}, O::Grid{$I}) = + Ptr{gridinfo_t{$I}}(pointer_from_objref(O.gridinfo)) +Base.unsafe_convert( + ::Type{Ptr{SuperLUDIST_Common.gridinfo_t{$I}}}, + O::SuperLUDIST_Common.gridinfo_t{$I} +) = Ptr{gridinfo_t{$I}}(pointer_from_objref(O)) + +end # end eval +end # end for + +function gridmap!(r, comm, nprow, npcol) + usermap = LinearIndices((nprow, npcol))' .- 1 + return gridmap!(r, comm, nprow, npcol, usermap) +end + +function SuperLUDIST_Common.superlu_scope_t( + comm; Np = MPI.Comm_size(comm), Iam = MPI.Comm_rank(comm) +) + return SuperLUDIST_Common.superlu_scope_t( + Base.unsafe_convert(MPI.MPI_Comm, comm), Np, Iam + ) end function Base.getproperty(g::Grid, s::Symbol) - s === :grid && return Base.getfield(g, s) - s === :comm && return MPI.Comm(Base.getproperty(g.grid[], s)) - return getproperty(g.grid[], s) + # Julia level functions expect a Comm, not an MPI_Comm / Int32. + s === :comm && return getfield(g, s) + s === :gridinfo && return getfield(g, s) + return getfield(g.gridinfo, s) end # Option functions: ################### const Options = Common.superlu_dist_options_t - -function Base.setproperty!(x, O::Options, s::Symbol) - return setproperty!(x, O.options, s) -end - Base.unsafe_convert(::Type{Ptr{superlu_dist_options_t}}, O::superlu_dist_options_t) = Ptr{superlu_dist_options_t}(pointer_from_objref(O)) -struct ScalePermStruct{T, I, S} - scaleperm::Base.RefValue{S} +# ScalePerm (ScalePermstruct_t) functions: +########################################## +struct ScalePerm{T, I, S} + scaleperm::S end -ScalePermStruct{T}(m, n) where T = ScalePermStruct{T, Int}(m, n) -ScalePermStruct{T, I}(m, n) where {T, Ti, I<:CIndex{Ti}} = - ScalePermStruct{T, Ti}(m, n) +ScalePerm{T}(m, n) where T = ScalePerm{T, Int}(m, n) +ScalePerm{T, I}(m, n) where {T, Ti, I<:CIndex{Ti}} = + ScalePerm{T, Ti}(m, n) -function Base.getproperty(g::ScalePermStruct, s::Symbol) +function Base.getproperty(g::ScalePerm, s::Symbol) s === :scaleperm && return Base.getfield(g, s) - return getproperty(g.scaleperm[], s) + return getproperty(g.scaleperm, s) end +for I ∈ (:Int32, :Int64) + L = Symbol(String(:SuperLU_) * String(I)) + libname = Symbol(:libsuperlu_dist_, I) + for T ∈ (Float32, Float64, ComplexF64) + @eval begin + + function ScalePerm{$T, $I}(m, n) + r = Common.$(prefixname(T, :ScalePermstruct_t)){$I}( + Common.NOEQUIL, + Ptr{Cvoid}(), Ptr{Cvoid}(), + Ptr{$I}(Libc.malloc(sizeof($I) * m)), + Ptr{$I}(Libc.malloc(sizeof($I) * n)) + ) + + return ScalePerm{$T, $I, typeof(r)}(finalizer(r) do x + if !MPI.Finalized() + Libc.free(x.perm_r) + Libc.free(x.perm_c) + (x.DiagScale == Common.ROW || x.DiagScale == Common.BOTH) && Libc.free(x.R) + (x.DiagScale == Common.COL || x.DiagScale == Common.BOTH) && Libc.free(x.C) + end + end) + end + Base.unsafe_convert(T::Type{Ptr{$(prefixname(T, :ScalePermstruct_t)){$I}}}, S::ScalePerm{$T, $I}) = + Base.unsafe_convert(T, S.scaleperm) + Base.unsafe_convert(T::Type{Ptr{$(prefixname(T, :ScalePermstruct_t)){$I}}}, S::$(prefixname(T, :ScalePermstruct_t)){$I}) = + Ptr{$(prefixname(T, :ScalePermstruct_t)){$I}}(pointer_from_objref(S)) +end # end eval +end # end for +end # end for -struct LUStruct{T, I, S, G} - LU::Base.RefValue{S} +# LUFactors (LUstruct_t) functions: +################################### +struct LUFactors{T, I, S, G} + LU::S grid::G n::I end -LUStruct{T}(n, grid) where T = LUStruct{T, Int}(n, grid) -LUStruct{T, I}(n, grid) where {T, Ti, I<:CIndex{Ti}} = LUStruct{T, Ti}(n, grid) +LUFactors{T}(n, grid) where T = LUFactors{T, Int}(n, grid) +LUFactors{T, I}(n, grid) where {T, Ti, I<:CIndex{Ti}} = LUFactors{T, Ti}(n, grid) -function Base.getproperty(g::LUStruct, s::Symbol) +function Base.getproperty(g::LUFactors, s::Symbol) s === :LU && return Base.getfield(g, s) s === :grid && return Base.getfield(g, s) s === :n && return Base.getfield(g, s) - return getproperty(g.LU[], s) + return getproperty(g.LU, s) end +for I ∈ (:Int32, :Int64) + L = Symbol(String(:SuperLU_) * String(I)) + libname = Symbol(:libsuperlu_dist_, I) + for T ∈ (Float32, Float64, ComplexF64) + @eval begin + function LUFactors{$T, $I}(n, grid::G) where G + r = Common.$(prefixname(T, :LUstruct_t)){$I}(Ptr{Cvoid}(), Ptr{Cvoid}(), Ptr{Cvoid}(), '\0') + LUstructInit(r, n, grid) + return LUFactors{$T, $I, eltype(r), G}(r, grid, n) + end + Base.unsafe_convert(T::Type{Ptr{$(prefixname(T, :LUstruct_t)){$I}}}, S::LUFactors{$T, $I}) = + Base.unsafe_convert(T, S.LU) + Base.unsafe_convert( + ::Type{Ptr{$(prefixname(T, :LUstruct_t)){$I}}}, + L::$(prefixname(T, :LUstruct_t)){$I} + ) = Ptr{$(prefixname(T, :LUstruct_t)){$I}}(pointer_from_objref(L)) + end # end eval + end # end for +end # end for + +# LUStat (SuperLUStat_t) functions: +################################### struct LUStat{I, S} - stat::Base.RefValue{S} + stat::S end LUStat() = LUStat{Int}() LUStat{I}() where {Ti, I<:CIndex{Ti}} = LUStat{Ti}() +for I ∈ (:Int32, :Int64) + L = Symbol(String(:SuperLU_) * String(I)) + libname = Symbol(:libsuperlu_dist_, I) + @eval begin + function PStatInit(r::SuperLUStat_t{$I}) + $L.PStatInit(r) + return finalizer(r) do x + !MPI.Finalized() && $L.PStatFree(x) + end + end + function LUStat{$I}() + r = SuperLUStat_t{$I}( + Ptr{Cvoid}(), + Ptr{Cvoid}(), + Ptr{Cvoid}(), + 0, 0, 0, 0., 0., 0., 0, 0 + ) + PStatInit(r) + return LUStat{$I, typeof(r)}(r) + end + Base.unsafe_convert(T::Type{Ptr{Common.SuperLUStat_t{$I}}}, S::LUStat{$I}) = + Base.unsafe_convert(T, S.stat) + Base.unsafe_convert( + ::Type{Ptr{Common.SuperLUStat_t{$I}}}, + S::Common.SuperLUStat_t{$I} + ) = Ptr{Common.SuperLUStat_t{$I}}(pointer_from_objref(S)) + + function PStatPrint(options, stat::LUStat{$I}, grid) + $L.PStatPrint(options, stat, grid) + end + end # end eval +end # end for -mutable struct SOLVE{T, I, S} - SOLVEstruct::Base.RefValue{S} +# SolveData (SOLVEstruct_t) functions: +###################################### +mutable struct SolveData{T, I, S} + data::S options::Options end -SOLVE{T}(options) where T = SOLVE{T, Int}(options) +SolveData{T}(options) where T = SolveData{T, Int}(options) -SOLVE{T, I}(options) where {T, Ti, I<:CIndex{Ti}} = - SOLVE{T, Ti}(options) +SolveData{T, I}(options) where {T, Ti, I<:CIndex{Ti}} = + SolveData{T, Ti}(options) -function Base.getproperty(g::SOLVE, s::Symbol) +function Base.getproperty(g::SolveData, s::Symbol) s === :options && return Base.getfield(g, s) - s === :SOLVEstruct && return Base.getfield(g, s) - return getproperty(g.SOLVEstruct[], s) + s === :data && return Base.getfield(g, s) + return getproperty(g.data, s) end -mutable struct SuperLUFactorization{T, I, A, Solve, Perm, LU, Stat, B} +for I ∈ (:Int32, :Int64) + L = Symbol(String(:SuperLU_) * String(I)) + libname = Symbol(:libsuperlu_dist_, I) + for T ∈ (Float32, Float64, ComplexF64) + @eval begin + Base.unsafe_convert(T::Type{Ptr{$L.$(prefixname(T, :SOLVEstruct_t)){$I}}}, S::SolveData{$T, $I}) = + Base.unsafe_convert(T, S.data) + Base.unsafe_convert( + T::Type{Ptr{$L.$(prefixname(T, :SOLVEstruct_t)){$I}}}, + S::$L.$(prefixname(T, :SOLVEstruct_t)){$I} + ) = Ptr{$L.$(prefixname(T, :SOLVEstruct_t)){$I}}(pointer_from_objref(S)) + + function SolveData{$T, $I}(options) + r = $L.$(prefixname(T, :SOLVEstruct_t)){$I}( + Ptr{Cvoid}(), + Ptr{Cvoid}(), + 0, + Ptr{Cvoid}(), + Ptr{Cvoid}(), + Ptr{Cvoid}(), + Ptr{Cvoid}(), + Ptr{Cvoid}(), + Ptr{Cvoid}(), + Ptr{Cvoid}() + ) + S = SolveData{$T, $I, eltype(r)}(r, options) + return finalizer(S) do solve + !MPI.Finalized() && options.SolveInitialized == Common.YES && + $L.$(prefixname(T, :SolveFinalize))(options, solve) + end + end + end # end eval +end # end for +end # end for + +mutable struct SuperLUFactorization{T, I, A, Solve, Perm, Factors, Stat, B} <: LinearAlgebra.Factorization{T} mat::A options::Options solve::Solve perm::Perm - lu::LU + factors::Factors stat::Stat berr::Vector{T} b::B - function SuperLUFactorization{T, I, A, Solve, Perm, LU, Stat, B}( + function SuperLUFactorization{T, I, A, Solve, Perm, Factors, Stat, B}( mat::A, options::Options, solve::Solve, perm::Perm, - lustruct::LU, stat::Stat, berr::Vector{T}, b::B + factors::Factors, stat::Stat, berr::Vector{T}, b::B ) where { T<:Union{Float32, Float64, ComplexF64}, I <: Union{Int32, Int64}, A <: AbstractSuperMatrix{T, I}, - Solve <: Union{SOLVE{T, I}, Nothing}, - Perm <: ScalePermStruct{T, I}, - LU <: LUStruct{T, I}, + Solve <: Union{SolveData{T, I}, Nothing}, + Perm <: ScalePerm{T, I}, + Factors <: LUFactors{T, I}, Stat <: LUStat{I}, B <: StridedVecOrMat{T} } - return new(mat, options, solve, perm, lustruct, stat, berr, b) + return new(mat, options, solve, perm, factors, stat, berr, b) end end isfactored(F::SuperLUFactorization) = F.options.Fact == Common.FACTORED - function SuperLUFactorization( A::AbstractSuperMatrix{Tv, Ti}, options, - solve::Solve, perm::Perm, lustruct::LU, stat::Stat, berr::Vector{Tv}, b::B -) where {Tv, Ti, Solve, Perm, LU, Stat, B} - return SuperLUFactorization{Tv, Ti, typeof(A), Solve, Perm, LU, Stat, B}( - A, options, solve, perm, lustruct, stat, berr, b + solve::Solve, perm::Perm, factors::Factors, stat::Stat, berr::Vector{Tv}, b::B +) where {Tv, Ti, Solve, Perm, Factors, Stat, B} + return SuperLUFactorization{Tv, Ti, typeof(A), Solve, Perm, Factors, Stat, B}( + A, options, solve, perm, factors, stat, berr, b ) end +function PStatPrint(F::SuperLUFactorization) + PStatPrint(F.options, F.stat, F.mat.grid) +end + for I ∈ (:Int32, :Int64) -L = Symbol(String(:SuperLU_) * String(I)) -libname = Symbol(:libsuperlu_dist_, I) -@eval begin - superlu_set_num_threads(::Type{$I}, n) = ccall((:omp_set_num_threads_, $libname), - Cvoid, - (Ref{Int32},), - Int32(n)) - # Grid functions: - ################# - function gridmap!(r::Ref{gridinfo_t{$I}}, comm, nprow, npcol, usermap::Matrix{Int32}) - myrank = MPI.Comm_rank(comm) - color = myrank ÷ (nprow * npcol) - subcomm = MPI.Comm_split(comm, color, myrank) - superlu_gridmap(subcomm, nprow, npcl, usermap, size(usermap, 1), r) - return r - end - function gridinit!(r::Ref{gridinfo_t{$I}}, comm, nprow, npcol) - $L.superlu_gridinit(comm, nprow, npcol, r) - return r - end - function Grid{$I}(nprow, npcol, comm = MPI.COMM_WORLD; batch = false, usermap = nothing) - !MPI.Initialized() && MPI.Init() - r = Ref{gridinfo_t{$I}}() - if !batch - gridinit!(r, comm, nprow, npcol) - else - usermap === nothing ? gridmap!(r, comm, nprow, npcol) : gridmap!(r, comm, nprow, npcol, usermap) - end - if r[].iam == -1 || r[].iam >= nprow * npcol - $L.superlu_gridexit(r) - return Grid{$I, gridinfo_t{$I}}(r) - else - return Grid{$I, gridinfo_t{$I}}( - finalizer(r) do ref - !MPI.Finalized() && $L.superlu_gridexit(ref) - end + L = Symbol(String(:SuperLU_) * String(I)) + libname = Symbol(:libsuperlu_dist_, I) + @eval begin + superlu_set_num_threads(::Type{$I}, n) = + ccall( + (:omp_set_num_threads_, $libname), Cvoid, (Ref{Int32},), Int32(n) ) - end + # SuperMatrix functions: + ######################## + Base.unsafe_convert(T::Type{Ptr{SuperMatrix{$I}}}, A::AbstractSuperMatrix{<:Any, $I}) = + Base.unsafe_convert(T, A.supermatrix) end - Base.unsafe_convert(T::Type{Ptr{SuperLUDIST_Common.gridinfo_t{$I}}}, g::Grid{$I}) = - Base.unsafe_convert(T, g.grid) - - # SuperMatrix functions: - ######################## - Base.unsafe_convert(T::Type{Ptr{SuperMatrix{$I}}}, A::AbstractSuperMatrix{<:Any, $I}) = - Base.unsafe_convert(T, A.supermatrix) - - function PStatFree(r::Base.RefValue{SuperLUStat_t{$I}}) - $L.PStatFree(r) - end - function PStatInit(r::Base.RefValue{SuperLUStat_t{$I}}) - $L.PStatInit(r) - return finalizer(r) do x - !MPI.Finalized() && PStatFree(x) + for T ∈ (Float32, Float64, ComplexF64) + @eval begin + function inf_norm_error_dist(n, nrhs, b, ldb, xtrue::AbstractVector{$T}, ldx, grid::Grid{$I}) + return $L.$(prefixname(T, :inf_norm_error_dist))(n, nrhs, b, ldb, xtrue, ldx, grid) end end - function LUStat{$I}() - r = Ref{SuperLUStat_t{$I}}() - PStatInit(r) - return LUStat{$I, eltype(r)}(r) - end - Base.unsafe_convert(T::Type{Ptr{SuperLUStat_t{$I}}}, S::LUStat{$I}) = - Base.unsafe_convert(T, S.stat) - function PStatPrint(options, stat::LUStat{$I}, grid) - $L.PStatPrint(options, stat, grid) end end -for T ∈ (Float32, Float64, ComplexF64) -@eval begin -Base.unsafe_convert(T::Type{Ptr{$(prefixname(T, :SOLVEstruct_t)){$I}}}, S::SOLVE{$T, $I}) = - Base.unsafe_convert(T, S.SOLVEstruct) -function SOLVE{$T, $I}(options) - r = Ref{$(prefixname(T, :SOLVEstruct_t)){$I}}() - S = SOLVE{$T, $I, eltype(r)}(r, options) - return finalizer(S) do solve - !MPI.Finalized() && options.SolveInitialized == Common.YES && - $L.$(prefixname(T, :SolveFinalize))(options, solve) - end -end -function ScalePermStruct{$T, $I}(m, n) - r = Ref{$(prefixname(T, :ScalePermstruct_t)){$I}}() - ScalePermstructInit(r, m, n) - return ScalePermStruct{$T, $I, eltype(r)}(r) -end -Base.unsafe_convert(T::Type{Ptr{$(prefixname(T, :ScalePermstruct_t)){$I}}}, S::ScalePermStruct{$T, $I}) = - Base.unsafe_convert(T, S.scaleperm) -function LUStruct{$T, $I}(n, grid::G) where G - r = Ref{$(prefixname(T, :LUstruct_t)){$I}}() - LUstructInit(r, n, grid) - return LUStruct{$T, $I, eltype(r), G}(r, grid, n) -end -Base.unsafe_convert(T::Type{Ptr{$(prefixname(T, :LUstruct_t)){$I}}}, S::LUStruct{$T, $I}) = - Base.unsafe_convert(T, S.LU) - -function inf_norm_error_dist(n, nrhs, b, ldb, xtrue::AbstractVector{$T}, ldx, grid::Grid{$I}) - return $L.$(prefixname(T, :inf_norm_error_dist))(n, nrhs, b, ldb, xtrue, ldx, grid) -end -end -end -end - -function gridmap!(r, comm, nprow, npcol) - usermap = LinearIndices((nprow, npcol))' .- 1 - return gridmap!(r, comm, nprow, npcol, usermap) -end - -function PStatPrint(F::SuperLUFactorization) - PStatPrint(F.options, F.stat, F.mat.grid) -end