From a597ee2cb6042c8fc3c39d779c5dcec89fa22728 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Tue, 20 Feb 2024 16:54:29 +0100 Subject: [PATCH 1/9] t2 --- src/ExtendableSparse.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 700bbda..36dca95 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -16,6 +16,7 @@ if USE_GPL_LIBS using SuiteSparse end +@info "test2" using DocStringExtensions From 873ff1e9bbf88993ec62a3eb23ac46b6a112f7ed Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Tue, 20 Feb 2024 17:48:30 +0100 Subject: [PATCH 2/9] add ExtendableSparseParallel --- Project.toml | 2 + src/ExtendableSparse.jl | 13 +- .../ExtendableSparseParallel.jl | 258 ++++++ .../preparatory.jl | 427 ++++++++++ .../struct_flush.jl | 263 ++++++ .../supersparse.jl | 788 ++++++++++++++++++ 6 files changed, 1749 insertions(+), 2 deletions(-) create mode 100644 src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl create mode 100644 src/matrix/ExtendableSparseMatrixParallel/preparatory.jl create mode 100644 src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl create mode 100644 src/matrix/ExtendableSparseMatrixParallel/supersparse.jl diff --git a/Project.toml b/Project.toml index 89ce6e1..46d6e61 100644 --- a/Project.toml +++ b/Project.toml @@ -15,6 +15,8 @@ Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" SuiteSparse = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" +ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" [weakdeps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 36dca95..372df82 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -4,6 +4,10 @@ using LinearAlgebra using Sparspak using ILUZero +using Metis +using Base.Threads +using ExtendableGrids + if !isdefined(Base, :get_extension) using Requires end @@ -16,8 +20,6 @@ if USE_GPL_LIBS using SuiteSparse end -@info "test2" - using DocStringExtensions import SparseArrays: AbstractSparseMatrixCSC, rowvals, getcolptr, nonzeros @@ -31,6 +33,13 @@ export SparseMatrixLNK, export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet + +include("matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl") + +export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK +export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!, rawupdateindex!, updateindex!, compare_matrices_light + + include("factorizations/factorizations.jl") export JacobiPreconditioner, diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl new file mode 100644 index 0000000..68dace8 --- /dev/null +++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -0,0 +1,258 @@ +include("supersparse.jl") +include("preparatory.jl") +#include("prep_time.jl") + +mutable struct ExtendableSparseMatrixParallel{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Linked list structure holding data of extension + """ + lnkmatrices::Vector{SuperSparseMatrixLNK{Tv, Ti}} + + grid::ExtendableGrid + + nnts::Vector{Ti} + + sortednodesperthread::Matrix{Ti} + + old_noderegions::Matrix{Ti} + + cellsforpart::Vector{Vector{Ti}} + + 
globalindices::Vector{Vector{Ti}} + + new_indices::Vector{Ti} + + rev_new_indices::Vector{Ti} + + start::Vector{Ti} + + cellparts::Vector{Ti} + + nt::Ti + + depth::Ti + + +end + + + +function ExtendableSparseMatrixParallel{Tv, Ti}(nm, nt, depth; x0=0.0, x1=1.0) where {Tv, Ti <: Integer} + grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) + csc = spzeros(Tv, Ti, num_nodes(grid), num_nodes(grid)) + lnk = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(grid), nnts[tid]) for tid=1:nt] + ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth) +end + + + +function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=false) where {Tv, Ti <: Integer} + if known_that_unknown + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + return + end + + if updatentryCSC2!(A.cscmatrix, i, j, v) + else + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + end +end + + +#= +function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false) where {Tv, Ti <: Integer} + if known_that_unknown + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + return + end + + if updatentryCSC2!(A.cscmatrix, i, j, v) + else + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + end +end +=# + + +""" +`function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=true) where {Tv, Ti <: Integer}` + +A[i,j] += v, using any partition. +If the partition should be specified (for parallel use), use +`function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=true) where {Tv, Ti <: Integer}`. 
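+
+A minimal sequential sketch (matrix size and indices are illustrative only; in a
+threaded assembly loop the five-argument form with the owning partition id `tid`
+should be preferred):
+
+```
+A = ExtendableSparseMatrixParallel{Float64, Int64}((10, 10), 4, 1)
+addtoentry!(A, 1, 2, 1.0)   # A[1,2] += 1.0, owning partition is looked up internally
+addtoentry!(A, 1, 1, 0.5)   # A[1,1] += 0.5
+flush!(A)                   # merge the per-thread LNK parts into the CSC part
+```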
+""" +function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false) where {Tv, Ti <: Integer} + if known_that_unknown + level, tid = last_nz(A.old_noderegions[:, A.rev_new_indices[j]]) + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + return + end + + if updatentryCSC2!(A.cscmatrix, i, j, v) + else + level, tid = last_nz(A.old_noderegions[:, A.rev_new_indices[j]]) + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + end +end + +#--------------------------------- + + +function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + op, + v, + i, + j) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + return + else + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + updateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j]) + end + ext +end + +function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + op, + v, + i, + j, + tid) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + return + else + updateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j]) + end + ext +end + +function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + op, + v, + i, + j) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + rawupdateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j]) + end + ext +end + +function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + op, + v, + i, + j, + tid) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + rawupdateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j]) + end + ext +end + +function Base.getindex(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + i::Integer, + j::Integer) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + return ext.cscmatrix.nzval[k] + end + + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + ext.lnkmatrices[tid][i, ext.sortednodesperthread[tid, j]] + +end + +function Base.setindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + v::Union{Number,AbstractVecOrMat}, + i::Integer, + j::Integer) where {Tv, Ti} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = v + else + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + #@info typeof(tid), typeof(j) + jj = ext.sortednodesperthread[tid, j] + ext.lnkmatrices[tid][i, jj] = v + end +end + + + +#------------------------------------ + +function reset!(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer} + A.cscmatrix = spzeros(Tv, Ti, num_nodes(A.grid), num_nodes(A.grid)) + A.lnkmatrices = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(A.grid), A.nnts[tid]) for tid=1:A.nt] +end + +function nnz_flush(ext::ExtendableSparseMatrixParallel) + flush!(ext) + return nnz(ext.cscmatrix) +end + +function nnz_noflush(ext::ExtendableSparseMatrixParallel) + return nnz(ext.cscmatrix), sum([ext.lnkmatrices[i].nnz for i=1:ext.nt]) +end + +function matrixindextype(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer} + Ti 
+end + +function matrixvaluetype(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer} + Tv +end + + + +function Base.show(io::IO, ::MIME"text/plain", ext::ExtendableSparseMatrixParallel) + #flush!(ext) + xnnzCSC, xnnzLNK = nnz_noflush(ext) + m, n = size(ext) + print(io, + m, + "×", + n, + " ", + typeof(ext), + " with ", + xnnzCSC, + " stored ", + xnnzCSC == 1 ? "entry" : "entries", + " in CSC and ", + xnnzLNK, + " stored ", + xnnzLNK == 1 ? "entry" : "entries", + " in LNK. CSC:") + + if !haskey(io, :compact) + io = IOContext(io, :compact => true) + end + + if !(m == 0 || n == 0 || xnnzCSC == 0) + print(io, ":\n") + Base.print_array(IOContext(io), ext.cscmatrix) + end +end + +Base.size(A::ExtendableSparseMatrixParallel) = (A.cscmatrix.m, A.cscmatrix.n) + +include("struct_flush.jl") diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl new file mode 100644 index 0000000..e14a066 --- /dev/null +++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl @@ -0,0 +1,427 @@ +""" +`function preparatory_multi_ps_less_reverse(nm, nt, depth)` + +`nm` is the number of nodes in each dimension (Examples: 2d: nm = (100,100) -> 100 x 100 grid, 3d: nm = (50,50,50) -> 50 x 50 x 50 grid). +`nt` is the number of threads. +`depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... +To assemble the system matrix parallely, things such as `cellsforpart` (= which thread takes which cells) need to be computed in advance. This is done here. +""" +function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, x0=0.0, x1=1.0) + grid = getgrid(nm; x0, x1) + + if sequential + (allcells, start, cellparts) = grid_to_graph_ps_multi!(grid, nt, depth)#) + else + (allcells, start, cellparts) = grid_to_graph_ps_multi_par!(grid, nt, depth) + end + + (nnts, s, onr, gi, gc, ni, rni, starts) = get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush( + cellparts, allcells, start, num_nodes(grid), Ti, nt + ) + + cfp = bettercellsforpart(cellparts, depth*nt+1) + return grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts +end + + +""" +`function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt)` + +After the cellregions (partitioning of the grid) of the grid have been computed, other things have to be computed, such as `sortednodesperthread` a depth+1 x num_nodes matrix, here `sortednodesperthreads[i,j]` is the point at which the j-th node appears in the i-th level matrix in the corresponding submatrix. +`cellregs` contains the partiton for each cell. +Furthermore, `nnts` (number of nodes of the threads) is computed, which contain for each thread the number of nodes that are contained in the cells of that thread. +`allcells` and `start` together behave like the rowval and colptr arrays of a CSC matrix, such that `allcells[start[j]:start[j+1]-1]` are all cells that contain the j-th node. +`nn` is the number of nodes in the grid. +`Ti` is the type (Int64,...) of the elements in the created arrays. +`nt` is the number of threads. 
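+
+For illustration of the `allcells`/`start` convention: if node 3 is contained in cells 7 and 9 only,
+then `start[4] - start[3] == 2` and `allcells[start[3]:start[4]-1] == [7, 9]`.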
+""" +function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt) + + num_matrices = maximum(cellregs) + depth = Int(floor((num_matrices-1)/nt)) + + #loop over each node, get the cellregion of the cell (the one not in the separator) write the position of that node inside the cellregions sorted ranking into a long vector + #nnts = [zeros(Ti, nt+1) for i=1:depth+1] + nnts = zeros(Ti, nt) + #noderegs_max_tmp = 0 + old_noderegions = zeros(Ti, (depth+1, nn)) + + # Count nodes per thread: + tmp = zeros(depth+1) + for j=1:nn + cells = @view allcells[start[j]:start[j+1]-1] + sortedcellregs = unique(sort(cellregs[cells])) + #tmp = [] + tmpctr = 1 + for cr in sortedcellregs + crmod = (cr-1)%nt+1 + level = Int(ceil(cr/nt)) + #nnts[crmod] += 1 + old_noderegions[level,j] = crmod + if !(crmod in tmp[1:tmpctr-1]) + nnts[crmod] += 1 + #sortednodesperthread[crmod,j] = nnts[crmod] #nnts[i][cr] + #push!(tmp, crmod) + tmp[tmpctr] = crmod + tmpctr += 1 + end + end + end + + # Reorder inidices to receive a block structure: + # Taking the original matrix [a_ij] and mapping each i and j to new_indices[i] and new_indices[j], gives a block structure + # the reverse is also defined rev_new_indices[new_indices[k]] = k + # From now on we will only use this new ordering + counter_for_reorder = zeros(Ti, depth*nt+1) + for j=1:nn + level, reg = last_nz(old_noderegions[:, j]) + counter_for_reorder[(level-1)*nt + reg] += 1 #(reg-1)*depth + level] += 1 + end + + starts = vcat([0], cumsum(counter_for_reorder)) + counter_for_reorder2 = zeros(Ti, depth*nt+1) + new_indices = Vector{Ti}(undef, nn) + rev_new_indices = Vector{Ti}(undef, nn) + origin = Vector{Ti}(undef, nn) + for j=1:nn + level, reg = last_nz(old_noderegions[:, j]) + counter_for_reorder2[(level-1)*nt + reg] += 1 + origin[j] = reg + new_indices[j] = starts[(level-1)*nt + reg]+counter_for_reorder2[(level-1)*nt + reg] + rev_new_indices[new_indices[j]] = j + end + starts .+= 1 + + # Build sortednodesperthread and globalindices array: + # They are inverses of each other: globalindices[tid][sortednodeperthread[tid][j]] = j + # Note that j has to be a `new index` + + sortednodesperthread = zeros(Ti, (nt, nn)) #vvcons(Ti, nnts) + globalindices = vvcons(Ti, nnts) + gictrs = zeros(Ti, nt) + + for nj=1:nn + oj = rev_new_indices[nj] + cells = @view allcells[start[oj]:start[oj+1]-1] + sortedcellregs = unique(sort(cellregs[cells])) + #tmp = [] + tmpctr = 1 + for cr in sortedcellregs + crmod = (cr-1)%nt+1 + level = Int(ceil(cr/nt)) + if !(crmod in tmp[1:tmpctr-1]) + gictrs[crmod] += 1 # , level] += 1 + sortednodesperthread[crmod,nj] = gictrs[crmod] + globalindices[crmod][gictrs[crmod]] = nj + #push!(tmp, crmod) + tmp[tmpctr] = crmod + tmpctr += 1 + end + end + end + + nnts, sortednodesperthread, old_noderegions, globalindices, gictrs, new_indices, rev_new_indices, starts +end + + + + + + + + +""" +`function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes)` + +This function partitons the separator, which is done if `depth`>1 (see `grid_to_graph_ps_multi!` and/or `preparatory_multi_ps`). +`cellregs` contains the regions/partitions/colors of each cell. +`nc` is the number of cells in the grid. +`ACSC` is the adjacency matrix of the graph of the (separator-) grid (vertex in graph is cell in grid, edge in graph means two cells share a node) stored as a CSC. +`nt` is the number of threads. 
+`level0` is the separator-partitoning level, if the (first) separator is partitioned, level0 = 1, in the next iteration, level0 = 2... +`preparatory_multi_ps` is the number of separator-cells. +""" +function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes) + sepanodes = findall(x->x==nt+1, cellregs) + + indptr = collect(1:nc+1) + indices = zeros(Int64, nc) + rowval = zeros(Int64, nc) + + indptrT = collect(1:ctr_sepanodes+1) + indicesT = zeros(Int64, ctr_sepanodes) + rowvalT = zeros(Int64, ctr_sepanodes) + + for (i,j) in enumerate(sepanodes) + indices[j] = i + indicesT[i] = j + rowval[j] = 1 + rowvalT[i] = 1 + end + + R = SparseMatrixCSC(ctr_sepanodes, nc, indptr, indices, rowval) + RT = SparseMatrixCSC(nc, ctr_sepanodes, indptrT, indicesT, rowvalT) + prod = ACSC*dropzeros(RT) + RART = dropzeros(R)*ACSC*dropzeros(RT) + + partition2 = Metis.partition(RART, nt) + cellregs2 = copy(partition2) + + ctr_sepanodes = 0 + for (i,j) in enumerate(sepanodes) + rows = RART.rowval[RART.colptr[i]:(RART.colptr[i+1]-1)] + cellregs[j] = level0*nt + cellregs2[i] + if minimum(partition2[rows]) != maximum(partition2[rows]) + cellregs[j] = (level0+1)*nt+1 + ctr_sepanodes += 1 + end + end + + RART, ctr_sepanodes +end + + + +""" +`function grid_to_graph_ps_multi!(grid, nt, depth)` + +The function assigns colors/partitons to each cell in the `grid`. First, the grid is partitoned into `nt` partitions. If `depth` > 1, the separator is partitioned again... +`grid` is a simplexgrid. +`nt` is the number of threads. +`depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... +""" +function grid_to_graph_ps_multi!(grid, nt, depth) + A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) + number_cells_per_node = zeros(Int64, num_nodes(grid)) + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + number_cells_per_node[node_id] += 1 + end + end + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_cells_per_node) + number_cells_per_node .= 0 + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + cells = @view allcells[start[j]:start[j+1]-1] + for (i,id1) in enumerate(cells) + for id2 in cells[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + ACSC = SparseArrays.SparseMatrixCSC(A) + + partition = Metis.partition(ACSC, nt) + cellregs = copy(partition) + + ctr_sepanodes = 0 + for j=1:num_cells(grid) + rows = ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] + if minimum(partition[rows]) != maximum(partition[rows]) + cellregs[j] = nt+1 + ctr_sepanodes += 1 + end + end + RART = ACSC + for level=1:depth-1 + RART, ctr_sepanodes = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes) + end + + + return allcells, start, cellregs +end + + + +function grid_to_graph_ps_multi_par!(grid, nt, depth) + time = zeros(12) + As = [ExtendableSparseMatrix{Int64, Int64}(num_cells(grid), num_cells(grid)) for tid=1:nt] + number_cells_per_node = zeros(Int64, num_nodes(grid)) + + cn = grid[CellNodes] + + for j=1:num_cells(grid) + tmp = view(cn, :, j) + for node_id in tmp + number_cells_per_node[node_id] += 1 + end + end + + + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += 
cumsum(number_cells_per_node) + number_cells_per_node .= 0 + + for j=1:num_cells(grid) + tmp = view(cn, :, j) + for node_id in tmp + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + node_range = get_starts(num_nodes(grid), nt) + Threads.@threads for tid=1:nt + for j in node_range[tid]:node_range[tid+1]-1 + cells = @view allcells[start[j]:start[j+1]-1] + l = length(cells) + for (i,id1) in enumerate(cells) + ce = view(cells, i+1:l) + for id2 in ce + As[tid][id1,id2] = 1 + As[tid][id2,id1] = 1 + end + end + end + ExtendableSparse.flush!(As[tid]) + end + + ACSC = add_all_par!(As).cscmatrix + + #SparseArrays.SparseMatrixCSC(A)) + + + partition = Metis.partition(ACSC, nt) + cellregs = copy(partition) + + ctr_sepanodes_a = zeros(Int64, nt) + + cell_range = get_starts(num_cells(grid), nt) + Threads.@threads :static for tid=1:nt + for j in cell_range[tid]:cell_range[tid+1]-1 + rows = @view ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] + if minimum(partition[rows]) != maximum(partition[rows]) + cellregs[j] = nt+1 + ctr_sepanodes_a[tid] += 1 + end + end + end + + ctr_sepanodes = sum(ctr_sepanodes_a) + + #= + time[10] = @elapsed for j=1:num_cells(grid) + rows = ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] + if minimum(partition[rows]) != maximum(partition[rows]) + cellregs[j] = nt+1 + ctr_sepanodes += 1 + end + end + =# + RART = ACSC + for level=1:depth-1 + RART, ctr_sepanodes = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes) + end + + + return allcells, start, cellregs +end + + +function add_all_par!(As) + nt = length(As) + depth = Int(floor(log2(nt))) + ende = nt + for level=1:depth + + @threads :static for tid=1:2^(depth-level) + #@info "$level, $tid" + start = tid+2^(depth-level) + while start <= ende + As[tid] += As[start] + start += 2^(depth-level) + end + end + ende = 2^(depth-level) + end + As[1] + +end + + +""" +`function vvcons(Ti, lengths)` + +`lengths` is a vector of integers. +The function creates a vector of zero vectors of type `Ti` of length `lengths[i]`. +""" +function vvcons(Ti, lengths) + x::Vector{Vector{Ti}} = [zeros(Ti, i) for i in lengths] + return x +end + + +""" +`function bettercellsforpart(xx, upper)` + +`xx` are the CellRegions (i.e. the color/partition of each cell). +`upper` is the number of partitions (upper=depth*nt+1). +The function returns a vector e.g. [v1, v2, v3, v4, v5]. +The element v1 would be the list of cells that are in partition 1 etc. +The function is basically a faster findall. +""" +function bettercellsforpart(xx, upper) + ctr = zeros(Int64, upper) + for x in xx + ctr[x] += 1 + end + cfp = vvcons(Int64, ctr) + ctr .= 1 + for (i,x) in enumerate(xx) + cfp[x][ctr[x]] = i + ctr[x] += 1 + end + cfp +end + +""" +`function getgrid(nm)` + +Returns a simplexgrid with a given number of nodes in each dimension. +`nm` is the number of nodes in each dimension (Examples: 2d: nm = (100,100) -> 100 x 100 grid, 3d: nm = (50,50,50) -> 50 x 50 x 50 grid). 
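+
+For example (the keyword arguments `x0` and `x1` of the method below set the coordinate range):
+
+```
+grid2d = getgrid((100, 100))                     # 100 x 100 grid on (0,1) x (0,1)
+grid3d = getgrid((50, 50, 50); x0=-1.0, x1=1.0)  # 50 x 50 x 50 grid on (-1,1)^3
+```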
+""" +function getgrid(nm; x0=0.0, x1=1.0) + if length(nm) == 2 + n,m = nm + xx = collect(LinRange(x0, x1, n)) + yy = collect(LinRange(x0, x1, m)) + grid = simplexgrid(xx, yy) + else + n,m,l = nm + xx = collect(LinRange(x0, x1, n)) + yy = collect(LinRange(x0, x1, m)) + zz = collect(LinRange(x0, x1, l)) + grid = simplexgrid(xx, yy, zz) + end + grid +end + +function get_starts(n, nt) + ret = ones(Int64, nt+1) + ret[end] = n+1 + for i=nt:-1:2 + ret[i] = ret[i+1] - Int(round(ret[i+1]/i)) #Int(round(n/nt))-1 + end + ret +end + +function last_nz(x) + n = length(x) + for j=n:-1:1 + if x[j] != 0 + return (j, x[j]) + end + end +end + diff --git a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl new file mode 100644 index 0000000..c27aab0 --- /dev/null +++ b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl @@ -0,0 +1,263 @@ +function flush!(A::ExtendableSparseMatrixParallel; do_dense=false, keep_zeros=true) + + + if !do_dense + A.cscmatrix = A.cscmatrix+sparse_flush!(A; keep_zeros) + + else + if keep_zeros + A.cscmatrix = dense_flush_keepzeros!(A.lnkmatrices, A.old_noderegions, A.sortednodesperthread, A.nt, A.rev_new_indices) + else + A.cscmatrix = dense_flush_removezeros!(A.lnkmatrices, A.old_noderegions, A.sortednodesperthread, A.nt, A.rev_new_indices) + end + end + + A.lnkmatrices = [SuperSparseMatrixLNK{matrixvaluetype(A), matrixindextype(A)}(num_nodes(A.grid), A.nnts[tid]) for tid=1:A.nt] + +end + +""" +`CSC_RLNK_plusequals_less3_reordered_super!` from `plusequals.jl` +""" +function sparse_flush!(A::ExtendableSparseMatrixParallel; keep_zeros=true) + + #dropzeros!( + plus_remap(A.lnkmatrices, A.cscmatrix, A.globalindices; keep_zeros) + #) + +end + + + +""" +`CSC_RLNK_si_oc_ps_dz_less_reordered` from `conversion.jl` +""" +function dense_flush_keepzeros!( + As::Vector{SuperSparseMatrixLNK{Tv, Ti}}, + onr, s, nt, rni + ) where {Tv, Ti <: Integer} + + nnz = sum([As[i].nnz for i=1:nt]) #you could also subtract the diagonal entries from shared columns, since those are definitely double + indptr = zeros(Ti, As[1].m+1) + indices = zeros(Ti, nnz) #sum(As.nnz)) + data = zeros(Float64, nnz) #sum(As.nnz)) + ctr = 1 + eqctr = 0 + tmp = zeros(Ti, size(onr)[1]) + + for nj=1:As[1].m + indptr[nj] = ctr + oj = rni[nj] + regionctr = 1 + jc = 0 + nrr = view(onr, :, oj) + tmp .= 0 + for region in nrr #nrr #[:,j] + regmod = region #(region-1)%nt+1 + if (region > 0) & !(region in tmp) + k = s[regmod, nj] + if regionctr == 1 + while k>0 + #if As[regmod].nzval[k] != 0.0 + indices[ctr] = As[regmod].rowval[k] + data[ctr] = As[regmod].nzval[k] + + for jcc=1:jc + if indices[ctr-jcc] > indices[ctr-jcc+1] + tmp_i = indices[ctr-jcc+1] + tmp_d = data[ctr-jcc+1] + indices[ctr-jcc+1] = indices[ctr-jcc] + data[ctr-jcc+1] = data[ctr-jcc] + + indices[ctr-jcc] = tmp_i + data[ctr-jcc] = tmp_d + else + break + end + end + + ctr += 1 + jc += 1 + #end + k = As[regmod].colptr[k] + end + else + while k>0 + #if As[regmod].nzval[k] != 0.0 + indices[ctr] = As[regmod].rowval[k] + data[ctr] = As[regmod].nzval[k] + + for jcc=1:jc + if indices[ctr-jcc] > indices[ctr-jcc+1] + tmp_i = indices[ctr-jcc+1] + tmp_d = data[ctr-jcc+1] + indices[ctr-jcc+1] = indices[ctr-jcc] + data[ctr-jcc+1] = data[ctr-jcc] + + indices[ctr-jcc] = tmp_i + data[ctr-jcc] = tmp_d + elseif indices[ctr-jcc] == indices[ctr-jcc+1] + data[ctr-jcc] += data[ctr-jcc+1] + eqctr += 1 + + for jccc=1:jcc + indices[ctr-jcc+jccc] = indices[ctr-jcc+jccc+1] + data[ctr-jcc+jccc] = data[ctr-jcc+jccc+1] + end + + ctr 
-= 1 + jc -= 1 + + break + else + break + end + end + + ctr += 1 + jc += 1 + #end + k = As[regmod].colptr[k] + end + + end + tmp[regionctr] = region + regionctr += 1 + + end + + end + + end + + #@warn ctr/nnz + + indptr[end] = ctr + resize!(indices, ctr-1) + resize!(data, ctr-1) + + + SparseArrays.SparseMatrixCSC( + As[1].m, As[1].m, indptr, indices, data + ) + +end + + +function dense_flush_removezeros!( + As::Vector{SuperSparseMatrixLNK{Tv, Ti}}, + onr, s, nt, rni + ) where {Tv, Ti <: Integer} + + nnz = sum([As[i].nnz for i=1:nt]) #you could also subtract the diagonal entries from shared columns, since those are definitely double + indptr = zeros(Ti, As[1].m+1) + indices = zeros(Ti, nnz) #sum(As.nnz)) + data = zeros(Float64, nnz) #sum(As.nnz)) + ctr = 1 + eqctr = 0 + tmp = zeros(Ti, size(onr)[1]) + + for nj=1:As[1].m + indptr[nj] = ctr + oj = rni[nj] + regionctr = 1 + jc = 0 + nrr = view(onr, :, oj) + tmp .= 0 + for region in nrr #nrr #[:,j] + regmod = region #(region-1)%nt+1 + if (region > 0) & !(region in tmp) + k = s[regmod, nj] + if regionctr == 1 + while k>0 + if As[regmod].nzval[k] != 0.0 + indices[ctr] = As[regmod].rowval[k] + data[ctr] = As[regmod].nzval[k] + + for jcc=1:jc + if indices[ctr-jcc] > indices[ctr-jcc+1] + tmp_i = indices[ctr-jcc+1] + tmp_d = data[ctr-jcc+1] + indices[ctr-jcc+1] = indices[ctr-jcc] + data[ctr-jcc+1] = data[ctr-jcc] + + indices[ctr-jcc] = tmp_i + data[ctr-jcc] = tmp_d + else + break + end + end + + ctr += 1 + jc += 1 + end + k = As[regmod].colptr[k] + end + else + while k>0 + if As[regmod].nzval[k] != 0.0 + indices[ctr] = As[regmod].rowval[k] + data[ctr] = As[regmod].nzval[k] + + for jcc=1:jc + if indices[ctr-jcc] > indices[ctr-jcc+1] + tmp_i = indices[ctr-jcc+1] + tmp_d = data[ctr-jcc+1] + indices[ctr-jcc+1] = indices[ctr-jcc] + data[ctr-jcc+1] = data[ctr-jcc] + + indices[ctr-jcc] = tmp_i + data[ctr-jcc] = tmp_d + elseif indices[ctr-jcc] == indices[ctr-jcc+1] + data[ctr-jcc] += data[ctr-jcc+1] + eqctr += 1 + + for jccc=1:jcc + indices[ctr-jcc+jccc] = indices[ctr-jcc+jccc+1] + data[ctr-jcc+jccc] = data[ctr-jcc+jccc+1] + end + + ctr -= 1 + jc -= 1 + + break + else + break + end + end + + ctr += 1 + jc += 1 + end + k = As[regmod].colptr[k] + end + + end + tmp[regionctr] = region + regionctr += 1 + + end + + end + + end + + #@warn ctr/nnz + + indptr[end] = ctr + resize!(indices, ctr-1) + resize!(data, ctr-1) + + + SparseArrays.SparseMatrixCSC( + As[1].m, As[1].m, indptr, indices, data + ) + +end + + + + + + + diff --git a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl new file mode 100644 index 0000000..ae52f60 --- /dev/null +++ b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl @@ -0,0 +1,788 @@ + +using SparseArrays +using ExtendableSparse + +mutable struct SuperSparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti} + """ + Number of rows + """ + m::Ti + + """ + Number of columns + """ + n::Ti + + """ + Number of nonzeros + """ + nnz::Ti + + """ + Length of arrays + """ + nentries::Ti + + """ + Linked list of column entries. Initial length is n, + it grows with each new entry. + + colptr[index] contains the next + index in the list or zero, in the later case terminating the list which + starts at index 1<=j<=n for each column j. + """ + colptr::Vector{Ti} + + """ + Row numbers. For each index it contains the zero (initial state) + or the row numbers corresponding to the column entry list in colptr. + + Initial length is n, + it grows with each new entry. 
+ """ + rowval::Vector{Ti} + + """ + Nonzero entry values correspondin to each pair + (colptr[index],rowval[index]) + + Initial length is n, it grows with each new entry. + """ + nzval::Vector{Tv} + + + collnk::Vector{Ti} + + colctr::Ti +end + + +function SparseArrays.SparseMatrixCSC(A::SuperSparseMatrixLNK{Tv, Ti})::SparseArrays.SparseMatrixCSC where {Tv, Ti <: Integer} + SparseArrays.SparseMatrixCSC(SparseMatrixLNK{Tv, Ti}(A.m, A.n, A.nnz, A.nentries, A.colptr, A.rowval, A.nzval)) + +end + +function SuperSparseMatrixLNK{Tv, Ti}(m, n) where {Tv, Ti <: Integer} + SuperSparseMatrixLNK{Tv, Ti}(m, n, 0, n, zeros(Ti, n), zeros(Ti, n), zeros(Tv, n), zeros(Ti, n), 0) +end + + +function findindex(lnk::SuperSparseMatrixLNK, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + k = j + k0 = j + while k > 0 + if lnk.rowval[k] == i + return k, 0 + end + k0 = k + k = lnk.colptr[k] + end + return 0, k0 +end + +""" +Return tuple containing size of the matrix. +""" +Base.size(lnk::SuperSparseMatrixLNK) = (lnk.m, lnk.n) + +""" +Return value stored for entry or zero if not found +""" +function Base.getindex(lnk::SuperSparseMatrixLNK{Tv, Ti}, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k == 0 + return zero(Tv) + else + return lnk.nzval[k] + end +end + +function addentry!(lnk::SuperSparseMatrixLNK, i, j, k, k0) + # increase number of entries + lnk.nentries += 1 + if length(lnk.nzval) < lnk.nentries + newsize = Int(ceil(5.0 * lnk.nentries / 4.0)) + resize!(lnk.nzval, newsize) + resize!(lnk.rowval, newsize) + resize!(lnk.colptr, newsize) + end + + # Append entry if not found + lnk.rowval[lnk.nentries] = i + + # Shift the end of the list + lnk.colptr[lnk.nentries] = 0 + lnk.colptr[k0] = lnk.nentries + + # Update number of nonzero entries + lnk.nnz += 1 + return lnk.nentries +end + +""" +Update value of existing entry, otherwise extend matrix if v is nonzero. +""" +function Base.setindex!(lnk::SuperSparseMatrixLNK, v, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + # Set the first column entry if it was not yet set. + if lnk.rowval[j] == 0 && !iszero(v) + lnk.colctr += 1 + lnk.collnk[lnk.colctr] = j + lnk.rowval[j] = i + lnk.nzval[j] = v + lnk.nnz += 1 + return lnk + end + + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = v + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = v + end + return lnk +end + +""" +Update element of the matrix with operation `op`. +It assumes that `op(0,0)==0`. If `v` is zero, no new +entry is created. +""" +function updateindex!(lnk::SuperSparseMatrixLNK{Tv, Ti}, op, v, i, j) where {Tv, Ti} + # Set the first column entry if it was not yet set. + if lnk.rowval[j] == 0 && !iszero(v) + lnk.colctr += 1 + lnk.collnk[lnk.colctr] = j + lnk.rowval[j] = i + lnk.nzval[j] = op(lnk.nzval[j], v) + lnk.nnz += 1 + return lnk + end + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +function rawupdateindex!(lnk::SuperSparseMatrixLNK{Tv, Ti}, op, v, i, j) where {Tv, Ti} + # Set the first column entry if it was not yet set. 
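+    # Unlike `updateindex!` above, the first entry of a column is created here even
+    # when `v` is zero.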
+ if lnk.rowval[j] == 0 + lnk.colctr += 1 + lnk.collnk[lnk.colctr] = j + lnk.rowval[j] = i + lnk.nzval[j] = op(lnk.nzval[j], v) + lnk.nnz += 1 + return lnk + end + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +#= +mutable struct ColEntry{Tv, Ti <: Integer} + rowval::Ti + nzval::Tv +end + +# Comparison method for sorting +Base.isless(x::ColEntry, y::ColEntry) = (x.rowval < y.rowval) +=# + +function get_column!(col::Vector{ColEntry{Tv, Ti}}, lnk::SuperSparseMatrixLNK{Tv, Ti}, j::Ti)::Ti where {Tv, Ti <: Integer} + k = j + ctr = zero(Ti) + while k>0 + if abs(lnk.nzval[k]) > 0 + ctr += 1 + col[ctr] = ColEntry(lnk.rowval[k], lnk.nzval[k]) + end + k = lnk.colptr[k] + end + sort!(col, 1, ctr, Base.QuickSort, Base.Forward) + ctr +end + + +function remove_doubles!(col, coll) + #input_ctr = 1 + last = 1 + for j=2:coll + if col[j].rowval == col[last].rowval + col[last].nzval += col[j].nzval + else + last += 1 + if last != j + col[last] = col[j] + end + end + end + last +end + +function get_column_removezeros!(col::Vector{ColEntry{Tv, Ti}}, lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, js, tids, length)::Ti where {Tv, Ti <: Integer} + ctr = zero(Ti) + for i=1:length + tid = tids[i] + k = js[i] + #for (tid,j) in zip(tids, js) #j0:j1 + #tid = tids[j] + #k = j + while k>0 + if abs(lnks[tid].nzval[k]) > 0 + ctr += 1 + col[ctr] = ColEntry(lnks[tid].rowval[k], lnks[tid].nzval[k]) + end + k = lnks[tid].colptr[k] + end + end + + sort!(col, 1, ctr, Base.QuickSort, Base.Forward) + ctr = remove_doubles!(col, ctr) + #print_col(col, ctr) + ctr + +end + +function get_column_keepzeros!(col::Vector{ColEntry{Tv, Ti}}, lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, js, tids, length)::Ti where {Tv, Ti <: Integer} + ctr = zero(Ti) + for i=1:length + tid = tids[i] + k = js[i] + #for (tid,j) in zip(tids, js) #j0:j1 + #tid = tids[j] + #k = j + while k>0 + #if abs(lnks[tid].nzval[k]) > 0 + ctr += 1 + col[ctr] = ColEntry(lnks[tid].rowval[k], lnks[tid].nzval[k]) + #end + k = lnks[tid].colptr[k] + end + end + + sort!(col, 1, ctr, Base.QuickSort, Base.Forward) + ctr = remove_doubles!(col, ctr) + #print_col(col, ctr) + ctr + +end + +function merge_into!(rowval::Vector{Ti}, nzval::Vector{Tv}, C::SparseArrays.SparseMatrixCSC{Tv, Ti}, col::Vector{ColEntry{Tv, Ti}}, J::Ti, coll::Ti, ptr1::Ti) where {Tv, Ti <: Integer} + j_min = 1 + numshifts = 0 + j_last = 0 + last_row = 0 + + #@warn "MERGING $J" + + #rowval0 = copy(C.rowval[C.colptr[J]:C.colptr[J+1]-1]) + #endptr = C.colptr[J+1] + + for (di,i) in enumerate(C.colptr[J]:C.colptr[J+1]-1) + for j=j_min:coll + #if col[j].rowval == last_row + # #@info "!! 
col j rowval == last row" + #end + if col[j].rowval < C.rowval[i] #ptr1+di+numshifts] #i+numshifts] + if col[j].rowval == last_row + #@info "$(ptr1+di+numshifts) : backwards EQUALITY: " + nzval[ptr1+di+numshifts] += col[j].nzval + else + #@info "$(ptr1+di+numshifts) : Insert from col: j=$j" + #shift_e!(C.rowval, C.nzval, 1, i+numshifts, C.colptr[end]-1) + rowval[ptr1+di+numshifts] = col[j].rowval + nzval[ptr1+di+numshifts] = col[j].nzval + numshifts += 1 + #endptr += 1 + end + j_last = j + elseif col[j].rowval > C.rowval[i] #if col[j].rowval + #@info "$(ptr1+di+numshifts) : Insert from C: i=$i" + rowval[ptr1+di+numshifts] = C.rowval[i] + nzval[ptr1+di+numshifts] = C.nzval[i] + j_min = j + break + else + #@info "$(ptr1+di+numshifts) : normal EQUALITY: i=$i, j=$j" + rowval[ptr1+di+numshifts] = C.rowval[i] + nzval[ptr1+di+numshifts] = C.nzval[i]+col[j].nzval + #numshifts += 1 + j_min = j+1 + j_last = j + + if j == coll + #@info "$(ptr1+di+numshifts+1) → $(ptr1+numshifts+(C.colptr[J+1]-C.colptr[J]))" + rowval[ptr1+di+numshifts+1:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.rowval, i+1:C.colptr[J+1]-1) #C.rowval[i:C.colptr[J+1]-1] + nzval[ptr1+di+numshifts+1:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.nzval, i+1:C.colptr[J+1]-1) #C.nzval[i:C.colptr[J+1]-1] + + #@info "FINISH" + return numshifts + end + + break + end + + if j == coll + #@info "$(ptr1+di+numshifts) → $(ptr1+numshifts+(C.colptr[J+1]-C.colptr[J]))" + rowval[ptr1+di+numshifts:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.rowval, i:C.colptr[J+1]-1) #C.rowval[i:C.colptr[J+1]-1] + nzval[ptr1+di+numshifts:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.nzval, i:C.colptr[J+1]-1) #C.nzval[i:C.colptr[J+1]-1] + + #@info "FINISH" + return numshifts + end + + last_row = col[j].rowval + end + end + endptr = ptr1 + numshifts + (C.colptr[J+1]-C.colptr[J]) + last_row = 0 + numshifts_old = numshifts + numshifts = 0 + #start_ptr = endptr - 1 #C.colptr[J+1]-1 + if j_last > 0 + last_row = col[j_last].rowval + end + + if j_last != coll + for j=j_last+1:coll + if col[j].rowval != last_row + numshifts += 1 + #shift_e!(C.rowval, C.nzval, 1, start_ptr+numshifts, C.colptr[end]-1) + #for k=start_ptr+numshifts: + #@info "$(endptr+numshifts) : after..." 
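+                    # append this remaining column entry behind the part of the column merged so far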
+ rowval[endptr+numshifts] = col[j].rowval + nzval[endptr+numshifts] = col[j].nzval + last_row = rowval[endptr+numshifts] + #colptr[J+1:end] .+= 1 + else + nzval[endptr+numshifts] += col[j].nzval + end + end + end + + return numshifts + numshifts_old + +end + + +function print_col(col, coll) + v = zeros((2, coll)) + for j=1:coll + v[1,j] = col[j].rowval + v[2,j] = col[j].nzval + end + @info v +end + +function plus(lnk::SparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} + if lnk.nnz == 0 + return csc + elseif length(csc.rowval) == 0 + return SparseMatrixCSC(lnk) + else + return lnk + csc + end +end + +function plus(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} + gi = collect(1:csc.n) + + + supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]] + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + #sortedcolumns = vcat([1], sortedcolumns) + sortedcolumns = vcat(sortedcolumns, [csc.n+1]) + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m] + + #@info sortedcolumnids + + nnz_sum = length(csc.rowval) + lnk.nnz + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + #first part: columns between 1 and first column of lnk + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + numshifts = 0 + + for J=1:length(sortedcolumns)-1 + #@info ">>>>>>> $J <<<<<<<<<<<<<<<" + # insert new added column here / dummy + i = sortedcolumns[J] + coll = get_column!(col, lnk, i) + #print_col(col, coll) + + nns = merge_into!(rowval, nzval, csc, col, i, coll, colptr[i]-1) + + numshifts += nns + #j = colptr[i] #sortedcolumns[J]] + #rowval[j] = J + #nzval[j] = J + # insertion end + + #colptr[i+1] = colptr[i] + csc.colptr[i+1]-csc.colptr[i] + numshifts + + #a = i+1 + #b = sortedcolumns[J+1] + #@info a, b + + + #colptr[i+1:sortedcolumns[J+1]] = (csc.colptr[i+1:sortedcolumns[J+1]]-csc.colptr[i:sortedcolumns[J+1]-1]).+(colptr[i] + nns) + + colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns) + + + rowval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + nzval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.nzval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + + + #= + + @info csc.colptr[a:b] + + colptr[a:b] = csc.colptr[a:b].+numshifts + + #colptr[i+2:sortedcolumns[J+1]] = csc.colptr[i+2:sortedcolumns[J+1]].+numshifts + @info i, J, colptr[i+2], colptr[sortedcolumns[J+1]], csc.colptr[i+2], csc.colptr[sortedcolumns[J+1]] + @info i, J, colptr[a], colptr[b], csc.colptr[a], csc.colptr[b] + rowval[colptr[i+2]:colptr[sortedcolumns[J+1]]] = view(csc.rowval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]]) + nzval[colptr[i+2]:colptr[sortedcolumns[J+1]]] = view(csc.nzval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]]) + #rowval[colptrsortedcolumns[J+1]] + =# + end + + #@info colptr + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + + + +end + + +function plus_remap(lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, csc::SparseArrays.SparseMatrixCSC, gi::Vector{Vector{Ti}}; keep_zeros=true) where {Tv, 
Ti <: Integer} + nt = length(lnks) + + if keep_zeros + get_col! = get_column_keepzeros! + else + get_col! = get_column_removezeros! + end + lnkscols = vcat([lnks[i].collnk[1:lnks[i].colctr] for i=1:nt]...) + supersparsecolumns = vcat([gi[i][lnks[i].collnk[1:lnks[i].colctr]] for i=1:nt]...) + num_cols = sum([lnks[i].colctr for i=1:nt]) + tids = Vector{Ti}(undef, num_cols) + ctr = 0 + for i=1:nt + for j=1:lnks[i].colctr + ctr += 1 + tids[ctr] = i + end + end + + + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + sortedcolumns = vcat(sortedcolumns, [Ti(csc.n+1)]) + + coll = sum([lnks[i].nnz for i=1:nt]) + nnz_sum = length(csc.rowval) + coll + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + if csc.m < coll + coll = csc.m + end + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:coll] + numshifts = 0 + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + J = 1 + i0 = 0 + #lj_last = [] + #tid_last = [] + lj_last = Vector{Ti}(undef, nt) + tid_last = Vector{Ti}(undef, nt) + ctr_last = 1 + gj_last = 0 + for J=1:length(sortedcolumns)-1 + gj_now = sortedcolumns[J] + gj_next = sortedcolumns[J+1] + + lj_last[ctr_last] = lnkscols[sortedcolumnids[J]] + tid_last[ctr_last] = tids[sortedcolumnids[J]] + + if gj_now != gj_next + #@info typeof(lnks) + # do stuff from gj_last to gj_now / from last_lj to J + #@info lj_last, tid_last + coll = get_col!(col, lnks, lj_last, tid_last, ctr_last) + + nns = merge_into!(rowval, nzval, csc, col, gj_now, coll, colptr[gj_now]-one(Ti)) + numshifts += nns + + + colptr[gj_now+1:sortedcolumns[J+1]] = csc.colptr[gj_now+1:sortedcolumns[J+1]].+(-csc.colptr[gj_now]+colptr[gj_now] + nns) + + rowval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1) + nzval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.nzval, csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1) + + #rowval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = csc.rowval[csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1] + #nzval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = csc.nzval[csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1] + + + #for k=csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1 + # k2 = k+(-csc.colptr[gj_now]+colptr[gj_now] + nns) + # rowval[k2] = csc.rowval[k] + # nzval[k2] = csc.nzval[k] + #end + + gj_last = gj_now + ctr_last = 0 #tids[sortedcolumnids[J]]] + end + + ctr_last += 1 + + + end + + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseArrays.SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + + + #for ... 
+ # take many columns together if necessary in `get_column` + #end + + + +end + + + +function plus_remap(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC, gi::Vector{Ti}) where {Tv, Ti <: Integer} + + #@info lnk.collnk[1:lnk.colctr] + + + supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]] + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + #sortedcolumns = vcat([1], sortedcolumns) + #@info typeof(supersparsecolumns), typeof(sortedcolumns) + + sortedcolumns = vcat(sortedcolumns, [Ti(csc.n+1)]) + + #@info typeof(supersparsecolumns), typeof(sortedcolumns) + + + #@info supersparsecolumns + #@info sortedcolumns + #@info lnk.collnk[1:length(sortedcolumns)-1] + #@info lnk.collnk[sortedcolumnids[1:length(sortedcolumns)-1]] + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m] + + #@info sortedcolumnids + + nnz_sum = length(csc.rowval) + lnk.nnz + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + #first part: columns between 1 and first column of lnk + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + numshifts = 0 + + for J=1:length(sortedcolumns)-1 + i = sortedcolumns[J] + + coll = get_column!(col, lnk, lnk.collnk[sortedcolumnids[J]] ) + #@info typeof(i), typeof(coll), typeof(colptr), typeof(colptr[i]), typeof(colptr[i]-1) + nns = merge_into!(rowval, nzval, csc, col, i, coll, colptr[i]-one(Ti)) + numshifts += nns + + + colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns) + rowval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + nzval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.nzval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + + #= + for k=csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1 + k2 = k+(-csc.colptr[i]+colptr[i] + nns) + rowval[k2] = csc.rowval[k] + nzval[k2] = csc.nzval[k] + end + =# + end + + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseArrays.SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + +end + + + + +function plus_loop(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} + gi = collect(1:csc.n) + + supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]] + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + #sortedcolumns = vcat([1], sortedcolumns) + sortedcolumns = vcat(sortedcolumns, [csc.n+1]) + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m] + + #@info sortedcolumnids + + nnz_sum = length(csc.rowval) + lnk.nnz + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + #first part: columns between 1 and first column of lnk + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + numshifts = 0 + + for J=1:length(sortedcolumns)-1 + i = sortedcolumns[J] + coll = get_column!(col, lnk, i) + + nns = merge_into!(rowval, nzval, csc, col, i, coll, 
colptr[i]-1) + numshifts += nns + + colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns) + + + for k=csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1 + k2 = k+(-csc.colptr[i]+colptr[i] + nns) + rowval[k2] = csc.rowval[k] + nzval[k2] = csc.nzval[k] + end + + + end + + #@info colptr + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + + + +end + + + +function twodisjointsets(n, k) + A = rand(1:n, k) + B = zeros(Int64, k) + done = false + ctr = 0 + while ctr != k + v = rand(1:n) + if !(v in A) + ctr += 1 + B[ctr] = v + end + end + + A, B +end + +function distinct(x, n) + y = zeros(typeof(x[1]), n) + ctr = 0 + while ctr != n + v = rand(x) + if !(v in y[1:ctr]) + ctr += 1 + y[ctr] = v + end + end + y +end + + +function mean(x) + sum(x)/length(x) +end + +function form(x) + [minimum(x), mean(x), maximum(x)] +end + + + + + + + + + + + From 4d15e47275db1ac41de6e08b935ea32d3c32e1f2 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Tue, 20 Feb 2024 17:58:45 +0100 Subject: [PATCH 3/9] add Mittal,Al-Kurdi ILU --- src/ExtendableSparse.jl | 2 + src/factorizations/factorizations.jl | 2 + src/factorizations/ilu_Al-Kurdi_Mittal.jl | 136 ++++++++++++++++++++++ src/factorizations/iluam.jl | 35 ++++++ 4 files changed, 175 insertions(+) create mode 100644 src/factorizations/ilu_Al-Kurdi_Mittal.jl create mode 100644 src/factorizations/iluam.jl diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 372df82..2894027 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -39,6 +39,8 @@ include("matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl") export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!, rawupdateindex!, updateindex!, compare_matrices_light +include("factorizations/ilu_Al-Kurdi_Mittal.jl") +using .ILUAM include("factorizations/factorizations.jl") diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index d278d8b..ead23c5 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -157,6 +157,7 @@ end include("jacobi.jl") include("ilu0.jl") include("iluzero.jl") +include("iluam.jl") include("parallel_jacobi.jl") include("parallel_ilu0.jl") include("sparspak.jl") @@ -165,6 +166,7 @@ include("blockpreconditioner.jl") @eval begin @makefrommatrix ILU0Preconditioner @makefrommatrix ILUZeroPreconditioner + @makefrommatrix ILUAMPreconditioner @makefrommatrix PointBlockILUZeroPreconditioner @makefrommatrix JacobiPreconditioner @makefrommatrix ParallelJacobiPreconditioner diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal.jl b/src/factorizations/ilu_Al-Kurdi_Mittal.jl new file mode 100644 index 0000000..a47bd50 --- /dev/null +++ b/src/factorizations/ilu_Al-Kurdi_Mittal.jl @@ -0,0 +1,136 @@ +module ILUAM +using LinearAlgebra, SparseArrays + +import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz + + +mutable struct ILUAMPrecon{T,N} + + diag::AbstractVector + nzval::AbstractVector + rowval::AbstractVector + colptr::AbstractVector + +end + +function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} + n = A.n # number of columns + nzval = copy(A.nzval) + diag = Vector{N}(undef, n) + + ILUAMPrecon{T, N}(diag, copy(A.nzval), copy(A.rowval), copy(A.colptr)) +end + +function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) 
where {T,N<:Integer} + nzval = LU.nzval + diag = LU.diag + + colptr = LU.colptr + rowval = LU.rowval + n = A.n # number of columns + point = zeros(N, n) #Vector{N}(undef, n) + + # find diagonal entries + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? + point[rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(N) + end + end +end + +function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} + LU = ILUAMPrecon(A::SparseMatrixCSC{T,N}) + iluAM!(LU, A) + LU +end + + +function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.colptr + rowval = ilu.rowval + diag = ilu.diag + y .= 0 + @inbounds for j=1:n + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + y +end + + +function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.colptr + rowval = ilu.rowval + diag = ilu.diag + wrk = copy(y) + @inbounds for j=n:-1:1 + x[j] = wrk[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 + wrk[rowval[i]] -= nzval[i]*x[j] + end + end + x +end + +function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(x, ilu, y) + x +end + +function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(b, ilu, y) + b +end + +function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + x = copy(b) + ldiv!(x, ilu, b) +end + +function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} + length(ilu.nzval) +end + + +end \ No newline at end of file diff --git a/src/factorizations/iluam.jl b/src/factorizations/iluam.jl new file mode 100644 index 0000000..5d65e40 --- /dev/null +++ b/src/factorizations/iluam.jl @@ -0,0 +1,35 @@ +mutable struct ILUAMPreconditioner <: AbstractPreconditioner + A::ExtendableSparseMatrix + factorization::ILUAM.ILUAMPrecon + phash::UInt64 + function ILUAMPreconditioner() + p = new() + p.phash = 0 + p + end +end + +""" +``` +ILUAMPreconditioner() +ILUAMPreconditioner(matrix) +``` +Incomplete LU preconditioner with zero fill-in using ... . This preconditioner +also calculates and stores updates to the off-diagonal entries and thus delivers better convergence than the [`ILU0Preconditioner`](@ref). 
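+
+A usage sketch of the intended API (the `fdrand` test matrix helper is exported by this
+package; sizes are illustrative):
+
+```
+A = fdrand(20, 20)                  # small finite-difference-like test matrix
+p = ILUAMPreconditioner(A)
+b = ones(size(A, 1))
+x = similar(b)
+ldiv!(x, p, b)                      # apply the preconditioner once
+```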
+""" +function ILUAMPreconditioner end + +function update!(p::ILUAMPreconditioner) + flush!(p.A) + if p.A.phash != p.phash + p.factorization = ILUAM.iluAM(p.A.cscmatrix) + p.phash=p.A.phash + else + ILUAM.ilu0!(p.factorization, p.A.cscmatrix) + end + p +end + +allow_views(::ILUAMPreconditioner)=true +allow_views(::Type{ILUAMPreconditioner})=true + From 4e0e30958e8f7e51606e4b6d76b71f5ec8761988 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Fri, 23 Feb 2024 12:47:51 +0100 Subject: [PATCH 4/9] implement ILU (sequential and parallel) based on Mittal and Al-Kurdi --- Project.toml | 6 +- src/ExtendableSparse.jl | 33 ++- src/factorizations/factorizations.jl | 119 +++++--- src/factorizations/ilu_Al-Kurdi_Mittal.jl | 176 ++++++++---- src/factorizations/ilu_Al-Kurdi_Mittal_0.jl | 146 ++++++++++ src/factorizations/ilu_Al-Kurdi_Mittal_1.jl | 229 +++++++++++++++ src/factorizations/iluam.jl | 7 +- src/factorizations/pilu_Al-Kurdi_Mittal.jl | 270 ++++++++++++++++++ src/factorizations/piluam.jl | 36 +++ .../ExtendableSparseParallel.jl | 34 ++- .../struct_flush.jl | 2 +- 11 files changed, 941 insertions(+), 117 deletions(-) create mode 100644 src/factorizations/ilu_Al-Kurdi_Mittal_0.jl create mode 100644 src/factorizations/ilu_Al-Kurdi_Mittal_1.jl create mode 100644 src/factorizations/pilu_Al-Kurdi_Mittal.jl create mode 100644 src/factorizations/piluam.jl diff --git a/Project.toml b/Project.toml index 46d6e61..8776054 100644 --- a/Project.toml +++ b/Project.toml @@ -1,13 +1,15 @@ name = "ExtendableSparse" uuid = "95c220a8-a1cf-11e9-0c77-dbfce5f500b3" authors = ["Juergen Fuhrmann "] -version = "1.4" +version = "1.4.0" [deps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Requires = "ae029012-a4dd-5104-9daa-d747884805df" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" @@ -15,8 +17,6 @@ Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" SuiteSparse = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" -ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" [weakdeps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 2894027..285d0c2 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -28,25 +28,39 @@ include("matrix/sparsematrixcsc.jl") include("matrix/sparsematrixlnk.jl") include("matrix/extendable.jl") -export SparseMatrixLNK, - ExtendableSparseMatrix, flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse +export SparseMatrixLNK, ExtendableSparseMatrix, flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet +@warn "ESMP!" 
include("matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl") -export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK -export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!, rawupdateindex!, updateindex!, compare_matrices_light include("factorizations/ilu_Al-Kurdi_Mittal.jl") -using .ILUAM - +#using .ILUAM +include("factorizations/pilu_Al-Kurdi_Mittal.jl") +#using .PILUAM include("factorizations/factorizations.jl") +include("factorizations/simple_iteration.jl") +export simple, simple! + +include("matrix/sprand.jl") +export sprand!, sprand_sdd!, fdrand, fdrand!, fdrand_coo, solverbenchmark + + + + +export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK +export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!, rawupdateindex!, updateindex!, compare_matrices_light + + export JacobiPreconditioner, ILU0Preconditioner, ILUZeroPreconditioner, + ILUAMPreconditioner, + PILUAMPreconditioner, PointBlockILUZeroPreconditioner, ParallelJacobiPreconditioner, ParallelILU0Preconditioner, @@ -57,13 +71,6 @@ export AbstractFactorization, LUFactorization, CholeskyFactorization, SparspakLU export issolver export factorize!, update! -include("factorizations/simple_iteration.jl") -export simple, simple! - -include("matrix/sprand.jl") -export sprand!, sprand_sdd!, fdrand, fdrand!, fdrand_coo, solverbenchmark - - @static if !isdefined(Base, :get_extension) function __init__() @require Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2" begin diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index ead23c5..2d56fce 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -51,6 +51,52 @@ Determine if factorization is a solver or not issolver(::AbstractLUFactorization) = true issolver(::AbstractPreconditioner) = false + + +"""" + @makefrommatrix(fact) + +For an AbstractFactorization `MyFact`, provide methods +``` + MyFact(A::ExtendableSparseMatrix; kwargs...) + MyFact(A::SparseMatrixCSC; kwargs...) +``` +""" +macro makefrommatrix(fact) + return quote + function $(esc(fact))(A::ExtendableSparseMatrix; kwargs...) + factorize!($(esc(fact))(;kwargs...), A) + end + function $(esc(fact))(A::SparseMatrixCSC; kwargs...) + $(esc(fact))(ExtendableSparseMatrix(A); kwargs...) 
+ end + end +end + +include("ilu0.jl") +include("iluzero.jl") +include("iluam.jl") +include("piluam.jl") +include("parallel_jacobi.jl") +include("parallel_ilu0.jl") +include("sparspak.jl") +include("blockpreconditioner.jl") +include("jacobi.jl") + +@eval begin + @makefrommatrix ILU0Preconditioner + @makefrommatrix ILUZeroPreconditioner + @makefrommatrix ILUAMPreconditioner + @makefrommatrix PILUAMPreconditioner + @makefrommatrix PointBlockILUZeroPreconditioner + @makefrommatrix JacobiPreconditioner + @makefrommatrix ParallelJacobiPreconditioner + @makefrommatrix ParallelILU0Preconditioner + @makefrommatrix SparspakLU + @makefrommatrix UpdateteableBlockpreconditioner + @makefrommatrix BlockPreconditioner +end + """ ``` factorize!(factorization, matrix) @@ -65,8 +111,40 @@ function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrix) p end +function factorize!(p::PILUAMPreconditioner, A::ExtendableSparseMatrixParallel) + p.A = A + update!(p) + p +end + +#function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrixParallel) +# p.A = A +# update!(p) +# p +#end + +#factorize!(p::AbstractFactorization, A::ExtendableSparseMatrixParallel)=factorize!(p,ExtendableSparseMatrix(A.cscmatrix)) + +#factorize!(p::PILUAMPrecon, A::ExtendableSparseMatrixParallel)=factorize!(p,ExtendableSparseMatrix(A.cscmatrix)) factorize!(p::AbstractFactorization, A::SparseMatrixCSC)=factorize!(p,ExtendableSparseMatrix(A)) + +#function factorize!(p::PILUAMPrecon, A::ExtendableSparseMatrixParallel) +# factorize!(p, A) +#end + +#function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrixParallel) +# factorize!(p, A.cscmatrix) +#end + + +#function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrix) +# factorize!(p, A.cscmatrix) +#end + + +#factorize!(p::PILUAMPrecon, A::ExtendableSparseMatrixParallel)=factorize!(p,A) + """ ``` lu!(factorization, matrix) @@ -134,47 +212,6 @@ LinearAlgebra.ldiv!(fact::AbstractFactorization, v) = ldiv!(fact.factorization, -"""" - @makefrommatrix(fact) - -For an AbstractFactorization `MyFact`, provide methods -``` - MyFact(A::ExtendableSparseMatrix; kwargs...) - MyFact(A::SparseMatrixCSC; kwargs...) -``` -""" -macro makefrommatrix(fact) - return quote - function $(esc(fact))(A::ExtendableSparseMatrix; kwargs...) - factorize!($(esc(fact))(;kwargs...), A) - end - function $(esc(fact))(A::SparseMatrixCSC; kwargs...) - $(esc(fact))(ExtendableSparseMatrix(A); kwargs...) 
- end - end -end - -include("jacobi.jl") -include("ilu0.jl") -include("iluzero.jl") -include("iluam.jl") -include("parallel_jacobi.jl") -include("parallel_ilu0.jl") -include("sparspak.jl") -include("blockpreconditioner.jl") - -@eval begin - @makefrommatrix ILU0Preconditioner - @makefrommatrix ILUZeroPreconditioner - @makefrommatrix ILUAMPreconditioner - @makefrommatrix PointBlockILUZeroPreconditioner - @makefrommatrix JacobiPreconditioner - @makefrommatrix ParallelJacobiPreconditioner - @makefrommatrix ParallelILU0Preconditioner - @makefrommatrix SparspakLU - @makefrommatrix UpdateteableBlockpreconditioner - @makefrommatrix BlockPreconditioner -end if USE_GPL_LIBS #requires SuiteSparse which is not available in non-GPL builds diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal.jl b/src/factorizations/ilu_Al-Kurdi_Mittal.jl index a47bd50..97bb9a8 100644 --- a/src/factorizations/ilu_Al-Kurdi_Mittal.jl +++ b/src/factorizations/ilu_Al-Kurdi_Mittal.jl @@ -1,34 +1,27 @@ -module ILUAM -using LinearAlgebra, SparseArrays +#module ILUAM +#using LinearAlgebra, SparseArrays import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz +@info "ILUAM" mutable struct ILUAMPrecon{T,N} diag::AbstractVector nzval::AbstractVector - rowval::AbstractVector - colptr::AbstractVector + A::AbstractMatrix end -function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} - n = A.n # number of columns - nzval = copy(A.nzval) - diag = Vector{N}(undef, n) - - ILUAMPrecon{T, N}(diag, copy(A.nzval), copy(A.rowval), copy(A.colptr)) -end - -function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integer} - nzval = LU.nzval - diag = LU.diag - - colptr = LU.colptr - rowval = LU.rowval - n = A.n # number of columns - point = zeros(N, n) #Vector{N}(undef, n) +function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer} + @info "iluAM" + nzval = copy(A.nzval) + colptr = A.colptr + rowval = A.rowval + #nzval = ILU.nzval + n = A.n # number of columns + point = zeros(Ti, n) #Vector{Ti}(undef, n) + diag = Vector{Ti}(undef, n) # find diagonal entries for j=1:n @@ -64,25 +57,23 @@ function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integ for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(N) + point[rowval[v]] = zero(Ti) end end + #nzval, diag + ILUAMPrecon{Tv,Ti}(diag, nzval, A) end -function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} - LU = ILUAMPrecon(A::SparseMatrixCSC{T,N}) - iluAM!(LU, A) - LU -end - +function forward_subst_old!(y, v, nzval, diag, A) + n = A.n + colptr = A.colptr + rowval = A.rowval + + for i in eachindex(y) + y[i] = zero(Float64) + end -function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.colptr - rowval = ilu.rowval - diag = ilu.diag - y .= 0 + #y .= 0 @inbounds for j=1:n y[j] += v[j] for v=diag[j]+1:colptr[j+1]-1 @@ -93,44 +84,119 @@ function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} end -function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.colptr - rowval = ilu.rowval - diag = ilu.diag - wrk = copy(y) +function backward_subst_old!(x, y, nzval, diag, A) + n = A.n + colptr = A.colptr + rowval = A.rowval @inbounds for j=n:-1:1 - x[j] = wrk[j] / nzval[diag[j]] + x[j] = y[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 - wrk[rowval[i]] -= nzval[i]*x[j] + y[rowval[i]] -= nzval[i]*x[j] end + end - x + x end -function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where 
{T,N<:Integer} - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(x, ilu, y) - x +function ldiv!(x, ILU::ILUAMPrecon, b) + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + backward_subst_old!(x, y, nzval, diag, A) + x end -function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(b, ilu, y) - b +function ldiv!(ILU::ILUAMPrecon, b) + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + backward_subst_old!(b, y, nzval, diag, A) + b end function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} x = copy(b) ldiv!(x, ilu, b) + x end function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} length(ilu.nzval) end +#= +function forward_subst!(y, v, ilu) #::ILUAMPrecon{T,N}) where {T,N<:Integer} + @info "fw" + n = ilu.A.n + nzval = ilu.nzval + diag = ilu.diag + colptr = ilu.A.colptr + rowval = ilu.A.rowval + + for i in eachindex(y) + y[i] = zero(Float64) + end + + #y .= 0 + @inbounds for j=1:n + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + y +end + +function backward_subst!(x, y, ilu) #::ILUAMPrecon{T,N}) where {T,N<:Integer} + @info "bw" + n = ilu.A.n + nzval = ilu.nzval + diag = ilu.diag + colptr = ilu.A.colptr + rowval = ilu.A.rowval + #wrk = copy(y) + @inbounds for j=n:-1:1 + x[j] = y[j] / nzval[diag[j]] + + for i=colptr[j]:diag[j]-1 + y[rowval[i]] -= nzval[i]*x[j] + end + + end + x +end + +function iluam_subst(ILU::ILUAMPrecon, b) + y = copy(b) + forward_subst!(y, b, ILU) + z = copy(b) + backward_subst!(z, y, ILU) + z +end + + + +function iluam_subst_old(ILU::ILUAMPrecon, b) + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + z = copy(b) + backward_subst_old!(z, y, nzval, diag, A) + #backward_subst!(z, y, ILU) + z +end +=# + + -end \ No newline at end of file +#end \ No newline at end of file diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl b/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl new file mode 100644 index 0000000..26f9788 --- /dev/null +++ b/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl @@ -0,0 +1,146 @@ +module ILUAM +using LinearAlgebra, SparseArrays + +import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz + + +mutable struct ILUAMPrecon{T,N} + + diag::AbstractVector + nzval::AbstractVector + rowval::AbstractVector + colptr::AbstractVector + +end + +function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} + @info "ILUAMPrecon" + n = A.n # number of columns + nzval = copy(A.nzval) + diag = Vector{N}(undef, n) + + ILUAMPrecon{T, N}(diag, copy(A.nzval), copy(A.rowval), copy(A.colptr)) +end + +function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integer} + @info "iluAM!" + nzval = LU.nzval + diag = LU.diag + + colptr = LU.colptr + rowval = LU.rowval + n = A.n # number of columns + point = zeros(N, n) #Vector{N}(undef, n) + + t = zeros(5) + + # find diagonal entries + t[1] = @elapsed for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + t[2] += @elapsed for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
+ point[rowval[v]] = v + end + + t[3] += @elapsed for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + t[4] += @elapsed for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + t[5] += @elapsed for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(N) + end + end + t +end + +function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} + t = zeros(6) + t[1] = @elapsed (LU = ILUAMPrecon(A::SparseMatrixCSC{T,N})) + t[2:6] = iluAM!(LU, A) + @info t + LU +end + + +function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.colptr + rowval = ilu.rowval + diag = ilu.diag + y .= 0 + @inbounds for j=1:n + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + y +end + + +function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.colptr + rowval = ilu.rowval + diag = ilu.diag + wrk = copy(y) + @inbounds for j=n:-1:1 + x[j] = wrk[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 + wrk[rowval[i]] -= nzval[i]*x[j] + end + end + x +end + +function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + @info "AM ldiv1" + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(x, ilu, y) + x +end + +function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + @info "AM ldiv2" + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(b, ilu, y) + b +end + +function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + @info "AM bs " + x = copy(b) + ldiv!(x, ilu, b) +end + +function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} + length(ilu.nzval) +end + + +end \ No newline at end of file diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl b/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl new file mode 100644 index 0000000..a599094 --- /dev/null +++ b/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl @@ -0,0 +1,229 @@ +module ILUAM +using LinearAlgebra, SparseArrays + +#import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz + +@info "ILUAM" + +mutable struct ILUAMPrecon{T,N} + + diag::AbstractVector + nzval::AbstractVector + A::AbstractMatrix + +end + +function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} + @info "ILUAMPrecon" + n = A.n # number of columns + nzval = copy(A.nzval) + diag = Vector{N}(undef, n) + + ILUAMPrecon{T, N}(diag, copy(A.nzval), A) +end + + + +function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integer} + @info "iluAM!" + nzval = LU.nzval + diag = LU.diag + + colptr = LU.A.colptr + rowval = LU.A.rowval + n = A.n # number of columns + point = zeros(N, n) #Vector{N}(undef, n) + + t = zeros(5) + + # find diagonal entries + t[1] = @elapsed for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + t[2] += @elapsed for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
+ point[rowval[v]] = v + end + + t[3] += @elapsed for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + t[4] += @elapsed for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + t[5] += @elapsed for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(N) + end + end + t +end + + +function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer} + @info "iluAM" + nzval = copy(A.nzval) + colptr = A.colptr + rowval = A.rowval + #nzval = ILU.nzval + n = A.n # number of columns + point = zeros(Ti, n) #Vector{Ti}(undef, n) + diag = Vector{Ti}(undef, n) + + # find diagonal entries + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? + point[rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(Ti) + end + end + #nzval, diag + ILUAMPrecon{Tv,Ti}(diag, nzval, A) +end + +#function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} +# t = zeros(6) +# t[1] = @elapsed (LU = ILUAMPrecon(A::SparseMatrixCSC{T,N})) +# t[2:6] = iluAM!(LU, A) +# @info t +# LU +#end + + +function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.A.colptr + rowval = ilu.A.rowval + diag = ilu.diag + y .= 0 + @inbounds for j=1:n + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + y +end + + +function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.A.colptr + rowval = ilu.A.rowval + diag = ilu.diag + wrk = copy(y) + @inbounds for j=n:-1:1 + x[j] = wrk[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 + wrk[rowval[i]] -= nzval[i]*x[j] + end + end + x +end + +function ldiv_new!(x, ilu, v) + + n = ilu.A.n + y = Vector{Float64}(undef, n) + y .= 0 + nzval = ilu.nzval + colptr = ilu.A.colptr + rowval = ilu.A.rowval + diag = ilu.diag + #forward + @inbounds for j=1:n + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + + #backward + wrk = copy(y) + @inbounds for j=n:-1:1 + x[j] = wrk[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 + wrk[rowval[i]] -= nzval[i]*x[j] + end + end + x +end + +function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + #@info "AM ldiv1" + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(x, ilu, y) + x +end + +function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + @info "AM ldiv2" + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(b, ilu, y) + b +end + +function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + @info "AM bs " + x = copy(b) + ldiv!(x, ilu, b) + x +end + +function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} + length(ilu.nzval) +end + + +end \ No newline at end of file diff --git a/src/factorizations/iluam.jl b/src/factorizations/iluam.jl index 5d65e40..a4aed06 100644 --- a/src/factorizations/iluam.jl +++ b/src/factorizations/iluam.jl @@ -1,6 +1,6 @@ mutable struct ILUAMPreconditioner <: AbstractPreconditioner 
A::ExtendableSparseMatrix - factorization::ILUAM.ILUAMPrecon + factorization::ILUAMPrecon phash::UInt64 function ILUAMPreconditioner() p = new() @@ -22,10 +22,11 @@ function ILUAMPreconditioner end function update!(p::ILUAMPreconditioner) flush!(p.A) if p.A.phash != p.phash - p.factorization = ILUAM.iluAM(p.A.cscmatrix) + p.factorization = iluAM(p.A.cscmatrix) p.phash=p.A.phash else - ILUAM.ilu0!(p.factorization, p.A.cscmatrix) + @warn "fuck?" + ilu0!(p.factorization, p.A.cscmatrix) end p end diff --git a/src/factorizations/pilu_Al-Kurdi_Mittal.jl b/src/factorizations/pilu_Al-Kurdi_Mittal.jl new file mode 100644 index 0000000..a1ef818 --- /dev/null +++ b/src/factorizations/pilu_Al-Kurdi_Mittal.jl @@ -0,0 +1,270 @@ +#module PILUAM +#using Base.Threads +#using LinearAlgebra, SparseArrays + +import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz + +@info "PILUAM" + +mutable struct PILUAMPrecon{T,N} + + diag::AbstractVector + nzval::AbstractVector + A::AbstractMatrix + start::AbstractVector + nt::Integer + depth::Integer + +end + +function use_vector_par(n, nt, Ti) + point = [Vector{Ti}(undef, n) for tid=1:nt] + @threads for tid=1:nt + point[tid] = zeros(Ti, n) + end + point +end + +function compute_lu!(nzval, point, j0, j1, tid, rowval, colptr, diag, Ti) + for j=j0:j1-1 + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + for w=diag[i]+1:colptr[i+1]-1 + k = point[tid][rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = zero(Ti) + end + end +end + +function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer} + start = A.start + nt = A.nt + depth = A.depth + + colptr = A.cscmatrix.colptr + rowval = A.cscmatrix.rowval + nzval = Vector{Tv}(undef, length(rowval)) #copy(A.nzval) + n = A.cscmatrix.n # number of columns + diag = Vector{Ti}(undef, n) + point = use_vector_par(n, A.nt, Int32) + + # find diagonal entries + # + @threads for tid=1:depth*nt+1 + for j=start[tid]:start[tid+1]-1 + for v=colptr[j]:colptr[j+1]-1 + nzval[v] = A.cscmatrix.nzval[v] + if rowval[v] == j + diag[j] = v + end + #elseif rowval[v] + end + end + end + + #= + @info "piluAM" + nzval = copy(A.cscmatrix.nzval) + colptr = A.cscmatrix.colptr + rowval = A.cscmatrix.rowval + #nzval = ILU.nzval + n = A.n # number of columns + diag = Vector{Ti}(undef, n) + start = A.start + nt = A.nt + depth = A.depth + point = use_vector_par(n, nt, Ti) + + # find diagonal entries + @threads for tid=1:depth*nt+1 + for j=start[tid]:start[tid+1]-1 + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + end + + # compute L and U + for level=1:depth + @threads for tid=1:nt + compute_lu!(nzval, point, start[(level-1)*nt+tid], start[(level-1)*nt+tid+1], tid, rowval, colptr, diag, Ti) + end + end + + compute_lu!(nzval, point, start[depth*nt+1], start[depth*nt+2], 1, rowval, colptr, diag, Ti) + =# + + for level=1:depth + @threads for tid=1:nt + for j=start[(level-1)*nt+tid]:start[(level-1)*nt+tid+1]-1 + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + for w=diag[i]+1:colptr[i+1]-1 + k = point[tid][rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = zero(Ti) + 
end + end + end + end + + #point = zeros(Ti, n) #Vector{Ti}(undef, n) + for j=start[depth*nt+1]:start[depth*nt+2]-1 + for v=colptr[j]:colptr[j+1]-1 + point[1][rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + for w=diag[i]+1:colptr[i+1]-1 + k = point[1][rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + for v=colptr[j]:colptr[j+1]-1 + point[1][rowval[v]] = zero(Ti) + end + end + + #nzval, diag + PILUAMPrecon{Tv,Ti}(diag, nzval, A.cscmatrix, start, nt, depth) +end + +function forward_subst_old!(y, v, nzval, diag, start, nt, depth, A) + #@info "fwo" + n = A.n + colptr = A.colptr + rowval = A.rowval + + y .= 0 + + for level=1:depth + @threads for tid=1:nt + @inbounds for j=start[(level-1)*nt+tid]:start[(level-1)*nt+tid+1]-1 + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + end + end + + @inbounds for j=start[depth*nt+1]:start[depth*nt+2]-1 + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + +end + + +function backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) + #@info "bwo" + n = A.n + colptr = A.colptr + rowval = A.rowval + #wrk = copy(y) + + + @inbounds for j=start[depth*nt+2]-1:-1:start[depth*nt+1] + x[j] = y[j] / nzval[diag[j]] + + for i=colptr[j]:diag[j]-1 + y[rowval[i]] -= nzval[i]*x[j] + end + + end + + for level=depth:-1:1 + @threads for tid=1:nt + @inbounds for j=start[(level-1)*nt+tid+1]-1:-1:start[(level-1)*nt+tid] + x[j] = y[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 + y[rowval[i]] -= nzval[i]*x[j] + end + end + end + end + +end + +function ldiv!(x, ILU::PILUAMPrecon, b) + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + start = ILU.start + nt = ILU.nt + depth = ILU.depth + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, start, nt, depth, A) + backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) + x +end + +function ldiv!(ILU::PILUAMPrecon, b) + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + start = ILU.start + nt = ILU.nt + depth = ILU.depth + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, start, nt, depth, A) + backward_subst_old!(b, y, nzval, diag, start, nt, depth, A) + b +end + +function \(ilu::PILUAMPrecon{T,N}, b) where {T,N<:Integer} + x = copy(b) + ldiv!(x, ilu, b) + x +end + +function nnz(ilu::PILUAMPrecon{T,N}) where {T,N<:Integer} + length(ilu.nzval) +end + +#end \ No newline at end of file diff --git a/src/factorizations/piluam.jl b/src/factorizations/piluam.jl new file mode 100644 index 0000000..4a5fcdc --- /dev/null +++ b/src/factorizations/piluam.jl @@ -0,0 +1,36 @@ +mutable struct PILUAMPreconditioner <: AbstractPreconditioner + A::ExtendableSparseMatrixParallel + factorization::PILUAMPrecon + phash::UInt64 + function PILUAMPreconditioner() + p = new() + p.phash = 0 + p + end +end + +""" +``` +PILUAMPreconditioner() +PILUAMPreconditioner(matrix) +``` +Incomplete LU preconditioner with zero fill-in using ... . This preconditioner +also calculates and stores updates to the off-diagonal entries and thus delivers better convergence than the [`ILU0Preconditioner`](@ref). +""" +function PILUAMPreconditioner end + +function update!(p::PILUAMPreconditioner) + flush!(p.A) + if p.A.phash != p.phash + p.factorization = piluAM(p.A) + p.phash=p.A.phash + else + @warn "fuck?" 
+ ilu0!(p.factorization, p.A.cscmatrix) + end + p +end + +allow_views(::PILUAMPreconditioner)=true +allow_views(::Type{PILUAMPreconditioner})=true + diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl index 68dace8..9e63c37 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -36,6 +36,12 @@ mutable struct ExtendableSparseMatrixParallel{Tv, Ti <: Integer} <: AbstractSpar nt::Ti depth::Ti + + phash::UInt64 + + n::Ti + + m::Ti end @@ -46,7 +52,7 @@ function ExtendableSparseMatrixParallel{Tv, Ti}(nm, nt, depth; x0=0.0, x1=1.0) w grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) csc = spzeros(Tv, Ti, num_nodes(grid), num_nodes(grid)) lnk = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(grid), nnts[tid]) for tid=1:nt] - ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth) + ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth, phash(csc), csc.n, csc.m) end @@ -253,6 +259,32 @@ function Base.show(io::IO, ::MIME"text/plain", ext::ExtendableSparseMatrixParall end end +""" +`function entryexists2(CSC, i, j)` + +Find out if CSC already has an nonzero entry at i,j without any allocations +""" +function entryexists2(CSC, i, j) #find out if CSC already has an nonzero entry at i,j + #vals = + #ids = CSC.colptr[j]:(CSC.colptr[j+1]-1) + i in view(CSC.rowval, CSC.colptr[j]:(CSC.colptr[j+1]-1)) +end + + +function updatentryCSC2!(CSC::SparseArrays.SparseMatrixCSC{Tv, Ti}, i::Integer, j::Integer, v) where {Tv, Ti <: Integer} + p1 = CSC.colptr[j] + p2 = CSC.colptr[j+1]-1 + + searchk = searchsortedfirst(view(CSC.rowval, p1:p2), i) + p1 - 1 + + if (searchk <= p2) && (CSC.rowval[searchk] == i) + CSC.nzval[searchk] += v + return true + else + return false + end +end + Base.size(A::ExtendableSparseMatrixParallel) = (A.cscmatrix.m, A.cscmatrix.n) include("struct_flush.jl") diff --git a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl index c27aab0..38608ad 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl @@ -11,7 +11,7 @@ function flush!(A::ExtendableSparseMatrixParallel; do_dense=false, keep_zeros=tr A.cscmatrix = dense_flush_removezeros!(A.lnkmatrices, A.old_noderegions, A.sortednodesperthread, A.nt, A.rev_new_indices) end end - + A.phash = phash(A.cscmatrix) A.lnkmatrices = [SuperSparseMatrixLNK{matrixvaluetype(A), matrixindextype(A)}(num_nodes(A.grid), A.nnts[tid]) for tid=1:A.nt] end From bb0935c46ec2d0dff30283a36064c9e62c5017ca Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Fri, 23 Feb 2024 15:38:05 +0100 Subject: [PATCH 5/9] ColEntry from struct to mutable struct --- src/matrix/sparsematrixlnk.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/matrix/sparsematrixlnk.jl b/src/matrix/sparsematrixlnk.jl index 2976e88..b00c6fc 100644 --- a/src/matrix/sparsematrixlnk.jl +++ b/src/matrix/sparsematrixlnk.jl @@ -278,7 +278,7 @@ end # Struct holding pair of value and row # number, for sorting -struct ColEntry{Tv, Ti <: Integer} +mutable struct ColEntry{Tv, Ti <: Integer} rowval::Ti nzval::Tv end From 47a6a96f71e06c216bb3eeb32403ff45c8179128 
Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Mon, 26 Feb 2024 14:10:38 +0100 Subject: [PATCH 6/9] enable deeper partitioning / fixing some preparatory functions --- src/factorizations/ilu_Al-Kurdi_Mittal.jl | 4 +- src/factorizations/iluam.jl | 1 + src/factorizations/pilu_Al-Kurdi_Mittal.jl | 2 + src/factorizations/piluam.jl | 1 + .../ExtendableSparseParallel.jl | 2 +- .../preparatory.jl | 416 ++++++++++++++---- 6 files changed, 336 insertions(+), 90 deletions(-) diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal.jl b/src/factorizations/ilu_Al-Kurdi_Mittal.jl index 97bb9a8..0b6b1b2 100644 --- a/src/factorizations/ilu_Al-Kurdi_Mittal.jl +++ b/src/factorizations/ilu_Al-Kurdi_Mittal.jl @@ -14,7 +14,7 @@ mutable struct ILUAMPrecon{T,N} end function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer} - @info "iluAM" + #@info "iluAM" nzval = copy(A.nzval) colptr = A.colptr rowval = A.rowval @@ -100,6 +100,7 @@ function backward_subst_old!(x, y, nzval, diag, A) end function ldiv!(x, ILU::ILUAMPrecon, b) + #@info "iluam ldiv 1" nzval = ILU.nzval diag = ILU.diag A = ILU.A @@ -111,6 +112,7 @@ function ldiv!(x, ILU::ILUAMPrecon, b) end function ldiv!(ILU::ILUAMPrecon, b) + #@info "iluam ldiv 2" nzval = ILU.nzval diag = ILU.diag A = ILU.A diff --git a/src/factorizations/iluam.jl b/src/factorizations/iluam.jl index a4aed06..6d061b0 100644 --- a/src/factorizations/iluam.jl +++ b/src/factorizations/iluam.jl @@ -22,6 +22,7 @@ function ILUAMPreconditioner end function update!(p::ILUAMPreconditioner) flush!(p.A) if p.A.phash != p.phash + @warn "p.A.phash != p.phash" p.factorization = iluAM(p.A.cscmatrix) p.phash=p.A.phash else diff --git a/src/factorizations/pilu_Al-Kurdi_Mittal.jl b/src/factorizations/pilu_Al-Kurdi_Mittal.jl index a1ef818..15a8b23 100644 --- a/src/factorizations/pilu_Al-Kurdi_Mittal.jl +++ b/src/factorizations/pilu_Al-Kurdi_Mittal.jl @@ -230,6 +230,7 @@ function backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) end function ldiv!(x, ILU::PILUAMPrecon, b) + #@info "piluam ldiv 1" nzval = ILU.nzval diag = ILU.diag A = ILU.A @@ -244,6 +245,7 @@ function ldiv!(x, ILU::PILUAMPrecon, b) end function ldiv!(ILU::PILUAMPrecon, b) + #@info "piluam ldiv 2" nzval = ILU.nzval diag = ILU.diag A = ILU.A diff --git a/src/factorizations/piluam.jl b/src/factorizations/piluam.jl index 4a5fcdc..075f73f 100644 --- a/src/factorizations/piluam.jl +++ b/src/factorizations/piluam.jl @@ -22,6 +22,7 @@ function PILUAMPreconditioner end function update!(p::PILUAMPreconditioner) flush!(p.A) if p.A.phash != p.phash + @warn "p.A.phash != p.phash" p.factorization = piluAM(p.A) p.phash=p.A.phash else diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl index 9e63c37..b635a33 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -49,7 +49,7 @@ end function ExtendableSparseMatrixParallel{Tv, Ti}(nm, nt, depth; x0=0.0, x1=1.0) where {Tv, Ti <: Integer} - grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) + grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts, depth = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) csc = spzeros(Tv, Ti, num_nodes(grid), num_nodes(grid)) lnk = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(grid), nnts[tid]) for tid=1:nt] ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, grid, nnts, 
s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth, phash(csc), csc.n, csc.m) diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl index e14a066..a29356c 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl @@ -6,23 +6,36 @@ `depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... To assemble the system matrix parallely, things such as `cellsforpart` (= which thread takes which cells) need to be computed in advance. This is done here. """ -function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, x0=0.0, x1=1.0) +function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, x0=0.0, x1=1.0, minsize_sepa=10, do_print=false, check_partition=false) grid = getgrid(nm; x0, x1) - + adepth = 0 if sequential - (allcells, start, cellparts) = grid_to_graph_ps_multi!(grid, nt, depth)#) + (allcells, start, cellparts, adepth) = grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa, do_print)#) else - (allcells, start, cellparts) = grid_to_graph_ps_multi_par!(grid, nt, depth) + (allcells, start, cellparts, adepth) = grid_to_graph_ps_multi_par!(grid, nt, depth; minsize_sepa, do_print) + end + + if (adepth != depth) && do_print + @info "The requested depth of partitioning is too high. The depth is set to $adepth." end + depth = adepth + cfp = bettercellsforpart(cellparts, depth*nt+1) + + if check_partition + validate_partition(grid, cellparts, start, allcells, nt, depth) + end + + @info length.(cfp) + @info minimum(cellparts), maximum(cellparts), nt, depth + (nnts, s, onr, gi, gc, ni, rni, starts) = get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush( - cellparts, allcells, start, num_nodes(grid), Ti, nt + cellparts, allcells, start, num_nodes(grid), Ti, nt, depth ) - cfp = bettercellsforpart(cellparts, depth*nt+1) - return grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts -end + return grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts, adepth +end """ `function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt)` @@ -35,10 +48,10 @@ Furthermore, `nnts` (number of nodes of the threads) is computed, which contain `Ti` is the type (Int64,...) of the elements in the created arrays. `nt` is the number of threads. 
""" -function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt) +function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt, depth) - num_matrices = maximum(cellregs) - depth = Int(floor((num_matrices-1)/nt)) + #num_matrices = maximum(cellregs) + #depth = Int(floor((num_matrices-1)/nt)) #loop over each node, get the cellregion of the cell (the one not in the separator) write the position of that node inside the cellregions sorted ranking into a long vector #nnts = [zeros(Ti, nt+1) for i=1:depth+1] @@ -62,6 +75,11 @@ function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_r nnts[crmod] += 1 #sortednodesperthread[crmod,j] = nnts[crmod] #nnts[i][cr] #push!(tmp, crmod) + if tmpctr > depth+1 + @info "Cellregs: ", sortedcellregs + @info "Levels : ", Int.(ceil.(sortedcellregs/nt)) + @info "PartsMod: ", ((sortedcellregs.-1).%nt).+1 + end tmp[tmpctr] = crmod tmpctr += 1 end @@ -127,9 +145,6 @@ end - - - """ `function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes)` @@ -141,47 +156,77 @@ This function partitons the separator, which is done if `depth`>1 (see `grid_to_ `level0` is the separator-partitoning level, if the (first) separator is partitioned, level0 = 1, in the next iteration, level0 = 2... `preparatory_multi_ps` is the number of separator-cells. """ -function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes) - sepanodes = findall(x->x==nt+1, cellregs) +function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes, ri, gi, do_print) + # current number of cells treated + nc2 = size(ACSC, 1) - indptr = collect(1:nc+1) - indices = zeros(Int64, nc) - rowval = zeros(Int64, nc) + indptr = collect(1:nc2+1) + indices = zeros(Int64, nc2) + rowval = zeros(Int64, nc2) indptrT = collect(1:ctr_sepanodes+1) indicesT = zeros(Int64, ctr_sepanodes) rowvalT = zeros(Int64, ctr_sepanodes) - for (i,j) in enumerate(sepanodes) - indices[j] = i + for i=1:ctr_sepanodes + j = ri[i] + indices[j] = i indicesT[i] = j rowval[j] = 1 rowvalT[i] = 1 end - R = SparseMatrixCSC(ctr_sepanodes, nc, indptr, indices, rowval) - RT = SparseMatrixCSC(nc, ctr_sepanodes, indptrT, indicesT, rowvalT) - prod = ACSC*dropzeros(RT) + + + R = SparseMatrixCSC(ctr_sepanodes, nc2, indptr, indices, rowval) + RT = SparseMatrixCSC(nc2, ctr_sepanodes, indptrT, indicesT, rowvalT) + # current adjacency matrix, taken as a part of the given one ACSC RART = dropzeros(R)*ACSC*dropzeros(RT) - partition2 = Metis.partition(RART, nt) - cellregs2 = copy(partition2) - - ctr_sepanodes = 0 - for (i,j) in enumerate(sepanodes) - rows = RART.rowval[RART.colptr[i]:(RART.colptr[i+1]-1)] - cellregs[j] = level0*nt + cellregs2[i] - if minimum(partition2[rows]) != maximum(partition2[rows]) - cellregs[j] = (level0+1)*nt+1 - ctr_sepanodes += 1 - end - end - - RART, ctr_sepanodes + cellregs2 = Metis.partition(RART, nt) + + + for i=1:ctr_sepanodes + if cellregs[gi[i]] < level0*nt+1 + @warn "cell treated in this iteration was not a separator-cell last iteration" + end + cellregs[gi[i]] = level0*nt + cellregs2[i] + end + + # how many cells are in the separator of the new partiton (which is only computed on the separator of the old partition) + new_ctr_sepanodes = 0 + ri2 = Vector{Int64}(undef, ctr_sepanodes) + gi2 = Vector{Int64}(undef, ctr_sepanodes) + + for tid=1:nt + for i=1:ctr_sepanodes + if cellregs2[i] == tid + neighbors = RART.rowval[RART.colptr[i]:(RART.colptr[i+1]-1)] + rows = 
gi[vcat(neighbors, [i])] + #counts how many different regions (besides) the separator are adjacent to the current cell + x = how_many_different_below(cellregs[rows], (level0+1)*nt+1) + if x > 1 + cellregs[gi[i]] = (level0+1)*nt+1 + new_ctr_sepanodes += 1 + gi2[new_ctr_sepanodes] = gi[i] + ri2[new_ctr_sepanodes] = i + end + end + end + end + + + ri2 = ri2[1:new_ctr_sepanodes] + gi2 = gi2[1:new_ctr_sepanodes] + + if do_print + @info "At level $(level0+1), we found $new_ctr_sepanodes cells that have to be treated in the next iteration!" + end + + RART, new_ctr_sepanodes, ri2, gi2 end - """ `function grid_to_graph_ps_multi!(grid, nt, depth)` @@ -190,7 +235,7 @@ The function assigns colors/partitons to each cell in the `grid`. First, the gri `nt` is the number of threads. `depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... """ -function grid_to_graph_ps_multi!(grid, nt, depth) +function grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa=10, do_print=false) A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) number_cells_per_node = zeros(Int64, num_nodes(grid)) for j=1:num_cells(grid) @@ -224,27 +269,46 @@ function grid_to_graph_ps_multi!(grid, nt, depth) partition = Metis.partition(ACSC, nt) cellregs = copy(partition) + sn = Vector{Int64}(undef, num_cells(grid)) + gi = Vector{Int64}(undef, num_cells(grid)) ctr_sepanodes = 0 - for j=1:num_cells(grid) - rows = ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] - if minimum(partition[rows]) != maximum(partition[rows]) - cellregs[j] = nt+1 - ctr_sepanodes += 1 - end + + for tid=1:nt + for j=1:num_cells(grid) + if cellregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(cellregs[rows], nt+1) > 1 + cellregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodes += 1 + sn[ctr_sepanodes] = j + gi[ctr_sepanodes] = j + end + end + end end - RART = ACSC + + sn = sn[1:ctr_sepanodes] + gi = gi[1:ctr_sepanodes] + + if do_print + @info "At level $(1), we found $ctr_sepanodes cells that have to be treated in the next iteration!" 
+ end + + RART = copy(ACSC) + actual_depth = 1 for level=1:depth-1 - RART, ctr_sepanodes = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes) + RART, ctr_sepanodes, sn, gi = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end end - - - return allcells, start, cellregs + + return allcells, start, cellregs, actual_depth, ACSC end - -function grid_to_graph_ps_multi_par!(grid, nt, depth) - time = zeros(12) +function grid_to_graph_ps_multi_par!(grid, nt, depth; minsize_sepa=10, do_print=false) As = [ExtendableSparseMatrix{Int64, Int64}(num_cells(grid), num_cells(grid)) for tid=1:nt] number_cells_per_node = zeros(Int64, num_nodes(grid)) @@ -288,54 +352,64 @@ function grid_to_graph_ps_multi_par!(grid, nt, depth) end ACSC = add_all_par!(As).cscmatrix - - #SparseArrays.SparseMatrixCSC(A)) - - - partition = Metis.partition(ACSC, nt) - cellregs = copy(partition) - ctr_sepanodes_a = zeros(Int64, nt) + cellregs = Metis.partition(ACSC, nt) - cell_range = get_starts(num_cells(grid), nt) - Threads.@threads :static for tid=1:nt - for j in cell_range[tid]:cell_range[tid+1]-1 - rows = @view ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] - if minimum(partition[rows]) != maximum(partition[rows]) - cellregs[j] = nt+1 - ctr_sepanodes_a[tid] += 1 - end - end + sn = [Vector{Int64}(undef, Int(ceil(num_cells(grid)/nt))) for tid=1:nt] + ctr_sepanodess = zeros(Int64, nt) + + @threads for tid=1:nt + for j=1:num_cells(grid) + if cellregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(cellregs[rows], nt+1) > 1 + cellregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodess[tid] += 1 + sn[tid][ctr_sepanodess[tid]] = j + end + end + end end - - ctr_sepanodes = sum(ctr_sepanodes_a) - - #= - time[10] = @elapsed for j=1:num_cells(grid) - rows = ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] - if minimum(partition[rows]) != maximum(partition[rows]) - cellregs[j] = nt+1 - ctr_sepanodes += 1 - end - end - =# - RART = ACSC + + for tid=1:nt + sn[tid] = sn[tid][1:ctr_sepanodess[tid]] + end + ctr_sepanodes = sum(ctr_sepanodess) + sn = vcat(sn...) + gi = copy(sn) + + if do_print + @info "At level $(1), we found $ctr_sepanodes cells that have to be treated in the next iteration!" + end + + RART = ACSC + actual_depth = 1 for level=1:depth-1 - RART, ctr_sepanodes = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes) + RART, ctr_sepanodes, sn, gi = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end end - - - return allcells, start, cellregs + + #grid[CellRegions] = cellregs + #grid + return allcells, start, cellregs, actual_depth end +""" +`function add_all_par!(As)` +Add LNK matrices (stored in a vector) parallely (tree structure). +The result is stored in the first LNK matrix. 
+""" function add_all_par!(As) nt = length(As) depth = Int(floor(log2(nt))) ende = nt for level=1:depth - @threads :static for tid=1:2^(depth-level) + @threads for tid=1:2^(depth-level) #@info "$level, $tid" start = tid+2^(depth-level) while start <= ende @@ -425,3 +499,169 @@ function last_nz(x) end end + +function how_many_different_below(x0, y; u=0) + x = copy(x0) + z = unique(x) + t = findall(w->ww>u,z[t]) + length(t) +end + + + +function lookat_grid_to_graph_ps_multi!(nm, nt, depth) + grid = getgrid(nm) + A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) + number_cells_per_node = zeros(Int64, num_nodes(grid)) + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + number_cells_per_node[node_id] += 1 + end + end + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_cells_per_node) + number_cells_per_node .= 0 + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + cells = @view allcells[start[j]:start[j+1]-1] + for (i,id1) in enumerate(cells) + for id2 in cells[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + ACSC = SparseArrays.SparseMatrixCSC(A) + + partition = Metis.partition(ACSC, nt) + cellregs = copy(partition) + + sn = [] + gi = [] + ctr_sepanodes = 0 + for j=1:num_cells(grid) + rows = ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] + if minimum(partition[rows]) != maximum(partition[rows]) + cellregs[j] = nt+1 + ctr_sepanodes += 1 + push!(sn, j) + push!(gi, j) + end + end + RART = ACSC + #sn = 1:num_cells(grid) + #gi = 1:num_cells(grid) + for level=1:depth-1 + RART, ctr_sepanodes, sn, gi = separate_careful!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi) + if ctr_sepanodes == 0 + return RART + end + end + + + #return allcells, start, cellregs + RART +end + + +function adjacencies(grid) + A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) + number_cells_per_node = zeros(Int64, num_nodes(grid)) + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + number_cells_per_node[node_id] += 1 + end + end + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_cells_per_node) + number_cells_per_node .= 0 + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + cells = @view allcells[start[j]:start[j+1]-1] + for (i,id1) in enumerate(cells) + for id2 in cells[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + allcells, start, SparseArrays.SparseMatrixCSC(A) +end + +function check_adjacencies(nm) + grid = getgrid(nm) + allcells, start, A = adjacencies(grid) + + i = 1 + cells1 = sort(vcat([i], A.rowval[A.colptr[i]:(A.colptr[i+1]-1)])) #adjacent cells + nodes2 = grid[CellNodes][:,i] + cells2 = sort(unique(vcat([allcells[start[j]:start[j+1]-1] for j in nodes2]...))) + + @info cells1 + @info cells2 + @info maximum(abs.(cells1-cells2)) + + +end + +#= +function check_partition(nm, nt, depth) + grid = getgrid(nm) + + (allcells, start, cellregs, adepth, ACSC) = grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa=10, do_print=true)#) + + if (adepth != depth) + @info "The requested depth of partitioning is too high. The depth is set to $adepth." 
+ end + depth = adepth + + validate_partition(num_nodes(grid), num_cells(grid), grid, cellregs, start, allcells, nt, depth, ACSC) +end +=# + +function validate_partition(grid, cellregs, start, allcells, nt, depth) + @info "Node based validation" + violation_ctr = 0 + + for j=1:num_nodes(grid) + cells = @view allcells[start[j]:start[j+1]-1] + sortedcellregs = unique(sort(cellregs[cells])) + levels = Int.(ceil.(sortedcellregs/nt)) + + for i=1:depth+1 + ids_lev = findall(x->x==i, levels) + if length(ids_lev) > 1 + violation_ctr += 1 + + if violation_ctr == 1 + @info "Node Id : ", j + @info "Cellregs: ", sortedcellregs + @info "Levels : ", levels + + loc = findall(x->x==4, Int.(ceil.(cellregs[allcells[start[j]:start[j+1]-1]]/nt))) + cells_at_level4 = allcells[loc.+(start[j]-1)] + @info cells_at_level4, cellregs[cells_at_level4] + @info grid[CellNodes][:,cells_at_level4[1]], grid[CellNodes][:,cells_at_level4[2]] + end + end + end + end + @info "We found $violation_ctr violation(s)" +end \ No newline at end of file From b2693ec41fc21fb3fdbb71648f47a6d21aa1e690 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Sun, 17 Mar 2024 10:57:59 +0100 Subject: [PATCH 7/9] add parallel matrix vector product --- src/ExtendableSparse.jl | 1 + src/factorizations/filu_Al-Kurdi_Mittal.jl | 160 ++++++++++++++++++ src/factorizations/ilu_Al-Kurdi_Mittal.jl | 65 ++++++- src/factorizations/iluam.jl | 4 +- src/factorizations/pilu_Al-Kurdi_Mittal.jl | 149 ++++++++++++---- src/factorizations/piluam.jl | 5 +- .../ExtendableSparseParallel.jl | 63 ++++++- .../preparatory.jl | 4 +- .../struct_flush.jl | 4 + 9 files changed, 410 insertions(+), 45 deletions(-) create mode 100644 src/factorizations/filu_Al-Kurdi_Mittal.jl diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 285d0c2..bcf85e6 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -37,6 +37,7 @@ export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet include("matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl") + include("factorizations/ilu_Al-Kurdi_Mittal.jl") #using .ILUAM include("factorizations/pilu_Al-Kurdi_Mittal.jl") diff --git a/src/factorizations/filu_Al-Kurdi_Mittal.jl b/src/factorizations/filu_Al-Kurdi_Mittal.jl new file mode 100644 index 0000000..2099208 --- /dev/null +++ b/src/factorizations/filu_Al-Kurdi_Mittal.jl @@ -0,0 +1,160 @@ +#module PILUAM +#using Base.Threads +#using LinearAlgebra, SparseArrays + +import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz + +@info "PILUAM" + +mutable struct PILUAMPrecon{T,N} + + diag::AbstractVector + nzval::AbstractVector + A::AbstractMatrix + +end + +function iluAM!(ILU::PILUAMPrecon{Tv,Ti}, A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <:Integer} + @info "filuAM!" + diag = ILU.diag + nzval = ILU.nzval + + nzval = copy(A.cscmatrix.nzval) + diag = Vector{Ti}(undef, n) + ILU.A = A + colptr = A.cscmatrix.colptr + rowval = A.cscmatrix.rowval + n = A.n + point = zeros(Ti, n) + + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
+ point[rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(Ti) + end + end + +end + + +function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer} + @info "filuAM, $(A[1,1])" + nzval = copy(A.cscmatrix.nzval) + colptr = A.cscmatrix.colptr + rowval = A.cscmatrix.rowval + #nzval = ILU.nzval + n = A.n # number of columns + point = zeros(Ti, n) #Vector{Ti}(undef, n) + diag = Vector{Ti}(undef, n) + + # find diagonal entries + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? + point[rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(Ti) + end + end + #nzval, diag + PILUAMPrecon{Tv,Ti}(diag, nzval, A) +end + + + +function ldiv!(x, ILU::PILUAMPrecon, b) + #@info "iluam ldiv 1" + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A.cscmatrix + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + backward_subst_old!(x, y, nzval, diag, A) + @info "FILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x)) + #, maximum(abs.(b-A*x)), b[1], x[1], y[1] + x +end + + +function ldiv!(ILU::PILUAMPrecon, b) + #@info "iluam ldiv 2" + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A.cscmatrix + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + backward_subst_old!(b, y, nzval, diag, A) + b +end + +function \(ilu::PILUAMPrecon{T,N}, b) where {T,N<:Integer} + x = copy(b) + ldiv!(x, ilu, b) + x +end + +function nnz(ilu::PILUAMPrecon{T,N}) where {T,N<:Integer} + length(ilu.nzval) +end + +#end \ No newline at end of file diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal.jl b/src/factorizations/ilu_Al-Kurdi_Mittal.jl index 0b6b1b2..ad2207d 100644 --- a/src/factorizations/ilu_Al-Kurdi_Mittal.jl +++ b/src/factorizations/ilu_Al-Kurdi_Mittal.jl @@ -3,7 +3,7 @@ import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz -@info "ILUAM" +#@info "ILUAM" mutable struct ILUAMPrecon{T,N} @@ -13,6 +13,58 @@ mutable struct ILUAMPrecon{T,N} end + +function iluAM!(ILU::ILUAMPrecon{Tv,Ti}, A::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <:Integer} + diag = ILU.diag + nzval = ILU.nzval + + nzval = copy(A.nzval) + diag = Vector{Ti}(undef, n) + ILU.A = A + colptr = A.colptr + rowval = A.rowval + n = A.n + point = zeros(Ti, n) + + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
+			point[rowval[v]] = v
+		end
+
+		for v=colptr[j]:diag[j]-1
+			i = rowval[v]
+			#nzval[v] /= nzval[diag[i]]
+			for w=diag[i]+1:colptr[i+1]-1
+				k = point[rowval[w]]
+				if k>0
+					nzval[k] -= nzval[v]*nzval[w]
+				end
+			end
+		end
+
+		for v=diag[j]+1:colptr[j+1]-1
+			nzval[v] /= nzval[diag[j]]
+		end
+
+
+		for v=colptr[j]:colptr[j+1]-1
+			point[rowval[v]] = zero(Ti)
+		end
+	end
+
+end
+
 function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer}
 	#@info "iluAM"
 	nzval = copy(A.nzval)
@@ -33,6 +85,9 @@ function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer}
 			#elseif rowval[v]
 		end
 	end
+	
+	#@info diag[1:20]'
+	#@info diag[end-20:end]'
 	
 	# compute L and U
 	for j=1:n
@@ -65,6 +120,7 @@ function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer}
 end
 
 function forward_subst_old!(y, v, nzval, diag, A)
+	#@info "fso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])"
 	n = A.n
 	colptr = A.colptr
 	rowval = A.rowval
@@ -85,6 +141,7 @@ end
 
 
 function backward_subst_old!(x, y, nzval, diag, A)
+	#@info "bso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])"
 	n = A.n
 	colptr = A.colptr
 	rowval = A.rowval
@@ -99,7 +156,9 @@ function backward_subst_old!(x, y, nzval, diag, A)
 	x
 end
 
+
 function ldiv!(x, ILU::ILUAMPrecon, b)
+	#t = @elapsed begin
 	#@info "iluam ldiv 1"
 	nzval = ILU.nzval
 	diag = ILU.diag
@@ -108,6 +167,10 @@ function ldiv!(x, ILU::ILUAMPrecon, b)
 	#forward_subst!(y, b, ILU)
 	forward_subst_old!(y, b, nzval, diag, A)
 	backward_subst_old!(x, y, nzval, diag, A)
+	#@info "ILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x)), nnz(A) #, A[10,10]
+	#, b[1], x[1], y[1]#maximum(abs.(b)), maximum(abs.(x))
+	#end
+	#println("$t") #@info t
 	x
 end
 
diff --git a/src/factorizations/iluam.jl b/src/factorizations/iluam.jl
index 6d061b0..24b75be 100644
--- a/src/factorizations/iluam.jl
+++ b/src/factorizations/iluam.jl
@@ -22,12 +22,10 @@ function ILUAMPreconditioner end
 function update!(p::ILUAMPreconditioner)
     flush!(p.A)
     if p.A.phash != p.phash
-        @warn "p.A.phash != p.phash"
         p.factorization = iluAM(p.A.cscmatrix)
         p.phash=p.A.phash
     else
-        @warn "fuck?"
-        ilu0!(p.factorization, p.A.cscmatrix)
+        iluAM!(p.factorization, p.A.cscmatrix)
     end
     p
 end
diff --git a/src/factorizations/pilu_Al-Kurdi_Mittal.jl b/src/factorizations/pilu_Al-Kurdi_Mittal.jl
index 15a8b23..f2861ed 100644
--- a/src/factorizations/pilu_Al-Kurdi_Mittal.jl
+++ b/src/factorizations/pilu_Al-Kurdi_Mittal.jl
@@ -4,7 +4,7 @@
 
 import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz
 
-@info "PILUAM"
+#@info "PILUAM"
 
 mutable struct PILUAMPrecon{T,N}
 
@@ -51,6 +51,95 @@ function compute_lu!(nzval, point, j0, j1, tid, rowval, colptr, diag, Ti)
     end
 end
 
+function piluAM!(ILU::PILUAMPrecon{Tv,Ti}, A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer}
+    @info "piluAM!"
+ diag = ILU.diag + nzval = ILU.nzval + ILU.A = A + start = ILU.start + + ILU.nt = A.nt + nt = A.nt + + ILU.depth = A.depth + depth = A.depth + + + colptr = A.cscmatrix.colptr + rowval = A.cscmatrix.rowval + n = A.cscmatrix.n # number of columns + diag = Vector{Ti}(undef, n) + nzval = Vector{Tv}(undef, length(rowval)) #copy(A.nzval) + point = use_vector_par(n, A.nt, Int32) + + + @threads for tid=1:depth*nt+1 + for j=start[tid]:start[tid+1]-1 + for v=colptr[j]:colptr[j+1]-1 + nzval[v] = A.cscmatrix.nzval[v] + if rowval[v] == j + diag[j] = v + end + #elseif rowval[v] + end + end + end + + for level=1:depth + @threads for tid=1:nt + for j=start[(level-1)*nt+tid]:start[(level-1)*nt+tid+1]-1 + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + for w=diag[i]+1:colptr[i+1]-1 + k = point[tid][rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = zero(Ti) + end + end + end + end + + #point = zeros(Ti, n) #Vector{Ti}(undef, n) + for j=start[depth*nt+1]:start[depth*nt+2]-1 + for v=colptr[j]:colptr[j+1]-1 + point[1][rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + for w=diag[i]+1:colptr[i+1]-1 + k = point[1][rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + for v=colptr[j]:colptr[j+1]-1 + point[1][rowval[v]] = zero(Ti) + end + end + +end + function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer} start = A.start nt = A.nt @@ -65,6 +154,22 @@ function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Intege # find diagonal entries # + + #= + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + nzval[v] = A.cscmatrix.nzval[v] + if rowval[v] == j + diag[j] = v + #break + end + #elseif rowval[v] + end + end + =# + + + @threads for tid=1:depth*nt+1 for j=start[tid]:start[tid+1]-1 for v=colptr[j]:colptr[j+1]-1 @@ -77,41 +182,9 @@ function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Intege end end - #= - @info "piluAM" - nzval = copy(A.cscmatrix.nzval) - colptr = A.cscmatrix.colptr - rowval = A.cscmatrix.rowval - #nzval = ILU.nzval - n = A.n # number of columns - diag = Vector{Ti}(undef, n) - start = A.start - nt = A.nt - depth = A.depth - point = use_vector_par(n, nt, Ti) - # find diagonal entries - @threads for tid=1:depth*nt+1 - for j=start[tid]:start[tid+1]-1 - for v=colptr[j]:colptr[j+1]-1 - if rowval[v] == j - diag[j] = v - break - end - #elseif rowval[v] - end - end - end - - # compute L and U - for level=1:depth - @threads for tid=1:nt - compute_lu!(nzval, point, start[(level-1)*nt+tid], start[(level-1)*nt+tid+1], tid, rowval, colptr, diag, Ti) - end - end - - compute_lu!(nzval, point, start[depth*nt+1], start[depth*nt+2], 1, rowval, colptr, diag, Ti) - =# + #@info diag[1:20]' + #@info diag[end-20:end]' for level=1:depth @threads for tid=1:nt @@ -171,6 +244,7 @@ function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Intege end function forward_subst_old!(y, v, nzval, diag, start, nt, depth, A) + #@info "pfso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])" #@info "fwo" n = A.n colptr = A.colptr @@ -200,6 +274,8 @@ end function backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) + #@info "pbso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])" + #@info "bwo" n = A.n colptr = A.colptr @@ -229,6 
+305,7 @@ function backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) end + function ldiv!(x, ILU::PILUAMPrecon, b) #@info "piluam ldiv 1" nzval = ILU.nzval @@ -241,6 +318,8 @@ function ldiv!(x, ILU::PILUAMPrecon, b) #forward_subst!(y, b, ILU) forward_subst_old!(y, b, nzval, diag, start, nt, depth, A) backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) + #@info "PILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x)), nnz(A) #, A[10,10] + #@info "PILUAM:", maximum(abs.(b-A*x)), b[1], x[1], maximum(abs.(b)), maximum(abs.(x)) x end diff --git a/src/factorizations/piluam.jl b/src/factorizations/piluam.jl index 075f73f..50a46fd 100644 --- a/src/factorizations/piluam.jl +++ b/src/factorizations/piluam.jl @@ -20,14 +20,13 @@ also calculates and stores updates to the off-diagonal entries and thus delivers function PILUAMPreconditioner end function update!(p::PILUAMPreconditioner) + #@warn "Should flush now", nnz_noflush(p.A) flush!(p.A) if p.A.phash != p.phash - @warn "p.A.phash != p.phash" p.factorization = piluAM(p.A) p.phash=p.A.phash else - @warn "fuck?" - ilu0!(p.factorization, p.A.cscmatrix) + piluAM!(p.factorization, p.A) end p end diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl index b635a33..2c91a12 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -285,6 +285,67 @@ function updatentryCSC2!(CSC::SparseArrays.SparseMatrixCSC{Tv, Ti}, i::Integer, end end -Base.size(A::ExtendableSparseMatrixParallel) = (A.cscmatrix.m, A.cscmatrix.n) + + +Base.size(A::ExtendableSparseMatrixParallel) = (A.cscmatrix.m, A.cscmatrix.n) include("struct_flush.jl") + + + +import LinearAlgebra.mul! + +""" +```function LinearAlgebra.mul!(y, A, x)``` + +This overwrites the mul! 
function for A::ExtendableSparseMatrixParallel +""" +function LinearAlgebra.mul!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv, Ti}, x::AbstractVector{Tv}) where {Tv, Ti<:Integer} + #@info "my matvec" + _, nnzLNK = nnz_noflush(A) + @assert nnzLNK == 0 + #mul!(y, A.cscmatrix, x) + matvec!(y, A, x) +end + + +""" +```function matvec!(y, A, x)``` + +y <- A*x, where y and x are vectors and A is an ExtendableSparseMatrixParallel +this computation is done in parallel, it has the same result as y = A.cscmatrix*x +""" +function matvec!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv,Ti}, x::AbstractVector{Tv}) where {Tv, Ti<:Integer} + #a1 = @allocated begin + nt = A.nt + depth = A.depth + colptr = A.cscmatrix.colptr + nzv = A.cscmatrix.nzval + rv = A.cscmatrix.rowval + + LinearAlgebra._rmul_or_fill!(y, 0.0) + + #end + #a2 = @allocated + for level=1:depth + @threads for tid::Int64=1:nt + for col::Int64=A.start[(level-1)*nt+tid]:A.start[(level-1)*nt+tid+1]-1 + for row::Int64=colptr[col]:colptr[col+1]-1 # in nzrange(A, col) + y[rv[row]] += nzv[row]*x[col] + end + end + end + end + + @threads for tid=1:1 + #a3 = @allocated + for col::Int64=A.start[depth*nt+1]:A.start[depth*nt+2]-1 + for row::Int64=colptr[col]:colptr[col+1]-1 #nzrange(A, col) + y[rv[row]] += nzv[row]*x[col] + end + end + end + + #println(a1,a2,a3) + y +end diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl index a29356c..7eeb3d3 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl @@ -26,8 +26,8 @@ function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, validate_partition(grid, cellparts, start, allcells, nt, depth) end - @info length.(cfp) - @info minimum(cellparts), maximum(cellparts), nt, depth + #@info length.(cfp) + #@info minimum(cellparts), maximum(cellparts), nt, depth (nnts, s, onr, gi, gc, ni, rni, starts) = get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush( cellparts, allcells, start, num_nodes(grid), Ti, nt, depth diff --git a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl index 38608ad..73471dc 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl @@ -1,5 +1,9 @@ function flush!(A::ExtendableSparseMatrixParallel; do_dense=false, keep_zeros=true) + _, nnzLNK = nnz_noflush(A) + if nnzLNK == 0 + return + end if !do_dense A.cscmatrix = A.cscmatrix+sparse_flush!(A; keep_zeros) From c3f262c09f64648f35222c82c5775f266df344e9 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Sun, 17 Mar 2024 11:03:16 +0100 Subject: [PATCH 8/9] remove old code --- src/ExtendableSparse.jl | 2 +- src/factorizations/filu_Al-Kurdi_Mittal.jl | 160 -------------- src/factorizations/ilu_Al-Kurdi_Mittal_0.jl | 146 ------------- src/factorizations/ilu_Al-Kurdi_Mittal_1.jl | 229 -------------------- 4 files changed, 1 insertion(+), 536 deletions(-) delete mode 100644 src/factorizations/filu_Al-Kurdi_Mittal.jl delete mode 100644 src/factorizations/ilu_Al-Kurdi_Mittal_0.jl delete mode 100644 src/factorizations/ilu_Al-Kurdi_Mittal_1.jl diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index bcf85e6..9c490ca 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -33,7 +33,7 @@ export SparseMatrixLNK, ExtendableSparseMatrix, flush!, nnz, 
updateindex!, rawup export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet -@warn "ESMP!" +#@warn "ESMP!" include("matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl") diff --git a/src/factorizations/filu_Al-Kurdi_Mittal.jl b/src/factorizations/filu_Al-Kurdi_Mittal.jl deleted file mode 100644 index 2099208..0000000 --- a/src/factorizations/filu_Al-Kurdi_Mittal.jl +++ /dev/null @@ -1,160 +0,0 @@ -#module PILUAM -#using Base.Threads -#using LinearAlgebra, SparseArrays - -import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz - -@info "PILUAM" - -mutable struct PILUAMPrecon{T,N} - - diag::AbstractVector - nzval::AbstractVector - A::AbstractMatrix - -end - -function iluAM!(ILU::PILUAMPrecon{Tv,Ti}, A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <:Integer} - @info "filuAM!" - diag = ILU.diag - nzval = ILU.nzval - - nzval = copy(A.cscmatrix.nzval) - diag = Vector{Ti}(undef, n) - ILU.A = A - colptr = A.cscmatrix.colptr - rowval = A.cscmatrix.rowval - n = A.n - point = zeros(Ti, n) - - for j=1:n - for v=colptr[j]:colptr[j+1]-1 - if rowval[v] == j - diag[j] = v - break - end - #elseif rowval[v] - end - end - - # compute L and U - for j=1:n - for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? - point[rowval[v]] = v - end - - for v=colptr[j]:diag[j]-1 - i = rowval[v] - #nzval[v] /= nzval[diag[i]] - for w=diag[i]+1:colptr[i+1]-1 - k = point[rowval[w]] - if k>0 - nzval[k] -= nzval[v]*nzval[w] - end - end - end - - for v=diag[j]+1:colptr[j+1]-1 - nzval[v] /= nzval[diag[j]] - end - - - for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(Ti) - end - end - -end - - -function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer} - @info "filuAM, $(A[1,1])" - nzval = copy(A.cscmatrix.nzval) - colptr = A.cscmatrix.colptr - rowval = A.cscmatrix.rowval - #nzval = ILU.nzval - n = A.n # number of columns - point = zeros(Ti, n) #Vector{Ti}(undef, n) - diag = Vector{Ti}(undef, n) - - # find diagonal entries - for j=1:n - for v=colptr[j]:colptr[j+1]-1 - if rowval[v] == j - diag[j] = v - break - end - #elseif rowval[v] - end - end - - # compute L and U - for j=1:n - for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
- point[rowval[v]] = v - end - - for v=colptr[j]:diag[j]-1 - i = rowval[v] - #nzval[v] /= nzval[diag[i]] - for w=diag[i]+1:colptr[i+1]-1 - k = point[rowval[w]] - if k>0 - nzval[k] -= nzval[v]*nzval[w] - end - end - end - - for v=diag[j]+1:colptr[j+1]-1 - nzval[v] /= nzval[diag[j]] - end - - - for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(Ti) - end - end - #nzval, diag - PILUAMPrecon{Tv,Ti}(diag, nzval, A) -end - - - -function ldiv!(x, ILU::PILUAMPrecon, b) - #@info "iluam ldiv 1" - nzval = ILU.nzval - diag = ILU.diag - A = ILU.A.cscmatrix - y = copy(b) - #forward_subst!(y, b, ILU) - forward_subst_old!(y, b, nzval, diag, A) - backward_subst_old!(x, y, nzval, diag, A) - @info "FILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x)) - #, maximum(abs.(b-A*x)), b[1], x[1], y[1] - x -end - - -function ldiv!(ILU::PILUAMPrecon, b) - #@info "iluam ldiv 2" - nzval = ILU.nzval - diag = ILU.diag - A = ILU.A.cscmatrix - y = copy(b) - #forward_subst!(y, b, ILU) - forward_subst_old!(y, b, nzval, diag, A) - backward_subst_old!(b, y, nzval, diag, A) - b -end - -function \(ilu::PILUAMPrecon{T,N}, b) where {T,N<:Integer} - x = copy(b) - ldiv!(x, ilu, b) - x -end - -function nnz(ilu::PILUAMPrecon{T,N}) where {T,N<:Integer} - length(ilu.nzval) -end - -#end \ No newline at end of file diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl b/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl deleted file mode 100644 index 26f9788..0000000 --- a/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl +++ /dev/null @@ -1,146 +0,0 @@ -module ILUAM -using LinearAlgebra, SparseArrays - -import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz - - -mutable struct ILUAMPrecon{T,N} - - diag::AbstractVector - nzval::AbstractVector - rowval::AbstractVector - colptr::AbstractVector - -end - -function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} - @info "ILUAMPrecon" - n = A.n # number of columns - nzval = copy(A.nzval) - diag = Vector{N}(undef, n) - - ILUAMPrecon{T, N}(diag, copy(A.nzval), copy(A.rowval), copy(A.colptr)) -end - -function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integer} - @info "iluAM!" - nzval = LU.nzval - diag = LU.diag - - colptr = LU.colptr - rowval = LU.rowval - n = A.n # number of columns - point = zeros(N, n) #Vector{N}(undef, n) - - t = zeros(5) - - # find diagonal entries - t[1] = @elapsed for j=1:n - for v=colptr[j]:colptr[j+1]-1 - if rowval[v] == j - diag[j] = v - break - end - #elseif rowval[v] - end - end - - # compute L and U - for j=1:n - t[2] += @elapsed for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
- point[rowval[v]] = v - end - - t[3] += @elapsed for v=colptr[j]:diag[j]-1 - i = rowval[v] - #nzval[v] /= nzval[diag[i]] - for w=diag[i]+1:colptr[i+1]-1 - k = point[rowval[w]] - if k>0 - nzval[k] -= nzval[v]*nzval[w] - end - end - end - - t[4] += @elapsed for v=diag[j]+1:colptr[j+1]-1 - nzval[v] /= nzval[diag[j]] - end - - - t[5] += @elapsed for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(N) - end - end - t -end - -function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} - t = zeros(6) - t[1] = @elapsed (LU = ILUAMPrecon(A::SparseMatrixCSC{T,N})) - t[2:6] = iluAM!(LU, A) - @info t - LU -end - - -function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.colptr - rowval = ilu.rowval - diag = ilu.diag - y .= 0 - @inbounds for j=1:n - y[j] += v[j] - for v=diag[j]+1:colptr[j+1]-1 - y[rowval[v]] -= nzval[v]*y[j] - end - end - y -end - - -function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.colptr - rowval = ilu.rowval - diag = ilu.diag - wrk = copy(y) - @inbounds for j=n:-1:1 - x[j] = wrk[j] / nzval[diag[j]] - for i=colptr[j]:diag[j]-1 - wrk[rowval[i]] -= nzval[i]*x[j] - end - end - x -end - -function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - @info "AM ldiv1" - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(x, ilu, y) - x -end - -function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - @info "AM ldiv2" - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(b, ilu, y) - b -end - -function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - @info "AM bs " - x = copy(b) - ldiv!(x, ilu, b) -end - -function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} - length(ilu.nzval) -end - - -end \ No newline at end of file diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl b/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl deleted file mode 100644 index a599094..0000000 --- a/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl +++ /dev/null @@ -1,229 +0,0 @@ -module ILUAM -using LinearAlgebra, SparseArrays - -#import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz - -@info "ILUAM" - -mutable struct ILUAMPrecon{T,N} - - diag::AbstractVector - nzval::AbstractVector - A::AbstractMatrix - -end - -function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} - @info "ILUAMPrecon" - n = A.n # number of columns - nzval = copy(A.nzval) - diag = Vector{N}(undef, n) - - ILUAMPrecon{T, N}(diag, copy(A.nzval), A) -end - - - -function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integer} - @info "iluAM!" - nzval = LU.nzval - diag = LU.diag - - colptr = LU.A.colptr - rowval = LU.A.rowval - n = A.n # number of columns - point = zeros(N, n) #Vector{N}(undef, n) - - t = zeros(5) - - # find diagonal entries - t[1] = @elapsed for j=1:n - for v=colptr[j]:colptr[j+1]-1 - if rowval[v] == j - diag[j] = v - break - end - #elseif rowval[v] - end - end - - # compute L and U - for j=1:n - t[2] += @elapsed for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
- point[rowval[v]] = v - end - - t[3] += @elapsed for v=colptr[j]:diag[j]-1 - i = rowval[v] - #nzval[v] /= nzval[diag[i]] - for w=diag[i]+1:colptr[i+1]-1 - k = point[rowval[w]] - if k>0 - nzval[k] -= nzval[v]*nzval[w] - end - end - end - - t[4] += @elapsed for v=diag[j]+1:colptr[j+1]-1 - nzval[v] /= nzval[diag[j]] - end - - - t[5] += @elapsed for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(N) - end - end - t -end - - -function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer} - @info "iluAM" - nzval = copy(A.nzval) - colptr = A.colptr - rowval = A.rowval - #nzval = ILU.nzval - n = A.n # number of columns - point = zeros(Ti, n) #Vector{Ti}(undef, n) - diag = Vector{Ti}(undef, n) - - # find diagonal entries - for j=1:n - for v=colptr[j]:colptr[j+1]-1 - if rowval[v] == j - diag[j] = v - break - end - #elseif rowval[v] - end - end - - # compute L and U - for j=1:n - for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? - point[rowval[v]] = v - end - - for v=colptr[j]:diag[j]-1 - i = rowval[v] - #nzval[v] /= nzval[diag[i]] - for w=diag[i]+1:colptr[i+1]-1 - k = point[rowval[w]] - if k>0 - nzval[k] -= nzval[v]*nzval[w] - end - end - end - - for v=diag[j]+1:colptr[j+1]-1 - nzval[v] /= nzval[diag[j]] - end - - - for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(Ti) - end - end - #nzval, diag - ILUAMPrecon{Tv,Ti}(diag, nzval, A) -end - -#function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} -# t = zeros(6) -# t[1] = @elapsed (LU = ILUAMPrecon(A::SparseMatrixCSC{T,N})) -# t[2:6] = iluAM!(LU, A) -# @info t -# LU -#end - - -function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.A.colptr - rowval = ilu.A.rowval - diag = ilu.diag - y .= 0 - @inbounds for j=1:n - y[j] += v[j] - for v=diag[j]+1:colptr[j+1]-1 - y[rowval[v]] -= nzval[v]*y[j] - end - end - y -end - - -function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.A.colptr - rowval = ilu.A.rowval - diag = ilu.diag - wrk = copy(y) - @inbounds for j=n:-1:1 - x[j] = wrk[j] / nzval[diag[j]] - for i=colptr[j]:diag[j]-1 - wrk[rowval[i]] -= nzval[i]*x[j] - end - end - x -end - -function ldiv_new!(x, ilu, v) - - n = ilu.A.n - y = Vector{Float64}(undef, n) - y .= 0 - nzval = ilu.nzval - colptr = ilu.A.colptr - rowval = ilu.A.rowval - diag = ilu.diag - #forward - @inbounds for j=1:n - y[j] += v[j] - for v=diag[j]+1:colptr[j+1]-1 - y[rowval[v]] -= nzval[v]*y[j] - end - end - - #backward - wrk = copy(y) - @inbounds for j=n:-1:1 - x[j] = wrk[j] / nzval[diag[j]] - for i=colptr[j]:diag[j]-1 - wrk[rowval[i]] -= nzval[i]*x[j] - end - end - x -end - -function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - #@info "AM ldiv1" - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(x, ilu, y) - x -end - -function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - @info "AM ldiv2" - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(b, ilu, y) - b -end - -function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - @info "AM bs " - x = copy(b) - ldiv!(x, ilu, b) - x -end - -function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} - length(ilu.nzval) -end - - -end \ No newline at end of file From 85de6691898ecf8c8b9b1abad545e253183c6d71 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Sun, 24 Mar 2024 17:40:05 +0100 Subject: [PATCH 9/9] added preparation for edgewise assembly --- src/factorizations/pilu_Al-Kurdi_Mittal.jl | 2 +- 
.../ExtendableSparseParallel.jl | 9 +- .../preparatory.jl | 234 ++++++++++++++++-- .../struct_flush.jl | 14 +- 4 files changed, 231 insertions(+), 28 deletions(-) diff --git a/src/factorizations/pilu_Al-Kurdi_Mittal.jl b/src/factorizations/pilu_Al-Kurdi_Mittal.jl index f2861ed..ad9529b 100644 --- a/src/factorizations/pilu_Al-Kurdi_Mittal.jl +++ b/src/factorizations/pilu_Al-Kurdi_Mittal.jl @@ -52,7 +52,7 @@ function compute_lu!(nzval, point, j0, j1, tid, rowval, colptr, diag, Ti) end function piluAM!(ILU::PILUAMPrecon{Tv,Ti}, A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer} - @info "piluAM!" + #@info "piluAM!" diag = ILU.diag nzval = ILU.nzval ILU.A = A diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl index 2c91a12..b413c5c 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -103,7 +103,7 @@ function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_t if updatentryCSC2!(A.cscmatrix, i, j, v) else - level, tid = last_nz(A.old_noderegions[:, A.rev_new_indices[j]]) + _, tid = last_nz(A.old_noderegions[:, A.rev_new_indices[j]]) A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v end end @@ -316,7 +316,6 @@ y <- A*x, where y and x are vectors and A is an ExtendableSparseMatrixParallel this computation is done in parallel, it has the same result as y = A.cscmatrix*x """ function matvec!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv,Ti}, x::AbstractVector{Tv}) where {Tv, Ti<:Integer} - #a1 = @allocated begin nt = A.nt depth = A.depth colptr = A.cscmatrix.colptr @@ -325,8 +324,6 @@ function matvec!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv,Ti} LinearAlgebra._rmul_or_fill!(y, 0.0) - #end - #a2 = @allocated for level=1:depth @threads for tid::Int64=1:nt for col::Int64=A.start[(level-1)*nt+tid]:A.start[(level-1)*nt+tid+1]-1 @@ -337,8 +334,9 @@ function matvec!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv,Ti} end end + + @threads for tid=1:1 - #a3 = @allocated for col::Int64=A.start[depth*nt+1]:A.start[depth*nt+2]-1 for row::Int64=colptr[col]:colptr[col+1]-1 #nzrange(A, col) y[rv[row]] += nzv[row]*x[col] @@ -346,6 +344,5 @@ function matvec!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv,Ti} end end - #println(a1,a2,a3) y end diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl index 7eeb3d3..033a2fa 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl @@ -6,24 +6,31 @@ `depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... To assemble the system matrix parallely, things such as `cellsforpart` (= which thread takes which cells) need to be computed in advance. This is done here. 
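The submatrix numbering this implies can be sketched as follows (the helper function name is made up for this illustration; the mapping mirrors the `crmod = (cr-1)%nt+1` and `level = ceil(cr/nt)` arithmetic used in this file):

    # submatrices/colors are numbered 1 … depth*nt+1:
    # color (level-1)*nt + tid is handled by thread `tid` on layer `level`,
    # color depth*nt+1 is the final separator, which is processed by a single thread.
    function color_to_tid_level(cr, nt)   # illustrative helper, not part of the package
        tid   = (cr - 1) % nt + 1
        level = ceil(Int, cr / nt)
        return tid, level
    end

    color_to_tid_level(5, 4)   # with nt = 4 this yields (1, 2): thread 1, layer 2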
""" -function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, x0=0.0, x1=1.0, minsize_sepa=10, do_print=false, check_partition=false) +function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, assembly=:cellwise, x0=0.0, x1=1.0, minsize_sepa=10, do_print=false, check_partition=false) grid = getgrid(nm; x0, x1) adepth = 0 if sequential - (allcells, start, cellparts, adepth) = grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa, do_print)#) + (allcells, start, cellparts, adepth) = grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa, do_print)#) else - (allcells, start, cellparts, adepth) = grid_to_graph_ps_multi_par!(grid, nt, depth; minsize_sepa, do_print) + (allcells, start, cellparts, adepth) = grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa, do_print) end if (adepth != depth) && do_print @info "The requested depth of partitioning is too high. The depth is set to $adepth." end - depth = adepth - cfp = bettercellsforpart(cellparts, depth*nt+1) + + if assembly == :cellwise + cfp = bettercellsforpart(cellparts, depth*nt+1) + + else + edgeparts = edgewise_partition_from_cellwise_partition(grid, cellparts) + cfp = bettercellsforpart(edgeparts, depth*nt+1) + end + if check_partition - validate_partition(grid, cellparts, start, allcells, nt, depth) + validate_partition(grid, cellparts, start, allcells, nt, depth, assembly) end #@info length.(cfp) @@ -126,7 +133,7 @@ function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_r tmpctr = 1 for cr in sortedcellregs crmod = (cr-1)%nt+1 - level = Int(ceil(cr/nt)) + #level = Int(ceil(cr/nt)) if !(crmod in tmp[1:tmpctr-1]) gictrs[crmod] += 1 # , level] += 1 sortednodesperthread[crmod,nj] = gictrs[crmod] @@ -235,7 +242,7 @@ The function assigns colors/partitons to each cell in the `grid`. First, the gri `nt` is the number of threads. `depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... 
""" -function grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa=10, do_print=false) +function grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa=10, do_print=false) A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) number_cells_per_node = zeros(Int64, num_nodes(grid)) for j=1:num_cells(grid) @@ -304,11 +311,10 @@ function grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa=10, do_print=fals end end - return allcells, start, cellregs, actual_depth, ACSC + return allcells, start, cellregs, actual_depth end - -function grid_to_graph_ps_multi_par!(grid, nt, depth; minsize_sepa=10, do_print=false) +function grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa=10, do_print=false) As = [ExtendableSparseMatrix{Int64, Int64}(num_cells(grid), num_cells(grid)) for tid=1:nt] number_cells_per_node = zeros(Int64, num_nodes(grid)) @@ -397,6 +403,195 @@ function grid_to_graph_ps_multi_par!(grid, nt, depth; minsize_sepa=10, do_print= return allcells, start, cellregs, actual_depth end +function grid_to_graph_edgewise!(grid, nt, depth; minsize_sepa=10, do_print=false) + ce = grid[CellEdges] + A = SparseMatrixLNK{Int64, Int64}(num_edges(grid), num_edges(grid)) + number_edges_per_node = zeros(Int64, num_nodes(grid)) + + for i=1:num_edges(grid) + for node_id in grid[EdgeNodes][:,i] + number_edges_per_node[node_id] += 1 + end + end + + alledges = zeros(Int64, sum(number_edges_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_edges_per_node) + number_edges_per_node .= 0 + + for j=1:num_edges(grid) + for node_id in grid[EdgeNodes][:,j] + alledges[start[node_id] + number_edges_per_node[node_id]] = j + number_edges_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + edges = @view alledges[start[j]:start[j+1]-1] + for (i,id1) in enumerate(edges) + for id2 in edges[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + ACSC = SparseArrays.SparseMatrixCSC(A) + + partition = Metis.partition(ACSC, nt) + + sn = Vector{Int64}(undef, num_edges(grid)) + gi = Vector{Int64}(undef, num_edges(grid)) + ctr_sepanodes = 0 + + edgeregs = copy(partition) + for tid=1:nt + for j=1:num_edges(grid) + if edgeregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(edgeregs[rows], nt+1) > 1 + edgeregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodes += 1 + sn[ctr_sepanodes] = j + gi[ctr_sepanodes] = j + end + end + end + end + + sn = sn[1:ctr_sepanodes] + gi = gi[1:ctr_sepanodes] + + if do_print + @info "At level $(1), we found $ctr_sepanodes cells that have to be treated in the next iteration!" 
+ end + + RART = copy(ACSC) + actual_depth = 1 + for level=1:depth-1 + RART, ctr_sepanodes, sn, gi = separate!(edgeregs, num_edges(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end + end + + return alledges, start, edgeregs, actual_depth +end + +function grid_to_graph_edgewise_par!(grid, nt, depth; minsize_sepa=10, do_print=false) + ce = grid[CellEdges] + cn = grid[EdgeNodes] + + As = [ExtendableSparseMatrix{Int64, Int64}(num_edges(grid), num_edges(grid)) for tid=1:nt] + number_edges_per_node = zeros(Int64, num_nodes(grid)) + + + for j=1:num_edges(grid) + tmp = view(cn, :, j) + for node_id in tmp + number_edges_per_node[node_id] += 1 + end + end + + + alledges = zeros(Int64, sum(number_edges_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_edges_per_node) + number_edges_per_node .= 0 + + for j=1:num_edges(grid) + tmp = view(cn, :, j) + for node_id in tmp + alledges[start[node_id] + number_edges_per_node[node_id]] = j + number_edges_per_node[node_id] += 1 + end + end + + node_range = get_starts(num_nodes(grid), nt) + Threads.@threads for tid=1:nt + for j in node_range[tid]:node_range[tid+1]-1 + edges = @view alledges[start[j]:start[j+1]-1] + l = length(edges) + for (i,id1) in enumerate(edges) + ce = view(edges, i+1:l) + for id2 in ce + As[tid][id1,id2] = 1 + As[tid][id2,id1] = 1 + + end + end + end + ExtendableSparse.flush!(As[tid]) + end + + ACSC = add_all_par!(As).cscmatrix + + cellregs = Metis.partition(ACSC, nt) + + sn = [Vector{Int64}(undef, Int(ceil(num_cells(grid)/nt))) for tid=1:nt] + ctr_sepanodess = zeros(Int64, nt) + + @threads for tid=1:nt + for j=1:num_edges(grid) + if cellregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(cellregs[rows], nt+1) > 1 + cellregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodess[tid] += 1 + sn[tid][ctr_sepanodess[tid]] = j + end + end + end + end + + for tid=1:nt + sn[tid] = sn[tid][1:ctr_sepanodess[tid]] + end + ctr_sepanodes = sum(ctr_sepanodess) + sn = vcat(sn...) + gi = copy(sn) + + if do_print + @info "At level $(1), we found $ctr_sepanodes edges that have to be treated in the next iteration!" 
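+        # as in the sequential variant above, the per-thread separator lists sn were
+        # merged before this point; separate! below refines the separator per level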
+ end + + RART = ACSC + actual_depth = 1 + for level=1:depth-1 + RART, ctr_sepanodes, sn, gi = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end + end + + #grid[CellRegions] = cellregs + #grid + return alledges, start, cellregs, actual_depth +end + + +function edgewise_partition_from_cellwise_partition(grid, cellregs) + ce = grid[CellEdges] + if num_edges(grid) == 0 + grid[EdgeNodes] + end + + edgeregs = maximum(cellregs)*ones(Int64, num_edges(grid)) + + for icell=1:num_cells(grid) + tmp = cellregs[icell] + for iedge in ce[:,icell] + if tmp < edgeregs[iedge] + edgeregs[iedge] = tmp + end + end + end + + edgeregs +end + """ `function add_all_par!(As)` @@ -636,10 +831,15 @@ function check_partition(nm, nt, depth) end =# -function validate_partition(grid, cellregs, start, allcells, nt, depth) - @info "Node based validation" +function validate_partition(grid, cellregs, start, allcells, nt, depth, assemblytype) violation_ctr = 0 + if assemblytype == :cellwise + key = CellNodes + else + key = EdgeNodes + end + for j=1:num_nodes(grid) cells = @view allcells[start[j]:start[j+1]-1] sortedcellregs = unique(sort(cellregs[cells])) @@ -651,14 +851,14 @@ function validate_partition(grid, cellregs, start, allcells, nt, depth) violation_ctr += 1 if violation_ctr == 1 - @info "Node Id : ", j - @info "Cellregs: ", sortedcellregs - @info "Levels : ", levels + @info "Node Id : $j (we only show one violation)" + @info "Cellregs: $sortedcellregs" + @info "Levels : $levels" loc = findall(x->x==4, Int.(ceil.(cellregs[allcells[start[j]:start[j+1]-1]]/nt))) cells_at_level4 = allcells[loc.+(start[j]-1)] @info cells_at_level4, cellregs[cells_at_level4] - @info grid[CellNodes][:,cells_at_level4[1]], grid[CellNodes][:,cells_at_level4[2]] + @info grid[key][:,cells_at_level4[1]], grid[key][:,cells_at_level4[2]] end end end diff --git a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl index 73471dc..1b8fc48 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl @@ -49,6 +49,9 @@ function dense_flush_keepzeros!( eqctr = 0 tmp = zeros(Ti, size(onr)[1]) + #@warn [As[i].nnz for i=1:nt], [As[i].n for i=1:nt], [As[i].m for i=1:nt] + #@info maximum.([As[i].colptr for i=1:nt]) + for nj=1:As[1].m indptr[nj] = ctr oj = rni[nj] @@ -62,7 +65,10 @@ function dense_flush_keepzeros!( k = s[regmod, nj] if regionctr == 1 while k>0 - #if As[regmod].nzval[k] != 0.0 + if As[regmod].rowval[k] != 0 + if ctr > nnz + @info "ctr > nnz, $nj, $oj" + end indices[ctr] = As[regmod].rowval[k] data[ctr] = As[regmod].nzval[k] @@ -82,12 +88,12 @@ function dense_flush_keepzeros!( ctr += 1 jc += 1 - #end + end k = As[regmod].colptr[k] end else while k>0 - #if As[regmod].nzval[k] != 0.0 + if As[regmod].rowval[k] != 0 indices[ctr] = As[regmod].rowval[k] data[ctr] = As[regmod].nzval[k] @@ -120,7 +126,7 @@ function dense_flush_keepzeros!( ctr += 1 jc += 1 - #end + end k = As[regmod].colptr[k] end