Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CUSPARSE: Fix sparse constructor with duplicate elements. #2495

Merged
merged 1 commit into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 19 additions & 31 deletions lib/cusparse/conversions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ function SparseArrays.sparse(I::CuVector{Cint}, J::CuVector{Cint}, V::CuVector{T

# find groups of values that correspond to the same position in the matrix.
# if there's no duplicates, `groups` will just be a vector of ones.
# otherwise, it will contain the number of duplicates for each group,
# with subsequent values that are part of the group set to zero.
# otherwise, it will contain gaps of zeros that indicates duplicate values.
coo = sort_coo(coo, 'R')
groups = similar(I, Int)
function find_groups(groups, I, J)
Expand All @@ -51,14 +50,7 @@ function SparseArrays.sparse(I::CuVector{Cint}, J::CuVector{Cint}, V::CuVector{T
len = 0

# check if we're at the start of a new group
@inbounds if i == 1 || I[i] != I[i-1] || J[i] != J[i-1]
len = 1
while i+len <= length(groups) && I[i] == I[i+len] && J[i] == J[i+len]
len += 1
end
end

@inbounds groups[i] = len
@inbounds groups[i] = i == 1 || I[i] != I[i-1] || J[i] != J[i-1]

return
end
Expand All @@ -82,49 +74,45 @@ function SparseArrays.sparse(I::CuVector{Cint}, J::CuVector{Cint}, V::CuVector{T
end
end

total_lengths = cumsum(groups) # TODO: add and use `scan!(; exclusive=true)`
# by scanning the mask of groups, we can find a mapping for old to new indices
indices = accumulate(+, groups)

I = similar(I, ngroups)
J = similar(J, ngroups)
V = similar(V, ngroups)

# use one thread per value, and if it's at the start of a group,
# use one thread per (old) value, and if it's at the start of a group,
# combine (if needed) all values and update the output vectors.
function combine_groups(groups, total_lengths, oldI, oldJ, oldV, newI, newJ, newV, combine)
function combine_groups(groups, indices, oldI, oldJ, oldV, newI, newJ, newV, combine)
i = threadIdx().x + (blockIdx().x - 1) * blockDim().x
if i > length(groups)
return
end

# check if we're at the start of a group
@inbounds if groups[i] != 0
# get an exclusive offset from the inclusive cumsum
offset = total_lengths[i] - groups[i] + 1
# get a destination index
j = indices[i]

# copy values
newI[i] = oldI[offset]
newJ[i] = oldJ[offset]
newV[i] = if groups[i] == 1
oldV[offset]
else
# combine all values in the group
val = oldV[offset]
j = 1
while j < groups[i]
val = combine(val, oldV[offset+j])
j += 1
end
val
newI[j] = oldI[i]
newJ[j] = oldJ[i]
val = oldV[i]
while i < length(groups) && groups[i+1] == 0
i += 1
val = combine(val, oldV[i])
end
newV[j] = val
end

return
end
kernel = @cuda launch=false combine_groups(groups, total_lengths, coo.rowInd, coo.colInd, coo.nzVal, I, J, V, combine)
kernel = @cuda launch=false combine_groups(groups, indices, coo.rowInd, coo.colInd, coo.nzVal, I, J, V, combine)
config = launch_configuration(kernel.fun)
threads = min(length(groups), config.threads)
blocks = cld(length(groups), threads)
kernel(groups, total_lengths, coo.rowInd, coo.colInd, coo.nzVal, I, J, V, combine; threads, blocks)

kernel(groups, indices, coo.rowInd, coo.colInd, coo.nzVal, I, J, V, combine; threads, blocks)
synchronize()
coo = CuSparseMatrixCOO{Tv}(I, J, V, (m, n))
end

Expand Down
11 changes: 11 additions & 0 deletions test/libraries/cusparse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1117,4 +1117,15 @@ end
@test Array(coo.colInd) == [1, 2, 3]
@test Array(coo.nzVal) == [1f0, 2f0, 13f0]
end

# JuliaGPU/CUDA.jl#2494
let
I = [1, 2, 1]
J = [1, 2, 1]
V = [10f0, 1f0, 2f0]
coo = sparse(cu(I), cu(J), cu(V); fmt=:coo)
@test Array(coo.rowInd) == [1, 2]
@test Array(coo.colInd) == [1, 2]
@test Array(coo.nzVal) == [12f0, 1f0]
end
end