another attempt with CUSPARSE #220

Draft · wants to merge 1 commit into master
4 changes: 3 additions & 1 deletion Project.toml
@@ -6,6 +6,8 @@ version = "0.5.0"
[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
CUDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
CUDNN_jll = "62b44479-cb7b-5706-934f-f13b2eb2e645"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
@@ -26,7 +28,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[compat]
Adapt = "3"
CUDA = "3.3"
CUDA = "3, 4"
ChainRulesCore = "1"
DataStructures = "0.18"
Flux = "0.13.4"
2 changes: 2 additions & 0 deletions src/GNNGraphs/GNNGraphs.jl
@@ -15,6 +15,8 @@ using ChainRulesCore
using LinearAlgebra, Random, Statistics
import MLUtils
using MLUtils: getobs, numobs
using CUDA.CUSPARSE: AbstractCuSparseMatrix


include("gnngraph.jl")
export GNNGraph,
2 changes: 1 addition & 1 deletion src/GNNGraphs/convert.jl
@@ -161,7 +161,7 @@ function to_sparse(A::ADJMAT_T, T=nothing; dir=:out, num_nodes=nothing, weighted
A = sparse(A)
end
if !weighted
A = map(x -> ifelse(x > 0, T(1), T(0)), A)
A = binarize(A, T)
end
return A, num_nodes, num_edges
end
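
For reference, here is a minimal CPU sketch (plain SparseArrays rather than CUSPARSE, with made-up values) of what the unweighted branch computes: every stored positive weight collapses to `T(1)`, using broadcasting instead of `map`, since `map` is not supported on CuSparse types.

```julia
using SparseArrays

# Same definitions as in src/GNNGraphs/utils.jl below, reproduced for a standalone example
binarize(x::Number, T::Type)::T = ifelse(x > 0, T(1), T(0))
binarize(x::AbstractArray, T) = T.((>(0)).(x))   # broadcast, not `map`

W = sparse([1, 2, 3], [3, 3, 1], [0.5, 2.0, 7.0], 3, 3)   # weighted adjacency
A = binarize(W, Float32)                                  # unweighted 0/1 adjacency
@assert A == sparse([1, 2, 3], [3, 3, 1], ones(Float32, 3), 3, 3)
```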
11 changes: 3 additions & 8 deletions src/GNNGraphs/query.jl
@@ -143,12 +143,7 @@ User may specify the eltype `T` of the returned matrix.
If `weighted=true`, `A` will contain the edge weights if any; otherwise the elements of `A` will be either 0 or 1.
"""
function Graphs.adjacency_matrix(g::GNNGraph{<:COO_T}, T::DataType=eltype(g); dir=:out, weighted=true)
if g.graph[1] isa CuVector
# TODO revisit after https://github.com/JuliaGPU/CUDA.jl/pull/1152
A, n, m = to_dense(g.graph, T; num_nodes=g.num_nodes, weighted)
else
A, n, m = to_sparse(g.graph, T; num_nodes=g.num_nodes, weighted)
end
A, n, m = to_sparse(g.graph, T; num_nodes=g.num_nodes, weighted)
@assert size(A) == (n, n)
return dir == :out ? A : A'
end
@@ -157,7 +152,7 @@ function Graphs.adjacency_matrix(g::GNNGraph{<:ADJMAT_T}, T::DataType=eltype(g);
@assert dir ∈ [:in, :out]
A = g.graph
if !weighted
A = binarize(A)
A = binarize(A, T)
end
A = T != eltype(A) ? T.(A) : A
return dir == :out ? A : A'
@@ -232,7 +227,7 @@ function Graphs.degree(g::GNNGraph{<:ADJMAT_T}, T::TT=nothing; dir=:out, edge_we
end
A = adjacency_matrix(g)
if edge_weight === false
A = binarize(A)
A = binarize(A, T)
end
A = eltype(A) != T ? T.(A) : A
return dir == :out ? vec(sum(A, dims=2)) :
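
As a sanity check on the reduction used in `Graphs.degree` above, a CPU-only sketch (toy graph, illustrative values) of how out- and in-degrees fall out of row and column sums of the adjacency matrix, with `A[i, j] = 1` for an edge `i -> j` (`dir = :out`):

```julia
using SparseArrays

# Edges 1 -> 3, 2 -> 3, 3 -> 1
A = sparse([1, 2, 3], [3, 3, 1], [1, 1, 1], 3, 3)

outdeg = vec(sum(A, dims=2))   # dir = :out
indeg  = vec(sum(A, dims=1))   # dir = :in
@assert outdeg == [1, 1, 1]
@assert indeg  == [1, 0, 2]
```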
40 changes: 39 additions & 1 deletion src/GNNGraphs/utils.jl
@@ -172,8 +172,46 @@ function edge_decoding(idx, n; directed=true)
return s, t
end

binarize(x) = map(>(0), x)
# binarize(x) = map(>(0), x) # map is not supported by CuSparse types
binarize(x::AbstractArray) = >(0).(x)
binarize(x::Number, T::Type)::T = ifelse(x > 0, T(1), T(0))
binarize(x::AbstractArray, T) = T.(binarize(x)) # didn't find a better CUSPARSE-compatible implementation


@non_differentiable binarize(x...)
@non_differentiable edge_encoding(x...)
@non_differentiable edge_decoding(x...)


## PIRACY. THESE SHOULD GO IN CUDA.jl

# Workaround https://github.com/JuliaGPU/CUDA.jl/issues/1406
Base.sum(x::AbstractCuSparseMatrix; dims=:) = cusparse_sum(x, Val(dims))

cusparse_sum(x, ::Val{:}) = sum(cusparse_sum(x, Val(1)))

function cusparse_sum(x::AbstractCuSparseMatrix, ::Val{1})
m, n = size(x)
v = ones_like(x, (1, m))
return v * x
end

function cusparse_sum(x::AbstractCuSparseMatrix, ::Val{2})
m, n = size(x)
v = ones_like(x, (n, 1))
return x * v
end

# workaround https://github.com/JuliaGPU/CUDA.jl/issues/1664
function CUDA.CuMatrix{T}(x::AbstractCuSparseMatrix{T}) where T <: Integer
return T.(CuMatrix(Float32.(x)))
end

function Base.:(*)(x::AbstractCuSparseMatrix, d::Diagonal)
return x .* d.diag'
end

function Base.:(*)(d::Diagonal, x::AbstractCuSparseMatrix)
return d.diag .* CuArray(x) # couldn't do better
end
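
The workarounds above only rely on operations CUSPARSE does support (sparse-dense products and broadcasting with a row vector). A CPU analogue with SparseArrays, using illustrative matrices and no GPU, shows the identities they lean on:

```julia
using SparseArrays, LinearAlgebra

x = sprand(4, 5, 0.5)
d = Diagonal(rand(5))

# sum(x; dims) as a sparse-dense matmul, mirroring `cusparse_sum`
colsums = ones(1, 4) * x          # Val(1): dims = 1
rowsums = x * ones(5, 1)          # Val(2): dims = 2
@assert colsums ≈ sum(Matrix(x), dims=1)
@assert rowsums ≈ sum(Matrix(x), dims=2)
@assert sum(colsums) ≈ sum(Matrix(x))

# right-multiplication by a Diagonal is a column-wise scaling, mirroring `x * d` above
@assert Matrix(x) * d ≈ Matrix(x .* d.diag')
```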

26 changes: 13 additions & 13 deletions src/msgpass.jl
@@ -189,14 +189,14 @@ end
## COPY_XJ

function propagate(::typeof(copy_xj), g::GNNGraph, ::typeof(+), xi, xj::AbstractMatrix, e)
A = adjacency_matrix(g, weighted=false)
A = adjacency_matrix(g, eltype(xj), weighted=false)
return xj * A
end

## avoid the fast path on gpu until we have better cuda support
function propagate(::typeof(copy_xj), g::GNNGraph{<:Union{COO_T,SPARSE_T}}, ::typeof(+), xi, xj::AnyCuMatrix, e)
propagate((xi,xj,e) -> copy_xj(xi,xj,e), g, +, xi, xj, e)
end
# ## avoid the fast path on gpu until we have better cuda support
# function propagate(::typeof(copy_xj), g::GNNGraph{<:Union{COO_T,SPARSE_T}}, ::typeof(+), xi, xj::AnyCuMatrix, e)
# propagate((xi,xj,e) -> copy_xj(xi,xj,e), g, +, xi, xj, e)
# end

## E_MUL_XJ

@@ -207,10 +207,10 @@ function propagate(::typeof(e_mul_xj), g::GNNGraph, ::typeof(+), xi, xj::Abstrac
return xj * A
end

## avoid the fast path on gpu until we have better cuda support
function propagate(::typeof(e_mul_xj), g::GNNGraph{<:Union{COO_T,SPARSE_T}}, ::typeof(+), xi, xj::AnyCuMatrix, e::AbstractVector)
propagate((xi,xj,e) -> e_mul_xj(xi,xj,e), g, +, xi, xj, e)
end
# ## avoid the fast path on gpu until we have better cuda support
# function propagate(::typeof(e_mul_xj), g::GNNGraph{<:Union{COO_T,SPARSE_T}}, ::typeof(+), xi, xj::AnyCuMatrix, e::AbstractVector)
# propagate((xi,xj,e) -> e_mul_xj(xi,xj,e), g, +, xi, xj, e)
# end

## W_MUL_XJ

@@ -220,10 +220,10 @@ function propagate(::typeof(w_mul_xj), g::GNNGraph, ::typeof(+), xi, xj::Abstrac
return xj * A
end

## avoid the fast path on gpu until we have better cuda support
function propagate(::typeof(w_mul_xj), g::GNNGraph{<:Union{COO_T,SPARSE_T}}, ::typeof(+), xi, xj::AnyCuMatrix, e::Nothing)
propagate((xi,xj,e) -> w_mul_xj(xi,xj,e), g, +, xi, xj, e)
end
# ## avoid the fast path on gpu until we have better cuda support
# function propagate(::typeof(w_mul_xj), g::GNNGraph{<:Union{COO_T,SPARSE_T}}, ::typeof(+), xi, xj::AnyCuMatrix, e::Nothing)
# propagate((xi,xj,e) -> w_mul_xj(xi,xj,e), g, +, xi, xj, e)
# end



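
The restored fast path turns the `copy_xj`/`+` aggregation into a single sparse matmul, `xj * A`. A small CPU sketch (SparseArrays, made-up graph and features) of why that matches the explicit gather/scatter over edges:

```julia
using SparseArrays

# Edges 1 -> 3, 2 -> 3, 3 -> 1; A[i, j] = 1 for an edge i -> j (dir = :out)
s, t = [1, 2, 3], [3, 3, 1]
n = 3
A = sparse(s, t, ones(length(s)), n, n)

xj = reshape(Float64.(1:2n), 2, n)   # 2 features per source node

out = xj * A                         # fused path: one matmul

ref = zeros(2, n)                    # reference: scatter messages edge by edge
for (i, j) in zip(s, t)
    ref[:, j] .+= xj[:, i]
end
@assert out ≈ ref
```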
85 changes: 85 additions & 0 deletions test_cuda.jl
@@ -0,0 +1,85 @@

### how to make it work with CUDA.jl v4.0 ####
# dev Flux
# In the Flux project folder:
# - change the CUDA compat bound
# - comment out usages of CUDA.has_cudnn()
# dev NNlibCUDA
# In the NNlibCUDA project folder:
# - change the CUDA compat bound
# - add CUDA#master
# - convert all the using CUDA.CUDNN to using CUDNN
# - add https://github.com/JuliaGPU/CUDA.jl:lib/cudnn
# add CUDA#master
# add https://github.com/JuliaGPU/CUDA.jl:lib/cudnn # CUDNN subpackage not registered yet
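# Illustration only, not part of this PR: a rough Pkg-API equivalent of the manual steps
# above, assuming `Pkg.add`'s keyword form accepts `url`, `rev` and `subdir` (mirroring the
# REPL's `add url:subdir` syntax). The compat edits to Flux and NNlibCUDA still have to be
# made by hand in the dev'ed folders.
# using Pkg
# Pkg.develop("Flux")
# Pkg.develop("NNlibCUDA")
# Pkg.add(url="https://github.com/JuliaGPU/CUDA.jl", rev="master")
# Pkg.add(url="https://github.com/JuliaGPU/CUDA.jl", subdir="lib/cudnn")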

using GraphNeuralNetworks, CUDA, Flux
using CUDA.CUSPARSE
using LinearAlgebra, SparseArrays
using GraphNeuralNetworks.GNNGraphs: binarize
CUDA.allowscalar(false)

g_cpu = rand_graph(10, 10, graph_type = :sparse)
g = g_cpu |> gpu

a = adjacency_matrix(g, Float32)
# maximum(a)
# minimum(a)
# extrema(a)
# sum(a)

x = rand(2, 10) |> gpu
z = rand(10, 2) |> gpu

@assert x * z isa CuMatrix
@assert a .+ 1 isa CuMatrix
@assert tanh.(a) isa CuSparseMatrix
@assert a + a isa CuSparseMatrix
@assert mul!(deepcopy(z), a, z, 0, 1) isa CuArray
@assert mul!(deepcopy(x), x, a, 0, 1) isa CuArray
# @assert mm!('N', 'N', 0, a, z, 1, deepcopy(z), 'O') isa CuArray

@assert x * a isa CuMatrix
@assert a * z isa CuMatrix
# a * a
f(x) = x > 0
@assert f.(a) isa CuSparseMatrixCSC{Bool}
# map(f, a)
@assert binarize.(a) isa CuSparseMatrix
# show(a')
# CUDA.ones(10) .* a
# a .* CUDA.ones(10)


b = CuSparseMatrixCSR(a)
@assert x * z isa CuMatrix
@assert b .+ 1 isa CuMatrix
@assert tanh.(b) isa CuSparseMatrix
@assert b + b isa CuSparseMatrix
@assert x * b isa CuMatrix
@assert b * z isa CuMatrix
f(x) = x > 0
#BUG # @assert f.(b) isa CuSparseMatrixCSC{Bool}
# map(f, b)

c = CuSparseMatrixCOO(a)
@assert x * z isa CuMatrix
# BUG @assert c .+ 1 isa CuMatrix
# BUG @assert tanh.(c) isa CuSparseMatrix
# BUG @assert c + c isa CuSparseMatrix
@assert x * c isa CuMatrix
@assert c * z isa CuMatrix
f(x) = x > 0
# map(f, c)
# BUG @assert f.(c) isa CuSparseMatrixCSC{Bool}


# b * b
m = GCNConv(2 => 2) |> gpu
y = m(g, x)

g2 = rand_graph(10, 10, graph_type=:coo) |> gpu
adjacency_matrix(g2)


a