diff --git a/Project.toml b/Project.toml
index 9f640f545..bb4e20bd9 100644
--- a/Project.toml
+++ b/Project.toml
@@ -6,6 +6,8 @@ version = "0.5.0"
 [deps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+CUDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
+CUDNN_jll = "62b44479-cb7b-5706-934f-f13b2eb2e645"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
@@ -26,7 +28,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
 [compat]
 Adapt = "3"
-CUDA = "3.3"
+CUDA = "3, 4"
 ChainRulesCore = "1"
 DataStructures = "0.18"
 Flux = "0.13.4"
diff --git a/src/GNNGraphs/GNNGraphs.jl b/src/GNNGraphs/GNNGraphs.jl
index a0baa83ee..f2dc59bd7 100644
--- a/src/GNNGraphs/GNNGraphs.jl
+++ b/src/GNNGraphs/GNNGraphs.jl
@@ -15,6 +15,8 @@ using ChainRulesCore
 using LinearAlgebra, Random, Statistics
 import MLUtils
 using MLUtils: getobs, numobs
+using CUDA.CUSPARSE: AbstractCuSparseMatrix
+
 
 include("gnngraph.jl")
 export GNNGraph,
diff --git a/src/GNNGraphs/convert.jl b/src/GNNGraphs/convert.jl
index 8da7345a4..0f83ab1dc 100644
--- a/src/GNNGraphs/convert.jl
+++ b/src/GNNGraphs/convert.jl
@@ -161,7 +161,7 @@ function to_sparse(A::ADJMAT_T, T=nothing; dir=:out, num_nodes=nothing, weighted
         A = sparse(A)
     end
     if !weighted
-        A = map(x -> ifelse(x > 0, T(1), T(0)), A)
+        A = binarize(A, T)
     end
     return A, num_nodes, num_edges
 end
diff --git a/src/GNNGraphs/query.jl b/src/GNNGraphs/query.jl
index a537a90c4..ab2882b43 100644
--- a/src/GNNGraphs/query.jl
+++ b/src/GNNGraphs/query.jl
@@ -143,12 +143,7 @@ User may specify the eltype `T` of the returned matrix.
 If `weighted=true`, the `A` will contain the edge weigths if any, otherwise the elements of `A` will be either 0 or 1.
 """
 function Graphs.adjacency_matrix(g::GNNGraph{<:COO_T}, T::DataType=eltype(g); dir=:out, weighted=true)
-    if g.graph[1] isa CuVector
-        # TODO revisit after https://github.com/JuliaGPU/CUDA.jl/pull/1152
-        A, n, m = to_dense(g.graph, T; num_nodes=g.num_nodes, weighted)
-    else
-        A, n, m = to_sparse(g.graph, T; num_nodes=g.num_nodes, weighted)
-    end
+    A, n, m = to_sparse(g.graph, T; num_nodes=g.num_nodes, weighted)
     @assert size(A) == (n, n)
     return dir == :out ? A : A'
 end
@@ -157,7 +152,7 @@ function Graphs.adjacency_matrix(g::GNNGraph{<:ADJMAT_T}, T::DataType=eltype(g);
     @assert dir ∈ [:in, :out]
     A = g.graph
     if !weighted
-        A = binarize(A)
+        A = binarize(A, T)
     end
     A = T != eltype(A) ? T.(A) : A
     return dir == :out ? A : A'
@@ -232,7 +227,7 @@ function Graphs.degree(g::GNNGraph{<:ADJMAT_T}, T::TT=nothing; dir=:out, edge_we
     end
     A = adjacency_matrix(g)
     if edge_weight === false
-        A = binarize(A)
+        A = binarize(A, T)
     end
     A = eltype(A) != T ? T.(A) : A
     return dir == :out ? vec(sum(A, dims=2)) :
diff --git a/src/GNNGraphs/utils.jl b/src/GNNGraphs/utils.jl
index 0e5c498c0..ec50b555f 100644
--- a/src/GNNGraphs/utils.jl
+++ b/src/GNNGraphs/utils.jl
@@ -172,8 +172,46 @@ function edge_decoding(idx, n; directed=true)
     return s, t
 end
 
-binarize(x) = map(>(0), x)
+# binarize(x) = map(>(0), x) # map is not supported by CuSparse types
+binarize(x::AbstractArray) = >(0).(x)
+binarize(x::Number, T::Type)::T = ifelse(x > 0, T(1), T(0))
+binarize(x::AbstractArray, T) = T.(binarize(x)) # didn't find a better cusparse compatible implementation
+
 @non_differentiable binarize(x...)
 @non_differentiable edge_encoding(x...)
 @non_differentiable edge_decoding(x...)
+
+
+## PIRACY. THESE SHOULD GO in CUDA.jl
+
+# Workaround https://github.com/JuliaGPU/CUDA.jl/issues/1406
+Base.sum(x::AbstractCuSparseMatrix; dims=:) = cusparse_sum(x, Val(dims))
+
+cusparse_sum(x, ::Val{:}) = sum(cusparse_sum(x, Val(1)))
+
+function cusparse_sum(x::AbstractCuSparseMatrix, ::Val{1})
+    m, n = size(x)
+    v = ones_like(x, (1, m))
+    return v * x
+end
+
+function cusparse_sum(x::AbstractCuSparseMatrix, ::Val{2})
+    m, n = size(x)
+    v = ones_like(x, (n, 1))
+    return x * v
+end
+
+# workaround https://github.com/JuliaGPU/CUDA.jl/issues/1664
+function CUDA.CuMatrix{T}(x::AbstractCuSparseMatrix{T}) where T <: Integer
+    return T.(CuMatrix(Float32.(x)))
+end
+
+function Base.:(*)(x::AbstractCuSparseMatrix, d::Diagonal)
+    return x .* d.diag'
+end
+
+function Base.:(*)(d::Diagonal, x::AbstractCuSparseMatrix)
+    return d.diag .* CuArray(x) # couldn't do better
+end
+
diff --git a/src/msgpass.jl b/src/msgpass.jl
index d6f6df6d7..6f5cb204f 100644
--- a/src/msgpass.jl
+++ b/src/msgpass.jl
@@ -189,14 +189,14 @@ end
 ## COPY_XJ
 
 function propagate(::typeof(copy_xj), g::GNNGraph, ::typeof(+), xi, xj::AbstractMatrix, e)
-    A = adjacency_matrix(g, weighted=false)
+    A = adjacency_matrix(g, eltype(xj), weighted=false)
     return xj * A
 end
 
-## avoid the fast path on gpu until we have better cuda support
-function propagate(::typeof(copy_xj), g::GNNGraph{<:Union{COO_T,SPARSE_T}}, ::typeof(+), xi, xj::AnyCuMatrix, e)
-    propagate((xi,xj,e) -> copy_xj(xi,xj,e), g, +, xi, xj, e)
-end
+# ## avoid the fast path on gpu until we have better cuda support
+# function propagate(::typeof(copy_xj), g::GNNGraph{<:Union{COO_T,SPARSE_T}}, ::typeof(+), xi, xj::AnyCuMatrix, e)
+#     propagate((xi,xj,e) -> copy_xj(xi,xj,e), g, +, xi, xj, e)
+# end
 
 
 ## E_MUL_XJ
@@ -207,10 +207,10 @@ function propagate(::typeof(e_mul_xj), g::GNNGraph, ::typeof(+), xi, xj::Abstrac
     return xj * A
 end
 
-## avoid the fast path on gpu until we have better cuda support
-function propagate(::typeof(e_mul_xj), g::GNNGraph{<:Union{COO_T,SPARSE_T}}, ::typeof(+), xi, xj::AnyCuMatrix, e::AbstractVector)
-    propagate((xi,xj,e) -> e_mul_xj(xi,xj,e), g, +, xi, xj, e)
-end
+# ## avoid the fast path on gpu until we have better cuda support
+# function propagate(::typeof(e_mul_xj), g::GNNGraph{<:Union{COO_T,SPARSE_T}}, ::typeof(+), xi, xj::AnyCuMatrix, e::AbstractVector)
+#     propagate((xi,xj,e) -> e_mul_xj(xi,xj,e), g, +, xi, xj, e)
+# end
 
 
 ## W_MUL_XJ
@@ -220,10 +220,10 @@ function propagate(::typeof(w_mul_xj), g::GNNGraph, ::typeof(+), xi, xj::Abstrac
     return xj * A
 end
 
-## avoid the fast path on gpu until we have better cuda support
-function propagate(::typeof(w_mul_xj), g::GNNGraph{<:Union{COO_T,SPARSE_T}}, ::typeof(+), xi, xj::AnyCuMatrix, e::Nothing)
-    propagate((xi,xj,e) -> w_mul_xj(xi,xj,e), g, +, xi, xj, e)
-end
+# ## avoid the fast path on gpu until we have better cuda support
+# function propagate(::typeof(w_mul_xj), g::GNNGraph{<:Union{COO_T,SPARSE_T}}, ::typeof(+), xi, xj::AnyCuMatrix, e::Nothing)
+#     propagate((xi,xj,e) -> w_mul_xj(xi,xj,e), g, +, xi, xj, e)
+# end
 
 
diff --git a/test_cuda.jl b/test_cuda.jl
new file mode 100644
index 000000000..5e547a8ff
--- /dev/null
+++ b/test_cuda.jl
@@ -0,0 +1,85 @@
+
+### how to make it work with CUDA.jl v4.0 ####
+# dev Flux
+# In the Flux project folder:
+    # - change the CUDA compat bound
+    # - comment out usages of CUDA.has_cudnn()
+# dev NNlibCUDA
+# In the NNlibCUDA project folder:
+    # - change the CUDA compat bound
+    # - add CUDA#master
+    # - convert all the using CUDA.CUDNN to using CUDNN
+    # - add https://github.com/JuliaGPU/CUDA.jl:lib/cudnn
+# add CUDA#master
+# add https://github.com/JuliaGPU/CUDA.jl:lib/cudnn  # CUDNN subpackage not registered yet
+
+using GraphNeuralNetworks, CUDA, Flux
+using CUDA.CUSPARSE
+using LinearAlgebra, SparseArrays
+using GraphNeuralNetworks.GNNGraphs: binarize
+CUDA.allowscalar(false)
+
+g_cpu = rand_graph(10, 10, graph_type = :sparse)
+g = g_cpu |> gpu
+
+a = adjacency_matrix(g, Float32)
+# maximum(a)
+# minimum(a)
+# extrema(a)
+# sum(a)
+
+x = rand(2, 10) |> gpu
+z = rand(10, 2) |> gpu
+
+@assert x * z isa CuMatrix
+@assert a .+ 1 isa CuMatrix
+@assert tanh.(a) isa CuSparseMatrix
+@assert a + a isa CuSparseMatrix
+@assert mul!(deepcopy(z), a, z, 0, 1) isa CuArray
+@assert mul!(deepcopy(x), x, a, 0, 1) isa CuArray
+# @assert mm!('N', 'N', 0, a, z, 1, deepcopy(z), 'O') isa CuArray
+
+@assert x * a isa CuMatrix
+@assert a * z isa CuMatrix
+# a * a
+f(x) = x > 0
+@assert f.(a) isa CuSparseMatrixCSC{Bool}
+# map(f, a)
+@assert binarize.(a) isa CuSparseMatrix
+# show(a')
+# CUDA.ones(10) .* a
+# a .* CUDA.ones(10)
+
+
+b = CuSparseMatrixCSR(a)
+@assert x * z isa CuMatrix
+@assert b .+ 1 isa CuMatrix
+@assert tanh.(b) isa CuSparseMatrix
+@assert b + b isa CuSparseMatrix
+@assert x * b isa CuMatrix
+@assert b * z isa CuMatrix
+f(x) = x > 0
+#BUG # @assert f.(b) isa CuSparseMatrixCSC{Bool}
+# map(f, b)
+
+c = CuSparseMatrixCOO(a)
+@assert x * z isa CuMatrix
+# BUG @assert c .+ 1 isa CuMatrix
+# BUG @assert tanh.(c) isa CuSparseMatrix
+# BUG @assert c + c isa CuSparseMatrix
+@assert x * c isa CuMatrix
+@assert c * z isa CuMatrix
+f(x) = x > 0
+# map(f, c)
+# BUG @assert f.(c) isa CuSparseMatrixCSC{Bool}
+
+
+# b * b
+m = GCNConv(2 => 2) |> gpu
+y = m(g, x)
+
+g2 = rand_graph(10, 10, graph_type=:coo) |> gpu
+adjacency_matrix(g2)
+
+
+a
\ No newline at end of file
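
# ---------------------------------------------------------------------------
# Illustrative sketches (not part of the patch above). They restate, on CPU,
# what the patched code is expected to do; helper names and test data below
# are made up for illustration only.

# Sketch 1: semantics of the two-argument `binarize` added in src/GNNGraphs/utils.jl.
# Positive entries map to one(T), everything else to zero(T); broadcasting is used
# instead of `map`, which is not supported by CuSparse types.
using SparseArrays

binarize_sketch(x::AbstractArray, T) = T.(x .> 0)   # same behavior as binarize(x, T) in the patch

A1 = sparse([1, 2], [2, 3], [0.5, 2.0], 3, 3)
@assert binarize_sketch(A1, Float32) == sparse([1, 2], [2, 3], Float32[1.0, 1.0], 3, 3)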
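
# Sketch 2: the identity behind the `Base.sum` workaround for AbstractCuSparseMatrix
# (the `cusparse_sum` methods in src/GNNGraphs/utils.jl): a reduction along one
# dimension is rewritten as a product with a ones vector, so only sparse-dense
# matmul is needed on the GPU. CPU check of that identity (sizes are arbitrary):
using SparseArrays

A2 = sprand(4, 5, 0.5)
@assert ones(1, 4) * A2 ≈ sum(A2, dims=1)   # cusparse_sum(x, Val(1)): v * x with v = ones(1, m)
@assert A2 * ones(5, 1) ≈ sum(A2, dims=2)   # cusparse_sum(x, Val(2)): x * v with v = ones(n, 1)
@assert sum(ones(1, 4) * A2) ≈ sum(A2)      # cusparse_sum(x, Val(:)) sums the dims=1 result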
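
# Sketch 3: the copy_xj fast path in src/msgpass.jl. Summing copy_xj messages at each
# target node equals the product xj * A, where A is the unweighted adjacency matrix
# (dir = :out, so A[j, i] marks an edge j -> i); the patched method now builds A with
# eltype(xj) instead of the graph's default eltype. CPU check on a small made-up graph:
using GraphNeuralNetworks

s_idx = [1, 2, 3, 4, 1]
t_idx = [2, 3, 1, 1, 3]
g_sketch = GNNGraph(s_idx, t_idx)
xj_sketch = rand(Float32, 3, 4)
A3 = adjacency_matrix(g_sketch, Float32, weighted = false)
expected = zeros(Float32, 3, 4)
for (j, i) in zip(s_idx, t_idx)
    expected[:, i] .+= xj_sketch[:, j]   # message copied from source j, aggregated with + at target i
end
@assert xj_sketch * A3 ≈ expected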