When I use your example to test the GPU support, I found some bugs. #435
Answered
by
amontoison
liaoweiyang2017
asked this question in
Q&A
-
using CUDA, Krylov, LinearOperators, BenchmarkTools
using CUDA.CUSPARSE, SparseArrays, LinearAlgebra, IncompleteLU
function symmetric_definite(n :: Int=10)
A = spdiagm(-5 => ones(n-5), 0 => 4*ones(n), 5 => ones(n-5)) * 1im
b = A * [1:n;]
return A, b
end
# CPU Arrays
n=20000
A_cpu, b_cpu = symmetric_definite(n)
# GPU Arrays
A_gpu = CuSparseMatrixCSC(A_cpu)
b_gpu = CuVector(b_cpu)
# LLᵀ ≈ A for CuSparseMatrixCSC matrices
P = ic02(A_gpu, 'O')
# Solve Py = x
function ldiv!(y, P, x)
copyto!(y, x) # Variant for CuSparseMatrixCSR
sv2!('T', 'U', 'N', 1.0, P, y, 'O') # sv2!('N', 'L', 'N', 1.0, P, y, 'O')
sv2!('N', 'U', 'N', 1.0, P, y, 'O') # sv2!('T', 'L', 'N', 1.0, P, y, 'O')
return y
end
# Operator that model P⁻¹
T = eltype(b_gpu)
opM = LinearOperator(T, n, n, true, true, (y, x, α, β) -> ldiv!(y, P, x))
# Solve an unsymmetric system with an incomplete LU preconditioner on GPU
@time (x, stats) = bicgstab(A_gpu, b_gpu, M=opM)
@time x1 = A_cpu\b_cpu
println("x=")
println(x[1:10])
println("x1=")
println(x1[1:10])
println("norm=")
println(norm(A_cpu*x1-b_cpu))
println("norm1=")
println(norm(A_cpu*Array(x)-b_cpu)) When the input datas are complex datas, the code runs wrong. Please fix it. |
Beta Was this translation helpful? Give feedback.
Answered by
amontoison
Apr 17, 2022
Replies: 3 comments
-
Complex systems are not yet supported. |
Beta Was this translation helpful? Give feedback.
0 replies
-
@liaoweiyang2017 |
Beta Was this translation helpful? Give feedback.
0 replies
-
@liaoweiyang2017 using CUDA, Krylov, LinearOperators, BenchmarkTools
using CUDA.CUSPARSE, SparseArrays, LinearAlgebra
function symmetric_definite(n :: Int=10)
A = spdiagm(-5 => (1 + im) * ones(n-5), 0 => 4*ones(n), 5 => (1 - im) * ones(n-5))
b = A * [1:n;]
return A, b
end
# CPU Arrays
n = 20000
A_cpu, b_cpu = symmetric_definite(n)
# GPU Arrays
A_gpu = CuSparseMatrixCSC(A_cpu)
b_gpu = CuVector(b_cpu)
# LLᵀ ≈ A for CuSparseMatrixCSC matrices
P = ic02(A_gpu, 'O')
# Solve Py = x
function ldiv!(y, P, x)
copyto!(y, x) # Variant for CuSparseMatrixCSR
sv2!('T', 'U', 'N', 1.0, P, y, 'O') # sv2!('N', 'L', 'N', 1.0, P, y, 'O')
sv2!('N', 'U', 'N', 1.0, P, y, 'O') # sv2!('T', 'L', 'N', 1.0, P, y, 'O')
return y
end
# Operator that model P⁻¹
T = eltype(b_gpu)
opM = LinearOperator(T, n, n, true, true, (y, x) -> ldiv!(y, P, x))
# Solve an unsymmetric system with an incomplete LU preconditioner on GPU
CUDA.@time (x, stats) = bicgstab(A_gpu, b_gpu, M=opM, atol=1e-10, rtol=0.0)
# 2.617514 seconds (443.93 k CPU allocations: 23.450 MiB) (153 GPU allocations: 69.610 MiB, 0.03% memmgmt time)
CUDA.@time (x2, stats2) = bicgstab(A_gpu, b_gpu, atol=1e-10, rtol=0.0)
# 0.008278 seconds (4.01 k CPU allocations: 143.609 KiB) (60 GPU allocations: 1.841 MiB, 3.21% memmgmt time)
@time x1 = A_cpu \ b_cpu
# 0.009131 seconds (61 allocations: 9.614 MiB)
println(norm(A_cpu * x1 - b_cpu))
# 1.0652172968251716e-9
println(norm(A_cpu * Array(x) - b_cpu))
# 2.731299881395772e-9 |
Beta Was this translation helpful? Give feedback.
0 replies
Answer selected by
amontoison
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@liaoweiyang2017
The matrix of your example wasn't hermitian.
I fixed it and with the release 0.8 I was able to solve this complex linear system on GPU.