From 3e16299f356a5f371c0024f41a54d3e112199b2e Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Mon, 15 Apr 2024 10:25:57 -0400 Subject: [PATCH] Add an ordering for cuDSS --- lib/MadNLPGPU/src/cudss.jl | 23 ++++++++++++++++++++--- lib/MadNLPGPU/src/cusolverrf.jl | 6 ++++-- lib/MadNLPGPU/test/madnlpgpu_test.jl | 28 +++++++++++++++++++++++----- 3 files changed, 47 insertions(+), 10 deletions(-) diff --git a/lib/MadNLPGPU/src/cudss.jl b/lib/MadNLPGPU/src/cudss.jl index ebafed14..535fbae6 100644 --- a/lib/MadNLPGPU/src/cudss.jl +++ b/lib/MadNLPGPU/src/cudss.jl @@ -2,8 +2,10 @@ import CUDSS import SparseArrays @kwdef mutable struct CudssSolverOptions <: MadNLP.AbstractOptions - # Use LDL by default in CUDSS as Cholesky can lead to undefined behavior. + # Use LDLᵀ by default in CUDSS as Cholesky can lead to undefined behavior. cudss_algorithm::MadNLP.LinearFactorization = MadNLP.LDL + ordering::ORDERING = DEFAULT_ORDERING + perm::Vector{Cint} = Cint[] end mutable struct CUDSSSolver{T} <: MadNLP.AbstractLinearSolver{T} @@ -42,9 +44,25 @@ function CUDSSSolver( # TODO: pass config options here. config = CUDSS.CudssConfig() data = CUDSS.CudssData() - solver = CUDSS.CudssSolver(matrix, config, data) + if opt.ordering != DEFAULT_ORDERING + if opt.ordering == METIS_ORDERING + A = SparseArrays.SparseMatrixCSC(csc) + A = A + A' - LinearAlgebra.Diagonal(A) + G = Metis.graph(A, check_hermitian=false) + opt.perm, _ = Metis.permutation(G) + elseif opt.ordering == AMD_ORDERING + A = SparseArrays.SparseMatrixCSC(csc) + opt.perm = AMD.amd(A) + elseif opt.ordering == USER_ORDERING + (!isempty(opt.perm) && isperm(opt.perm)) || error("The vector opt.perm is not a valid permutation.") + else + error("The ordering $(opt.ordering) is not supported.") + end + CUDSS.cudss_set(solver, "user_perm", opt.perm) + end + x_gpu = CUDA.zeros(T, n) b_gpu = CUDA.zeros(T, n) @@ -97,4 +115,3 @@ MadNLP.improve!(M::CUDSSSolver) = false MadNLP.is_supported(::Type{CUDSSSolver},::Type{Float32}) = true MadNLP.is_supported(::Type{CUDSSSolver},::Type{Float64}) = true MadNLP.introduce(M::CUDSSSolver) = "cuDSS v$(CUDSS.version())" - diff --git a/lib/MadNLPGPU/src/cusolverrf.jl b/lib/MadNLPGPU/src/cusolverrf.jl index 9d2952d2..26c231fb 100644 --- a/lib/MadNLPGPU/src/cusolverrf.jl +++ b/lib/MadNLPGPU/src/cusolverrf.jl @@ -181,12 +181,14 @@ MadNLP.introduce(M::GLUSolver) = "GLU" Undocumented Cholesky Solver =# -@enum CUCHOLESKYORDERING begin +@enum ORDERING begin + DEFAULT_ORDERING = 0 METIS_ORDERING = 1 AMD_ORDERING = 2 + USER_ORDERING = 3 end @kwdef mutable struct CuCholeskySolverOptions <: MadNLP.AbstractOptions - ordering::CUCHOLESKYORDERING = METIS_ORDERING + ordering::ORDERING = METIS_ORDERING end mutable struct CuCholeskySolver{T} <: MadNLP.AbstractLinearSolver{T} diff --git a/lib/MadNLPGPU/test/madnlpgpu_test.jl b/lib/MadNLPGPU/test/madnlpgpu_test.jl index 8220efc9..ad2045c7 100644 --- a/lib/MadNLPGPU/test/madnlpgpu_test.jl +++ b/lib/MadNLPGPU/test/madnlpgpu_test.jl @@ -1,7 +1,7 @@ testset = [ # Temporarily commented out since LapackGPUSolver does not currently support sparse callbacks [ - "LapackGPU-CUSOLVERRF", + "CUDSS", ()->MadNLP.Optimizer( linear_solver=MadNLPGPU.CUDSSSolver, print_level=MadNLP.ERROR @@ -9,7 +9,25 @@ testset = [ [], ], [ - "LapackGPU-CUSOLVERRF", + "CUDSS", + ()->MadNLP.Optimizer( + linear_solver=MadNLPGPU.CUDSSSolver, + print_level=MadNLP.ERROR, + ordering=MadNLPGPU.AMD_ORDERING, + ), + [], + ], + [ + "CUDSS", + ()->MadNLP.Optimizer( + linear_solver=MadNLPGPU.CUDSSSolver, + print_level=MadNLP.ERROR, + ordering=MadNLPGPU.METIS_ORDERING, + ), + [], + ], + [ + "CUSOLVERRF", ()->MadNLP.Optimizer( linear_solver=MadNLPGPU.RFSolver, print_level=MadNLP.ERROR @@ -17,7 +35,7 @@ testset = [ [], ], [ - "LapackGPU-CUSOLVERRF", + "CUSOLVER-CHOLESKY", ()->MadNLP.Optimizer( linear_solver=MadNLPGPU.CuCholeskySolver, print_level=MadNLP.ERROR @@ -25,7 +43,7 @@ testset = [ [], ], [ - "LapackGPU-CUSOLVERRF", + "GLU", ()->MadNLP.Optimizer( linear_solver=MadNLPGPU.GLUSolver, print_level=MadNLP.ERROR @@ -75,6 +93,6 @@ testset = [ MadNLPTests.test_linear_solver(LapackGPUSolver,Float64) # Test LapackGPU wrapper for (name,optimizer_constructor,exclude) in testset - test_madnlp(name,optimizer_constructor,exclude; Arr= CuArray) + test_madnlp(name,optimizer_constructor,exclude; Arr=CuArray) end end