diff --git a/Project.toml b/Project.toml index 65e3ff9b5..e437e885f 100644 --- a/Project.toml +++ b/Project.toml @@ -32,6 +32,7 @@ UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" [weakdeps] BandedMatrices = "aae01518-5342-5314-be14-df237901396f" BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0" +blis_jll = "6136c539-28a5-5bf0-87cc-b183200dce32" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" HYPRE = "b5ffcf37-a2bd-41ab-a3da-4bd9bc8ad771" @@ -44,6 +45,7 @@ RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" [extensions] LinearSolveBandedMatricesExt = "BandedMatrices" +LinearSolveBLISExt = "blis_jll" LinearSolveBlockDiagonalsExt = "BlockDiagonals" LinearSolveCUDAExt = "CUDA" LinearSolveEnzymeExt = "Enzyme" @@ -58,6 +60,7 @@ LinearSolveRecursiveArrayToolsExt = "RecursiveArrayTools" [compat] ArrayInterface = "7.4.11" BandedMatrices = "1" +blis_jll = "0.9.0" BlockDiagonals = "0.1" ConcreteStructs = "0.2" DocStringExtensions = "0.9" diff --git a/ext/LinearSolveBLISExt.jl b/ext/LinearSolveBLISExt.jl new file mode 100644 index 000000000..f5d0553ad --- /dev/null +++ b/ext/LinearSolveBLISExt.jl @@ -0,0 +1,248 @@ +module LinearSolveBLISExt + +using Libdl +using blis_jll +using LinearAlgebra +using LinearSolve + +using LinearAlgebra: BlasInt, LU +using LinearAlgebra.LAPACK: require_one_based_indexing, chkfinite, chkstride1, + @blasfunc, chkargsok +using LinearSolve: ArrayInterface, BLISLUFactorization, @get_cacheval, LinearCache, SciMLBase + +const global libblis = dlopen(blis_jll.blis_path) + +function getrf!(A::AbstractMatrix{<:ComplexF64}; + ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))), + info = Ref{BlasInt}(), + check = false) + require_one_based_indexing(A) + check && chkfinite(A) + chkstride1(A) + m, n = size(A) + lda = max(1, stride(A, 2)) + if isempty(ipiv) + ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))) + end + ccall((@blasfunc(zgetrf_), libblis), Cvoid, + (Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF64}, + Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}), + m, n, A, lda, ipiv, info) + chkargsok(info[]) + A, ipiv, info[], info #Error code is stored in LU factorization type +end + +function getrf!(A::AbstractMatrix{<:ComplexF32}; + ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))), + info = Ref{BlasInt}(), + check = false) + require_one_based_indexing(A) + check && chkfinite(A) + chkstride1(A) + m, n = size(A) + lda = max(1, stride(A, 2)) + if isempty(ipiv) + ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))) + end + ccall((@blasfunc(cgetrf_), libblis), Cvoid, + (Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF32}, + Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}), + m, n, A, lda, ipiv, info) + chkargsok(info[]) + A, ipiv, info[], info #Error code is stored in LU factorization type +end + +function getrf!(A::AbstractMatrix{<:Float64}; + ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))), + info = Ref{BlasInt}(), + check = false) + require_one_based_indexing(A) + check && chkfinite(A) + chkstride1(A) + m, n = size(A) + lda = max(1, stride(A, 2)) + if isempty(ipiv) + ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))) + end + ccall((@blasfunc(dgetrf_), libblis), Cvoid, + (Ref{BlasInt}, Ref{BlasInt}, Ptr{Float64}, + Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}), + m, n, A, lda, ipiv, info) + chkargsok(info[]) + A, ipiv, info[], info #Error code is stored in LU factorization type +end + +function getrf!(A::AbstractMatrix{<:Float32}; + ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))), + info = Ref{BlasInt}(), + check = false) + require_one_based_indexing(A) + check && chkfinite(A) + chkstride1(A) + m, n = size(A) + lda = max(1, stride(A, 2)) + if isempty(ipiv) + ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))) + end + ccall((@blasfunc(sgetrf_), libblis), Cvoid, + (Ref{BlasInt}, Ref{BlasInt}, Ptr{Float32}, + Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}), + m, n, A, lda, ipiv, info) + chkargsok(info[]) + A, ipiv, info[], info #Error code is stored in LU factorization type +end + +function getrs!(trans::AbstractChar, + A::AbstractMatrix{<:ComplexF64}, + ipiv::AbstractVector{BlasInt}, + B::AbstractVecOrMat{<:ComplexF64}; + info = Ref{BlasInt}()) + require_one_based_indexing(A, ipiv, B) + LinearAlgebra.LAPACK.chktrans(trans) + chkstride1(A, B, ipiv) + n = LinearAlgebra.checksquare(A) + if n != size(B, 1) + throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n")) + end + if n != length(ipiv) + throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n")) + end + nrhs = size(B, 2) + ccall(("zgetrs_", libblis), Cvoid, + (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF64}, Ref{BlasInt}, + Ptr{BlasInt}, Ptr{ComplexF64}, Ref{BlasInt}, Ptr{BlasInt}, Clong), + trans, n, size(B, 2), A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info, + 1) + LinearAlgebra.LAPACK.chklapackerror(BlasInt(info[])) + B +end + +function getrs!(trans::AbstractChar, + A::AbstractMatrix{<:ComplexF32}, + ipiv::AbstractVector{BlasInt}, + B::AbstractVecOrMat{<:ComplexF32}; + info = Ref{BlasInt}()) + require_one_based_indexing(A, ipiv, B) + LinearAlgebra.LAPACK.chktrans(trans) + chkstride1(A, B, ipiv) + n = LinearAlgebra.checksquare(A) + if n != size(B, 1) + throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n")) + end + if n != length(ipiv) + throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n")) + end + nrhs = size(B, 2) + ccall(("cgetrs_", libblis), Cvoid, + (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF32}, Ref{BlasInt}, + Ptr{BlasInt}, Ptr{ComplexF32}, Ref{BlasInt}, Ptr{BlasInt}, Clong), + trans, n, size(B, 2), A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info, + 1) + LinearAlgebra.LAPACK.chklapackerror(BlasInt(info[])) + B +end + +function getrs!(trans::AbstractChar, + A::AbstractMatrix{<:Float64}, + ipiv::AbstractVector{BlasInt}, + B::AbstractVecOrMat{<:Float64}; + info = Ref{BlasInt}()) + require_one_based_indexing(A, ipiv, B) + LinearAlgebra.LAPACK.chktrans(trans) + chkstride1(A, B, ipiv) + n = LinearAlgebra.checksquare(A) + if n != size(B, 1) + throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n")) + end + if n != length(ipiv) + throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n")) + end + nrhs = size(B, 2) + ccall(("dgetrs_", libblis), Cvoid, + (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{Float64}, Ref{BlasInt}, + Ptr{BlasInt}, Ptr{Float64}, Ref{BlasInt}, Ptr{BlasInt}, Clong), + trans, n, size(B, 2), A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info, + 1) + LinearAlgebra.LAPACK.chklapackerror(BlasInt(info[])) + B +end + +function getrs!(trans::AbstractChar, + A::AbstractMatrix{<:Float32}, + ipiv::AbstractVector{BlasInt}, + B::AbstractVecOrMat{<:Float32}; + info = Ref{BlasInt}()) + require_one_based_indexing(A, ipiv, B) + LinearAlgebra.LAPACK.chktrans(trans) + chkstride1(A, B, ipiv) + n = LinearAlgebra.checksquare(A) + if n != size(B, 1) + throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n")) + end + if n != length(ipiv) + throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n")) + end + nrhs = size(B, 2) + ccall(("sgetrs_", libblis), Cvoid, + (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{Float32}, Ref{BlasInt}, + Ptr{BlasInt}, Ptr{Float32}, Ref{BlasInt}, Ptr{BlasInt}, Clong), + trans, n, size(B, 2), A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info, + 1) + LinearAlgebra.LAPACK.chklapackerror(BlasInt(info[])) + B +end + +default_alias_A(::BLISLUFactorization, ::Any, ::Any) = false +default_alias_b(::BLISLUFactorization, ::Any, ::Any) = false + +const PREALLOCATED_BLIS_LU = begin + A = rand(0, 0) + luinst = ArrayInterface.lu_instance(A), Ref{BlasInt}() +end + +function LinearSolve.init_cacheval(alg::BLISLUFactorization, A, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + PREALLOCATED_BLIS_LU +end + +function LinearSolve.init_cacheval(alg::BLISLUFactorization, A::AbstractMatrix{<:Union{Float32,ComplexF32,ComplexF64}}, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::Bool, + assumptions::OperatorAssumptions) + A = rand(eltype(A), 0, 0) + ArrayInterface.lu_instance(A), Ref{BlasInt}() +end + +function SciMLBase.solve!(cache::LinearCache, alg::BLISLUFactorization; + kwargs...) + A = cache.A + A = convert(AbstractMatrix, A) + if cache.isfresh + cacheval = @get_cacheval(cache, :BLISLUFactorization) + res = getrf!(A; ipiv = cacheval[1].ipiv, info = cacheval[2]) + fact = LU(res[1:3]...), res[4] + cache.cacheval = fact + cache.isfresh = false + end + + y = ldiv!(cache.u, @get_cacheval(cache, :BLISLUFactorization)[1], cache.b) + SciMLBase.build_linear_solution(alg, y, nothing, cache) + + #= + A, info = @get_cacheval(cache, :BLISLUFactorization) + LinearAlgebra.require_one_based_indexing(cache.u, cache.b) + m, n = size(A, 1), size(A, 2) + if m > n + Bc = copy(cache.b) + getrs!('N', A.factors, A.ipiv, Bc; info) + return copyto!(cache.u, 1, Bc, 1, n) + else + copyto!(cache.u, cache.b) + getrs!('N', A.factors, A.ipiv, cache.u; info) + end + + SciMLBase.build_linear_solution(alg, cache.u, nothing, cache) + =# +end + +end \ No newline at end of file diff --git a/src/extension_algs.jl b/src/extension_algs.jl index d2e968911..5bb274317 100644 --- a/src/extension_algs.jl +++ b/src/extension_algs.jl @@ -326,3 +326,5 @@ A wrapper over Apple's Metal GPU library. Direct calls to Metal in a way that pr to avoid allocations and automatically offloads to the GPU. """ struct MetalLUFactorization <: AbstractFactorization end + +struct BLISLUFactorization <: AbstractFactorization end