diff --git a/.ci/ci.jl b/.ci/ci.jl index ff1b4624..23ead768 100644 --- a/.ci/ci.jl +++ b/.ci/ci.jl @@ -8,7 +8,8 @@ Pkg.activate(@__DIR__) if ARGS[1] == "full" - pkgs = ["MadNLPHSL","MadNLPPardiso","MadNLPMumps","MadNLPKrylov"] + pkgs = ["MadNLPHSL","MadNLPPardiso","MadNLPMumps"] + # ,"MadNLPKrylov"] # Krylov has been discontinued since the introduction of iterative refinement on the full space. elseif ARGS[1] == "basic" pkgs = ["MadNLPMumps","MadNLPKrylov"] elseif ARGS[1] == "cuda" diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index c502bfe7..a8bb17ea 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -14,7 +14,7 @@ jobs: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@latest with: - version: '1.6' + version: '1.9' - name: Install dependencies run: julia --project=docs/ docs/install.jl - name: Build and deploy diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9bdf107b..cc59da69 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,7 +12,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - julia-version: ['1.6','^1.7'] + julia-version: ['1.9'] julia-arch: [x64] os: [ubuntu-latest,macos-latest,windows-latest] steps: @@ -23,11 +23,11 @@ jobs: - run: julia --color=yes --project=.ci .ci/ci.jl basic test-moonshot: env: - JULIA_DEPOT_PATH: /scratch/sshin/github-actions/julia_depot_madnlp + JULIA_DEPOT_PATH: /home/sshin/action-runners/MadNLP/julia-depot/ runs-on: self-hosted strategy: matrix: - julia-version: ['1.6','^1.7'] + julia-version: ['1.9'] steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@latest @@ -43,12 +43,11 @@ jobs: test-moonshot-cuda: env: CUDA_VISIBLE_DEVICES: 1 - JULIA_DEPOT_PATH: /scratch/sshin/github-actions/julia_depot_madnlp - JULIA_CUDA_USE_BINARYBUILDER: true + JULIA_DEPOT_PATH: /home/sshin/action-runners/MadNLP/julia-depot/ runs-on: self-hosted strategy: matrix: - julia-version: ['^1.7'] + julia-version: ['1.9'] steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@latest diff --git a/Project.toml b/Project.toml index 9bc77af0..559a4115 100644 --- a/Project.toml +++ b/Project.toml @@ -20,7 +20,7 @@ MadNLPTests = "0.3, 0.4" MathOptInterface = "1" NLPModels = "~0.17.2, 0.18, 0.19, 0.20" SolverCore = "~0.3" -julia = "1.6" +julia = "1.9" [extras] MINLPTests = "ee0a3090-8ee9-5cdb-b8cb-8eeba3165522" diff --git a/README.md b/README.md index 6a5b65c6..a952122d 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,12 @@ - +![logo](https://github.com/MadNLP/MadNLP.jl/blob/master/logo-full.svg) -| **Documentation** | **Build Status** | **Coverage** | **DOI** | -|:-----------------:|:----------------:|:----------------:|:----------------:| -| [![doc](https://img.shields.io/badge/docs-dev-blue.svg)](https://madnlp.github.io/MadNLP.jl/dev) | [![build](https://github.com/MadNLP/MadNLP.jl/actions/workflows/test.yml/badge.svg)](https://github.com/MadNLP/MadNLP.jl/actions/workflows/test.yml) | [![codecov](https://codecov.io/gh/MadNLP/MadNLP.jl/branch/master/graph/badge.svg?token=MBxH2AAu8Z)](https://codecov.io/gh/MadNLP/MadNLP.jl) | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5825776.svg)](https://doi.org/10.5281/zenodo.5825776) | +*A [nonlinear programming](https://en.wikipedia.org/wiki/Nonlinear_programming) solver based on the filter line-search [interior point method](https://en.wikipedia.org/wiki/Interior-point_method) (as in [Ipopt](https://github.com/coin-or/Ipopt)) that can handle/exploit diverse classes of data structures, either 
in [host](https://en.wikipedia.org/wiki/Central_processing_unit) or [device](https://en.wikipedia.org/wiki/Graphics_processing_unit) memory.* -MadNLP is a [nonlinear programming](https://en.wikipedia.org/wiki/Nonlinear_programming) (NLP) solver, purely implemented in [Julia](https://julialang.org/). MadNLP implements a filter line-search algorithm, as that used in [Ipopt](https://github.com/coin-or/Ipopt). MadNLP seeks to streamline the development of modeling and algorithmic paradigms in order to exploit structures and to make efficient use of high-performance computers. +--- -## License - -MadNLP is available under the [MIT license](https://github.com/MadNLP/MadNLP.jl/blob/master/LICENSE). +| **License** | **Documentation** | **Build Status** | **Coverage** | **DOI** | +|:-----------------:|:-----------------:|:----------------:|:----------------:|:----------------:| +| [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/MadNLP/MadNLP.jl/blob/master/LICENSE) | [![doc](https://img.shields.io/badge/docs-dev-blue.svg)](https://madnlp.github.io/MadNLP.jl/stable) [![doc](https://img.shields.io/badge/docs-dev-blue.svg)](https://madnlp.github.io/MadNLP.jl/dev) | [![build](https://github.com/MadNLP/MadNLP.jl/actions/workflows/test.yml/badge.svg)](https://github.com/MadNLP/MadNLP.jl/actions/workflows/test.yml) | [![codecov](https://codecov.io/gh/MadNLP/MadNLP.jl/branch/master/graph/badge.svg?token=MBxH2AAu8Z)](https://codecov.io/gh/MadNLP/MadNLP.jl) | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5825776.svg)](https://doi.org/10.5281/zenodo.5825776) | ## Installation ```julia pkg> add MadNLP ``` Optionally, various extension packages can be installed together: ```julia -pkg> add MadNLPHSL, MadNLPPardiso, MadNLPMumps, MadNLPGPU, MadNLPGraph, MadNLPKrylov +pkg> add MadNLPHSL, MadNLPPardiso, MadNLPMumps, MadNLPGPU ``` These packages are stored in the `lib` subdirectory within the main MadNLP repository. Some extension packages may require additional dependencies or specific hardware. For instructions on the build procedure, see the following links: * [MadNLPHSL](https://github.com/MadNLP/MadNLP.jl/tree/master/lib/MadNLPHSL) + * [MadNLPMumps](https://github.com/MadNLP/MadNLP.jl/tree/master/lib/MadNLPMumps) * [MadNLPPardiso](https://github.com/MadNLP/MadNLP.jl/tree/master/lib/MadNLPPardiso) * [MadNLPGPU](https://github.com/MadNLP/MadNLP.jl/tree/master/lib/MadNLPGPU) @@ -34,7 +33,6 @@ These packages are stored in the `lib` subdirectory within the main MadNLP repos MadNLP is interfaced with modeling packages: - [JuMP](https://github.com/jump-dev/JuMP.jl) -- [Plasmo](https://github.com/zavalab/Plasmo.jl) - [NLPModels](https://github.com/JuliaSmoothOptimizers/NLPModels.jl). Users can also pass various options to MadNLP through the modeling packages. The interface-specific syntax is shown below. To see the list of MadNLP solver options, check the [OPTIONS.md](https://github.com/MadNLP/MadNLP/blob/master/OPTIONS.md) file. 
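For example, a minimal sketch of passing options through the JuMP interface, assuming only the `print_level` and `max_iter` options that appear elsewhere in this README:

```julia
using MadNLP, JuMP

# Options are passed as keyword arguments to MadNLP.Optimizer.
model = Model(() -> MadNLP.Optimizer(print_level=MadNLP.WARN, max_iter=100))
@variable(model, x >= 1)
@NLobjective(model, Min, (x - 2)^2)
optimize!(model)
```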
@@ -58,37 +56,20 @@ model = CUTEstModel("PRIMALC1") madnlp(model, print_level=MadNLP.WARN, max_wall_time=3600) ``` -#### Plasmo interface (requires extension `MadNLPGraph`) - -```julia -using MadNLP, MadNLPGraph, Plasmo -graph = OptiGraph() -@optinode(graph,n1) -@optinode(graph,n2) -@variable(n1,0 <= x <= 2) -@variable(n1,0 <= y <= 3) -@constraint(n1,x+y <= 4) -@objective(n1,Min,x) -@variable(n2,x) -@NLnodeconstraint(n2,exp(x) >= 2) -@linkconstraint(graph,n1[:x] == n2[:x]) -MadNLP.optimize!(graph; print_level=MadNLP.DEBUG, max_iter=100) -``` - ### Linear Solvers MadNLP is interfaced with non-Julia sparse/dense linear solvers: - [Umfpack](https://people.engr.tamu.edu/davis/suitesparse.html) -- [MKL-Pardiso](https://software.intel.com/content/www/us/en/develop/documentation/mkl-developer-reference-fortran/top/sparse-solver-routines/intel-mkl-pardiso-parallel-direct-sparse-solver-interface.html) -- [MKL-Lapack](https://software.intel.com/content/www/us/en/develop/documentation/mkl-developer-reference-fortran/top/lapack-routines.html) +- [Lapack](https://software.intel.com/content/www/us/en/develop/documentation/mkl-developer-reference-fortran/top/lapack-routines.html) - [HSL solvers](http://www.hsl.rl.ac.uk/ipopt/) (requires extension) - [Pardiso](https://www.pardiso-project.org/) (requires extension) +- [Pardiso-MKL](https://software.intel.com/content/www/us/en/develop/documentation/mkl-developer-reference-fortran/top/sparse-solver-routines/intel-mkl-pardiso-parallel-direct-sparse-solver-interface.html) (requires extension) - [Mumps](http://mumps.enseeiht.fr/) (requires extension) - [cuSOLVER](https://docs.nvidia.com/cuda/cusolver/index.html) (requires extension) Each linear solver in MadNLP is a Julia type, and the `linear_solver` option should be specified by the actual type. Note that the linear solvers are always exported to `Main`. -#### Built-in Solvers: Umfpack, PardisoMKL, LapackCPU +#### Built-in Solvers: Umfpack, LapackCPU ```julia using MadNLP, JuMP @@ -134,19 +115,6 @@ using MadNLP, MadNLPGPU, JuMP model = Model(()->MadNLP.Optimizer(linear_solver=LapackGPUSolver)) ``` -#### Schur and Schwarz (requires extension `MadNLPGraph`) - -```julia -using MadNLP, MadNLPGraph, JuMP -# ... -model = Model(()->MadNLP.Optimizer(linear_solver=MadNLPSchwarz)) -model = Model(()->MadNLP.Optimizer(linear_solver=MadNLPSchur)) -``` -The solvers in `MadNLPGraph` (`Schur` and `Schwarz`) use multi-thread parallelism; thus, Julia session should be started with `-t` flag. -```sh -julia -t 16 # to use 16 threads -``` - ## Citing MadNLP.jl If you use MadNLP.jl in your research, we would greatly appreciate your citing it. diff --git a/benchmark/benchmark-cutest.jl b/benchmark/benchmark-cutest.jl index b3b79ec5..6489ebf3 100644 --- a/benchmark/benchmark-cutest.jl +++ b/benchmark/benchmark-cutest.jl @@ -1,12 +1,12 @@ include("config.jl") -Pkg.add(PackageSpec(name="CUTEst",rev="main")) # will be removed once the new CUTEst version is released @everywhere using CUTEst if SOLVER == "master" || SOLVER == "current" @everywhere begin using MadNLP, MadNLPHSL - solver = nlp -> madnlp(nlp,linear_solver=MadNLPMa57,max_wall_time=900., print_level=PRINT_LEVEL) + LinSol = @isdefined(MadNLPMa57) ? 
MadNLPMa57 : Ma57Solver # for older version of MadNLP + solver = nlp -> madnlp(nlp,linear_solver=LinSol,max_wall_time=900., print_level=PRINT_LEVEL, tol=1e-6) function get_status(code::MadNLP.Status) if code == MadNLP.SOLVE_SUCCEEDED return 1 @@ -19,7 +19,7 @@ if SOLVER == "master" || SOLVER == "current" end elseif SOLVER == "ipopt" @everywhere begin - solver = nlp -> ipopt(nlp,linear_solver="ma57",max_cpu_time=900., print_level=PRINT_LEVEL) + solver = nlp -> ipopt(nlp,linear_solver="ma57",max_cpu_time=900., print_level=PRINT_LEVEL, tol=1e-6) using NLPModelsIpopt function get_status(code::Symbol) if code == :first_order @@ -58,8 +58,9 @@ end return (status=get_status(retval.status),time=t,mem=mem,iter=retval.iter) catch e finalize(nlp) - throw(e) + return (status=3,time=0.,mem=0,iter=0) end + println("Solved $name") end function benchmark(solver,probs;warm_up_probs = [], decode = false) diff --git a/benchmark/benchmark-power.jl b/benchmark/benchmark-power.jl index 645223d3..fa8a4aa5 100644 --- a/benchmark/benchmark-power.jl +++ b/benchmark/benchmark-power.jl @@ -39,9 +39,17 @@ end if SOLVER == "master" || SOLVER == "current" @everywhere begin using MadNLP, MadNLPHSL + LinSol = @isdefined(MadNLPMa57) ? MadNLPMa57 : Ma57Solver + solver = pm -> begin - set_optimizer(pm.model,()-> - MadNLP.Optimizer(linear_solver=MadNLPMa57,max_wall_time=900.,tol=1e-6, print_level=PRINT_LEVEL)) + set_optimizer( + pm.model,()-> MadNLP.Optimizer( + linear_solver=LinSol, + max_wall_time=900., + tol=1e-6, + print_level=PRINT_LEVEL + ) + ) mem=@allocated begin t=@elapsed begin optimize_model!(pm) @@ -56,7 +64,7 @@ elseif SOLVER == "ipopt" const ITER = [-1] function ipopt_callback( - prob::IpoptProblem,alg_mod::Cint,iter_count::Cint,obj_value::Float64, + alg_mod::Cint,iter_count::Cint,obj_value::Float64, inf_pr::Float64,inf_du::Float64,mu::Float64,d_norm::Float64, regularization_size::Float64,alpha_du::Float64,alpha_pr::Float64,ls_trials::Cint) @@ -66,8 +74,14 @@ elseif SOLVER == "ipopt" solver = pm -> begin ITER[] = 0 - set_optimizer(pm.model,()-> - Ipopt.Optimizer(linear_solver="ma57",max_cpu_time=900.,tol=1e-6, print_level=PRINT_LEVEL)) + set_optimizer(pm.model, Ipopt.Optimizer) + set_optimizer_attributes( + pm.model, + "linear_solver"=>"ma57", + "max_cpu_time"=>900., + "tol"=>1e-6, + "print_level"=>PRINT_LEVEL + ) MOI.set(pm.model, Ipopt.CallbackFunction(), ipopt_callback) mem=@allocated begin t=@elapsed begin diff --git a/benchmark/config.jl b/benchmark/config.jl index 58c3acf1..740985cb 100644 --- a/benchmark/config.jl +++ b/benchmark/config.jl @@ -7,7 +7,6 @@ const QUICK = ARGS[4] == "true" const GCOFF = ARGS[5] == "true" const DECODE = ARGS[6] == "true" -addprocs(parse(Int,NP),exeflags="--project=.") Pkg.instantiate() if SOLVER == "master" @@ -21,7 +20,9 @@ elseif SOLVER == "current" elseif SOLVER == "ipopt" elseif SOLVER == "knitro" else - error("Proper ARGS should be given") + Pkg.add(PackageSpec(name="MadNLP",rev="$SOLVER")) + Pkg.add(PackageSpec(name="MadNLPHSL",rev="$SOLVER")) + Pkg.build("MadNLPHSL") end # Set verbose option @@ -34,5 +35,4 @@ else const PRINT_LEVEL = VERBOSE ? 
MadNLP.INFO : MadNLP.ERROR end -# Set quick option - +addprocs(parse(Int,NP)) diff --git a/benchmark/runbenchmarks.jl b/benchmark/runbenchmarks.jl index 09b94412..c41e1101 100644 --- a/benchmark/runbenchmarks.jl +++ b/benchmark/runbenchmarks.jl @@ -59,7 +59,7 @@ function main() joinpath(PROJECT_PATH, "Project.toml"), force=true ) - + for class in CLASSES for solver in SOLVERS launch_script = joinpath(PROJECT_PATH, "benchmark-$class.jl") diff --git a/lib/MadNLPGPU/Project.toml b/lib/MadNLPGPU/Project.toml index ccfabbc2..9d99df38 100644 --- a/lib/MadNLPGPU/Project.toml +++ b/lib/MadNLPGPU/Project.toml @@ -4,12 +4,14 @@ version = "0.6" [deps] CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +CUSOLVERRF = "a8cc9031-bad2-4722-94f5-40deabb4245c" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6" [compat] CUDA = "~4" +CUSOLVERRF = "0.2" KernelAbstractions = "0.9" MadNLP = "0.7" MadNLPTests = "0.3, 0.4" @@ -17,7 +19,8 @@ julia = "1.7" [extras] MadNLPTests = "b52a2a03-04ab-4a5f-9698-6a2deff93217" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "MadNLPTests"] +test = ["Test", "MadNLPTests", "CUDA"] diff --git a/lib/MadNLPGPU/src/MadNLPGPU.jl b/lib/MadNLPGPU/src/MadNLPGPU.jl index e8c1187f..0e8d1846 100644 --- a/lib/MadNLPGPU/src/MadNLPGPU.jl +++ b/lib/MadNLPGPU/src/MadNLPGPU.jl @@ -2,15 +2,16 @@ module MadNLPGPU import LinearAlgebra # CUDA -import CUDA: CUDA, CUBLAS, CUSOLVER, CuVector, CuMatrix, CuArray, R_64F, has_cuda, @allowscalar, runtime_version -import CUDA: CUDABackend +import CUDA: CUDA, CUSPARSE, CUBLAS, CUSOLVER, CuVector, CuMatrix, CuArray, R_64F, + has_cuda, @allowscalar, runtime_version, CUDABackend import .CUSOLVER: libcusolver, cusolverStatus_t, CuPtr, cudaDataType, cublasFillMode_t, cusolverDnHandle_t, dense_handle import .CUBLAS: handle, CUBLAS_DIAG_NON_UNIT, CUBLAS_FILL_MODE_LOWER, CUBLAS_FILL_MODE_UPPER, CUBLAS_SIDE_LEFT, CUBLAS_OP_N, CUBLAS_OP_T +import CUSOLVERRF # Kernels -import KernelAbstractions: @kernel, @index, synchronize +import KernelAbstractions: @kernel, @index, synchronize, @Const import MadNLP: NLPModels import MadNLP @@ -23,15 +24,46 @@ import MadNLP: symul!(y, A, x::CuVector{T}, α = 1., β = 0.) where T = CUBLAS.symv!('L', T(α), A, x, T(β), y) MadNLP._ger!(alpha::Number, x::CuVector{T}, y::CuVector{T}, A::CuMatrix{T}) where T = CUBLAS.ger!(alpha, x, y, A) - +function MadNLP._madnlp_unsafe_wrap(vec::VT, n, shift=1) where {T, VT <: CuVector{T}} + return view(vec,shift:shift+n-1) +end include("kernels.jl") -include("callbacks.jl") - -export CuMadNLPSolver - include("interface.jl") include("lapackgpu.jl") +include("cusolverrf.jl") + +# option preset +function MadNLP.MadNLPOptions(nlp::AbstractNLPModel{T,VT}) where {T, VT <: CuVector{T}} + + # if dense callback is defined, we use dense callback + is_dense_callback = + hasmethod(MadNLP.jac_dense!, Tuple{typeof(nlp), AbstractVector, AbstractMatrix}) && + hasmethod(MadNLP.hess_dense!, Tuple{typeof(nlp), AbstractVector, AbstractVector, AbstractMatrix}) + + callback = is_dense_callback ? MadNLP.DenseCallback : MadNLP.SparseCallback + + # if dense callback is used, we use dense condensed kkt system + kkt_system = is_dense_callback ? MadNLP.DenseCondensedKKTSystem : MadNLP.SparseCondensedKKTSystem + + # if dense kkt system, we use a dense linear solver + linear_solver = is_dense_callback ? 
LapackGPUSolver : RFSolver + + equality_treatment = is_dense_callback ? MadNLP.EnforceEquality : MadNLP.RelaxEquality + + fixed_variable_treatment = is_dense_callback ? MadNLP.MakeParameter : MadNLP.RelaxBound + + tol = MadNLP.get_tolerance(T,kkt_system) + + return MadNLP.MadNLPOptions( + callback = callback, + kkt_system = kkt_system, + linear_solver = linear_solver, + equality_treatment = equality_treatment, + fixed_variable_treatment = fixed_variable_treatment, + tol = tol + ) +end export LapackGPUSolver diff --git a/lib/MadNLPGPU/src/callbacks.jl b/lib/MadNLPGPU/src/callbacks.jl deleted file mode 100644 index 54ff09ec..00000000 --- a/lib/MadNLPGPU/src/callbacks.jl +++ /dev/null @@ -1,68 +0,0 @@ -import MadNLP: variable -import CUDA.CUBLAS: axpy! - -function _init_buffer_bfgs!(kkt::MadNLP.AbstractKKTSystem{T, VT, MT, QN}, n, m) where {T, VT, MT, QN} - haskey(kkt.etc, :x_gh) || (kkt.etc[:x_gh] = zeros(T, n)) - haskey(kkt.etc, :j_gh) || (kkt.etc[:j_gh] = zeros(T, n)) - haskey(kkt.etc, :j_gd) || (kkt.etc[:j_gd] = VT(undef, n)) - return -end - -function MadNLP.eval_lag_hess_wrapper!( - solver::MadNLP.MadNLPSolver, - kkt::MadNLP.AbstractKKTSystem{T, VT, MT, QN}, - x::MadNLP.PrimalVector{T}, - l::Vector{T}; - is_resto=false, -) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}, QN<:MadNLP.AbstractQuasiNewton{T, VT}} - nlp = solver.nlp - cnt = solver.cnt - MadNLP.@trace(solver.logger, "Update BFGS matrices.") - - qn = kkt.quasi_newton - Bk = kkt.hess - sk, yk = qn.sk, qn.yk - n = length(qn.sk) - m = size(kkt.jac, 1) - - # Load the buffers to transfer data between the host and the device. - _init_buffer_bfgs!(kkt, n, m) - x_g = get(kkt.etc, :x_gh, nothing) - j_g = get(kkt.etc, :j_gh, nothing) # on host - j_d = get(kkt.etc, :j_gd, nothing) # on device - # Init buffers. - copyto!(x_g, qn.last_x) - fill!(j_d, zero(T)) - fill!(j_g, zero(T)) - - if cnt.obj_grad_cnt >= 2 - # Build sk = x+ - x - copyto!(sk, 1, variable(solver.x), 1, n) # sₖ = x₊ - axpy!(n, -one(T), qn.last_x, sk) # sₖ = x₊ - x - - # Build yk = ∇L+ - ∇L - copyto!(yk, 1, variable(solver.f), 1, n) # yₖ = ∇f₊ - axpy!(n, -one(T), qn.last_g, yk) # yₖ = ∇f₊ - ∇f - if m > 0 - MadNLP.jtprod!(solver.jacl, kkt, l) - copyto!(j_d, 1, solver.jacl, 1, n) - yk .+= j_d # yₖ += J₊ᵀ l₊ - NLPModels.jtprod!(nlp, x_g, l, j_g) - copyto!(qn.last_jv, j_g) - axpy!(n, -one(T), qn.last_jv, yk) # yₖ += J₊ᵀ l₊ - Jᵀ l₊ - end - - if cnt.obj_grad_cnt == 2 - MadNLP.init!(qn, Bk, sk, yk) - end - MadNLP.update!(qn, Bk, sk, yk) - end - - # Backup data for next step - copyto!(qn.last_x, 1, variable(solver.x), 1, n) - copyto!(qn.last_g, 1, variable(solver.f), 1, n) - - MadNLP.compress_hessian!(kkt) - return MadNLP.get_hessian(kkt) -end - diff --git a/lib/MadNLPGPU/src/cusolverrf.jl b/lib/MadNLPGPU/src/cusolverrf.jl new file mode 100644 index 00000000..3ca6096d --- /dev/null +++ b/lib/MadNLPGPU/src/cusolverrf.jl @@ -0,0 +1,176 @@ +# MIT License + +# Copyright (c) 2020 Exanauts + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +const CuSubVector{T} = SubArray{T, 1, CUDA.CuArray{T, 1, CUDA.Mem.DeviceBuffer}, Tuple{CUDA.CuArray{Int64, 1, CUDA.Mem.DeviceBuffer}}, false} + +#= + cusolverRF +=# + +@kwdef mutable struct RFSolverOptions <: MadNLP.AbstractOptions + rf_symbolic_analysis::Symbol = :klu + rf_fast_mode::Bool = true + rf_pivot_tol::Float64 = 1e-14 + rf_boost::Float64 = 1e-14 + rf_factorization_algo::CUSOLVER.cusolverRfFactorization_t = CUSOLVER.CUSOLVERRF_FACTORIZATION_ALG0 + rf_triangular_solve_algo::CUSOLVER.cusolverRfTriangularSolve_t = CUSOLVER.CUSOLVERRF_TRIANGULAR_SOLVE_ALG1 +end + +mutable struct RFSolver{T} <: MadNLP.AbstractLinearSolver{T} + inner::Union{Nothing,CUSOLVERRF.RFLowLevel} + + tril::CUSPARSE.CuSparseMatrixCSC{T} + full::CUSPARSE.CuSparseMatrixCSR{T} + tril_to_full_view::CuSubVector{T} + buffer::CUDA.CuVector{T} + + opt::RFSolverOptions + logger::MadNLP.MadNLPLogger +end + +function RFSolver( + csc::CUSPARSE.CuSparseMatrixCSC; + opt=RFSolverOptions(), + logger=MadNLP.MadNLPLogger(), +) + n, m = size(csc) + @assert n == m + + full,tril_to_full_view = MadNLP.get_tril_to_full(csc) + + full = CUSPARSE.CuSparseMatrixCSR( + full.colPtr, + full.rowVal, + full.nzVal, + full.dims + ) + + return RFSolver( + nothing, csc, full, tril_to_full_view, similar(csc.nzVal,1), + opt, logger + ) +end + +function MadNLP.factorize!(M::RFSolver) + copyto!(M.full.nzVal, M.tril_to_full_view) + if M.inner == nothing + sym_lu = CUSOLVERRF.klu_symbolic_analysis(M.full) + M.inner = CUSOLVERRF.RFLowLevel( + sym_lu; + fast_mode=M.opt.rf_fast_mode, + factorization_algo=M.opt.rf_factorization_algo, + triangular_algo=M.opt.rf_triangular_solve_algo, + # nboost=M.opt.rf_boost, + # nzero=M.opt.rf_pivot_tol, + ) + end + CUSOLVERRF.rf_refactor!(M.inner, M.full) + return M +end + +function MadNLP.solve!(M::RFSolver{T}, x) where T + CUSOLVERRF.rf_solve!(M.inner, x) + # this is necessary to not distort the timing in MadNLP + copyto!(M.buffer, M.buffer) + synchronize(CUDABackend()) + # ----------------------------------------------------- + return x +end + +MadNLP.input_type(::Type{RFSolver}) = :csc +MadNLP.default_options(::Type{RFSolver}) = RFSolverOptions() +MadNLP.is_inertia(M::RFSolver) = false +MadNLP.improve!(M::RFSolver) = false +MadNLP.is_supported(::Type{RFSolver},::Type{Float32}) = true +MadNLP.is_supported(::Type{RFSolver},::Type{Float64}) = true +MadNLP.introduce(M::RFSolver) = "cuSolverRF" + + +#= + GLU +=# + +@kwdef mutable struct GLUSolverOptions <: MadNLP.AbstractOptions + glu_symbolic_analysis::Symbol = :klu +end + +mutable struct GLUSolver{T} <: MadNLP.AbstractLinearSolver{T} + inner::Union{Nothing,CUSOLVERRF.GLULowLevel} + + tril::CUSPARSE.CuSparseMatrixCSC{T} + full::CUSPARSE.CuSparseMatrixCSR{T} + tril_to_full_view::CuSubVector{T} + buffer::CUDA.CuVector{T} + + opt::GLUSolverOptions + logger::MadNLP.MadNLPLogger +end + +function GLUSolver( + csc::CUSPARSE.CuSparseMatrixCSC; + opt=GLUSolverOptions(), + logger=MadNLP.MadNLPLogger(), +) + n, m = size(csc) + @assert n == m + + full,tril_to_full_view = MadNLP.get_tril_to_full(csc) + + 
full = CUSPARSE.CuSparseMatrixCSR( + full.colPtr, + full.rowVal, + full.nzVal, + full.dims + ) + + return GLUSolver( + nothing, csc, full, tril_to_full_view, similar(csc.nzVal,1), + opt, logger + ) +end + +function MadNLP.factorize!(M::GLUSolver) + copyto!(M.full.nzVal, M.tril_to_full_view) + if M.inner == nothing + sym_lu = CUSOLVERRF.klu_symbolic_analysis(M.full) + M.inner = CUSOLVERRF.GLULowLevel(sym_lu) + end + CUSOLVERRF.glu_refactor!(M.inner, M.full) + return M +end + +function MadNLP.solve!(M::GLUSolver{T}, x) where T + CUSOLVERRF.glu_solve!(M.inner, x) + # this is necessary to not distort the timing in MadNLP + copyto!(M.buffer, M.buffer) + synchronize(CUDABackend()) + # ----------------------------------------------------- + return x +end + +MadNLP.input_type(::Type{GLUSolver}) = :csc +MadNLP.default_options(::Type{GLUSolver}) = GLUSolverOptions() +MadNLP.is_inertia(M::GLUSolver) = false +MadNLP.improve!(M::GLUSolver) = false +MadNLP.is_supported(::Type{GLUSolver},::Type{Float32}) = true +MadNLP.is_supported(::Type{GLUSolver},::Type{Float64}) = true +MadNLP.introduce(M::GLUSolver) = "GLU" diff --git a/lib/MadNLPGPU/src/interface.jl b/lib/MadNLPGPU/src/interface.jl index 65450c7a..22ab90ca 100644 --- a/lib/MadNLPGPU/src/interface.jl +++ b/lib/MadNLPGPU/src/interface.jl @@ -1,24 +1,356 @@ -function CuMadNLPSolver(nlp::AbstractNLPModel{T}; kwargs...) where T - opt_ipm, opt_linear_solver, logger = MadNLP.load_options(; linear_solver=LapackGPUSolver, kwargs...) - - @assert is_supported(opt_ipm.linear_solver, T) - MT = CuMatrix{T} - VT = CuVector{T} - # Determine Hessian approximation - QN = if opt_ipm.hessian_approximation == MadNLP.DENSE_BFGS - MadNLP.BFGS{T, VT} - elseif opt_ipm.hessian_approximation == MadNLP.DENSE_DAMPED_BFGS - MadNLP.DampedBFGS{T, VT} - else - MadNLP.ExactHessian{T, VT} - end - KKTSystem = if (opt_ipm.kkt_system == MadNLP.SPARSE_KKT_SYSTEM) || (opt_ipm.kkt_system == MadNLP.SPARSE_UNREDUCED_KKT_SYSTEM) - error("Sparse KKT system are currently not supported on CUDA GPU.\n" * - "Please use `DENSE_KKT_SYSTEM` or `DENSE_CONDENSED_KKT_SYSTEM` instead.") - elseif opt_ipm.kkt_system == MadNLP.DENSE_KKT_SYSTEM - MadNLP.DenseKKTSystem{T, VT, MT, QN} - elseif opt_ipm.kkt_system == MadNLP.DENSE_CONDENSED_KKT_SYSTEM - MadNLP.DenseCondensedKKTSystem{T, VT, MT, QN} - end - return MadNLP.MadNLPSolver{T,KKTSystem}(nlp, opt_ipm, opt_linear_solver; logger=logger) +function MadNLP.coo_to_csc(coo::MadNLP.SparseMatrixCOO{T,I,VT,VI}) where {T,I, VT <: CuArray, VI <: CuArray} + csc, map = MadNLP.coo_to_csc( + MadNLP.SparseMatrixCOO( + coo.m, coo.n, + Array(coo.I), Array(coo.J), Array(coo.V) + ) + ) + + return CUDA.CUSPARSE.CuSparseMatrixCSC(csc), CuArray(map) +end + +function MadNLP.get_tril_to_full(csc::CUDA.CUSPARSE.CuSparseMatrixCSC{Tv,Ti}) where {Tv,Ti} + cscind = MadNLP.SparseMatrixCSC{Int,Ti}( + MadNLP.Symmetric( + MadNLP.SparseMatrixCSC{Int,Ti}( + size(csc)..., + Array(csc.colPtr), + Array(csc.rowVal), + collect(1:MadNLP.nnz(csc)) + ), + :L + ) + ) + return CUDA.CUSPARSE.CuSparseMatrixCSC{Tv,Ti}( + CuArray(cscind.colptr), + CuArray(cscind.rowval), + CuVector{Tv}(undef,MadNLP.nnz(cscind)), + size(csc), + ), + view(csc.nzVal,CuArray(cscind.nzval)) +end + + + +function MadNLP.transfer!(dest::CUDA.CUSPARSE.CuSparseMatrixCSC, src::MadNLP.SparseMatrixCOO, map) + copyto!(view(dest.nzVal, map), src.V) +end + +function MadNLP.build_condensed_aug_coord!(kkt::MadNLP.SparseCondensedKKTSystem{T,VT,MT}) where {T, VT, MT <: CUDA.CUSPARSE.CuSparseMatrixCSC{T}} + fill!(kkt.aug_com.nzVal, zero(T)) 
+ _transfer!(CUDABackend())(kkt.aug_com.nzVal, kkt.hptr, kkt.hess_com.nzVal; ndrange = length(kkt.hptr)) + synchronize(CUDABackend()) + _transfer!(CUDABackend())(kkt.aug_com.nzVal, kkt.dptr, kkt.pr_diag; ndrange = length(kkt.dptr)) + synchronize(CUDABackend()) + if length(kkt.ext.jptrptr) > 1 # otherwise error is thrown + _jtsj!(CUDABackend())(kkt.aug_com.nzVal, kkt.jptr, kkt.ext.jptrptr, kkt.jt_csc.nzVal, kkt.diag_buffer; ndrange = length(kkt.ext.jptrptr)-1) + end + synchronize(CUDABackend()) +end + +@kernel function _transfer!(y, @Const(ptr), @Const(x)) + index = @index(Global) + @inbounds i,j = ptr[index] + @inbounds y[i] += x[j] +end + +@kernel function _jtsj!(y, @Const(ptr), @Const(ptrptr), @Const(x), @Const(s)) + index = @index(Global) + @inbounds for index2 in ptrptr[index]:ptrptr[index+1]-1 + i,(j,k,l) = ptr[index2] + y[i] += s[j] * x[k] * x[l] + end +end + + +function MadNLP.get_sparse_condensed_ext( + ::Type{VT}, + hess_com, jptr, jt_map, hess_map, + ) where {T, VT <: CuVector{T}} + + hess_com_ptr = map((i,j)->(i,j), hess_map, 1:length(hess_map)) + if length(hess_map) > 0 # otherwise error is thrown + sort!(hess_com_ptr) + end + + jt_csc_ptr = map((i,j)->(i,j), jt_map, 1:length(jt_map)) + if length(jt_map) > 0 # otherwise error is thrown + sort!(jt_csc_ptr) + end + + by = (i,j) -> i[1] != j[1] + jptrptr = MadNLP.getptr(jptr, by = by) + hess_com_ptrptr = MadNLP.getptr(hess_com_ptr, by = by) + jt_csc_ptrptr = MadNLP.getptr(jt_csc_ptr, by = by) + + diag_map_to, diag_map_fr = get_diagonal_mapping(hess_com.colPtr, hess_com.rowVal) + + return ( + jptrptr = jptrptr, + hess_com_ptr = hess_com_ptr, + hess_com_ptrptr = hess_com_ptrptr, + jt_csc_ptr = jt_csc_ptr, + jt_csc_ptrptr = jt_csc_ptrptr, + diag_map_to = diag_map_to, + diag_map_fr = diag_map_fr, + ) +end + +function MadNLP.mul!( + w::MadNLP.AbstractKKTVector{T,VT}, + kkt::MadNLP.SparseCondensedKKTSystem, + x::MadNLP.AbstractKKTVector, + alpha = one(T), beta = zero(T) + ) where {T, VT <: CuVector{T}} + + + n = size(kkt.hess_com, 1) + m = size(kkt.jt_csc, 2) + + # Decompose results + xx = view(MadNLP.full(x), 1:n) + xs = view(MadNLP.full(x), n+1:n+m) + xz = view(MadNLP.full(x), n+m+1:n+2*m) + + # Decompose buffers + wx = view(MadNLP.full(w), 1:n) + ws = view(MadNLP.full(w), n+1:n+m) + wz = view(MadNLP.full(w), n+m+1:n+2*m) + + MadNLP.mul!(wx, kkt.hess_com , xx, alpha, beta) + MadNLP.mul!(wx, kkt.hess_com', xx, alpha, one(T)) + MadNLP.mul!(wx, kkt.jt_csc, xz, alpha, beta) + diag_operation(CUDABackend())( + wx, kkt.hess_com.nzVal, xx, alpha, + kkt.ext.diag_map_to, + kkt.ext.diag_map_fr; + ndrange = length(kkt.ext.diag_map_to) + ) + synchronize(CUDABackend()) + + MadNLP.mul!(wz, kkt.jt_csc', xx, alpha, one(T)) + MadNLP.axpy!(-alpha, xz, ws) + MadNLP.axpy!(-alpha, xs, wz) + + MadNLP._kktmul!(w,x,kkt.reg,kkt.du_diag,kkt.l_lower,kkt.u_lower,kkt.l_diag,kkt.u_diag, alpha, beta) + +end +@kernel function diag_operation(y,@Const(A),@Const(x),@Const(alpha),@Const(idx_to),@Const(idx_fr)) + i = @index(Global) + @inbounds begin + to = idx_to[i] + fr = idx_fr[i] + y[to] -= alpha * A[fr] * x[to] + end +end + +function MadNLP.mul_hess_blk!( + wx::VT, + kkt::Union{MadNLP.SparseKKTSystem,MadNLP.SparseCondensedKKTSystem}, + t + ) where {T, VT <: CuVector{T}} + + n = size(kkt.hess_com, 1) + wxx = @view(wx[1:n]) + tx = @view(t[1:n]) + + MadNLP.mul!(wxx, kkt.hess_com , tx, one(T), zero(T)) + MadNLP.mul!(wxx, kkt.hess_com', tx, one(T), one(T)) + diag_operation(CUDABackend())( + wxx, kkt.hess_com.nzVal, tx, one(T), + kkt.ext.diag_map_to, + kkt.ext.diag_map_fr; 
+ ndrange = length(kkt.ext.diag_map_to) + ) + synchronize(CUDABackend()) + + fill!(@view(wx[n+1:end]), 0) + wx .+= t .* kkt.pr_diag +end + + +function get_diagonal_mapping(colptr, rowval) + + nnz = length(rowval) + inds1 = findall(map((x,y)-> ((x <= nnz) && (x != y)), @view(colptr[1:end-1]), @view(colptr[2:end]))) + ptrs = colptr[inds1] + rows = rowval[ptrs] + inds2 = findall(inds1 .== rows) + + return rows[inds2], ptrs[inds2] +end + +function MadNLP.initialize!(kkt::MadNLP.AbstractSparseKKTSystem{T,VT}) where {T, VT <: CuVector{T}} + fill!(kkt.reg, one(T)) + fill!(kkt.pr_diag, one(T)) + fill!(kkt.du_diag, zero(T)) + fill!(kkt.hess, zero(T)) + fill!(kkt.l_lower, zero(T)) + fill!(kkt.u_lower, zero(T)) + fill!(kkt.l_diag, one(T)) + fill!(kkt.u_diag, one(T)) + fill!(kkt.hess_com.nzVal, 0.) # so that mul! in the initial primal-dual solve has no effect +end + +function MadNLP.compress_hessian!(kkt::MadNLP.AbstractSparseKKTSystem{T, VT, MT}) where {T, VT, MT<:CUDA.CUSPARSE.CuSparseMatrixCSC{T, Int32}} + fill!(kkt.hess_com.nzVal, zero(T)) + _transfer!(CUDABackend())(kkt.hess_com.nzVal, kkt.ext.hess_com_ptr, kkt.ext.hess_com_ptrptr, kkt.hess_raw.V; ndrange = length(kkt.ext.hess_com_ptrptr)-1) + synchronize(CUDABackend()) +end +function MadNLP.compress_jacobian!(kkt::MadNLP.SparseCondensedKKTSystem{T, VT, MT}) where {T, VT, MT<:CUDA.CUSOLVER.CuSparseMatrixCSC{T, Int32}} + fill!(kkt.jt_csc.nzVal, zero(T)) + if length(kkt.ext.jt_csc_ptrptr) > 1 # otherwise error is thrown + _transfer!(CUDABackend())(kkt.jt_csc.nzVal, kkt.ext.jt_csc_ptr, kkt.ext.jt_csc_ptrptr, kkt.jt_coo.V; ndrange = length(kkt.ext.jt_csc_ptrptr)-1) + end + synchronize(CUDABackend()) +end + +@kernel function _transfer!(y, @Const(ptr), @Const(ptrptr), @Const(x)) + index = @index(Global) + @inbounds for index2 in ptrptr[index]:ptrptr[index+1]-1 + i,j = ptr[index2] + y[i] += x[j] + end +end + +function MadNLP._set_con_scale_sparse!(con_scale::VT, jac_I, jac_buffer) where {T, VT <: CuVector{T}} + con_scale_cpu = Array(con_scale) + MadNLP._set_con_scale_sparse!(con_scale_cpu, Array(jac_I), Array(jac_buffer)) + copyto!(con_scale, con_scale_cpu) +end + + +function MadNLP._sym_length(Jt::CUDA.CUSPARSE.CuSparseMatrixCSC) + return mapreduce( + (x,y) -> begin + z = x-y + div(z^2 + z, 2) + end, + +, + @view(Jt.colPtr[2:end]), + @view(Jt.colPtr[1:end-1]) + ) +end + +function MadNLP._build_condensed_aug_symbolic_hess(H::CUDA.CUSPARSE.CuSparseMatrixCSC{Tv,Ti}, sym, sym2) where {Tv,Ti} + ker_build_condensed_aug_symbolic_hess(CUDABackend())( + sym, sym2, H.colPtr, H.rowVal; + ndrange = size(H,2) + ) + synchronize(CUDABackend()) +end + +@kernel function ker_build_condensed_aug_symbolic_hess(sym, sym2, @Const(colptr), @Const(rowval)) + i = @index(Global) + @inbounds for j in colptr[i]:colptr[i+1]-1 + c = rowval[j] + sym[j] = (0,j,0) + sym2[j] = (c,i) + end +end + +function MadNLP._build_condensed_aug_symbolic_jt(Jt::CUDA.CUSPARSE.CuSparseMatrixCSC{Tv,Ti}, sym, sym2) where {Tv,Ti} + sym_cpu = Array(sym) + sym2_cpu = Array(sym2) + MadNLP._build_condensed_aug_symbolic_jt( + MadNLP.SparseMatrixCSC(Jt), + sym_cpu, + sym2_cpu + ) + + copyto!(sym, sym_cpu) + copyto!(sym2, sym2_cpu) +end + +function MadNLP._first_and_last_col(sym2::CuVector,ptr2) + CUDA.@allowscalar begin + first= sym2[1][2] + last = sym2[ptr2[end]][2] + end + return (first, last) +end + +MadNLP.nzval(H::CUDA.CUSPARSE.CuSparseMatrixCSC) = H.nzVal + +function MadNLP._set_colptr!(colptr::CuVector, ptr2, sym2, guide) + if length(ptr2) == 1 # otherwise error is thrown + return + end + + 
ker_set_colptr(CUDABackend())( + colptr, + sym2, + ptr2, + guide; + ndrange = length(ptr2)-1 + ) + synchronize(CUDABackend()) + return +end + + +@kernel function ker_set_colptr(colptr, @Const(sym2), @Const(ptr2), @Const(guide)) + idx = @index(Global) + @inbounds begin + i = ptr2[idx+1] + + (~, prevcol) = sym2[i-1] + (row, col) = sym2[i] + + for j in prevcol+1:col + colptr[j] = guide[i] + end + end +end + +function MadNLP._get_sparse_csc(dims, colptr::CuVector, rowval, nzval) + return CUDA.CUSPARSE.CuSparseMatrixCSC( + colptr, + rowval, + nzval, + dims, + ) +end +function MadNLP.tril_to_full!(dense::CuMatrix{T}) where T + n = size(dense,1) + _tril_to_full!(CUDABackend())(dense; ndrange = div(n^2 + n,2)) +end + +@kernel function _tril_to_full!(dense) + idx = @index(Global) + n = size(dense,1) + i,j = getij(idx,n) + + @inbounds dense[j,i] = dense[i,j] +end + +function getij(idx,n) + j = ceil(Int,((2n+1)-sqrt((2n+1)^2-8*idx))/2) + i = idx-div((j-1)*(2n-j),2) + return (i,j) +end + + + +function MadNLP.force_lower_triangular!(I::CuVector{T},J) where T + _force_lower_triangular!(CUDABackend())(I,J; ndrange=length(I)) +end + +@kernel function _force_lower_triangular!(I,J) + i = @index(Global) + + @inbounds if J[i] > I[i] + tmp=J[i] + J[i]=I[i] + I[i]=tmp + end +end + +if VERSION < v"1.10" + function MadNLP.mul_hess_blk!(wx::CuVector{T}, kkt::Union{MadNLP.DenseKKTSystem,MadNLP.DenseCondensedKKTSystem}, t) where T + n = size(kkt.hess, 1) + CUDA.CUBLAS.symv!('L', one(T), kkt.hess, @view(t[1:n]), zero(T), @view(wx[1:n])) + fill!(@view(wx[n+1:end]), 0) + wx .+= t .* kkt.pr_diag + end end diff --git a/lib/MadNLPGPU/src/kernels.jl b/lib/MadNLPGPU/src/kernels.jl index 0d484c76..306abb41 100644 --- a/lib/MadNLPGPU/src/kernels.jl +++ b/lib/MadNLPGPU/src/kernels.jl @@ -4,7 +4,7 @@ @kernel function _copy_diag!(dest, src) i = @index(Global) - dest[i] = src[i, i] + @inbounds dest[i] = src[i, i] end function MadNLP.diag!(dest::CuVector{T}, src::CuMatrix{T}) where T @@ -15,7 +15,7 @@ end @kernel function _add_diagonal!(dest, src1, src2) i = @index(Global) - dest[i, i] = src1[i] + src2[i] + @inbounds dest[i, i] = src1[i] + src2[i] end function MadNLP.diag_add!(dest::CuMatrix, src1::CuVector, src2::CuVector) @@ -41,115 +41,19 @@ end MadNLP kernels =# -# Overload MadNLP.is_valid to avoid fallback to default is_valid, slow on GPU -MadNLP.is_valid(src::CuArray) = true +# #= +# AbstractDenseKKTSystem +# =# -# Constraint scaling -function MadNLP.scale_constraints!( - nlp::AbstractNLPModel, - con_scale::AbstractVector, - jac::CuMatrix; - max_gradient=1e-8, -) - # Compute reduction on the GPU with built-in CUDA.jl function - d_con_scale = maximum(abs, jac, dims=2) - copyto!(con_scale, d_con_scale) - con_scale .= min.(1.0, max_gradient ./ con_scale) -end - -@kernel function _treat_fixed_variable_kernell!(dest, ind_fixed) - k, j = @index(Global, NTuple) - i = ind_fixed[k] - - if i == j - dest[i, i] = 1.0 - else - dest[i, j] = 0.0 - dest[j, i] = 0.0 - end -end - -function MadNLP.treat_fixed_variable!(kkt::MadNLP.AbstractKKTSystem{T, VT, MT}) where {T, VT, MT<:CuMatrix{T}} - length(kkt.ind_fixed) == 0 && return - aug = kkt.aug_com - d_ind_fixed = kkt.ind_fixed |> CuVector # TODO: allocate ind_fixed directly on the GPU - ndrange = (length(d_ind_fixed), size(aug, 1)) - ev = _treat_fixed_variable_kernell!(CUDABackend())(aug, d_ind_fixed, ndrange=ndrange) - synchronize(CUDABackend()) -end - - -#= - AbstractDenseKKTSystem -=# - -function MadNLP.jtprod!(y::AbstractVector, kkt::MadNLP.AbstractDenseKKTSystem{T, VT, MT}, 
x::AbstractVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}} - # Load buffers - m = size(kkt.jac, 1) - nx = size(kkt.jac, 2) - ns = length(kkt.ind_ineq) - haskey(kkt.etc, :jac_w1) || (kkt.etc[:jac_w1] = CuVector{T}(undef, m)) - haskey(kkt.etc, :jac_w2) || (kkt.etc[:jac_w2] = CuVector{T}(undef, nx)) - haskey(kkt.etc, :jac_w3) || (kkt.etc[:jac_w3] = CuVector{T}(undef, ns)) - - d_x = kkt.etc[:jac_w1]::VT - d_yx = kkt.etc[:jac_w2]::VT - d_ys = kkt.etc[:jac_w3]::VT - - # x and y can be host arrays. Copy them on the device to avoid side effect. - _copyto!(d_x, x) - - # / x - LinearAlgebra.mul!(d_yx, kkt.jac', d_x) - copyto!(parent(y), 1, d_yx, 1, nx) - - # / s - d_ys .= -d_x[kkt.ind_ineq] .* kkt.constraint_scaling[kkt.ind_ineq] - copyto!(parent(y), nx+1, d_ys, 1, ns) - return -end - -function MadNLP.set_aug_diagonal!(kkt::MadNLP.AbstractDenseKKTSystem{T, VT, MT}, solver::MadNLP.MadNLPSolver) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}} - haskey(kkt.etc, :pr_diag_host) || (kkt.etc[:pr_diag_host] = Vector{T}(undef, length(kkt.pr_diag))) - pr_diag_h = kkt.etc[:pr_diag_host]::Vector{T} - x = MadNLP.full(solver.x) - zl = MadNLP.full(solver.zl) - zu = MadNLP.full(solver.zu) - xl = MadNLP.full(solver.xl) - xu = MadNLP.full(solver.xu) - # Broadcast is not working as MadNLP array are allocated on the CPU, - # whereas pr_diag is allocated on the GPU - pr_diag_h .= zl./(x.-xl) .+ zu./(xu.-x) - copyto!(kkt.pr_diag, pr_diag_h) - fill!(kkt.du_diag, 0.0) -end - -#= - DenseKKTSystem kernels -=# - -function LinearAlgebra.mul!(y::AbstractVector, kkt::MadNLP.DenseKKTSystem{T, VT, MT}, x::AbstractVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}} - # Load buffers - haskey(kkt.etc, :hess_w1) || (kkt.etc[:hess_w1] = CuVector{T}(undef, size(kkt.aug_com, 1))) - haskey(kkt.etc, :hess_w2) || (kkt.etc[:hess_w2] = CuVector{T}(undef, size(kkt.aug_com, 1))) - - d_x = kkt.etc[:hess_w1]::VT - d_y = kkt.etc[:hess_w2]::VT - - # x and y can be host arrays. Copy them on the device to avoid side effect. 
- copyto!(d_x, x) - symul!(d_y, kkt.aug_com, d_x) - copyto!(y, d_y) -end -function LinearAlgebra.mul!(y::MadNLP.ReducedKKTVector, kkt::MadNLP.DenseKKTSystem{T, VT, MT}, x::MadNLP.ReducedKKTVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}} - LinearAlgebra.mul!(MadNLP.full(y), kkt, MadNLP.full(x)) -end +# #= +# DenseKKTSystem kernels +# =# @kernel function _build_dense_kkt_system_kernel!( - dest, hess, jac, pr_diag, du_diag, diag_hess, ind_ineq, con_scale, n, m, ns + dest, hess, jac, pr_diag, du_diag, diag_hess, ind_ineq, n, m, ns ) i, j = @index(Global, NTuple) - if (i <= n) + @inbounds if (i <= n) # Transfer Hessian if (i == j) dest[i, i] = pr_diag[i] + diag_hess[i] @@ -162,8 +66,8 @@ end # Transfer Jacobian wrt slack js = i - n is = ind_ineq[js] - dest[is + n + ns, is + n] = - con_scale[is] - dest[is + n, is + n + ns] = - con_scale[is] + dest[is + n + ns, is + n] = - 1 + dest[is + n, is + n + ns] = - 1 elseif i <= n + ns + m # Transfer Jacobian wrt variable x i_ = i - n - ns @@ -173,54 +77,50 @@ end dest[i, i] = du_diag[i_] end end - + function MadNLP._build_dense_kkt_system!( dest::CuMatrix, hess::CuMatrix, jac::CuMatrix, - pr_diag::CuVector, du_diag::CuVector, diag_hess::CuVector, ind_ineq, con_scale, n, m, ns -) + pr_diag::CuVector, du_diag::CuVector, diag_hess::CuVector, ind_ineq, n, m, ns +) ind_ineq_gpu = ind_ineq |> CuArray ndrange = (n+m+ns, n) ev = _build_dense_kkt_system_kernel!(CUDABackend())( - dest, hess, jac, pr_diag, du_diag, diag_hess, ind_ineq_gpu, con_scale, n, m, ns, + dest, hess, jac, pr_diag, du_diag, diag_hess, ind_ineq_gpu, n, m, ns, ndrange=ndrange ) synchronize(CUDABackend()) end -#= - DenseCondensedKKTSystem -=# -function MadNLP.get_slack_regularization(kkt::MadNLP.DenseCondensedKKTSystem{T, VT, MT}) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}} - n, ns = MadNLP.num_variables(kkt), kkt.n_ineq - return view(kkt.pr_diag, n+1:n+ns) |> Array -end -function MadNLP.get_scaling_inequalities(kkt::MadNLP.DenseCondensedKKTSystem{T, VT, MT}) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}} - return kkt.constraint_scaling[kkt.ind_ineq] |> Array -end +# #= +# DenseCondensedKKTSystem +# =# +# function MadNLP.get_slack_regularization(kkt::MadNLP.DenseCondensedKKTSystem{T, VT, MT}) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}} +# n, ns = MadNLP.num_variables(kkt), kkt.n_ineq +# return view(kkt.pr_diag, n+1:n+ns) |> Array +# end + @kernel function _build_jacobian_condensed_kernel!( - dest, jac, pr_diag, ind_ineq, con_scale, n, m_ineq, + dest, jac, diag_buffer, ind_ineq, m_ineq, ) i, j = @index(Global, NTuple) - is = ind_ineq[i] - @inbounds dest[i, j] = jac[is, j] * sqrt(pr_diag[n+i]) / con_scale[is] + @inbounds is = ind_ineq[i] + @inbounds dest[i, j] = jac[is, j] * sqrt(diag_buffer[i]) end - + function MadNLP._build_ineq_jac!( - dest::CuMatrix, jac::CuMatrix, pr_diag::CuVector, - ind_ineq::AbstractVector, ind_fixed::AbstractVector, con_scale::CuVector, n, m_ineq, + dest::CuMatrix, jac::CuMatrix, diag_buffer::CuVector, + ind_ineq::AbstractVector, n, m_ineq, ) (m_ineq == 0) && return # nothing to do if no ineq. 
constraints ind_ineq_gpu = ind_ineq |> CuArray ndrange = (m_ineq, n) ev = _build_jacobian_condensed_kernel!(CUDABackend())( - dest, jac, pr_diag, ind_ineq_gpu, con_scale, n, m_ineq, + dest, jac, diag_buffer, ind_ineq_gpu, m_ineq, ndrange=ndrange, ) synchronize(CUDABackend()) - # need to zero the fixed components - dest[:, ind_fixed] .= 0.0 return end @@ -230,20 +130,20 @@ end i, j = @index(Global, NTuple) # Transfer Hessian - if i <= n + @inbounds if i <= n if i == j - @inbounds dest[i, i] += pr_diag[i] + hess[i, i] + dest[i, i] += pr_diag[i] + hess[i, i] else - @inbounds dest[i, j] += hess[i, j] + dest[i, j] += hess[i, j] end elseif i <= n + m_eq i_ = i - n - @inbounds is = ind_eq[i_] + is = ind_eq[i_] # Jacobian / equality - @inbounds dest[i_ + n, j] = jac[is, j] - @inbounds dest[j, i_ + n] = jac[is, j] + dest[i_ + n, j] = jac[is, j] + dest[j, i_ + n] = jac[is, j] # Transfer dual regularization - @inbounds dest[i_ + n, i_ + n] = du_diag[is] + dest[i_ + n, i_ + n] = du_diag[is] end end @@ -260,48 +160,22 @@ function MadNLP._build_condensed_kkt_system!( synchronize(CUDABackend()) end -function LinearAlgebra.mul!(y::AbstractVector, kkt::MadNLP.DenseCondensedKKTSystem{T, VT, MT}, x::AbstractVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}} - if length(y) == length(x) == size(kkt.aug_com, 1) - # Load buffers - haskey(kkt.etc, :hess_w1) || (kkt.etc[:hess_w1] = CuVector{T}(undef, size(kkt.aug_com, 1))) - haskey(kkt.etc, :hess_w2) || (kkt.etc[:hess_w2] = CuVector{T}(undef, size(kkt.aug_com, 1))) - - d_x = kkt.etc[:hess_w1]::VT - d_y = kkt.etc[:hess_w2]::VT - - # Call parent() as CUDA does not dispatch on proper copyto! when passed a view - copyto!(d_x, 1, parent(x), 1, length(x)) - symul!(d_y, kkt.aug_com, d_x) - copyto!(y, d_y) - else - # Load buffers - haskey(kkt.etc, :hess_w3) || (kkt.etc[:hess_w3] = CuVector{T}(undef, length(x))) - haskey(kkt.etc, :hess_w4) || (kkt.etc[:hess_w4] = CuVector{T}(undef, length(y))) - - d_x = kkt.etc[:hess_w3]::VT - d_y = kkt.etc[:hess_w4]::VT - - # Call parent() as CUDA does not dispatch on proper copyto! when passed a view - copyto!(d_x, 1, parent(x), 1, length(x)) - MadNLP._mul_expanded!(d_y, kkt, d_x) - copyto!(y, d_y) +function MadNLP._set_diag!(A::CuMatrix, inds, a) + if !isempty(inds) + _set_diag_kernel!(CUDABackend())( + A, inds, a; + ndrange = length(inds) + ) + synchronize(CUDABackend()) end end -function LinearAlgebra.mul!(y::MadNLP.ReducedKKTVector, kkt::MadNLP.DenseCondensedKKTSystem{T, VT, MT}, x::MadNLP.ReducedKKTVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}} - LinearAlgebra.mul!(MadNLP.full(y), kkt, MadNLP.full(x)) -end -function MadNLP.jprod_ineq!(y::AbstractVector, kkt::MadNLP.DenseCondensedKKTSystem{T, VT, MT}, x::AbstractVector) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}} - # Create buffers - haskey(kkt.etc, :jac_ineq_w1) || (kkt.etc[:jac_ineq_w1] = CuVector{T}(undef, kkt.n_ineq)) - haskey(kkt.etc, :jac_ineq_w2) || (kkt.etc[:jac_ineq_w2] = CuVector{T}(undef, size(kkt.jac_ineq, 2))) - - y_d = kkt.etc[:jac_ineq_w1]::VT - x_d = kkt.etc[:jac_ineq_w2]::VT - - # Call parent() as CUDA does not dispatch on proper copyto! 
when passed a view - copyto!(x_d, 1, parent(x), 1, length(x)) - LinearAlgebra.mul!(y_d, kkt.jac_ineq, x_d) - copyto!(parent(y), 1, y_d, 1, length(y)) +@kernel function _set_diag_kernel!( + A, inds, a + ) + i = @index(Global) + @inbounds begin + index = inds[i] + A[index,index] = a + end end - diff --git a/lib/MadNLPGPU/src/lapackgpu.jl b/lib/MadNLPGPU/src/lapackgpu.jl index ebea76a4..dcb0546c 100644 --- a/lib/MadNLPGPU/src/lapackgpu.jl +++ b/lib/MadNLPGPU/src/lapackgpu.jl @@ -1,5 +1,5 @@ mutable struct LapackGPUSolver{T} <: AbstractLinearSolver{T} - dense::AbstractMatrix{T} + A::AbstractMatrix{T} fact::CuMatrix{T} rhs::CuVector{T} work::CuVector{T} @@ -14,14 +14,14 @@ end function LapackGPUSolver( - dense::MT; + A::MT; option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(), opt=LapackOptions(),logger=MadNLPLogger(), kwargs...) where {T,MT <: AbstractMatrix{T}} set_options!(opt,option_dict,kwargs...) - fact = CuMatrix{T}(undef,size(dense)) - rhs = CuVector{T}(undef,size(dense,1)) + fact = CuMatrix{T}(undef,size(A)) + rhs = CuVector{T}(undef,size(A,1)) work = CuVector{T}(undef, 1) lwork = Int32[1] work_host = Vector{T}(undef, 1) @@ -30,7 +30,7 @@ function LapackGPUSolver( etc = Dict{Symbol,Any}() - return LapackGPUSolver{T}(dense,fact,rhs,work,lwork,work_host,lwork_host,info,etc,opt,logger) + return LapackGPUSolver{T}(A,fact,rhs,work,lwork,work_host,lwork_host,info,etc,opt,logger) end function factorize!(M::LapackGPUSolver) @@ -87,10 +87,10 @@ for (sytrf,sytrf_buffer,getrf,getrf_buffer,getrs,geqrf,geqrf_buffer,ormqr,ormqr_ ) @eval begin function factorize_bunchkaufman!(M::LapackGPUSolver{$typ}) - haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.dense,1))) + haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.A,1))) haskey(M.etc,:ipiv64) || (M.etc[:ipiv64] = CuVector{Int64}(undef,length(M.etc[:ipiv]))) - copyto!(M.fact,M.dense) + _copyto!(M.fact,M.A) CUSOLVER.$sytrf_buffer( dense_handle(),Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),M.lwork) length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[])) @@ -128,9 +128,9 @@ for (sytrf,sytrf_buffer,getrf,getrf_buffer,getrs,geqrf,geqrf_buffer,ormqr,ormqr_ end function factorize_lu!(M::LapackGPUSolver{$typ}) - haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.dense,1))) - tril_to_full!(M.dense) - copyto!(M.fact,M.dense) + haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.A,1))) + _copyto!(M.fact,M.A) + tril_to_full!(M.fact) CUSOLVER.$getrf_buffer( dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)), M.fact,Int32(size(M.fact,2)),M.lwork) @@ -152,10 +152,10 @@ for (sytrf,sytrf_buffer,getrf,getrf_buffer,getrs,geqrf,geqrf_buffer,ormqr,ormqr_ end function factorize_qr!(M::LapackGPUSolver{$typ}) - haskey(M.etc,:tau) || (M.etc[:tau] = CuVector{$typ}(undef,size(M.dense,1))) + haskey(M.etc,:tau) || (M.etc[:tau] = CuVector{$typ}(undef,size(M.A,1))) haskey(M.etc,:one) || (M.etc[:one] = ones($typ,1)) - tril_to_full!(M.dense) - copyto!(M.fact,M.dense) + _copyto!(M.fact,M.A) + tril_to_full!(M.fact) CUSOLVER.$geqrf_buffer(dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),M.fact,Int32(size(M.fact,2)),M.lwork) length(M.work) < M.lwork[] && resize!(M.work,Int(M.lwork[])) CUSOLVER.$geqrf(dense_handle(),Int32(size(M.fact,1)),Int32(size(M.fact,2)),M.fact,Int32(size(M.fact,2)),M.etc[:tau],M.work,M.lwork[],M.info) @@ -176,7 +176,7 @@ for (sytrf,sytrf_buffer,getrf,getrf_buffer,getrs,geqrf,geqrf_buffer,ormqr,ormqr_ end function factorize_cholesky!(M::LapackGPUSolver{$typ}) - 
copyto!(M.fact,M.dense) + _copyto!(M.fact,M.A) CUSOLVER.$potrf_buffer( dense_handle(),CUBLAS_FILL_MODE_LOWER, Int32(size(M.fact,1)),M.fact,Int32(size(M.fact,2)),M.lwork) @@ -216,3 +216,16 @@ MadNLP.default_options(::Type{LapackGPUSolver}) = LapackOptions() is_supported(::Type{LapackGPUSolver},::Type{Float32}) = true is_supported(::Type{LapackGPUSolver},::Type{Float64}) = true +_copyto!(y,x) = copyto!(y,x) +function _copyto!(y, x::CUSPARSE.CuSparseMatrixCSC{T}) where T + n = size(y,2) + fill!(y, zero(T)) + kernel_copyto!(CUDABackend())(y, x.colPtr, x.rowVal, x.nzVal, ndrange=n) +end +@kernel function kernel_copyto!(y, @Const(colptr), @Const(rowval), @Const(nzval)) + col = @index(Global) + @inbounds for ptr in colptr[col]:colptr[col+1]-1 + row = rowval[ptr] + y[row,col] = nzval[ptr] + end +end diff --git a/lib/MadNLPGPU/test/densekkt_gpu.jl b/lib/MadNLPGPU/test/densekkt_gpu.jl index 571eee83..9c806360 100644 --- a/lib/MadNLPGPU/test/densekkt_gpu.jl +++ b/lib/MadNLPGPU/test/densekkt_gpu.jl @@ -4,36 +4,41 @@ using MadNLPTests function _compare_gpu_with_cpu(KKTSystem, n, m, ind_fixed) - opt_kkt = if (KKTSystem == MadNLP.DenseKKTSystem) - MadNLP.DENSE_KKT_SYSTEM - elseif (KKTSystem == MadNLP.DenseCondensedKKTSystem) - MadNLP.DENSE_CONDENSED_KKT_SYSTEM - end - - for (T,tol,atol) in [(Float32,1e-3,1e-1), (Float64,1e-8,1e-6)] + for (T,tol,atol) in [ + (Float32,1e-3,1e-1), + (Float64,1e-8,1e-6) + ] madnlp_options = Dict{Symbol, Any}( - :kkt_system=>opt_kkt, + :callback=>MadNLP.DenseCallback, + :kkt_system=>KKTSystem, :linear_solver=>LapackGPUSolver, :print_level=>MadNLP.ERROR, :tol=>tol ) - nlp = MadNLPTests.DenseDummyQP{T}(; n=n, m=m, fixed_variables=ind_fixed) + nlph = MadNLPTests.DenseDummyQP(zeros(T,n); m=m, fixed_variables=ind_fixed) + + # Some weird issue: there's some non-deterministic behavior in generating the model for the first call + # Not sure where this error is originating, but seems to be resolved in v1.10 + # Here, we call this twice to avoid this error + nlpd = MadNLPTests.DenseDummyQP(CUDA.zeros(T,n); m=m, fixed_variables=CuArray(ind_fixed)) + + nlpd = MadNLPTests.DenseDummyQP(CUDA.zeros(T,n); m=m, fixed_variables=CuArray(ind_fixed)) # Solve on CPU - h_solver = MadNLP.MadNLPSolver(nlp; madnlp_options...) + h_solver = MadNLPSolver(nlph; madnlp_options...) results_cpu = MadNLP.solve!(h_solver) # Solve on GPU - d_solver = MadNLPGPU.CuMadNLPSolver(nlp; madnlp_options...) + d_solver = MadNLPSolver(nlpd; madnlp_options...) 
results_gpu = MadNLP.solve!(d_solver) - @test isa(d_solver.kkt, KKTSystem{T, CuVector{T}, CuMatrix{T}}) + @test isa(d_solver.kkt, KKTSystem{T}) # # Check that both results match exactly @test h_solver.cnt.k == d_solver.cnt.k @test results_cpu.objective ≈ results_gpu.objective - @test results_cpu.solution ≈ results_gpu.solution atol=atol - @test results_cpu.multipliers ≈ results_gpu.multipliers atol=atol + @test results_cpu.solution ≈ Array(results_gpu.solution) atol=atol + @test results_cpu.multipliers ≈ Array(results_gpu.multipliers) atol=atol end end @@ -44,31 +49,33 @@ end @testset "Size: ($n, $m)" for (n, m) in [(10, 0), (10, 5), (50, 10)] _compare_gpu_with_cpu(kkt_system, n, m, Int[]) end - @testset "Fixed variables" begin - n, m = 20, 0 # warning: setting m >= 1 does not work in inertia free mode + @testset "Fixed variables" for (n,m) in [(10, 0), (10, 5), (50, 10)] _compare_gpu_with_cpu(kkt_system, n, m, Int[1, 2]) end end @testset "MadNLP: $QN + $KKT" for QN in [ - MadNLP.DENSE_BFGS, - MadNLP.DENSE_DAMPED_BFGS, + MadNLP.BFGS, + MadNLP.DampedBFGS, ], KKT in [ - MadNLP.DENSE_KKT_SYSTEM, - MadNLP.DENSE_CONDENSED_KKT_SYSTEM, + MadNLP.DenseKKTSystem, + MadNLP.DenseCondensedKKTSystem, ] @testset "Size: ($n, $m)" for (n, m) in [(10, 0), (10, 5), (50, 10)] - nlp = MadNLPTests.DenseDummyQP{Float64}(; n=n, m=m) - solver_exact = MadNLP.MadNLPSolver( + nlp = MadNLPTests.DenseDummyQP(zeros(Float64, n); m=m) + solver_exact = MadNLPSolver( nlp; + callback=MadNLP.DenseCallback, print_level=MadNLP.ERROR, kkt_system=KKT, linear_solver=LapackGPUSolver, ) results_ref = MadNLP.solve!(solver_exact) - solver_qn = MadNLPGPU.CuMadNLPSolver( + nlp = MadNLPTests.DenseDummyQP(CUDA.zeros(Float64, n); m=m) + solver_qn = MadNLPSolver( nlp; + callback=MadNLP.DenseCallback, print_level=MadNLP.ERROR, kkt_system=KKT, hessian_approximation=QN, @@ -78,7 +85,7 @@ end @test results_qn.status == MadNLP.SOLVE_SUCCEEDED @test results_qn.objective ≈ results_ref.objective atol=1e-6 - @test results_qn.solution ≈ results_ref.solution atol=1e-6 + @test Array(results_qn.solution) ≈ Array(results_ref.solution) atol=1e-6 @test solver_qn.cnt.lag_hess_cnt == 0 end end diff --git a/lib/MadNLPGPU/test/madnlpgpu_test.jl b/lib/MadNLPGPU/test/madnlpgpu_test.jl new file mode 100644 index 00000000..d9b73df4 --- /dev/null +++ b/lib/MadNLPGPU/test/madnlpgpu_test.jl @@ -0,0 +1,66 @@ +testset = [ + # Temporarily commented out since LapackGPUSolver does not currently support sparse callbacks + [ + "LapackGPU-CUSOLVERRF", + ()->MadNLP.Optimizer( + linear_solver=MadNLPGPU.RFSolver, + print_level=MadNLP.ERROR + ), + [], + ], + [ + "LapackGPU-CUSOLVERRF", + ()->MadNLP.Optimizer( + linear_solver=MadNLPGPU.GLUSolver, + print_level=MadNLP.ERROR + ), + [], + ], + [ + "LapackGPU-BUNCHKAUFMAN", + ()->MadNLP.Optimizer( + linear_solver=LapackGPUSolver, + lapack_algorithm=MadNLP.BUNCHKAUFMAN, + print_level=MadNLP.ERROR + ), + [], + ], + [ + "LapackGPU-LU", + ()->MadNLP.Optimizer( + linear_solver=LapackGPUSolver, + lapack_algorithm=MadNLP.LU, + print_level=MadNLP.ERROR + ), + [], + ], + [ + "LapackGPU-QR", + ()->MadNLP.Optimizer( + linear_solver=LapackGPUSolver, + lapack_algorithm=MadNLP.QR, + print_level=MadNLP.ERROR + ), + [], + ], + [ + "LapackGPU-CHOLESKY", + ()->MadNLP.Optimizer( + linear_solver=LapackGPUSolver, + lapack_algorithm=MadNLP.CHOLESKY, + print_level=MadNLP.ERROR + ), + ["infeasible", "lootsma", "eigmina"], # KKT system not PD + ], +] + +@testset "MadNLPGPU test" begin + + MadNLPTests.test_linear_solver(LapackGPUSolver,Float32) + 
MadNLPTests.test_linear_solver(LapackGPUSolver,Float64) + + # Test LapackGPU wrapper + for (name,optimizer_constructor,exclude) in testset + test_madnlp(name,optimizer_constructor,exclude; Arr= CuArray) + end +end diff --git a/lib/MadNLPGPU/test/runtests.jl b/lib/MadNLPGPU/test/runtests.jl index 07956b05..4e957a90 100644 --- a/lib/MadNLPGPU/test/runtests.jl +++ b/lib/MadNLPGPU/test/runtests.jl @@ -1,50 +1,9 @@ -using Test, MadNLP, MadNLPGPU, MadNLPTests +using Test, CUDA, MadNLP, MadNLPGPU, MadNLPTests -testset = [ - [ - "LapackGPU-BUNCHKAUFMAN", - ()->MadNLP.Optimizer( - linear_solver=LapackGPUSolver, - lapackgpu_algorithm=MadNLP.BUNCHKAUFMAN, - print_level=MadNLP.ERROR), - [], - ], - [ - "LapackGPU-LU", - ()->MadNLP.Optimizer( - linear_solver=LapackGPUSolver, - lapackgpu_algorithm=MadNLP.LU, - print_level=MadNLP.ERROR), - [], - ], - [ - "LapackGPU-QR", - ()->MadNLP.Optimizer( - linear_solver=LapackGPUSolver, - lapackgpu_algorithm=MadNLP.QR, - print_level=MadNLP.ERROR), - [], - ], - [ - "LapackGPU-CHOLESKY", - ()->MadNLP.Optimizer( - linear_solver=LapackGPUSolver, - lapackgpu_algorithm=MadNLP.CHOLESKY, - print_level=MadNLP.ERROR), - ["infeasible", "lootsma", "eigmina"], - ], -] -@testset "MadNLPGPU test" begin - - MadNLPTests.test_linear_solver(LapackGPUSolver,Float32) - MadNLPTests.test_linear_solver(LapackGPUSolver,Float64) +# Test DenseKKTSystem on GPU - # Test LapackGPU wrapper - for (name,optimizer_constructor,exclude) in testset - test_madnlp(name,optimizer_constructor,exclude) - end +@testset "MadNLPGPU test" begin + include("madnlpgpu_test.jl") + include("densekkt_gpu.jl") end - -# Test DenseKKTSystem on GPU -include("densekkt_gpu.jl") diff --git a/lib/MadNLPHSL/src/ma27.jl b/lib/MadNLPHSL/src/ma27.jl index 93d2b219..28b575cc 100644 --- a/lib/MadNLPHSL/src/ma27.jl +++ b/lib/MadNLPHSL/src/ma27.jl @@ -156,8 +156,11 @@ end is_inertia(::Ma27Solver) = true function inertia(M::Ma27Solver) - rank = M.info[1]==3 ? M.info[2] : rank = M.csc.n - return (rank-M.info[15],M.csc.n-rank,M.info[15]) + dim = M.csc.n + rank = (Int(M.info[1])==3) ? 
Int(M.info[2]) : dim + neg = Int(M.info[15]) + + return (rank-neg,dim-rank,neg) end function improve!(M::Ma27Solver) diff --git a/lib/MadNLPMumps/Project.toml b/lib/MadNLPMumps/Project.toml index 0dcfb8ff..1e882ef9 100644 --- a/lib/MadNLPMumps/Project.toml +++ b/lib/MadNLPMumps/Project.toml @@ -4,11 +4,14 @@ version = "0.3.2" [deps] MUMPS_seq_jll = "d7ed1dd3-d0ae-5e8e-bfb4-87a502085b8d" +OpenBLAS32_jll = "656ef2d0-ae68-5445-9ca0-591084a874a2" MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [compat] -MUMPS_seq_jll = "~5.2.1, ~5.3.5" +MUMPS_seq_jll = "~5.3, ~500.600" +OpenBLAS32_jll = "0.3" MadNLP = "0.5, 0.6, 0.7" MadNLPTests = "0.3, 0.4" StaticArrays = "1" diff --git a/lib/MadNLPMumps/src/MadNLPMumps.jl b/lib/MadNLPMumps/src/MadNLPMumps.jl index 652ff55b..8ba261ee 100644 --- a/lib/MadNLPMumps/src/MadNLPMumps.jl +++ b/lib/MadNLPMumps/src/MadNLPMumps.jl @@ -9,6 +9,16 @@ import MadNLP: SymbolicException,FactorizationException,SolveException,InertiaException, AbstractOptions, AbstractLinearSolver, set_options!, input_type, default_options, introduce, factorize!, solve!, improve!, is_inertia, is_supported, inertia, findIJ, nnz +import LinearAlgebra, OpenBLAS32_jll + +function __init__() + if VERSION ≥ v"1.9" + config = LinearAlgebra.BLAS.lbt_get_config() + if !any(lib -> lib.interface == :lp64, config.loaded_libs) + LinearAlgebra.BLAS.lbt_forward(OpenBLAS32_jll.libopenblas_path) + end + end +end const version = parsefile(joinpath(dirname(pathof(MUMPS_seq_jll)),"..","Project.toml"))["version"] @@ -22,212 +32,109 @@ const version = parsefile(joinpath(dirname(pathof(MUMPS_seq_jll)),"..","Project. mumps_scaling::Int = 77 end -if version == "5.3.5+0" - @kwdef mutable struct Struc{T} - sym::Cint = 0 - par::Cint = 0 - job::Cint = 0 - - comm_fortran::Cint = 0 - - icntl::SVector{60,Cint} = zeros(60) - keep::SVector{500,Cint} = zeros(500) - cntl::SVector{15,T} = zeros(15) - dkeep::SVector{230,T} = zeros(230) - keep8::SVector{150,Int64} = zeros(150) - n::Cint = 0 - nblk::Cint = 0 - - nz_alloc::Cint = 0 - - nz::Cint = 0 - nnz::Int64 = 0 - irn::Ptr{Cint} = C_NULL - jcn::Ptr{Cint} = C_NULL - a::Ptr{T} = C_NULL - - nz_loc::Cint = 0 - nnz_loc::Int64 = 0 - irn_loc::Ptr{Cint} = C_NULL - jcn_loc::Ptr{Cint} = C_NULL - a_loc::Ptr{T} = C_NULL ### - - nelt::Cint = 0 - eltptr::Ptr{Cint} = C_NULL - eltvar::Ptr{Cint} = C_NULL - a_elt::Ptr{T} = C_NULL - - blkptr::Ptr{Cint} = C_NULL - blkvar::Ptr{Cint} = C_NULL - - perm_in::Ptr{Cint} = C_NULL - - sym_perm::Ptr{Cint} = C_NULL - uns_perm::Ptr{Cint} = C_NULL - - colsca::Ptr{T} = C_NULL - rowsca::Ptr{T} = C_NULL - colsca_from_mumps::Cint = 0 - rowsca_from_mumps::Cint = 0 - - rhs::Ptr{T} = C_NULL - redrhs::Ptr{T} = C_NULL - rhs_sparse::Ptr{T} = C_NULL - sol_loc::Ptr{T} = C_NULL - rhs_loc::Ptr{T} = C_NULL - - irhs_sparse::Ptr{Cint} = C_NULL - irhs_ptr::Ptr{Cint} = C_NULL - isol_loc::Ptr{Cint} = C_NULL - irhs_loc::Ptr{Cint} = C_NULL - - nrhs::Cint = 0 - lrhs::Cint = 0 - lredrhs::Cint = 0 - nz_rhs::Cint = 0 - lsol_loc::Cint = 0 - nloc_rhs::Cint = 0 - lrhs_loc::Cint = 0 - - schur_mloc::Cint = 0 - schur_nloc::Cint = 0 - schur_lld::Cint = 0 - - mblock::Cint = 0 - nblock::Cint = 0 - nprow::Cint = 0 - npcol::Cint = 0 - - info::SVector{80,Cint} = zeros(80) - infog::SVector{80,Cint} = zeros(80) - rinfo::SVector{40,T} = zeros(40) - rinfog::SVector{40,T} = zeros(40) - - deficiency::Cint = 0 - pivnul_list::Ptr{Cint} = C_NULL - mapping::Ptr{Cint} = C_NULL - - size_schur::Cint = 0 - 
listvar_schur::Ptr{Cint} = C_NULL - schur::Ptr{T} = C_NULL ## - - instance_number::Cint = 0 - wk_user::Ptr{T} = C_NULL - - version_number::SVector{32,Cchar} = zeros(32) - - ooc_tmpdir::SVector{256,Cchar} = zeros(256) - ooc_prefix::SVector{64,Cchar} = zeros(64) - - write_problem::SVector{256,Cchar} = zeros(256) - lwk_user::Cint = 0 - - save_dir::SVector{256,Cchar} = zeros(256) - save_prefix::SVector{256,Cchar} = zeros(256) - - metis_options::SVector{40,Cint} = zeros(40) - end -elseif version == "5.2.1+4" - @kwdef mutable struct Struc{T} - sym::Cint = 0 - par::Cint = 0 - job::Cint = 0 - - comm_fortran::Cint = 0 - - icntl::SVector{60,Cint} = zeros(60) - keep::SVector{500,Cint} = zeros(500) - cntl::SVector{15,T} = zeros(15) - dkeep::SVector{230,T} = zeros(230) - keep8::SVector{150,Int64} = zeros(150) - n::Cint = 0 - - nz_alloc::Cint = 0 - - nz::Cint = 0 - nnz::Int64 = 0 - irn::Ptr{Cint} = C_NULL - jcn::Ptr{Cint} = C_NULL - a::Ptr{T} = C_NULL - - nz_loc::Cint = 0 - nnz_loc::Int64 = 0 - irn_loc::Ptr{Cint} = C_NULL - jcn_loc::Ptr{Cint} = C_NULL - a_loc::Ptr{T} = C_NULL ### - - nelt::Cint = 0 - eltptr::Ptr{Cint} = C_NULL - eltvar::Ptr{Cint} = C_NULL - a_elt::Ptr{T} = C_NULL - - perm_in::Ptr{Cint} = C_NULL - - sym_perm::Ptr{Cint} = C_NULL - uns_perm::Ptr{Cint} = C_NULL - - colsca::Ptr{T} = C_NULL - rowsca::Ptr{T} = C_NULL - colsca_from_mumps::Cint = 0 - rowsca_from_mumps::Cint = 0 - - rhs::Ptr{T} = C_NULL - redrhs::Ptr{T} = C_NULL - rhs_sparse::Ptr{T} = C_NULL - sol_loc::Ptr{T} = C_NULL - rhs_loc::Ptr{T} = C_NULL - - irhs_sparse::Ptr{Cint} = C_NULL - irhs_ptr::Ptr{Cint} = C_NULL - isol_loc::Ptr{Cint} = C_NULL - irhs_loc::Ptr{Cint} = C_NULL - - nrhs::Cint = 0 - lrhs::Cint = 0 - lredrhs::Cint = 0 - nz_rhs::Cint = 0 - lsol_loc::Cint = 0 - nloc_rhs::Cint = 0 - lrhs_loc::Cint = 0 - - schur_mloc::Cint = 0 - schur_nloc::Cint = 0 - schur_lld::Cint = 0 - - mblock::Cint = 0 - nblock::Cint = 0 - nprow::Cint = 0 - npcol::Cint = 0 - - info::SVector{80,Cint} = zeros(80) - infog::SVector{80,Cint} = zeros(80) - rinfo::SVector{40,T} = zeros(40) - rinfog::SVector{40,T} = zeros(40) - - deficiency::Cint = 0 - pivnul_list::Ptr{Cint} = C_NULL - mapping::Ptr{Cint} = C_NULL - - size_schur::Cint = 0 - listvar_schur::Ptr{Cint} = C_NULL - schur::Ptr{T} = C_NULL ## - - instance_number::Cint = 0 - wk_user::Ptr{T} = C_NULL - - version_number::SVector{32,Cchar} = zeros(32) - - ooc_tmpdir::SVector{256,Cchar} = zeros(256) - ooc_prefix::SVector{64,Cchar} = zeros(64) - - write_problem::SVector{256,Cchar} = zeros(256) - lwk_user::Cint = 0 - - save_dir::SVector{256,Cchar} = zeros(256) - save_prefix::SVector{256,Cchar} = zeros(256) - - metis_options::SVector{40,Cint} = zeros(40) - end +@kwdef mutable struct Struc{T} + sym::Cint = 0 + par::Cint = 0 + job::Cint = 0 + + comm_fortran::Cint = 0 + + icntl::SVector{60,Cint} = zeros(60) + keep::SVector{500,Cint} = zeros(500) + cntl::SVector{15,T} = zeros(15) + dkeep::SVector{230,T} = zeros(230) + keep8::SVector{150,Int64} = zeros(150) + n::Cint = 0 + nblk::Cint = 0 + + nz_alloc::Cint = 0 + + nz::Cint = 0 + nnz::Int64 = 0 + irn::Ptr{Cint} = C_NULL + jcn::Ptr{Cint} = C_NULL + a::Ptr{T} = C_NULL + + nz_loc::Cint = 0 + nnz_loc::Int64 = 0 + irn_loc::Ptr{Cint} = C_NULL + jcn_loc::Ptr{Cint} = C_NULL + a_loc::Ptr{T} = C_NULL ### + + nelt::Cint = 0 + eltptr::Ptr{Cint} = C_NULL + eltvar::Ptr{Cint} = C_NULL + a_elt::Ptr{T} = C_NULL + + blkptr::Ptr{Cint} = C_NULL + blkvar::Ptr{Cint} = C_NULL + + perm_in::Ptr{Cint} = C_NULL + + sym_perm::Ptr{Cint} = C_NULL + uns_perm::Ptr{Cint} = C_NULL + + colsca::Ptr{T} 
= C_NULL + rowsca::Ptr{T} = C_NULL + colsca_from_mumps::Cint = 0 + rowsca_from_mumps::Cint = 0 + + rhs::Ptr{T} = C_NULL + redrhs::Ptr{T} = C_NULL + rhs_sparse::Ptr{T} = C_NULL + sol_loc::Ptr{T} = C_NULL + rhs_loc::Ptr{T} = C_NULL + + irhs_sparse::Ptr{Cint} = C_NULL + irhs_ptr::Ptr{Cint} = C_NULL + isol_loc::Ptr{Cint} = C_NULL + irhs_loc::Ptr{Cint} = C_NULL + + nrhs::Cint = 0 + lrhs::Cint = 0 + lredrhs::Cint = 0 + nz_rhs::Cint = 0 + lsol_loc::Cint = 0 + nloc_rhs::Cint = 0 + lrhs_loc::Cint = 0 + + schur_mloc::Cint = 0 + schur_nloc::Cint = 0 + schur_lld::Cint = 0 + + mblock::Cint = 0 + nblock::Cint = 0 + nprow::Cint = 0 + npcol::Cint = 0 + + info::SVector{80,Cint} = zeros(80) + infog::SVector{80,Cint} = zeros(80) + rinfo::SVector{40,T} = zeros(40) + rinfog::SVector{40,T} = zeros(40) + + deficiency::Cint = 0 + pivnul_list::Ptr{Cint} = C_NULL + mapping::Ptr{Cint} = C_NULL + + size_schur::Cint = 0 + listvar_schur::Ptr{Cint} = C_NULL + schur::Ptr{T} = C_NULL ## + + instance_number::Cint = 0 + wk_user::Ptr{T} = C_NULL + + version_number::SVector{32,Cchar} = zeros(32) + + ooc_tmpdir::SVector{256,Cchar} = zeros(256) + ooc_prefix::SVector{64,Cchar} = zeros(64) + + write_problem::SVector{256,Cchar} = zeros(256) + lwk_user::Cint = 0 + + save_dir::SVector{256,Cchar} = zeros(256) + save_prefix::SVector{256,Cchar} = zeros(256) + + metis_options::SVector{40,Cint} = zeros(40) end mutable struct MumpsSolver{T} <: AbstractLinearSolver{T} diff --git a/lib/MadNLPTests/Project.toml b/lib/MadNLPTests/Project.toml index c354d248..1f784a76 100644 --- a/lib/MadNLPTests/Project.toml +++ b/lib/MadNLPTests/Project.toml @@ -7,6 +7,7 @@ JuMP = "4076af6c-e467-56ae-b986-b466b2749572" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6" NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" +NLPModelsJuMP = "792afdf1-32c1-5681-94e0-d7bf7a5df49e" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" @@ -15,4 +16,5 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" JuMP = "1" MadNLP = "0.5, 0.6, 0.7" NLPModels = "~0.17.2, 0.18, 0.19, 0.20" +NLPModelsJuMP = "0.12" julia = "1.6" \ No newline at end of file diff --git a/lib/MadNLPTests/src/Instances/dummy_qp.jl b/lib/MadNLPTests/src/Instances/dummy_qp.jl index 80a8b9a3..0c3db1cb 100644 --- a/lib/MadNLPTests/src/Instances/dummy_qp.jl +++ b/lib/MadNLPTests/src/Instances/dummy_qp.jl @@ -1,12 +1,18 @@ -struct DenseDummyQP{T} <: NLPModels.AbstractNLPModel{T,Vector{T}} - meta::NLPModels.NLPModelMeta{T, Vector{T}} - P::Matrix{T} # primal hessian - A::Matrix{T} # constraint jacobian - q::Vector{T} - hrows::Vector{Int} - hcols::Vector{Int} - jrows::Vector{Int} - jcols::Vector{Int} +struct DenseDummyQP{ + T, + VT <: AbstractVector{T}, + MT <: AbstractMatrix{T}, + VI <: AbstractVector{Int} + } <: NLPModels.AbstractNLPModel{T,VT} + meta::NLPModels.NLPModelMeta{T, VT} + P::MT # primal hessian + A::MT # constraint jacobian + q::VT + buffer::VT + hrows::VI + hcols::VI + jrows::VI + jcols::VI counters::NLPModels.Counters end @@ -20,7 +26,8 @@ function NLPModels.hess_structure!(qp::DenseDummyQP, I::AbstractVector{T}, J::Ab end function NLPModels.obj(qp::DenseDummyQP, x::AbstractVector) - return 0.5 * dot(x, qp.P, x) + dot(qp.q, x) + mul!(qp.buffer, qp.P, x) + return 0.5 * dot(x, qp.buffer) + dot(qp.q, x) end function NLPModels.grad!(qp::DenseDummyQP, x::AbstractVector, g::AbstractVector) mul!(g, qp.P, x) @@ -65,49 +72,57 @@ function 
MadNLP.hess_dense!(qp::DenseDummyQP{T}, x, l,hess::AbstractMatrix; obj_ copyto!(hess, obj_weight .* qp.P) end -function DenseDummyQP{T}(; n=100, m=10, fixed_variables=Int[], equality_cons=[]) where T +function DenseDummyQP( + x0::AbstractVector{T} = zeros(100); + m=10, fixed_variables=similar(x0,Int,0), equality_cons=similar(x0,Int,0) + ) where {T} + + n = length(x0) + if m >= n error("The number of constraints `m` should be less than the number of variable `n`.") end Random.seed!(1) - # Build QP problem 0.5 * x' * P * x + q' * x - P = randn(T,n , n) - P += P' # P is symmetric - P += T(100.0) * I - - q = randn(T,n) - - # Build constraints gl <= Ax <= gu - A = zeros(T,m, n) - for j in 1:m - A[j, j] = one(T) - A[j, j+1] = -one(T) - end - - x0 = zeros(T,n) - y0 = zeros(T,m) + y0 = fill!(similar(x0, m), zero(T)) + q = copyto!(similar(x0, n), randn(n)) + buffer = similar(x0, n) # Bound constraints - xu = fill(one(T), n) - xl = fill(zero(T), n) - gl = fill(zero(T), m) - gu = fill(one(T), m) + xl = fill!(similar(x0, n), zero(T)) + xu = fill!(similar(x0, n), one(T)) + gl = fill!(similar(x0, m), zero(T)) + gu = fill!(similar(x0, m), one(T)) + # Update gu to load equality constraints gu[equality_cons] .= zero(T) + xl[fixed_variables] .= @view(xu[fixed_variables]) + + # Build QP problem 0.5 * x' * P * x + q' * x + P = copyto!(similar(x0, n , n), randn(n,n)) + P = P*P' # P is symmetric + P += T(100.0) * I + - xl[fixed_variables] .= xu[fixed_variables] + # Build constraints gl <= Ax <= gu + A = fill!(similar(x0, m, n), zero(T)) + A[1:m+1:m^2] .= one(T) + A[m+1:m+1:m^2+m] .=-one(T) + # for j in 1:m + # A[j, j] = one(T) + # A[j, j+1] = -one(T) + # end - hrows = [i for i in 1:n for j in 1:i] - hcols = [j for i in 1:n for j in 1:i] nnzh = div(n * (n + 1), 2) + hrows = copyto!(similar(x0, Int, nnzh), [i for i in 1:n for j in 1:i]) + hcols = copyto!(similar(x0, Int, nnzh), [j for i in 1:n for j in 1:i]) - jrows = [j for i in 1:n for j in 1:m] - jcols = [i for i in 1:n for j in 1:m] nnzj = n * m + jrows = copyto!(similar(x0, Int, nnzj), [j for i in 1:n for j in 1:m]) + jcols = copyto!(similar(x0, Int, nnzj), [i for i in 1:n for j in 1:m]) - return DenseDummyQP{T}( + return DenseDummyQP( NLPModels.NLPModelMeta( n, ncon = m, @@ -121,9 +136,8 @@ function DenseDummyQP{T}(; n=100, m=10, fixed_variables=Int[], equality_cons=[]) ucon = gu, minimize = true ), - P,A,q,hrows,hcols,jrows,jcols, + P,A,q,buffer, + hrows,hcols,jrows,jcols, NLPModels.Counters() ) end - -DenseDummyQP(; kwargs...) = DenseDummyQP{Float64}(; kwargs...) diff --git a/lib/MadNLPTests/src/MadNLPTests.jl b/lib/MadNLPTests/src/MadNLPTests.jl index e04ab39e..f7721cb4 100644 --- a/lib/MadNLPTests/src/MadNLPTests.jl +++ b/lib/MadNLPTests/src/MadNLPTests.jl @@ -9,8 +9,9 @@ import Test: @test, @testset # Optimization packages import MadNLP import NLPModels -import JuMP: Model, @variable, @constraint, @objective, @NLconstraint , @NLobjective, optimize!, +import JuMP: Model, @variable, @NLconstraint, @NLobjective, @NLconstraint , @NLobjective, optimize!, MOI, termination_status, LowerBoundRef, UpperBoundRef, value, dual +import NLPModelsJuMP export test_madnlp, solcmp @@ -53,38 +54,52 @@ function test_linear_solver(solver,T; kwargs...) 
end end -function test_madnlp(name,optimizer_constructor::Function,exclude) +function test_madnlp(name,optimizer_constructor::Function,exclude; Arr = Array) @testset "$name" begin for f in [infeasible,unbounded,lootsma,eigmina] - !(string(f) in exclude) && f(optimizer_constructor) + !(string(f) in exclude) && f(optimizer_constructor; Arr = Arr) end end end -function infeasible(optimizer_constructor::Function) +function infeasible(optimizer_constructor::Function; Arr = Array) @testset "infeasible" begin m=Model(optimizer_constructor) @variable(m,x>=1) - @constraint(m,x==0.) - @objective(m,Min,x^2) - optimize!(m) - @test termination_status(m) == MOI.LOCALLY_INFEASIBLE + @NLconstraint(m,x==0.) + @NLobjective(m,Min,x^2) + + nlp = SparseWrapperModel( + Arr, + NLPModelsJuMP.MathOptNLPModel(m) + ) + optimizer = optimizer_constructor() + result = MadNLP.madnlp(nlp; optimizer.options...) + + @test result.status == MadNLP.INFEASIBLE_PROBLEM_DETECTED end end -function unbounded(optimizer_constructor::Function) +function unbounded(optimizer_constructor::Function; Arr = Array) @testset "unbounded" begin m=Model(optimizer_constructor) @variable(m,x,start=1) - @objective(m,Max,x^2) - optimize!(m) - @test termination_status(m) == MOI.INFEASIBLE_OR_UNBOUNDED + @NLobjective(m,Max,x^2) + + nlp = SparseWrapperModel( + Arr, + NLPModelsJuMP.MathOptNLPModel(m) + ) + optimizer = optimizer_constructor() + result = MadNLP.madnlp(nlp; optimizer.options...) + + @test result.status == MadNLP.DIVERGING_ITERATES end end -function lootsma(optimizer_constructor::Function) +function lootsma(optimizer_constructor::Function; Arr = Array) @testset "lootsma" begin - m=Model(optimizer_constructor) + m=Model() @variable(m, par == 6.) @variable(m,0 <= x[i=1:3] <= 5, start = 0.) l=[ @@ -93,22 +108,45 @@ function lootsma(optimizer_constructor::Function) ] @NLobjective(m,Min,x[1]^3 + 11. *x[1] - par*sqrt(x[1]) +x[3] ) - optimize!(m) - @test solcmp(value.(x),[0.07415998565403112,2.9848713863700236,4.0000304145340415]) - @test solcmp(dual.(l),[2.000024518601535,2.0000305441119535]) - @test solcmp(dual.(LowerBoundRef.(x)),[0.,0.,0.]) - @test solcmp(dual.(UpperBoundRef.(x)),[0.,0.,0.]) + nlp = SparseWrapperModel( + Arr, + NLPModelsJuMP.MathOptNLPModel(m) + ) + + optimizer = optimizer_constructor() + result = MadNLP.madnlp(nlp; optimizer.options...) 
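+        # Note: the wrapped JuMP model lists the fixed parameter `par` as its
+        # first variable, so the original x[1:3] are compared at indices 2:4 below.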
- @test termination_status(m) == MOI.LOCALLY_SOLVED + @test solcmp( + Array(result.solution[2:4]), + [0.07415998565403112,2.9848713863700236,4.0000304145340415]; + atol = sqrt(result.options.tol), rtol = sqrt(result.options.tol) + ) + @test solcmp( + Array(result.multipliers), + [-2.000024518601535,-2.0000305441119535]; + atol = sqrt(result.options.tol), rtol = sqrt(result.options.tol) + ) + @test solcmp( + Array(result.multipliers_L[2:4]), + [0.,0.,0.]; + atol = sqrt(result.options.tol), rtol = sqrt(result.options.tol) + ) + @test solcmp( + Array(result.multipliers_U[2:4]), + [0.,0.,0.]; + atol = sqrt(result.options.tol), rtol = sqrt(result.options.tol) + ) + + @test result.status == MadNLP.SOLVE_SUCCEEDED end end -function eigmina(optimizer_constructor::Function) +function eigmina(optimizer_constructor::Function; Arr = Array) @testset "eigmina" begin m=Model(optimizer_constructor) @variable(m,-1 <= x[1:101] <= 1,start = .1) - @constraint(m, x[1]*x[1] + x[2]*x[2] + x[3]*x[3] + x[4]*x[4] + x[5]*x[5] + x[6]*x[6] + + @NLconstraint(m, x[1]*x[1] + x[2]*x[2] + x[3]*x[3] + x[4]*x[4] + x[5]*x[5] + x[6]*x[6] + x[7]*x[7] + x[8]*x[8] + x[9]*x[9] + x[10]*x[10] + x[11]*x[11] + x[12]*x[12] + x[13]*x[13] + x[14]*x[14] + x[15]*x[15] + x[16]*x[16] + x[17]*x[17] + x[18]*x[18] + x[19]*x[19] + x[20]*x[20] + x[21]*x[21] + x[22]*x[22] + x[23]*x[23] + x[24]*x[24] + @@ -125,115 +163,122 @@ function eigmina(optimizer_constructor::Function) x[85]*x[85] + x[86]*x[86] + x[87]*x[87] + x[88]*x[88] + x[89]*x[89] + x[90]*x[90] + x[91]*x[91] + x[92]*x[92] + x[93]*x[93] + x[94]*x[94] + x[95]*x[95] + x[96]*x[96] + x[97]*x[97] + x[98]*x[98] + x[99]*x[99] + x[100]*x[100] == 1) - @constraint(m, x[1]*x[101] - x[1] == 0) - @constraint(m, x[2]*x[101] - 2*x[2] == 0) - @constraint(m, x[3]*x[101] - 3*x[3] == 0) - @constraint(m, x[4]*x[101] - 4*x[4] == 0) - @constraint(m, x[5]*x[101] - 5*x[5] == 0) - @constraint(m, x[6]*x[101] - 6*x[6] == 0) - @constraint(m, x[7]*x[101] - 7*x[7] == 0) - @constraint(m, x[8]*x[101] - 8*x[8] == 0) - @constraint(m, x[9]*x[101] - 9*x[9] == 0) - @constraint(m, x[10]*x[101] - 10*x[10] == 0) - @constraint(m, x[11]*x[101] - 11*x[11] == 0) - @constraint(m, x[12]*x[101] - 12*x[12] == 0) - @constraint(m, x[13]*x[101] - 13*x[13] == 0) - @constraint(m, x[14]*x[101] - 14*x[14] == 0) - @constraint(m, x[15]*x[101] - 15*x[15] == 0) - @constraint(m, x[16]*x[101] - 16*x[16] == 0) - @constraint(m, x[17]*x[101] - 17*x[17] == 0) - @constraint(m, x[18]*x[101] - 18*x[18] == 0) - @constraint(m, x[19]*x[101] - 19*x[19] == 0) - @constraint(m, x[20]*x[101] - 20*x[20] == 0) - @constraint(m, x[21]*x[101] - 21*x[21] == 0) - @constraint(m, x[22]*x[101] - 22*x[22] == 0) - @constraint(m, x[23]*x[101] - 23*x[23] == 0) - @constraint(m, x[24]*x[101] - 24*x[24] == 0) - @constraint(m, x[25]*x[101] - 25*x[25] == 0) - @constraint(m, x[26]*x[101] - 26*x[26] == 0) - @constraint(m, x[27]*x[101] - 27*x[27] == 0) - @constraint(m, x[28]*x[101] - 28*x[28] == 0) - @constraint(m, x[29]*x[101] - 29*x[29] == 0) - @constraint(m, x[30]*x[101] - 30*x[30] == 0) - @constraint(m, x[31]*x[101] - 31*x[31] == 0) - @constraint(m, x[32]*x[101] - 32*x[32] == 0) - @constraint(m, x[33]*x[101] - 33*x[33] == 0) - @constraint(m, x[34]*x[101] - 34*x[34] == 0) - @constraint(m, x[35]*x[101] - 35*x[35] == 0) - @constraint(m, x[36]*x[101] - 36*x[36] == 0) - @constraint(m, x[37]*x[101] - 37*x[37] == 0) - @constraint(m, x[38]*x[101] - 38*x[38] == 0) - @constraint(m, x[39]*x[101] - 39*x[39] == 0) - @constraint(m, x[40]*x[101] - 40*x[40] == 0) - @constraint(m, x[41]*x[101] - 
41*x[41] == 0) - @constraint(m, x[42]*x[101] - 42*x[42] == 0) - @constraint(m, x[43]*x[101] - 43*x[43] == 0) - @constraint(m, x[44]*x[101] - 44*x[44] == 0) - @constraint(m, x[45]*x[101] - 45*x[45] == 0) - @constraint(m, x[46]*x[101] - 46*x[46] == 0) - @constraint(m, x[47]*x[101] - 47*x[47] == 0) - @constraint(m, x[48]*x[101] - 48*x[48] == 0) - @constraint(m, x[49]*x[101] - 49*x[49] == 0) - @constraint(m, x[50]*x[101] - 50*x[50] == 0) - @constraint(m, x[51]*x[101] - 51*x[51] == 0) - @constraint(m, x[52]*x[101] - 52*x[52] == 0) - @constraint(m, x[53]*x[101] - 53*x[53] == 0) - @constraint(m, x[54]*x[101] - 54*x[54] == 0) - @constraint(m, x[55]*x[101] - 55*x[55] == 0) - @constraint(m, x[56]*x[101] - 56*x[56] == 0) - @constraint(m, x[57]*x[101] - 57*x[57] == 0) - @constraint(m, x[58]*x[101] - 58*x[58] == 0) - @constraint(m, x[59]*x[101] - 59*x[59] == 0) - @constraint(m, x[60]*x[101] - 60*x[60] == 0) - @constraint(m, x[61]*x[101] - 61*x[61] == 0) - @constraint(m, x[62]*x[101] - 62*x[62] == 0) - @constraint(m, x[63]*x[101] - 63*x[63] == 0) - @constraint(m, x[64]*x[101] - 64*x[64] == 0) - @constraint(m, x[65]*x[101] - 65*x[65] == 0) - @constraint(m, x[66]*x[101] - 66*x[66] == 0) - @constraint(m, x[67]*x[101] - 67*x[67] == 0) - @constraint(m, x[68]*x[101] - 68*x[68] == 0) - @constraint(m, x[69]*x[101] - 69*x[69] == 0) - @constraint(m, x[70]*x[101] - 70*x[70] == 0) - @constraint(m, x[71]*x[101] - 71*x[71] == 0) - @constraint(m, x[72]*x[101] - 72*x[72] == 0) - @constraint(m, x[73]*x[101] - 73*x[73] == 0) - @constraint(m, x[74]*x[101] - 74*x[74] == 0) - @constraint(m, x[75]*x[101] - 75*x[75] == 0) - @constraint(m, x[76]*x[101] - 76*x[76] == 0) - @constraint(m, x[77]*x[101] - 77*x[77] == 0) - @constraint(m, x[78]*x[101] - 78*x[78] == 0) - @constraint(m, x[79]*x[101] - 79*x[79] == 0) - @constraint(m, x[80]*x[101] - 80*x[80] == 0) - @constraint(m, x[81]*x[101] - 81*x[81] == 0) - @constraint(m, x[82]*x[101] - 82*x[82] == 0) - @constraint(m, x[83]*x[101] - 83*x[83] == 0) - @constraint(m, x[84]*x[101] - 84*x[84] == 0) - @constraint(m, x[85]*x[101] - 85*x[85] == 0) - @constraint(m, x[86]*x[101] - 86*x[86] == 0) - @constraint(m, x[87]*x[101] - 87*x[87] == 0) - @constraint(m, x[88]*x[101] - 88*x[88] == 0) - @constraint(m, x[89]*x[101] - 89*x[89] == 0) - @constraint(m, x[90]*x[101] - 90*x[90] == 0) - @constraint(m, x[91]*x[101] - 91*x[91] == 0) - @constraint(m, x[92]*x[101] - 92*x[92] == 0) - @constraint(m, x[93]*x[101] - 93*x[93] == 0) - @constraint(m, x[94]*x[101] - 94*x[94] == 0) - @constraint(m, x[95]*x[101] - 95*x[95] == 0) - @constraint(m, x[96]*x[101] - 96*x[96] == 0) - @constraint(m, x[97]*x[101] - 97*x[97] == 0) - @constraint(m, x[98]*x[101] - 98*x[98] == 0) - @constraint(m, x[99]*x[101] - 99*x[99] == 0) - @constraint(m, x[100]*x[101] - 100*x[100] == 0) - @objective(m, Min, x[101]) - optimize!(m) - - @test termination_status(m) == MOI.LOCALLY_SOLVED + @NLconstraint(m, x[1]*x[101] - x[1] == 0) + @NLconstraint(m, x[2]*x[101] - 2*x[2] == 0) + @NLconstraint(m, x[3]*x[101] - 3*x[3] == 0) + @NLconstraint(m, x[4]*x[101] - 4*x[4] == 0) + @NLconstraint(m, x[5]*x[101] - 5*x[5] == 0) + @NLconstraint(m, x[6]*x[101] - 6*x[6] == 0) + @NLconstraint(m, x[7]*x[101] - 7*x[7] == 0) + @NLconstraint(m, x[8]*x[101] - 8*x[8] == 0) + @NLconstraint(m, x[9]*x[101] - 9*x[9] == 0) + @NLconstraint(m, x[10]*x[101] - 10*x[10] == 0) + @NLconstraint(m, x[11]*x[101] - 11*x[11] == 0) + @NLconstraint(m, x[12]*x[101] - 12*x[12] == 0) + @NLconstraint(m, x[13]*x[101] - 13*x[13] == 0) + @NLconstraint(m, x[14]*x[101] - 14*x[14] == 0) + 
@NLconstraint(m, x[15]*x[101] - 15*x[15] == 0) + @NLconstraint(m, x[16]*x[101] - 16*x[16] == 0) + @NLconstraint(m, x[17]*x[101] - 17*x[17] == 0) + @NLconstraint(m, x[18]*x[101] - 18*x[18] == 0) + @NLconstraint(m, x[19]*x[101] - 19*x[19] == 0) + @NLconstraint(m, x[20]*x[101] - 20*x[20] == 0) + @NLconstraint(m, x[21]*x[101] - 21*x[21] == 0) + @NLconstraint(m, x[22]*x[101] - 22*x[22] == 0) + @NLconstraint(m, x[23]*x[101] - 23*x[23] == 0) + @NLconstraint(m, x[24]*x[101] - 24*x[24] == 0) + @NLconstraint(m, x[25]*x[101] - 25*x[25] == 0) + @NLconstraint(m, x[26]*x[101] - 26*x[26] == 0) + @NLconstraint(m, x[27]*x[101] - 27*x[27] == 0) + @NLconstraint(m, x[28]*x[101] - 28*x[28] == 0) + @NLconstraint(m, x[29]*x[101] - 29*x[29] == 0) + @NLconstraint(m, x[30]*x[101] - 30*x[30] == 0) + @NLconstraint(m, x[31]*x[101] - 31*x[31] == 0) + @NLconstraint(m, x[32]*x[101] - 32*x[32] == 0) + @NLconstraint(m, x[33]*x[101] - 33*x[33] == 0) + @NLconstraint(m, x[34]*x[101] - 34*x[34] == 0) + @NLconstraint(m, x[35]*x[101] - 35*x[35] == 0) + @NLconstraint(m, x[36]*x[101] - 36*x[36] == 0) + @NLconstraint(m, x[37]*x[101] - 37*x[37] == 0) + @NLconstraint(m, x[38]*x[101] - 38*x[38] == 0) + @NLconstraint(m, x[39]*x[101] - 39*x[39] == 0) + @NLconstraint(m, x[40]*x[101] - 40*x[40] == 0) + @NLconstraint(m, x[41]*x[101] - 41*x[41] == 0) + @NLconstraint(m, x[42]*x[101] - 42*x[42] == 0) + @NLconstraint(m, x[43]*x[101] - 43*x[43] == 0) + @NLconstraint(m, x[44]*x[101] - 44*x[44] == 0) + @NLconstraint(m, x[45]*x[101] - 45*x[45] == 0) + @NLconstraint(m, x[46]*x[101] - 46*x[46] == 0) + @NLconstraint(m, x[47]*x[101] - 47*x[47] == 0) + @NLconstraint(m, x[48]*x[101] - 48*x[48] == 0) + @NLconstraint(m, x[49]*x[101] - 49*x[49] == 0) + @NLconstraint(m, x[50]*x[101] - 50*x[50] == 0) + @NLconstraint(m, x[51]*x[101] - 51*x[51] == 0) + @NLconstraint(m, x[52]*x[101] - 52*x[52] == 0) + @NLconstraint(m, x[53]*x[101] - 53*x[53] == 0) + @NLconstraint(m, x[54]*x[101] - 54*x[54] == 0) + @NLconstraint(m, x[55]*x[101] - 55*x[55] == 0) + @NLconstraint(m, x[56]*x[101] - 56*x[56] == 0) + @NLconstraint(m, x[57]*x[101] - 57*x[57] == 0) + @NLconstraint(m, x[58]*x[101] - 58*x[58] == 0) + @NLconstraint(m, x[59]*x[101] - 59*x[59] == 0) + @NLconstraint(m, x[60]*x[101] - 60*x[60] == 0) + @NLconstraint(m, x[61]*x[101] - 61*x[61] == 0) + @NLconstraint(m, x[62]*x[101] - 62*x[62] == 0) + @NLconstraint(m, x[63]*x[101] - 63*x[63] == 0) + @NLconstraint(m, x[64]*x[101] - 64*x[64] == 0) + @NLconstraint(m, x[65]*x[101] - 65*x[65] == 0) + @NLconstraint(m, x[66]*x[101] - 66*x[66] == 0) + @NLconstraint(m, x[67]*x[101] - 67*x[67] == 0) + @NLconstraint(m, x[68]*x[101] - 68*x[68] == 0) + @NLconstraint(m, x[69]*x[101] - 69*x[69] == 0) + @NLconstraint(m, x[70]*x[101] - 70*x[70] == 0) + @NLconstraint(m, x[71]*x[101] - 71*x[71] == 0) + @NLconstraint(m, x[72]*x[101] - 72*x[72] == 0) + @NLconstraint(m, x[73]*x[101] - 73*x[73] == 0) + @NLconstraint(m, x[74]*x[101] - 74*x[74] == 0) + @NLconstraint(m, x[75]*x[101] - 75*x[75] == 0) + @NLconstraint(m, x[76]*x[101] - 76*x[76] == 0) + @NLconstraint(m, x[77]*x[101] - 77*x[77] == 0) + @NLconstraint(m, x[78]*x[101] - 78*x[78] == 0) + @NLconstraint(m, x[79]*x[101] - 79*x[79] == 0) + @NLconstraint(m, x[80]*x[101] - 80*x[80] == 0) + @NLconstraint(m, x[81]*x[101] - 81*x[81] == 0) + @NLconstraint(m, x[82]*x[101] - 82*x[82] == 0) + @NLconstraint(m, x[83]*x[101] - 83*x[83] == 0) + @NLconstraint(m, x[84]*x[101] - 84*x[84] == 0) + @NLconstraint(m, x[85]*x[101] - 85*x[85] == 0) + @NLconstraint(m, x[86]*x[101] - 86*x[86] == 0) + @NLconstraint(m, 
x[87]*x[101] - 87*x[87] == 0) + @NLconstraint(m, x[88]*x[101] - 88*x[88] == 0) + @NLconstraint(m, x[89]*x[101] - 89*x[89] == 0) + @NLconstraint(m, x[90]*x[101] - 90*x[90] == 0) + @NLconstraint(m, x[91]*x[101] - 91*x[91] == 0) + @NLconstraint(m, x[92]*x[101] - 92*x[92] == 0) + @NLconstraint(m, x[93]*x[101] - 93*x[93] == 0) + @NLconstraint(m, x[94]*x[101] - 94*x[94] == 0) + @NLconstraint(m, x[95]*x[101] - 95*x[95] == 0) + @NLconstraint(m, x[96]*x[101] - 96*x[96] == 0) + @NLconstraint(m, x[97]*x[101] - 97*x[97] == 0) + @NLconstraint(m, x[98]*x[101] - 98*x[98] == 0) + @NLconstraint(m, x[99]*x[101] - 99*x[99] == 0) + @NLconstraint(m, x[100]*x[101] - 100*x[100] == 0) + @NLobjective(m, Min, x[101]) + + nlp = SparseWrapperModel( + Arr, + NLPModelsJuMP.MathOptNLPModel(m) + ) + optimizer = optimizer_constructor() + result = MadNLP.madnlp(nlp; optimizer.options...) + + @test result.status == MadNLP.SOLVE_SUCCEEDED end end include("Instances/dummy_qp.jl") include("Instances/hs15.jl") include("Instances/nls.jl") +include("wrapper.jl") end # module diff --git a/lib/MadNLPTests/src/wrapper.jl b/lib/MadNLPTests/src/wrapper.jl new file mode 100644 index 00000000..86098c05 --- /dev/null +++ b/lib/MadNLPTests/src/wrapper.jl @@ -0,0 +1,215 @@ +abstract type AbstractWrapperModel{T,VT} <: NLPModels.AbstractNLPModel{T,VT} end + +struct DenseWrapperModel{T,VT,T2,VT2,MT2, I <: NLPModels.AbstractNLPModel{T2,VT2}} <: AbstractWrapperModel{T,VT} + inner::I + + x::VT2 + y::VT2 + + con::VT2 + grad::VT2 + jac::MT2 + hess::MT2 + + meta::NLPModels.NLPModelMeta{T, VT} + counters::NLPModels.Counters +end + + +struct SparseWrapperModel{T,VT,T2,VI2,VT2,I <: NLPModels.AbstractNLPModel{T2,VT2}} <: AbstractWrapperModel{T,VT} + inner::I + + jrows::VI2 + jcols::VI2 + hrows::VI2 + hcols::VI2 + + x::VT2 + y::VT2 + + con::VT2 + grad::VT2 + jac::VT2 + hess::VT2 + + meta::NLPModels.NLPModelMeta{T, VT} + counters::NLPModels.Counters +end + + +""" +DenseWrapperModel(Arr, m) + +Construct a DenseWrapperModel (a subtype of `NLPModels.AbstractNLPModel{T,typeof(Arr(m.meta.x0))}`) from a generic NLP Model. + +DenseWrapperModel can be used to interface GPU-accelerated NLP models with solvers runing on CPUs. +""" +function DenseWrapperModel(Arr, m::NLPModels.AbstractNLPModel) + return DenseWrapperModel( + m, + similar(m.meta.x0, m.meta.nvar), + similar(m.meta.x0, m.meta.ncon), + similar(m.meta.x0, m.meta.ncon), + similar(m.meta.x0, m.meta.nvar), + similar(m.meta.x0, m.meta.ncon, m.meta.nvar), + similar(m.meta.x0, m.meta.nvar, m.meta.nvar), + NLPModels.NLPModelMeta( + m.meta.nvar, + x0 = Arr(m.meta.x0), + lvar = Arr(m.meta.lvar), + uvar = Arr(m.meta.uvar), + ncon = m.meta.ncon, + y0 = Arr(m.meta.y0), + lcon = Arr(m.meta.lcon), + ucon = Arr(m.meta.ucon), + nnzj = m.meta.nnzj, + nnzh = m.meta.nnzh, + minimize = m.meta.minimize + ), + NLPModels.Counters() + ) +end + +""" +SparseWrapperModel(Arr, m) + +Construct a SparseWrapperModel (a subtype of `NLPModels.AbstractNLPModel{T,typeof(Arr(m.meta.x0))}`) from a generic NLP Model. + +SparseWrapperModel can be used to interface GPU-accelerated NLP models with solvers runing on CPUs. 
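+
+A minimal usage sketch (illustration only; assumes CUDA.jl is loaded and `m` is a
+CPU-based `NLPModels.AbstractNLPModel`):
+
+    using CUDA, MadNLP, MadNLPTests
+    gpu_nlp = MadNLPTests.SparseWrapperModel(CuArray, m)
+    result = MadNLP.madnlp(gpu_nlp; print_level=MadNLP.ERROR)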
+""" +function SparseWrapperModel(Arr, m::NLPModels.AbstractNLPModel) + return SparseWrapperModel( + m, + similar(m.meta.x0, Int, m.meta.nnzj), + similar(m.meta.x0, Int, m.meta.nnzj), + similar(m.meta.x0, Int, m.meta.nnzh), + similar(m.meta.x0, Int, m.meta.nnzh), + similar(m.meta.x0, m.meta.nvar), + similar(m.meta.x0, m.meta.ncon), + similar(m.meta.x0, m.meta.ncon), + similar(m.meta.x0, m.meta.nvar), + similar(m.meta.x0, m.meta.nnzj), + similar(m.meta.x0, m.meta.nnzh), + NLPModels.NLPModelMeta( + m.meta.nvar, + x0 = Arr(m.meta.x0), + lvar = Arr(m.meta.lvar), + uvar = Arr(m.meta.uvar), + ncon = m.meta.ncon, + y0 = Arr(m.meta.y0), + lcon = Arr(m.meta.lcon), + ucon = Arr(m.meta.ucon), + nnzj = m.meta.nnzj, + nnzh = m.meta.nnzh, + minimize = m.meta.minimize + ), + NLPModels.Counters() + ) +end + +function NLPModels.obj( + m::M, + x::V + ) where {M <: AbstractWrapperModel, V <: AbstractVector} + + copyto!(m.x, x) + return NLPModels.obj(m.inner, m.x) +end +function NLPModels.cons!( + m::M, + x::V, + g::V + ) where {M <: AbstractWrapperModel, V <: AbstractVector} + + copyto!(m.x, x) + NLPModels.cons!(m.inner, m.x, m.con) + copyto!(g, m.con) + return +end +function NLPModels.grad!( + m::M, + x::V, + f::V + ) where {M <: AbstractWrapperModel, V <: AbstractVector} + + copyto!(m.x, x) + NLPModels.grad!(m.inner, m.x, m.grad) + copyto!(f, m.grad) + return +end + +function NLPModels.jac_structure!( + m::M, + rows::V, + cols::V + ) where {M <: SparseWrapperModel, V <: AbstractVector} + + NLPModels.jac_structure!(m.inner, m.jrows, m.jcols) + copyto!(rows, m.jrows) + copyto!(cols, m.jcols) +end + +function NLPModels.hess_structure!( + m::M, + rows::V, + cols::V + ) where {M <: SparseWrapperModel, V <: AbstractVector} + + NLPModels.hess_structure!(m.inner, m.hrows, m.hcols) + copyto!(rows, m.hrows) + copyto!(cols, m.hcols) +end +function NLPModels.jac_coord!( + m::M, + x::V, + jac::V + ) where {M <: SparseWrapperModel, V <: AbstractVector} + + copyto!(m.x, x) + NLPModels.jac_coord!(m.inner, m.x, m.jac) + copyto!(jac, m.jac) + return +end +function NLPModels.hess_coord!( + m::M, + x::AbstractVector, + y::AbstractVector, + hess::AbstractVector; + obj_weight = one(eltype(x)) + ) where {M <: SparseWrapperModel} + + copyto!(m.x, x) + copyto!(m.y, y) + NLPModels.hess_coord!(m.inner, m.x, m.y, m.hess; obj_weight=obj_weight) + copyto!(hess, m.hess) + return +end + + + +function MadNLP.jac_dense!( + m::Model, + x::V, + jac::M + ) where {Model <: DenseWrapperModel, V <: AbstractVector, M <: AbstractMatrix} + + copyto!(m.x, x) + MadNLP.jac_dense!(m.inner, m.x, m.jac) + copyto!(jac, m.jac) + return +end +function MadNLP.hess_dense!( + m::Model, + x::V, + y::V, + hess::M; + obj_weight = one(eltype(x)) + ) where {Model <: DenseWrapperModel, V <: AbstractVector, M <: AbstractMatrix} + + copyto!(m.x, x) + copyto!(m.y, y) + MadNLP.hess_dense!(m.inner, m.x, m.y, m.hess; obj_weight=obj_weight) + copyto!(hess, m.hess) + return +end + diff --git a/src/IPM/IPM.jl b/src/IPM/IPM.jl index 271b25ed..cc73e2ce 100644 --- a/src/IPM/IPM.jl +++ b/src/IPM/IPM.jl @@ -4,9 +4,22 @@ abstract type AbstractMadNLPSolver{T} end include("restoration.jl") - -mutable struct MadNLPSolver{T, KKTSystem <: AbstractKKTSystem{T}, Model <: AbstractNLPModel, LinSolver <: AbstractLinearSolver{T}, Iterator <: AbstractIterator{T}, KKTVec <: AbstractKKTVector{T, Vector{T}}} <: AbstractMadNLPSolver{T} +include("inertiacorrector.jl") + +mutable struct MadNLPSolver{ + T, + VT <: AbstractVector{T}, + VI <: AbstractVector{Int}, + KKTSystem <: AbstractKKTSystem{T}, + 
Model <: AbstractNLPModel{T,VT}, + CB <: AbstractCallback{T}, + Iterator <: AbstractIterator{T}, + IC <: AbstractInertiaCorrector, + KKTVec <: AbstractKKTVector{T, VT} + } <: AbstractMadNLPSolver{T} + nlp::Model + cb::CB kkt::KKTSystem opt::MadNLPOptions @@ -18,58 +31,52 @@ mutable struct MadNLPSolver{T, KKTSystem <: AbstractKKTSystem{T}, Model <: Abstr nlb::Int nub::Int - x::PrimalVector{T, Vector{T}} # primal (after reformulation) - y::Vector{T} # dual - zl::PrimalVector{T, Vector{T}} # dual (after reformulation) - zu::PrimalVector{T, Vector{T}} # dual (after reformulation) - xl::PrimalVector{T, Vector{T}} # primal lower bound (after reformulation) - xu::PrimalVector{T, Vector{T}} # primal upper bound (after reformulation) + x::PrimalVector{T, VT, VI} # primal (after reformulation) + y::VT # dual + zl::PrimalVector{T, VT, VI} # dual (after reformulation) + zu::PrimalVector{T, VT, VI} # dual (after reformulation) + xl::PrimalVector{T, VT, VI} # primal lower bound (after reformulation) + xu::PrimalVector{T, VT, VI} # primal upper bound (after reformulation) obj_val::T - f::PrimalVector{T, Vector{T}} - c::Vector{T} + f::PrimalVector{T, VT, VI} + c::VT - jacl::Vector{T} + jacl::VT - d::UnreducedKKTVector{T, Vector{T}} - p::UnreducedKKTVector{T, Vector{T}} + d::KKTVec + p::KKTVec _w1::KKTVec _w2::KKTVec - _w3::KKTVec _w4::KKTVec - x_trial::PrimalVector{T, Vector{T}} - c_trial::Vector{T} + x_trial::PrimalVector{T, VT, VI} + c_trial::VT obj_val_trial::T - c_slk::SubVector{T} - rhs::Vector{T} - - ind_ineq::Vector{Int} - ind_fixed::Vector{Int} - ind_llb::Vector{Int} - ind_uub::Vector{Int} - - x_lr::SubVector{T} - x_ur::SubVector{T} - xl_r::SubVector{T} - xu_r::SubVector{T} - zl_r::SubVector{T} - zu_r::SubVector{T} - - dx_lr::SubVector{T} - dx_ur::SubVector{T} - x_trial_lr::SubVector{T} - x_trial_ur::SubVector{T} + c_slk::SubVector{T,VT,VI} + rhs::VT + + ind_ineq::VI + ind_fixed::VI + ind_llb::VI + ind_uub::VI + + x_lr::SubVector{T,VT,VI} + x_ur::SubVector{T,VT,VI} + xl_r::SubVector{T,VT,VI} + xu_r::SubVector{T,VT,VI} + zl_r::SubVector{T,VT,VI} + zu_r::SubVector{T,VT,VI} + dx_lr::SubVector{T,VT,VI} + dx_ur::SubVector{T,VT,VI} + x_trial_lr::SubVector{T,VT,VI} + x_trial_ur::SubVector{T,VT,VI} - linear_solver::LinSolver iterator::Iterator - obj_scale::Vector{T} - con_scale::Vector{T} - con_jac_scale::Vector{T} inf_pr::T inf_du::T inf_compl::T @@ -84,108 +91,83 @@ mutable struct MadNLPSolver{T, KKTSystem <: AbstractKKTSystem{T}, Model <: Abstr ftype::String del_w::T - del_c::T del_w_last::T + del_c::T filter::Vector{Tuple{T,T}} + inertia_corrector::IC RR::Union{Nothing,RobustRestorer{T}} status::Status output::Dict end -function MadNLPSolver(nlp::AbstractNLPModel{T}; kwargs...) where T - opt_ipm, opt_linear_solver, logger = load_options(; kwargs...) - @assert is_supported(opt_ipm.linear_solver, T) - - VT = Vector{T} - # Determine Hessian approximation - QN = if opt_ipm.hessian_approximation == DENSE_BFGS - BFGS{T, VT} - elseif opt_ipm.hessian_approximation == DENSE_DAMPED_BFGS - DampedBFGS{T, VT} - elseif opt_ipm.hessian_approximation == SPARSE_COMPACT_LBFGS - CompactLBFGS{T, VT, Matrix{T}} - else - ExactHessian{T, VT} - end - # Determine KKT system - KKTSystem = if opt_ipm.kkt_system == SPARSE_KKT_SYSTEM - MT = (input_type(opt_ipm.linear_solver) == :csc) ? SparseMatrixCSC{T, Int32} : Matrix{T} - SparseKKTSystem{T, VT, MT, QN} - elseif opt_ipm.kkt_system == SPARSE_UNREDUCED_KKT_SYSTEM - MT = (input_type(opt_ipm.linear_solver) == :csc) ? 
SparseMatrixCSC{T, Int32} : Matrix{T} - SparseUnreducedKKTSystem{T, VT, MT, QN} - elseif opt_ipm.kkt_system == DENSE_KKT_SYSTEM - MT = Matrix{T} - DenseKKTSystem{T, VT, MT, QN} - elseif opt_ipm.kkt_system == DENSE_CONDENSED_KKT_SYSTEM - MT = Matrix{T} - DenseCondensedKKTSystem{T, VT, MT, QN} - end - return MadNLPSolver{T,KKTSystem}(nlp, opt_ipm, opt_linear_solver; logger=logger) -end - -# Constructor for unregistered KKT systems -function MadNLPSolver{T, KKTSystem}(nlp::AbstractNLPModel{T}; options...) where {T, KKTSystem} - opt_ipm, opt_linear_solver, logger = load_options(; options...) - @assert is_supported(opt_ipm.linear_solver, T) - return MadNLPSolver{T,KKTSystem}(nlp, opt_ipm, opt_linear_solver; logger=logger) -end +function MadNLPSolver(nlp::AbstractNLPModel{T,VT}; kwargs...) where {T, VT} + + opt, opt_linear_solver, logger = load_options(nlp; kwargs...) + @assert is_supported(opt.linear_solver, T) -# Inner constructor -function MadNLPSolver{T,KKTSystem}( - nlp::AbstractNLPModel, - opt::MadNLPOptions, - opt_linear_solver::AbstractOptions; - logger=MadNLPLogger(), -) where {T, KKTSystem<:AbstractKKTSystem{T}} cnt = MadNLPCounters(start_time=time()) - + cb = create_callback(opt.callback, nlp, opt) + # generic options opt.disable_garbage_collector && (GC.enable(false); @warn(logger,"Julia garbage collector is temporarily disabled")) set_blas_num_threads(opt.blas_num_threads; permanent=true) @trace(logger,"Initializing variables.") - ind_cons = get_index_constraints(nlp; fixed_variable_treatment=opt.fixed_variable_treatment) + + ind_cons = get_index_constraints( + get_lvar(nlp), get_uvar(nlp), + get_lcon(nlp), get_ucon(nlp), + opt.fixed_variable_treatment, + opt.equality_treatment + ) + + ind_lb = ind_cons.ind_lb + ind_ub = ind_cons.ind_ub + ns = length(ind_cons.ind_ineq) nx = get_nvar(nlp) n = nx+ns m = get_ncon(nlp) + nlb = length(ind_lb) + nub = length(ind_ub) + + @trace(logger,"Initializing KKT system.") + kkt = create_kkt_system( + opt.kkt_system, + cb, + opt, + opt_linear_solver, + cnt, + ind_cons + ) - # Initialize KKT - kkt = KKTSystem(nlp, ind_cons) - - # Primal variable - x = PrimalVector{T, Vector{T}}(nx, ns) - variable(x) .= get_x0(nlp) - # Bounds - xl = PrimalVector{T, Vector{T}}(nx, ns) - variable(xl) .= get_lvar(nlp) - slack(xl) .= view(get_lcon(nlp), ind_cons.ind_ineq) - xu = PrimalVector{T, Vector{T}}(nx, ns) - variable(xu) .= get_uvar(nlp) - slack(xu) .= view(get_ucon(nlp), ind_cons.ind_ineq) - zl = PrimalVector{T, Vector{T}}(nx, ns) - zu = PrimalVector{T, Vector{T}}(nx, ns) + @trace(logger,"Initializing iterative solver.") + iterator = opt.iterator(kkt; cnt = cnt, logger = logger) + + x = PrimalVector(VT, nx, ns, ind_lb, ind_ub) + xl = PrimalVector(VT, nx, ns, ind_lb, ind_ub) + xu = PrimalVector(VT, nx, ns, ind_lb, ind_ub) + zl = PrimalVector(VT, nx, ns, ind_lb, ind_ub) + zu = PrimalVector(VT, nx, ns, ind_lb, ind_ub) + f = PrimalVector(VT, nx, ns, ind_lb, ind_ub) + x_trial = PrimalVector(VT, nx, ns, ind_lb, ind_ub) - # Gradient - f = PrimalVector{T, Vector{T}}(nx, ns) - - y = copy(get_y0(nlp)) - c = zeros(T, m) - - n_jac = nnz_jacobian(kkt) - - nlb = length(ind_cons.ind_lb) - nub = length(ind_cons.ind_ub) - - x_trial = PrimalVector{T, Vector{T}}(nx, ns) - c_trial = Vector{T}(undef, m) + d = UnreducedKKTVector(VT, n, m, nlb, nub, ind_lb, ind_ub) + p = UnreducedKKTVector(VT, n, m, nlb, nub, ind_lb, ind_ub) + _w1 = UnreducedKKTVector(VT, n, m, nlb, nub, ind_lb, ind_ub) + _w2 = UnreducedKKTVector(VT, n, m, nlb, nub, ind_lb, ind_ub) + _w3 = UnreducedKKTVector(VT, n, m, 
nlb, nub, ind_lb, ind_ub) + _w4 = UnreducedKKTVector(VT, n, m, nlb, nub, ind_lb, ind_ub) + + jacl = VT(undef,n) + c_trial = VT(undef, m) + y = VT(undef, m) + c = VT(undef, m) + rhs = VT(undef, m) c_slk = view(c,ind_cons.ind_ineq) - rhs = (get_lcon(nlp).==get_ucon(nlp)).*get_lcon(nlp) - x_lr = view(full(x), ind_cons.ind_lb) x_ur = view(full(x), ind_cons.ind_ub) xl_r = view(full(xl), ind_cons.ind_lb) @@ -194,61 +176,44 @@ function MadNLPSolver{T,KKTSystem}( zu_r = view(full(zu), ind_cons.ind_ub) x_trial_lr = view(full(x_trial), ind_cons.ind_lb) x_trial_ur = view(full(x_trial), ind_cons.ind_ub) - - - if is_reduced(kkt) - _w1 = ReducedKKTVector{T,typeof(c)}(n, m) - _w2 = ReducedKKTVector{T,typeof(c)}(n, m) - _w3 = ReducedKKTVector{T,typeof(c)}(n, m) - _w4 = ReducedKKTVector{T,typeof(c)}(n, m) - else - _w1 = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub) - _w2 = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub) - _w3 = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub) - _w4 = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub) - end - - jacl = zeros(T,n) # spblas may throw an error if not initialized to zero - - d = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub) dx_lr = view(d.xp, ind_cons.ind_lb) # TODO dx_ur = view(d.xp, ind_cons.ind_ub) # TODO - p = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub) - - obj_scale = T[1.0] - con_scale = ones(T,m) - con_jac_scale = ones(T,n_jac) - @trace(logger,"Initializing linear solver.") - cnt.linear_solver_time = - @elapsed linear_solver = opt.linear_solver(get_kkt(kkt) ; opt=opt_linear_solver, logger=logger) - - n_kkt = size(kkt, 1) - buffer_vec = similar(full(d), n_kkt) - @trace(logger,"Initializing iterative solver.") - iterator = opt.iterator(linear_solver, kkt, buffer_vec) - - @trace(logger,"Initializing fixed variable treatment scheme.") - - if opt.inertia_correction_method == INERTIA_AUTO - opt.inertia_correction_method = is_inertia(linear_solver)::Bool ? INERTIA_BASED : INERTIA_FREE + inertia_correction_method = if opt.inertia_correction_method == InertiaAuto + is_inertia(kkt.linear_solver)::Bool ? 
InertiaBased : InertiaFree + else + opt.inertia_correction_method end - - return MadNLPSolver{T,KKTSystem,typeof(nlp),typeof(linear_solver),typeof(iterator),typeof(_w1)}( - nlp,kkt,opt,cnt,logger, - n,m,nlb,nub,x,y,zl,zu,xl,xu,0.,f,c, - jacl, - d, p, - _w1, _w2, _w3, _w4, - x_trial,c_trial,0.,c_slk,rhs, - ind_cons.ind_ineq,ind_cons.ind_fixed,ind_cons.ind_llb,ind_cons.ind_uub, - x_lr,x_ur,xl_r,xu_r,zl_r,zu_r,dx_lr,dx_ur,x_trial_lr,x_trial_ur, - linear_solver,iterator, - obj_scale,con_scale,con_jac_scale, - 0.,0.,0.,0.,0.,0.,0.,0.,0.," ",0.,0.,0., - Vector{T}[],nothing,INITIAL,Dict(), + inertia_corrector = build_inertia_corrector( + inertia_correction_method, + VT, + n, m, nlb, nub, ind_lb, ind_ub + ) + + cnt.init_time = time() - cnt.start_time + + return MadNLPSolver( + nlp, cb, kkt, + opt, cnt, logger, + n, m, nlb, nub, + x, y, zl, zu, xl, xu, + zero(T), f, c, + jacl, + d, p, + _w1, _w2, _w3, _w4, + x_trial, c_trial, zero(T), c_slk, rhs, + ind_cons.ind_ineq, ind_cons.ind_fixed, ind_cons.ind_llb, ind_cons.ind_uub, + x_lr, x_ur, xl_r, xu_r, zl_r, zu_r, dx_lr, dx_ur, x_trial_lr, x_trial_ur, + iterator, + zero(T), zero(T), zero(T), zero(T), zero(T), zero(T), zero(T), zero(T), zero(T), + " ", + zero(T), zero(T), zero(T), + Tuple{T, T}[], + inertia_corrector, nothing, + INITIAL, Dict(), ) + end include("utils.jl") @@ -257,3 +222,4 @@ include("callbacks.jl") include("factorization.jl") include("solver.jl") + diff --git a/src/IPM/callbacks.jl b/src/IPM/callbacks.jl index 9fa9522b..0fdb606b 100644 --- a/src/IPM/callbacks.jl +++ b/src/IPM/callbacks.jl @@ -4,45 +4,46 @@ function eval_f_wrapper(solver::MadNLPSolver, x::PrimalVector{T}) where T @trace(solver.logger,"Evaluating objective.") cnt.eval_function_time += @elapsed begin sense = (get_minimize(nlp) ? one(T) : -one(T)) - obj_val = sense * obj(nlp, variable(x)) + obj_val = sense * _eval_f_wrapper(solver.cb, variable(x)) end cnt.obj_cnt += 1 if cnt.obj_cnt == 1 && !is_valid(obj_val) throw(InvalidNumberException(:obj)) end - return obj_val * solver.obj_scale[] + return obj_val end function eval_grad_f_wrapper!(solver::MadNLPSolver, f::PrimalVector{T}, x::PrimalVector{T}) where T nlp = solver.nlp cnt = solver.cnt @trace(solver.logger,"Evaluating objective gradient.") - obj_scaling = solver.obj_scale[] * (get_minimize(nlp) ? 
one(T) : -one(T)) - cnt.eval_function_time += @elapsed grad!( - nlp, + cnt.eval_function_time += @elapsed _eval_grad_f_wrapper!( + solver.cb, variable(x), variable(f), ) - _scal!(obj_scaling, full(f)) + if !get_minimize(nlp) + variable(f) .*= -one(T) + end cnt.obj_grad_cnt+=1 + if cnt.obj_grad_cnt == 1 && !is_valid(full(f)) throw(InvalidNumberException(:grad)) end return f end -function eval_cons_wrapper!(solver::MadNLPSolver, c::Vector{T}, x::PrimalVector{T}) where T +function eval_cons_wrapper!(solver::MadNLPSolver, c::AbstractVector{T}, x::PrimalVector{T}) where T nlp = solver.nlp cnt = solver.cnt @trace(solver.logger, "Evaluating constraints.") - cnt.eval_function_time += @elapsed cons!( - nlp, + cnt.eval_function_time += @elapsed _eval_cons_wrapper!( + solver.cb, variable(x), c, ) view(c,solver.ind_ineq) .-= slack(x) c .-= solver.rhs - c .*= solver.con_scale cnt.con_cnt+=1 if cnt.con_cnt == 1 && !is_valid(c) throw(InvalidNumberException(:cons)) @@ -56,11 +57,11 @@ function eval_jac_wrapper!(solver::MadNLPSolver, kkt::AbstractKKTSystem, x::Prim ns = length(solver.ind_ineq) @trace(solver.logger, "Evaluating constraint Jacobian.") jac = get_jacobian(kkt) - cnt.eval_function_time += @elapsed jac_coord!( - nlp, + cnt.eval_function_time += @elapsed _eval_jac_wrapper!( + solver.cb, variable(x), jac, - ) + ) compress_jacobian!(kkt) cnt.con_jac_cnt += 1 if cnt.con_jac_cnt == 1 && !is_valid(jac) @@ -70,18 +71,16 @@ function eval_jac_wrapper!(solver::MadNLPSolver, kkt::AbstractKKTSystem, x::Prim return jac end -function eval_lag_hess_wrapper!(solver::MadNLPSolver, kkt::AbstractKKTSystem, x::PrimalVector{T},l::Vector{T};is_resto=false) where T +function eval_lag_hess_wrapper!(solver::MadNLPSolver, kkt::AbstractKKTSystem, x::PrimalVector{T},l::AbstractVector{T};is_resto=false) where T nlp = solver.nlp cnt = solver.cnt @trace(solver.logger,"Evaluating Lagrangian Hessian.") - dual(solver._w1) .= l .* solver.con_scale hess = get_hessian(kkt) - scale = (get_minimize(nlp) ? one(T) : -one(T)) - scale *= (is_resto ? zero(T) : solver.obj_scale[]) - cnt.eval_function_time += @elapsed hess_coord!( - nlp, + scale = (get_minimize(nlp) ? one(T) : -one(T)) * (is_resto ? zero(T) : one(T)) + cnt.eval_function_time += @elapsed _eval_lag_hess_wrapper!( + solver.cb, variable(x), - dual(solver._w1), + l, hess; obj_weight = scale, ) @@ -99,8 +98,8 @@ function eval_jac_wrapper!(solver::MadNLPSolver, kkt::AbstractDenseKKTSystem, x: ns = length(solver.ind_ineq) @trace(solver.logger, "Evaluating constraint Jacobian.") jac = get_jacobian(kkt) - cnt.eval_function_time += @elapsed jac_dense!( - nlp, + cnt.eval_function_time += @elapsed _eval_jac_wrapper!( + solver.cb, variable(x), jac, ) @@ -117,19 +116,18 @@ function eval_lag_hess_wrapper!( solver::MadNLPSolver, kkt::AbstractDenseKKTSystem{T, VT, MT, QN}, x::PrimalVector{T}, - l::Vector{T}; + l::AbstractVector{T}; is_resto=false, ) where {T, VT, MT, QN<:ExactHessian} nlp = solver.nlp cnt = solver.cnt @trace(solver.logger,"Evaluating Lagrangian Hessian.") - dual(solver._w1) .= l .* solver.con_scale hess = get_hessian(kkt) - scale = is_resto ? zero(T) : get_minimize(nlp) ? solver.obj_scale[] : -solver.obj_scale[] - cnt.eval_function_time += @elapsed hess_dense!( - nlp, + scale = is_resto ? zero(T) : get_minimize(nlp) ? 
one(T) : -one(T) + cnt.eval_function_time += @elapsed _eval_lag_hess_wrapper!( + solver.cb, variable(x), - dual(solver._w1), + l, hess; obj_weight = scale, ) @@ -145,7 +143,7 @@ function eval_lag_hess_wrapper!( solver::MadNLPSolver, kkt::AbstractKKTSystem{T, VT, MT, QN}, x::PrimalVector{T}, - l::Vector{T}; + l::AbstractVector{T}; is_resto=false, ) where {T, VT, MT<:AbstractMatrix{T}, QN<:AbstractQuasiNewton{T, VT}} nlp = solver.nlp @@ -168,7 +166,7 @@ function eval_lag_hess_wrapper!( axpy!(-one(T), qn.last_g, yk) # yₖ = ∇f₊ - ∇f if m > 0 jtprod!(solver.jacl, kkt, l) - BLAS.axpy!(n, one(T), solver.jacl, 1, yk, 1) # yₖ += J₊ᵀ l₊ + yk .+= @view(solver.jacl[1:n]) # yₖ += J₊ᵀ l₊ NLPModels.jtprod!(nlp, qn.last_x, l, qn.last_jv) axpy!(-one(T), qn.last_jv, yk) # yₖ += J₊ᵀ l₊ - Jᵀ l₊ end @@ -187,3 +185,4 @@ function eval_lag_hess_wrapper!( return get_hessian(kkt) end + diff --git a/src/IPM/factorization.jl b/src/IPM/factorization.jl index 9681459a..a01840e0 100644 --- a/src/IPM/factorization.jl +++ b/src/IPM/factorization.jl @@ -1,125 +1,62 @@ +function solve_refine_wrapper!(d, solver, p, w) + result = false -function factorize_wrapper!(solver::MadNLPSolver) - @trace(solver.logger,"Factorization started.") - build_kkt!(solver.kkt) - solver.cnt.linear_solver_time += @elapsed factorize!(solver.linear_solver) -end - -function solve_refine_wrapper!( - solver::MadNLPSolver, - x::AbstractKKTVector, - b::AbstractKKTVector, -) - cnt = solver.cnt - @trace(solver.logger,"Iterative solution started.") - fixed_variable_treatment_vec!(full(b), solver.ind_fixed) - - cnt.linear_solver_time += @elapsed begin - result = solve_refine!(x, solver.iterator, b) - end - - if result == :Solved - solve_status = true - else - if improve!(solver.linear_solver) - cnt.linear_solver_time += @elapsed begin - factorize!(solver.linear_solver) - ret = solve_refine!(x, solver.iterator, b) - solve_status = (ret == :Solved) - end + solver.cnt.linear_solver_time += @elapsed begin + if solve_refine!(d, solver.iterator, p, w) + result = true else - solve_status = false + if improve!(solver.kkt.linear_solver) + if solve_refine!(d, solver.iterator, p, w) + result = true + end + end end end - fixed_variable_treatment_vec!(full(x), solver.ind_fixed) - return solve_status -end - -function solve_refine_wrapper!( - solver::MadNLPSolver{T,<:DenseCondensedKKTSystem}, - x::AbstractKKTVector, - b::AbstractKKTVector, -) where T - cnt = solver.cnt - @trace(solver.logger,"Iterative solution started.") - fixed_variable_treatment_vec!(full(b), solver.ind_fixed) - - kkt = solver.kkt - n = num_variables(kkt) - n_eq, ns = kkt.n_eq, kkt.n_ineq - n_condensed = n + n_eq - - # load buffers - b_c = view(full(solver._w1), 1:n_condensed) - x_c = view(full(solver._w2), 1:n_condensed) - jv_x = view(full(solver._w3), 1:ns) # for jprod - jv_t = primal(solver._w4) # for jtprod - v_c = dual(solver._w4) - - Σs = get_slack_regularization(kkt) - α = get_scaling_inequalities(kkt) + return result +end - # Decompose right hand side - bx = view(full(b), 1:n) - bs = view(full(b), n+1:n+ns) - by = view(full(b), kkt.ind_eq_shifted) - bz = view(full(b), kkt.ind_ineq_shifted) +function factorize_wrapper!(solver::MadNLPSolver) + @trace(solver.logger,"Factorization started.") + build_kkt!(solver.kkt) + solver.cnt.linear_solver_time += @elapsed factorize!(solver.kkt.linear_solver) +end - # Decompose results - xx = view(full(x), 1:n) - xs = view(full(x), n+1:n+ns) - xy = view(full(x), kkt.ind_eq_shifted) - xz = view(full(x), kkt.ind_ineq_shifted) - - fill!(v_c, zero(T)) - 
v_c[kkt.ind_ineq] .= (Σs .* bz .+ α .* bs) ./ α.^2 - jtprod!(jv_t, kkt, v_c) - # init right-hand-side - b_c[1:n] .= bx .+ jv_t[1:n] - b_c[1+n:n+n_eq] .= by - - cnt.linear_solver_time += @elapsed (result = solve_refine!(x_c, solver.iterator, b_c)) - solve_status = (result == :Solved) - - # Expand solution - xx .= x_c[1:n] - xy .= x_c[1+n:end] - jprod_ineq!(jv_x, kkt, xx) - xz .= sqrt.(Σs) ./ α .* jv_x .- Σs .* bz ./ α.^2 .- bs ./ α - xs .= (bs .+ α .* xz) ./ Σs - - fixed_variable_treatment_vec!(full(x), solver.ind_fixed) - return solve_status +function solve!(kkt::SparseUnreducedKKTSystem, w::AbstractKKTVector) + wzl = dual_lb(w) + wzu = dual_ub(w) + f(x,y) = iszero(y) ? x : x/y + wzl .= f.(wzl, kkt.l_lower_aug) + wzu .= f.(wzu, kkt.u_lower_aug) + solve!(kkt.linear_solver, full(w)) + wzl .*= .-kkt.l_lower_aug + wzu .*= kkt.u_lower_aug end -# Set V1 = [U₁ U₂] , V2 = [-U₁ U₂] -function _init_lbfgs_factors!(V1, V2, U, n, p) - @inbounds for i in 1:n, j in 1:p - V1[i, j] = U[i, j] - V2[i, j] = -U[i, j] - V1[i, j+p] = U[i, j+p] - V2[i, j+p] = U[i, j+p] - end +function solve!(kkt::AbstractReducedKKTSystem, w::AbstractKKTVector) + reduce_rhs!(w.xp_lr, dual_lb(w), kkt.l_diag, w.xp_ur, dual_ub(w), kkt.u_diag) + solve!(kkt.linear_solver, primal_dual(w)) + finish_aug_solve!(kkt, w) end -function solve_refine_wrapper!( - solver::MadNLPSolver{T, <:SparseKKTSystem{T, VT, MT, QN}}, - x::AbstractKKTVector, - b::AbstractKKTVector, -) where {T, VT, MT, QN<:CompactLBFGS{T, Vector{T}, Matrix{T}}} - cnt = solver.cnt - kkt = solver.kkt +function solve!( + kkt::SparseKKTSystem{T, VT, MT, QN}, + w::AbstractKKTVector + ) where {T, VT, MT, QN<:CompactLBFGS} + qn = kkt.quasi_newton n, p = size(qn) # Load buffers xr = qn._w2 - Tk = qn.Tk ; fill!(Tk, zero(T)) - x_ = primal_dual(x) - b_ = primal_dual(b) - nn = length(x_) + Tk = qn.Tk + w_ = primal_dual(w) + nn = length(w_) + + fill!(Tk, zero(T)) + reduce_rhs!(w.xp_lr, dual_lb(w), kkt.l_diag, w.xp_ur, dual_ub(w), kkt.u_diag) + # Resize arrays with correct dimension - if size(qn.V1, 2) < 2*p + if size(qn.V1) != (nn, 2*p) qn.V1 = zeros(nn, 2*p) qn.V2 = zeros(nn, 2*p) else @@ -127,35 +64,203 @@ function solve_refine_wrapper!( fill!(qn.V2, zero(T)) end - fixed_variable_treatment_vec!(full(b), solver.ind_fixed) - # Solve LBFGS system with Sherman-Morrison-Woodbury formula # (C + U Vᵀ)⁻¹ = C⁻¹ - C⁻¹ U (I + Vᵀ C⁻¹ U) Vᵀ C⁻¹ # Solve linear system without low-rank part - cnt.linear_solver_time += @elapsed begin - result = solve_refine!(x, solver.iterator, b) - end + solve!(kkt.linear_solver, w_) # Add low-rank correction if p > 0 _init_lbfgs_factors!(qn.V1, qn.V2, qn.U, n, p) - cnt.linear_solver_time += @elapsed begin - multi_solve!(solver.linear_solver, qn.V2) # V2 = C⁻¹ U - end + multi_solve!(kkt.linear_solver, qn.V2) # V2 = C⁻¹ U Tk[diagind(Tk)] .= one(T) # Tₖ = I mul!(Tk, qn.V1', qn.V2, one(T), one(T)) # Tₖ = (I + Vᵀ C⁻¹ U) J1 = qr(Tk) # Tₖ⁻¹ - mul!(xr, qn.V1', x_) # xᵣ = Vᵀ C⁻¹ b + mul!(xr, qn.V1', w_) # xᵣ = Vᵀ C⁻¹ b ldiv!(J1, xr) # xᵣ = (I + Vᵀ C⁻¹ U)⁻¹ Vᵀ C⁻¹ b - mul!(x_, qn.V2, xr, -one(T), one(T)) # x = x - C⁻¹ U xᵣ + mul!(w_, qn.V2, xr, -one(T), one(T)) # x = x - C⁻¹ U xᵣ end - fixed_variable_treatment_vec!(full(x), solver.ind_fixed) - solve_status = (result == :Solved) - return solve_status + finish_aug_solve!(kkt, w) +end + + +function solve!(kkt::SparseCondensedKKTSystem{T}, w::AbstractKKTVector) where T + + (n,m) = size(kkt.jt_csc) + + # Decompose buffers + wx = _madnlp_unsafe_wrap(full(w), n) + ws = view(full(w), n+1:n+m) + wz = view(full(w), n+m+1:n+2*m) + Σs = 
view(kkt.pr_diag, n+1:n+m) + + reduce_rhs!(w.xp_lr, dual_lb(w), kkt.l_diag, w.xp_ur, dual_ub(w), kkt.u_diag) + + kkt.buffer .= kkt.diag_buffer .* (wz .+ ws ./ Σs) + + mul!(wx, kkt.jt_csc, kkt.buffer, one(T), one(T)) + solve!(kkt.linear_solver, wx) + + mul!(kkt.buffer2, kkt.jt_csc', wx) # TODO: investigate why directly using wz here is causing an error + + wz .= .- kkt.buffer .+ kkt.diag_buffer .* kkt.buffer2 + ws .= (ws .+ wz) ./ Σs + + finish_aug_solve!(kkt, w) + +end + +function solve!( + kkt::DenseCondensedKKTSystem, + w::AbstractKKTVector{T}, + ) where T + + n = num_variables(kkt) + n_eq, ns = kkt.n_eq, kkt.n_ineq + n_condensed = n + n_eq + + # Decompose rhs + wx = view(full(w), 1:n) + ws = view(full(w), n+1:n+ns) + wy = view(full(w), kkt.ind_eq_shifted) + wz = view(full(w), kkt.ind_ineq_shifted) + + x = kkt.pd_buffer + xx = view(x, 1:n) + xy = view(x, n+1:n+n_eq) + + Σs = get_slack_regularization(kkt) + + reduce_rhs!(w.xp_lr, dual_lb(w), kkt.l_diag, w.xp_ur, dual_ub(w), kkt.u_diag) + + fill!(kkt.buffer, zero(T)) + kkt.buffer[kkt.ind_ineq] .= kkt.diag_buffer .* (wz .+ ws ./ Σs) + mul!(xx, kkt.jac', kkt.buffer) + xx .+= wx + xy .= wy + solve!(kkt.linear_solver, x) + + wx .= xx + mul!(dual(w), kkt.jac, wx) + wy .= xy + wz .*= kkt.diag_buffer + dual(w) .-= kkt.buffer + ws .= (ws .+ wz) ./ Σs + + finish_aug_solve!(kkt, w) +end + +function mul!(w::AbstractKKTVector{T}, kkt::Union{SparseKKTSystem{T,VT,MT,QN},SparseUnreducedKKTSystem{T,VT,MT,QN}}, x::AbstractKKTVector, alpha = one(T), beta = zero(T)) where {T, VT, MT, QN<:ExactHessian} + mul!(primal(w), Symmetric(kkt.hess_com, :L), primal(x), alpha, beta) + mul!(primal(w), kkt.jac_com', dual(x), alpha, one(T)) + mul!(dual(w), kkt.jac_com, primal(x), alpha, beta) + _kktmul!(w,x,kkt.reg,kkt.du_diag,kkt.l_lower,kkt.u_lower,kkt.l_diag,kkt.u_diag, alpha, beta) +end + +function mul!(w::AbstractKKTVector{T}, kkt::Union{SparseKKTSystem{T,VT,MT,QN},SparseUnreducedKKTSystem{T,VT,MT,QN}}, x::AbstractKKTVector, alpha = one(T), beta = zero(T)) where {T, VT, MT, QN<:CompactLBFGS} + qn = kkt.quasi_newton + n, p = size(qn) + nn = length(primal_dual(w)) + # Load buffers (size: 2p) + vx = qn._w2 + # Reset V1 and V2 + fill!(qn.V1, zero(T)) + fill!(qn.V2, zero(T)) + _init_lbfgs_factors!(qn.V1, qn.V2, qn.U, n, p) + # Upper-left block is C = ξ I + U Vᵀ + mul!(primal(w), Symmetric(kkt.hess_com, :L), primal(x), alpha, beta) + mul!(primal(w), kkt.jac_com', dual(x), alpha, one(T)) + mul!(dual(w), kkt.jac_com, primal(x), alpha, beta) + # Add (U Vᵀ) x contribution + mul!(vx, qn.V2', primal_dual(x)) + mul!(primal_dual(w), qn.V1, vx, alpha, one(T)) + + _kktmul!(w,x,kkt.reg,kkt.du_diag,kkt.l_lower,kkt.u_lower,kkt.l_diag,kkt.u_diag, alpha, beta) +end + +function mul!(w::AbstractKKTVector{T}, kkt::SparseCondensedKKTSystem, x::AbstractKKTVector, alpha, beta) where T + n = size(kkt.hess_com, 1) + m = size(kkt.jt_csc, 2) + + # Decompose results + xx = view(full(x), 1:n) + xs = view(full(x), n+1:n+m) + xz = view(full(x), n+m+1:n+2*m) + + # Decompose buffers + wx = _madnlp_unsafe_wrap(full(w), n) + ws = view(full(w), n+1:n+m) + wz = view(full(w), n+m+1:n+2*m) + + mul!(wx, Symmetric(kkt.hess_com, :L), xx, alpha, beta) # TODO: make this symmetric + + mul!(wx, kkt.jt_csc, xz, alpha, beta) + mul!(wz, kkt.jt_csc', xx, alpha, one(T)) + axpy!(-alpha, xz, ws) + axpy!(-alpha, xs, wz) + + _kktmul!(w,x,kkt.reg,kkt.du_diag,kkt.l_lower,kkt.u_lower,kkt.l_diag,kkt.u_diag, alpha, beta) +end + +function mul!(w::AbstractKKTVector{T}, kkt::AbstractDenseKKTSystem, x::AbstractKKTVector, alpha = 
one(T), beta = zero(T)) where T + (m, n) = size(kkt.jac) + wx = @view(primal(w)[1:n]) + ws = @view(primal(w)[n+1:end]) + wy = dual(w) + wz = @view(dual(w)[kkt.ind_ineq]) + + xx = @view(primal(x)[1:n]) + xs = @view(primal(x)[n+1:end]) + xy = dual(x) + xz = @view(dual(x)[kkt.ind_ineq]) + + symul!(wx, kkt.hess, xx, alpha, beta) + if m > 0 # otherwise, CUDA causes an error + mul!(wx, kkt.jac', dual(x), alpha, one(T)) + mul!(wy, kkt.jac, xx, alpha, beta) + end + ws .= beta.*ws .- alpha.* xz + wz .= beta.*wz .- alpha.* xs + _kktmul!(w,x,kkt.reg,kkt.du_diag,kkt.l_lower,kkt.u_lower,kkt.l_diag,kkt.u_diag, alpha, beta) +end + +function mul_hess_blk!(wx, kkt::Union{DenseKKTSystem,DenseCondensedKKTSystem}, t) + n = size(kkt.hess, 1) + mul!(@view(wx[1:n]), Symmetric(kkt.hess, :L), @view(t[1:n])) + fill!(@view(wx[n+1:end]), 0) + wx .+= t .* kkt.pr_diag +end + +function mul_hess_blk!(wx, kkt::Union{SparseKKTSystem,SparseCondensedKKTSystem}, t) + n = size(kkt.hess_com, 1) + mul!(@view(wx[1:n]), Symmetric(kkt.hess_com, :L), @view(t[1:n])) + fill!(@view(wx[n+1:end]), 0) + wx .+= t .* kkt.pr_diag +end +function mul_hess_blk!(wx, kkt::SparseUnreducedKKTSystem, t) + ind_lb = kkt.ind_lb + ind_ub = kkt.ind_ub + + n = size(kkt.hess_com, 1) + mul!(@view(wx[1:n]), Symmetric(kkt.hess_com, :L), @view(t[1:n])) + fill!(@view(wx[n+1:end]), 0) + wx .+= t .* kkt.pr_diag + wx[ind_lb] .-= @view(t[ind_lb]) .* (kkt.l_lower ./ kkt.l_diag) + wx[ind_ub] .-= @view(t[ind_ub]) .* (kkt.u_lower ./ kkt.u_diag) +end + +# Set V1 = [U₁ U₂] , V2 = [-U₁ U₂] +function _init_lbfgs_factors!(V1, V2, U, n, p) + @inbounds for i in 1:n, j in 1:p + V1[i, j] = U[i, j] + V2[i, j] = -U[i, j] + V1[i, j+p] = U[i, j+p] + V2[i, j+p] = U[i, j+p] + end end diff --git a/src/IPM/inertiacorrector.jl b/src/IPM/inertiacorrector.jl new file mode 100644 index 00000000..c01077bc --- /dev/null +++ b/src/IPM/inertiacorrector.jl @@ -0,0 +1,33 @@ +abstract type AbstractInertiaCorrector end +struct InertiaAuto <: AbstractInertiaCorrector end +struct InertiaBased <: AbstractInertiaCorrector end +struct InertiaIgnore <: AbstractInertiaCorrector end +struct InertiaFree{ + T, + VT <: AbstractVector{T}, + KKTVec <: AbstractKKTVector{T, VT} +} <: AbstractInertiaCorrector + p0::KKTVec + d0::KKTVec + t::VT + wx::VT + g::VT +end + +function build_inertia_corrector(::Type{InertiaBased}, ::Type{VT}, n, m, nlb, nub, ind_lb, ind_ub) where VT + return InertiaBased() +end +function build_inertia_corrector(::Type{InertiaIgnore}, ::Type{VT}, n, m, nlb, nub, ind_lb, ind_ub) where VT + return InertiaIgnore() +end +function build_inertia_corrector(::Type{InertiaFree}, ::Type{VT}, n, m, nlb, nub, ind_lb, ind_ub) where VT + p0 = UnreducedKKTVector(VT, n, m, nlb, nub, ind_lb, ind_ub) + d0 = UnreducedKKTVector(VT, n, m, nlb, nub, ind_lb, ind_ub) + t = VT(undef, n) + wx= VT(undef, n) + g = VT(undef, n) + + return InertiaFree( + p0, d0, t, wx, g + ) +end diff --git a/src/IPM/kernels.jl b/src/IPM/kernels.jl index 5c3a77b0..5c995cac 100644 --- a/src/IPM/kernels.jl +++ b/src/IPM/kernels.jl @@ -1,33 +1,58 @@ # KKT system updates ------------------------------------------------------- # Set diagonal -function set_aug_diagonal!(kkt::AbstractKKTSystem, solver::MadNLPSolver{T}) where T +function set_aug_diagonal!(kkt::AbstractKKTSystem, solver::MadNLPSolver{T, VT}) where {T, VT <: Vector{T}} x = full(solver.x) xl = full(solver.xl) xu = full(solver.xu) zl = full(solver.zl) zu = full(solver.zu) - @inbounds @simd for i in eachindex(kkt.pr_diag) - kkt.pr_diag[i] = zl[i] /(x[i] - xl[i]) - kkt.pr_diag[i] += 
zu[i] /(xu[i] - x[i]) - end - fill!(kkt.du_diag, zero(T)) - return -end -function set_aug_diagonal!(kkt::SparseUnreducedKKTSystem, solver::MadNLPSolver{T}) where T - fill!(kkt.pr_diag, zero(T)) + fill!(kkt.reg, zero(T)) fill!(kkt.du_diag, zero(T)) @inbounds @simd for i in eachindex(kkt.l_lower) - kkt.l_lower[i] = -sqrt(solver.zl_r[i]) + kkt.l_lower[i] = solver.zl_r[i] kkt.l_diag[i] = solver.xl_r[i] - solver.x_lr[i] end @inbounds @simd for i in eachindex(kkt.u_lower) - kkt.u_lower[i] = -sqrt(solver.zu_r[i]) + kkt.u_lower[i] = solver.zu_r[i] kkt.u_diag[i] = solver.x_ur[i] - solver.xu_r[i] end + + _set_aug_diagonal!(kkt) + + return +end +function set_aug_diagonal!(kkt::AbstractKKTSystem{T}, solver::MadNLPSolver{T}) where T + x = full(solver.x) + xl = full(solver.xl) + xu = full(solver.xu) + zl = full(solver.zl) + zu = full(solver.zu) + + fill!(kkt.reg, zero(T)) + fill!(kkt.du_diag, zero(T)) + kkt.l_diag .= solver.xl_r .- solver.x_lr + kkt.u_diag .= solver.x_ur .- solver.xu_r + copyto!(kkt.l_lower, solver.zl_r) + copyto!(kkt.u_lower, solver.zu_r) + + _set_aug_diagonal!(kkt) + return end +function _set_aug_diagonal!(kkt::AbstractKKTSystem) + copyto!(kkt.pr_diag, kkt.reg) + kkt.pr_diag[kkt.ind_lb] .-= kkt.l_lower ./ kkt.l_diag + kkt.pr_diag[kkt.ind_ub] .-= kkt.u_lower ./ kkt.u_diag +end + +function _set_aug_diagonal!(kkt::AbstractUnreducedKKTSystem) + copyto!(kkt.pr_diag, kkt.reg) + kkt.l_lower_aug .= sqrt.(kkt.l_lower) + kkt.u_lower_aug .= sqrt.(kkt.u_lower) +end + # Robust restoration function set_aug_RR!(kkt::AbstractKKTSystem, solver::MadNLPSolver, RR::RobustRestorer) x = full(solver.x) @@ -35,37 +60,21 @@ function set_aug_RR!(kkt::AbstractKKTSystem, solver::MadNLPSolver, RR::RobustRes xu = full(solver.xu) zl = full(solver.zl) zu = full(solver.zu) - @inbounds @simd for i in eachindex(kkt.pr_diag) - kkt.pr_diag[i] = zl[i] / (x[i] - xl[i]) - kkt.pr_diag[i] += zu[i] / (xu[i] - x[i]) + RR.zeta * RR.D_R[i]^2 - end - @inbounds @simd for i in eachindex(kkt.du_diag) - kkt.du_diag[i] = -RR.pp[i] /RR.zp[i] - RR.nn[i] /RR.zn[i] - end - return -end -function set_aug_RR!(kkt::SparseUnreducedKKTSystem, solver::MadNLPSolver, RR::RobustRestorer) - @inbounds @simd for i in eachindex(kkt.pr_diag) - kkt.pr_diag[i] = RR.zeta * RR.D_R[i]^2 - end - @inbounds @simd for i in eachindex(kkt.du_diag) - kkt.du_diag[i] = -RR.pp[i] / RR.zp[i] - RR.nn[i] / RR.zn[i] - end - @inbounds @simd for i in eachindex(kkt.l_lower) - kkt.l_lower[i] = -sqrt(solver.zl_r[i]) - kkt.l_diag[i] = solver.xl_r[i] - solver.x_lr[i] - end - @inbounds @simd for i in eachindex(kkt.u_lower) - kkt.u_lower[i] = -sqrt(solver.zu_r[i]) - kkt.u_diag[i] = solver.x_ur[i] - solver.xu_r[i] - end + kkt.reg .= RR.zeta .* RR.D_R .^ 2 + kkt.du_diag .= .- RR.pp ./ RR.zp .- RR.nn ./ RR.zn + copyto!(kkt.l_lower, solver.zl_r) + copyto!(kkt.u_lower, solver.zu_r) + kkt.l_diag .= solver.xl_r .- solver.x_lr + kkt.u_diag .= solver.x_ur .- solver.xu_r + + _set_aug_diagonal!(kkt) + return end + function set_f_RR!(solver::MadNLPSolver, RR::RobustRestorer) x = full(solver.x) - @inbounds @simd for i in eachindex(RR.f_R) - RR.f_R[i] = RR.zeta * RR.D_R[i]^2 *(x[i]-RR.x_ref[i]) - end + RR.f_R .= RR.zeta .* RR.D_R .^ 2 .* (x .- RR.x_ref) end @@ -76,49 +85,70 @@ function set_aug_rhs!(solver::MadNLPSolver, kkt::AbstractKKTSystem, c) f = primal(solver.f) xl = primal(solver.xl) xu = primal(solver.xu) - @inbounds @simd for i in eachindex(px) - px[i] = -f[i] + solver.mu / (x[i] - xl[i]) - solver.mu / (xu[i] - x[i]) - solver.jacl[i] - end - py = dual(solver.p) - @inbounds @simd for i in 
eachindex(py) - py[i] = -c[i] - end - return -end -function set_aug_rhs!(solver::MadNLPSolver, kkt::SparseUnreducedKKTSystem, c) - f = primal(solver.f) - zl = primal(solver.zl) - zu = primal(solver.zu) - px = primal(solver.p) - @inbounds @simd for i in eachindex(px) - px[i] = -f[i] + zl[i] - zu[i] - solver.jacl[i] - end + zl = full(solver.zl) + zu = full(solver.zu) py = dual(solver.p) - @inbounds @simd for i in eachindex(py) - py[i] = -c[i] - end pzl = dual_lb(solver.p) - @inbounds @simd for i in eachindex(pzl) - pzl[i] = (solver.xl_r[i] - solver.x_lr[i]) * kkt.l_lower[i] + solver.mu / kkt.l_lower[i] - end pzu = dual_ub(solver.p) - @inbounds @simd for i in eachindex(pzu) - pzu[i] = (solver.xu_r[i] -solver.x_ur[i]) * kkt.u_lower[i] - solver.mu / kkt.u_lower[i] - end -# >>>>>>> origin/master - return -end -function set_aug_rhs_ifr!(solver::MadNLPSolver{T}, kkt::SparseUnreducedKKTSystem,c) where T - fill!(primal(solver._w1), zero(T)) - fill!(dual_lb(solver._w1), zero(T)) - fill!(dual_ub(solver._w1), zero(T)) - wy = dual(solver._w1) - @inbounds @simd for i in eachindex(wy) - wy[i] = -c[i] - end - return -end + px .= .-f .+ zl .- zu .- solver.jacl + py .= .-c + pzl .= (solver.xl_r .- solver.x_lr) .* solver.zl_r .+ solver.mu + pzu .= (solver.xu_r .- solver.x_ur) .* solver.zu_r .- solver.mu + +end +# function set_aug_rhs!(solver::MadNLPSolver, kkt::AbstractKKTSystem, c) +# px = primal(solver.p) +# x = primal(solver.x) +# f = primal(solver.f) +# xl = primal(solver.xl) +# xu = primal(solver.xu) +# zl = full(solver.zl) +# zu = full(solver.zu) +# @inbounds @simd for i in eachindex(px) +# px[i] = -f[i] + zl[i] - zu[i] - solver.jacl[i] +# end + +# py = dual(solver.p) +# @inbounds @simd for i in eachindex(py) +# py[i] = -c[i] +# end + +# pzl = dual_lb(solver.p) +# @inbounds @simd for i in eachindex(pzl) +# pzl[i] = (solver.xl_r[i] - solver.x_lr[i]) * solver.zl_r[i] + solver.mu +# end + +# pzu = dual_ub(solver.p) +# @inbounds @simd for i in eachindex(pzu) +# pzu[i] = (solver.xu_r[i] -solver.x_ur[i]) * solver.zu_r[i] - solver.mu +# end +# return +# end + +# function set_aug_rhs!(solver::MadNLPSolver, kkt::SparseUnreducedKKTSystem, c) +# f = primal(solver.f) +# zl = primal(solver.zl) +# zu = primal(solver.zu) +# px = primal(solver.p) +# @inbounds @simd for i in eachindex(px) +# px[i] = -f[i] + zl[i] - zu[i] - solver.jacl[i] +# end +# py = dual(solver.p) +# @inbounds @simd for i in eachindex(py) +# py[i] = -c[i] +# end +# pzl = dual_lb(solver.p) +# @inbounds @simd for i in eachindex(pzl) +# pzl[i] = (solver.xl_r[i] - solver.x_lr[i]) * kkt.l_lower[i] + solver.mu / kkt.l_lower[i] +# end +# pzu = dual_ub(solver.p) +# @inbounds @simd for i in eachindex(pzu) +# pzu[i] = (solver.xu_r[i] - solver.x_ur[i]) * kkt.u_lower[i] - solver.mu / kkt.u_lower[i] +# end +# return +# end + # Set RHS RR function set_aug_rhs_RR!( @@ -127,70 +157,85 @@ function set_aug_rhs_RR!( x = full(solver.x) xl = full(solver.xl) xu = full(solver.xu) + zl = full(solver.zl) + zu = full(solver.zu) px = primal(solver.p) - @inbounds @simd for i in eachindex(px) - px[i] = -RR.f_R[i] -solver.jacl[i] + RR.mu_R / (x[i] - xl[i]) - RR.mu_R / (xu[i] - x[i]) - end py = dual(solver.p) - @inbounds @simd for i in eachindex(py) - py[i] = -solver.c[i] + RR.pp[i] - RR.nn[i] + (RR.mu_R-(rho-solver.y[i])*RR.pp[i])/RR.zp[i]-(RR.mu_R-(rho+solver.y[i])*RR.nn[i]) / RR.zn[i] - end + pzl = dual_lb(solver.p) + pzu = dual_ub(solver.p) + + mu = RR.mu_R + + px .= .- RR.f_R .+ zl .- zu .- solver.jacl + py .= .- solver.c .+ RR.pp .- RR.nn .+ + (mu .- (rho .- solver.y) .* 
RR.pp) ./ RR.zp .- + (mu .- (rho .+ solver.y) .* RR.nn) ./ RR.zn + + pzl .= (solver.xl_r .- solver.x_lr) .* solver.zl_r .+ mu + pzu .= (solver.xu_r .- solver.x_ur) .* solver.zu_r .- mu + return end +# solving KKT system +@inbounds function _kktmul!(w,x,reg,du_diag,l_lower,u_lower,l_diag,u_diag, alpha, beta) + primal(w) .+= alpha .* reg .* primal(x) + dual(w) .+= alpha .* du_diag .* dual(x) + w.xp_lr .-= alpha .* dual_lb(x) + w.xp_ur .+= alpha .* dual_ub(x) + dual_lb(w) .= beta .* dual_lb(w) .+ alpha .* (x.xp_lr .* l_lower .- dual_lb(x) .* l_diag) + dual_ub(w) .= beta .* dual_ub(w) .+ alpha .* (x.xp_ur .* u_lower .+ dual_ub(x) .* u_diag) +end + +@inbounds function reduce_rhs!( + xp_lr,wl,l_diag, + xp_ur,wu,u_diag, + ) + xp_lr .-= wl ./ l_diag + xp_ur .-= wu ./ u_diag +end + + # Finish -function finish_aug_solve!(solver::MadNLPSolver, kkt::AbstractKKTSystem, mu) - dlb = dual_lb(solver.d) +function finish_aug_solve!(kkt::AbstractKKTSystem{T, VT}, d) where {T, VT <: Vector{T}} + dlb = dual_lb(d) + dub = dual_ub(d) @inbounds @simd for i in eachindex(dlb) - dlb[i] = (mu-solver.zl_r[i]*solver.dx_lr[i])/(solver.x_lr[i]-solver.xl_r[i])-solver.zl_r[i] + dlb[i] = (-dlb[i] + kkt.l_lower[i] * d.xp_lr[i]) / kkt.l_diag[i] end - dub = dual_ub(solver.d) @inbounds @simd for i in eachindex(dub) - dub[i] = (mu+solver.zu_r[i]*solver.dx_ur[i])/(solver.xu_r[i]-solver.x_ur[i])-solver.zu_r[i] + dub[i] = ( dub[i] - kkt.u_lower[i] * d.xp_ur[i]) / kkt.u_diag[i] end + return end -function finish_aug_solve!(solver::MadNLPSolver, kkt::SparseUnreducedKKTSystem, mu) - dlb = dual_lb(solver.d) - @inbounds @simd for i in eachindex(dlb) - dlb[i] = (mu-solver.zl_r[i]*solver.dx_lr[i]) / (solver.x_lr[i]-solver.xl_r[i]) - solver.zl_r[i] - end - dub = dual_ub(solver.d) - @inbounds @simd for i in eachindex(dub) - dub[i] = (mu+solver.zu_r[i]*solver.dx_ur[i]) / (solver.xu_r[i]-solver.x_ur[i]) - solver.zu_r[i] - end + +function finish_aug_solve!(kkt::AbstractKKTSystem, d) + dlb = dual_lb(d) + dub = dual_ub(d) + dlb .= (.-dlb .+ kkt.l_lower .* d.xp_lr) ./ kkt.l_diag + dub .= ( dub .- kkt.u_lower .* d.xp_ur) ./ kkt.u_diag return end -# Initial -function set_initial_bounds!(solver::MadNLPSolver{T}) where T - @inbounds @simd for i in eachindex(solver.xl_r) - solver.xl_r[i] -= max(one(T),abs(solver.xl_r[i]))*solver.opt.tol - end - @inbounds @simd for i in eachindex(solver.xu_r) - solver.xu_r[i] += max(one(T),abs(solver.xu_r[i]))*solver.opt.tol - end +function set_initial_bounds!(xl::AbstractVector{T},xu,tol) where T + map!( + x->x - max(one(T), abs(x)) .* tol, + xl, xl + ) + map!( + x->x + max(one(T), abs(x)) .* tol, + xu, xu + ) end + function set_initial_rhs!(solver::MadNLPSolver{T}, kkt::AbstractKKTSystem) where T f = primal(solver.f) zl = primal(solver.zl) zu = primal(solver.zu) px = primal(solver.p) - @inbounds @simd for i in eachindex(px) - px[i] = -f[i] + zl[i] - zu[i] - end - fill!(dual(solver.p), zero(T)) - return -end -function set_initial_rhs!(solver::MadNLPSolver{T}, kkt::SparseUnreducedKKTSystem) where T - f = primal(solver.f) - zl = primal(solver.zl) - zu = primal(solver.zu) - px = primal(solver.p) - @inbounds @simd for i in eachindex(px) - px[i] = -f[i] + zl[i] - zu[i] - end + px .= .-f .+ zl .- zu fill!(dual(solver.p), zero(T)) fill!(dual_lb(solver.p), zero(T)) fill!(dual_ub(solver.p), zero(T)) @@ -198,86 +243,90 @@ function set_initial_rhs!(solver::MadNLPSolver{T}, kkt::SparseUnreducedKKTSystem end # Set ifr -function set_aug_rhs_ifr!(solver::MadNLPSolver{T}, kkt::AbstractKKTSystem) where T - fill!(primal(solver._w1), 
zero(T)) - wy = dual(solver._w1) - @inbounds @simd for i in eachindex(wy) - wy[i] = - solver.c[i] - end +function set_aug_rhs_ifr!(solver::MadNLPSolver{T}, kkt::AbstractKKTSystem, p0) where T + fill!(primal(p0), zero(T)) + fill!(dual_lb(p0), zero(T)) + fill!(dual_ub(p0), zero(T)) + wy = dual(p0) + wy .= .- solver.c return end + function set_g_ifr!(solver::MadNLPSolver, g) f = full(solver.f) x = full(solver.x) xl = full(solver.xl) xu = full(solver.xu) - @inbounds @simd for i in eachindex(g) - g[i] = f[i] - solver.mu / (x[i]-xl[i]) + solver.mu / (xu[i]-x[i]) + solver.jacl[i] - end + g .= f .- solver.mu ./ (x .- xl) .+ solver.mu ./ (xu .- x) .+ solver.jacl end # Finish RR function finish_aug_solve_RR!(dpp, dnn, dzp, dzn, l, dl, pp, nn, zp, zn, mu_R, rho) - @inbounds @simd for i in eachindex(dpp) - dpp[i] = (mu_R + pp[i] * dl[i] - (rho - l[i]) * pp[i]) / zp[i] - dnn[i] = (mu_R - nn[i] * dl[i] - (rho + l[i]) * nn[i]) / zn[i] - dzp[i] = (mu_R - zp[i] * dpp[i]) / pp[i] - zp[i] - dzn[i] = (mu_R - zn[i] * dnn[i]) / nn[i] - zn[i] - end + dzp .= rho .- l .- dl .- zp + dzn .= rho .+ l .+ dl .- zn + dpp .= .- pp .+ mu_R ./zp .- (pp./zp) .* dzp + dnn .= .- nn .+ mu_R ./zn .- (nn./zn) .* dzn return end -# Scaling -function unscale!(solver::AbstractMadNLPSolver) - x_slk = slack(solver.x) - solver.obj_val /= solver.obj_scale[] - @inbounds @simd for i in eachindex(solver.c) - solver.c[i] /= solver.con_scale[i] - solver.c[i] += solver.rhs[i] - end - @inbounds @simd for i in eachindex(solver.c_slk) - solver.c_slk[i] += x_slk[i] - end -end - # Kernel functions --------------------------------------------------------- -is_valid(val::Real) = !(isnan(val) || isinf(val)) -function is_valid(vec::AbstractArray) +is_valid(val::R) where R <: Real = !(isnan(val) || isinf(val)) +function is_valid(vec::VT) where {VT <: Union{Vector,Matrix}} @inbounds @simd for i=1:length(vec) is_valid(vec[i]) || return false end return true end -is_valid(args...) = all(is_valid(arg) for arg in args) +is_valid(vec::AbstractArray) = isempty(vec) ? 
true : mapreduce(is_valid, &, vec) -function get_varphi(obj_val, x_lr, xl_r, xu_r, x_ur, mu) +function get_varphi(obj_val, x_lr::SubVector{T,Vector{T},VI}, xl_r, xu_r, x_ur, mu) where {T, VI} varphi = obj_val @inbounds @simd for i=1:length(x_lr) xll = x_lr[i]-xl_r[i] - xll < 0 && return Inf + xll < 0 && return T(Inf) varphi -= mu*log(xll) end @inbounds @simd for i=1:length(x_ur) xuu = xu_r[i]-x_ur[i] - xuu < 0 && return Inf + xuu < 0 && return T(Inf) varphi -= mu*log(xuu) end return varphi end +function get_varphi(obj_val, x_lr, xl_r, xu_r, x_ur, mu) + + return obj_val + mapreduce( + (x1,x2) -> _get_varphi(x1,x2,mu), +, x_lr, xl_r + ) + mapreduce( + (x1,x2) -> _get_varphi(x1,x2,mu), +, xu_r, x_ur + ) +end + +function _get_varphi(x1::T,x2,mu) where T + x = x1 - x2 + if x < 0 + return T(Inf) + else + return -mu * log(x) + end +end @inline get_inf_pr(c) = norm(c, Inf) -function get_inf_du(f, zl, zu, jacl, sd) - inf_du = 0.0 +function get_inf_du(f::Vector{T}, zl, zu, jacl, sd) where T + inf_du = zero(T) @inbounds @simd for i=1:length(f) inf_du = max(inf_du,abs(f[i]-zl[i]+zu[i]+jacl[i])) end return inf_du/sd end +function get_inf_du(f, zl, zu, jacl, sd) + return mapreduce((f,zl,zu,jacl) -> abs(f-zl+zu+jacl), max, f, zl, zu, jacl; init = zero(eltype(f))) / sd +end -function get_inf_compl(x_lr, xl_r, zl_r, xu_r, x_ur, zu_r, mu, sc) - inf_compl = 0.0 +function get_inf_compl(x_lr::SubVector{T,Vector{T},VI}, xl_r, zl_r, xu_r, x_ur, zu_r, mu, sc) where {T, VI} + inf_compl = zero(T) @inbounds @simd for i=1:length(x_lr) inf_compl = max(inf_compl,abs((x_lr[i]-xl_r[i])*zl_r[i]-mu)) end @@ -286,26 +335,68 @@ function get_inf_compl(x_lr, xl_r, zl_r, xu_r, x_ur, zu_r, mu, sc) end return inf_compl/sc end +function get_inf_compl(x_lr, xl_r, zl_r, xu_r, x_ur, zu_r, mu, sc) + return max( + mapreduce( + (x_lr, xl_r, zl_r) -> abs((x_lr-xl_r)*zl_r-mu), + max, + x_lr, xl_r, zl_r; + init = zero(eltype(x_lr)) + ), + mapreduce( + (xu_r, x_ur, zu_r) -> abs((xu_r-x_ur)*zu_r-mu), + max, + xu_r, x_ur, zu_r; + init = zero(eltype(x_lr)) + ) + ) / sc +end -function get_varphi_d(f, x, xl, xu, dx, mu) - varphi_d = 0.0 +function get_varphi_d(f::Vector{T}, x, xl, xu, dx, mu) where T + varphi_d = zero(T) @inbounds @simd for i=1:length(f) varphi_d += (f[i] - mu/(x[i]-xl[i]) + mu/(xu[i]-x[i])) * dx[i] end return varphi_d end +function get_varphi_d(f, x, xl, xu, dx, mu) + return mapreduce( + (f,x,xl,xu,dx)-> (f - mu/(x-xl) + mu/(xu-x)) * dx, + +, + f, x, xl, xu, dx; + init = zero(eltype(f)) + ) +end -function get_alpha_max(x, xl, xu, dx, tau) - alpha_max = 1.0 +function get_alpha_max(x::Vector{T}, xl, xu, dx, tau) where T + alpha_max = one(T) @inbounds @simd for i=1:length(x) dx[i]<0 && (alpha_max=min(alpha_max,(-x[i]+xl[i])*tau/dx[i])) dx[i]>0 && (alpha_max=min(alpha_max,(-x[i]+xu[i])*tau/dx[i])) end return alpha_max end +function get_alpha_max(x::VT, xl, xu, dx, tau) where {T, VT <: AbstractVector{T}} + return min( + mapreduce( + (x, xl, dx) -> dx < 0 ? (-x+xl)*tau/dx : T(Inf), + min, + + x, xl, dx, + init = one(eltype(x)) + ), + mapreduce( + (x, xu, dx) -> dx > 0 ? 
(-x+xu)*tau/dx : T(Inf), + min, + x, xu, dx, + init = one(eltype(x)) + ) + ) +end + -function get_alpha_z(zl_r, zu_r, dzl, dzu, tau) - alpha_z = 1.0 +function get_alpha_z(zl_r::SubVector{T,Vector{T},VI}, zu_r, dzl, dzu, tau) where {T, VI} + alpha_z = one(T) @inbounds @simd for i=1:length(zl_r) dzl[i] < 0 && (alpha_z=min(alpha_z,-zl_r[i]*tau/dzl[i])) end @@ -314,35 +405,75 @@ function get_alpha_z(zl_r, zu_r, dzl, dzu, tau) end return alpha_z end +function get_alpha_z(zl_r::VT, zu_r, dzl, dzu, tau) where {T, VT <: AbstractVector{T}} + return min( + mapreduce( + (zl_r, dzl) -> dzl < 0 ? (-zl_r)*tau/dzl : T(Inf), + min, + zl_r, dzl, + init = one(T) + ), + mapreduce( + (zu_r, dzu) -> dzu < 0 ? (-zu_r)*tau/dzu : T(Inf), + min, + zu_r, dzu, + init = one(T) + ) + ) +end -function get_obj_val_R(p, n, D_R, x, x_ref, rho, zeta) +function get_obj_val_R(p::Vector{T}, n, D_R, x, x_ref, rho, zeta) where T obj_val_R = 0. @inbounds @simd for i=1:length(p) obj_val_R += rho*(p[i]+n[i]) .+ zeta/2*D_R[i]^2*(x[i]-x_ref[i])^2 end return obj_val_R end +function get_obj_val_R(p::VT, n, D_R, x, x_ref, rho, zeta) where {T, VT <: AbstractVector{T}} + return mapreduce( + (p,n,D_R,x,x_ref) -> rho*(p+n) .+ zeta/2*D_R^2*(x-x_ref)^2, + +, + p,n,D_R,x,x_ref; + init = zero(T) + ) +end @inline get_theta(c) = norm(c, 1) -function get_theta_R(c, p, n) - theta_R = 0.0 +function get_theta_R(c::Vector{T}, p, n) where T + theta_R = zero(T) @inbounds @simd for i=1:length(c) theta_R += abs(c[i]-p[i]+n[i]) end return theta_R end +function get_theta_R(c::VT, p, n) where {T, VT <: AbstractVector{T}} + return mapreduce( + (c,p,n) -> abs(c-p+n), + +, + c,p,n; + init = zero(T) + ) +end -function get_inf_pr_R(c, p, n) - inf_pr_R = 0.0 +function get_inf_pr_R(c::Vector{T}, p, n) where T + inf_pr_R = zero(T) @inbounds @simd for i=1:length(c) inf_pr_R = max(inf_pr_R,abs(c[i]-p[i]+n[i])) end return inf_pr_R end +function get_inf_pr_R(c::VT, p, n) where {T, VT <: AbstractVector{T}} + return mapreduce( + (c,p,n) -> abs(c-p+n), + max, + c,p,n; + init = zero(T) + ) +end -function get_inf_du_R(f_R, l, zl, zu, jacl, zp, zn, rho, sd) - inf_du_R = 0.0 +function get_inf_du_R(f_R::Vector{T}, l, zl, zu, jacl, zp, zn, rho, sd) where T + inf_du_R = zero(T) @inbounds @simd for i=1:length(zl) inf_du_R = max(inf_du_R,abs(f_R[i]-zl[i]+zu[i]+jacl[i])) end @@ -354,9 +485,35 @@ function get_inf_du_R(f_R, l, zl, zu, jacl, zp, zn, rho, sd) end return inf_du_R / sd end +function get_inf_du_R(f_R::VT, l, zl, zu, jacl, zp, zn, rho, sd) where {T, VT <: AbstractVector{T}} + return max( + mapreduce( + (f_R, zl, zu, jacl) -> abs(f_R-zl+zu+jacl), + max, + f_R, zl, zu, jacl; + init = zero(T) + ), + mapreduce( + (l, zp) -> abs(rho-l-zp), + max, + l, zp; + init = zero(T) + ), + mapreduce( + (l, zn) -> abs(rho+l-zn), + max, + l, zn; + init = zero(T) + ) + ) / sd +end + -function get_inf_compl_R(x_lr, xl_r, zl_r, xu_r, x_ur, zu_r, pp, zp, nn, zn, mu_R, sc) - inf_compl_R = 0.0 +function get_inf_compl_R( + x_lr::SubVector{T,Vector{T},VI}, xl_r, zl_r, xu_r, x_ur, zu_r, pp, zp, nn, zn, mu_R, sc + ) where {T, VI} + + inf_compl_R = zero(T) @inbounds @simd for i=1:length(x_lr) inf_compl_R = max(inf_compl_R,abs((x_lr[i]-xl_r[i])*zl_r[i]-mu_R)) end @@ -371,9 +528,40 @@ function get_inf_compl_R(x_lr, xl_r, zl_r, xu_r, x_ur, zu_r, pp, zp, nn, zn, mu_ end return inf_compl_R / sc end +function get_inf_compl_R( + x_lr::SubVector{T,VT,VI}, xl_r, zl_r, xu_r, x_ur, zu_r, pp, zp, nn, zn, mu_R, sc + ) where {T, VT <: AbstractVector{T}, VI} -function get_alpha_max_R(x, xl, xu, dx, pp, dpp, nn, dnn, 
tau_R) - alpha_max_R = 1.0 + return max( + mapreduce( + (x_lr, xl_r, zl_r) -> abs((x_lr-xl_r)*zl_r-mu_R), + max, + x_lr, xl_r, zl_r; + init = zero(T) + ), + mapreduce( + (xu_r, x_ur, zu_r) -> abs((xu_r-x_ur)*zu_r-mu_R), + max, + xu_r, x_ur, zu_r; + init = zero(T) + ), + mapreduce( + (pp, zp) -> abs(pp*zp-mu_R), + max, + pp, zp; + init = zero(T) + ), + mapreduce( + (nn, zn) -> abs(nn*zn-mu_R), + max, + nn, zn; + init = zero(T) + ), + )/ sc +end + +function get_alpha_max_R(x::Vector{T}, xl, xu, dx, pp, dpp, nn, dnn, tau_R) where T + alpha_max_R = one(T) @inbounds @simd for i=1:length(x) dx[i]<0 && (alpha_max_R=min(alpha_max_R,(-x[i]+xl[i])*tau_R/dx[i])) dx[i]>0 && (alpha_max_R=min(alpha_max_R,(-x[i]+xu[i])*tau_R/dx[i])) @@ -386,9 +574,48 @@ function get_alpha_max_R(x, xl, xu, dx, pp, dpp, nn, dnn, tau_R) end return alpha_max_R end +function get_alpha_max_R(x::VT, xl, xu, dx, pp, dpp, nn, dnn, tau_R) where {T, VT <: AbstractVector{T}} + return min( + mapreduce( + (x,xl,xu,dx) -> if dx < 0 + (-x+xl)*tau_R/dx + elseif dx > 0 + (-x+xu)*tau_R/dx + else + T(Inf) + end, + min, + x,xl,xu,dx; + init = one(T) + ), + mapreduce( + (pp, dpp)-> if dpp < 0 + -pp*tau_R/dpp + else + T(Inf) + end, + min, + pp, dpp; + init = one(T) + ), + mapreduce( + (nn, dnn)-> if dnn < 0 + -nn*tau_R/dnn + else + T(Inf) + end, + min, + nn, dnn; + init = one(T) + ) + ) +end -function get_alpha_z_R(zl_r, zu_r, dzl, dzu, zp, dzp, zn, dzn, tau_R) - alpha_z_R = 1.0 +function get_alpha_z_R( + zl_r::SubVector{T,Vector{T},VI}, zu_r, dzl, dzu, zp, dzp, zn, dzn, tau_R + ) where {T, VI} + + alpha_z_R = one(T) @inbounds @simd for i=1:length(zl_r) dzl[i]<0 && (alpha_z_R=min(alpha_z_R,-zl_r[i]*tau_R/dzl[i])) end @@ -403,53 +630,156 @@ function get_alpha_z_R(zl_r, zu_r, dzl, dzu, zp, dzp, zn, dzn, tau_R) end return alpha_z_R end +function get_alpha_z_R( + zl_r::SubVector{T,VT,VI}, zu_r, dzl, dzu, zp, dzp, zn, dzn, tau_R + ) where {T, VT <: AbstractVector{T}, VI} + + f(d,z) = d < 0 ? -z*tau_R/d : T(Inf) + return min( + mapreduce( + f, + min, + dzl, zl_r; + init = one(T) + ), + mapreduce( + f, + min, + dzu, zu_r; + init = one(T) + ), + mapreduce( + f, + min, + dzp, zp; + init = one(T) + ), + mapreduce( + f, + min, + dzn, zn; + init = one(T) + ) + ) +end + + +function get_varphi_R( + obj_val, x_lr::SubVector{T,Vector{T},VI}, xl_r, xu_r, x_ur, pp, nn, mu_R + ) where {T, VI} -function get_varphi_R(obj_val, x_lr, xl_r, xu_r, x_ur, pp, nn, mu_R) varphi_R = obj_val @inbounds @simd for i=1:length(x_lr) xll = x_lr[i]-xl_r[i] - xll < 0 && return Inf + xll < 0 && return T(Inf) varphi_R -= mu_R*log(xll) end @inbounds @simd for i=1:length(x_ur) xuu = xu_r[i]-x_ur[i] - xuu < 0 && return Inf + xuu < 0 && return T(Inf) varphi_R -= mu_R*log(xuu) end @inbounds @simd for i=1:length(pp) - pp[i] < 0 && return Inf + pp[i] < 0 && return T(Inf) varphi_R -= mu_R*log(pp[i]) end @inbounds @simd for i=1:length(pp) - nn[i] < 0 && return Inf + nn[i] < 0 && return T(Inf) varphi_R -= mu_R*log(nn[i]) end return varphi_R end +function get_varphi_R( + obj_val, x_lr::SubVector{T,VT,VI}, xl_r, xu_r, x_ur, pp, nn, mu_R + ) where {T, VT <: AbstractVector{T}, VI} + + varphi_R = obj_val + f1(x) = x < 0 ? T(Inf) : mu_R*log(x) + function f2(x,y) + d = x - y + d < 0 ? 
T(Inf) : mu_R * log(d) + end + + return obj_val - +( + mapreduce( + f2, + +, + x_lr, xl_r; + init = zero(T) + ), + mapreduce( + f2, + +, + x_lr, xl_r; + init = zero(T) + ), + mapreduce( + f1, + +, + pp; + init = zero(T) + ), + mapreduce( + f1, + +, + nn; + init = zero(T) + ) + ) +end -function get_F(c, f, zl, zu, jacl, x_lr, xl_r, zl_r, xu_r, x_ur, zu_r, mu) - F = 0.0 + +function get_F(c::Vector{T}, f, zl, zu, jacl, x_lr, xl_r, zl_r, xu_r, x_ur, zu_r, mu) where T + F = zero(T) @inbounds @simd for i=1:length(c) - F = max(F, c[i]) + F += abs(c[i]) end @inbounds @simd for i=1:length(f) - F = max(F, f[i]-zl[i]+zu[i]+jacl[i]) + F += abs(f[i]-zl[i]+zu[i]+jacl[i]) end @inbounds @simd for i=1:length(x_lr) - x_lr[i] >= xl_r[i] || return Inf - zl_r[i] >= 0 || return Inf - F = max(F, (x_lr[i]-xl_r[i])*zl_r[i]-mu) + x_lr[i] >= xl_r[i] || return T(Inf) + zl_r[i] >= 0 || return T(Inf) + F += abs((x_lr[i]-xl_r[i])*zl_r[i]-mu) end @inbounds @simd for i=1:length(x_ur) - xu_r[i] >= x_ur[i] || return Inf - zu_r[i] >= 0 || return Inf - F = max(F, (xu_r[i]-xu_r[i])*zu_r[i]-mu) + xu_r[i] >= x_ur[i] || return T(Inf) + zu_r[i] >= 0 || return T(Inf) + F += abs((xu_r[i]-xu_r[i])*zu_r[i]-mu) end return F end +function get_F(c::AbstractVector{T}, f, zl, zu, jacl, x_lr, xl_r, zl_r, xu_r, x_ur, zu_r, mu) where T + F1 = mapreduce( + abs, + +, + c; + init = zero(T) + ) + F2 = mapreduce( + (f,zl,zu,jacl) -> abs(f-zl+zu+jacl), + +, + f,zl,zu,jacl; + init = zero(T) + ) + F3 = mapreduce( + (x_lr,xl_r,zl_r) -> (x_lr >= xl_r && zl_r >= 0) ? abs((x_lr-xl_r)*zl_r-mu) : T(Inf), + +, + x_lr,xl_r,zl_r; + init = zero(T) + ) + F4 = mapreduce( + (xu_r,x_ur,zu_r) -> (xu_r >= x_ur && zu_r >= 0) ? abs((xu_r-xu_r)*zu_r-mu) : T(Inf), + +, + xu_r,xu_r,zu_r; + init = zero(T) + ) + return F1 + F2 + F3 + F4 +end + -function get_varphi_d_R(f_R, x, xl, xu, dx, pp, nn, dpp, dnn, mu_R, rho) - varphi_d = 0.0 +function get_varphi_d_R(f_R::Vector{T}, x, xl, xu, dx, pp, nn, dpp, dnn, mu_R, rho) where T + varphi_d = zero(T) @inbounds @simd for i=1:length(x) varphi_d += (f_R[i] - mu_R/(x[i]-xl[i]) + mu_R/(xu[i]-x[i])) * dx[i] end @@ -461,8 +791,31 @@ function get_varphi_d_R(f_R, x, xl, xu, dx, pp, nn, dpp, dnn, mu_R, rho) end return varphi_d end +function get_varphi_d_R(f_R::VT, x, xl, xu, dx, pp, nn, dpp, dnn, mu_R, rho) where {T, VT <: AbstractVector{T}} + f(x,dx) = (rho - mu_R/x) * dx + return +( + mapreduce( + (f_R, x, xl, xu, dx) -> (f_R - mu_R/(x-xl) + mu_R/(xu-x)) * dx, + +, + f_R, x, xl, xu, dx; + init = zero(T) + ), + mapreduce( + f, + +, + pp,dpp; + init = zero(T) + ), + mapreduce( + f, + +, + nn,dnn; + init = zero(T) + ), + ) +end -function initialize_variables!(x, xl, xu, bound_push, bound_fac) +function initialize_variables!(x::Vector{T}, xl, xu, bound_push, bound_fac) where T @inbounds @simd for i=1:length(x) if xl[i]!=-Inf && xu[i]!=Inf x[i] = min( @@ -477,35 +830,56 @@ function initialize_variables!(x, xl, xu, bound_push, bound_fac) end end +function initialize_variables!(x, xl, xu, bound_push, bound_fac) + map!((x,l,u) -> _initialize_variables!(x,l,u, bound_push, bound_fac), x, x, xl, xu) +end + +function _initialize_variables!(x::T, xl, xu, bound_push, bound_fac) where T + if xl!=-T(Inf) && xu!=T(Inf) + return min( + xu-min(bound_push*max(1,abs(xu)), bound_fac*(xu-xl)), + max(xl+min(bound_push*max(1,abs(xl)),bound_fac*(xu-xl)),x), + ) + elseif xl!=-T(Inf) && xu==T(Inf) + return max(xl+bound_push*max(1,abs(xl)), x) + elseif xl==-T(Inf) && xu!=T(Inf) + return min(xu-bound_push*max(1,abs(xu)), x) + end + return x +end + + + function 
adjust_boundary!(x_lr::VT, xl_r, x_ur, xu_r, mu) where {T, VT <: AbstractVector{T}} - adjusted = 0 c1 = eps(T)*mu - c2= eps(T)^(3/4) - @inbounds @simd for i=1:length(xl_r) - if x_lr[i]-xl_r[i] < c1 - xl_r[i] -= c2*max(1,abs(x_lr[i])) - adjusted += 1 - end - end - @inbounds @simd for i=1:length(xu_r) - if xu_r[i]-x_ur[i] < c1 - xu_r[i] += c2*max(1, abs(x_ur[i])) - adjusted += 1 - end - end - return adjusted + c2 = eps(T)^(3/4) + map!( + (x_lr, xl_r) -> (x_lr-xl_r < c1) ? (xl_r - c2*max(1,abs(x_lr))) : xl_r, + xl_r, x_lr, xl_r + ) + map!( + (xu_r, x_ur) -> (xu_r-x_ur < c1) ? (xu_r + c2*max(1,abs(x_ur))) : xu_r, + xu_r, xu_r, x_ur + ) end -function get_rel_search_norm(x, dx) - rel_search_norm = 0.0 +function get_rel_search_norm(x::Vector{T}, dx) where T + rel_search_norm = zero(T) @inbounds @simd for i=1:length(x) rel_search_norm = max( rel_search_norm, - abs(dx[i]) / (1.0 + abs(x[i])), + abs(dx[i]) / (one(T) + abs(x[i])), ) end return rel_search_norm end +function get_rel_search_norm(x::AbstractVector{T}, dx) where T + return mapreduce( + (x,dx) -> abs(dx) / (one(T) + abs(x)), + max, + x, dx + ) +end # Utility functions function get_sd(l, zl_r, zu_r, s_max) @@ -584,19 +958,36 @@ function is_barr_obj_rapid_increase(varphi, varphi_trial, obj_max_inc) return (varphi_trial >= varphi) && (log10(varphi_trial-varphi) > obj_max_inc + max(1.0, log10(abs(varphi)))) end -function reset_bound_dual!(z, x, mu, kappa_sigma) +function reset_bound_dual!(z::Vector{T}, x, mu, kappa_sigma) where T @inbounds @simd for i in eachindex(z) z[i] = max(min(z[i], (kappa_sigma*mu)/x[i]), (mu/kappa_sigma)/x[i]) end return end -function reset_bound_dual!(z, x1, x2, mu, kappa_sigma) +function reset_bound_dual!(z, x, mu, kappa_sigma) + map!( + (z, x) -> max(min(z, (kappa_sigma*mu)/x), (mu/kappa_sigma)/x), + z, z, x + ) + return +end + +function reset_bound_dual!(z::Vector{T}, x1, x2, mu, kappa_sigma) where T @inbounds @simd for i in eachindex(z) z[i] = max(min(z[i], (kappa_sigma*mu)/(x1[i]-x2[i])), (mu/kappa_sigma)/(x1[i]-x2[i])) end return end +function reset_bound_dual!(z, x1, x2, mu, kappa_sigma) + map!( + (z,x1,x2) -> max(min(z, (kappa_sigma*mu)/(x1-x2)), (mu/kappa_sigma)/(x1-x2)), + z,z,x1,x2 + ) + return +end + + function get_ftype(filter,theta,theta_trial,varphi,varphi_trial,switching_condition,armijo_condition, theta_min,obj_max_inc,gamma_theta,gamma_phi,has_constraints) is_filter_acceptable(filter,theta_trial,varphi_trial) || return " " @@ -625,31 +1016,8 @@ function _get_fixed_variable_index( return fixed_aug_index end -function fixed_variable_treatment_vec!(vec, ind_fixed) - @inbounds @simd for i in ind_fixed - vec[i] = 0.0 - end -end - -function fixed_variable_treatment_z!(zl, zu, f, jacl, ind_fixed) - @inbounds @simd for i in ind_fixed - z = f[i]+jacl[i] - if z >= 0.0 - zl[i] = z - zu[i] = 0.0 - else - zl[i] = 0.0 - zu[i] = -z - end - end -end - function dual_inf_perturbation!(px, ind_llb, ind_uub, mu, kappa_d) - @inbounds @simd for i in ind_llb - px[i] -= mu*kappa_d - end - @inbounds @simd for i in ind_uub - px[i] += mu*kappa_d - end + px[ind_llb] .-= mu*kappa_d + px[ind_uub] .+= mu*kappa_d end diff --git a/src/IPM/restoration.jl b/src/IPM/restoration.jl index ff765c67..d09a0849 100644 --- a/src/IPM/restoration.jl +++ b/src/IPM/restoration.jl @@ -1,24 +1,24 @@ -mutable struct RobustRestorer{T} +mutable struct RobustRestorer{T, VT} obj_val_R::T - f_R::Vector{T} - x_ref::Vector{T} + f_R::VT + x_ref::VT theta_ref::T - D_R::Vector{T} + D_R::VT obj_val_R_trial::T - pp::Vector{T} - nn::Vector{T} - zp::Vector{T} - 
zn::Vector{T} + pp::VT + nn::VT + zp::VT + zn::VT - dpp::Vector{T} - dnn::Vector{T} - dzp::Vector{T} - dzn::Vector{T} + dpp::VT + dnn::VT + dzp::VT + dzn::VT - pp_trial::Vector{T} - nn_trial::Vector{T} + pp_trial::VT + nn_trial::VT inf_pr_R::T inf_du_R::T @@ -31,31 +31,39 @@ mutable struct RobustRestorer{T} filter::Vector{Tuple{T,T}} end -function RobustRestorer(solver::AbstractMadNLPSolver{T}) where T - - nn = Vector{T}(undef,solver.m) - zp = Vector{T}(undef,solver.m) - zn = Vector{T}(undef,solver.m) - dpp= Vector{T}(undef,solver.m) - dnn= Vector{T}(undef,solver.m) - dzp= Vector{T}(undef,solver.m) - dzn= Vector{T}(undef,solver.m) - pp_trial = Vector{T}(undef,solver.m) - nn_trial = Vector{T}(undef,solver.m) - - return RobustRestorer{T}( +function RobustRestorer(solver::AbstractMadNLPSolver{T}) where {T} + + f_R = similar(solver.y, solver.n) + x_ref = similar(solver.y, solver.n) + D_R = similar(solver.y, solver.n) + pp = similar(solver.y, solver.m) + nn = similar(solver.y, solver.m) + pp_trial = similar(solver.y, solver.m) + nn_trial = similar(solver.y, solver.m) + + nn = similar(solver.y, solver.m) + zp = similar(solver.y, solver.m) + zn = similar(solver.y, solver.m) + dpp= similar(solver.y, solver.m) + dnn= similar(solver.y, solver.m) + dzp= similar(solver.y, solver.m) + dzn= similar(solver.y, solver.m) + pp_trial = similar(solver.y, solver.m) + nn_trial = similar(solver.y, solver.m) + + return RobustRestorer( 0., - primal(solver._w2), - primal(solver._w1), + f_R, + x_ref, 0., - primal(solver._w3), + D_R, 0., - dual(solver._w3), - dual(solver._w4), + pp, + nn, zp, zn, dpp, dnn, dzp, dzn, - dual(solver._w2), - dual(solver._w1), + pp_trial, + nn_trial, 0.,0.,0.,0.,0.,0., Tuple{T,T}[], ) @@ -68,21 +76,22 @@ function initialize_robust_restorer!(solver::AbstractMadNLPSolver{T}) where T copyto!(RR.x_ref, full(solver.x)) RR.theta_ref = get_theta(solver.c) - @inbounds @simd for i in eachindex(RR.D_R) - RR.D_R[i] = min(one(T), one(T) / abs(RR.x_ref[i])) - end + RR.D_R .= min.(one(T), one(T) ./ abs.(RR.x_ref)) RR.mu_R = max(solver.mu, norm(solver.c, Inf)) RR.tau_R= max(solver.opt.tau_min,1-RR.mu_R) RR.zeta = sqrt(RR.mu_R) - @inbounds @simd for i in eachindex(RR.nn) - RR.nn[i] = (RR.mu_R - solver.opt.rho*solver.c[i])/2 /solver.opt.rho + - sqrt(((RR.mu_R-solver.opt.rho*solver.c[i])/2 /solver.opt.rho)^2 + RR.mu_R*solver.c[i]/2 /solver.opt.rho) - RR.pp[i] = solver.c[i] + RR.nn[i] - RR.zp[i] = RR.mu_R / RR.pp[i] - RR.zn[i] = RR.mu_R / RR.nn[i] - end + rho = solver.opt.rho + mu = RR.mu_R + RR.nn .= + (mu .- rho*solver.c)./2 ./rho .+ + sqrt.( + ((mu.-rho*solver.c)./2 ./rho).^2 + mu.*solver.c./2 ./rho + ) + RR.pp .= solver.c .+ RR.nn + RR.zp .= RR.mu_R ./ RR.pp + RR.zn .= RR.mu_R ./ RR.nn RR.obj_val_R = get_obj_val_R(RR.pp,RR.nn,RR.D_R,full(solver.x),RR.x_ref,solver.opt.rho,RR.zeta) fill!(RR.f_R, zero(T)) @@ -90,12 +99,11 @@ function initialize_robust_restorer!(solver::AbstractMadNLPSolver{T}) where T push!(RR.filter, (solver.theta_max,-Inf)) fill!(solver.y, zero(T)) - @inbounds @simd for i in eachindex(solver.zl_r) - solver.zl_r[i] = min(solver.opt.rho, solver.zl_r[i]) - end - @inbounds @simd for i in eachindex(solver.zu_r) - solver.zu_r[i] = min(solver.opt.rho, solver.zu_r[i]) - end + solver.zl_r .= min.(solver.opt.rho, solver.zl_r) + solver.zu_r .= min.(solver.opt.rho, solver.zu_r) + # fill!(solver.zl_r, one(T)) # Experimental + # fill!(solver.zu_r, one(T)) # Experimental + solver.cnt.t = 0 # misc diff --git a/src/IPM/solver.jl b/src/IPM/solver.jl index ad7ebac6..0b80f905 100644 --- a/src/IPM/solver.jl 
+++ b/src/IPM/solver.jl @@ -1,6 +1,5 @@ function madnlp(model::AbstractNLPModel; kwargs...) solver = MadNLPSolver(model;kwargs...) - initialize!(solver.kkt) return solve!(solver) end @@ -14,60 +13,59 @@ solve!(solver::AbstractMadNLPSolver; kwargs...) = solve!( function initialize!(solver::AbstractMadNLPSolver{T}) where T - - # initializing slack variables - @trace(solver.logger,"Initializing slack variables.") - cons!(solver.nlp,get_x0(solver.nlp),_madnlp_unsafe_wrap(solver.c,get_ncon(solver.nlp))) - solver.cnt.con_cnt += 1 - copyto!(slack(solver.x), solver.c_slk) - - # Initialization - @trace(solver.logger,"Initializing primal and bound duals.") + nlp = solver.nlp + opt = solver.opt + + # Initializing variables + @trace(solver.logger,"Initializing variables.") + initialize!( + solver.cb, + solver.x, + solver.xl, + solver.xu, + solver.y, + solver.rhs, + solver.ind_ineq, + opt + ) + fill!(solver.jacl, zero(T)) fill!(solver.zl_r, one(T)) fill!(solver.zu_r, one(T)) - - set_initial_bounds!(solver) - initialize_variables!( - full(solver.x), - full(solver.xl), - full(solver.xu), - solver.opt.bound_push,solver.opt.bound_fac + + # Initializing scaling factors + set_scaling!( + solver.cb, + solver.x, + solver.xl, + solver.xu, + solver.y, + solver.rhs, + solver.ind_ineq, + opt.nlp_scaling_max_gradient ) - # Automatic scaling (constraints) - @trace(solver.logger,"Computing constraint scaling.") - eval_jac_wrapper!(solver, solver.kkt, solver.x) - compress_jacobian!(solver.kkt) - if (solver.m > 0) && solver.opt.nlp_scaling - jac = get_raw_jacobian(solver.kkt) - scale_constraints!(solver.nlp, solver.con_scale, jac; max_gradient=solver.opt.nlp_scaling_max_gradient) - set_jacobian_scaling!(solver.kkt, solver.con_scale) - solver.y ./= solver.con_scale - end - compress_jacobian!(solver.kkt) + # Initializing KKT system + initialize!(solver.kkt) - # Automatic scaling (objective) + # Initializing jacobian and gradient + eval_jac_wrapper!(solver, solver.kkt, solver.x) eval_grad_f_wrapper!(solver, solver.f,solver.x) - @trace(solver.logger,"Computing objective scaling.") - if solver.opt.nlp_scaling - solver.obj_scale[] = scale_objective(solver.nlp, full(solver.f); max_gradient=solver.opt.nlp_scaling_max_gradient) - _scal!(solver.obj_scale[], full(solver.f)) - end + - # Initialize dual variables @trace(solver.logger,"Initializing constraint duals.") if !solver.opt.dual_initialized set_initial_rhs!(solver, solver.kkt) - initialize!(solver.kkt) factorize_wrapper!(solver) - is_solved = solve_refine_wrapper!(solver,solver.d,solver.p) + is_solved = solve_refine_wrapper!( + solver.d, solver, solver.p, solver._w4 + ) if !is_solved || (norm(dual(solver.d), Inf) > solver.opt.constr_mult_init_max) fill!(solver.y, zero(T)) else copyto!(solver.y, dual(solver.d)) end end - + # Initializing solver.obj_val = eval_f_wrapper(solver, solver.x) eval_cons_wrapper!(solver, solver.c, solver.x) @@ -78,7 +76,7 @@ function initialize!(solver::AbstractMadNLPSolver{T}) where T solver.theta_min = 1e-4*max(1,theta) solver.mu = solver.opt.mu_init solver.tau = max(solver.opt.tau_min,1-solver.opt.mu_init) - solver.filter = [(solver.theta_max,-Inf)] + push!(solver.filter, (solver.theta_max,-Inf)) return REGULAR end @@ -98,7 +96,8 @@ function reinitialize!(solver::AbstractMadNLPSolver) solver.theta_min=1e-4*max(1,theta) solver.mu=solver.opt.mu_init solver.tau=max(solver.opt.tau_min,1-solver.opt.mu_init) - solver.filter = [(solver.theta_max,-Inf)] + empty!(solver.filter) + push!(solver.filter, (solver.theta_max,-Inf)) return REGULAR end @@ -111,7 
+110,7 @@ function solve!( x = nothing, y = nothing, zl = nothing, zu = nothing, kwargs... -) + ) if x != nothing full(solver.x)[1:get_nvar(nlp)] .= x @@ -133,7 +132,7 @@ function solve!( try if solver.status == INITIAL - @notice(solver.logger,"This is $(introduce()), running with $(introduce(solver.linear_solver))\n") + @notice(solver.logger,"This is $(introduce()), running with $(introduce(solver.kkt.linear_solver))\n") print_init(solver) solver.status = initialize!(solver) else # resolving the problem @@ -174,10 +173,10 @@ function solve!( end finally solver.cnt.total_time = time() - solver.cnt.start_time - !(solver.status < SOLVE_SUCCEEDED) && (print_summary_1(solver);print_summary_2(solver)) - # Unscale once the summary has been printed out - unscale!(solver) - @notice(solver.logger,"EXIT: $(STATUS_OUTPUT_DICT[solver.status])") + if !(solver.status < SOLVE_SUCCEEDED) + print_summary(solver) + end + @notice(solver.logger,"EXIT: $(get_status_output(solver.status, solver.opt))") solver.opt.disable_garbage_collector && (GC.enable(true); @warn(solver.logger,"Julia garbage collector is turned back on")) finalize(solver.logger) @@ -196,18 +195,8 @@ function regular!(solver::AbstractMadNLPSolver{T}) where T eval_jac_wrapper!(solver, solver.kkt, solver.x) end jtprod!(solver.jacl, solver.kkt, solver.y) - fixed_variable_treatment_vec!(solver.jacl,solver.ind_fixed) - fixed_variable_treatment_z!( - full(solver.zl), - full(solver.zu), - full(solver.f), - solver.jacl, - solver.ind_fixed, - ) - - sd = get_sd(solver.y,solver.zl_r,solver.zu_r,solver.opt.s_max) - sc = get_sc(solver.zl_r,solver.zu_r,solver.opt.s_max) - + sd = get_sd(solver.y,solver.zl_r,solver.zu_r,T(solver.opt.s_max)) + sc = get_sc(solver.zl_r,solver.zu_r,T(solver.opt.s_max)) solver.inf_pr = get_inf_pr(solver.c) solver.inf_du = get_inf_du( full(solver.f), @@ -216,9 +205,9 @@ function regular!(solver::AbstractMadNLPSolver{T}) where T solver.jacl, sd, ) - solver.inf_compl = get_inf_compl(solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,0.,sc) + solver.inf_compl = get_inf_compl(solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,zero(T),sc) inf_compl_mu = get_inf_compl(solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,solver.mu,sc) - + print_iter(solver) # evaluate termination criteria @@ -252,21 +241,10 @@ function regular!(solver::AbstractMadNLPSolver{T}) where T set_aug_diagonal!(solver.kkt,solver) set_aug_rhs!(solver, solver.kkt, solver.c) - if solver.opt.inertia_correction_method == INERTIA_FREE - set_aug_rhs_ifr!(solver, solver.kkt) - end dual_inf_perturbation!(primal(solver.p),solver.ind_llb,solver.ind_uub,solver.mu,solver.opt.kappa_d) - # start inertia conrrection - @trace(solver.logger,"Solving primal-dual system.") - if solver.opt.inertia_correction_method == INERTIA_FREE - inertia_free_reg(solver) || return ROBUST - elseif solver.opt.inertia_correction_method == INERTIA_BASED - inertia_based_reg(solver) || return ROBUST - end - - finish_aug_solve!(solver, solver.kkt, solver.mu) - + inertia_correction!(solver.inertia_corrector, solver) || return ROBUST + # filter start @trace(solver.logger,"Backtracking line search initiated.") theta = get_theta(solver.c) @@ -292,15 +270,17 @@ function regular!(solver::AbstractMadNLPSolver{T}) where T solver.opt.alpha_min_frac,solver.opt.delta,solver.opt.s_theta,solver.opt.s_phi) solver.cnt.l = 1 solver.alpha = alpha_max - varphi_trial= 0. - theta_trial = 0. 
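+ # backtracking filter line search: the trial step is accepted only if it is acceptable to the
+ # filter (ftype "f" or "h"); otherwise alpha is halved, and once alpha drops below alpha_min
+ # the solver switches to the feasibility restoration phase.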
- small_search_norm = get_rel_search_norm(primal(solver.x), primal(solver.d)) < 10*eps(T) + varphi_trial= zero(T) + theta_trial = zero(T) + small_search_norm = get_rel_search_norm(primal(solver.x), primal(solver.d)) < 10*eps(T) switching_condition = is_switching(varphi_d,solver.alpha,solver.opt.s_phi,solver.opt.delta,2.,solver.opt.s_theta) armijo_condition = false + unsuccessful_iterate = false + while true + copyto!(full(solver.x_trial), full(solver.x)) axpy!(solver.alpha, primal(solver.d), primal(solver.x_trial)) - solver.obj_val_trial = eval_f_wrapper(solver, solver.x_trial) eval_cons_wrapper!(solver, solver.c_trial, solver.x_trial) @@ -314,11 +294,21 @@ function regular!(solver::AbstractMadNLPSolver{T}) where T solver.filter,theta,theta_trial,varphi,varphi_trial,switching_condition,armijo_condition, solver.theta_min,solver.opt.obj_max_inc,solver.opt.gamma_theta,solver.opt.gamma_phi, has_constraints(solver)) - solver.ftype in ["f","h"] && (@trace(solver.logger,"Step accepted with type $(solver.ftype)"); break) + + if solver.ftype in ["f","h"] + @trace(solver.logger,"Step accepted with type $(solver.ftype)") + break + end - solver.cnt.l==1 && theta_trial>=theta && second_order_correction( - solver,alpha_max,theta,varphi,theta_trial,varphi_d,switching_condition) && break + if solver.cnt.l==1 && theta_trial>=theta + if second_order_correction( + solver,alpha_max,theta,varphi,theta_trial,varphi_d,switching_condition + ) + break + end + end + unsuccessful_iterate = true solver.alpha /= 2 solver.cnt.l += 1 if solver.alpha < alpha_min @@ -328,25 +318,54 @@ function regular!(solver::AbstractMadNLPSolver{T}) where T return RESTORE else @trace(solver.logger,"Step rejected; proceed with the next trial step.") - solver.alpha * norm(primal(solver.d)) < eps(T)*10 && - return solver.cnt.acceptable_cnt >0 ? - SOLVED_TO_ACCEPTABLE_LEVEL : SEARCH_DIRECTION_BECOMES_TOO_SMALL + if solver.alpha * norm(primal(solver.d)) < eps(T)*10 + if (solver.cnt.restoration_fail_count += 1) >= 4 + return solver.cnt.acceptable_cnt >0 ? + SOLVED_TO_ACCEPTABLE_LEVEL : SEARCH_DIRECTION_BECOMES_TOO_SMALL + else + # (experimental) while giving up directly + # we give MadNLP.jl second chance to explore + # some possibility at the current iterate + + fill!(solver.y, zero(T)) + fill!(solver.zl_r, one(T)) + fill!(solver.zu_r, one(T)) + empty!(solver.filter) + push!(solver.filter,(solver.theta_max,-Inf)) + solver.cnt.k+=1 + + return REGULAR + end + end + end + end + + # this implements the heuristics in Section 3.2 of Ipopt paper. 
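+ # after four consecutive unsuccessful trial steps, and provided the trial infeasibility is
+ # already below theta_max/10, the filter is treated as too restrictive: theta_max is shrunk
+ # by a factor of 10 and the filter is re-initialized with the single entry (theta_max, -Inf).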
+ # Case I is only implemented + if unsuccessful_iterate + if (solver.cnt.unsuccessful_iterate += 1) >= 4 + if solver.theta_max/10 > theta_trial + @debug(solver.logger, "restarting filter") + solver.theta_max /= 10 + empty!(solver.filter) + push!(solver.filter,(solver.theta_max,-Inf)) + end + solver.cnt.unsuccessful_iterate = 0 end + else + solver.cnt.unsuccessful_iterate = 0 end @trace(solver.logger,"Updating primal-dual variables.") copyto!(full(solver.x), full(solver.x_trial)) copyto!(solver.c, solver.c_trial) solver.obj_val = solver.obj_val_trial - adjusted = adjust_boundary!(solver.x_lr,solver.xl_r,solver.x_ur,solver.xu_r,solver.mu) - adjusted > 0 && - @warn(solver.logger,"In iteration $(solver.cnt.k), $adjusted Slack too small, adjusting variable bound") + adjust_boundary!(solver.x_lr,solver.xl_r,solver.x_ur,solver.xu_r,solver.mu) axpy!(solver.alpha,dual(solver.d),solver.y) solver.zl_r .+= solver.alpha_z .* dual_lb(solver.d) solver.zu_r .+= solver.alpha_z .* dual_ub(solver.d) - reset_bound_dual!( primal(solver.zl), primal(solver.x), @@ -359,6 +378,7 @@ function regular!(solver::AbstractMadNLPSolver{T}) where T primal(solver.x), solver.mu,solver.opt.kappa_sigma, ) + eval_grad_f_wrapper!(solver, solver.f,solver.x) if !switching_condition || !armijo_condition @@ -367,11 +387,12 @@ function regular!(solver::AbstractMadNLPSolver{T}) where T end solver.cnt.k+=1 - @trace(solver.logger,"Proceeding to the next interior point iteration.") + @trace(solver.logger,"Proceeding to the next interior point iteration.") end end -function restore!(solver::AbstractMadNLPSolver) + +function restore!(solver::AbstractMadNLPSolver{T}) where T solver.del_w = 0 # Backup the previous primal iterate copyto!(primal(solver._w1), full(solver.x)) @@ -392,10 +413,8 @@ function restore!(solver::AbstractMadNLPSolver) solver.zu_r, solver.mu, ) - solver.cnt.t = 0 - solver.alpha_z = 0.0 + solver.alpha_z = zero(T) solver.ftype = "R" - while true alpha_max = get_alpha_max( primal(solver.x), @@ -442,11 +461,8 @@ function restore!(solver::AbstractMadNLPSolver) return ROBUST end - adjusted = adjust_boundary!(solver.x_lr,solver.xl_r,solver.x_ur,solver.xu_r,solver.mu) - adjusted > 0 && - @warn(solver.logger,"In iteration $(solver.cnt.k), $adjusted Slack too small, adjusting variable bound") - - + adjust_boundary!(solver.x_lr,solver.xl_r,solver.x_ur,solver.xu_r,solver.mu) + F = F_trial theta = get_theta(solver.c) @@ -470,7 +486,7 @@ function restore!(solver::AbstractMadNLPSolver) sd, ) - solver.inf_compl = get_inf_compl(solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,0.,sc) + solver.inf_compl = get_inf_compl(solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,zero(T),sc) inf_compl_mu = get_inf_compl(solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,solver.mu,sc) print_iter(solver) @@ -480,8 +496,9 @@ function restore!(solver::AbstractMadNLPSolver) dual_inf_perturbation!(primal(solver.p),solver.ind_llb,solver.ind_uub,solver.mu,solver.opt.kappa_d) factorize_wrapper!(solver) - solve_refine_wrapper!(solver,solver.d,solver.p) - finish_aug_solve!(solver, solver.kkt, solver.mu) + solve_refine_wrapper!( + solver.d, solver, solver.p, solver._w4 + ) solver.ftype = "f" end @@ -495,14 +512,6 @@ function robust!(solver::MadNLPSolver{T}) where T eval_jac_wrapper!(solver, solver.kkt, solver.x) end jtprod!(solver.jacl, solver.kkt, solver.y) - fixed_variable_treatment_vec!(solver.jacl,solver.ind_fixed) - fixed_variable_treatment_z!( - full(solver.zl), - full(solver.zu), - 
full(solver.f), - solver.jacl, - solver.ind_fixed, - ) # evaluate termination criteria @trace(solver.logger,"Evaluating restoration phase termination criteria.") @@ -516,13 +525,13 @@ function robust!(solver::MadNLPSolver{T}) where T solver.jacl, sd, ) - solver.inf_compl = get_inf_compl(solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,0.,sc) + solver.inf_compl = get_inf_compl(solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,zero(T),sc) # Robust restoration phase error RR.inf_pr_R = get_inf_pr_R(solver.c,RR.pp,RR.nn) RR.inf_du_R = get_inf_du_R(RR.f_R,solver.y,primal(solver.zl),primal(solver.zu),solver.jacl,RR.zp,RR.zn,solver.opt.rho,sd) RR.inf_compl_R = get_inf_compl_R( - solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,RR.pp,RR.zp,RR.nn,RR.zn,0.,sc) + solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,RR.pp,RR.zp,RR.nn,RR.zn,zero(T),sc) inf_compl_mu_R = get_inf_compl_R( solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,RR.pp,RR.zp,RR.nn,RR.zn,RR.mu_R,sc) @@ -532,13 +541,12 @@ function robust!(solver::MadNLPSolver{T}) where T solver.cnt.k>=solver.opt.max_iter && return MAXIMUM_ITERATIONS_EXCEEDED time()-solver.cnt.start_time>=solver.opt.max_wall_time && return MAXIMUM_WALLTIME_EXCEEDED - # update the barrier parameter @trace(solver.logger,"Updating restoration phase barrier parameter.") - while RR.mu_R >= solver.opt.mu_min*100 && + while RR.mu_R >= solver.opt.mu_min && max(RR.inf_pr_R,RR.inf_du_R,inf_compl_mu_R) <= solver.opt.barrier_tol_factor*RR.mu_R RR.mu_R = get_mu(RR.mu_R,solver.opt.mu_min, - solver.opt.mu_linear_decrease_factor,solver.opt.mu_superlinear_decrease_power,solver.opt.tol) + solver.opt.mu_linear_decrease_factor,solver.opt.mu_superlinear_decrease_power,solver.opt.tol) inf_compl_mu_R = get_inf_compl_R( solver.x_lr,solver.xl_r,solver.zl_r,solver.xu_r,solver.x_ur,solver.zu_r,RR.pp,RR.zp,RR.nn,RR.zn,RR.mu_R,sc) RR.tau_R= max(solver.opt.tau_min,1-RR.mu_R) @@ -553,17 +561,20 @@ function robust!(solver::MadNLPSolver{T}) where T eval_lag_hess_wrapper!(solver, solver.kkt, solver.x, solver.y; is_resto=true) end set_aug_RR!(solver.kkt, solver, RR) - set_aug_rhs_RR!(solver, solver.kkt, RR, solver.opt.rho) - + # without inertia correction, @trace(solver.logger,"Solving restoration phase primal-dual system.") - factorize_wrapper!(solver) - solve_refine_wrapper!(solver,solver.d,solver.p) - - finish_aug_solve!(solver, solver.kkt, RR.mu_R) - finish_aug_solve_RR!(RR.dpp,RR.dnn,RR.dzp,RR.dzn,solver.y,dual(solver.d),RR.pp,RR.nn,RR.zp,RR.zn,RR.mu_R,solver.opt.rho) - + set_aug_rhs_RR!(solver, solver.kkt, RR, solver.opt.rho) + + inertia_correction!(solver.inertia_corrector, solver) || return RESTORATION_FAILED + + finish_aug_solve_RR!( + RR.dpp,RR.dnn,RR.dzp,RR.dzn,solver.y,dual(solver.d), + RR.pp,RR.nn,RR.zp,RR.zn,RR.mu_R,solver.opt.rho + ) + + theta_R = get_theta_R(solver.c,RR.pp,RR.nn) varphi_R = get_varphi_R(RR.obj_val_R,solver.x_lr,solver.xl_r,solver.xu_r,solver.x_ur,RR.pp,RR.nn,RR.mu_R) varphi_d_R = get_varphi_d_R( @@ -591,8 +602,8 @@ function robust!(solver::MadNLPSolver{T}) where T @trace(solver.logger,"Backtracking line search initiated.") solver.alpha = alpha_max solver.cnt.l = 1 - theta_R_trial = 0. - varphi_R_trial = 0. 
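+ # restoration-phase backtracking line search: theta_R (infeasibility of the relaxed problem)
+ # and varphi_R (restoration barrier function) play the roles of theta and varphi from the
+ # regular phase.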
+ theta_R_trial = zero(T) + varphi_R_trial = zero(T) small_search_norm = get_rel_search_norm(primal(solver.x), primal(solver.d)) < 10*eps(T) switching_condition = is_switching(varphi_d_R,solver.alpha,solver.opt.s_phi,solver.opt.delta,theta_R,solver.opt.s_theta) armijo_condition = false @@ -612,7 +623,7 @@ function robust!(solver::MadNLPSolver{T}) where T varphi_R_trial = get_varphi_R( RR.obj_val_R_trial,solver.x_trial_lr,solver.xl_r,solver.xu_r,solver.x_trial_ur,RR.pp_trial,RR.nn_trial,RR.mu_R) - armijo_condition = is_armijo(varphi_R_trial,varphi_R,0.,solver.alpha,varphi_d_R) ##### + armijo_condition = is_armijo(varphi_R_trial,varphi_R,solver.opt.eta_phi,solver.alpha,varphi_d_R) small_search_norm && break solver.ftype = get_ftype( @@ -626,7 +637,23 @@ function robust!(solver::MadNLPSolver{T}) where T solver.cnt.l += 1 if solver.alpha < alpha_min @debug(solver.logger,"Restoration phase cannot find an acceptable step at iteration $(solver.cnt.k).") - return RESTORATION_FAILED + if (solver.cnt.restoration_fail_count += 1) >= 4 + return RESTORATION_FAILED + else + # (experimental) while giving up directly + # we give MadNLP.jl second chance to explore + # some possibility at the current iterate + + fill!(solver.y, zero(T)) + fill!(solver.zl_r, one(T)) + fill!(solver.zu_r, one(T)) + empty!(solver.filter) + push!(solver.filter,(solver.theta_max,-Inf)) + + solver.cnt.k+=1 + solver.cnt.t+=1 + return REGULAR + end else @trace(solver.logger,"Step rejected; proceed with the next trial step.") solver.alpha < eps(T)*10 && return solver.cnt.acceptable_cnt >0 ? @@ -665,9 +692,7 @@ function robust!(solver::MadNLPSolver{T}) where T reset_bound_dual!(RR.zp,RR.pp,RR.mu_R,solver.opt.kappa_sigma) reset_bound_dual!(RR.zn,RR.nn,RR.mu_R,solver.opt.kappa_sigma) - adjusted = adjust_boundary!(solver.x_lr,solver.xl_r,solver.x_ur,solver.xu_r,solver.mu) - adjusted > 0 && - @warn(solver.logger,"In iteration $(solver.cnt.k), $adjusted Slack too small, adjusting variable bound") + adjust_boundary!(solver.x_lr,solver.xl_r,solver.x_ur,solver.xu_r,solver.mu) if !switching_condition || !armijo_condition @trace(solver.logger,"Augmenting restoration phase filter.") @@ -685,20 +710,21 @@ function robust!(solver::MadNLPSolver{T}) where T theta <= solver.opt.required_infeasibility_reduction * RR.theta_ref @trace(solver.logger,"Going back to the regular phase.") - solver.zl_r.=1 - solver.zu_r.=1 - set_initial_rhs!(solver, solver.kkt) initialize!(solver.kkt) factorize_wrapper!(solver) - solve_refine_wrapper!(solver,solver.d,solver.p) + solve_refine_wrapper!( + solver.d, solver, solver.p, solver._w4 + ) if norm(dual(solver.d), Inf)>solver.opt.constr_mult_init_max - fill!(solver.y, 0.0) + fill!(solver.y, zero(T)) else copyto!(solver.y, dual(solver.d)) end + solver.cnt.k+=1 + solver.cnt.t+=1 return REGULAR end @@ -712,91 +738,179 @@ function robust!(solver::MadNLPSolver{T}) where T end end -function inertia_based_reg(solver::MadNLPSolver) +function second_order_correction(solver::AbstractMadNLPSolver,alpha_max,theta,varphi, + theta_trial,varphi_d,switching_condition::Bool) + @trace(solver.logger,"Second-order correction started.") + + wx = primal(solver._w1) + wy = dual(solver._w1) + copyto!(wy, solver.c_trial) + axpy!(alpha_max, solver.c, wy) + + theta_soc_old = theta_trial + for p=1:solver.opt.max_soc + # compute second order correction + set_aug_rhs!(solver, solver.kkt, wy) + dual_inf_perturbation!( + primal(solver.p), + solver.ind_llb,solver.ind_uub,solver.mu,solver.opt.kappa_d, + ) + solve_refine_wrapper!( + solver._w1, solver, 
solver.p, solver._w4 + ) + alpha_soc = get_alpha_max( + primal(solver.x), + primal(solver.xl), + primal(solver.xu), + wx,solver.tau + ) + + copyto!(primal(solver.x_trial), primal(solver.x)) + axpy!(alpha_soc, wx, primal(solver.x_trial)) + eval_cons_wrapper!(solver, solver.c_trial, solver.x_trial) + solver.obj_val_trial = eval_f_wrapper(solver, solver.x_trial) + + theta_soc = get_theta(solver.c_trial) + varphi_soc= get_varphi(solver.obj_val_trial,solver.x_trial_lr,solver.xl_r,solver.xu_r,solver.x_trial_ur,solver.mu) + + !is_filter_acceptable(solver.filter,theta_soc,varphi_soc) && break + + if theta <=solver.theta_min && switching_condition + # Case I + if is_armijo(varphi_soc,varphi,solver.opt.eta_phi,solver.alpha,varphi_d) + @trace(solver.logger,"Step in second order correction accepted by armijo condition.") + solver.ftype = "F" + solver.alpha=alpha_soc + return true + end + else + # Case II + if is_sufficient_progress(theta_soc,theta,solver.opt.gamma_theta,varphi_soc,varphi,solver.opt.gamma_phi,has_constraints(solver)) + @trace(solver.logger,"Step in second order correction accepted by sufficient progress.") + solver.ftype = "H" + solver.alpha=alpha_soc + return true + end + end + + theta_soc>solver.opt.kappa_soc*theta_soc_old && break + theta_soc_old = theta_soc + end + @trace(solver.logger,"Second-order correction terminated.") + + return false +end + + +function inertia_correction!( + inertia_corrector::InertiaBased, + solver::MadNLPSolver{T} + ) where {T} + + n_trial = 0 + solver.del_w = del_w_prev = zero(T) + @trace(solver.logger,"Inertia-based regularization started.") factorize_wrapper!(solver) - num_pos,num_zero,num_neg = inertia(solver.linear_solver) - solve_status = num_zero!= 0 ? false : solve_refine_wrapper!(solver,solver.d,solver.p) - n_trial = 0 - solver.del_w = del_w_prev = 0.0 - while !is_inertia_correct(solver.kkt, num_pos, num_zero, num_neg) || !solve_status + num_pos,num_zero,num_neg = inertia(solver.kkt.linear_solver) + + + solve_status = !is_inertia_correct(solver.kkt, num_pos, num_zero, num_neg) ? + false : solve_refine_wrapper!( + solver.d, solver, solver.p, solver._w4, + ) + + + while !solve_status @debug(solver.logger,"Primal-dual perturbed.") - if solver.del_w == 0.0 - solver.del_w = solver.del_w_last==0. ? solver.opt.first_hessian_perturbation : + + if n_trial == 0 + solver.del_w = solver.del_w_last==zero(T) ? solver.opt.first_hessian_perturbation : max(solver.opt.min_hessian_perturbation,solver.opt.perturb_dec_fact*solver.del_w_last) else - solver.del_w*= solver.del_w_last==0. ? solver.opt.perturb_inc_fact_first : solver.opt.perturb_inc_fact - if solver.del_w>solver.opt.max_hessian_perturbation solver.cnt.k+=1 + solver.del_w*= solver.del_w_last==zero(T) ? solver.opt.perturb_inc_fact_first : solver.opt.perturb_inc_fact + if solver.del_w>solver.opt.max_hessian_perturbation + solver.cnt.k+=1 @debug(solver.logger,"Primal regularization is too big. Switching to restoration phase.") return false end end - solver.del_c = (num_zero == 0 || !solve_status) ? - solver.opt.jacobian_regularization_value * solver.mu^(solver.opt.jacobian_regularization_exponent) : 0. + solver.del_c = num_neg == 0 ? zero(T) : solver.opt.jacobian_regularization_value * solver.mu^(solver.opt.jacobian_regularization_exponent) regularize_diagonal!(solver.kkt, solver.del_w - del_w_prev, solver.del_c) del_w_prev = solver.del_w factorize_wrapper!(solver) - num_pos,num_zero,num_neg = inertia(solver.linear_solver) - solve_status = num_zero!= 0 ? 
false : solve_refine_wrapper!(solver,solver.d,solver.p) + num_pos,num_zero,num_neg = inertia(solver.kkt.linear_solver) + + solve_status = !is_inertia_correct(solver.kkt, num_pos, num_zero, num_neg) ? + false : solve_refine_wrapper!( + solver.d, solver, solver.p, solver._w4 + ) n_trial += 1 end + solver.del_w != 0 && (solver.del_w_last = solver.del_w) - return true end -function inertia_free_reg(solver::MadNLPSolver) +function inertia_correction!( + inertia_corrector::InertiaFree, + solver::MadNLPSolver{T} + ) where T + + n_trial = 0 + solver.del_w = del_w_prev = zero(T) @trace(solver.logger,"Inertia-free regularization started.") dx = primal(solver.d) - p0 = solver._w1 - d0 = solver._w2 - t = primal(solver._w3) - n = primal(solver._w2) - wx= primal(solver._w4) - g = full(solver.x_trial) # just to avoid new allocation - - fill!(dual(solver._w3), 0) - set_g_ifr!(solver,g) + p0 = inertia_corrector.p0 + d0 = inertia_corrector.d0 + t = inertia_corrector.t + n = primal(d0) + wx= inertia_corrector.wx + g = inertia_corrector.g - fixed_variable_treatment_vec!(primal(solver._w1), solver.ind_fixed) - fixed_variable_treatment_vec!(primal(solver.p), solver.ind_fixed) - fixed_variable_treatment_vec!(g, solver.ind_fixed) + set_g_ifr!(solver,g) + set_aug_rhs_ifr!(solver, solver.kkt, p0) factorize_wrapper!(solver) - solve_status = (solve_refine_wrapper!(solver,d0,p0) && solve_refine_wrapper!(solver,solver.d,solver.p)) + + solve_status = solve_refine_wrapper!( + d0, solver, p0, solver._w3, + ) && solve_refine_wrapper!( + solver.d, solver, solver.p, solver._w4, + ) copyto!(t,dx) axpy!(-1.,n,t) - mul!(solver._w4, solver.kkt, solver._w3) # prepartation for curv_test - n_trial = 0 - solver.del_w = del_w_prev = 0. - while !curv_test(t,n,g,wx,solver.opt.inertia_free_tol) || !solve_status + while !curv_test(t,n,g,solver.kkt,wx,solver.opt.inertia_free_tol) || !solve_status @debug(solver.logger,"Primal-dual perturbed.") if n_trial == 0 solver.del_w = solver.del_w_last==.0 ? solver.opt.first_hessian_perturbation : max(solver.opt.min_hessian_perturbation,solver.opt.perturb_dec_fact*solver.del_w_last) else solver.del_w*= solver.del_w_last==.0 ? solver.opt.perturb_inc_fact_first : solver.opt.perturb_inc_fact - if solver.del_w>solver.opt.max_hessian_perturbation solver.cnt.k+=1 + if solver.del_w>solver.opt.max_hessian_perturbation + solver.cnt.k+=1 @debug(solver.logger,"Primal regularization is too big. Switching to restoration phase.") return false end end - solver.del_c = !solve_status ? - solver.opt.jacobian_regularization_value * solver.mu^(solver.opt.jacobian_regularization_exponent) : 0. 
+ solver.del_c = solver.opt.jacobian_regularization_value * solver.mu^(solver.opt.jacobian_regularization_exponent) regularize_diagonal!(solver.kkt, solver.del_w - del_w_prev, solver.del_c) del_w_prev = solver.del_w factorize_wrapper!(solver) - solve_status = (solve_refine_wrapper!(solver,d0,p0) && solve_refine_wrapper!(solver,solver.d,solver.p)) + solve_status = solve_refine_wrapper!( + d0, solver, p0, solver._w3 + ) && solve_refine_wrapper!( + solver.d, solver, solver.p, solver._w4 + ) copyto!(t,dx) axpy!(-1.,n,t) - mul!(solver._w4, solver.kkt, solver._w3) # prepartation for curv_test n_trial += 1 end @@ -804,66 +918,49 @@ function inertia_free_reg(solver::MadNLPSolver) return true end -curv_test(t,n,g,wx,inertia_free_tol) = dot(wx,t) + max(dot(wx,n)-dot(g,n),0) - inertia_free_tol*dot(t,t) >=0 - -function second_order_correction(solver::AbstractMadNLPSolver,alpha_max,theta,varphi, - theta_trial,varphi_d,switching_condition::Bool) - @trace(solver.logger,"Second-order correction started.") - - wx = primal(solver._w1) - wy = dual(solver._w1) - copyto!(wy, solver.c_trial) - axpy!(alpha_max, solver.c, wy) - - theta_soc_old = theta_trial - for p=1:solver.opt.max_soc - # compute second order correction - set_aug_rhs!(solver, solver.kkt, wy) - dual_inf_perturbation!( - primal(solver.p), - solver.ind_llb,solver.ind_uub,solver.mu,solver.opt.kappa_d, - ) - solve_refine_wrapper!(solver,solver._w1,solver.p) - alpha_soc = get_alpha_max( - primal(solver.x), - primal(solver.xl), - primal(solver.xu), - wx,solver.tau) - - copyto!(primal(solver.x_trial), primal(solver.x)) - axpy!(alpha_soc, wx, primal(solver.x_trial)) - eval_cons_wrapper!(solver, solver.c_trial, solver.x_trial) - solver.obj_val_trial = eval_f_wrapper(solver, solver.x_trial) +function inertia_correction!( + inertia_corrector::InertiaIgnore, + solver::MadNLPSolver{T} + ) where T + + n_trial = 0 + solver.del_w = del_w_prev = zero(T) - theta_soc = get_theta(solver.c_trial) - varphi_soc= get_varphi(solver.obj_val_trial,solver.x_trial_lr,solver.xl_r,solver.xu_r,solver.x_trial_ur,solver.mu) + @trace(solver.logger,"Inertia-based regularization started.") - !is_filter_acceptable(solver.filter,theta_soc,varphi_soc) && break + factorize_wrapper!(solver) - if theta <=solver.theta_min && switching_condition - # Case I - if is_armijo(varphi_soc,varphi,solver.opt.eta_phi,solver.alpha,varphi_d) - @trace(solver.logger,"Step in second order correction accepted by armijo condition.") - solver.ftype = "F" - solver.alpha=alpha_soc - return true - end + solve_status = solve_refine_wrapper!( + solver.d, solver, solver.p, solver._w4, + ) + while !solve_status + @debug(solver.logger,"Primal-dual perturbed.") + if n_trial == 0 + solver.del_w = solver.del_w_last==zero(T) ? solver.opt.first_hessian_perturbation : + max(solver.opt.min_hessian_perturbation,solver.opt.perturb_dec_fact*solver.del_w_last) else - # Case II - if is_sufficient_progress(theta_soc,theta,solver.opt.gamma_theta,varphi_soc,varphi,solver.opt.gamma_phi,has_constraints(solver)) - @trace(solver.logger,"Step in second order correction accepted by sufficient progress.") - solver.ftype = "H" - solver.alpha=alpha_soc - return true + solver.del_w*= solver.del_w_last==zero(T) ? solver.opt.perturb_inc_fact_first : solver.opt.perturb_inc_fact + if solver.del_w>solver.opt.max_hessian_perturbation + solver.cnt.k+=1 + @debug(solver.logger,"Primal regularization is too big. 
Switching to restoration phase.") + return false end end + solver.del_c = solver.opt.jacobian_regularization_value * solver.mu^(solver.opt.jacobian_regularization_exponent) + regularize_diagonal!(solver.kkt, solver.del_w - del_w_prev, solver.del_c) + del_w_prev = solver.del_w - theta_soc>solver.opt.kappa_soc*theta_soc_old && break - theta_soc_old = theta_soc + factorize_wrapper!(solver) + solve_status = solve_refine_wrapper!( + solver.d, solver, solver.p, solver._w4 + ) + n_trial += 1 end - @trace(solver.logger,"Second-order correction terminated.") - - return false + solver.del_w != 0 && (solver.del_w_last = solver.del_w) + return true end - +function curv_test(t,n,g,kkt,wx,inertia_free_tol) + mul_hess_blk!(wx, kkt, t) + dot(wx,t) + max(dot(wx,n)-dot(g,n),0) - inertia_free_tol*dot(t,t) >=0 +end diff --git a/src/IPM/utils.jl b/src/IPM/utils.jl index 43e04569..41c8145e 100644 --- a/src/IPM/utils.jl +++ b/src/IPM/utils.jl @@ -1,42 +1,59 @@ -mutable struct MadNLPExecutionStats{T} <: AbstractExecutionStats +mutable struct MadNLPExecutionStats{T, VT} <: AbstractExecutionStats + options::MadNLPOptions status::Status - solution::Vector{T} + solution::VT objective::T - constraints::Vector{T} + constraints::VT dual_feas::T primal_feas::T - multipliers::Vector{T} - multipliers_L::Vector{T} - multipliers_U::Vector{T} + multipliers::VT + multipliers_L::VT + multipliers_U::VT iter::Int - counters::NLPModels.Counters - elapsed_time::Real + counters::MadNLPCounters end MadNLPExecutionStats(solver::MadNLPSolver) =MadNLPExecutionStats( + solver.opt, solver.status, - primal(solver.x), - solver.obj_val,solver.c, - solver.inf_du, solver.inf_pr, - solver.y, - primal(solver.zl), - primal(solver.zu), - solver.cnt.k, get_counters(solver.nlp),solver.cnt.total_time + primal(solver.x)[1:get_nvar(solver.nlp)], + solver.obj_val / solver.cb.obj_scale[], + solver.c ./ solver.cb.con_scale, + solver.inf_du, + solver.inf_pr, + copy(solver.y), + primal(solver.zl)[1:get_nvar(solver.nlp)], + primal(solver.zu)[1:get_nvar(solver.nlp)], + 0, + solver.cnt, ) function update!(stats::MadNLPExecutionStats, solver::MadNLPSolver) stats.status = solver.status - stats.objective = solver.obj_val + stats.solution .= @view(primal(solver.x)[1:get_nvar(solver.nlp)]) + stats.multipliers .= solver.y + stats.multipliers_L .= @view(primal(solver.zl)[1:get_nvar(solver.nlp)]) + stats.multipliers_U .= @view(primal(solver.zu)[1:get_nvar(solver.nlp)]) + # stats.solution .= min.( + # max.( + # @view(primal(solver.x)[1:get_nvar(solver.nlp)]), + # get_lvar(solver.nlp) + # ), + # get_uvar(solver.nlp) + # ) + stats.objective = solver.obj_val / solver.cb.obj_scale[] + stats.constraints .= solver.c ./ solver.cb.con_scale .+ solver.rhs + stats.constraints[solver.ind_ineq] .+= slack(solver.x) stats.dual_feas = solver.inf_du stats.primal_feas = solver.inf_pr + update_z!(solver.cb, stats.multipliers_L, stats.multipliers_U, solver.jacl) stats.iter = solver.cnt.k - stats.elapsed_time = solver.cnt.total_time return stats end get_counters(nlp::NLPModels.AbstractNLPModel) = nlp.counters get_counters(nlp::NLPModels.AbstractNLSModel) = nlp.counters.counters -getStatus(result::MadNLPExecutionStats) = STATUS_OUTPUT_DICT[result.status] +getStatus(result::MadNLPExecutionStats) = get_status_output(result.status, result.options) # Exceptions struct InvalidNumberException <: Exception @@ -48,18 +65,16 @@ struct NotEnoughDegreesOfFreedomException <: Exception end has_constraints(solver) = solver.m != 0 function get_vars_info(solver) - x_lb = get_lvar(solver.nlp) - x_ub = 
get_uvar(solver.nlp) + nlp = solver.nlp + + x_lb = get_lvar(nlp) + x_ub = get_uvar(nlp) num_fixed = length(solver.ind_fixed) - num_var = get_nvar(solver.nlp) - num_fixed + num_var = get_nvar(nlp) - num_fixed num_llb_vars = length(solver.ind_llb) - num_lu_vars = -num_fixed - # Number of bounded variables - for i in 1:get_nvar(solver.nlp) - if (x_lb[i] > -Inf) && (x_ub[i] < Inf) - num_lu_vars += 1 - end - end + + # TODO make this non-allocating + num_lu_vars = sum((x_lb .!=-Inf) .& (x_ub .!= Inf)) - num_fixed num_uub_vars = length(solver.ind_uub) return ( n_free=num_var, @@ -71,26 +86,18 @@ function get_vars_info(solver) end function get_cons_info(solver) - g_lb = get_lcon(solver.nlp) - g_ub = get_ucon(solver.nlp) - # Classify constraints - num_eq_cons, num_ineq_cons = 0, 0 - num_ue_cons, num_le_cons, num_lu_cons = 0, 0, 0 - for i in 1:get_ncon(solver.nlp) - l, u = g_lb[i], g_ub[i] - if l == u - num_eq_cons += 1 - elseif l < u - num_ineq_cons += 1 - if isinf(l) && isfinite(u) - num_ue_cons += 1 - elseif isfinite(l) && isinf(u) - num_le_cons +=1 - else isfinite(l) && isfinite(u) - num_lu_cons += 1 - end - end - end + nlp = solver.nlp + + g_lb = get_lcon(nlp) + g_ub = get_ucon(nlp) + + # TODO make this non-allocating + num_eq_cons = sum(g_lb .== g_ub) + num_ineq_cons = length(g_lb) - num_eq_cons + num_le_cons = sum((g_lb .!= -Inf) .& (g_ub .== Inf)) + num_ue_cons = sum((g_ub .!= Inf) .& (g_lb .== -Inf)) + num_lu_cons = num_ineq_cons - num_le_cons - num_ue_cons + return ( n_eq=num_eq_cons, n_ineq=num_ineq_cons, @@ -124,6 +131,7 @@ function print_init(solver::AbstractMadNLPSolver) end function print_iter(solver::AbstractMadNLPSolver;is_resto=false) + obj_scale = solver.cb.obj_scale[] mod(solver.cnt.k,10)==0&& @info(solver.logger,@sprintf( "iter objective inf_pr inf_du lg(mu) ||d|| lg(rg) alpha_du alpha_pr ls")) if is_resto @@ -138,7 +146,7 @@ function print_iter(solver::AbstractMadNLPSolver;is_resto=false) end @info(solver.logger,@sprintf( "%4i%s% 10.7e %6.2e %6.2e %5.1f %6.2e %s %6.2e %6.2e%s %i", - solver.cnt.k,is_resto ? "r" : " ",solver.obj_val/solver.obj_scale[], + solver.cnt.k,is_resto ? "r" : " ",solver.obj_val/obj_scale, inf_pr, inf_du, mu, solver.cnt.k == 0 ? 0. : norm(primal(solver.d),Inf), solver.del_w == 0 ? 
" - " : @sprintf("%5.1f",log(10,solver.del_w)), @@ -146,23 +154,23 @@ function print_iter(solver::AbstractMadNLPSolver;is_resto=false) return end -function print_summary_1(solver::AbstractMadNLPSolver) +function print_summary(solver::AbstractMadNLPSolver) + # TODO inquire this from nlpmodel wrapper + obj_scale = solver.cb.obj_scale[] + solver.cnt.solver_time = solver.cnt.total_time-solver.cnt.linear_solver_time-solver.cnt.eval_function_time + @notice(solver.logger,"") @notice(solver.logger,"Number of Iterations....: $(solver.cnt.k)\n") @notice(solver.logger," (scaled) (unscaled)") - @notice(solver.logger,@sprintf("Objective...............: % 1.16e % 1.16e",solver.obj_val,solver.obj_val/solver.obj_scale[])) - @notice(solver.logger,@sprintf("Dual infeasibility......: %1.16e %1.16e",solver.inf_du,solver.inf_du/solver.obj_scale[])) + @notice(solver.logger,@sprintf("Objective...............: % 1.16e % 1.16e",solver.obj_val,solver.obj_val/obj_scale)) + @notice(solver.logger,@sprintf("Dual infeasibility......: %1.16e %1.16e",solver.inf_du,solver.inf_du/obj_scale)) @notice(solver.logger,@sprintf("Constraint violation....: %1.16e %1.16e",norm(solver.c,Inf),solver.inf_pr)) @notice(solver.logger,@sprintf("Complementarity.........: %1.16e %1.16e", - solver.inf_compl*solver.obj_scale[],solver.inf_compl)) + solver.inf_compl*obj_scale,solver.inf_compl)) @notice(solver.logger,@sprintf("Overall NLP error.......: %1.16e %1.16e\n", - max(solver.inf_du*solver.obj_scale[],norm(solver.c,Inf),solver.inf_compl), + max(solver.inf_du*obj_scale,norm(solver.c,Inf),solver.inf_compl), max(solver.inf_du,solver.inf_pr,solver.inf_compl))) - return -end - -function print_summary_2(solver::AbstractMadNLPSolver) - solver.cnt.solver_time = solver.cnt.total_time-solver.cnt.linear_solver_time-solver.cnt.eval_function_time + @notice(solver.logger,"Number of objective function evaluations = $(solver.cnt.obj_cnt)") @notice(solver.logger,"Number of objective gradient evaluations = $(solver.cnt.obj_grad_cnt)") @notice(solver.logger,"Number of constraint evaluations = $(solver.cnt.con_cnt)") diff --git a/src/Interfaces/MOI_interface.jl b/src/Interfaces/MOI_interface.jl index 66b022b2..784b4506 100644 --- a/src/Interfaces/MOI_interface.jl +++ b/src/Interfaces/MOI_interface.jl @@ -584,21 +584,21 @@ struct MOIModel{T} <: AbstractNLPModel{T,Vector{T}} counters::NLPModels.Counters end -obj(nlp::MOIModel,x::Vector{Float64}) = MOI.eval_objective(nlp.model,x) +obj(nlp::MOIModel,x::AbstractVector{Float64}) = MOI.eval_objective(nlp.model,x) -function grad!(nlp::MOIModel,x::Vector{Float64},f::Vector{Float64}) +function grad!(nlp::MOIModel,x::AbstractVector{Float64},f::AbstractVector{Float64}) MOI.eval_objective_gradient(nlp.model,f,x) end -function cons!(nlp::MOIModel,x::Vector{Float64},c::Vector{Float64}) +function cons!(nlp::MOIModel,x::AbstractVector{Float64},c::AbstractVector{Float64}) MOI.eval_constraint(nlp.model,c,x) end -function jac_coord!(nlp::MOIModel,x::Vector{Float64},jac::Vector{Float64}) +function jac_coord!(nlp::MOIModel,x::AbstractVector{Float64},jac::AbstractVector{Float64}) MOI.eval_constraint_jacobian(nlp.model,jac,x) end -function hess_coord!(nlp::MOIModel,x::Vector{Float64},l::Vector{Float64},hess::Vector{Float64}; obj_weight::Float64=1.) +function hess_coord!(nlp::MOIModel,x::AbstractVector{Float64},l::AbstractVector{Float64},hess::AbstractVector{Float64}; obj_weight::Float64=1.) 
MOI.eval_hessian_lagrangian(nlp.model,hess,x,obj_weight,l) end @@ -738,11 +738,7 @@ function MOI.get(model::Optimizer, ::MOI.RawStatusString) elseif model.solver === nothing return "Optimize not called" end - return get( - STATUS_OUTPUT_DICT, - model.result.status, - "Unknown result status: $(model.result.status)", - ) + return get_status_output(model.result.status, model.result.options) end ### MOI.TerminationStatus diff --git a/src/KKT/KKTsystem.jl b/src/KKT/KKTsystem.jl index 811160b9..5e3b5d7f 100644 --- a/src/KKT/KKTsystem.jl +++ b/src/KKT/KKTsystem.jl @@ -189,12 +189,14 @@ function hess_dense! end Generic functions =# function initialize!(kkt::AbstractKKTSystem) + fill!(kkt.reg, 1.0) fill!(kkt.pr_diag, 1.0) fill!(kkt.du_diag, 0.0) fill!(kkt.hess, 0.0) end function regularize_diagonal!(kkt::AbstractKKTSystem, primal, dual) + kkt.reg .+= primal kkt.pr_diag .+= primal kkt.du_diag .= .-dual end @@ -209,42 +211,11 @@ get_hessian(kkt::AbstractKKTSystem) = kkt.hess get_raw_jacobian(kkt::AbstractKKTSystem) = kkt.jac_raw -# Fix variable treatment -function treat_fixed_variable!(kkt::AbstractKKTSystem{T, VT, MT}) where {T, VT, MT<:SparseMatrixCSC{T, Int32}} - length(kkt.ind_fixed) == 0 && return - aug = kkt.aug_com - - fixed_aug_diag = view(aug.nzval, aug.colptr[kkt.ind_fixed]) - fixed_aug_diag .= 1.0 - fixed_aug = view(aug.nzval, kkt.ind_aug_fixed) - fixed_aug .= 0.0 - return -end -function treat_fixed_variable!(kkt::AbstractKKTSystem{T, VT, MT}) where {T, VT, MT<:Matrix{T}} - length(kkt.ind_fixed) == 0 && return - aug = kkt.aug_com - @inbounds for i in kkt.ind_fixed - aug[i, :] .= 0.0 - aug[:, i] .= 0.0 - aug[i, i] = 1.0 - end -end - function is_inertia_correct(kkt::AbstractKKTSystem, num_pos, num_zero, num_neg) return (num_zero == 0) && (num_pos == num_variables(kkt)) end -function build_kkt!(kkt::AbstractKKTSystem{T, VT, MT}) where {T, VT, MT<:Matrix{T}} - copyto!(kkt.aug_com, kkt.aug_raw) - treat_fixed_variable!(kkt) -end - -function build_kkt!(kkt::AbstractKKTSystem{T, VT, MT}) where {T, VT, MT<:SparseMatrixCSC{T, Int32}} - transfer!(kkt.aug_com, kkt.aug_raw, kkt.aug_csc_map) - treat_fixed_variable!(kkt) -end - -compress_hessian!(kkt::AbstractKKTSystem) = nothing +compress_hessian!(kkt::AbstractKKTSystem) = nothing include("rhs.jl") diff --git a/src/KKT/dense.jl b/src/KKT/dense.jl index f7bba195..73ea04a4 100644 --- a/src/KKT/dense.jl +++ b/src/KKT/dense.jl @@ -1,40 +1,71 @@ """ - DenseKKTSystem{T, VT, MT} <: AbstractReducedKKTSystem{T, VT, MT} + DenseKKTSystem{T, VT, MT, QN, VI} <: AbstractReducedKKTSystem{T, VT, MT, QN} Implement [`AbstractReducedKKTSystem`](@ref) with dense matrices. Requires a dense linear solver to be factorized (otherwise an error is returned). 
""" -struct DenseKKTSystem{T, VT, MT, QN} <: AbstractReducedKKTSystem{T, VT, MT, QN} +struct DenseKKTSystem{ + T, + VT <: AbstractVector{T}, + MT <: AbstractMatrix{T}, + QN, + LS, + VI <: AbstractVector{Int}, + } <: AbstractReducedKKTSystem{T, VT, MT, QN} + hess::MT jac::MT quasi_newton::QN + reg::VT pr_diag::VT du_diag::VT + l_diag::VT + u_diag::VT + l_lower::VT + u_lower::VT diag_hess::VT # KKT system aug_com::MT # Info - n_ineq::Int - ind_ineq::Vector{Int} - ind_fixed::Vector{Int} - constraint_scaling::VT + ind_ineq::VI + ind_lb::VI + ind_ub::VI + # Linear Solver + linear_solver::LS # Buffers etc::Dict{Symbol, Any} end -function DenseKKTSystem{T, VT, MT, QN}(n, m, ind_ineq, ind_fixed) where {T, VT, MT, QN} +function create_kkt_system( + ::Type{DenseKKTSystem}, + cb::AbstractCallback{T,VT}, opt, + opt_linear_solver, cnt, ind_cons) where {T,VT} + + ind_ineq = ind_cons.ind_ineq + ind_lb = ind_cons.ind_lb + ind_ub = ind_cons.ind_ub + + n = cb.nvar + m = cb.ncon ns = length(ind_ineq) - hess = MT(undef, n, n) - jac = MT(undef, m, n) - aug_com = MT(undef, n+ns+m, n+ns+m) - pr_diag = VT(undef, n+ns) - du_diag = VT(undef, m) - diag_hess = VT(undef, n) - - constraint_scaling = VT(undef, m) + nlb = length(ind_cons.ind_lb) + nub = length(ind_cons.ind_ub) + + hess = create_array(cb, n, n) + jac = create_array(cb, m, n) + aug_com = create_array(cb, n+ns+m, n+ns+m) + reg = create_array(cb, n+ns) + pr_diag = create_array(cb, n+ns) + du_diag = create_array(cb, m) + diag_hess = create_array(cb, n) + + l_diag = fill!(VT(undef, nlb), one(T)) + u_diag = fill!(VT(undef, nub), one(T)) + l_lower = fill!(VT(undef, nlb), zero(T)) + u_lower = fill!(VT(undef, nub), zero(T)) # Init! fill!(aug_com, zero(T)) @@ -43,19 +74,17 @@ function DenseKKTSystem{T, VT, MT, QN}(n, m, ind_ineq, ind_fixed) where {T, VT, fill!(pr_diag, zero(T)) fill!(du_diag, zero(T)) fill!(diag_hess, zero(T)) - fill!(constraint_scaling, one(T)) - - quasi_newton = QN(n) - return DenseKKTSystem{T, VT, MT, QN}( - hess, jac, quasi_newton, pr_diag, du_diag, diag_hess, aug_com, - ns, ind_ineq, ind_fixed, constraint_scaling, Dict{Symbol, Any}(), - ) -end - -function DenseKKTSystem{T, VT, MT, QN}(nlp::AbstractNLPModel, info_constraints=get_index_constraints(nlp)) where {T, VT, MT, QN} - return DenseKKTSystem{T, VT, MT, QN}( - get_nvar(nlp), get_ncon(nlp), info_constraints.ind_ineq, info_constraints.ind_fixed + quasi_newton = create_quasi_newton(opt.hessian_approximation, cb, n) + cnt.linear_solver_time += @elapsed linear_solver = opt.linear_solver(aug_com; opt = opt_linear_solver) + + return DenseKKTSystem( + hess, jac, quasi_newton, + reg, pr_diag, du_diag, l_diag, u_diag, l_lower, u_lower, + diag_hess, aug_com, + ind_ineq, ind_cons.ind_lb, ind_cons.ind_ub, + linear_solver, + Dict{Symbol, Any}(), ) end @@ -67,64 +96,102 @@ Implement [`AbstractCondensedKKTSystem`](@ref) with dense matrices. Requires a dense linear solver to factorize the associated KKT system (otherwise an error is returned). 
""" -struct DenseCondensedKKTSystem{T, VT, MT, QN} <: AbstractCondensedKKTSystem{T, VT, MT, QN} +struct DenseCondensedKKTSystem{ + T, + VT <: AbstractVector{T}, + MT <: AbstractMatrix{T}, + QN, + LS, + VI <: AbstractVector{Int} + } <: AbstractCondensedKKTSystem{T, VT, MT, QN} + hess::MT jac::MT quasi_newton::QN jac_ineq::MT + + reg::VT pr_diag::VT du_diag::VT + l_diag::VT + u_diag::VT + l_lower::VT + u_lower::VT + + pd_buffer::VT + diag_buffer::VT + buffer::VT # KKT system aug_com::MT # Info n_eq::Int - ind_eq::Vector{Int} - ind_eq_shifted::Vector{Int} + ind_eq::VI + ind_eq_shifted::VI n_ineq::Int - ind_ineq::Vector{Int} - ind_ineq_shifted::Vector{Int} - ind_fixed::Vector{Int} - constraint_scaling::VT + ind_ineq::VI + ind_lb::VI + ind_ub::VI + ind_ineq_shifted::VI + # Linear Solver + linear_solver::LS # Buffers etc::Dict{Symbol, Any} end -function DenseCondensedKKTSystem{T, VT, MT, QN}(nlp::AbstractNLPModel, info_constraints=get_index_constraints(nlp)) where {T, VT, MT, QN} - n = get_nvar(nlp) - m = get_ncon(nlp) - ns = length(info_constraints.ind_ineq) +function create_kkt_system( + ::Type{DenseCondensedKKTSystem}, + cb::AbstractCallback{T,VT}, opt, + opt_linear_solver, cnt, ind_cons) where {T,VT} + + n = cb.nvar + m = cb.ncon + ns = length(ind_cons.ind_ineq) n_eq = m - ns + nlb = length(ind_cons.ind_lb) + nub = length(ind_cons.ind_ub) - aug_com = MT(undef, n+m-ns, n+m-ns) - hess = MT(undef, n, n) - jac = MT(undef, m, n) - jac_ineq = MT(undef, ns, n) + aug_com = create_array(cb, n+m-ns, n+m-ns) + hess = create_array(cb, n, n) + jac = create_array(cb, m, n) + jac_ineq = create_array(cb, ns, n) + reg = VT(undef, n+ns) pr_diag = VT(undef, n+ns) du_diag = VT(undef, m) - constraint_scaling = VT(undef, m) - + l_diag = fill!(VT(undef, nlb), one(T)) + u_diag = fill!(VT(undef, nub), one(T)) + l_lower = fill!(VT(undef, nlb), zero(T)) + u_lower = fill!(VT(undef, nub), zero(T)) + + pd_buffer = VT(undef, n + n_eq) + diag_buffer = VT(undef, ns) + buffer = VT(undef, m) + # Init! 
fill!(aug_com, zero(T)) fill!(hess, zero(T)) fill!(jac, zero(T)) fill!(pr_diag, zero(T)) fill!(du_diag, zero(T)) - fill!(constraint_scaling, one(T)) - - ind_eq = setdiff(1:m, info_constraints.ind_ineq) # Shift indexes to avoid additional allocation in views - ind_eq_shifted = ind_eq .+ n .+ ns - ind_ineq_shifted = info_constraints.ind_ineq .+ n .+ ns - - quasi_newton = QN(n) - return DenseCondensedKKTSystem{T, VT, MT, QN}( - hess, jac, quasi_newton, jac_ineq, pr_diag, du_diag, aug_com, - n_eq, ind_eq, ind_eq_shifted, - ns, info_constraints.ind_ineq, ind_ineq_shifted, - info_constraints.ind_fixed, - constraint_scaling, Dict{Symbol, Any}(), + ind_eq_shifted = ind_cons.ind_eq .+ n .+ ns + ind_ineq_shifted = ind_cons.ind_ineq .+ n .+ ns + + quasi_newton = create_quasi_newton(opt.hessian_approximation, cb, n) + cnt.linear_solver_time += @elapsed linear_solver = opt.linear_solver(aug_com; opt = opt_linear_solver) + + return DenseCondensedKKTSystem( + hess, jac, quasi_newton, jac_ineq, + reg, pr_diag, du_diag, l_diag, u_diag, l_lower, u_lower, + pd_buffer, diag_buffer, buffer, + aug_com, + n_eq, ind_cons.ind_eq, ind_eq_shifted, + ns, + ind_cons.ind_ineq, ind_cons.ind_lb, ind_cons.ind_ub, + ind_ineq_shifted, + linear_solver, + Dict{Symbol, Any}(), ) end @@ -140,23 +207,18 @@ const AbstractDenseKKTSystem{T, VT, MT, QN} = Union{ function jtprod!(y::AbstractVector, kkt::AbstractDenseKKTSystem, x::AbstractVector) nx = size(kkt.hess, 1) - ns = kkt.n_ineq + ind_ineq = kkt.ind_ineq + ns = length(ind_ineq) yx = view(y, 1:nx) ys = view(y, 1+nx:nx+ns) # / x mul!(yx, kkt.jac', x) # / s - ys .= -x[kkt.ind_ineq] .* kkt.constraint_scaling[kkt.ind_ineq] + ys .= -@view(x[ind_ineq]) return end -function set_jacobian_scaling!(kkt::AbstractDenseKKTSystem, constraint_scaling::AbstractVector) - copyto!(kkt.constraint_scaling, constraint_scaling) -end - function compress_jacobian!(kkt::AbstractDenseKKTSystem) - # Scale - kkt.jac .*= kkt.constraint_scaling return end @@ -173,9 +235,6 @@ num_variables(kkt::DenseKKTSystem) = length(kkt.pr_diag) function mul!(y::AbstractVector, kkt::DenseKKTSystem, x::AbstractVector) symul!(y, kkt.aug_com, x) end -function mul!(y::ReducedKKTVector, kkt::DenseKKTSystem, x::ReducedKKTVector) - mul!(full(y), kkt.aug_com, full(x)) -end # Special getters for Jacobian function get_jacobian(kkt::DenseKKTSystem) @@ -191,7 +250,7 @@ function diag_add!(dest::AbstractMatrix, d1::AbstractVector, d2::AbstractVector) end end -function _build_dense_kkt_system!(dest, hess, jac, pr_diag, du_diag, diag_hess, ind_ineq, con_scale, n, m, ns) +function _build_dense_kkt_system!(dest::VT, hess, jac, pr_diag, du_diag, diag_hess, ind_ineq, n, m, ns) where {T, VT <: AbstractMatrix{T}} # Transfer Hessian for i in 1:n, j in 1:i if i == j @@ -213,8 +272,8 @@ function _build_dense_kkt_system!(dest, hess, jac, pr_diag, du_diag, diag_hess, # Transfer Jacobian / slacks for j in 1:ns is = ind_ineq[j] - dest[is + n + ns, j + n] = - con_scale[is] - dest[j + n, is + n + ns] = - con_scale[is] + dest[is + n + ns, j + n] = - one(T) + dest[j + n, is + n + ns] = - one(T) end # Transfer dual regularization for i in 1:m @@ -226,11 +285,11 @@ function build_kkt!(kkt::DenseKKTSystem{T, VT, MT}) where {T, VT, MT} n = size(kkt.hess, 1) m = size(kkt.jac, 1) ns = length(kkt.ind_ineq) + _build_dense_kkt_system!(kkt.aug_com, kkt.hess, kkt.jac, kkt.pr_diag, kkt.du_diag, kkt.diag_hess, - kkt.ind_ineq, kkt.constraint_scaling, + kkt.ind_ineq, n, m, ns) - treat_fixed_variable!(kkt) end function compress_hessian!(kkt::DenseKKTSystem) @@ -249,7 
+308,6 @@ function get_slack_regularization(kkt::DenseCondensedKKTSystem) n, ns = num_variables(kkt), kkt.n_ineq return view(kkt.pr_diag, n+1:n+ns) end -get_scaling_inequalities(kkt::DenseCondensedKKTSystem) = kkt.constraint_scaling[kkt.ind_ineq] function _build_condensed_kkt_system!( dest::AbstractMatrix, hess::AbstractMatrix, jac::AbstractMatrix, @@ -278,16 +336,14 @@ function _build_condensed_kkt_system!( end function _build_ineq_jac!( - dest::AbstractMatrix, jac::AbstractMatrix, pr_diag::AbstractVector, - ind_ineq::AbstractVector, ind_fixed::AbstractVector, con_scale::AbstractVector, + dest::AbstractMatrix, jac::AbstractMatrix, diag_buffer::AbstractVector, + ind_ineq::AbstractVector, n, m_ineq, ) @inbounds for i in 1:m_ineq, j in 1:n is = ind_ineq[i] - dest[i, j] = jac[is, j] * sqrt(pr_diag[n+i]) / con_scale[is] + dest[i, j] = jac[is, j] * sqrt(diag_buffer[i]) end - # need to zero the fixed components - dest[:, ind_fixed] .= 0.0 end function build_kkt!(kkt::DenseCondensedKKTSystem{T, VT, MT}) where {T, VT, MT} @@ -296,10 +352,13 @@ function build_kkt!(kkt::DenseCondensedKKTSystem{T, VT, MT}) where {T, VT, MT} n_eq = length(kkt.ind_eq) m = size(kkt.jac, 1) - kkt.pr_diag[kkt.ind_fixed] .= 0 fill!(kkt.aug_com, zero(T)) + # Build √Σₛ * J - _build_ineq_jac!(kkt.jac_ineq, kkt.jac, kkt.pr_diag, kkt.ind_ineq, kkt.ind_fixed, kkt.constraint_scaling, n, ns) + Σs = view(kkt.pr_diag, n+1:n+ns) + Σd = @view(kkt.du_diag[kkt.ind_ineq]) + kkt.diag_buffer .= Σs ./ ( 1 .- Σd .* Σs) + _build_ineq_jac!(kkt.jac_ineq, kkt.jac, kkt.diag_buffer, kkt.ind_ineq, n, ns) # Select upper-left block W = if n_eq > 0 @@ -310,17 +369,17 @@ function build_kkt!(kkt::DenseCondensedKKTSystem{T, VT, MT}) where {T, VT, MT} # Build J' * Σₛ * J mul!(W, kkt.jac_ineq', kkt.jac_ineq) + _build_condensed_kkt_system!( kkt.aug_com, kkt.hess, kkt.jac, kkt.pr_diag, kkt.du_diag, kkt.ind_eq, n, kkt.n_eq, ) - treat_fixed_variable!(kkt) end # TODO: check how to handle inertia with the condensed form function is_inertia_correct(kkt::DenseCondensedKKTSystem, num_pos, num_zero, num_neg) - return (num_zero == 0) + return (num_zero == 0 && num_neg == kkt.n_eq) end # For inertia-free regularization @@ -350,12 +409,12 @@ function _mul_expanded!(y::AbstractVector, kkt::DenseCondensedKKTSystem, x::Abst # / s (slack) ys .= Σs .* xs - ys .-= kkt.constraint_scaling[kkt.ind_ineq] .* xy[kkt.ind_ineq] + ys .-= xy[kkt.ind_ineq] # / y (multiplier) yy .= Σd .* xy mul!(yy, kkt.jac, xx, 1.0, 1.0) - yy[kkt.ind_ineq] .-= kkt.constraint_scaling[kkt.ind_ineq] .* xs + yy[kkt.ind_ineq] .-= xs return end @@ -368,10 +427,6 @@ function mul!(y::AbstractVector, kkt::DenseCondensedKKTSystem, x::AbstractVector end end -function mul!(y::ReducedKKTVector, kkt::DenseCondensedKKTSystem, x::ReducedKKTVector) - mul!(full(y), kkt, full(x)) -end - function jprod_ineq!(y::AbstractVector, kkt::DenseCondensedKKTSystem, x::AbstractVector) mul!(y, kkt.jac_ineq, x) end diff --git a/src/KKT/rhs.jl b/src/KKT/rhs.jl index 52aaa51c..6b549058 100644 --- a/src/KKT/rhs.jl +++ b/src/KKT/rhs.jl @@ -77,43 +77,6 @@ function axpy!(a::Number, X::AbstractKKTVector, Y::AbstractKKTVector) axpy!(a, full(X), full(Y)) end -#= - ReducedKKTVector -=# - -""" - ReducedKKTVector{T, VT<:AbstractVector{T}} <: AbstractKKTVector{T, VT} - -KKT vector ``(x, s, y, z)``, associated to a [`AbstractReducedKKTSystem`](@ref). - -Compared to [`UnreducedKKTVector`](@ref), it does not store -the dual values associated to the primal's lower and upper bounds. 
-""" -struct ReducedKKTVector{T, VT<:AbstractVector{T}} <: AbstractKKTVector{T, VT} - values::VT - xp::VT # unsafe view - xl::VT # unsafe view -end - -ReducedKKTVector{T,VT}(n::Int, m::Int, nlb::Int, nub::Int) where {T, VT <: AbstractVector{T}} = ReducedKKTVector{T,VT}(n, m) -function ReducedKKTVector{T,VT}(n::Int, m::Int) where {T, VT <: AbstractVector{T}} - x = VT(undef, n + m) - fill!(x, 0.0) - # Wrap directly array x to avoid dealing with views - xp = _madnlp_unsafe_wrap(x, n) - xl = _madnlp_unsafe_wrap(x, m, n+1) - return ReducedKKTVector{T, VT}(x, xp, xl) -end -function ReducedKKTVector(rhs::AbstractKKTVector) - return ReducedKKTVector(number_primal(rhs), number_dual(rhs)) -end - -full(rhs::ReducedKKTVector) = rhs.values -primal(rhs::ReducedKKTVector) = rhs.xp -dual(rhs::ReducedKKTVector) = rhs.xl -primal_dual(rhs::ReducedKKTVector) = rhs.values - - #= UnreducedKKTVector =# @@ -124,17 +87,22 @@ primal_dual(rhs::ReducedKKTVector) = rhs.values Full KKT vector ``(x, s, y, z, ν, w)``, associated to a [`AbstractUnreducedKKTSystem`](@ref). """ -struct UnreducedKKTVector{T, VT<:AbstractVector{T}} <: AbstractKKTVector{T, VT} +struct UnreducedKKTVector{T, VT<:AbstractVector{T}, VI} <: AbstractKKTVector{T, VT} values::VT x::VT # unsafe view xp::VT # unsafe view + xp_lr::SubVector{T, VT, VI} + xp_ur::SubVector{T, VT, VI} xl::VT # unsafe view xzl::VT # unsafe view xzu::VT # unsafe view end -function UnreducedKKTVector{T, VT}(n::Int, m::Int, nlb::Int, nub::Int) where {T, VT <: AbstractVector{T}} +function UnreducedKKTVector( + ::Type{VT}, n::Int, m::Int, nlb::Int, nub::Int, ind_lb, ind_ub; values = VT(undef,n+m+nlb+nub) + ) where {T, VT <: AbstractVector{T}} + fill!(values, 0.0) # Wrap directly array x to avoid dealing with views x = _madnlp_unsafe_wrap(values, n + m) # Primal-Dual @@ -142,7 +110,11 @@ function UnreducedKKTVector{T, VT}(n::Int, m::Int, nlb::Int, nub::Int) where {T, xl = _madnlp_unsafe_wrap(values, m, n+1) # Dual xzl = _madnlp_unsafe_wrap(values, nlb, n + m + 1) # Lower bound xzu = _madnlp_unsafe_wrap(values, nub, n + m + nlb + 1) # Upper bound - return UnreducedKKTVector{T, VT}(values, x, xp, xl, xzl, xzu) + + xp_lr = view(xp, ind_lb) + xp_ur = view(xp, ind_ub) + + return UnreducedKKTVector(values, x, xp, xp_lr, xp_ur, xl, xzl, xzu) end full(rhs::UnreducedKKTVector) = rhs.values @@ -159,18 +131,24 @@ dual_ub(rhs::UnreducedKKTVector) = rhs.xzu Primal vector ``(x, s)``. 
""" -struct PrimalVector{T, VT<:AbstractVector{T}} <: AbstractKKTVector{T, VT} +struct PrimalVector{T, VT<:AbstractVector{T}, VI} <: AbstractKKTVector{T, VT} values::VT + values_lr::SubVector{T, VT, VI} + values_ur::SubVector{T, VT, VI} x::VT # unsafe view s::VT # unsafe view end -function PrimalVector{T, VT}(nx::Int, ns::Int) where {T, VT <: AbstractVector{T}} - values = VT(undef, nx+ns) ; fill!(values, zero(T)) - return PrimalVector{T, VT}( - values, - _madnlp_unsafe_wrap(values, nx), - _madnlp_unsafe_wrap(values, ns, nx+1), +function PrimalVector(::Type{VT}, nx::Int, ns::Int, ind_lb, ind_ub) where {T, VT <: AbstractVector{T}} + values = VT(undef, nx+ns) + fill!(values, zero(T)) + x = _madnlp_unsafe_wrap(values, nx) + s = _madnlp_unsafe_wrap(values, ns, nx+1) + values_lr = view(values, ind_lb) + values_ur = view(values, ind_ub) + + return PrimalVector( + values, values_lr, values_ur, x, s, ) end @@ -179,3 +157,5 @@ primal(rhs::PrimalVector) = rhs.values variable(rhs::PrimalVector) = rhs.x slack(rhs::PrimalVector) = rhs.s + + diff --git a/src/KKT/sparse.jl b/src/KKT/sparse.jl index acadb152..245a895f 100644 --- a/src/KKT/sparse.jl +++ b/src/KKT/sparse.jl @@ -1,83 +1,161 @@ - """ SparseKKTSystem{T, VT, MT, QN} <: AbstractReducedKKTSystem{T, VT, MT, QN} Implement the [`AbstractReducedKKTSystem`](@ref) in sparse COO format. """ -struct SparseKKTSystem{T, VT, MT, QN} <: AbstractReducedKKTSystem{T, VT, MT, QN} +struct SparseKKTSystem{T, VT, MT, QN, LS, VI, VI32} <: AbstractReducedKKTSystem{T, VT, MT, QN} hess::VT jac_callback::VT jac::VT quasi_newton::QN + reg::VT pr_diag::VT du_diag::VT + l_diag::VT + u_diag::VT + l_lower::VT + u_lower::VT # Augmented system - aug_raw::SparseMatrixCOO{T,Int32,VT} + aug_raw::SparseMatrixCOO{T,Int32,VT, VI32} aug_com::MT - aug_csc_map::Union{Nothing, Vector{Int}} + aug_csc_map::Union{Nothing, VI} + # Hessian + hess_raw::SparseMatrixCOO{T,Int32,VT, VI32} + hess_com::MT + hess_csc_map::Union{Nothing, VI} # Jacobian - jac_raw::SparseMatrixCOO{T,Int32,VT} + jac_raw::SparseMatrixCOO{T,Int32,VT, VI32} jac_com::MT - jac_csc_map::Union{Nothing, Vector{Int}} + jac_csc_map::Union{Nothing, VI} + # LinearSolver + linear_solver::LS # Info - ind_ineq::Vector{Int} - ind_fixed::Vector{Int} - ind_aug_fixed::Vector{Int} - jacobian_scaling::VT + ind_ineq::VI + ind_lb::VI + ind_ub::VI end + """ SparseUnreducedKKTSystem{T, VT, MT, QN} <: AbstractUnreducedKKTSystem{T, VT, MT, QN} Implement the [`AbstractUnreducedKKTSystem`](@ref) in sparse COO format. 
""" -struct SparseUnreducedKKTSystem{T, VT, MT, QN} <: AbstractUnreducedKKTSystem{T, VT, MT, QN} +struct SparseUnreducedKKTSystem{T, VT, MT, QN, LS, VI, VI32} <: AbstractUnreducedKKTSystem{T, VT, MT, QN} hess::VT jac_callback::VT jac::VT quasi_newton::QN + reg::VT pr_diag::VT du_diag::VT - l_diag::VT u_diag::VT l_lower::VT u_lower::VT - aug_raw::SparseMatrixCOO{T,Int32,VT} + l_lower_aug::VT + u_lower_aug::VT + + # Augmented system + aug_raw::SparseMatrixCOO{T,Int32,VT, VI32} aug_com::MT - aug_csc_map::Union{Nothing, Vector{Int}} + aug_csc_map::Union{Nothing, VI} + + # Hessian + hess_raw::SparseMatrixCOO{T,Int32,VT, VI32} + hess_com::MT + hess_csc_map::Union{Nothing, VI} - jac_raw::SparseMatrixCOO{T,Int32,VT} + # Jacobian + jac_raw::SparseMatrixCOO{T,Int32,VT, VI32} jac_com::MT - jac_csc_map::Union{Nothing, Vector{Int}} - ind_ineq::Vector{Int} - ind_fixed::Vector{Int} - ind_aug_fixed::Vector{Int} - jacobian_scaling::VT + jac_csc_map::Union{Nothing, VI} + + # LinearSolver + linear_solver::LS + + # Info + ind_ineq::VI + ind_lb::VI + ind_ub::VI end +""" + SparseCondensedKKTSystem{T, VT, MT, QN} <: AbstractCondensedKKTSystem{T, VT, MT, QN} + +Implement the [`AbstractCondensedKKTSystem`](@ref) in sparse COO format. + +""" +struct SparseCondensedKKTSystem{T, VT, MT, QN, LS, VI, VI32, VTu1, VTu2, EXT} <: AbstractCondensedKKTSystem{T, VT, MT, QN} + # Hessian + hess::VT + hess_raw::SparseMatrixCOO{T,Int32,VT, VI32} + hess_com::MT + hess_csc_map::Union{Nothing, VI} + + # Jacobian + jac::VT + jt_coo::SparseMatrixCOO{T,Int32,VT, VI32} + jt_csc::MT + jt_csc_map::Union{Nothing, VI} + + quasi_newton::QN + reg::VT + pr_diag::VT + du_diag::VT + l_diag::VT + u_diag::VT + l_lower::VT + u_lower::VT + + # buffer + buffer::VT + buffer2::VT + + # Augmented system + aug_com::MT + + # slack diagonal buffer + diag_buffer::VT + dptr::VTu1 + hptr::VTu1 + jptr::VTu2 + + # LinearSolver + linear_solver::LS + + # Info + ind_ineq::VI + ind_lb::VI + ind_ub::VI + + # extra + ext::EXT +end + # Template to dispatch on sparse representation const AbstractSparseKKTSystem{T, VT, MT, QN} = Union{ SparseKKTSystem{T, VT, MT, QN}, + SparseCondensedKKTSystem{T, VT, MT, QN}, SparseUnreducedKKTSystem{T, VT, MT, QN}, } #= Generic sparse methods =# -function build_hessian_structure(nlp::AbstractNLPModel, ::Type{<:ExactHessian}) - hess_I = Vector{Int32}(undef, get_nnzh(nlp.meta)) - hess_J = Vector{Int32}(undef, get_nnzh(nlp.meta)) - hess_structure!(nlp,hess_I,hess_J) +function build_hessian_structure(cb::SparseCallback, ::Type{<:ExactHessian}) + hess_I = create_array(cb, Int32, cb.nnzh) + hess_J = create_array(cb, Int32, cb.nnzh) + _hess_sparsity_wrapper!(cb,hess_I,hess_J) return hess_I, hess_J end # NB. Quasi-Newton methods require only the sparsity pattern # of the diagonal term to store the term ξ I. 
-function build_hessian_structure(nlp::AbstractNLPModel, ::Type{<:AbstractQuasiNewton}) - hess_I = collect(Int32, 1:get_nvar(nlp)) - hess_J = collect(Int32, 1:get_nvar(nlp)) +function build_hessian_structure(cb::SparseCallback, ::Type{<:AbstractQuasiNewton}) + hess_I = collect(Int32, 1:cb.nvar) + hess_J = collect(Int32, 1:cb.nvar) return hess_I, hess_J end @@ -96,48 +174,66 @@ get_jacobian(kkt::AbstractSparseKKTSystem) = kkt.jac_callback nnz_jacobian(kkt::AbstractSparseKKTSystem) = nnz(kkt.jac_raw) -function compress_jacobian!(kkt::AbstractSparseKKTSystem{T, VT, MT}) where {T, VT, MT<:SparseMatrixCSC{T, Int32}} +function compress_jacobian!(kkt::AbstractSparseKKTSystem) ns = length(kkt.ind_ineq) kkt.jac[end-ns+1:end] .= -1.0 - kkt.jac .*= kkt.jacobian_scaling # scaling transfer!(kkt.jac_com, kkt.jac_raw, kkt.jac_csc_map) end function compress_jacobian!(kkt::AbstractSparseKKTSystem{T, VT, MT}) where {T, VT, MT<:Matrix{T}} ns = length(kkt.ind_ineq) kkt.jac[end-ns+1:end] .= -1.0 - kkt.jac .*= kkt.jacobian_scaling # scaling copyto!(kkt.jac_com, kkt.jac_raw) end -function set_jacobian_scaling!(kkt::AbstractSparseKKTSystem{T, VT, MT}, constraint_scaling::AbstractVector) where {T, VT, MT} - nnzJ = length(kkt.jac)::Int - @inbounds for i in 1:nnzJ - index = kkt.jac_raw.I[i] - kkt.jacobian_scaling[i] = constraint_scaling[index] - end +function compress_hessian!(kkt::AbstractSparseKKTSystem) + transfer!(kkt.hess_com, kkt.hess_raw, kkt.hess_csc_map) end + #= SparseKKTSystem =# -function SparseKKTSystem{T, VT, MT, QN}( - n::Int, m::Int, ind_ineq::Vector{Int}, ind_fixed::Vector{Int}, - hess_sparsity_I, hess_sparsity_J, - jac_sparsity_I, jac_sparsity_J, -) where {T, VT, MT, QN} +# Build KKT system directly from SparseCallback +function create_kkt_system( + ::Type{SparseKKTSystem}, + cb::SparseCallback{T,VT}, opt, + opt_linear_solver,cnt, ind_cons + ) where {T,VT} + + n_slack = length(ind_cons.ind_ineq) + # Deduce KKT size. 
+ + n = cb.nvar + m = cb.ncon + # Evaluate sparsity pattern + jac_sparsity_I = create_array(cb, Int32, cb.nnzj) + jac_sparsity_J = create_array(cb, Int32, cb.nnzj) + _jac_sparsity_wrapper!(cb,jac_sparsity_I, jac_sparsity_J) + + quasi_newton = create_quasi_newton(opt.hessian_approximation, cb, n) + hess_sparsity_I, hess_sparsity_J = build_hessian_structure(cb, opt.hessian_approximation) + + nlb = length(ind_cons.ind_lb) + nub = length(ind_cons.ind_ub) + + force_lower_triangular!(hess_sparsity_I,hess_sparsity_J) + + ind_ineq = ind_cons.ind_ineq + n_slack = length(ind_ineq) n_jac = length(jac_sparsity_I) n_hess = length(hess_sparsity_I) n_tot = n + n_slack + aug_vec_length = n_tot+m aug_mat_length = n_tot+m+n_hess+n_jac+n_slack - I = Vector{Int32}(undef, aug_mat_length) - J = Vector{Int32}(undef, aug_mat_length) + I = create_array(cb, Int32, aug_mat_length) + J = create_array(cb, Int32, aug_mat_length) V = VT(undef, aug_mat_length) fill!(V, 0.0) # Need to initiate V to avoid NaN @@ -158,6 +254,12 @@ function SparseKKTSystem{T, VT, MT, QN}( pr_diag = _madnlp_unsafe_wrap(V, n_tot) du_diag = _madnlp_unsafe_wrap(V, m, n_jac+n_slack+n_hess+n_tot+1) + reg = VT(undef, n_tot) + l_diag = VT(undef, nlb) + u_diag = VT(undef, nub) + l_lower = VT(undef, nlb) + u_lower = VT(undef, nub) + hess = _madnlp_unsafe_wrap(V, n_hess, n_tot+1) jac = _madnlp_unsafe_wrap(V, n_jac+n_slack, n_hess+n_tot+1) jac_callback = _madnlp_unsafe_wrap(V, n_jac, n_hess+n_tot+1) @@ -169,49 +271,31 @@ function SparseKKTSystem{T, VT, MT, QN}( Int32[jac_sparsity_J; n+1:n+n_slack], jac, ) + hess_raw = SparseMatrixCOO( + n_tot, n_tot, + hess_sparsity_I, + hess_sparsity_J, + hess, + ) - aug_com = MT(aug_raw) - jac_com = MT(jac_raw) - - aug_csc_map = get_mapping(aug_com, aug_raw) - jac_csc_map = get_mapping(jac_com, jac_raw) - - ind_aug_fixed = if isa(aug_com, SparseMatrixCSC) - _get_fixed_variable_index(aug_com, ind_fixed) - else - zeros(Int, 0) - end - jac_scaling = ones(T, n_jac+n_slack) + aug_com, aug_csc_map = coo_to_csc(aug_raw) + jac_com, jac_csc_map = coo_to_csc(jac_raw) + hess_com, hess_csc_map = coo_to_csc(hess_raw) - quasi_newton = QN(n) + cnt.linear_solver_time += @elapsed linear_solver = opt.linear_solver( + aug_com; opt = opt_linear_solver + ) - return SparseKKTSystem{T, VT, MT, QN}( - hess, jac_callback, jac, quasi_newton, pr_diag, du_diag, + return SparseKKTSystem( + hess, jac_callback, jac, quasi_newton, reg, pr_diag, du_diag, + l_diag, u_diag, l_lower, u_lower, aug_raw, aug_com, aug_csc_map, + hess_raw, hess_com, hess_csc_map, jac_raw, jac_com, jac_csc_map, - ind_ineq, ind_fixed, ind_aug_fixed, jac_scaling, + linear_solver, + ind_ineq, ind_cons.ind_lb, ind_cons.ind_ub, ) -end - -# Build KKT system directly from AbstractNLPModel -function SparseKKTSystem{T, VT, MT, QN}(nlp::AbstractNLPModel, ind_cons=get_index_constraints(nlp)) where {T, VT, MT, QN} - n_slack = length(ind_cons.ind_ineq) - # Deduce KKT size. 
- - n = get_nvar(nlp) - m = get_ncon(nlp) - # Evaluate sparsity pattern - jac_I = Vector{Int32}(undef, get_nnzj(nlp.meta)) - jac_J = Vector{Int32}(undef, get_nnzj(nlp.meta)) - jac_structure!(nlp,jac_I, jac_J) - - hess_I, hess_J = build_hessian_structure(nlp, QN) - force_lower_triangular!(hess_I,hess_J) - return SparseKKTSystem{T, VT, MT, QN}( - n, m, ind_cons.ind_ineq, ind_cons.ind_fixed, - hess_I, hess_J, jac_I, jac_J, - ) end is_reduced(::SparseKKTSystem) = true @@ -222,12 +306,36 @@ num_variables(kkt::SparseKKTSystem) = length(kkt.pr_diag) SparseUnreducedKKTSystem =# -function SparseUnreducedKKTSystem{T, VT, MT, QN}( - n::Int, m::Int, nlb::Int, nub::Int, ind_ineq, ind_fixed, - hess_sparsity_I, hess_sparsity_J, - jac_sparsity_I, jac_sparsity_J, - ind_lb, ind_ub, -) where {T, VT, MT, QN} +function create_kkt_system( + ::Type{SparseUnreducedKKTSystem}, + cb::SparseCallback{T,VT}, opt, + opt_linear_solver,cnt, ind_cons + ) where {T, VT} + ind_ineq = ind_cons.ind_ineq + ind_lb = ind_cons.ind_lb + ind_ub = ind_cons.ind_ub + + n_slack = length(ind_ineq) + nlb = length(ind_cons.ind_lb) + nub = length(ind_cons.ind_ub) + # Deduce KKT size. + n = cb.nvar + m = cb.ncon + + # Quasi-newton + quasi_newton = create_quasi_newton(opt.hessian_approximation, cb, n) + + # Evaluate sparsity pattern + jac_sparsity_I = create_array(cb, Int32, cb.nnzj) + jac_sparsity_J = create_array(cb, Int32, cb.nnzj) + _jac_sparsity_wrapper!(cb,jac_sparsity_I, jac_sparsity_J) + + hess_sparsity_I = create_array(cb, Int32, cb.nnzh) + hess_sparsity_J = create_array(cb, Int32, cb.nnzh) + _hess_sparsity_wrapper!(cb,hess_sparsity_I,hess_sparsity_J) + + force_lower_triangular!(hess_sparsity_I,hess_sparsity_J) + n_slack = length(ind_ineq) n_jac = length(jac_sparsity_I) n_hess = length(hess_sparsity_I) @@ -236,8 +344,8 @@ function SparseUnreducedKKTSystem{T, VT, MT, QN}( aug_mat_length = n_tot + m + n_hess + n_jac + n_slack + 2*nlb + 2*nub aug_vec_length = n_tot + m + nlb + nub - I = Vector{Int32}(undef, aug_mat_length) - J = Vector{Int32}(undef, aug_mat_length) + I = create_array(cb, Int32, aug_mat_length) + J = create_array(cb, Int32, aug_mat_length) V = zeros(aug_mat_length) offset = n_tot + n_jac + n_slack + n_hess + m @@ -267,14 +375,23 @@ function SparseUnreducedKKTSystem{T, VT, MT, QN}( du_diag = _madnlp_unsafe_wrap(V,m, n_jac + n_slack+n_hess+n_tot+1) l_diag = _madnlp_unsafe_wrap(V, nlb, offset+1) - l_lower= _madnlp_unsafe_wrap(V, nlb, offset+nlb+1) u_diag = _madnlp_unsafe_wrap(V, nub, offset+2nlb+1) - u_lower= _madnlp_unsafe_wrap(V, nub, offset+2nlb+nub+1) + l_lower_aug = _madnlp_unsafe_wrap(V, nlb, offset+nlb+1) + u_lower_aug = _madnlp_unsafe_wrap(V, nub, offset+2nlb+nub+1) + reg = VT(undef, n_tot) + l_lower = VT(undef, nlb) + u_lower = VT(undef, nub) hess = _madnlp_unsafe_wrap(V, n_hess, n_tot+1) jac = _madnlp_unsafe_wrap(V, n_jac + n_slack, n_hess+n_tot+1) jac_callback = _madnlp_unsafe_wrap(V, n_jac, n_hess+n_tot+1) + hess_raw = SparseMatrixCOO( + n_tot, n_tot, + hess_sparsity_I, + hess_sparsity_J, + hess, + ) aug_raw = SparseMatrixCOO(aug_vec_length,aug_vec_length,I,J,V) jac_raw = SparseMatrixCOO( m, n_tot, @@ -283,65 +400,433 @@ function SparseUnreducedKKTSystem{T, VT, MT, QN}( jac, ) - aug_com = MT(aug_raw) - jac_com = MT(jac_raw) + aug_com, aug_csc_map = coo_to_csc(aug_raw) + jac_com, jac_csc_map = coo_to_csc(jac_raw) + hess_com, hess_csc_map = coo_to_csc(hess_raw) + + cnt.linear_solver_time += @elapsed linear_solver = opt.linear_solver(aug_com; opt = opt_linear_solver) +opt.linear_solver( + aug_com; opt = 
opt_linear_solver + ) + return SparseUnreducedKKTSystem( + hess, jac_callback, jac, quasi_newton, reg, pr_diag, du_diag, + l_diag, u_diag, l_lower, u_lower, l_lower_aug, u_lower_aug, + aug_raw, aug_com, aug_csc_map, + hess_raw, hess_com, hess_csc_map, + jac_raw, jac_com, jac_csc_map, + linear_solver, + ind_ineq, ind_lb, ind_ub, + ) +end + +function initialize!(kkt::AbstractSparseKKTSystem) + fill!(kkt.reg, 1.0) + fill!(kkt.pr_diag, 1.0) + fill!(kkt.du_diag, 0.0) + fill!(kkt.hess, 0.0) + fill!(kkt.l_lower, 0.0) + fill!(kkt.u_lower, 0.0) + fill!(kkt.l_diag, 1.0) + fill!(kkt.u_diag, 1.0) + fill!(kkt.hess_com.nzval, 0.) # so that mul! in the initial primal-dual solve has no effect +end + +function initialize!(kkt::SparseUnreducedKKTSystem) + fill!(kkt.reg, 1.0) + fill!(kkt.pr_diag, 1.0) + fill!(kkt.du_diag, 0.0) + fill!(kkt.hess, 0.0) + fill!(kkt.l_lower, 0.0) + fill!(kkt.u_lower, 0.0) + fill!(kkt.l_diag, 1.0) + fill!(kkt.u_diag, 1.0) + fill!(kkt.l_lower_aug, 0.0) + fill!(kkt.u_lower_aug, 0.0) + fill!(kkt.hess_com.nzval, 0.) # so that mul! in the initial primal-dual solve has no effect +end - aug_csc_map = get_mapping(aug_com, aug_raw) - jac_csc_map = get_mapping(jac_com, jac_raw) +is_reduced(::SparseUnreducedKKTSystem) = false +num_variables(kkt::SparseUnreducedKKTSystem) = length(kkt.pr_diag) - jac_scaling = ones(T, n_jac+n_slack) - ind_aug_fixed = if isa(aug_com, SparseMatrixCSC) - _get_fixed_variable_index(aug_com, ind_fixed) - else - zeros(Int, 0) +#= + SparseCondensedKKTSystem +=# + +# Build KKT system directly from SparseCallback +function create_kkt_system( + ::Type{SparseCondensedKKTSystem}, + cb::SparseCallback{T,VT}, + opt, + opt_linear_solver, + cnt, + ind_cons + ) where {T, VT} + + ind_ineq = ind_cons.ind_ineq + n = cb.nvar + m = cb.ncon + n_slack = length(ind_ineq) + + if n_slack != m + error("SparseCondensedKKTSystem does not support equality constrained NLPs.") end + + # Evaluate sparsity pattern + jac_sparsity_I = create_array(cb, Int32, cb.nnzj) + jac_sparsity_J = create_array(cb, Int32, cb.nnzj) + _jac_sparsity_wrapper!(cb,jac_sparsity_I, jac_sparsity_J) + + quasi_newton = create_quasi_newton(opt.hessian_approximation, cb, n) + hess_sparsity_I, hess_sparsity_J = build_hessian_structure(cb, opt.hessian_approximation) + + force_lower_triangular!(hess_sparsity_I,hess_sparsity_J) - quasi_newton = QN(n) + n_jac = length(jac_sparsity_I) + n_hess = length(hess_sparsity_I) + n_tot = n + n_slack + nlb = length(ind_cons.ind_lb) + nub = length(ind_cons.ind_ub) - return SparseUnreducedKKTSystem{T, VT, MT, QN}( - hess, jac_callback, jac, quasi_newton, pr_diag, du_diag, + + reg = VT(undef, n_tot) + pr_diag = VT(undef, n_tot) + du_diag = VT(undef, m) + l_diag = VT(undef, nlb) + u_diag = VT(undef, nub) + l_lower = VT(undef, nlb) + u_lower = VT(undef, nub) + buffer = VT(undef, m) + buffer2= VT(undef, m) + hess = VT(undef, n_hess) + jac = VT(undef, n_jac) + diag_buffer = VT(undef, m) + + hess_raw = SparseMatrixCOO(n, n, hess_sparsity_I, hess_sparsity_J, hess) + + jt_coo = SparseMatrixCOO( + n, m, + jac_sparsity_J, + jac_sparsity_I, + jac, + ) + + jt_csc, jt_csc_map = coo_to_csc(jt_coo) + hess_com, hess_csc_map = coo_to_csc(hess_raw) + aug_com, dptr, hptr, jptr = build_condensed_aug_symbolic( + hess_com, + jt_csc + ) + + cnt.linear_solver_time += @elapsed linear_solver = opt.linear_solver(aug_com; opt = opt_linear_solver) + + ext = get_sparse_condensed_ext(VT, hess_com, jptr, jt_csc_map, hess_csc_map) + + return SparseCondensedKKTSystem( + hess, hess_raw, hess_com, hess_csc_map, + jac, jt_coo, 
jt_csc, jt_csc_map, + quasi_newton, + reg, pr_diag, du_diag, l_diag, u_diag, l_lower, u_lower, - aug_raw, aug_com, aug_csc_map, - jac_raw, jac_com, jac_csc_map, - ind_ineq, ind_fixed, ind_aug_fixed, jac_scaling, + buffer, buffer2, + aug_com, diag_buffer, dptr, hptr, jptr, + linear_solver, + ind_ineq, ind_cons.ind_lb, ind_cons.ind_ub, + ext ) end -function SparseUnreducedKKTSystem{T, VT, MT, QN}(nlp::AbstractNLPModel, ind_cons=get_index_constraints(nlp)) where {T, VT, MT, QN} - n_slack = length(ind_cons.ind_ineq) - nlb = length(ind_cons.ind_lb) - nub = length(ind_cons.ind_ub) - # Deduce KKT size. - n = get_nvar(nlp) - m = get_ncon(nlp) - # Evaluate sparsity pattern - jac_I = Vector{Int32}(undef, get_nnzj(nlp.meta)) - jac_J = Vector{Int32}(undef, get_nnzj(nlp.meta)) - jac_structure!(nlp,jac_I, jac_J) + +get_sparse_condensed_ext(::Type{Vector{T}},args...) where T = nothing + + +is_reduced(::SparseCondensedKKTSystem) = true +num_variables(kkt::SparseCondensedKKTSystem) = length(kkt.pr_diag) +function is_inertia_correct(kkt::SparseCondensedKKTSystem, num_pos, num_zero, num_neg) + return (num_zero == 0) && (num_pos == num_variables(kkt)) +end + + +Base.size(kkt::SparseCondensedKKTSystem,n::Int) = size(kkt.aug_com,n) +# nnz_jacobian(kkt::SparseCondensedKKTSystem) = nnz(kkt.jac_raw) + + +function compress_jacobian!(kkt::SparseCondensedKKTSystem{T, VT, MT}) where {T, VT, MT<:SparseMatrixCSC{T, Int32}} + ns = length(kkt.ind_ineq) + # kkt.jac[end-ns+1:end] .= -1.0 + transfer!(kkt.jt_csc, kkt.jt_coo, kkt.jt_csc_map) +end + +function mul!(y::AbstractKKTVector, kkt::SparseCondensedKKTSystem, x::AbstractKKTVector) + mul!(full(y), kkt, full(x)) +end + +function mul!(y::AbstractVector, kkt::SparseCondensedKKTSystem, x::AbstractVector) + # TODO: implement properly with AbstractKKTRHS + if length(y) == length(x) == size(kkt.aug_com, 1) + mul!(y, Symmetric(kkt.aug_com, :L), x) + return + end + + n = size(kkt.hess_csc, 1) + m = size(kkt.jt_csc, 2) + + + Σx = view(kkt.pr_diag, 1:n) + Σs = view(kkt.pr_diag, n+1:n+m) + Σd = kkt.du_diag + + # Decompose x + xx = view(x, 1:n) + xs = view(x, 1+n:n+m) + xy = view(x, 1+n+m:n+2*m) + + # Decompose y + yx = view(y, 1:n) + ys = view(y, 1+n:n+m) + yy = view(y, 1+n+m:n+2*m) + + # / x (variable) + mul!(yx, Symmetric(kkt.hess_csc, :L), xx) + yx .+= Σx .* xx + mul!(yx, kkt.jt_csc, xy, 1.0, 1.0) + + # / s (slack) + ys .= Σs .* xs + ys .-= xy + + # / y (multiplier) + yy .= Σd .* xy + mul!(yy, kkt.jt_csc', xx, 1.0, 1.0) + yy .-= xs +end + +function jtprod!(y::AbstractVector, kkt::SparseCondensedKKTSystem, x::AbstractVector) + n = size(kkt.hess_com, 1) + m = size(kkt.jt_csc, 2) + + mul!(view(y, 1:n), kkt.jt_csc, x) + y[size(kkt.jt_csc,1)+1:end] .= -x +end + +function _sym_length(Jt) + len = 0 + for i=1:size(Jt,2) + n = Jt.colptr[i+1] - Jt.colptr[i] + len += div(n^2 + n, 2) + end + return len +end + +function _build_condensed_aug_symbolic_hess(H, sym, sym2) + for i in 1:size(H,2) + for j in H.colptr[i]:H.colptr[i+1]-1 + c = H.rowval[j] + sym[j] = (0,j,0) + sym2[j] = (c,i) + end + end +end + +function _build_condensed_aug_symbolic_jt(Jt, sym, sym2) + + cnt = 0 + for i in 1:size(Jt,2) + for j in Jt.colptr[i]:Jt.colptr[i+1]-1 + for k in j:Jt.colptr[i+1]-1 + c1 = Jt.rowval[j] + c2 = Jt.rowval[k] + sym[cnt+=1] = (i,j,k) + sym2[cnt] = (c2,c1) + end + end + end +end + +function getptr(array; by = (x,y)->x != y) + bitarray = similar(array, Bool, length(array)+1) + fill!(bitarray, true) + bitarray[2:end-1] .= by.(@view(array[1:end-1]), @view(array[2:end])) + findall(bitarray) +end + 
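# Minimal usage sketch for `getptr`, assuming a plain Vector input: it returns the
# start index of each run of equal consecutive entries plus one past the last run,
# in the same spirit as a CSC colptr array. With the default by = (x, y) -> x != y,
#
#   getptr([1, 1, 2, 2, 2, 3])   # -> [1, 3, 6, 7], i.e. runs 1:2, 3:5 and 6:6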
+nzval(H) = H.nzval + +@inbounds function build_condensed_aug_symbolic(H::AbstractSparseMatrix{Tv,Ti}, Jt) where {Tv, Ti} + nnzjtsj = _sym_length(Jt) + + sym = similar(nzval(H), Tuple{Int,Int,Int}, + size(H,2) + nnz(H) + nnzjtsj + ) + sym2 = similar(nzval(H), Tuple{Int,Int}, + size(H,2) + nnz(H) + nnzjtsj + ) + dptr = similar(nzval(H), Tuple{Ti,Ti}, + size(H,2) + ) + hptr = similar(nzval(H), Tuple{Ti,Ti}, + nnz(H) + ) + jptr = similar(nzval(H), Tuple{Ti,Tuple{Ti,Ti,Ti}}, + nnzjtsj + ) + colptr = fill!( + similar(nzval(H), Ti, size(H,1)+1), + one(Tv) + ) + rowval = Ti[] + + n = size(H,2) + + map!( + i->(-1,i,0), + @view(sym[1:n]), + 1:size(H,2) + ) + map!( + i->(i,i), + @view(sym2[1:n]), + 1:size(H,2) + ) - hess_I = Vector{Int32}(undef, get_nnzh(nlp.meta)) - hess_J = Vector{Int32}(undef, get_nnzh(nlp.meta)) - hess_structure!(nlp,hess_I,hess_J) - force_lower_triangular!(hess_I,hess_J) + _build_condensed_aug_symbolic_hess( + H, + @view(sym[n+1:n+nnz(H)]), + @view(sym2[n+1:n+nnz(H)]) + ) + _build_condensed_aug_symbolic_jt( + Jt, + @view(sym[n+nnz(H)+1:n+nnz(H) + nnzjtsj]), + @view(sym2[n+nnz(H)+1:n+nnz(H)+nnzjtsj]) + ) - return SparseUnreducedKKTSystem{T, VT, MT, QN}( - n, m, nlb, nub, ind_cons.ind_ineq, ind_cons.ind_fixed, - hess_I, hess_J, jac_I, jac_J, ind_cons.ind_lb, ind_cons.ind_ub, + p = sortperm(sym2; by = ((i,j),) -> (j,i)) + permute!(sym, p) + permute!(sym2, p) + + by(x,y) = x != y + + bitarray = similar(sym2, Bool, length(sym2)) + fill!(bitarray, true) + bitarray[2:end] .= by.(@view(sym2[1:end-1]), @view(sym2[2:end])) + guide = cumsum(bitarray) + + b = findall(x->x[1] == -1, sym) + dptr = map((x,y)->(Int32(x),Int32(y[2])), @view(guide[b]), @view(sym[b])) + + b = findall(x->x[1] == 0, sym) + hptr = map((x,y)->(Int32(x),Int32(y[2])), @view(guide[b]), @view(sym[b])) + + b = findall(x->x[1] != -1 && x[1] != 0, sym) + jptr = map((x,y)->(Int32(x),y), @view(guide[b]), @view(sym[b])) + + + ptr = findall(bitarray) + rowval = map(((row,col),)->Int32(row), @view(sym2[ptr])) + + by2(x,y) = x[2] != y[2] + bitarray[2:end] .= by2.(@view(sym2[1:end-1]), @view(sym2[2:end])) + ptr2 = findall(bitarray) + + first, last = _first_and_last_col(sym2,ptr2) + + fill!( + @view(colptr[1:first]), + 1 ) + + _set_colptr!(colptr, ptr2, sym2, guide) + + fill!( + @view(colptr[last+1:end]), + length(ptr)+1 + ) + + aug_com = _get_sparse_csc( + size(H), + colptr, + rowval, + similar(nzval(H), length(ptr)) + ) + + return aug_com, dptr, hptr, jptr end -function initialize!(kkt::SparseUnreducedKKTSystem) - kkt.pr_diag.=1 - kkt.du_diag.=0 - kkt.hess.=0 - kkt.l_lower.=0 - kkt.u_lower.=0 - kkt.l_diag.=1 - kkt.u_diag.=1 +function _get_sparse_csc(dims, colptr, rowval, nzval) + SparseMatrixCSC( + dims..., + colptr, + rowval, + nzval + ) end -is_reduced(::SparseUnreducedKKTSystem) = false -num_variables(kkt::SparseUnreducedKKTSystem) = length(kkt.pr_diag) +function _first_and_last_col(sym2,ptr2) + first= sym2[1][2] + last = sym2[ptr2[end]][2] + return (first, last) +end + +function _set_colptr!(colptr, ptr2, sym2, guide) + for i in @view(ptr2[2:end]) + + (~,prevcol) = sym2[i-1] + (row,col) = sym2[i] + + fill!(@view(colptr[prevcol+1:col]), guide[i]) + end +end + +@inbounds function _build_condensed_aug_coord!(aug_com::SparseMatrixCSC{Tv,Ti}, pr_diag, H, Jt, diag_buffer, dptr, hptr, jptr) where {Tv, Ti} + fill!(aug_com.nzval, zero(Tv)) + + @simd for idx in eachindex(hptr) + i,j = hptr[idx] + aug_com.nzval[i] += H.nzval[j] + end + + @simd for idx in eachindex(dptr) + i,j = dptr[idx] + aug_com.nzval[i] += pr_diag[j] + end + + @simd 
for idx in eachindex(jptr) + (i,(j,k,l)) = jptr[idx] + aug_com.nzval[i] += diag_buffer[j] * Jt.nzval[k] * Jt.nzval[l] + end +end + +function build_condensed_aug_coord!(kkt::SparseCondensedKKTSystem{T,VT,MT}) where {T, VT, MT <: SparseMatrixCSC{T}} + _build_condensed_aug_coord!( + kkt.aug_com, kkt.pr_diag, kkt.hess_com, kkt.jt_csc, kkt.diag_buffer, + kkt.dptr, kkt.hptr, kkt.jptr + ) +end + + +function build_kkt!(kkt::SparseKKTSystem) + + transfer!(kkt.aug_com, kkt.aug_raw, kkt.aug_csc_map) +end + +function build_kkt!(kkt::SparseUnreducedKKTSystem) + + transfer!(kkt.aug_com, kkt.aug_raw, kkt.aug_csc_map) +end + +function build_kkt!(kkt::SparseCondensedKKTSystem) + + n = size(kkt.hess_com, 1) + m = size(kkt.jt_csc, 2) + + + Σx = view(kkt.pr_diag, 1:n) + Σs = view(kkt.pr_diag, n+1:n+m) + Σd = kkt.du_diag + + kkt.diag_buffer .= Σs ./ ( 1 .- Σd .* Σs) + build_condensed_aug_coord!(kkt) +end +get_jacobian(kkt::SparseCondensedKKTSystem) = kkt.jac diff --git a/src/LinearSolvers/backsolve.jl b/src/LinearSolvers/backsolve.jl index 054d4ef6..6ab8a3a5 100644 --- a/src/LinearSolvers/backsolve.jl +++ b/src/LinearSolvers/backsolve.jl @@ -1,94 +1,85 @@ -# MadNLP.jl -# Created by Sungho Shin (sungho.shin@wisc.edu) +@kwdef struct RichardsonOptions + max_iter::Int = 10 + tol::Float64 = 1e-10 + acceptable_tol::Float64 = 1e-5 +end -struct RichardsonIterator{T, VT, KKT, LinSolver <: AbstractLinearSolver{T}} <: AbstractIterator{T} - linear_solver::LinSolver +struct RichardsonIterator{T, KKT <: AbstractKKTSystem{T}} <: AbstractIterator{T} kkt::KKT - residual::VT - max_iter::Int - tol::T - acceptable_tol::T + opt::RichardsonOptions + cnt::MadNLPCounters logger::MadNLPLogger end + function RichardsonIterator( - linear_solver::AbstractLinearSolver{T}, - kkt::AbstractKKTSystem, - res::AbstractVector; - max_iter=10, tol=T(1e-10), acceptable_tol=T(1e-5), logger=MadNLPLogger(), -) where T + kkt; + opt = RichardsonOptions(), + logger = MadNLPLogger(), + cnt = MadNLPCounters() +) return RichardsonIterator( - linear_solver, kkt, res, max_iter, tol, acceptable_tol, logger, + kkt, opt, cnt, logger ) end -# Solve reduced KKT system. Require only the primal/dual values. -function solve_refine!( - x::AbstractKKTVector{T, VT}, - solver::RichardsonIterator{T, VT, KKT, LinSolver}, - b::AbstractKKTVector{T, VT}, -) where {T, VT, KKT<:AbstractReducedKKTSystem, LinSolver} - solve_refine!(primal_dual(x), solver, primal_dual(b)) -end - -# Solve unreduced KKT system. Require UnreducedKKTVector as inputs. 
function solve_refine!( - x::UnreducedKKTVector{T, VT}, - solver::RichardsonIterator{T, VT, KKT, LinSolver}, - b::UnreducedKKTVector{T, VT}, -) where {T, VT, KKT<:AbstractUnreducedKKTSystem, LinSolver} - solve_refine!(full(x), solver, full(b)) -end + x::VT, + iterator::R, + b::VT, + w::VT + ) where {T, VT, R <: RichardsonIterator{T}} + @debug(iterator.logger, "Iterative solver initiated") -function solve_refine!( - x::AbstractVector{T}, - solver::RichardsonIterator{T}, - b::AbstractVector{T}, -) where T - @debug(solver.logger, "Iterative solver initiated") + norm_b = norm(full(b), Inf) + residual_ratio = zero(T) - ε = solver.residual - norm_b = norm(b, Inf) + fill!(full(x), zero(T)) - fill!(x, zero(T)) - fill!(ε, zero(T)) - ε = solver.residual - axpy!(-1, b, ε) - norm_res = norm(ε, Inf) - residual_ratio = norm_res / (one(T) + norm_b) + if norm_b == zero(T) + @debug( + iterator.logger, + @sprintf( + "Iterative solver terminated with %4i refinement steps and residual = %6.2e", + 0, 0 + ), + ) + return true + end + copyto!(full(w), full(b)) iter = 0 - residual_ratio_old = Inf - noprogress = 0 while true - mod(iter, 10)==0 && - @debug(solver.logger,"iter ||res||") - @debug(solver.logger, @sprintf("%4i %6.2e", iter, residual_ratio)) + solve!(iterator.kkt, w) + axpy!(1., full(w), full(x)) + copyto!(full(w), full(b)) + + mul!(w, iterator.kkt, x, -one(T), one(T)) + + norm_w = norm(full(w), Inf) + norm_x = norm(full(x), Inf) + residual_ratio = norm_w / (min(norm_x, 1e6 * norm_b) + norm_b) + + if mod(iter, 10)==0 + @debug(iterator.logger,"iter ||res||") + end + @debug(iterator.logger, @sprintf("%4i %6.2e", iter, residual_ratio)) iter += 1 - if (iter > solver.max_iter) || (residual_ratio < solver.tol) + + if (iter >= iterator.opt.max_iter) || (residual_ratio < iterator.opt.tol) break end - - solve!(solver.linear_solver, ε) - axpy!(-1, ε, x) - mul!(ε, solver.kkt, x) - axpy!(-1, b, ε) - norm_res = norm(ε, Inf) - - residual_ratio_old = residual_ratio - residual_ratio = norm_res / (one(T)+norm_b) end - @debug(solver.logger, @sprintf( - "Iterative solver terminated with %4i refinement steps and residual = %6.2e", - iter, residual_ratio), + @debug( + iterator.logger, + @sprintf( + "Iterative solver terminated with %4i refinement steps and residual = %6.2e", + iter, residual_ratio + ), ) - if residual_ratio < solver.acceptable_tol - return :Solved - else - return :Failed - end + return residual_ratio < iterator.opt.acceptable_tol end diff --git a/src/LinearSolvers/lapack.jl b/src/LinearSolvers/lapack.jl index 68a2d10f..9c530d65 100644 --- a/src/LinearSolvers/lapack.jl +++ b/src/LinearSolvers/lapack.jl @@ -2,8 +2,8 @@ lapack_algorithm::LinearFactorization = BUNCHKAUFMAN end -mutable struct LapackCPUSolver{T} <: AbstractLinearSolver{T} - dense::Matrix{T} +mutable struct LapackCPUSolver{T, MT} <: AbstractLinearSolver{T} + A::MT fact::Matrix{T} work::Vector{T} lwork::BlasInt @@ -72,19 +72,19 @@ for (sytrf,sytrs,getrf,getrs,geqrf,ormqr,trsm,potrf,potrs,typ) in ( end function LapackCPUSolver( - dense::Matrix{T}; + A::MT; opt=LapackOptions(), logger=MadNLPLogger(), -) where T - fact = copy(dense) - +) where {T, MT <: AbstractMatrix{T}} + fact = Matrix{T}(undef, size(A)) etc = Dict{Symbol,Any}() work = Vector{T}(undef, 1) - info = 0 + info = Ref(0) - return LapackCPUSolver{T}(dense,fact,work,-1,info,etc,opt,logger) + return LapackCPUSolver(A,fact,work,-1,info,etc,opt,logger) end + function factorize!(M::LapackCPUSolver) if M.opt.lapack_algorithm == BUNCHKAUFMAN factorize_bunchkaufman!(M) @@ -114,9 +114,9 @@ end 
function factorize_bunchkaufman!(M::LapackCPUSolver) size(M.fact,1) == 0 && return M - haskey(M.etc,:ipiv) || (M.etc[:ipiv] = Vector{BlasInt}(undef,size(M.dense,1))) + haskey(M.etc,:ipiv) || (M.etc[:ipiv] = Vector{BlasInt}(undef,size(M.A,1))) M.lwork = -1 - M.fact .= M.dense + M.fact .= M.A sytrf('L',size(M.fact,1),M.fact,size(M.fact,2),M.etc[:ipiv],M.work,M.lwork,M.info) M.lwork = BlasInt(real(M.work[1])) length(M.work) < M.lwork && resize!(M.work,M.lwork) @@ -131,9 +131,9 @@ end function factorize_lu!(M::LapackCPUSolver) size(M.fact,1) == 0 && return M - haskey(M.etc,:ipiv) || (M.etc[:ipiv] = Vector{BlasInt}(undef,size(M.dense,1))) - tril_to_full!(M.dense) - M.fact .= M.dense + haskey(M.etc,:ipiv) || (M.etc[:ipiv] = Vector{BlasInt}(undef,size(M.A,1))) + M.fact .= M.A + tril_to_full!(M.fact) getrf(size(M.fact,1),size(M.fact,2),M.fact,size(M.fact,2),M.etc[:ipiv],M.info) return M end @@ -147,10 +147,10 @@ end function factorize_qr!(M::LapackCPUSolver{T}) where T size(M.fact,1) == 0 && return M - haskey(M.etc,:tau) || (M.etc[:tau] = Vector{T}(undef,size(M.dense,1))) - tril_to_full!(M.dense) + haskey(M.etc,:tau) || (M.etc[:tau] = Vector{T}(undef,size(M.A,1))) M.lwork = -1 - M.fact .= M.dense + M.fact .= M.A + tril_to_full!(M.fact) geqrf(size(M.fact,1),size(M.fact,2),M.fact,size(M.fact,2),M.etc[:tau],M.work,M.lwork,M.info) M.lwork = BlasInt(real(M.work[1])) length(M.work) < M.lwork && resize!(M.work,M.lwork) @@ -172,7 +172,7 @@ end function factorize_cholesky!(M::LapackCPUSolver) size(M.fact,1) == 0 && return M M.lwork = -1 - M.fact .= M.dense + M.fact .= M.A potrf('L',size(M.fact,1),M.fact,size(M.fact,2),M.info) return M end @@ -184,6 +184,7 @@ end is_inertia(M::LapackCPUSolver) = M.opt.lapack_algorithm == BUNCHKAUFMAN || M.opt.lapack_algorithm == CHOLESKY + function inertia(M::LapackCPUSolver) if M.opt.lapack_algorithm == BUNCHKAUFMAN inertia(M.fact,M.etc[:ipiv],M.info[]) @@ -203,9 +204,10 @@ end improve!(M::LapackCPUSolver) = false +input_type(::Type{LapackCPUSolver}) = :dense + introduce(M::LapackCPUSolver) = "Lapack-CPU ($(M.opt.lapack_algorithm))" -input_type(::Type{LapackCPUSolver}) = :dense default_options(::Type{LapackCPUSolver}) = LapackOptions() function num_neg_ev(n,D,ipiv) diff --git a/src/LinearSolvers/linearsolvers.jl b/src/LinearSolvers/linearsolvers.jl index 325f6d7f..77be4cd9 100644 --- a/src/LinearSolvers/linearsolvers.jl +++ b/src/LinearSolvers/linearsolvers.jl @@ -140,15 +140,5 @@ include("backsolve.jl") # dense solvers include("lapack.jl") - -# UMFPACK -# N.B: interface depends on Julia version as the UMFPACK's -# wrapper has been updated in v1.9. The deprecated -# interface will not be supported once the new LTS -# released. 
-if VERSION >= v"1.9" - include("umfpack.jl") -else - include("umfpack_deprecated.jl") -end +include("umfpack.jl") diff --git a/src/LinearSolvers/umfpack.jl b/src/LinearSolvers/umfpack.jl index 616d9570..6fb5c780 100644 --- a/src/LinearSolvers/umfpack.jl +++ b/src/LinearSolvers/umfpack.jl @@ -7,12 +7,13 @@ end mutable struct UmfpackSolver{T} <: AbstractLinearSolver{T} - inner::UMFPACK.UmfpackLU{T, Int32} - tril::SparseMatrixCSC{T} - full::SparseMatrixCSC{T} + inner::UMFPACK.UmfpackLU{Float64, Int32} + tril::SparseMatrixCSC{T,Int32} + full::SparseMatrixCSC{Float64,Int32} tril_to_full_view::SubVector{T} - p::Vector{T} + p::Vector{Float64} + d::Vector{Float64} opt::UmfpackOptions logger::MadNLPLogger @@ -22,16 +23,17 @@ function UmfpackSolver( csc::SparseMatrixCSC{T}; opt=UmfpackOptions(), logger=MadNLPLogger(), ) where T - p = Vector{T}(undef,csc.n) - full, tril_to_full_view = get_tril_to_full(csc) - controls = UMFPACK.get_umfpack_control(T, Int) + p = Vector{Float64}(undef,csc.n) + d = Vector{Float64}(undef,csc.n) + full, tril_to_full_view = get_tril_to_full(Float64,csc) + controls = UMFPACK.get_umfpack_control(Float64, Int) # Override default controls with custom setting controls[4] = opt.umfpack_pivtol controls[5] = opt.umfpack_block_size controls[6] = opt.umfpack_strategy controls[12] = opt.umfpack_sym_pivtol - inner = UMFPACK.UmfpackLU(csc; control=controls) - return UmfpackSolver(inner, csc, full, tril_to_full_view, p, opt, logger) + inner = UMFPACK.UmfpackLU(full; control=controls) + return UmfpackSolver(inner, csc, full, tril_to_full_view, p, d, opt, logger) end function factorize!(M::UmfpackSolver) @@ -43,8 +45,9 @@ end function solve!(M::UmfpackSolver{T},rhs::Vector{T}) where T if UMFPACK.issuccess(M.inner) - UMFPACK.ldiv!(M.p, M.inner, rhs) - rhs .= M.p + M.p .= rhs + UMFPACK.ldiv!(M.d, M.inner, M.p) + rhs .= M.d end # If the factorization failed, we return the same # rhs to enter into a primal-dual regularization phase. @@ -66,4 +69,5 @@ function improve!(M::UmfpackSolver) return true end introduce(::UmfpackSolver) = "umfpack" +is_supported(::Type{UmfpackSolver},::Type{Float32}) = true is_supported(::Type{UmfpackSolver},::Type{Float64}) = true diff --git a/src/LinearSolvers/umfpack_deprecated.jl b/src/LinearSolvers/umfpack_deprecated.jl deleted file mode 100644 index 677939b7..00000000 --- a/src/LinearSolvers/umfpack_deprecated.jl +++ /dev/null @@ -1,111 +0,0 @@ -const umfpack_default_ctrl = copy(UMFPACK.umf_ctrl) -const umfpack_default_info = copy(UMFPACK.umf_info) - -@kwdef mutable struct UmfpackOptions <: AbstractOptions - umfpack_pivtol::Float64 = 1e-4 - umfpack_pivtolmax::Float64 = 1e-1 - umfpack_sym_pivtol::Float64 = 1e-3 - umfpack_block_size::Float64 = 16 - umfpack_strategy::Float64 = 2. 
-end - -mutable struct UmfpackSolver{T} <: AbstractLinearSolver{T} - inner::UMFPACK.UmfpackLU{T, Int32} - tril::SparseMatrixCSC{T} - full::SparseMatrixCSC{T} - tril_to_full_view::SubVector{T} - - p::Vector{T} - - tmp::Vector{Ptr{Cvoid}} - ctrl::Vector{T} - info::Vector{T} - - opt::UmfpackOptions - logger::MadNLPLogger -end - - -for (numeric,solve,T) in ( - (:umfpack_di_numeric, :umfpack_di_solve, Float64), - (:umfpack_si_numeric, :umfpack_si_solve, Float32), - ) - @eval begin - umfpack_numeric( - colptr::Vector{Int32},rowval::Vector{Int32}, - nzval::Vector{$T},symbolic::Ptr{Nothing}, - tmp::Vector{Ptr{Nothing}},ctrl::Vector{$T}, - info::Vector{$T}) = ccall( - ($(string(numeric)),:libumfpack), - Int32, - (Ptr{Int32},Ptr{Int32},Ptr{$T},Ptr{Cvoid},Ptr{Cvoid}, - Ptr{$T},Ptr{$T}), - colptr,rowval,nzval,symbolic,tmp,ctrl,info) - umfpack_solve( - typ,colptr::Vector{Int32},rowval::Vector{Int32}, - nzval::Vector{$T},x::Vector{$T},b::Vector{$T}, - numeric,ctrl::Vector{$T},info::Vector{$T}) = ccall( - ($(string(solve)),:libumfpack), - Int32, - (Int32, Ptr{Int32}, Ptr{Int32}, Ptr{$T},Ptr{$T}, - Ptr{$T}, Ptr{Cvoid}, Ptr{$T},Ptr{$T}), - typ,colptr,rowval,nzval,x,b,numeric,ctrl,info) - end -end - - - -function UmfpackSolver( - csc::SparseMatrixCSC{T}; - opt=UmfpackOptions(), logger=MadNLPLogger(), -) where T - p = Vector{T}(undef,csc.n) - full,tril_to_full_view = get_tril_to_full(csc) - - full.colptr.-=1; full.rowval.-=1 - - inner = UMFPACK.UmfpackLU(C_NULL,C_NULL,full.n,full.n,full.colptr,full.rowval,full.nzval,0) - UMFPACK.finalizer(UMFPACK.umfpack_free_symbolic,inner) - UMFPACK.umfpack_symbolic!(inner) - ctrl = copy(umfpack_default_ctrl) - info = copy(umfpack_default_info) - ctrl[4]=opt.umfpack_pivtol - ctrl[12]=opt.umfpack_sym_pivtol - ctrl[5]=opt.umfpack_block_size - ctrl[6]=opt.umfpack_strategy - - tmp = Vector{Ptr{Cvoid}}(undef, 1) - - return UmfpackSolver(inner,csc,full,tril_to_full_view,p,tmp,ctrl,info,opt,logger) -end - -function factorize!(M::UmfpackSolver) - UMFPACK.umfpack_free_numeric(M.inner) - M.full.nzval.=M.tril_to_full_view - status = umfpack_numeric(M.inner.colptr,M.inner.rowval,M.inner.nzval,M.inner.symbolic,M.tmp,M.ctrl,M.info) - M.inner.numeric = M.tmp[] - - M.inner.status = status - return M -end -function solve!(M::UmfpackSolver{T},rhs::Vector{T}) where T - status = umfpack_solve(1,M.inner.colptr,M.inner.rowval,M.inner.nzval,M.p,rhs,M.inner.numeric,M.ctrl,M.info) - rhs .= M.p - return rhs -end -is_inertia(::UmfpackSolver) = false -inertia(M::UmfpackSolver) = throw(InertiaException()) -input_type(::Type{UmfpackSolver}) = :csc -default_options(::Type{UmfpackSolver}) = UmfpackOptions() - -function improve!(M::UmfpackSolver) - if M.ctrl[4] == M.opt.umfpack_pivtolmax - @debug(M.logger,"improve quality failed.") - return false - end - M.ctrl[4] = min(M.opt.umfpack_pivtolmax,M.ctrl[4]^.75) - @debug(M.logger,"improved quality: pivtol = $(M.ctrl[4])") - return true -end -introduce(::UmfpackSolver)="umfpack" -is_supported(::Type{UmfpackSolver},::Type{Float64}) = true diff --git a/src/MadNLP.jl b/src/MadNLP.jl index fdc75739..28c8f5a5 100644 --- a/src/MadNLP.jl +++ b/src/MadNLP.jl @@ -1,6 +1,3 @@ -# MadNLP.jl -# Created by Sungho Shin (sungho.shin@wisc.edu) - module MadNLP import Pkg.TOML: parsefile @@ -10,7 +7,7 @@ import Printf: @sprintf import LinearAlgebra: BLAS, Adjoint, Symmetric, mul!, ldiv!, norm, dot, diagind, normInf, transpose! import LinearAlgebra: cholesky, qr, lu, cholesky!, axpy! 
import LinearAlgebra.BLAS: symv!, ger!, libblas, liblapack, BlasInt, @blasfunc -import SparseArrays: AbstractSparseMatrix, SparseMatrixCSC, sparse, getcolptr, rowvals, nnz +import SparseArrays: SparseArrays, AbstractSparseMatrix, SparseMatrixCSC, sparse, getcolptr, rowvals, nnz import Base: string, show, print, size, getindex, copyto!, @kwdef import SuiteSparse: UMFPACK import NLPModels diff --git a/src/enums.jl b/src/enums.jl index ddfb9124..afe25555 100644 --- a/src/enums.jl +++ b/src/enums.jl @@ -10,29 +10,6 @@ WARN = 5, ERROR = 6) -@enum(FixedVariableTreatments::Int, - RELAX_BOUND = 1, - MAKE_PARAMETER = 2) - -@enum(InertiaCorrectionMethod::Int, - INERTIA_AUTO = 1, - INERTIA_BASED = 2, - INERTIA_FREE = 3) - -@enum(KKTLinearSystem::Int, - SPARSE_KKT_SYSTEM = 1, - SPARSE_UNREDUCED_KKT_SYSTEM = 2, - DENSE_KKT_SYSTEM = 3, - DENSE_CONDENSED_KKT_SYSTEM = 4, -) - -@enum(HessianApproximation::Int, - EXACT_HESSIAN = 1, - DENSE_BFGS = 2, - DENSE_DAMPED_BFGS = 3, - SPARSE_COMPACT_LBFGS = 4, -) - @enum(BFGSInitStrategy::Int, SCALAR1 = 1, SCALAR2 = 2, @@ -66,23 +43,44 @@ INVALID_NUMBER_HESSIAN_LAGRANGIAN = -11, ) -const STATUS_OUTPUT_DICT = Dict( - SOLVE_SUCCEEDED => "Optimal Solution Found.", - SOLVED_TO_ACCEPTABLE_LEVEL => "Solved To Acceptable Level.", - SEARCH_DIRECTION_BECOMES_TOO_SMALL => "Search Direction is becoming Too Small.", - DIVERGING_ITERATES => "Iterates divering; problem might be unbounded.", - MAXIMUM_ITERATIONS_EXCEEDED => "Maximum Number of Iterations Exceeded.", - MAXIMUM_WALLTIME_EXCEEDED => "Maximum wall-clock Time Exceeded.", - RESTORATION_FAILED => "Restoration Failed", - INFEASIBLE_PROBLEM_DETECTED => "Converged to a point of local infeasibility. Problem may be infeasible.", - INVALID_NUMBER_DETECTED => "Invalid number in NLP function or derivative detected.", - ERROR_IN_STEP_COMPUTATION => "Error in step computation.", - NOT_ENOUGH_DEGREES_OF_FREEDOM => "Problem has too few degrees of freedom.", - USER_REQUESTED_STOP => "Stopping optimization at current point as requested by user.", - INTERNAL_ERROR => "Internal Error.", - INVALID_NUMBER_OBJECTIVE => "Invalid number in NLP objective function detected.", - INVALID_NUMBER_GRADIENT => "Invalid number in NLP objective gradient detected.", - INVALID_NUMBER_CONSTRAINTS => "Invalid number in NLP constraint function detected.", - INVALID_NUMBER_JACOBIAN => "Invalid number in NLP constraint Jacobian detected.", - INVALID_NUMBER_HESSIAN_LAGRANGIAN => "Invalid number in NLP Hessian Lagrangian detected.", -) +function get_status_output(status, opt) + if status == SOLVE_SUCCEEDED + return @sprintf "Optimal Solution Found (tol = %5.1e)." opt.tol + elseif status == SOLVED_TO_ACCEPTABLE_LEVEL + return @sprintf "Solved To Acceptable Level (tol = %5.1e)." opt.acceptable_tol + elseif status == SEARCH_DIRECTION_BECOMES_TOO_SMALL + return "Search Direction is becoming Too Small." + elseif status == DIVERGING_ITERATES + return "Iterates divering; problem might be unbounded." + elseif status == MAXIMUM_ITERATIONS_EXCEEDED + return "Maximum Number of Iterations Exceeded." + elseif status == MAXIMUM_WALLTIME_EXCEEDED + return "Maximum wall-clock Time Exceeded." + elseif status == RESTORATION_FAILED + return "Restoration Failed" + elseif status == INFEASIBLE_PROBLEM_DETECTED + return "Converged to a point of local infeasibility. Problem may be infeasible." + elseif status == INVALID_NUMBER_DETECTED + return "Invalid number in NLP function or derivative detected." + elseif status == ERROR_IN_STEP_COMPUTATION + return "Error in step computation." 
+ elseif status == NOT_ENOUGH_DEGREES_OF_FREEDOM + return "Problem has too few degrees of freedom." + elseif status == USER_REQUESTED_STOP + return "Stopping optimization at current point as requested by user." + elseif status == INTERNAL_ERROR + return "Internal Error." + elseif status == INVALID_NUMBER_OBJECTIVE + return "Invalid number in NLP objective function detected." + elseif status == INVALID_NUMBER_GRADIENT + return "Invalid number in NLP objective gradient detected." + elseif status == INVALID_NUMBER_CONSTRAINTS + return "Invalid number in NLP constraint function detected." + elseif status == INVALID_NUMBER_JACOBIAN + return "Invalid number in NLP constraint Jacobian detected." + elseif status == INVALID_NUMBER_HESSIAN_LAGRANGIAN + return "Invalid number in NLP Hessian Lagrangian detected." + else + error("status code is not valid") + end +end diff --git a/src/matrixtools.jl b/src/matrixtools.jl index da7d9838..13ba1ebb 100644 --- a/src/matrixtools.jl +++ b/src/matrixtools.jl @@ -3,11 +3,11 @@ abstract type AbstractSparseMatrixCOO{Tv,Ti<:Integer} <: AbstractSparseMatrix{Tv,Ti} end -mutable struct SparseMatrixCOO{Tv,Ti<:Integer, VTv<:AbstractVector{Tv}} <: AbstractSparseMatrixCOO{Tv,Ti} +mutable struct SparseMatrixCOO{Tv,Ti,VTv<:AbstractVector{Tv},VTi<:AbstractVector{Ti}} <: AbstractSparseMatrixCOO{Tv,Ti} m::Int n::Int - I::Vector{Ti} - J::Vector{Ti} + I::VTi + J::VTi V::VTv end size(A::SparseMatrixCOO) = (A.m,A.n) @@ -37,12 +37,12 @@ function diag!(dest::AbstractVector{T}, src::AbstractMatrix{T}) where T dest[i] = src[i, i] end end - -function get_tril_to_full(csc::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti<:Integer} +get_tril_to_full(csc::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti} = get_tril_to_full(Tv,csc) +function get_tril_to_full(T,csc::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti<:Integer} cscind = SparseMatrixCSC{Int,Ti}(Symmetric( SparseMatrixCSC{Int,Ti}(csc.m,csc.n,csc.colptr,csc.rowval,collect(1:nnz(csc))),:L)) - return SparseMatrixCSC{Tv,Ti}( - csc.m,csc.n,cscind.colptr,cscind.rowval,Vector{Tv}(undef,nnz(cscind))),view(csc.nzval,cscind.nzval) + return SparseMatrixCSC{T,Ti}( + csc.m,csc.n,cscind.colptr,cscind.rowval,Vector{T}(undef,nnz(cscind))),view(csc.nzval,cscind.nzval) end function tril_to_full!(dense::Matrix{T}) where T for i=1:size(dense,1) @@ -52,13 +52,23 @@ function tril_to_full!(dense::Matrix{T}) where T end end -function SparseMatrixCSC{Tv, Ti}(coo::SparseMatrixCOO{Tv,Ti}) where {Tv,Ti <: Integer} - cscind = sparse(coo.I,coo.J,ones(Ti,nnz(coo)),coo.m,coo.n) - nzval = Vector{Tv}(undef,nnz(cscind)) - fill!(nzval, zero(Tv)) - return SparseMatrixCSC{Tv, Ti}( +function coo_to_csc(coo) + cscind = sparse( + coo.I, + coo.J, + fill!(similar(coo.I,nnz(coo)), 1), + coo.m, + coo.n + ) + nzval = similar(coo.V, nnz(cscind)) + fill!(nzval, 0) + + csc = SparseMatrixCSC( coo.m,coo.n,cscind.colptr,cscind.rowval,nzval, ) + map = get_mapping(csc, coo) + + return csc, map end function _get_coo_to_csc(I,J,cscind,map) @@ -78,7 +88,7 @@ function transfer!(dest::SparseMatrixCSC, src::SparseMatrixCOO, map::Vector{Int} end function get_mapping(dest::SparseMatrixCSC{Tv1,Ti1}, src::SparseMatrixCOO{Tv2,Ti2}) where {Tv1,Tv2,Ti1,Ti2} - map = Vector{Int}(undef,nnz(src)) + map = similar(src.V, Int, nnz(src)) dest.nzval .= 1:nnz(dest) _get_coo_to_csc(src.I, src.J, dest, map) return map @@ -88,7 +98,7 @@ function Matrix{Tv}(coo::SparseMatrixCOO{Tv,Ti}) where {Tv,Ti<:Integer} return Matrix{Tv}(undef,coo.m,coo.n) end -Base.copyto!(dense::Matrix{Tv},coo::SparseMatrixCOO{Tv,Ti}) where {Tv,Ti<:Integer} = 
_copyto!(dense,coo.I,coo.J,coo.V) +Base.copyto!(dense::Matrix,coo::SparseMatrixCOO) = _copyto!(dense,coo.I,coo.J,coo.V) function _copyto!(dense::Matrix{Tv},I,J,V) where Tv fill!(dense, zero(Tv)) for i=1:length(I) diff --git a/src/nlpmodels.jl b/src/nlpmodels.jl index b990a938..6508c036 100644 --- a/src/nlpmodels.jl +++ b/src/nlpmodels.jl @@ -1,101 +1,662 @@ - -function _set_scaling!(con_scale::AbstractVector, jac::SparseMatrixCOO) - @simd for i in 1:nnz(jac) - row = @inbounds jac.I[i] - @inbounds con_scale[row] = max(con_scale[row], abs(jac.V[i])) +function get_index_constraints(lvar, uvar, lcon, ucon, fixed_variable_treatment, equality_treatment) + + ncon = length(lcon) + + if ncon > 0 + if equality_treatment == EnforceEquality + ind_eq = findall(lcon .== ucon) + ind_ineq = findall(lcon .!= ucon) + else + ind_eq = similar(lvar, Int, 0) + ind_ineq = similar(lvar, Int, ncon) .= 1:ncon + end + xl = [lvar;view(lcon,ind_ineq)] + xu = [uvar;view(ucon,ind_ineq)] + else + ind_eq = similar(lvar, Int, 0) + ind_ineq = similar(lvar, Int, 0) + xl = lvar + xu = uvar + end + + if fixed_variable_treatment == MakeParameter + ind_fixed = findall(xl .== xu) + ind_lb = findall((xl .!= -Inf) .* (xl .!= xu)) + ind_ub = findall((xu .!= Inf) .* (xl .!= xu)) + else + ind_fixed = similar(xl, Int, 0) + ind_lb = findall(xl .!=-Inf) + ind_ub = findall(xu .!= Inf) end + + ind_llb = findall((lvar .!= -Inf).*(uvar .== Inf)) + ind_uub = findall((lvar .== -Inf).*(uvar .!= Inf)) + + # Return named tuple + return ( + ind_eq = ind_eq, + ind_ineq = ind_ineq, + ind_fixed = ind_fixed, + ind_lb = ind_lb, + ind_ub = ind_ub, + ind_llb = ind_llb, + ind_uub = ind_uub, + ) end -function _set_scaling!(con_scale::AbstractVector, jac::Matrix) - for row in 1:size(jac, 1) - for col in 1:size(jac, 2) - @inbounds con_scale[row] = max(con_scale[row], abs(jac[row, col])) - end + + +abstract type AbstractCallback{T,VT} end +abstract type AbstractFixedVariableTreatment end +abstract type AbstractEqualityTreatment end +struct EnforceEquality <: AbstractEqualityTreatment end +struct RelaxEquality <: AbstractEqualityTreatment end + +struct MakeParameter{VT,VI} <: AbstractFixedVariableTreatment + fixed::VI + fixedj::VI + fixedh::VI + grad_storage::VT +end +struct RelaxBound <: AbstractFixedVariableTreatment end + +struct SparseCallback{ + T, + VT <: AbstractVector{T}, + VI <: AbstractVector{Int}, + I <: AbstractNLPModel{T, VT}, + FH <: AbstractFixedVariableTreatment, + EH <: AbstractEqualityTreatment, + } <: AbstractCallback{T, VT} + + nlp::I + nvar::Int + ncon::Int + nnzj::Int + nnzh::Int + + con_buffer::VT + jac_buffer::VT + grad_buffer::VT + hess_buffer::VT + + jac_I::VI + jac_J::VI + hess_I::VI + hess_J::VI + + obj_scale::Base.RefValue{T} + con_scale::VT + jac_scale::VT + + fixed_handler::FH + equality_handler::EH +end + +struct DenseCallback{ + T, + VT <: AbstractVector{T}, + MT <: AbstractMatrix{T}, + I <: AbstractNLPModel{T, VT}, + FH <: AbstractFixedVariableTreatment, + EH <: AbstractEqualityTreatment, + } <: AbstractCallback{T, VT} + + nlp::I + nvar::Int + ncon::Int + + con_buffer::VT + jac_buffer::MT + grad_buffer::VT + + obj_scale::Base.RefValue{T} + con_scale::VT + + fixed_handler::FH + equality_handler::EH +end + + +create_array(cb::AbstractCallback, args...) = similar(get_x0(cb.nlp), args...) 
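For illustration, here is roughly what the new `get_index_constraints` returns (a sketch with made-up bounds, using the positional signature and the `MakeParameter`/`EnforceEquality` handlers introduced above):

    # Hypothetical 3-variable, 2-constraint data: x3 is fixed, constraint 1 is an equality.
    lvar, uvar = [0.0, -Inf, 1.0], [Inf, 2.0, 1.0]
    lcon, ucon = [0.0, -1.0], [0.0, 1.0]
    ind = MadNLP.get_index_constraints(lvar, uvar, lcon, ucon, MadNLP.MakeParameter, MadNLP.EnforceEquality)
    ind.ind_eq     # == [1] : equality constraints
    ind.ind_ineq   # == [2] : inequality constraints (these rows receive slack variables)
    ind.ind_fixed  # == [3] : fixed variables, handled as parameters by MakeParameter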
+ +function set_obj_scale!(obj_scale, f::VT,max_gradient) where {T, VT <: AbstractVector{T}} + obj_scale[] = min(one(T), max_gradient / norm(f,Inf)) +end +function set_con_scale_sparse!(con_scale::VT, jac_I,jac_buffer, max_gradient) where {T, VT <: AbstractVector{T}} + fill!(con_scale, one(T)) + _set_con_scale_sparse!(con_scale, jac_I, jac_buffer) + map!(x-> min(one(T), max_gradient / x), con_scale, con_scale) +end +function _set_con_scale_sparse!(con_scale, jac_I, jac_buffer) + @inbounds @simd for i in 1:length(jac_I) + row = jac_I[i] + con_scale[row] = max(con_scale[row], abs(jac_buffer[i])) end end +function set_jac_scale_sparse!(jac_scale::VT, con_scale, jac_I) where {T, VT <: AbstractVector{T}} + copyto!(jac_scale, @view(con_scale[jac_I])) +end +function set_con_scale_dense!(con_scale::VT, jac_buffer, max_gradient) where {T, VT <: AbstractVector{T}} + con_scale .= min.(one(T), max_gradient ./ mapreduce(abs, max, jac_buffer, dims=2, init=one(T))) +end + -""" - scale_constraints!( - nlp::AbstractNLPModel, - con_scale::AbstractVector, - jac::AbstractMatrix; - max_gradient=1e-8, +function create_dense_fixed_handler( + fixed_variable_treatment::Type{MakeParameter}, + nlp, + opt ) + lvar = get_lvar(nlp) + uvar = get_uvar(nlp) + + isfixed = (lvar .== uvar) + fixed = findall(isfixed) -Compute the scaling of the constraints associated -to the nonlinear model `nlp`. By default, Ipopt's scaling -is applied. The user can write its own function to scale -appropriately any custom `AbstractNLPModel`. + return MakeParameter( + fixed, + similar(fixed,0), + similar(fixed,0), + similar(lvar, length(fixed)) + ) +end -### Notes +function create_sparse_fixed_handler( + fixed_variable_treatment::Type{MakeParameter}, + nlp, + jac_I, + jac_J, + hess_I, + hess_J, + hess_buffer; + opt + ) + lvar = get_lvar(nlp) + uvar = get_uvar(nlp) + nnzj = get_nnzj(nlp.meta) + nnzh = get_nnzh(nlp.meta) + + isfixed = (lvar .== uvar) + + fixed = findall(isfixed) + fixedj = findall(@view(isfixed[jac_J])) + fixedh = findall(@view(isfixed[hess_I]) .|| @view(isfixed[hess_J])) + nfixed = length(fixed) -This function assumes that the Jacobian `jac` has been evaluated -before calling this function. 
+ nnzh = nnzh + nfixed + resize!(hess_I, nnzh) + resize!(hess_J, nnzh) + resize!(hess_buffer, nnzh) + copyto!(@view(hess_I[end-nfixed+1:end]), fixed) + copyto!(@view(hess_J[end-nfixed+1:end]), fixed) -""" -function scale_constraints!( - nlp::AbstractNLPModel{T}, - con_scale::AbstractVector, - jac::AbstractMatrix; - max_gradient=1e-8, -) where T - fill!(con_scale, zero(T)) - _set_scaling!(con_scale, jac) - @inbounds for i in eachindex(con_scale) - con_scale[i] = min(one(T), max_gradient / con_scale[i]) - end + fixed_handler = MakeParameter( + fixed, + fixedj, + fixedh, + similar(lvar, length(fixed)) + ) + + return fixed_handler, nnzj, nnzh +end + +function create_sparse_fixed_handler( + fixed_variable_treatment::Type{RelaxBound}, + nlp, + jac_I, + jac_J, + hess_I, + hess_J, + hess_buffer; + opt + ) + + fixed_handler = RelaxBound() + + + return fixed_handler, get_nnzj(nlp.meta), get_nnzh(nlp.meta) end -""" - scale_objective( - nlp::AbstractNLPModel, - grad::AbstractVector; - max_gradient=1e-8, +function create_callback( + ::Type{SparseCallback}, + nlp::AbstractNLPModel{T, VT}, + opt, + ) where {T, VT} + + n = get_nvar(nlp) + m = get_ncon(nlp) + nnzj = get_nnzj(nlp.meta) + nnzh = get_nnzh(nlp.meta) + + + x0 = get_x0(nlp) + + con_buffer = similar(x0, m) + grad_buffer = similar(x0, n) + jac_buffer = similar(x0, nnzj) + hess_buffer = similar(x0, nnzh) + + jac_I = similar(x0, Int, nnzj) + jac_J = similar(x0, Int, nnzj) + hess_I = similar(x0, Int, nnzh) + hess_J = similar(x0, Int, nnzh) + + obj_scale = Ref(one(T)) + con_scale = similar(jac_buffer, m) + jac_scale = similar(jac_buffer, nnzj) + + NLPModels.jac_structure!(nlp,jac_I,jac_J) + NLPModels.hess_structure!(nlp,hess_I,hess_J) + + fixed_handler, nnzj, nnzh = create_sparse_fixed_handler( + opt.fixed_variable_treatment, + nlp, + jac_I, jac_J, hess_I, hess_J, + hess_buffer; + opt = opt ) -Compute the scaling of the objective associated to the -nonlinear model `nlp`. By default, Ipopt's scaling -is applied. The user can write its own function to scale -appropriately the objective of any custom `AbstractNLPModel`. + equality_handler = opt.equality_treatment() -### Notes -This function assumes that the gradient `gradient` has been evaluated -before calling this function. 
+ return SparseCallback( + nlp, + n,m,nnzj,nnzh, + con_buffer, + jac_buffer, + grad_buffer, + hess_buffer, + jac_I, + jac_J, + hess_I, + hess_J, + obj_scale, + con_scale, + jac_scale, + fixed_handler, + equality_handler + ) +end + +function create_callback( + ::Type{DenseCallback}, + nlp::AbstractNLPModel{T, VT}, + opt, + ) where {T, VT} -""" -function scale_objective( - nlp::AbstractNLPModel{T}, - grad::AbstractVector; - max_gradient=1e-8, -) where T - return min(one(T), max_gradient / normInf(grad)) + n = get_nvar(nlp) + m = get_ncon(nlp) + + x0 = similar(get_x0(nlp)) + con_buffer = similar(x0, m) + jac_buffer = similar(x0, m, n) + grad_buffer = similar(x0, n) + obj_scale = Ref(one(T)) + con_scale = similar(x0, m) + + fixed_handler = create_dense_fixed_handler( + opt.fixed_variable_treatment, + nlp, + opt + ) + + equality_handler = opt.equality_treatment() + + return DenseCallback( + nlp, + n,m, + con_buffer, + jac_buffer, + grad_buffer, + obj_scale, + con_scale, + fixed_handler, + equality_handler + ) end -function get_index_constraints(nlp::AbstractNLPModel; fixed_variable_treatment=MAKE_PARAMETER) - ind_ineq = findall(get_lcon(nlp) .!= get_ucon(nlp)) - xl = [get_lvar(nlp);view(get_lcon(nlp),ind_ineq)] - xu = [get_uvar(nlp);view(get_ucon(nlp),ind_ineq)] - if fixed_variable_treatment == MAKE_PARAMETER - ind_fixed = findall(xl .== xu) - ind_lb = findall((xl .!= -Inf) .* (xl .!= xu)) - ind_ub = findall((xu .!= Inf) .* (xl .!= xu)) - else - ind_fixed = Int[] - ind_lb = findall(xl .!=-Inf) - ind_ub = findall(xu .!= Inf) +function _treat_fixed_variable_initialize!(fixed_handler::RelaxBound, x0, lvar, uvar) end +function _treat_fixed_variable_initialize!(fixed_handler::MakeParameter, x0, lvar, uvar) + fixed = fixed_handler.fixed + copyto!(@view(x0[fixed]), @view(lvar[fixed])) + fill!(@view(lvar[fixed]), -Inf) + fill!(@view(uvar[fixed]), Inf) +end + +function _treat_equality_initialize!(equality_handler::EnforceEquality, lcon, ucon, tol) end +function _treat_equality_initialize!(equality_handler::RelaxEquality, lcon, ucon, tol) + set_initial_bounds!( + lcon, + ucon, + tol + ) +end + +function initialize!( + cb::AbstractCallback, + x, xl, xu, y0, rhs, + ind_ineq, + opt + ) + + x0= variable(x) + lvar= variable(xl) + uvar= variable(xu) + + fixed_handler = cb.fixed_handler + nlp = cb.nlp + + con_buffer =cb.con_buffer + grad_buffer =cb.grad_buffer + + + x0 .= get_x0(nlp) + y0 .= get_y0(nlp) + lvar .= get_lvar(nlp) + uvar .= get_uvar(nlp) + lcon = copy(get_lcon(nlp)) + ucon = copy(get_ucon(nlp)) + + _treat_fixed_variable_initialize!(fixed_handler, x0, lvar, uvar) + _treat_equality_initialize!(cb.equality_handler, lcon, ucon, opt.tol) + + set_initial_bounds!( + lvar, + uvar, + opt.tol + ) + initialize_variables!( + x0, + lvar, + uvar, + opt.bound_push, + opt.bound_fac + ) + + NLPModels.cons!(nlp,x0,con_buffer) + + slack(xl) .= view(lcon, ind_ineq) + slack(xu) .= view(ucon, ind_ineq) + rhs .= (lcon.==ucon).*lcon + copyto!(slack(x), @view(con_buffer[ind_ineq])) + + set_initial_bounds!( + slack(xl), + slack(xu), + opt.tol + ) + initialize_variables!( + slack(x), + slack(xl), + slack(xu), + opt.bound_push, + opt.bound_fac + ) + +end + +function set_scaling!( + cb::SparseCallback, + x, xl, xu, y0, rhs, + ind_ineq, + nlp_scaling_max_gradient + ) + + x0= variable(x) + + nlp = cb.nlp + obj_scale = cb.obj_scale + con_scale = cb.con_scale + jac_scale = cb.jac_scale + con_buffer =cb.con_buffer + jac_buffer =cb.jac_buffer + grad_buffer =cb.grad_buffer + + # Set scaling + NLPModels.jac_coord!(nlp,x0,jac_buffer) + 
set_con_scale_sparse!(con_scale, cb.jac_I, jac_buffer, nlp_scaling_max_gradient) + set_jac_scale_sparse!(jac_scale, con_scale, cb.jac_I) + + NLPModels.grad!(nlp,x0,grad_buffer) + set_obj_scale!(obj_scale, grad_buffer, nlp_scaling_max_gradient) + + con_scale_slk = @view(con_scale[ind_ineq]) + y0 ./= con_scale + rhs .*= con_scale + slack(x) .*= con_scale_slk + slack(xl) .*= con_scale_slk + slack(xu) .*= con_scale_slk +end + +function set_scaling!( + cb::DenseCallback, + x, xl, xu, y0, rhs, + ind_ineq, + nlp_scaling_max_gradient + ) + + x0= variable(x) + + nlp = cb.nlp + obj_scale = cb.obj_scale + con_scale = cb.con_scale + con_buffer =cb.con_buffer + jac_buffer =cb.jac_buffer + grad_buffer =cb.grad_buffer + + # Set scaling + jac_dense!(nlp,x0,jac_buffer) + set_con_scale_dense!(con_scale, jac_buffer, nlp_scaling_max_gradient) + + NLPModels.grad!(nlp,x0,grad_buffer) + set_obj_scale!(obj_scale, grad_buffer, nlp_scaling_max_gradient) + + con_scale_slk = @view(con_scale[ind_ineq]) + y0 ./= con_scale + rhs .*= con_scale + slack(x) .*= con_scale_slk + slack(xl) .*= con_scale_slk + slack(xu) .*= con_scale_slk +end + +function _jac_sparsity_wrapper!( + cb::SparseCallback, + I::AbstractVector,J::AbstractVector + ) + + copyto!(I, cb.jac_I) + copyto!(J, cb.jac_J) +end + +function _hess_sparsity_wrapper!( + cb::SparseCallback, + I::AbstractVector,J::AbstractVector + ) + copyto!(I, cb.hess_I) + copyto!(J, cb.hess_J) +end + + +function _eval_cons_wrapper!(cb::AbstractCallback,x::AbstractVector,c::AbstractVector) + NLPModels.cons!(cb.nlp, x,c) + c .*= cb.con_scale + return c +end + + +function _eval_jac_wrapper!( + cb::SparseCallback, + x::AbstractVector, + jac::AbstractVector + ) + + nnzj_orig = get_nnzj(cb.nlp.meta) + NLPModels.jac_coord!(cb.nlp, x, jac) + jac .*= cb.jac_scale + + _treat_fixed_variable_jac_coord!(cb.fixed_handler, cb, x, jac) +end +function _treat_fixed_variable_jac_coord!(fixed_handler::RelaxBound, cb, x, jac) end +function _treat_fixed_variable_jac_coord!(fixed_handler::MakeParameter, cb::SparseCallback{T}, x, jac) where T + fill!(@view(jac[fixed_handler.fixedj]), zero(T)) +end + +function _eval_grad_f_wrapper!( + cb::AbstractCallback{T}, + x::AbstractVector, + grad::AbstractVector + ) where T + + NLPModels.grad!(cb.nlp, x, grad) + grad .*= cb.obj_scale[] + _treat_fixed_variable_grad!(cb.fixed_handler, cb, x, grad) +end +function _treat_fixed_variable_grad!(fixed_handler::RelaxBound, cb, x, grad) end +function _treat_fixed_variable_grad!(fixed_handler::MakeParameter, cb::AbstractCallback{T}, x, grad) where T + fixed_handler.grad_storage .= @view(grad[fixed_handler.fixed]) + map!( + (x,y)->x-y, + @view(grad[fixed_handler.fixed]), + @view(x[cb.fixed_handler.fixed]), + @view(get_lvar(cb.nlp)[cb.fixed_handler.fixed]) + ) +end + +function _eval_f_wrapper(cb::AbstractCallback,x::AbstractVector) + return NLPModels.obj(cb.nlp,x)* cb.obj_scale[] +end + +function _eval_lag_hess_wrapper!( + cb::SparseCallback{T}, + x::AbstractVector, + y::AbstractVector, + hess::AbstractVector; + obj_weight = 1.0 + ) where T + + nnzh_orig = get_nnzh(cb.nlp.meta) + + cb.con_buffer .= y .* cb.con_scale + NLPModels.hess_coord!( + cb.nlp, x, cb.con_buffer, view(hess, 1:nnzh_orig); + obj_weight=obj_weight * cb.obj_scale[] + ) + _treat_fixed_variable_hess_coord!(cb.fixed_handler, cb, hess) +end + +function _treat_fixed_variable_hess_coord!(fixed_handler::RelaxBound, cb, hess) end +function _treat_fixed_variable_hess_coord!(fixed_handler::MakeParameter, cb::SparseCallback{T}, hess) where T + nnzh_orig = 
get_nnzh(cb.nlp.meta) + fill!(@view(hess[fixed_handler.fixedh]), zero(T)) + fill!(@view(hess[nnzh_orig+1:end]), one(T)) +end + +function _eval_jac_wrapper!( + cb::SparseCallback{T}, + x::AbstractVector, + jac::AbstractMatrix + ) where T + + jac_buffer = cb.jac_buffer + _eval_jac_wrapper!(cb, x, jac_buffer) + fill!(jac, zero(T)) + @inbounds @simd for k=1:length(cb.jac_I) + i,j = cb.jac_I[k], cb.jac_J[k] + jac[i,j] += jac_buffer[k] end +end + +function _eval_lag_hess_wrapper!( + cb::SparseCallback{T}, + x::AbstractVector, + y::AbstractVector, + hess::AbstractMatrix; + obj_weight = one(T) + ) where T + + hess_buffer = cb.hess_buffer + _eval_lag_hess_wrapper!(cb, x, y, hess_buffer; obj_weight=obj_weight * cb.obj_scale[]) + fill!(hess, zero(T)) + @inbounds @simd for k=1:length(cb.hess_I) + i,j = cb.hess_I[k], cb.hess_J[k] + hess[i,j] += hess_buffer[k] + end + _treat_fixed_variable_hess_dense!(cb.fixed_handler, cb, hess) +end +function _treat_fixed_variable_hess_dense!(fixed_handler::RelaxBound, cb, hess) end +function _treat_fixed_variable_hess_dense!(fixed_handler::MakeParameter, cb::SparseCallback{T}, hess) where T + nnzh_orig = get_nnzh(cb.nlp.meta) + + fixed = fixed_handler.fixed + _set_diag!(hess, fixed, one(T)) +end - ind_llb = findall((get_lvar(nlp) .!= -Inf).*(get_uvar(nlp) .== Inf)) - ind_uub = findall((get_lvar(nlp) .== -Inf).*(get_uvar(nlp) .!= Inf)) - # Return named tuple - return ( - ind_ineq=ind_ineq, - ind_fixed=ind_fixed, - ind_lb=ind_lb, - ind_ub=ind_ub, - ind_llb=ind_llb, - ind_uub=ind_uub, +function _eval_jac_wrapper!( + cb::DenseCallback{T}, + x::AbstractVector, + jac::AbstractMatrix + ) where T + + jac_dense!(cb.nlp, x, jac) + jac .*= cb.con_scale + _treat_fixed_variable_jac_dense!(cb.fixed_handler, cb, jac) +end +function _treat_fixed_variable_jac_dense!(fixed_handler::RelaxBound, cb::DenseCallback, jac) end +function _treat_fixed_variable_jac_dense!(fixed_handler::MakeParameter, cb::DenseCallback{T}, jac) where T + jac[:,fixed_handler.fixed] .= zero(T) +end + + +function _eval_lag_hess_wrapper!( + cb::DenseCallback{T}, + x::AbstractVector, + y::AbstractVector, + hess::AbstractMatrix; + obj_weight = one(T) + ) where T + + hess_dense!( + cb.nlp, x, y, hess; + obj_weight=obj_weight * cb.obj_scale[] + ) + + _treat_fixed_variable_lag_hess_dense!(cb.fixed_handler, cb, hess) +end +function _treat_fixed_variable_lag_hess_dense!(fixed_handler::RelaxBound, cb::DenseCallback, hess) end +function _treat_fixed_variable_lag_hess_dense!(fixed_handler::MakeParameter, cb::DenseCallback{T}, hess) where T + fixed = fixed_handler.fixed + hess[:,fixed] .= zero(T) + hess[fixed,:] .= zero(T) + _set_diag!(hess, fixed, one(T)) +end + + + +function update_z!(cb, zl, zu, jacl) + _update_z!(cb.fixed_handler, zl, zu, jacl, get_minimize(cb.nlp) ? 
1 : -1) +end + +function _update_z!(fixed_handler::MakeParameter, zl, zu, jacl, sense) + zl_r = @view(zl[fixed_handler.fixed]) + zu_r = @view(zu[fixed_handler.fixed]) + jacl_r = @view(jacl[fixed_handler.fixed]) + map!( + (x,y)->sense * max(x+y,0), + zl_r, + fixed_handler.grad_storage, + jacl_r + ) + map!( + (x,y)->sense * max(-(x+y),0), + zu_r, + fixed_handler.grad_storage, + jacl_r, ) end +function _update_z!(fixed_handler::RelaxBound, zl, zu, jacl, sense) end +function _set_diag!(A, inds, a) + @inbounds @simd for i in inds + A[i,i] = a + end +end diff --git a/src/options.jl b/src/options.jl index 30a0e71c..480f983e 100644 --- a/src/options.jl +++ b/src/options.jl @@ -20,7 +20,7 @@ end # General options rethrow_error::Bool = true disable_garbage_collector::Bool = false - blas_num_threads::Int = 1 + blas_num_threads::Int = 1 linear_solver::Type = LapackCPUSolver iterator::Type = RichardsonIterator @@ -40,15 +40,18 @@ end # NLP options kappa_d::Float64 = 1e-5 - fixed_variable_treatment::FixedVariableTreatments = MAKE_PARAMETER + fixed_variable_treatment::Type = MakeParameter + equality_treatment::Type = EnforceEquality + boudn_relax_factor::Float64 = 1e-8 jacobian_constant::Bool = false hessian_constant::Bool = false - kkt_system::KKTLinearSystem = SPARSE_KKT_SYSTEM - hessian_approximation::HessianApproximation = EXACT_HESSIAN + kkt_system::Type = SparseKKTSystem + hessian_approximation::Type = ExactHessian + callback::Type = SparseCallback # initialization options dual_initialized::Bool = false - inertia_correction_method::InertiaCorrectionMethod = INERTIA_AUTO + inertia_correction_method::Type = InertiaAuto constr_mult_init_max::Float64 = 1e3 bound_push::Float64 = 1e-2 bound_fac::Float64 = 1e-2 @@ -88,15 +91,54 @@ end # Barrier mu_init::Float64 = 1e-1 - mu_min::Float64 = 1e-11 + mu_min::Float64 = min(1e-4, tol ) / (barrier_tol_factor + 1) # by courtesy of Ipopt mu_superlinear_decrease_power::Float64 = 1.5 tau_min::Float64 = 0.99 mu_linear_decrease_factor::Float64 = .2 end +# smart option presets +function MadNLPOptions(nlp::AbstractNLPModel{T}) where T + + # if dense callback is defined, we use dense callback + is_dense_callback = + hasmethod(MadNLP.jac_dense!, Tuple{typeof(nlp), AbstractVector, AbstractMatrix}) && + hasmethod(MadNLP.hess_dense!, Tuple{typeof(nlp), AbstractVector, AbstractVector, AbstractMatrix}) + + callback = is_dense_callback ? DenseCallback : SparseCallback + + # if dense callback is used, we use dense condensed kkt system + kkt_system = is_dense_callback ? DenseCondensedKKTSystem : SparseKKTSystem + + # if dense kkt system, we use a dense linear solver + linear_solver = is_dense_callback ? 
LapackCPUSolver : default_sparse_solver(nlp) + + tol = get_tolerance(T,kkt_system) + + return MadNLPOptions( + callback = callback, + kkt_system = kkt_system, + linear_solver = linear_solver, + tol = tol, + ) +end + +get_tolerance(::Type{T},::Type{KKT}) where {T, KKT} = 10^round(log10(eps(T))/2) +get_tolerance(::Type{T},::Type{SparseCondensedKKTSystem}) where T = 10^(round(log10(eps(T))/4)) + +function default_sparse_solver(nlp::AbstractNLPModel) + if isdefined(Main, :MadNLPHSL) + Main.MadNLPHSL.Ma27Solver + elseif isdefined(Main, :MadNLPMumps) + Main.MadNLPMumps.MumpsSolver + else + UmfpackSolver + end +end + function check_option_sanity(options) - is_kkt_dense = (options.kkt_system == DENSE_KKT_SYSTEM) || (options.kkt_system == DENSE_CONDENSED_KKT_SYSTEM) - is_hess_approx_dense = (options.hessian_approximation == DENSE_BFGS) || (options.hessian_approximation == DENSE_DAMPED_BFGS) + is_kkt_dense = options.kkt_system <: AbstractDenseKKTSystem + is_hess_approx_dense = options.hessian_approximation <: Union{BFGS, DampedBFGS} if input_type(options.linear_solver) == :csc && is_kkt_dense error("[options] Sparse Linear solver is not supported in dense mode.\n"* "Please use a dense linear solver or change `kkt_system` ") @@ -114,9 +156,9 @@ function print_ignored_options(logger,option_dict) end end -function load_options(; linear_solver=default_linear_solver(), options...) +function load_options(nlp; options...) # Initiate interior-point options - opt_ipm = MadNLPOptions(linear_solver=linear_solver) + opt_ipm = MadNLPOptions(nlp) linear_solver_options = set_options!(opt_ipm, options) check_option_sanity(opt_ipm) # Initiate linear-solver options diff --git a/src/quasi_newton.jl b/src/quasi_newton.jl index 32ff9d8d..e7b7cc1d 100644 --- a/src/quasi_newton.jl +++ b/src/quasi_newton.jl @@ -36,9 +36,6 @@ curvature(::Val{SCALAR3}, sk, yk) = 0.5 * (curvature(Val(SCALAR1), sk, yk) + cur curvature(::Val{SCALAR4}, sk, yk) = sqrt(curvature(Val(SCALAR1), sk, yk) * curvature(Val(SCALAR2), sk, yk)) -struct ExactHessian{T, VT} <: AbstractHessian{T, VT} end -ExactHessian{T, VT}(n::Int) where {T, VT} = ExactHessian{T, VT}() - """ BFGS{T, VT} <: AbstractQuasiNewton{T, VT} @@ -51,7 +48,7 @@ B_{k+1} = B_k - \frac{(B_k s_k)(B_k s_k)^⊤}{s_k^⊤ B_k s_k} + \frac{y_k y_k^ The matrix is not updated if ``s_k^⊤ y_k < 10^{-8}``. 
""" -struct BFGS{T, VT} <: AbstractQuasiNewton{T, VT} +struct BFGS{T, VT <: AbstractVector{T}} <: AbstractQuasiNewton{T, VT} init_strategy::BFGSInitStrategy sk::VT yk::VT @@ -60,8 +57,13 @@ struct BFGS{T, VT} <: AbstractQuasiNewton{T, VT} last_x::VT last_jv::VT end -function BFGS{T, VT}(n::Int; init_strategy=SCALAR1) where {T, VT} - return BFGS{T, VT}( +function create_quasi_newton( + ::Type{BFGS}, + cb::AbstractCallback{T,VT}, + n; + init_strategy = SCALAR1 + ) where {T,VT} + BFGS( init_strategy, VT(undef, n), VT(undef, n), @@ -84,7 +86,7 @@ function update!(qn::BFGS{T, VT}, Bk::AbstractMatrix, sk::AbstractVector, yk::Ab return true end -struct DampedBFGS{T, VT} <: AbstractQuasiNewton{T, VT} +struct DampedBFGS{T, VT <: AbstractVector{T}} <: AbstractQuasiNewton{T, VT} init_strategy::BFGSInitStrategy sk::VT yk::VT @@ -94,8 +96,13 @@ struct DampedBFGS{T, VT} <: AbstractQuasiNewton{T, VT} last_x::VT last_jv::VT end -function DampedBFGS{T, VT}(n::Int; init_strategy=SCALAR1) where {T, VT} - return DampedBFGS{T, VT}( +function create_quasi_newton( + ::Type{DampedBFGS}, + cb::AbstractCallback{T,VT}, + n; + init_strategy = SCALAR1 + ) where {T,VT} + return DampedBFGS( init_strategy, VT(undef, n), VT(undef, n), @@ -142,7 +149,7 @@ end """ CompactLBFGS{T, VT} <: AbstractQuasiNewton """ -mutable struct CompactLBFGS{T, VT, MT} <: AbstractQuasiNewton{T, VT} +mutable struct CompactLBFGS{T, VT <: AbstractVector{T}, MT <: AbstractMatrix{T}} <: AbstractQuasiNewton{T, VT} init_strategy::BFGSInitStrategy sk::VT yk::VT @@ -167,30 +174,36 @@ mutable struct CompactLBFGS{T, VT, MT} <: AbstractQuasiNewton{T, VT} _w2::VT end -function CompactLBFGS{T, VT, MT}(n::Int; max_mem=6, init_strategy=SCALAR1) where {T, VT<:AbstractVector{T}, MT<:AbstractMatrix{T}} - return CompactLBFGS{T, VT, MT}( +function create_quasi_newton( + ::Type{CompactLBFGS}, + cb::AbstractCallback{T,VT}, + n; + max_mem=6, + init_strategy = SCALAR1 + ) where {T, VT} + return CompactLBFGS( init_strategy, - zeros(T, n), - zeros(T, n), - zeros(T, n), - zeros(T, n), - zeros(T, n), + fill!(create_array(cb, n), zero(T)), + fill!(create_array(cb, n), zero(T)), + fill!(create_array(cb, n), zero(T)), + fill!(create_array(cb, n), zero(T)), + fill!(create_array(cb, n), zero(T)), max_mem, 0, - zeros(T, n, 0), - zeros(T, n, 0), - zeros(T, n, 0), - zeros(T, 0, 0), - zeros(T, 0, 0), - zeros(T, 0, 0), - zeros(T, 0, 0), - zeros(T, 0, 0), - zeros(T, 0, 0), - zeros(T, 0, 0), - zeros(T, 0, 0), - zeros(T, 0), - zeros(T, 0), - zeros(T, 0), + fill!(create_array(cb, n, 0), zero(T)), + fill!(create_array(cb, n, 0), zero(T)), + fill!(create_array(cb, n, 0), zero(T)), + fill!(create_array(cb, 0, 0), zero(T)), + fill!(create_array(cb, 0, 0), zero(T)), + fill!(create_array(cb, 0, 0), zero(T)), + fill!(create_array(cb, 0, 0), zero(T)), + fill!(create_array(cb, 0, 0), zero(T)), + fill!(create_array(cb, 0, 0), zero(T)), + fill!(create_array(cb, 0, 0), zero(T)), + fill!(create_array(cb, 0, 0), zero(T)), + fill!(create_array(cb, 0), zero(T)), + fill!(create_array(cb, 0), zero(T)), + fill!(create_array(cb, 0), zero(T)), ) end @@ -277,26 +290,26 @@ function update!(qn::CompactLBFGS{T, VT, MT}, Bk, sk, yk) where {T, VT, MT} # [ U₂ ] [ U₂ ] # Step 1: σₖ I - sigma = curvature(Val(qn.init_strategy), sk, yk) # σₖ - Bk .= sigma # Hₖ .= σₖ I (diagonal Hessian approx.) + sigma = curvature(Val(qn.init_strategy), sk, yk) # σₖ + Bk .= sigma # Hₖ .= σₖ I (diagonal Hessian approx.) 
# Step 2: Mₖ = σₖ Sₖᵀ Sₖ + Lₖ Dₖ⁻¹ Lₖᵀ - qn.DkLk .= (one(T) ./ qn.Dk) .* qn.Lk' # DₖLₖ = Dₖ⁻¹ Lₖᵀ - qn.Mk .= qn.SdotS # Mₖ = Sₖᵀ Sₖ - mul!(qn.Mk, qn.Lk, qn.DkLk, one(T), sigma) # Mₖ = σₖ Sₖᵀ Sₖ + Lₖ Dₖ⁻¹ Lₖᵀ + qn.DkLk .= (one(T) ./ qn.Dk) .* qn.Lk' # DₖLₖ = Dₖ⁻¹ Lₖᵀ + qn.Mk .= qn.SdotS # Mₖ = Sₖᵀ Sₖ + mul!(qn.Mk, qn.Lk, qn.DkLk, one(T), sigma) # Mₖ = σₖ Sₖᵀ Sₖ + Lₖ Dₖ⁻¹ Lₖᵀ symmetrize!(qn.Mk) copyto!(qn.Jk, qn.Mk) - cholesky!(qn.Jk) # Mₖ = Jₖᵀ Jₖ (factorization) + cholesky!(qn.Jk) # Mₖ = Jₖᵀ Jₖ (factorization) # Step 3: Nₖ = [U₁ U₂] U1 = view(qn.U, :, 1:k) - copyto!(U1, qn.Sk) # U₁ = Sₖ - mul!(U1, qn.Yk, qn.DkLk, one(T), sigma) # U₁ = σₖ Sₖ + Yₖ Dₖ⁻¹ Lₖ + copyto!(U1, qn.Sk) # U₁ = Sₖ + mul!(U1, qn.Yk, qn.DkLk, one(T), sigma) # U₁ = σₖ Sₖ + Yₖ Dₖ⁻¹ Lₖ BLAS.trsm!('R', 'U', 'N', 'N', one(T), qn.Jk, U1) # U₁ = Jₖ⁻ᵀ (σₖ Sₖ + Yₖ Dₖ⁻¹ Lₖ) U2 = view(qn.U, :, 1+k:2*k) - δ .= .-one(T) ./ sqrt.(qn.Dk) # δ = 1 / √Dₖ - U2 .= δ' .* qn.Yk # U₂ = (1 / √Dₖ) * Yₖ + δ .= .-one(T) ./ sqrt.(qn.Dk) # δ = 1 / √Dₖ + U2 .= δ' .* qn.Yk # U₂ = (1 / √Dₖ) * Yₖ return true end @@ -304,3 +317,6 @@ function init!(qn::CompactLBFGS, Bk::AbstractArray, sk::AbstractVector, yk::Abst return end + +struct ExactHessian{T, VT} <: AbstractHessian{T, VT} end +create_quasi_newton(::Type{ExactHessian}, cb::AbstractCallback{T,VT}, n) where {T,VT} = ExactHessian{T, VT}() diff --git a/src/utils.jl b/src/utils.jl index 723e97ef..8a553cd4 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,9 +1,5 @@ abstract type AbstractOptions end -# Build info -default_linear_solver() = UmfpackSolver -default_dense_solver() = LapackCPUSolver - # MadNLPLogger @kwdef mutable struct MadNLPLogger print_level::LogLevels = INFO @@ -84,7 +80,7 @@ function _madnlp_unsafe_wrap(vec::VT, n, shift=1) where VT end # Type definitions for noncontiguous views -const SubVector{Tv} = SubArray{Tv, 1, Vector{Tv}, Tuple{Vector{Int}}, false} +const SubVector{Tv,VT, VI} = SubArray{Tv, 1, VT, Tuple{VI}, false} @kwdef mutable struct MadNLPCounters k::Int = 0 # total iteration counter @@ -97,6 +93,7 @@ const SubVector{Tv} = SubArray{Tv, 1, Vector{Tv}, Tuple{Vector{Int}}, false} eval_function_time::Float64 = 0. solver_time::Float64 = 0. total_time::Float64 = 0. + init_time::Float64 = 0. obj_cnt::Int = 0 obj_grad_cnt::Int = 0 @@ -104,7 +101,18 @@ const SubVector{Tv} = SubArray{Tv, 1, Vector{Tv}, Tuple{Vector{Int}}, false} con_jac_cnt::Int = 0 lag_hess_cnt::Int = 0 + t1::Float64 = 0. + t2::Float64 = 0. + t3::Float64 = 0. + t4::Float64 = 0. + t5::Float64 = 0. + t6::Float64 = 0. + t7::Float64 = 0. + t8::Float64 = 0. 
+ acceptable_cnt::Int = 0 + unsuccessful_iterate::Int = 0 + restoration_fail_count::Int = 0 end """ @@ -143,8 +151,8 @@ function timing_linear_solver(ips; ntrials=10) t_build, t_factorize, t_backsolve = (0.0, 0.0, 0.0) for _ in 1:ntrials t_build += @elapsed build_kkt!(ips.kkt) - t_factorize += @elapsed factorize!(ips.linear_solver) - t_backsolve += @elapsed solve_refine_wrapper!(ips,ips.d,ips.p) + t_factorize += @elapsed factorize!(ips.kkt.linear_solver) + t_backsolve += @elapsed solve!(ips.kkt, ips.d) end return ( time_build_kkt = t_build / ntrials, diff --git a/test/kkt_test.jl b/test/kkt_test.jl index e4ec1f4f..bdd8fbab 100644 --- a/test/kkt_test.jl +++ b/test/kkt_test.jl @@ -1,7 +1,6 @@ using LinearAlgebra @testset "$KKTVector" for KKTVector in [ - MadNLP.ReducedKKTVector, MadNLP.UnreducedKKTVector, ] T = Float64 @@ -9,7 +8,10 @@ using LinearAlgebra n, m = 10, 20 nlb, nub = 5, 6 - rhs = KKTVector{T, VT}(n, m, nlb, nub) + ind_lb = [2,3,4] + ind_ub = [4,5,6] + + rhs = KKTVector(VT, n, m, nlb, nub, ind_lb, ind_ub) @test length(rhs) == length(MadNLP.full(rhs)) @test MadNLP.number_primal(rhs) == length(MadNLP.primal(rhs)) == n @test MadNLP.number_dual(rhs) == length(MadNLP.dual(rhs)) == m diff --git a/test/madnlp_dense.jl b/test/madnlp_dense.jl index 9fff7df4..495bad6a 100644 --- a/test/madnlp_dense.jl +++ b/test/madnlp_dense.jl @@ -8,13 +8,14 @@ using Random function _compare_dense_with_sparse( kkt_system, n, m, ind_fixed, ind_eq; - inertia=MadNLP.INERTIA_BASED, + inertia=MadNLP.InertiaBased, ) - for (T,tol,atol) in [(Float32,1e-3,1e-1), (Float64,1e-8,1e-6)] + for (T,tol,atol) in [(Float32,1e-3,1e0), (Float64,1e-8,1e-6)] sparse_options = Dict{Symbol, Any}( - :kkt_system=>MadNLP.SPARSE_KKT_SYSTEM, + :kkt_system=>MadNLP.SparseKKTSystem, + :callback=>MadNLP.SparseCallback, :inertia_correction_method=>inertia, :linear_solver=>MadNLP.LapackCPUSolver, :print_level=>MadNLP.ERROR, @@ -22,13 +23,14 @@ function _compare_dense_with_sparse( ) dense_options = Dict{Symbol, Any}( :kkt_system=>kkt_system, + :callback=>MadNLP.DenseCallback, :inertia_correction_method=>inertia, :linear_solver=>MadNLP.LapackCPUSolver, :print_level=>MadNLP.ERROR, :tol=>tol ) - nlp = MadNLPTests.DenseDummyQP{T}(; n=n, m=m, fixed_variables=ind_fixed, equality_cons=ind_eq) + nlp = MadNLPTests.DenseDummyQP(zeros(T,n), m=m, fixed_variables=ind_fixed, equality_cons=ind_eq) solver = MadNLPSolver(nlp; sparse_options...) solverd = MadNLPSolver(nlp; dense_options...) 
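The updated tests above also illustrate the new option style: settings that used to be enum flags (removed from `src/enums.jl`) are now passed as plain Julia types. A minimal usage sketch (problem and option values chosen arbitrarily, not taken from the tests verbatim):

    using MadNLP, MadNLPTests
    nlp = MadNLPTests.HS15Model()
    solver = MadNLPSolver(
        nlp;
        kkt_system = MadNLP.SparseKKTSystem,             # formerly MadNLP.SPARSE_KKT_SYSTEM
        inertia_correction_method = MadNLP.InertiaFree,  # formerly MadNLP.INERTIA_FREE
        print_level = MadNLP.ERROR,
    )
    results = MadNLP.solve!(solver)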
@@ -38,7 +40,7 @@ function _compare_dense_with_sparse( # Check that dense formulation matches exactly sparse formulation @test result_dense.status == MadNLP.SOLVE_SUCCEEDED - @test result_sparse.iter == result_dense.iter + @test result_sparse.counters.k == result_dense.counters.k @test result_sparse.objective ≈ result_dense.objective atol=atol @test result_sparse.solution ≈ result_dense.solution atol=atol @test result_sparse.multipliers ≈ result_dense.multipliers atol=atol @@ -49,50 +51,49 @@ function _compare_dense_with_sparse( end end -@testset "MadNLP: API $(kkt_type)" for (kkt_type, kkt_options) in [ - (MadNLP.DenseKKTSystem, MadNLP.DENSE_KKT_SYSTEM), - (MadNLP.DenseCondensedKKTSystem, MadNLP.DENSE_CONDENSED_KKT_SYSTEM), +@testset "MadNLP: API $(kkt)" for kkt in [ + MadNLP.DenseKKTSystem, + MadNLP.DenseCondensedKKTSystem, ] n = 10 # number of variables @testset "Unconstrained" begin dense_options = Dict{Symbol, Any}( - :kkt_system=>kkt_options, + :kkt_system=>kkt, :linear_solver=>MadNLP.LapackCPUSolver, ) m = 0 - nlp = MadNLPTests.DenseDummyQP(; n=n, m=m) + nlp = MadNLPTests.DenseDummyQP(zeros(n); m=m) solverd = MadNLPSolver(nlp; dense_options...) kkt = solverd.kkt - @test isa(kkt, kkt_type) @test isempty(kkt.jac) - @test solverd.linear_solver.dense === kkt.aug_com + @test solverd.kkt.linear_solver.A === kkt.aug_com @test size(kkt.hess) == (n, n) @test length(kkt.pr_diag) == n @test length(kkt.du_diag) == m # Test that using a sparse solver is forbidden in dense mode dense_options_error = Dict{Symbol, Any}( - :kkt_system=>kkt_options, + :kkt_system=>kkt, :linear_solver=>MadNLP.UmfpackSolver, ) @test_throws Exception MadNLPSolver(nlp; dense_options_error...) end @testset "Constrained" begin dense_options = Dict{Symbol, Any}( - :kkt_system=>MadNLP.DENSE_KKT_SYSTEM, + :kkt_system=>MadNLP.DenseKKTSystem, :linear_solver=>MadNLP.LapackCPUSolver, ) m = 5 - nlp = MadNLPTests.DenseDummyQP(; n=n, m=m) + nlp = MadNLPTests.DenseDummyQP(zeros(n); m=m) solverd = MadNLPSolver(nlp; dense_options...) ns = length(solverd.ind_ineq) kkt = solverd.kkt @test isa(kkt, MadNLP.DenseKKTSystem) @test size(kkt.jac) == (m, n) - @test solverd.linear_solver.dense === kkt.aug_com + @test solverd.kkt.linear_solver.A === kkt.aug_com @test size(kkt.hess) == (n, n) @test length(kkt.pr_diag) == n + ns @test length(kkt.du_diag) == m @@ -100,39 +101,36 @@ end end -@testset "MadNLP: option kkt_system=$(kkt_system)" for kkt_system in [MadNLP.DENSE_KKT_SYSTEM, MadNLP.DENSE_CONDENSED_KKT_SYSTEM] +@testset "MadNLP: option kkt_system=$(kkt)" for kkt in [MadNLP.DenseKKTSystem, MadNLP.DenseCondensedKKTSystem] @testset "Size: ($n, $m)" for (n, m) in [(10, 0), (10, 5), (50, 10)] - _compare_dense_with_sparse(kkt_system, n, m, Int[], Int[]) - _compare_dense_with_sparse(kkt_system, n, m, Int[], Int[]; inertia=MadNLP.INERTIA_FREE) + _compare_dense_with_sparse(kkt, n, m, Int[], Int[]) + _compare_dense_with_sparse(kkt, n, m, Int[], Int[]; inertia=MadNLP.InertiaFree) end # Test with non-trivial equality constraints. 
@testset "Equality constraints" begin n, m = 20, 15 - _compare_dense_with_sparse(kkt_system, n, m, Int[], Int[1, 8]) - _compare_dense_with_sparse(kkt_system, n, m, Int[], Int[1, 8]; inertia=MadNLP.INERTIA_FREE) + _compare_dense_with_sparse(kkt, n, m, Int[], Int[1, 8]) + _compare_dense_with_sparse(kkt, n, m, Int[], Int[1, 8]; inertia=MadNLP.InertiaFree) end @testset "Fixed variables" begin n, m = 10, 5 - _compare_dense_with_sparse(kkt_system, n, m, Int[1, 2], Int[]) - _compare_dense_with_sparse(kkt_system, n, m, Int[1, 2], Int[]; inertia=MadNLP.INERTIA_FREE) + _compare_dense_with_sparse(kkt, n, m, Int[1, 2], Int[]) + _compare_dense_with_sparse(kkt, n, m, Int[1, 2], Int[]; inertia=MadNLP.InertiaFree) end end -@testset "MadNLP: custom KKT constructor" begin - T, VT, MT = Float64, Vector{Float64}, Matrix{Float64} - QN = MadNLP.ExactHessian{T, VT} - nlp = MadNLPTests.DenseDummyQP{T}(; n=10, m=5) - KKT = MadNLP.DenseKKTSystem{T, VT, MT, QN} - solver = MadNLPSolver{T, KKT}(nlp; linear_solver=LapackCPUSolver) - @test isa(solver.kkt, KKT) -end +# Now we do not support custom KKT constructor +# @testset "MadNLP: custom KKT constructor" begin +# solver = MadNLPSolver(nlp; kkt_system = MadNLP.DenseKKTSystem, linear_solver=LapackCPUSolver) +# @test isa(solver.kkt, KKT) +# end @testset "MadNLP: restart (PR #113)" begin n, m = 10, 5 - nlp = MadNLPTests.DenseDummyQP(; n=n, m=m) + nlp = MadNLPTests.DenseDummyQP(zeros(n); m=m) sparse_options = Dict{Symbol, Any}( - :kkt_system=>MadNLP.SPARSE_KKT_SYSTEM, - :linear_solver=>MadNLP.LapackCPUSolver, + :kkt_system=>MadNLP.SparseKKTSystem, + :callback=>MadNLP.SparseCallback, :print_level=>MadNLP.ERROR, ) @@ -144,74 +142,3 @@ end @test solver.status == MadNLP.SOLVE_SUCCEEDED end -@testset "MadNLP: $QN + $KKT" for QN in [ - MadNLP.DENSE_BFGS, - MadNLP.DENSE_DAMPED_BFGS, -], KKT in [ - MadNLP.DENSE_KKT_SYSTEM, - MadNLP.DENSE_CONDENSED_KKT_SYSTEM, -] - @testset "Size: ($n, $m)" for (n, m) in [(10, 0), (10, 5), (50, 10)] - nlp = MadNLPTests.DenseDummyQP{Float64}(; n=n, m=m) - solver_exact = MadNLP.MadNLPSolver( - nlp; - print_level=MadNLP.ERROR, - kkt_system=MadNLP.DENSE_KKT_SYSTEM, - linear_solver=LapackCPUSolver, - ) - results_ref = MadNLP.solve!(solver_exact) - - solver_qn = MadNLP.MadNLPSolver( - nlp; - print_level=MadNLP.ERROR, - kkt_system=KKT, - hessian_approximation=QN, - linear_solver=LapackCPUSolver, - ) - results_qn = MadNLP.solve!(solver_qn) - - @test results_qn.status == MadNLP.SOLVE_SUCCEEDED - @test results_qn.objective ≈ results_ref.objective atol=1e-6 - @test results_qn.solution ≈ results_ref.solution atol=1e-6 - @test solver_qn.cnt.lag_hess_cnt == 0 - # TODO: this test is currently breaking the CI, investigate why. 
- # @test solver_exact.y ≈ solver_qn.y atol=1e-4 - end -end - -@testset "MadNLP: LBFGS" begin - @testset "HS15" begin - nlp = MadNLPTests.HS15Model() - solver_qn = MadNLP.MadNLPSolver( - nlp; - hessian_approximation=MadNLP.SPARSE_COMPACT_LBFGS, - print_level=MadNLP.ERROR, - ) - results_qn = MadNLP.solve!(solver_qn) - @test results_qn.status == MadNLP.SOLVE_SUCCEEDED - end - @testset "Size: ($n, $m)" for (n, m) in [(10, 0), (10, 5), (50, 10)] - nlp = MadNLPTests.DenseDummyQP{Float64}(; ) - # Reference solve with exact Hessian - solver_exact = MadNLP.MadNLPSolver( - nlp; - print_level=MadNLP.ERROR, - ) - results_ref = MadNLP.solve!(solver_exact) - - # LBFGS solve - solver_qn = MadNLP.MadNLPSolver( - nlp; - hessian_approximation=MadNLP.SPARSE_COMPACT_LBFGS, - print_level=MadNLP.ERROR, - ) - results_qn = MadNLP.solve!(solver_qn) - @test results_qn.status == MadNLP.SOLVE_SUCCEEDED - @test results_qn.objective ≈ results_ref.objective atol=1e-6 - @test results_qn.solution ≈ results_ref.solution atol=1e-6 - @test solver_qn.cnt.lag_hess_cnt == 0 - # TODO: this test is currently breaking the CI, investigate why. - # @test solver_exact.y ≈ solver_qn.y atol=1e-4 - end -end - diff --git a/test/madnlp_quasi_newton.jl b/test/madnlp_quasi_newton.jl new file mode 100644 index 00000000..1f916d93 --- /dev/null +++ b/test/madnlp_quasi_newton.jl @@ -0,0 +1,85 @@ +@testset "MadNLP: $QN + $KKT" for QN in [ + MadNLP.BFGS, + MadNLP.DampedBFGS, +], KKT in [ + MadNLP.DenseKKTSystem, + MadNLP.DenseCondensedKKTSystem, +] + @testset "Size: ($n, $m)" for (n, m) in [(10, 0), (10, 5), (50, 10)] + nlp = MadNLPTests.DenseDummyQP(zeros(Float64, n); m=m) + solver_exact = MadNLP.MadNLPSolver( + nlp; + print_level=MadNLP.ERROR, + kkt_system=MadNLP.DenseKKTSystem, + linear_solver=LapackCPUSolver, + ) + results_ref = MadNLP.solve!(solver_exact) + + solver_qn = MadNLP.MadNLPSolver( + nlp; + print_level=MadNLP.ERROR, + kkt_system=KKT, + hessian_approximation=QN, + linear_solver=LapackCPUSolver, + ) + results_qn = MadNLP.solve!(solver_qn) + + @test results_qn.status == MadNLP.SOLVE_SUCCEEDED + @test results_qn.objective ≈ results_ref.objective atol=1e-6 + @test results_qn.solution ≈ results_ref.solution atol=1e-6 + @test solver_qn.cnt.lag_hess_cnt == 0 + @test solver_exact.y ≈ solver_qn.y atol=1e-4 + end +end + +@testset "MadNLP: LBFGS" begin + @testset "HS15" begin + nlp = MadNLPTests.HS15Model() + solver_qn = MadNLP.MadNLPSolver( + nlp; + callback = MadNLP.SparseCallback, + kkt_system = MadNLP.SparseKKTSystem, + hessian_approximation=MadNLP.CompactLBFGS, + print_level=MadNLP.ERROR, + ) + results_qn = MadNLP.solve!(solver_qn) + @test results_qn.status == MadNLP.SOLVE_SUCCEEDED + + end + @testset "Size: ($n, $m)" for (n, m) in [(10, 0), (10, 5), (50, 10)] + x0 = zeros(Float64,n) + nlp = MadNLPTests.DenseDummyQP(x0; m=m) + # Reference solve with exact Hessian + solver_exact = MadNLP.MadNLPSolver( + nlp; + callback = MadNLP.SparseCallback, + kkt_system = MadNLP.SparseKKTSystem, + print_level=MadNLP.ERROR, + ) + results_ref = MadNLP.solve!(solver_exact) + + # LBFGS solve + solver_qn = MadNLP.MadNLPSolver( + nlp; + callback = MadNLP.SparseCallback, + kkt_system = MadNLP.SparseKKTSystem, + hessian_approximation=MadNLP.CompactLBFGS, + print_level=MadNLP.ERROR, + ) + results_qn = MadNLP.solve!(solver_qn) + + @test results_qn.status == MadNLP.SOLVE_SUCCEEDED + @test results_qn.objective ≈ results_ref.objective atol=1e-6 + @test results_qn.solution ≈ results_ref.solution atol=1e-6 + @test solver_qn.cnt.lag_hess_cnt == 0 + @test 
solver_exact.y ≈ solver_qn.y atol=1e-4 + + # Test accuracy of KKT solver with LBFGS + b, x, w = solver_qn.p, solver_qn.d, solver_qn._w4 + fill!(b.values, 1.0) + MadNLP.solve_refine_wrapper!(x, solver_qn, b, w) + mul!(w, solver_qn.kkt, x) + @assert norm(w.values .- b.values, Inf) <= 1e-6 + end +end + diff --git a/test/madnlp_test.jl b/test/madnlp_test.jl index 5cc383fe..bdd7e0be 100644 --- a/test/madnlp_test.jl +++ b/test/madnlp_test.jl @@ -9,6 +9,7 @@ testset = [ [ "LapackCPU-BUNCHKAUFMAN", ()->MadNLP.Optimizer( + kkt_system=MadNLP.DenseKKTSystem, linear_solver=MadNLP.LapackCPUSolver, lapack_algorithm=MadNLP.BUNCHKAUFMAN, print_level=MadNLP.ERROR), @@ -17,6 +18,7 @@ testset = [ [ "LapackCPU-LU", ()->MadNLP.Optimizer( + kkt_system=MadNLP.DenseKKTSystem, linear_solver=MadNLP.LapackCPUSolver, lapack_algorithm=MadNLP.LU, print_level=MadNLP.ERROR), @@ -25,6 +27,7 @@ testset = [ [ "LapackCPU-QR", ()->MadNLP.Optimizer( + kkt_system=MadNLP.DenseKKTSystem, linear_solver=MadNLP.LapackCPUSolver, lapack_algorithm=MadNLP.QR, print_level=MadNLP.ERROR), @@ -33,6 +36,7 @@ testset = [ [ "LapackCPU-CHOLESKY", ()->MadNLP.Optimizer( + kkt_system=MadNLP.DenseKKTSystem, linear_solver=MadNLP.LapackCPUSolver, lapack_algorithm=MadNLP.CHOLESKY, print_level=MadNLP.ERROR), @@ -41,30 +45,50 @@ testset = [ [ "Option: RELAX_BOUND", ()->MadNLP.Optimizer( - fixed_variable_treatment=MadNLP.RELAX_BOUND, + fixed_variable_treatment=MadNLP.RelaxBound, print_level=MadNLP.ERROR), - [], - true + [] ], [ "Option: AUGMENTED KKT SYSTEM", ()->MadNLP.Optimizer( - kkt_system=MadNLP.SPARSE_UNREDUCED_KKT_SYSTEM, + kkt_system=MadNLP.SparseUnreducedKKTSystem, print_level=MadNLP.ERROR), - ["infeasible","eigmina"] # numerical errors + [] + ], + [ + "Option: SPARSE CONDENSED KKT SYSTEM", + ()->MadNLP.Optimizer( + kkt_system=MadNLP.SparseCondensedKKTSystem, + equality_treatment = MadNLP.RelaxEquality, + fixed_variable_treatment = MadNLP.RelaxBound, + tol = 1e-4, + print_level=MadNLP.ERROR), + [] ], [ - "Option: INERTIA_FREE & AUGMENTED KKT SYSTEM", + "Option: InertiaFree", ()->MadNLP.Optimizer( - inertia_correction_method=MadNLP.INERTIA_FREE, - kkt_system=MadNLP.SPARSE_UNREDUCED_KKT_SYSTEM, + inertia_correction_method=MadNLP.InertiaFree, print_level=MadNLP.ERROR), - ["infeasible","eigmina"] # numerical errors + [] + ], + [ + "Option: InertiaFree & AUGMENTED KKT SYSTEM", + ()->MadNLP.Optimizer( + inertia_correction_method=MadNLP.InertiaFree, + kkt_system=MadNLP.SparseUnreducedKKTSystem, + print_level=MadNLP.ERROR), + [] ], [ - "Option: INERTIA_FREE", + "Option: InertiaFree & SPARSE CONDENSED KKT SYSTEM", ()->MadNLP.Optimizer( - inertia_correction_method=MadNLP.INERTIA_FREE, + inertia_correction_method=MadNLP.InertiaFree, + kkt_system=MadNLP.SparseCondensedKKTSystem, + equality_treatment = MadNLP.RelaxEquality, + fixed_variable_treatment = MadNLP.RelaxBound, + tol = 1e-4, print_level=MadNLP.ERROR), [] ], @@ -121,6 +145,7 @@ end @testset "MadNLP callback allocations" begin nlp = MadNLPTests.HS15Model() solver = MadNLPSolver(nlp) + MadNLP.initialize!(solver) kkt = solver.kkt x, f, c = solver.x, solver.f, solver.c # Precompile @@ -145,6 +170,7 @@ end @testset "MadNLP timings" begin nlp = MadNLPTests.HS15Model() solver = MadNLPSolver(nlp) + MadNLP.initialize!(solver) time_callbacks = MadNLP.timing_callbacks(solver) @test isa(time_callbacks, NamedTuple) time_linear_solver = MadNLP.timing_linear_solver(solver) diff --git a/test/minlp_test.jl b/test/minlp_test.jl index 5e7014a1..cea1c02f 100644 --- a/test/minlp_test.jl +++ b/test/minlp_test.jl @@ -1,4 
+1,5 @@ const OPTIMIZER = ()->MadNLP.Optimizer( + kkt_system=MadNLP.DenseKKTSystem, linear_solver=MadNLP.LapackCPUSolver, print_level=MadNLP.ERROR ) diff --git a/test/runtests.jl b/test/runtests.jl index cd365282..1159e2c4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -19,6 +19,7 @@ import SparseArrays: sparse @testset "MadNLP test" begin include("madnlp_test.jl") include("madnlp_dense.jl") + include("madnlp_quasi_newton.jl") end @testset "MINLP test" begin