diff --git a/.gitignore b/.gitignore
index 381e0b6d2..57bff62ff 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 *.jl.*.cov
 *.jl.mem
 deps/deps.jl
+scratch.jl
diff --git a/Manifest.toml b/Manifest.toml
index 1ac395a21..5460bf30c 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -9,9 +9,9 @@ uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
 
 [[BinaryProvider]]
 deps = ["Libdl", "Pkg", "SHA", "Test"]
-git-tree-sha1 = "b530fbeb6f41ab5a83fbe3db1fcbe879334bcd2d"
+git-tree-sha1 = "48c147e63431adbcee69bc40b04c3f0fec0a4982"
 uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
-version = "0.4.2"
+version = "0.5.0"
 
 [[Combinatorics]]
 deps = ["LinearAlgebra", "Polynomials", "Test"]
@@ -21,15 +21,15 @@ version = "0.7.0"
 
 [[Compat]]
 deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
-git-tree-sha1 = "ae262fa91da6a74e8937add6b613f58cd56cdad4"
+git-tree-sha1 = "ff2595695fc4f14427358ce2593f867085c45dcb"
 uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
-version = "1.1.0"
+version = "1.2.0"
 
 [[Conda]]
 deps = ["Compat", "JSON", "VersionParsing"]
-git-tree-sha1 = "a47f9a2c7b80095e6a935536795635522fe27f5d"
+git-tree-sha1 = "85b5bf3ffcf4f39abe019dab1dd00a0aead8d882"
 uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d"
-version = "1.0.1"
+version = "1.0.2"
 
 [[Dates]]
 deps = ["Printf"]
@@ -78,9 +78,9 @@ uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
 
 [[MathOptInterface]]
 deps = ["Compat", "Unicode"]
-git-tree-sha1 = "f2791f990529b7aea1fcb9fd45b636cd3a9d1e79"
+git-tree-sha1 = "ba12e7ce825c1458c03f88aae84fa630d882d303"
 uuid = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
-version = "0.6.0"
+version = "0.6.1"
 
 [[Mmap]]
 uuid = "a63ad114-7e13-5084-954f-fe012c677804"
@@ -91,9 +91,9 @@ uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 
 [[Polynomials]]
 deps = ["LinearAlgebra", "SparseArrays", "Test"]
-git-tree-sha1 = "af883752c4935425a3ab30031a2069254c451b8b"
+git-tree-sha1 = "1a1eae52956658a6acae6fa1b6d6c3d488192895"
 uuid = "f27b6e38-b328-58d1-80ce-0feddd5e7a45"
-version = "0.5.0"
+version = "0.5.1"
 
 [[Printf]]
 deps = ["Unicode"]
@@ -139,7 +139,7 @@ deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
 uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [[UUIDs]]
-deps = ["Random"]
+deps = ["Random", "SHA"]
 uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
 [[Unicode]]
@@ -149,4 +149,4 @@ uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
 deps = ["Compat"]
 git-tree-sha1 = "c9d5aa108588b978bd859554660c8a5c4f2f7669"
 uuid = "81def892-9a0e-5fdd-b105-ffc91e053289"
-version = "1.1.2"
+version = "1.1.3"
diff --git a/README.md b/README.md
index 186d27850..a5e5635d5 100644
--- a/README.md
+++ b/README.md
@@ -3,3 +3,29 @@
 Under construction. Only works on Julia v0.7+.
 
 An interior point solver for general convex conic optimization problems. An extension of methods in [CVXOPT](https://github.com/cvxopt/cvxopt/blob/master/src/python/coneprog.py) and [Alfonso](https://github.com/dpapp-github/alfonso). 
+
+Solves a pair of primal and dual cone programs:
+
+primal (over x,s):
+```
+  min  c'x :          duals
+    b - Ax == 0       (y)
+    h - Gx == s in K  (z)
+```
+dual (over z,y):
+```
+  max  -b'y - h'z :      duals
+    c + A'y + G'z == 0   (x)
+                z in K*  (s)
+```
+where K is a convex cone defined as a Cartesian product of recognized primitive cones, and K* is its dual cone.
+
+The primal-dual optimality conditions are:
+```
+         b - Ax == 0
+         h - Gx == s
+  c + A'y + G'z == 0
+            s'z == 0
+              s in K
+              z in K*
+```
\ No newline at end of file
diff --git a/examples/envelope/envelope.jl b/examples/envelope/envelope.jl
index 9417e4fa7..c8667a393 100644
--- a/examples/envelope/envelope.jl
+++ b/examples/envelope/envelope.jl
@@ -21,15 +21,17 @@ function build_envelope!(alf::Alfonso.AlfonsoOpt, npoly::Int, deg::Int, n::Int,
     (L, U, pts, P0, P, w) = Alfonso.interpolate(n, d, calc_w=true)
     LWts = fill(binomial(n+d-1, n), n)
     wtVals = 1.0 .- pts.^2
-    PWts = [Array((qr(Diagonal(sqrt.(wtVals[:,j]))*P[:,1:LWts[j]])).Q) for j in 1:n]
+    PWts = [Array((qr(Diagonal(sqrt.(wtVals[:, j])) * P[:, 1:LWts[j]])).Q) for j in 1:n]
 
     # set up problem data
     if dense
-        A = repeat(Array(1.0I, U, U), outer=(1, npoly)) # dense A
+        A = repeat(Array(1.0I, U, U), outer=(1, npoly))
     else
-        A = repeat(sparse(1.0I, U, U), outer=(1, npoly)) # sparse A
+        A = repeat(sparse(1.0I, U, U), outer=(1, npoly)) # TODO maybe construct without repeat
     end
+    G = Diagonal(-1.0I, npoly*U) # TODO uniformscaling
     b = w
+    h = zeros(npoly*U)
     if use_data
         # use provided data in data folder
         c = vec(readdlm(joinpath(@__DIR__, "data/c$(size(A,2)).txt"), ',', Float64))
@@ -37,12 +39,12 @@ function build_envelope!(alf::Alfonso.AlfonsoOpt, npoly::Int, deg::Int, n::Int,
         # generate random data
         Random.seed!(rseed)
         LDegs = binomial(n+deg, n)
-        c = vec(P0[:,1:LDegs]*rand(-9:9, LDegs, npoly))
+        c = vec(P0[:, 1:LDegs]*rand(-9:9, LDegs, npoly))
     end
 
     cone = Alfonso.Cone([Alfonso.SumOfSquaresCone(U, [P, PWts...]) for k in 1:npoly], [1+(k-1)*U:k*U for k in 1:npoly])
 
-    return Alfonso.load_data!(alf, A, b, c, cone)
+    return Alfonso.load_data!(alf, c, A, b, G, h, cone)
 end
 
 # alf = Alfonso.AlfonsoOpt(maxiter=100, verbose=true)
diff --git a/examples/lp/lp.jl b/examples/lp/lp.jl
index 6948f4818..8593aaaa9 100644
--- a/examples/lp/lp.jl
+++ b/examples/lp/lp.jl
@@ -32,9 +32,11 @@ function build_lp!(alf::Alfonso.AlfonsoOpt, m::Int, n::Int; use_data::Bool=false
     if tosparse && !issparse(A)
         A = sparse(A)
     end
+    G = Diagonal(-1.0I, n) # TODO uniformscaling
+    h = zeros(n)
     cone = Alfonso.Cone([Alfonso.NonnegativeCone(n)], [1:n])
 
-    return Alfonso.load_data!(alf, A, b, c, cone)
+    return Alfonso.load_data!(alf, c, A, b, G, h, cone)
 end
 
 # alf = Alfonso.AlfonsoOpt(maxiter=100, verbose=true)
diff --git a/examples/namedpoly/namedpoly.jl b/examples/namedpoly/namedpoly.jl
index e23ea80b4..657e6117d 100644
--- a/examples/namedpoly/namedpoly.jl
+++ b/examples/namedpoly/namedpoly.jl
@@ -68,9 +68,12 @@ function build_namedpoly!(alf::Alfonso.AlfonsoOpt, polyname::Symbol, d::Int)
     A = ones(1, U)
     b = [1.0,]
     c = [fn(pts[j,:]...) for j in 1:U]
+    G = Diagonal(-1.0I, U) # TODO uniformscaling?
+    h = zeros(U)
+
     cone = Alfonso.Cone([Alfonso.SumOfSquaresCone(U, [P0, PWts...])], [1:U])
 
-    return Alfonso.load_data!(alf, A, b, c, cone)
+    return Alfonso.load_data!(alf, c, A, b, G, h, cone)
 end
 
 # alf = Alfonso.AlfonsoOpt(maxiter=100, verbose=false)
diff --git a/src/Alfonso.jl b/src/Alfonso.jl
index 38c905639..0a59cadf8 100644
--- a/src/Alfonso.jl
+++ b/src/Alfonso.jl
@@ -6,7 +6,15 @@ module Alfonso
 
     include("interpolation.jl")
     include("cone.jl")
-    for primcone in ["nonnegative", "sumofsquares", "secondorder", "exponential", "power", "rotatedsecondorder", "positivesemidefinite"]
+    for primcone in [
+        "nonnegative",
+        "sumofsquares",
+        "secondorder",
+        "exponential",
+        # "power",
+        "rotatedsecondorder",
+        "positivesemidefinite",
+        ]
         include(joinpath(@__DIR__, "primitivecones", primcone * ".jl"))
     end
     include("nativeinterface.jl")
diff --git a/src/cone.jl b/src/cone.jl
index 5324d6b2a..8d941d266 100644
--- a/src/cone.jl
+++ b/src/cone.jl
@@ -14,11 +14,11 @@ end
 # calculate complexity parameter of the barrier (sum of the primitive cone barrier parameters)
 barrierpar(cone::Cone) = sum(barrierpar_prm(prm) for prm in cone.prms)
 
-function getintdir!(arr::Vector{Float64}, cone::Cone)
+function getintdir!(dir::Vector{Float64}, cone::Cone)
     for k in eachindex(cone.prms)
-        getintdir_prm!(view(arr, cone.idxs[k]), cone.prms[k])
+        getintdir_prm!(view(dir, cone.idxs[k]), cone.prms[k])
     end
-    return arr
+    return dir
 end
 
 # TODO can parallelize the functions acting on Cone
@@ -38,18 +38,32 @@ function calcg!(g::Vector{Float64}, cone::Cone)
     return g
 end
 
-function calcHiarr!(Hi_mat::AbstractMatrix{Float64}, mat::AbstractMatrix{Float64}, cone::Cone)
+function calcHarr!(prod::AbstractMatrix{Float64}, arr::AbstractMatrix{Float64}, cone::Cone)
     for k in eachindex(cone.prms)
-        calcHiarr_prm!(view(Hi_mat, cone.idxs[k], :), view(mat, cone.idxs[k], :), cone.prms[k])
+        calcHarr_prm!(view(prod, cone.idxs[k], :), view(arr, cone.idxs[k], :), cone.prms[k])
     end
-    return Hi_mat
+    return prod
 end
 
-function calcHiarr!(Hi_vec::AbstractVector{Float64}, vec::AbstractVector{Float64}, cone::Cone)
+function calcHarr!(prod::AbstractVector{Float64}, arr::AbstractVector{Float64}, cone::Cone)
     for k in eachindex(cone.prms)
-        calcHiarr_prm!(view(Hi_vec, cone.idxs[k]), view(vec, cone.idxs[k]), cone.prms[k])
+        calcHarr_prm!(view(prod, cone.idxs[k]), view(arr, cone.idxs[k]), cone.prms[k])
     end
-    return Hi_vec
+    return prod
+end
+
+function calcHiarr!(prod::AbstractMatrix{Float64}, arr::AbstractMatrix{Float64}, cone::Cone)
+    for k in eachindex(cone.prms)
+        calcHiarr_prm!(view(prod, cone.idxs[k], :), view(arr, cone.idxs[k], :), cone.prms[k])
+    end
+    return prod
+end
+
+function calcHiarr!(prod::AbstractVector{Float64}, arr::AbstractVector{Float64}, cone::Cone)
+    for k in eachindex(cone.prms)
+        calcHiarr_prm!(view(prod, cone.idxs[k]), view(arr, cone.idxs[k]), cone.prms[k])
+    end
+    return prod
 end
 
 # utilities for converting between smat and svec forms (lower triangle) for symmetric matrices
diff --git a/src/nativeinterface.jl b/src/nativeinterface.jl
index 45e09a319..d01cb119e 100644
--- a/src/nativeinterface.jl
+++ b/src/nativeinterface.jl
@@ -1,19 +1,62 @@
-#=
-Copyright 2018, David Papp, Sercan Yildiz, and contributors
 
-an implementation of the algorithm for non-symmetric conic optimization Alfonso (https://github.com/dpapp-github/alfonso) and analyzed in the paper:
-D. Papp and S. Yildiz. On "A homogeneous interior-point algorithm for nonsymmetric convex conic optimization"
-available at https://arxiv.org/abs/1712.00492
-=#
+# cache for linear system solves (to avoid allocations)
+# use QR + cholesky method from CVXOPT
+# (1) eliminate equality constraints via QR of A'
+# (2) solve reduced system by cholesky
+# |0  A'  G'   | * |ux| = |bx|
+# |A  0   0    |   |uy|   |by|
+# |G  0  -Hi/mu|   |uz|   |bz|
+mutable struct LinSysCache
+    Q2
+    RiQ1
+    GQ2
+    HG
+    GHG
+    GHGQ2
+    Q2GHGQ2
+    bxGHbz
+    Q1x
+    rhs
+    Q2div
+    Q2x
+    GHGxi
+    HGxi
+    x1
+    y1
+    z1
+
+    function LinSysCache(Q1::AbstractMatrix{Float64}, Q2::AbstractMatrix{Float64}, Ri::AbstractMatrix{Float64}, G::AbstractMatrix{Float64}, n::Int, p::Int, q::Int)
+        L = new()
+        nmp = n - p
+        L.Q2 = Q2
+        L.RiQ1 = Ri*Q1'
+        L.GQ2 = G*Q2
+        L.HG = Matrix{Float64}(undef, q, n) # TODO don't enforce dense on some
+        L.GHG = Matrix{Float64}(undef, n, n)
+        L.GHGQ2 = Matrix{Float64}(undef, n, nmp)
+        L.Q2GHGQ2 = Matrix{Float64}(undef, nmp, nmp)
+        L.bxGHbz = Vector{Float64}(undef, n)
+        L.Q1x = Vector{Float64}(undef, n)
+        L.rhs = Vector{Float64}(undef, n)
+        L.Q2div = Vector{Float64}(undef, nmp)
+        L.Q2x = Vector{Float64}(undef, n)
+        L.GHGxi = Vector{Float64}(undef, n)
+        L.HGxi = Vector{Float64}(undef, q)
+        L.x1 = Vector{Float64}(undef, n)
+        L.y1 = Vector{Float64}(undef, p)
+        L.z1 = Vector{Float64}(undef, q)
+        return L
+    end
+end
 
-# TODO add time limit option and use it in loop
+# model object containing options, problem data, linear system cache, and solution
 mutable struct AlfonsoOpt
     # options
     verbose::Bool           # if true, prints progress at each iteration
-    optimtol::Float64       # optimization tolerance parameter
+    tolrelopt::Float64      # relative optimality gap tolerance
+    tolabsopt::Float64      # absolute optimality gap tolerance
+    tolfeas::Float64        # feasibility tolerance
     maxiter::Int            # maximum number of iterations
-    # itrefinethres::Float64  # iterative refinement success threshold
-    # maxitrefinesteps::Int   # maximum number of iterative refinement steps in linear system solves
     predlinesearch::Bool    # if false, predictor step uses a fixed step size, else step size is determined via line search
     maxpredsmallsteps::Int  # maximum number of predictor step size reductions allowed with respect to the safe fixed step size
     predlsmulti::Float64    # predictor line search step size multiplier
@@ -24,57 +67,45 @@ mutable struct AlfonsoOpt
     corrlsmulti::Float64    # corrector line search step size multiplier
 
     # problem data
-    A::AbstractMatrix{Float64}  # constraint matrix
-    b::Vector{Float64}          # right-hand side vector
-    c::Vector{Float64}          # cost vector
-    cone::Cone                  # primal cone object
-
-    # algorithmic parameters
-    bnu::Float64
-    beta::Float64
-    eta::Float64
-    alphapredthres::Float64
-    alphapredinit::Float64
-    tol_pres::Float64
-    tol_dres::Float64
-    tol_compl::Float64
+    c::Vector{Float64}          # linear cost vector, size n
+    A::AbstractMatrix{Float64}  # equality constraint matrix, size p*n
+    b::Vector{Float64}          # equality constraint vector, size p
+    G::AbstractMatrix{Float64}  # cone constraint matrix, size q*n
+    h::Vector{Float64}          # cone constraint vector, size q
+    cone::Cone                  # primal constraint cone object
+
+    L::LinSysCache  # cache for linear system solves
 
     # results
     status::Symbol          # solver status
     solvetime::Float64      # total solve time
     niters::Int             # total number of iterations
+
+    x::Vector{Float64}      # final value of the primal free variables
+    s::Vector{Float64}      # final value of the primal cone variables
     y::Vector{Float64}      # final value of the dual free variables
-    x::Vector{Float64}      # final value of the primal variables
-    tau::Float64            # final value of the tau-variable
-    s::Vector{Float64}      # final value of the dual slack variables
-    kap::Float64            # final value of the kappa-variable
+    z::Vector{Float64}      # final value of the dual cone variables
+    tau::Float64            # final value of the tau variable
+    kap::Float64            # final value of the kappa variable
+    mu::Float64             # final value of mu
     pobj::Float64           # final primal objective value
     dobj::Float64           # final dual objective value
-    dgap::Float64           # final duality gap
-    cgap::Float64           # final complementarity gap
-    rel_dgap::Float64       # final relative duality gap
-    rel_cgap::Float64       # final relative complementarity gap
-    pres::Vector{Float64}   # final primal residuals
-    dres::Vector{Float64}   # final dual residuals
-    pin::Float64            # final primal infeasibility
-    din::Float64            # final dual infeasibility
-    rel_pin::Float64        # final relative primal infeasibility
-    rel_din::Float64        # final relative dual infeasibility
-
-    # TODO match natural order of options listed above
-    function AlfonsoOpt(verbose, optimtol, maxiter, predlinesearch, maxpredsmallsteps, maxcorrsteps, corrcheck, maxcorrlsiters, alphacorr, predlsmulti, corrlsmulti)
+
+    function AlfonsoOpt(verbose, tolrelopt, tolabsopt, tolfeas, maxiter, predlinesearch, maxpredsmallsteps, predlsmulti, corrcheck, maxcorrsteps, alphacorr, maxcorrlsiters, corrlsmulti)
         alf = new()
 
         alf.verbose = verbose
-        alf.optimtol = optimtol
+        alf.tolrelopt = tolrelopt
+        alf.tolabsopt = tolabsopt
+        alf.tolfeas = tolfeas
         alf.maxiter = maxiter
         alf.predlinesearch = predlinesearch
         alf.maxpredsmallsteps = maxpredsmallsteps
-        alf.maxcorrsteps = maxcorrsteps
+        alf.predlsmulti = predlsmulti
         alf.corrcheck = corrcheck
-        alf.maxcorrlsiters = maxcorrlsiters
+        alf.maxcorrsteps = maxcorrsteps
         alf.alphacorr = alphacorr
-        alf.predlsmulti = predlsmulti
+        alf.maxcorrlsiters = maxcorrlsiters
         alf.corrlsmulti = corrlsmulti
 
         alf.status = :NotLoaded
@@ -83,259 +114,301 @@ mutable struct AlfonsoOpt
     end
 end
 
+# initialize a model object
 function AlfonsoOpt(;
     verbose = false,
-    optimtol = 1e-6,
-    maxiter = 1e3,
+    tolrelopt = 1e-6,
+    tolabsopt = 1e-7,
+    tolfeas = 1e-7,
+    maxiter = 5e2,
     predlinesearch = true,
-    maxpredsmallsteps = 8,
-    maxcorrsteps = 8, # NOTE doubled in .m code
+    maxpredsmallsteps = 15,
+    predlsmulti = 0.7,
     corrcheck = true,
-    maxcorrlsiters = 8,
+    maxcorrsteps = 15,
     alphacorr = 1.0,
-    predlsmulti = 0.7,
+    maxcorrlsiters = 15,
     corrlsmulti = 0.5,
     )
 
-    if !(1e-10 <= optimtol <= 1e-2)
-        error("optimtol must be from 1e-10 to 1e-2")
+    if min(tolrelopt, tolabsopt, tolfeas) < 1e-10 || max(tolrelopt, tolabsopt, tolfeas) > 1e-2
+        error("tolrelopt, tolabsopt, tolfeas must be between 1e-10 and 1e-2")
     end
     if maxiter < 1
         error("maxiter must be at least 1")
     end
     if maxpredsmallsteps < 1
-        error("maxcorrsteps must be at least 1")
+        error("maxpredsmallsteps must be at least 1")
     end
-    if !(1 <= maxcorrsteps <= 8)
-        error("maxcorrsteps must be from 1 to 8")
+    if maxcorrsteps < 1
+        error("maxcorrsteps must be at least 1")
     end
 
-    return AlfonsoOpt(verbose, optimtol, maxiter, predlinesearch, maxpredsmallsteps, maxcorrsteps, corrcheck, maxcorrlsiters, alphacorr, predlsmulti, corrlsmulti)
+    return AlfonsoOpt(verbose, tolrelopt, tolabsopt, tolfeas, maxiter, predlinesearch, maxpredsmallsteps, predlsmulti, corrcheck, maxcorrsteps, alphacorr, maxcorrlsiters, corrlsmulti)
 end
 
 get_status(alf::AlfonsoOpt) = alf.status
 get_solvetime(alf::AlfonsoOpt) = alf.solvetime
 get_niters(alf::AlfonsoOpt) = alf.niters
-get_y(alf::AlfonsoOpt) = copy(alf.y)
+
 get_x(alf::AlfonsoOpt) = copy(alf.x)
-get_tau(alf::AlfonsoOpt) = alf.tau
 get_s(alf::AlfonsoOpt) = copy(alf.s)
+get_y(alf::AlfonsoOpt) = copy(alf.y)
+get_z(alf::AlfonsoOpt) = copy(alf.z)
+
+get_tau(alf::AlfonsoOpt) = alf.tau
 get_kappa(alf::AlfonsoOpt) = alf.kappa
-get_pobj(alf::AlfonsoOpt) = alf.pobj
-get_dobj(alf::AlfonsoOpt) = alf.dobj
-get_dgap(alf::AlfonsoOpt) = alf.dgap
-get_cgap(alf::AlfonsoOpt) = alf.cgap
-get_rel_dgap(alf::AlfonsoOpt) = alf.rel_dgap
-get_rel_cgap(alf::AlfonsoOpt) = alf.rel_cgap
-get_pres(alf::AlfonsoOpt) = copy(alf.pres)
-get_dres(alf::AlfonsoOpt) = copy(alf.dres)
-get_pin(alf::AlfonsoOpt) = alf.pin
-get_din(alf::AlfonsoOpt) = alf.din
-get_rel_pin(alf::AlfonsoOpt) = alf.rel_pin
-get_rel_din(alf::AlfonsoOpt) = alf.rel_din
-
-# load and verify problem data, calculate algorithmic parameters
+get_mu(alf::AlfonsoOpt) = alf.mu
+
+get_pobj(alf::AlfonsoOpt) = dot(alf.c, alf.x)
+get_dobj(alf::AlfonsoOpt) = -dot(alf.b, alf.y) - dot(alf.h, alf.z)
+
+# verify problem data and load into model object
 function load_data!(
     alf::AlfonsoOpt,
+    c::Vector{Float64},
     A::AbstractMatrix{Float64},
     b::Vector{Float64},
-    c::Vector{Float64},
-    cone::Cone,
+    G::AbstractMatrix{Float64},
+    h::Vector{Float64},
+    cone::Cone;
+    check::Bool=false, # check rank conditions
     )
+
     # check data consistency
-    (m, n) = size(A)
-    if (m == 0) || (n == 0)
-        error("input matrix A has trivial dimension $m x $n")
+    n = length(c)
+    p = length(b)
+    q = length(h)
+    @assert n > 0
+    @assert p + q > 0
+    if n != size(A, 2) || n != size(G, 2)
+        error("number of variables is not consistent in A, G, and c")
     end
-    if m != length(b)
-        error("dimension of vector b is $(length(b)), but number of rows in matrix A is $m")
+    if p != size(A, 1)
+        error("number of constraint rows is not consistent in A and b")
     end
-    if n != length(c)
-        error("dimension of vector c is $(length(c)), but number of columns in matrix A is $n")
+    if q != size(G, 1)
+        error("number of constraint rows is not consistent in G and h")
     end
+
+    # perform QR decomposition of A' for use in linear system solves
+    # TODO reduce allocs, improve efficiency
+    # A' = [Q1 Q2] * [R1; 0]
     if issparse(A)
-        dropzeros!(A)
+        alf.verbose && println("\nJulia is currently missing some sparse matrix methods that could improve performance; Alfonso may perform better if A is loaded as a dense matrix")
+        # TODO currently using dense Q1, Q2, R - probably some should be sparse
+        F = qr(sparse(A'))
+        @assert length(F.prow) == n
+        @assert length(F.pcol) == p
+        @assert istriu(F.R)
+
+        Q = F.Q*Matrix(1.0I, n, n)
+        Q1 = zeros(n, p)
+        Q1[F.prow, F.pcol] = Q[:, 1:p]
+        Q2 = zeros(n, n-p)
+        Q2[F.prow, :] = Q[:, p+1:n]
+        Ri = zeros(p, p)
+        Ri[F.pcol, F.pcol] = inv(UpperTriangular(F.R))
+    else
+        F = qr(A')
+        @assert istriu(F.R)
+
+        Q = F.Q*Matrix(1.0I, n, n)
+        Q1 = Q[:, 1:p]
+        Q2 = Q[:, p+1:n]
+        Ri = inv(UpperTriangular(F.R))
+        @assert norm(A'*Ri - Q1) < 1e-8 # TODO delete later
     end
 
-    # TODO check cone consistency in cone functions file
-    # idxend = 0
-    # for k in eachindex(cone)
-    #     if dimension(cone[k]) != length(coneidxs[k])
-    #         error("dimension of cone type $(cone[k]) does not match length of variable indices")
-    #     end
-    #     @assert coneidxs[k][1] == idxend + 1
-    #     idxend += length(coneidxs[k])
-    # end
-    # @assert idxend == n
-
-    # calculate complexity parameter nu-bar of the augmented barrier (sum of the primitive cone barrier parameters plus 1)
-    bnu = 1 + barrierpar(cone)
-
-    # calculate prediction and correction step parameters
-    (beta, eta, cpredfix) = getbetaeta(alf.maxcorrsteps, bnu) # beta: large neighborhood parameter, eta: small neighborhood parameter
-    alphapredfix = cpredfix/(eta + sqrt(2*eta^2 + bnu)) # fixed predictor step size
-    alphapredthres = (alf.predlsmulti^alf.maxpredsmallsteps)*alphapredfix # minimum predictor step size
-    alphapredinit = (alf.predlinesearch ? min(100*alphapredfix, 0.9999) : alphapredfix) # predictor step size
-
-    # calculate termination tolerances: infinity operator norms of submatrices of LHS matrix
-    tol_pres = max(1.0, maximum(sum(abs, A[i,:]) + abs(b[i]) for i in 1:m)) # first m rows
-    tol_dres = max(1.0, maximum(sum(abs, A[:,j]) + abs(c[j]) + 1.0 for j in 1:n)) # next n rows
-    tol_compl = max(1.0, maximum(abs, b), maximum(abs, c)) # row m+n+1
+    if check
+        # check rank conditions
+        # TODO rank for qr decomp should be implemented in Julia - see https://github.com/JuliaLang/julia/blob/f8b52dab77415a22d28497f48407aca92fbbd4c3/stdlib/LinearAlgebra/src/qr.jl#L895
+        if rank(A) < p # TODO change to rank(F)
+            error("A matrix is not full-row-rank; some primal equalities may be redundant or inconsistent")
+        end
+        if rank(vcat(A, G)) < n
+            error("[A' G'] is not full-row-rank; some dual equalities may be redundant (i.e. primal variables can be removed) or inconsistent")
+        end
+    end
 
-    # save data in solver object
+    alf.c = c
     alf.A = A
     alf.b = b
-    alf.c = c
+    alf.G = G
+    alf.h = h
     alf.cone = cone
-    alf.bnu = bnu
-    alf.beta = beta
-    alf.eta = eta
-    alf.alphapredthres = alphapredthres
-    alf.alphapredinit = alphapredinit
-    alf.tol_pres = tol_pres
-    alf.tol_dres = tol_dres
-    alf.tol_compl = tol_compl
-
+    alf.L = LinSysCache(Q1, Q2, Ri, G, n, p, q)
     alf.status = :Loaded
 
     return alf
 end
 
-# calculate initial central primal-dual iterate
-function getinitialiterate(alf::AlfonsoOpt)
-    (A, b, c) = (alf.A, alf.b, alf.c)
-    (m, n) = size(A)
-    cone = alf.cone
-
-    # initial primal iterate
-    tx = similar(c)
-    getintdir!(tx, cone)
-    loadpnt!(cone, tx)
-    @assert incone(cone)
-
-    # scaling factor for the primal problem
-    rp = maximum((1.0 + abs(b[i]))/(1.0 + abs(sum(A[i,:]))) for i in 1:m)
-    # scaling factor for the dual problem
-    g = similar(c)
-    calcg!(g, cone)
-    rd = maximum((1.0 + abs(g[j]))/(1.0 + abs(c[j])) for j in 1:n)
-
-    # central primal-dual iterate
-    tx .*= sqrt(rp*rd)
-    @assert incone(cone) # TODO a little expensive to call this twice
-    ty = zeros(m)
-    tau = 1.0
-    ts = calcg!(g, cone)
-    ts .*= -1.0
-    kap = 1.0
-    mu = (dot(tx, ts) + tau*kap)/alf.bnu
+# solve using predictor-corrector algorithm based on homogeneous self-dual embedding
+function solve!(alf::AlfonsoOpt)
+    starttime = time()
 
-    # TODO can delete later
-    @assert abs(1.0 - mu) < 1e-8
-    @assert abs(calcnbhd(tau*kap, mu, copy(ts), copy(ts), cone)) < 1e-6
+    (c, A, b, G, h, cone) = (alf.c, alf.A, alf.b, alf.G, alf.h, alf.cone)
+    (n, p, q) = (length(c), length(b), length(h))
+    bnu = 1.0 + barrierpar(cone) # complexity parameter nu-bar of the augmented barrier (sum of the primitive cone barrier parameters plus 1)
 
-    return (tx, ty, tau, ts, kap, mu)
-end
+    # preallocate arrays
+    # primal and dual variables multiplied by tau
+    tx = similar(c)
+    ty = similar(b)
+    tz = similar(h)
+    ts = similar(h)
+    # values during line searches
+    ls_tz = similar(tz)
+    ls_ts = similar(ts)
+    # cone functions evaluate barrier derivatives at ls_ts
+    loadpnt!(cone, ls_ts)
+    # gradient evaluations at ls_ts of the barrier function for K
+    g = similar(ts)
+    # helper arrays for residuals, right-hand-sides, and search directions
+    tmp_tx = similar(tx)
+    tmp_tx2 = similar(tx)
+    tmp_ty = similar(ty)
+    tmp_tz = similar(tz)
+    tmp_ts = similar(ts)
+
+    # find initial primal-dual iterate
+    (tau, kap, mu) = findinitialiterate!(tx, ty, tz, ts, ls_ts, bnu, alf)
+
+    # calculate tolerances for convergence
+    tol_res_tx = inv(max(1.0, norm(c)))
+    tol_res_ty = inv(max(1.0, norm(b)))
+    tol_res_tz = inv(max(1.0, norm(h)))
 
-# perform prediction and correction steps in a loop until converged
-function solve!(alf::AlfonsoOpt)
-    starttime = time()
-    (A, b, c) = (alf.A, alf.b, alf.c)
-    (m, n) = size(A)
-    cone = alf.cone
-
-    # calculate initial central primal-dual iterate
-    alf.verbose && println("Finding initial iterate")
-    (tx, ty, tau, ts, kap, mu) = getinitialiterate(alf)
-
-    # preallocate arrays # TODO probably could get away with fewer. rename to temp_
-    p_res = similar(ty)
-    d_res = similar(tx)
-    dir_ty = similar(ty)
-    dir_tx = similar(tx)
-    dir_ts = similar(ts)
-    sa_tx = copy(tx)
-    sa_ts = similar(ts)
-    g = similar(tx)
-    HiAt = similar(b, n, m) # TODO for very sparse LPs, using sparse here is good (diagonal hessian), but for sparse problems with dense hessians, want dense
-    AHiAt = similar(b, m, m)
-    y1 = similar(b)
-    x1 = similar(c)
-    y2 = similar(b)
-    x2 = similar(c)
+    # calculate prediction and correction step parameters
+    (beta, eta, cpredfix) = getbetaeta(alf.maxcorrsteps, bnu) # beta: large neighborhood parameter, eta: small neighborhood parameter
+    alphapredfix = cpredfix/(eta + sqrt(2*eta^2 + bnu)) # fixed predictor step size
+    alphapredthres = (alf.predlsmulti^alf.maxpredsmallsteps)*alphapredfix # minimum predictor step size
+    alphapredinit = (alf.predlinesearch ? min(100*alphapredfix, 0.9999) : alphapredfix) # predictor step size
 
     # main loop
     if alf.verbose
-        println("Starting iteration")
-        @printf("\n%5s %12s %12s %9s %9s %9s %9s %9s %9s\n", "iter", "p_obj", "d_obj", "gap", "p_inf", "d_inf", "tau", "kap", "mu")
+        println("starting iteration")
+        @printf("\n%5s %12s %12s %9s %9s %9s %9s %9s %9s\n", "iter", "p_obj", "d_obj", "rel_gap", "p_inf", "d_inf", "tau", "kap", "mu")
         flush(stdout)
     end
 
-    loadpnt!(cone, sa_tx)
     alf.status = :StartedIterating
-    alphapred = alf.alphapredinit
+    alphapred = alphapredinit
     iter = 0
     while true
-    # calculate convergence metrics
-        ctx = dot(c, tx)
-        bty = dot(b, ty)
-        p_obj = ctx/tau
-        d_obj = bty/tau
-        rel_gap = abs(ctx - bty)/(tau + abs(bty))
-        # p_res = -A*tx + b*tau
-        mul!(p_res, A, tx)
-        p_res .= tau .* b .- p_res
-        p_inf = maximum(abs, p_res)/alf.tol_pres
-        # d_res = A'*ty - c*tau + ts
-        mul!(d_res, A', ty)
-        d_res .+= ts .- tau .* c
-        d_inf = maximum(abs, d_res)/alf.tol_dres
-        abs_gap = -bty + ctx + kap
-        compl = abs(abs_gap)/alf.tol_compl
+        # calculate residuals and convergence parameters
+        invtau = inv(tau)
+
+        # tmp_tx = -A'*ty - G'*tz - c*tau
+        mul!(tmp_tx2, A', ty)
+        mul!(tmp_tx, G', tz)
+        @. tmp_tx = -tmp_tx2 - tmp_tx
+        nres_x = norm(tmp_tx)
+        @. tmp_tx -= c*tau
+        nres_tx = norm(tmp_tx)*invtau
+
+        # tmp_ty = A*tx - b*tau
+        mul!(tmp_ty, A, tx)
+        nres_y = norm(tmp_ty)
+        @. tmp_ty -= b*tau
+        nres_ty = norm(tmp_ty)*invtau
+
+        # tmp_tz = ts + G*tx - h*tau
+        mul!(tmp_tz, G, tx)
+        @. tmp_tz += ts
+        nres_z = norm(tmp_tz)
+        @. tmp_tz -= h*tau
+        nres_tz = norm(tmp_tz)*invtau
+
+        (cx, by, hz) = (dot(c, tx), dot(b, ty), dot(h, tz))
+        obj_pr = cx*invtau
+        obj_du = -(by + hz)*invtau
+        gap = dot(tz, ts) # TODO is this right? maybe should adapt original alfonso conditions
+
+        # TODO maybe add small epsilon to denominators that are zero to avoid NaNs, and get rid of isnans further down
+        if obj_pr < 0.0
+            relgap = gap/-obj_pr
+        elseif obj_pr > 0.0
+            relgap = gap/obj_du
+        else
+            relgap = NaN
+        end
+
+        nres_pr = max(nres_ty*tol_res_ty, nres_tz*tol_res_tz)
+        nres_du = nres_tx*tol_res_tx
+
+        if hz + by < 0.0
+            infres_pr = nres_x*tol_res_tx/(-hz - by)
+        else
+            infres_pr = NaN
+        end
+        if cx < 0.0
+            infres_du = -max(nres_y*tol_res_ty, nres_z*tol_res_tz)/cx
+        else
+            infres_du = NaN
+        end
 
         if alf.verbose
             # print iteration statistics
-            @printf("%5d %12.4e %12.4e %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e\n", iter, p_obj, d_obj, rel_gap, p_inf, d_inf, tau, kap, mu)
+            @printf("%5d %12.4e %12.4e %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e\n", iter, obj_pr, obj_du, relgap, nres_pr, nres_du, tau, kap, mu)
             flush(stdout)
         end
 
         # check convergence criteria
-        if (p_inf <= alf.optimtol) && (d_inf <= alf.optimtol)
-            if rel_gap <= alf.optimtol
-                alf.verbose && println("Problem is feasible and an approximate optimal solution was found; terminating")
-                alf.status = :Optimal
-                break
-            elseif (compl <= alf.optimtol) && (tau <= alf.optimtol*1e-02*max(1.0, kap))
-                alf.verbose && println("Problem is nearly primal or dual infeasible; terminating")
-                alf.status = :NearlyInfeasible
-                break
-            end
-        elseif (tau <= alf.optimtol*1e-02*min(1.0, kap)) && (mu <= alf.optimtol*1e-02)
-            alf.verbose && println("Problem is ill-posed; terminating")
+        # TODO nearly primal or dual infeasible or nearly optimal cases?
+        if nres_pr <= alf.tolfeas && nres_du <= alf.tolfeas && (gap <= alf.tolabsopt || (!isnan(relgap) && relgap <= alf.tolrelopt))
+            alf.verbose && println("optimal solution found; terminating")
+            alf.status = :Optimal
+
+            alf.x = tx .*= invtau
+            alf.s = ts .*= invtau
+            alf.y = ty .*= invtau
+            alf.z = tz .*= invtau
+            break
+        elseif !isnan(infres_pr) && infres_pr <= alf.tolfeas
+            alf.verbose && println("primal infeasibility detected; terminating")
+            alf.status = :PrimalInfeasible
+
+            invobj = inv(-by - hz)
+            alf.x = tx .= NaN
+            alf.s = ts .= NaN
+            alf.y = ty .*= invobj
+            alf.z = tz .*= invobj
+            break
+        elseif !isnan(infres_du) && infres_du <= alf.tolfeas
+            alf.verbose && println("dual infeasibility detected; terminating")
+            alf.status = :DualInfeasible
+
+            invobj = inv(-cx)
+            alf.x = tx .*= invobj
+            alf.s = ts .*= invobj
+            alf.y = ty .= NaN
+            alf.z = tz .= NaN
+            break
+        elseif mu <= alf.tolfeas*1e-2 && tau <= alf.tolfeas*1e-2*min(1.0, kap)
+            alf.verbose && println("ill-posedness detected; terminating")
             alf.status = :IllPosed
+
+            alf.x = tx .= NaN
+            alf.s = ts .= NaN
+            alf.y = ty .= NaN
+            alf.z = tz .= NaN
             break
         end
 
         # check iteration limit
         iter += 1
         if iter >= alf.maxiter
-            alf.verbose && println("Reached iteration limit; terminating")
+            alf.verbose && println("iteration limit reached; terminating")
             alf.status = :IterationLimit
             break
         end
 
         # prediction phase
         # calculate prediction direction
-        # x rhs is (ts - d_res), y rhs is p_res
-        dir_tx .= ts .- d_res # temp for x rhs
-        (y1, x1, y2, x2) = solvelinsys(y1, x1, y2, x2, mu, dir_tx, p_res, A, b, c, cone, HiAt, AHiAt)
-
-        dir_tau = (abs_gap - kap - dot(b, y2) + dot(c, x2))/(mu/tau^2 + dot(b, y1) - dot(c, x1))
-        dir_ty .= y2 .+ dir_tau .* y1
-        dir_tx .= x2 .+ dir_tau .* x1
-        mul!(dir_ts, A', dir_ty)
-        dir_ts .= dir_tau .* c .- dir_ts .- d_res
-        dir_kap = -abs_gap + dot(b, dir_ty) - dot(c, dir_tx)
+        @. tmp_ts = tmp_tz
+        @. tmp_tz = -tz
+        (tmp_kap, tmp_tau) = finddirection!(tmp_tx, tmp_ty, tmp_tz, tmp_ts, -kap, kap + cx + by + hz, mu, tau, alf)
 
         # determine step length alpha by line search
         alpha = alphapred
@@ -346,19 +419,20 @@ function solve!(alf::AlfonsoOpt)
         while true
             nprediters += 1
 
-            sa_tx .= tx .+ alpha .* dir_tx
+            @. ls_ts = ts + alpha*tmp_ts
 
             # accept primal iterate if
             # - decreased alpha and it is the first inside the cone and beta-neighborhood or
             # - increased alpha and it is inside the cone and the first to leave beta-neighborhood
             if incone(cone)
                 # primal iterate is inside the cone
-                sa_ts .= ts .+ alpha .* dir_ts
-                sa_tk = (tau + alpha*dir_tau)*(kap + alpha*dir_kap)
-                sa_mu = (dot(sa_tx, sa_ts) + sa_tk)/alf.bnu
-                nbhd = calcnbhd(sa_tk, sa_mu, sa_ts, g, cone)
 
-                if nbhd < abs2(alf.beta*sa_mu)
+                @. ls_tz = tz + alpha*tmp_tz
+                ls_tk = (tau + alpha*tmp_tau)*(kap + alpha*tmp_kap)
+                ls_mu = (dot(ls_ts, ls_tz) + ls_tk)/bnu
+                nbhd = calcnbhd(ls_tk, ls_mu, ls_tz, g, cone)
+
+                if nbhd < abs2(beta*ls_mu)
                     # iterate is inside the beta-neighborhood
                     if !alphaprevok || (alpha > alf.predlsmulti)
                         # either the previous iterate was outside the beta-neighborhood or increasing alpha again will make it > 1
@@ -372,24 +446,15 @@ function solve!(alf::AlfonsoOpt)
                     alpha = alpha/alf.predlsmulti # increase alpha
                     continue
                 end
-
-                # # iterate is outside the beta-neighborhood
-                # if alphaprevok # TODO technically this should be only if nprediters > 1, but it seems to work
-                #     # previous iterate was inside the beta-neighborhood
-                #     if alf.predlinesearch
-                #         alphapred = alpha*alf.predlsmulti
-                #     end
-                #     break
-                # end
             end
 
             # primal iterate is either
             # - outside the cone or
             # - inside the cone and outside the beta-neighborhood and previous iterate was outside the beta-neighborhood
-            if alpha < alf.alphapredthres
+            if alpha < alphapredthres
                 # alpha is very small, so predictor has failed
                 predfail = true
-                alf.verbose && println("Predictor could not improve the solution ($nprediters line search steps); terminating")
+                alf.verbose && println("predictor could not improve the solution ($nprediters line search steps); terminating")
                 alf.status = :PredictorFail
                 break
             end
@@ -397,21 +462,21 @@ function solve!(alf::AlfonsoOpt)
             alphaprevok = false
             alpha = alf.predlsmulti*alpha # decrease alpha
         end
-        # @show nprediters
         if predfail
             break
         end
 
         # step distance alpha in the direction
-        ty .+= alpha .* dir_ty
-        tx .= sa_tx
-        tau += alpha*dir_tau
-        ts .+= alpha .* dir_ts
-        kap += alpha*dir_kap
-        mu = (dot(tx, ts) + tau*kap)/alf.bnu
+        @. tx += alpha*tmp_tx
+        @. ty += alpha*tmp_ty
+        @. tz += alpha*tmp_tz
+        @. ts = ls_ts
+        tau += alpha*tmp_tau
+        kap += alpha*tmp_kap
+        mu = (dot(ts, tz) + tau*kap)/bnu
 
         # skip correction phase if allowed and current iterate is in the eta-neighborhood
-        if alf.corrcheck && (nbhd < abs2(alf.eta*mu))
+        if alf.corrcheck && (nbhd < abs2(eta*mu))
             continue
         end
 
@@ -422,19 +487,12 @@ function solve!(alf::AlfonsoOpt)
             ncorrsteps += 1
 
             # calculate correction direction
-            # x rhs is (ts + mu*g), y rhs is 0
+            @. tmp_tx = 0.0
+            @. tmp_ty = 0.0
             calcg!(g, cone)
-            dir_tx .= ts .+ mu .* g # temp for x rhs
-            dir_ty .= 0.0 # temp for y rhs
-            (y1, x1, y2, x2) = solvelinsys(y1, x1, y2, x2, mu, dir_tx, dir_ty, A, b, c, cone, HiAt, AHiAt)
-
-            dir_tau = (mu/tau - kap - dot(b, y2) + dot(c, x2))/(mu/tau^2 + dot(b, y1) - dot(c, x1))
-            dir_ty .= y2 .+ dir_tau .* y1
-            dir_tx .= x2 .+ dir_tau .* x1
-            # dir_ts = -A'*dir_ty + c*dir_tau
-            mul!(dir_ts, A', dir_ty)
-            dir_ts .= dir_tau .* c .- dir_ts
-            dir_kap = dot(b, dir_ty) - dot(c, dir_tx)
+            @. tmp_tz = -tz - mu*g
+            @. tmp_ts = 0.0
+            (tmp_kap, tmp_tau) = finddirection!(tmp_tx, tmp_ty, tmp_tz, tmp_ts, -kap + mu/tau, 0.0, mu, tau, alf)
 
             # determine step length alpha by line search
             alpha = alf.alphacorr
@@ -442,8 +500,7 @@ function solve!(alf::AlfonsoOpt)
             while ncorrlsiters <= alf.maxcorrlsiters
                 ncorrlsiters += 1
 
-                sa_tx .= tx .+ alpha .* dir_tx
-
+                @. ls_ts = ts + alpha*tmp_ts
                 if incone(cone)
                     # primal iterate tx is inside the cone, so terminate line search
                     break
@@ -453,129 +510,234 @@ function solve!(alf::AlfonsoOpt)
                 if ncorrlsiters == alf.maxcorrlsiters
                     # corrector failed
                     corrfail = true
-                    alf.verbose && println("Corrector could not improve the solution ($ncorrlsiters line search steps); terminating")
+                    alf.verbose && println("corrector could not improve the solution ($ncorrlsiters line search steps); terminating")
                     alf.status = :CorrectorFail
                     break
                 end
 
                 alpha = alf.corrlsmulti*alpha # decrease alpha
             end
-            # @show ncorrlsiters
             if corrfail
                 break
             end
 
             # step distance alpha in the direction
-            ty .+= alpha .* dir_ty
-            tx .= sa_tx
-            tau += alpha*dir_tau
-            ts .+= alpha .* dir_ts
-            kap += alpha*dir_kap
-            mu = (dot(tx, ts) + tau*kap)/alf.bnu
+            @. tx += alpha*tmp_tx
+            @. ty += alpha*tmp_ty
+            @. tz += alpha*tmp_tz
+            @. ts = ls_ts
+            tau += alpha*tmp_tau
+            kap += alpha*tmp_kap
+            mu = (dot(ts, tz) + tau*kap)/bnu
 
             # finish if allowed and current iterate is in the eta-neighborhood, or if taken max steps
             if (ncorrsteps == alf.maxcorrsteps) || alf.corrcheck
-                sa_ts .= ts
-                if calcnbhd(tau*kap, mu, sa_ts, g, cone) <= abs2(alf.eta*mu)
+                @. ls_tz = tz
+                nbhd = calcnbhd(tau*kap, mu, ls_tz, g, cone)
+                if nbhd <= abs2(eta*mu)
                     break
                 elseif ncorrsteps == alf.maxcorrsteps
-                    # nbhd_eta > eta, so corrector failed
+                    # outside eta neighborhood, so corrector failed
                     corrfail = true
-                    alf.verbose && println("Corrector phase finished outside the eta-neighborhood ($ncorrsteps correction steps); terminating")
+                    alf.verbose && println("corrector phase finished outside the eta-neighborhood ($ncorrsteps correction steps); terminating")
                     alf.status = :CorrectorFail
                     break
                 end
             end
         end
-        # @show ncorrsteps
         if corrfail
             break
         end
     end
 
-    alf.verbose && println("\nFinished in $iter iterations\nInternal status is $(alf.status)\n")
+    alf.verbose && println("\nfinished in $iter iterations; internal status is $(alf.status)\n")
 
-    # calculate final solution and iteration statistics
+    # calculate solution and iteration statistics
     alf.niters = iter
-
-    tx ./= tau
-    alf.x = tx
-    ty ./= tau
-    alf.y = ty
     alf.tau = tau
-    ts ./= tau
-    alf.s = ts
     alf.kap = kap
-
-    alf.pobj = dot(c, alf.x)
-    alf.dobj = dot(b, alf.y)
-    alf.dgap = alf.pobj - alf.dobj
-    alf.cgap = dot(alf.s, alf.x)
-    alf.rel_dgap = alf.dgap/(1.0 + abs(alf.pobj) + abs(alf.dobj))
-    alf.rel_cgap = alf.cgap/(1.0 + abs(alf.pobj) + abs(alf.dobj))
-
-    # alf.pres = b - A*alf.x
-    mul!(p_res, A, alf.x)
-    p_res .= b .- p_res
-    alf.pres = p_res
-    # alf.dres = c - A'*alf.y - alf.s
-    mul!(d_res, A', alf.y)
-    d_res .= c .- d_res .- alf.s
-    alf.dres = d_res
-
-    alf.pin = norm(alf.pres)
-    alf.din = norm(alf.dres)
-    alf.rel_pin = alf.pin/(1.0 + norm(b, Inf))
-    alf.rel_din = alf.din/(1.0 + norm(c, Inf))
-
+    alf.mu = mu
     alf.solvetime = time() - starttime
 
     return nothing
 end
 
-function calcnbhd(tk, mu, sa_ts, g, cone)
+# TODO put this inside the cone functions
+function calcnbhd(tk, mu, ls_tz, g, cone)
     calcg!(g, cone)
-    sa_ts .+= mu .* g
-    calcHiarr!(g, sa_ts, cone)
-    return (tk - mu)^2 + dot(sa_ts, g)
+    @. ls_tz += mu*g
+    calcHiarr!(g, ls_tz, cone)
+    return (tk - mu)^2 + dot(ls_tz, g)
+end
+
+# calculate initial central primal-dual iterate
+function findinitialiterate!(tx::Vector{Float64}, ty::Vector{Float64}, tz::Vector{Float64}, ts::Vector{Float64}, ls_ts::Vector{Float64}, bnu::Float64, alf::AlfonsoOpt)
+    (b, G, h, cone) = (alf.b, alf.G, alf.h, alf.cone)
+    L = alf.L
+    (Q2, RiQ1, GQ2, Q2GHGQ2, bxGHbz, Q1x, rhs, Q2div, Q2x, HGxi) = (L.Q2, L.RiQ1, L.GQ2, L.Q2GHGQ2, L.bxGHbz, L.Q1x, L.rhs, L.Q2div, L.Q2x, L.HGxi)
+
+    alf.verbose && println("\nfinding initial iterate")
+
+    # solve linear equation
+    # |0  A' G'| * |tx| = |0|
+    # |A  0  0 |   |ty|   |b|
+    # |G  0  -I|   |ts|   |h|
+
+    # Q1x = Q1*Ri'*b
+    # Q2x = Q2*(Symmetric(GQ2'*GQ2)\(GQ2'*(h - G*Q1x)))
+    # tx = Q1x + Q2x
+    # ts = h - G*tx
+    # ty = Ri*Q1'*G'*ts
+
+    # bunchkaufman allocates more than cholesky, but doesn't fail when approximately quasidefinite (TODO could try LDL instead)
+    # TODO does it matter that F could be either type?
+    mul!(Q2GHGQ2, GQ2', GQ2)
+    F = cholesky!(Symmetric(Q2GHGQ2), check=false)
+    if !issuccess(F)
+        alf.verbose && println("linear system matrix was nearly not positive definite")
+        mul!(Q2GHGQ2, Q2', GHGQ2)
+        F = bunchkaufman!(Symmetric(Q2GHGQ2))
+    end
+
+    # Q1x = Q1*Ri'*b
+    mul!(Q1x, RiQ1', b)
+    # Q2x = Q2*(F\(GQ2'*(h - G*Q1x)))
+    mul!(HGxi, G, Q1x)
+    @. HGxi = h - HGxi
+    mul!(Q2div, GQ2', HGxi)
+    ldiv!(F, Q2div)
+    mul!(Q2x, Q2, Q2div)
+    # tx = Q1x + Q2x
+    @. tx = Q1x + Q2x
+    # ts = h - G*tx
+    mul!(ts, G, tx)
+    @. ts = h - ts
+    # ty = Ri*Q1'*G'*ts
+    mul!(bxGHbz, G', ts)
+    mul!(ty, RiQ1, bxGHbz)
+
+    # from ts, step along interior direction of cone until ts is inside cone
+    @. ls_ts = ts
+    if !incone(cone)
+        tmp_ts = getintdir!(tz, cone)
+        alpha = 1.0 # TODO starting alpha maybe should depend on ls_ts (eg norm like in Alfonso) in case 1.0 is too large/small
+        steps = 0
+        while !incone(cone)
+            @. ls_ts = ts + alpha*tmp_ts
+            alpha *= 1.5
+            steps += 1
+            if steps > 25
+                error("cannot find initial iterate")
+            end
+        end
+        alf.verbose && println("$steps steps taken for initial iterate")
+        @. ts = ls_ts
+    end
+
+    @assert incone(cone) # TODO delete
+    calcg!(tz, cone)
+    @. tz *= -1.0
+
+    tau = 1.0
+    kap = 1.0
+    mu = (dot(tz, ts) + tau*kap)/bnu
+
+    @assert abs(1.0 - mu) < 1e-10
+    # @assert calcnbhd(tau*kap, mu, copy(tz), copy(tz), cone) < 1e-6
+    alf.verbose && println("initial iterate found")
+
+    return (tau, kap, mu)
+end
+
+# calculate new prediction or correction direction given rhs of KKT linear system
+function finddirection!(rhs_tx::Vector{Float64}, rhs_ty::Vector{Float64}, rhs_tz::Vector{Float64}, rhs_ts::Vector{Float64}, rhs_kap::Float64, rhs_tau::Float64, mu::Float64, tau::Float64, alf::AlfonsoOpt)
+    (c, b, G, h, cone) = (alf.c, alf.b, alf.G, alf.h, alf.cone)
+    L = alf.L
+    (Q2, HG, GHG, GHGQ2, Q2GHGQ2, x1, y1, z1) = (L.Q2, L.HG, L.GHG, L.GHGQ2, L.Q2GHGQ2, L.x1, L.y1, L.z1)
+
+    # solve two symmetric systems and combine the solutions
+    # use QR + cholesky method from CVXOPT
+    # (1) eliminate equality constraints via QR of A'
+    # (2) solve reduced system by cholesky
+    # |0  A'  G'   | * |ux| = |bx|
+    # |A  0   0    |   |uy|   |by|
+    # |G  0  -Hi/mu|   |uz|   |bz|
+
+    # A' = [Q1 Q2] * [R1; 0]
+    # factorize Q2' * G' * mu*H * G * Q2
+    calcHarr!(HG, G, cone)
+    @. HG *= mu
+    mul!(GHG, G', HG)
+    mul!(GHGQ2, GHG, Q2)
+    mul!(Q2GHGQ2, Q2', GHGQ2)
+
+    # bunchkaufman allocates more than cholesky, but doesn't fail when approximately quasidefinite (TODO could try LDL instead)
+    # TODO does it matter that F could be either type?
+    F = cholesky!(Symmetric(Q2GHGQ2), check=false)
+    if !issuccess(F)
+        alf.verbose && println("linear system matrix was nearly not positive definite")
+        mul!(Q2GHGQ2, Q2', GHGQ2)
+        F = bunchkaufman!(Symmetric(Q2GHGQ2))
+    end
+
+    # (x2, y2, z2) = (rhs_tx, -rhs_ty, -mu*H*rhs_ts - rhs_tz)
+    @. rhs_ty *= -1.0
+    @. rhs_tz *= -1.0
+    if !iszero(rhs_ts)
+        calcHarr!(z1, rhs_ts, cone)
+        @. rhs_tz -= mu*z1
+    end
+    calcxyz!(rhs_tx, rhs_ty, rhs_tz, F, G, L)
+
+    # (x1, y1, z1) = (-c, b, mu*H*h)
+    @. x1 = -c
+    @. y1 = b
+    calcHarr!(z1, h, cone)
+    @. z1 *= mu
+    calcxyz!(x1, y1, z1, F, G, L)
+
+    # combine
+    dir_tau = (rhs_tau + rhs_kap + dot(c, rhs_tx) + dot(b, rhs_ty) + dot(h, rhs_tz))/(mu/tau/tau - dot(c, x1) - dot(b, y1) - dot(h, z1))
+    @. rhs_tx += dir_tau*x1
+    @. rhs_ty += dir_tau*y1
+    @. rhs_tz += dir_tau*z1
+    mul!(z1, G, rhs_tx)
+    @. rhs_ts = -z1 + h*dir_tau - rhs_ts
+    dir_kap = -dot(c, rhs_tx) - dot(b, rhs_ty) - dot(h, rhs_tz) - rhs_tau
+
+    return (dir_kap, dir_tau)
 end
 
-function solvelinsys(y1, x1, y2, x2, mu, rhs_tx, rhs_ty, A, b, c, cone, HiAt, AHiAt)
-    invmu = inv(mu)
-
-    # TODO could ultimately be faster or more stable to do cholesky.L ldiv everywhere than to do full hessian ldiv
-    calcHiarr!(HiAt, A', cone)
-    HiAt .*= invmu
-    mul!(AHiAt, A, HiAt)
-    F = cholesky!(Symmetric(AHiAt))
-
-    # TODO can parallelize 1 and 2
-    # y2 = F\(rhs_ty + HiAt'*rhs_tx)
-    mul!(y2, HiAt', rhs_tx)
-    y2 .+= rhs_ty
-    ldiv!(F, y2) # y2 done
-
-    # x2 = Hi*invmu*(A'*y2 - rhs_tx)
-    mul!(x2, A', y2)
-    rhs_tx .= x2 .- rhs_tx # destroys rhs_tx
-    rhs_tx .*= invmu
-    calcHiarr!(x2, rhs_tx, cone) # x2 done
-
-    # y1 = F\(b + HiAt'*c)
-    mul!(y1, HiAt', c)
-    y1 .+= b
-    ldiv!(F, y1) # y1 done
-
-    # x1 = Hi*invmu*(A'*y1 - c)
-    mul!(rhs_tx, A', y1)
-    rhs_tx .-= c
-    rhs_tx .*= invmu
-    calcHiarr!(x1, rhs_tx, cone) # x1 done
-
-    return (y1, x1, y2, x2)
+# calculate solution to reduced symmetric linear systems
+function calcxyz!(xi::Vector{Float64}, yi::Vector{Float64}, zi::Vector{Float64}, F, G::AbstractMatrix{Float64}, L::LinSysCache)
+    (Q2, RiQ1, HG, GHG, bxGHbz, Q1x, rhs, Q2div, Q2x, GHGxi, HGxi) = (L.Q2, L.RiQ1, L.HG, L.GHG, L.bxGHbz, L.Q1x, L.rhs, L.Q2div, L.Q2x, L.GHGxi, L.HGxi)
+
+    # bxGHbz = bx + G'*Hbz
+    mul!(bxGHbz, G', zi)
+    @. bxGHbz += xi
+    # Q1x = Q1*Ri'*by
+    mul!(Q1x, RiQ1', yi)
+    # Q2x = Q2*(K22_F\(Q2'*(bxGHbz - GHG*Q1x)))
+    mul!(rhs, GHG, Q1x)
+    @. rhs = bxGHbz - rhs
+    mul!(Q2div, Q2', rhs)
+    ldiv!(F, Q2div)
+    mul!(Q2x, Q2, Q2div)
+    # xi = Q1x + Q2x
+    @. xi = Q1x + Q2x
+    # yi = Ri*Q1'*(bxGHbz - GHG*xi)
+    mul!(GHGxi, GHG, xi)
+    @. bxGHbz -= GHGxi
+    mul!(yi, RiQ1, bxGHbz)
+    # zi = HG*xi - Hbz
+    mul!(HGxi, HG, xi)
+    @. zi = HGxi - zi
+
+    return (xi, yi, zi)
 end
 
-function getbetaeta(maxcorrsteps, bnu)
+# get neighborhood parameters depending on magnitude of barrier parameter and maximum number of correction steps
+function getbetaeta(maxcorrsteps::Int, bnu::Float64)
     if maxcorrsteps <= 2
         if bnu < 10.0
             return (0.1810, 0.0733, 0.0225)
diff --git a/src/primitivecones/exponential.jl b/src/primitivecones/exponential.jl
index ed9dc7e9f..cf35af41a 100644
--- a/src/primitivecones/exponential.jl
+++ b/src/primitivecones/exponential.jl
@@ -6,12 +6,15 @@
 mutable struct ExponentialCone <: PrimitiveCone
     pnt::AbstractVector{Float64}
     g::AbstractVector{Float64}
-    Hi::Symmetric{Float64,Array{Float64,2}} # TODO could be faster as StaticArray
+    H::Matrix{Float64} # TODO could be faster as StaticArray
+    H2::Matrix{Float64}
+    F
 
     function ExponentialCone()
         prm = new()
         prm.g = Vector{Float64}(undef, 3)
-        prm.Hi = Symmetric(similar(prm.g, 3, 3))
+        prm.H = similar(prm.g, 3, 3)
+        prm.H2 = copy(prm.H)
         return prm
     end
 end
@@ -27,33 +30,46 @@ function incone_prm(prm::ExponentialCone)
         return false
     end
 
-    ylzy = y*log(z/y)
-    dist = ylzy - x
-    if dist <= 0.0
+    lzy = log(z/y)
+    ylzy = y*lzy
+    ylzyx = ylzy - x
+    if ylzyx <= 0.0
         return false
     end
-    invdist = inv(dist)
 
     # gradient
+    iylzyx = inv(ylzyx)
     g = prm.g
-    g[1] = invdist # 1/(-x + y log(z/y))
-    g[2] = invdist * (y - x - 2*dist) / y # (x + y - 2 y log(z/y))/(y (-x + y log(z/y)))
-    g[3] = (-1 - y*invdist) / z # (-1 + y/(x - y log(z/y)))/z
-
-    # Hessian inverse
-    HiU = prm.Hi.data # upper triangle
-    den = 2*y + dist
-    invden = inv(den)
-    # TODO refactor more repeated subexpressions
-    HiU[1,1] = -(-2*ylzy^3 + (4*x - y)*abs2(ylzy) + (-3*abs2(x) + 2*y*x - 2*abs2(y))*ylzy + x*(abs2(x) - 2*y*x + 2*abs2(y))) * invden # (-2 y^3 log^3(z/y) + (4 x - y) y^2 log^2(z/y) + y (-3 x^2 + 2 y x - 2 y^2) log(z/y) + x (x^2 - 2 y x + 2 y^2))/(x - 2 y - y log(z/y))
-    HiU[1,2] = y * (abs2(ylzy) - x*ylzy + x*y) * invden  # (y^2 (y log^2(z/y) - x log(z/y) + x))/(-x + 2 y + y log(z/y))
-    HiU[1,3] = y * z * (2*ylzy - x) * invden # (y z (2 y log(z/y) - x))/(-x + 2 y + y log(z/y))
-    HiU[2,2] = abs2(y) * (1 - y*invden) # (y^2 (-x + y + y log(z/y)))/(-x + 2 y + y log(z/y))
-    HiU[2,3] = abs2(y) * z * invden # (y^2 z)/(-x + 2 y + y log(z/y))
-    HiU[3,3] = abs2(z) * (1 - y*invden) # (z^2 (-x + y + y log(z/y)))/(-x + 2 y + y log(z/y)))
+    g[1] = iylzyx # 1/(-x + y log(z/y))
+    g[2] = iylzyx * (y - x - 2*ylzyx) / y # (x + y - 2 y log(z/y))/(y (-x + y log(z/y)))
+    g[3] = (-1 - y*iylzyx) / z # (-1 + y/(x - y log(z/y)))/z
+
+    # Hessian
+    yz = y/z
+    H = prm.H
+    H[1,1] = abs2(iylzyx)
+    H[1,2] = H[2,1] = -(lzy - 1.0)*H[1,1]
+    H[1,3] = H[3,1] = -yz*H[1,1]
+    H[2,2] = abs2(lzy - 1.0)*H[1,1] + iylzyx/y + inv(abs2(y))
+    H[2,3] = H[3,2] = yz*(lzy - 1.0)*H[1,1] - iylzyx/z
+    H[3,3] = abs2(yz)*H[1,1] + yz/z*iylzyx + inv(abs2(z))
+
+    @. prm.H2 = H
+    prm.F = bunchkaufman!(Symmetric(prm.H2))
+
+    # old code for inverse hessian
+    # den = 2*y + dist
+    # invden = inv(den)
+    # Hi[1,1] = -(-2*ylzy^3 + (4*x - y)*abs2(ylzy) + (-3*abs2(x) + 2*y*x - 2*abs2(y))*ylzy + x*(abs2(x) - 2*y*x + 2*abs2(y))) * invden # (-2 y^3 log^3(z/y) + (4 x - y) y^2 log^2(z/y) + y (-3 x^2 + 2 y x - 2 y^2) log(z/y) + x (x^2 - 2 y x + 2 y^2))/(x - 2 y - y log(z/y))
+    # Hi[1,2] = y * (abs2(ylzy) - x*ylzy + x*y) * invden  # (y^2 (y log^2(z/y) - x log(z/y) + x))/(-x + 2 y + y log(z/y))
+    # Hi[1,3] = y * z * (2*ylzy - x) * invden # (y z (2 y log(z/y) - x))/(-x + 2 y + y log(z/y))
+    # Hi[2,2] = abs2(y) * (1 - y*invden) # (y^2 (-x + y + y log(z/y)))/(-x + 2 y + y log(z/y))
+    # Hi[2,3] = abs2(y) * z * invden # (y^2 z)/(-x + 2 y + y log(z/y))
+    # Hi[3,3] = abs2(z) * (1 - y*invden) # (z^2 (-x + y + y log(z/y)))/(-x + 2 y + y log(z/y)))
 
     return true
 end
 
-calcg_prm!(g::AbstractVector{Float64}, prm::ExponentialCone) = (g .= prm.g; g)
-calcHiarr_prm!(prod::AbstractArray{Float64}, arr::AbstractArray{Float64}, prm::ExponentialCone) = mul!(prod, prm.Hi, arr)
+calcg_prm!(g::AbstractVector{Float64}, prm::ExponentialCone) = (@. g = prm.g; g)
+calcHiarr_prm!(prod::AbstractArray{Float64}, arr::AbstractArray{Float64}, prm::ExponentialCone) = ldiv!(prod, prm.F, arr)
+calcHarr_prm!(prod::AbstractArray{Float64}, arr::AbstractArray{Float64}, prm::ExponentialCone) = mul!(prod, prm.H, arr)
diff --git a/src/primitivecones/nonnegative.jl b/src/primitivecones/nonnegative.jl
index f709dfea7..6f0e5edd0 100644
--- a/src/primitivecones/nonnegative.jl
+++ b/src/primitivecones/nonnegative.jl
@@ -1,20 +1,31 @@
 
 # nonnegative orthant cone
+# barrier is -sum_j ln x_j
+# from Nesterov & Todd "Self-Scaled Barriers and Interior-Point Methods for Convex Programming"
 mutable struct NonnegativeCone <: PrimitiveCone
     dim::Int
     pnt::AbstractVector{Float64}
+    invpnt::Vector{Float64}
 
     function NonnegativeCone(dim::Int)
         prm = new()
         prm.dim = dim
+        prm.invpnt = Vector{Float64}(undef, dim)
         return prm
     end
 end
 
 dimension(prm::NonnegativeCone) = prm.dim
 barrierpar_prm(prm::NonnegativeCone) = prm.dim
-getintdir_prm!(arr::AbstractVector{Float64}, prm::NonnegativeCone) = (arr .= 1.0; arr)
+getintdir_prm!(arr::AbstractVector{Float64}, prm::NonnegativeCone) = (@. arr = 1.0; arr)
 loadpnt_prm!(prm::NonnegativeCone, pnt::AbstractVector{Float64}) = (prm.pnt = pnt)
 incone_prm(prm::NonnegativeCone) = all(x -> (x > 0.0), prm.pnt)
-calcg_prm!(g::AbstractVector{Float64}, prm::NonnegativeCone) = (g .= inv.(prm.pnt) .* -1.0; g)
-calcHiarr_prm!(prod::AbstractArray{Float64}, arr::AbstractArray{Float64}, prm::NonnegativeCone) = (prod .= abs2.(prm.pnt) .* arr; prod)
+
+function calcg_prm!(g::AbstractVector{Float64}, prm::NonnegativeCone)
+    @. prm.invpnt = inv(prm.pnt)
+    @. g = -prm.invpnt
+    return g
+end
+
+calcHiarr_prm!(prod::AbstractArray{Float64}, arr::AbstractArray{Float64}, prm::NonnegativeCone) = (@. prod = abs2(prm.pnt)*arr; prod)
+calcHarr_prm!(prod::AbstractArray{Float64}, arr::AbstractArray{Float64}, prm::NonnegativeCone) = (@. prod = abs2(prm.invpnt)*arr; prod)
diff --git a/src/primitivecones/positivesemidefinite.jl b/src/primitivecones/positivesemidefinite.jl
index daa92a478..212d5c635 100644
--- a/src/primitivecones/positivesemidefinite.jl
+++ b/src/primitivecones/positivesemidefinite.jl
@@ -1,29 +1,31 @@
 
 # positive semidefinite cone (lower triangle, off-diagonals scaled)
+# barrier for matrix cone is -ln det(X)
+# from Nesterov & Todd "Self-Scaled Barriers and Interior-Point Methods for Convex Programming"
 mutable struct PositiveSemidefiniteCone <: PrimitiveCone
     dim::Int
     side::Int
     pnt::AbstractVector{Float64}
-    g::Vector{Float64}
     mat::Matrix{Float64}
     mat2::Matrix{Float64}
     matpnt::Matrix{Float64}
+    matinv::Matrix{Float64}
 
     function PositiveSemidefiniteCone(dim::Int)
         prm = new()
         prm.dim = dim
         prm.side = round(Int, sqrt(0.25 + dim + dim) - 0.5)
-        prm.g = Vector{Float64}(undef, dim)
         prm.mat = Matrix{Float64}(undef, prm.side, prm.side)
         prm.mat2 = copy(prm.mat)
         prm.matpnt = copy(prm.mat)
+        # prm.matinv = copy(prm.mat)
         return prm
     end
 end
 
 dimension(prm::PositiveSemidefiniteCone) = prm.dim
 barrierpar_prm(prm::PositiveSemidefiniteCone) = prm.side
-getintdir_prm!(arr::AbstractVector{Float64}, prm::PositiveSemidefiniteCone) = mattovec!(arr, Matrix(1.0I, prm.side, prm.side))
+getintdir_prm!(arr::AbstractVector{Float64}, prm::PositiveSemidefiniteCone) = mattovec!(arr, Matrix(1.0I, prm.side, prm.side)) # TODO eliminate allocs
 loadpnt_prm!(prm::PositiveSemidefiniteCone, pnt::AbstractVector{Float64}) = (prm.pnt = pnt)
 
 function incone_prm(prm::PositiveSemidefiniteCone)
@@ -33,13 +35,12 @@ function incone_prm(prm::PositiveSemidefiniteCone)
         return false
     end
 
-    grad = -inv(F) # TODO reduce allocs
-    mattovec!(prm.g, grad)
+    prm.matinv = -inv(F) # TODO eliminate allocs
     vectomat!(prm.matpnt, prm.pnt)
     return true
 end
 
-calcg_prm!(g::AbstractVector{Float64}, prm::PositiveSemidefiniteCone) = (g .= prm.g; g)
+calcg_prm!(g::AbstractVector{Float64}, prm::PositiveSemidefiniteCone) = (mattovec!(g, prm.matinv); g)
 
 function calcHiarr_prm!(prod::AbstractVector{Float64}, arr::AbstractVector{Float64}, prm::PositiveSemidefiniteCone)
     vectomat!(prm.mat, arr)
@@ -58,3 +59,21 @@ function calcHiarr_prm!(prod::AbstractMatrix{Float64}, arr::AbstractMatrix{Float
     end
     return prod
 end
+
+function calcHarr_prm!(prod::AbstractVector{Float64}, arr::AbstractVector{Float64}, prm::PositiveSemidefiniteCone)
+    vectomat!(prm.mat, arr)
+    mul!(prm.mat2, prm.mat, prm.matinv)
+    mul!(prm.mat, prm.matinv, prm.mat2)
+    mattovec!(prod, prm.mat)
+    return prod
+end
+
+function calcHarr_prm!(prod::AbstractMatrix{Float64}, arr::AbstractMatrix{Float64}, prm::PositiveSemidefiniteCone)
+    for j in 1:size(arr, 2)
+        vectomat!(prm.mat, view(arr, :, j))
+        mul!(prm.mat2, prm.mat, prm.matinv)
+        mul!(prm.mat, prm.matinv, prm.mat2)
+        mattovec!(view(prod, :, j), prm.mat)
+    end
+    return prod
+end
diff --git a/src/primitivecones/power.jl b/src/primitivecones/power.jl
index a4139b304..67716bc91 100644
--- a/src/primitivecones/power.jl
+++ b/src/primitivecones/power.jl
@@ -1,4 +1,8 @@
-*
+
+# TODO use AD for the barrier function
+# TODO should we just do the n-dim power cone? any benefit from 3-d restriction?
+
+
 # power cone (MathOptInterface definition) parametrized by power α
 # x^α * y^(1-α) >= abs(z), x,y >= 0
 # barrier from Skajaa & Ye 2014 is
@@ -31,36 +35,36 @@ function incone_prm(prm::PowerCone)
     end
     α = prm.exponent
 
-    lhs = x^α * y^(1-α)
-    dist = lhs - abs(z)
-    if dist <= 0.0
+    if x^α * y^(1-α) - abs(z) <= 0.0
         return false
     end
 
-    # TODO refactor more repeated subexpressions
     # gradient
-    g = prm.g
-    g[1] = (α - (2*α*y^2*x^(2*α))/(y^2*x^(2*α) - z^2*y^(2*α)) - 1)/x # (α - (2 α y^2 x^(2 α))/(y^2 x^(2 α) - z^2 y^(2 α)) - 1)/x
-    g[2] = ((α - 2)*y^2*x^(2*α) + α*z^2*y^(2*α))/(y*(y^2*x^(2*α) - z^2*y^(2*α))) # ((α - 2) y^2 x^(2 α) + α z^2 y^(2 α))/(y (y^2 x^(2 α) - z^2 y^(2 α)))
-    g[3] = (2*z)/(x^(2*α)*y^(2 - 2*α) - z^2) # (2 z)/(x^(2 α) y^(2 - 2 α) - z^2)
 
-    # Hessian inverse
-    HiU = prm.Hi.data # upper triangle
-    HiU[1,1] = (x^2*(2*y^(2*α + 2)*z^2*(2*α^2 - 3*α + 1)*x^(2*α) + y^4*(α - 2)*x^(4*α) + y^(4*α)*z^4*α))/(2*y^(2*α + 2)*z^2*(1 - 2*α)^2*x^(2*α) + y^4*(α^2 - α - 2)*x^(4*α) - y^(4*α)*z^4*(α - 1)*α)
-    # (x^2 (2 y^(2 α + 2) z^2 (2 α^2 - 3 α + 1) x^(2 α) + y^4 (α - 2) x^(4 α) + y^(4 α) z^4 α))/(2 y^(2 α + 2) z^2 (1 - 2 α)^2 x^(2 α) + y^4 (α^2 - α - 2) x^(4 α) - y^(4 α) z^4 (α - 1) α)
-    HiU[1,2] = (4*x^(2*α + 1)*y^(2*α + 3)*z^2*(α - 1)*α)/(2*y^(2*α + 2)*z^2*(1 - 2*α)^2*x^(2*α) + y^4*(α^2 - α - 2)*x^(4*α) - y^(4*α)*z^4*(α - 1)*α)
-    # (4 x^(2 α + 1) y^(2 α + 3) z^2 (α - 1) α)/(2 y^(2 α + 2) z^2 (1 - 2 α)^2 x^(2 α) + y^4 (α^2 - α - 2) x^(4 α) - y^(4 α) z^4 (α - 1) α)
-    HiU[1,3] = (2*x^(2*α + 1)*y^2*z*α*(y^2*(α - 2)*x^(2*α) + y^(2*α)*z^2*α))/(2*y^(2*α + 2)*z^2*(1 - 2*α)^2*x^(2*α) + y^4*(α^2 - α - 2)*x^(4*α) - y^(4*α)*z^4*(α - 1)*α)
-    # (2 x^(2 α + 1) y^2 z α (y^2 (α - 2) x^(2 α) + y^(2 α) z^2 α))/(2 y^(2 α + 2) z^2 (1 - 2 α)^2 x^(2 α) + y^4 (α^2 - α - 2) x^(4 α) - y^(4 α) z^4 (α - 1) α)
-    HiU[2,2] = -(-2*y^(2*α + 4)*z^2*α*(2*α - 1)*x^(2*α) + y^6*(α + 1)*x^(4*α) + y^(4*α + 2)*z^4*(α - 1))/(2*y^(2*α + 2)*z^2*(1 - 2*α)^2*x^(2*α) + y^4*(α^2 - α - 2)*x^(4*α) - y^(4*α)*z^4*(α - 1)*α)
-    # -(-2 y^(2 α + 4) z^2 α (2 α - 1) x^(2 α) + y^6 (α + 1) x^(4 α) + y^(4 α + 2) z^4 (α - 1))/(2 y^(2 α + 2) z^2 (1 - 2 α)^2 x^(2 α) + y^4 (α^2 - α - 2) x^(4 α) - y^(4 α) z^4 (α - 1) α)
-    HiU[2,3] = (2*x^(2*α)*y^3*z*(α - 1)*(y^2*(α + 1)*x^(2*α) + y^(2*α)*z^2*(α - 1)))/(2*y^(2*α + 2)*z^2*(1 - 2*α)^2*x^(2*α) + y^4*(α^2 - α - 2)*x^(4*α) - y^(4*α)*z^4*(α - 1)*α)
-    # (2 x^(2 α) y^3 z (α - 1) (y^2 (α + 1) x^(2 α) + y^(2 α) z^2 (α - 1)))/(2 y^(2 α + 2) z^2 (1 - 2 α)^2 x^(2 α) + y^4 (α^2 - α - 2) x^(4 α) - y^(4 α) z^4 (α - 1) α)
-    HiU[3,3] = (y^(4*α + 2)*z^4*(11*α^2 - 11*α + 2)*x^(2*α) - 3*y^(2*α + 4)*z^2*(α - 1)*α*x^(4*α) + y^6*(α^2 - α - 2)*x^(6*α) - y^(6*α)*z^6*(α - 1)*α)/(4*y^(4*α + 2)*z^2*(1 - 2*α)^2*x^(2*α) + 2*y^(2*α + 4)*(α^2 - α - 2)*x^(4*α) - 2*y^(6*α)*z^4*(α - 1)*α)
-    # (y^(4 α + 2) z^4 (11 α^2 - 11 α + 2) x^(2 α) - 3 y^(2 α + 4) z^2 (α - 1) α x^(4 α) + y^6 (α^2 - α - 2) x^(6 α) - y^(6 α) z^6 (α - 1) α)/(4 y^(4 α + 2) z^2 (1 - 2 α)^2 x^(2 α) + 2 y^(2 α + 4) (α^2 - α - 2) x^(4 α) - 2 y^(6 α) z^4 (α - 1) α)
+    # Hessian
+
+
+    # old code for gradient and inverse hessian
+    # g[1] = (α - (2*α*y^2*x^(2*α))/(y^2*x^(2*α) - z^2*y^(2*α)) - 1)/x # (α - (2 α y^2 x^(2 α))/(y^2 x^(2 α) - z^2 y^(2 α)) - 1)/x
+    # g[2] = ((α - 2)*y^2*x^(2*α) + α*z^2*y^(2*α))/(y*(y^2*x^(2*α) - z^2*y^(2*α))) # ((α - 2) y^2 x^(2 α) + α z^2 y^(2 α))/(y (y^2 x^(2 α) - z^2 y^(2 α)))
+    # g[3] = (2*z)/(x^(2*α)*y^(2 - 2*α) - z^2) # (2 z)/(x^(2 α) y^(2 - 2 α) - z^2)
+
+    # Hi[1,1] = (x^2*(2*y^(2*α + 2)*z^2*(2*α^2 - 3*α + 1)*x^(2*α) + y^4*(α - 2)*x^(4*α) + y^(4*α)*z^4*α))/(2*y^(2*α + 2)*z^2*(1 - 2*α)^2*x^(2*α) + y^4*(α^2 - α - 2)*x^(4*α) - y^(4*α)*z^4*(α - 1)*α)
+    # # (x^2 (2 y^(2 α + 2) z^2 (2 α^2 - 3 α + 1) x^(2 α) + y^4 (α - 2) x^(4 α) + y^(4 α) z^4 α))/(2 y^(2 α + 2) z^2 (1 - 2 α)^2 x^(2 α) + y^4 (α^2 - α - 2) x^(4 α) - y^(4 α) z^4 (α - 1) α)
+    # Hi[1,2] = (4*x^(2*α + 1)*y^(2*α + 3)*z^2*(α - 1)*α)/(2*y^(2*α + 2)*z^2*(1 - 2*α)^2*x^(2*α) + y^4*(α^2 - α - 2)*x^(4*α) - y^(4*α)*z^4*(α - 1)*α)
+    # # (4 x^(2 α + 1) y^(2 α + 3) z^2 (α - 1) α)/(2 y^(2 α + 2) z^2 (1 - 2 α)^2 x^(2 α) + y^4 (α^2 - α - 2) x^(4 α) - y^(4 α) z^4 (α - 1) α)
+    # Hi[1,3] = (2*x^(2*α + 1)*y^2*z*α*(y^2*(α - 2)*x^(2*α) + y^(2*α)*z^2*α))/(2*y^(2*α + 2)*z^2*(1 - 2*α)^2*x^(2*α) + y^4*(α^2 - α - 2)*x^(4*α) - y^(4*α)*z^4*(α - 1)*α)
+    # # (2 x^(2 α + 1) y^2 z α (y^2 (α - 2) x^(2 α) + y^(2 α) z^2 α))/(2 y^(2 α + 2) z^2 (1 - 2 α)^2 x^(2 α) + y^4 (α^2 - α - 2) x^(4 α) - y^(4 α) z^4 (α - 1) α)
+    # Hi[2,2] = -(-2*y^(2*α + 4)*z^2*α*(2*α - 1)*x^(2*α) + y^6*(α + 1)*x^(4*α) + y^(4*α + 2)*z^4*(α - 1))/(2*y^(2*α + 2)*z^2*(1 - 2*α)^2*x^(2*α) + y^4*(α^2 - α - 2)*x^(4*α) - y^(4*α)*z^4*(α - 1)*α)
+    # # -(-2 y^(2 α + 4) z^2 α (2 α - 1) x^(2 α) + y^6 (α + 1) x^(4 α) + y^(4 α + 2) z^4 (α - 1))/(2 y^(2 α + 2) z^2 (1 - 2 α)^2 x^(2 α) + y^4 (α^2 - α - 2) x^(4 α) - y^(4 α) z^4 (α - 1) α)
+    # Hi[2,3] = (2*x^(2*α)*y^3*z*(α - 1)*(y^2*(α + 1)*x^(2*α) + y^(2*α)*z^2*(α - 1)))/(2*y^(2*α + 2)*z^2*(1 - 2*α)^2*x^(2*α) + y^4*(α^2 - α - 2)*x^(4*α) - y^(4*α)*z^4*(α - 1)*α)
+    # # (2 x^(2 α) y^3 z (α - 1) (y^2 (α + 1) x^(2 α) + y^(2 α) z^2 (α - 1)))/(2 y^(2 α + 2) z^2 (1 - 2 α)^2 x^(2 α) + y^4 (α^2 - α - 2) x^(4 α) - y^(4 α) z^4 (α - 1) α)
+    # Hi[3,3] = (y^(4*α + 2)*z^4*(11*α^2 - 11*α + 2)*x^(2*α) - 3*y^(2*α + 4)*z^2*(α - 1)*α*x^(4*α) + y^6*(α^2 - α - 2)*x^(6*α) - y^(6*α)*z^6*(α - 1)*α)/(4*y^(4*α + 2)*z^2*(1 - 2*α)^2*x^(2*α) + 2*y^(2*α + 4)*(α^2 - α - 2)*x^(4*α) - 2*y^(6*α)*z^4*(α - 1)*α)
+    # # (y^(4 α + 2) z^4 (11 α^2 - 11 α + 2) x^(2 α) - 3 y^(2 α + 4) z^2 (α - 1) α x^(4 α) + y^6 (α^2 - α - 2) x^(6 α) - y^(6 α) z^6 (α - 1) α)/(4 y^(4 α + 2) z^2 (1 - 2 α)^2 x^(2 α) + 2 y^(2 α + 4) (α^2 - α - 2) x^(4 α) - 2 y^(6 α) z^4 (α - 1) α)
 
     return true
 end
 
-calcg_prm!(g::AbstractVector{Float64}, prm::PowerCone) = (g .= prm.g; g)
+calcg_prm!(g::AbstractVector{Float64}, prm::PowerCone) = (@. g = prm.g; g)
 calcHiarr_prm!(prod::AbstractArray{Float64}, arr::AbstractArray{Float64}, prm::PowerCone) = mul!(prod, prm.Hi, arr)
+calcHarr_prm!(prod::AbstractArray{Float64}, arr::AbstractArray{Float64}, prm::PowerCone) = mul!(prod, prm.H, arr)
diff --git a/src/primitivecones/rotatedsecondorder.jl b/src/primitivecones/rotatedsecondorder.jl
index 34f358143..fb05f49ce 100644
--- a/src/primitivecones/rotatedsecondorder.jl
+++ b/src/primitivecones/rotatedsecondorder.jl
@@ -1,23 +1,26 @@
 
 # rotated second order cone
+# barrier is -ln(2*x*y - norm(z)^2)
+# from Nesterov & Todd "Self-Scaled Barriers and Interior-Point Methods for Convex Programming"
 mutable struct RotatedSecondOrderCone <: PrimitiveCone
     dim::Int
     pnt::AbstractVector{Float64}
-    g::Vector{Float64}
+    disth::Float64
     Hi::Matrix{Float64}
+    H::Matrix{Float64}
 
     function RotatedSecondOrderCone(dim::Int)
         prm = new()
         prm.dim = dim
-        prm.g = Vector{Float64}(undef, dim)
         prm.Hi = Matrix{Float64}(undef, dim, dim)
+        prm.H = copy(prm.Hi)
         return prm
     end
 end
 
 dimension(prm::RotatedSecondOrderCone) = prm.dim
 barrierpar_prm(prm::RotatedSecondOrderCone) = 2.0
-getintdir_prm!(arr::AbstractVector{Float64}, prm::RotatedSecondOrderCone) = (arr[1:2] .= 1.0; arr[3:end] .= 0.0; arr)
+getintdir_prm!(arr::AbstractVector{Float64}, prm::RotatedSecondOrderCone) = (@. arr[1:2] = 1.0; @. arr[3:end] = 0.0; arr)
 loadpnt_prm!(prm::RotatedSecondOrderCone, pnt::AbstractVector{Float64}) = (prm.pnt = pnt)
 
 function incone_prm(prm::RotatedSecondOrderCone)
@@ -25,27 +28,28 @@ function incone_prm(prm::RotatedSecondOrderCone)
     if (pnt[1] <= 0) || (pnt[2] <= 0)
         return false
     end
-
     nrm2 = 0.5*sum(abs2, pnt[j] for j in 3:prm.dim)
-    disth = pnt[1]*pnt[2] - nrm2
-    if disth > 0.0
-        g = prm.g
-        g .= pnt
-        g[1] = -pnt[2]
-        g[2] = -pnt[1]
-        g ./= disth
+    prm.disth = pnt[1]*pnt[2] - nrm2
+    if prm.disth <= 0.0
+        return false
+    end
 
-        Hi = prm.Hi
-        mul!(Hi, pnt, pnt')
-        Hi[2,1] = Hi[1,2] = nrm2
-        for j in 3:prm.dim
-            Hi[j,j] += disth
-        end
-        return true
+    mul!(prm.Hi, pnt, pnt')
+    prm.Hi[2,1] = prm.Hi[1,2] = nrm2
+    for j in 3:prm.dim
+        prm.Hi[j,j] += prm.disth
+    end
+    @. prm.H = prm.Hi
+    for j in 3:prm.dim
+        prm.H[1,j] = prm.H[j,1] = -prm.Hi[2,j]
+        prm.H[2,j] = prm.H[j,2] = -prm.Hi[1,j]
     end
-    return false
+    prm.H[1,1] = prm.Hi[2,2]
+    prm.H[2,2] = prm.Hi[1,1]
+    @. prm.H *= inv(prm.disth)^2
+    return true
 end
 
-calcg_prm!(g::AbstractVector{Float64}, prm::RotatedSecondOrderCone) = (g .= prm.g; g)
-# TODO if later use Linv instead of Hinv, see Vandenberghe coneprog.dvi for analytical Linv
+calcg_prm!(g::AbstractVector{Float64}, prm::RotatedSecondOrderCone) = (@. g = prm.pnt/prm.disth; tmp = g[1]; g[1] = -g[2]; g[2] = -tmp; g)
 calcHiarr_prm!(prod::AbstractArray{Float64}, arr::AbstractArray{Float64}, prm::RotatedSecondOrderCone) = mul!(prod, prm.Hi, arr)
+calcHarr_prm!(prod::AbstractArray{Float64}, arr::AbstractArray{Float64}, prm::RotatedSecondOrderCone) = mul!(prod, prm.H, arr)
diff --git a/src/primitivecones/secondorder.jl b/src/primitivecones/secondorder.jl
index db6352a06..dcefdaf3f 100644
--- a/src/primitivecones/secondorder.jl
+++ b/src/primitivecones/secondorder.jl
@@ -1,42 +1,51 @@
 
 # second order cone
+# barrier is -ln(x^2 - norm(y)^2)
+# from Nesterov & Todd "Self-Scaled Barriers and Interior-Point Methods for Convex Programming"
 mutable struct SecondOrderCone <: PrimitiveCone
     dim::Int
     pnt::AbstractVector{Float64}
     dist::Float64
     Hi::Matrix{Float64}
+    H::Matrix{Float64}
 
     function SecondOrderCone(dim::Int)
         prm = new()
         prm.dim = dim
         prm.Hi = Matrix{Float64}(undef, dim, dim)
+        prm.H = copy(prm.Hi)
         return prm
     end
 end
 
 dimension(prm::SecondOrderCone) = prm.dim
 barrierpar_prm(prm::SecondOrderCone) = 1.0
-getintdir_prm!(arr::AbstractVector{Float64}, prm::SecondOrderCone) = (arr[1] = 1.0; arr[2:end] .= 0.0; arr)
+getintdir_prm!(arr::AbstractVector{Float64}, prm::SecondOrderCone) = (arr[1] = 1.0; @. arr[2:end] = 0.0; arr)
 loadpnt_prm!(prm::SecondOrderCone, pnt::AbstractVector{Float64}) = (prm.pnt = pnt)
 
 function incone_prm(prm::SecondOrderCone)
     if prm.pnt[1] <= 0
         return false
     end
-
     prm.dist = abs2(prm.pnt[1]) - sum(abs2, prm.pnt[j] for j in 2:prm.dim)
-    if prm.dist > 0.0
-        mul!(prm.Hi, prm.pnt, prm.pnt')
-        prm.Hi .+= prm.Hi
-        prm.Hi[1,1] -= prm.dist
-        for j in 2:prm.dim
-            prm.Hi[j,j] += prm.dist
-        end
-        return true
+    if prm.dist <= 0.0
+        return false
+    end
+
+    mul!(prm.Hi, prm.pnt, prm.pnt')
+    @. prm.Hi += prm.Hi
+    prm.Hi[1,1] -= prm.dist
+    for j in 2:prm.dim
+        prm.Hi[j,j] += prm.dist
+    end
+    @. prm.H = prm.Hi
+    for j in 2:prm.dim
+        prm.H[1,j] = prm.H[j,1] = -prm.H[j,1]
     end
-    return false
+    @. prm.H *= inv(prm.dist)^2
+    return true
 end
 
-calcg_prm!(g::AbstractVector{Float64}, prm::SecondOrderCone) = (g .= inv(prm.dist) .* prm.pnt; g[1] *= -1.0; g)
-# TODO if later use Linv instead of Hinv, see Vandenberghe coneprog.dvi for analytical Linv
+calcg_prm!(g::AbstractVector{Float64}, prm::SecondOrderCone) = (@. g = prm.pnt/prm.dist; g[1] = -g[1]; g)
 calcHiarr_prm!(prod::AbstractArray{Float64}, arr::AbstractArray{Float64}, prm::SecondOrderCone) = mul!(prod, prm.Hi, arr)
+calcHarr_prm!(prod::AbstractArray{Float64}, arr::AbstractArray{Float64}, prm::SecondOrderCone) = mul!(prod, prm.H, arr)
diff --git a/src/primitivecones/sumofsquares.jl b/src/primitivecones/sumofsquares.jl
index 4b4240e1c..a465bbf36 100644
--- a/src/primitivecones/sumofsquares.jl
+++ b/src/primitivecones/sumofsquares.jl
@@ -6,6 +6,7 @@ mutable struct SumOfSquaresCone <: PrimitiveCone
     pnt::AbstractVector{Float64}
     g::Vector{Float64}
     H::Matrix{Float64}
+    Hchol::Matrix{Float64}
     F::Cholesky{Float64,Array{Float64,2}}
     ipwtpnt::Vector{Matrix{Float64}}
     Vp::Vector{Matrix{Float64}}
@@ -20,6 +21,7 @@ mutable struct SumOfSquaresCone <: PrimitiveCone
         prm.ipwt = ipwt
         prm.g = similar(ipwt[1], dim)
         prm.H = similar(ipwt[1], dim, dim)
+        prm.Hchol = copy(prm.H)
         prm.ipwtpnt = [similar(ipwt[1], size(ipwtj, 2), size(ipwtj, 2)) for ipwtj in ipwt]
         prm.Vp = [similar(ipwt[1], dim, size(ipwtj, 2)) for ipwtj in ipwt]
         prm.Vp2 = similar(ipwt[1], dim, dim)
@@ -29,12 +31,12 @@ end
 
 dimension(prm::SumOfSquaresCone) = prm.dim
 barrierpar_prm(prm::SumOfSquaresCone) = sum(size(ipwtj, 2) for ipwtj in prm.ipwt)
-getintdir_prm!(arr::AbstractVector{Float64}, prm::SumOfSquaresCone) = (arr .= 1.0; arr)
+getintdir_prm!(arr::AbstractVector{Float64}, prm::SumOfSquaresCone) = (@. arr = 1.0; arr)
 loadpnt_prm!(prm::SumOfSquaresCone, pnt::AbstractVector{Float64}) = (prm.pnt = pnt)
 
 function incone_prm(prm::SumOfSquaresCone)
-    prm.g .= 0.0
-    prm.H .= 0.0
+    @. prm.g = 0.0
+    @. prm.H = 0.0
 
     for j in eachindex(prm.ipwt) # TODO can do this loop in parallel (use separate Vp2[j])
         # prm.ipwtpnt[j] = prm.ipwt[j]'*Diagonal(prm.pnt)*prm.ipwt[j]
@@ -46,22 +48,24 @@ function incone_prm(prm::SumOfSquaresCone)
             return false
         end
 
-        prm.Vp[j] .= prm.ipwt[j] # TODO this shouldn't be necessary if don't have to use rdiv
+        @. prm.Vp[j] = prm.ipwt[j] # TODO this shouldn't be necessary if don't have to use rdiv
         rdiv!(prm.Vp[j], F.U)
         mul!(prm.Vp2, prm.Vp[j], prm.Vp[j]') # TODO if parallel, need to use separate Vp2[j]
 
         for i in eachindex(prm.g)
-            @inbounds prm.g[i] -= prm.Vp2[i,i]
+            prm.g[i] -= prm.Vp2[i,i]
         end
-        prm.H .+= abs2.(prm.Vp2)
+        @. prm.H += abs2(prm.Vp2)
     end
 
-    prm.F = cholesky!(prm.H, check=false)
+    @. prm.Hchol = prm.H
+    prm.F = cholesky!(prm.Hchol, check=false)
     if !issuccess(prm.F)
         return false
     end
     return true
 end
 
-calcg_prm!(g::AbstractVector{Float64}, prm::SumOfSquaresCone) = (g .= prm.g; g)
+calcg_prm!(g::AbstractVector{Float64}, prm::SumOfSquaresCone) = (@. g = prm.g; g)
 calcHiarr_prm!(prod::AbstractArray{Float64}, arr::AbstractArray{Float64}, prm::SumOfSquaresCone) = ldiv!(prod, prm.F, arr)
+calcHarr_prm!(prod::AbstractArray{Float64}, arr::AbstractArray{Float64}, prm::SumOfSquaresCone) = mul!(prod, prm.H, arr)
diff --git a/test/moiexamples.jl b/test/moiexamples.jl
index b6e02588a..e69de29bb 100644
--- a/test/moiexamples.jl
+++ b/test/moiexamples.jl
@@ -1,43 +0,0 @@
-
-# TODO 
-# @testset "envelope example" begin
-#     opt = build_envelope(2, 5, 1, 5, use_data=true, native=false)
-#     MOI.optimize!(opt)
-#     @test MOI.get(opt, MOI.TerminationStatus()) == MOI.Success
-#     @test MOI.get(opt, MOI.ObjectiveValue()) ≈ -25.502777 atol=1e-4
-#     @test MOI.get(opt, MOI.ObjectiveBound()) ≈ -25.502777 atol=1e-4
-# end
-#
-# @testset "lp example" begin
-#     opt = build_lp(500, 1000, use_data=true, native=false)
-#     MOI.optimize!(opt)
-#     @test MOI.get(opt, MOI.TerminationStatus()) == MOI.Success
-#     @test MOI.get(opt, MOI.ObjectiveValue()) ≈ 2055.807 atol=1e-4
-#     @test MOI.get(opt, MOI.ObjectiveBound()) ≈ 2055.807 atol=1e-4
-# end
-#
-# @testset "namedpoly examples" begin
-#     @testset "Goldstein-Price" begin
-#         opt = build_namedpoly(:goldsteinprice, 7, native=false)
-#         MOI.optimize!(opt)
-#         @test MOI.get(opt, MOI.TerminationStatus()) == MOI.Success
-#         @test MOI.get(opt, MOI.ObjectiveValue()) ≈ 3 atol=1e-4
-#         @test MOI.get(opt, MOI.ObjectiveBound()) ≈ 3 atol=1e-4
-#     end
-#
-#     @testset "Robinson" begin
-#         opt = build_namedpoly(:robinson, 8, native=false)
-#         MOI.optimize!(opt)
-#         @test MOI.get(opt, MOI.TerminationStatus()) == MOI.Success
-#         @test MOI.get(opt, MOI.ObjectiveValue()) ≈ 0.814814 atol=1e-4
-#         @test MOI.get(opt, MOI.ObjectiveBound()) ≈ 0.814814 atol=1e-4
-#     end
-#
-#     @testset "Lotka-Volterra" begin
-#         opt = build_namedpoly(:lotkavolterra, 3, native=false)
-#         MOI.optimize!(opt)
-#         @test MOI.get(opt, MOI.TerminationStatus()) == MOI.Success
-#         @test MOI.get(opt, MOI.ObjectiveValue()) ≈ -20.8 atol=1e-4
-#         @test MOI.get(opt, MOI.ObjectiveBound()) ≈ -20.8 atol=1e-4
-#     end
-# end
diff --git a/test/nativeexamples.jl b/test/nativeexamples.jl
index 6d814428a..ca1237a2d 100644
--- a/test/nativeexamples.jl
+++ b/test/nativeexamples.jl
@@ -1,32 +1,183 @@
 
-@testset "large dense lp example (dense A)" begin
+@testset "small lp 1" begin
+    Random.seed!(1)
+    (n, p, q) = (10, 8, 10)
+    c = rand(0.0:9.0, n)
+    A = rand(-9.0:9.0, p, n)
+    b = A*ones(n)
+    G = SparseMatrixCSC(-1.0I, q, n)
+    h = zeros(q)
+    cone = Alfonso.Cone([Alfonso.NonnegativeCone(q)], [1:q])
     alf = Alfonso.AlfonsoOpt(verbose=verbflag)
-    build_lp!(alf, 500, 1000, use_data=true)
+    Alfonso.load_data!(alf, c, A, b, G, h, cone)
     @time Alfonso.solve!(alf)
     @test Alfonso.get_status(alf) == :Optimal
-    @test Alfonso.get_pobj(alf) ≈ 2055.807 atol=1e-4 rtol=1e-4
-    @test Alfonso.get_dobj(alf) ≈ 2055.807 atol=1e-4 rtol=1e-4
 end
 
-@testset "large sparse lp example (sparse A)" begin
+@testset "small lp 2" begin
+    Random.seed!(1)
+    (n, p, q) = (5, 2, 10)
+    c = rand(0.0:9.0, n)
+    A = rand(-9.0:9.0, p, n)
+    b = A*ones(n)
+    G = rand(q, n) - Matrix(2.0I, q, n)
+    h = G*ones(n)
+    cone = Alfonso.Cone([Alfonso.NonnegativeCone(q)], [1:q])
     alf = Alfonso.AlfonsoOpt(verbose=verbflag)
-    build_lp!(alf, 500, 1000, dense=false)
+    Alfonso.load_data!(alf, c, A, b, G, h, cone)
     @time Alfonso.solve!(alf)
     @test Alfonso.get_status(alf) == :Optimal
-    @test Alfonso.get_pobj(alf) ≈ Alfonso.get_dobj(alf) atol=1e-4 rtol=1e-4
 end
 
+@testset "small lp 3" begin
+    Random.seed!(1)
+    (n, p, q) = (30, 12, 30)
+    c = rand(0.0:9.0, n)
+    A = rand(-9.0:9.0, p, n)
+    b = A*ones(n)
+    @assert n == q
+    G = Diagonal(-1.0I, n)
+    h = zeros(q)
+    cone = Alfonso.Cone([Alfonso.NonnegativeCone(q)], [1:q])
+    alf = Alfonso.AlfonsoOpt(verbose=verbflag)
+    Alfonso.load_data!(alf, c, A, b, G, h, cone)
+    @time Alfonso.solve!(alf)
+    @test Alfonso.get_status(alf) == :Optimal
+end
+
+@testset "small second-order cone problem" begin
+    alf = Alfonso.AlfonsoOpt(verbose=verbflag)
+    c = Float64[0, -1, -1]
+    A = Float64[1 0 0; 0 1 0]
+    b = Float64[1, 1/sqrt(2)]
+    G = SparseMatrixCSC(-1.0I, 3, 3)
+    h = zeros(3)
+    cone = Alfonso.Cone([Alfonso.SecondOrderCone(3)], [1:3])
+    Alfonso.load_data!(alf, c, A, b, G, h, cone)
+    @time Alfonso.solve!(alf)
+    @test Alfonso.get_niters(alf) <= 15
+    @test Alfonso.get_status(alf) == :Optimal
+    @test Alfonso.get_pobj(alf) ≈ -sqrt(2) atol=1e-4 rtol=1e-4
+    @test Alfonso.get_dobj(alf) ≈ Alfonso.get_pobj(alf) atol=1e-4 rtol=1e-4
+    @test Alfonso.get_y(alf) ≈ [sqrt(2), 0] atol=1e-4 rtol=1e-4
+    @test Alfonso.get_x(alf) ≈ [1, 1/sqrt(2), 1/sqrt(2)] atol=1e-4 rtol=1e-4
+end
+
+@testset "small rotated second-order cone problem" begin
+    alf = Alfonso.AlfonsoOpt(verbose=verbflag)
+    c = Float64[0, 0, -1, -1]
+    A = Float64[1 0 0 0; 0 1 0 0]
+    b = Float64[1/2, 1]
+    G = SparseMatrixCSC(-1.0I, 4, 4)
+    h = zeros(4)
+    cone = Alfonso.Cone([Alfonso.RotatedSecondOrderCone(4)], [1:4])
+    Alfonso.load_data!(alf, c, A, b, G, h, cone)
+    @time Alfonso.solve!(alf)
+    @test Alfonso.get_niters(alf) <= 15
+    @test Alfonso.get_status(alf) == :Optimal
+    @test Alfonso.get_pobj(alf) ≈ -sqrt(2) atol=1e-4 rtol=1e-4
+    @test Alfonso.get_dobj(alf) ≈ Alfonso.get_pobj(alf) atol=1e-4 rtol=1e-4
+    @test Alfonso.get_x(alf)[3:4] ≈ [1, 1]/sqrt(2) atol=1e-4 rtol=1e-4
+end
+
+@testset "small rotated second-order cone problem 2" begin
+    alf = Alfonso.AlfonsoOpt(verbose=verbflag)
+    c = Float64[0, 0, -1]
+    A = Float64[1 0 0; 0 1 0]
+    b = Float64[1/2, 1]/sqrt(2)
+    G = SparseMatrixCSC(-1.0I, 3, 3)
+    h = zeros(3)
+    cone = Alfonso.Cone([Alfonso.RotatedSecondOrderCone(3)], [1:3])
+    Alfonso.load_data!(alf, c, A, b, G, h, cone)
+    @time Alfonso.solve!(alf)
+    @test Alfonso.get_niters(alf) <= 20
+    @test Alfonso.get_status(alf) == :Optimal
+    @test Alfonso.get_pobj(alf) ≈ -1/sqrt(2) atol=1e-4 rtol=1e-4
+    @test Alfonso.get_dobj(alf) ≈ Alfonso.get_pobj(alf) atol=1e-4 rtol=1e-4
+    @test Alfonso.get_x(alf)[2] ≈ 1/sqrt(2) atol=1e-4 rtol=1e-4
+end
+
+@testset "small positive semidefinite cone problem" begin
+    alf = Alfonso.AlfonsoOpt(verbose=verbflag)
+    c = Float64[0, -1, 0]
+    A = Float64[1 0 0; 0 0 1]
+    b = Float64[1/2, 1]
+    G = SparseMatrixCSC(-1.0I, 3, 3)
+    h = zeros(3)
+    cone = Alfonso.Cone([Alfonso.PositiveSemidefiniteCone(3)], [1:3])
+    Alfonso.load_data!(alf, c, A, b, G, h, cone)
+    @time Alfonso.solve!(alf)
+    @test Alfonso.get_niters(alf) <= 15
+    @test Alfonso.get_status(alf) == :Optimal
+    @test Alfonso.get_pobj(alf) ≈ -1 atol=1e-4 rtol=1e-4
+    @test Alfonso.get_dobj(alf) ≈ Alfonso.get_pobj(alf) atol=1e-4 rtol=1e-4
+    @test Alfonso.get_x(alf)[2] ≈ 1 atol=1e-4 rtol=1e-4
+end
+
+@testset "small positive semidefinite cone problem 2" begin
+    alf = Alfonso.AlfonsoOpt(verbose=verbflag)
+    c = Float64[1, 0, 1, 0, 0, 1]
+    A = Float64[1 2 3 4 5 6; 1 1 1 1 1 1]
+    b = Float64[10, 3]
+    G = SparseMatrixCSC(-1.0I, 6, 6)
+    h = zeros(6)
+    cone = Alfonso.Cone([Alfonso.PositiveSemidefiniteCone(6)], [1:6])
+    Alfonso.load_data!(alf, c, A, b, G, h, cone)
+    @time Alfonso.solve!(alf)
+    @test Alfonso.get_niters(alf) <= 20
+    @test Alfonso.get_status(alf) == :Optimal
+    @test Alfonso.get_pobj(alf) ≈ 1.249632 atol=1e-4 rtol=1e-4
+    @test Alfonso.get_dobj(alf) ≈ Alfonso.get_pobj(alf) atol=1e-4 rtol=1e-4
+    @test Alfonso.get_x(alf) ≈ [0.491545, 0.647333, 0.426249, 0.571161, 0.531874, 0.331838] atol=1e-4 rtol=1e-4
+end
+
+@testset "small exponential cone problem" begin
+    alf = Alfonso.AlfonsoOpt(verbose=verbflag)
+    c = Float64[1, 1, 1]
+    A = Float64[0 1 0; 1 0 0]
+    b = Float64[2, 1]
+    G = SparseMatrixCSC(-1.0I, 3, 3)
+    h = zeros(3)
+    cone = Alfonso.Cone([Alfonso.ExponentialCone()], [1:3])
+    Alfonso.load_data!(alf, c, A, b, G, h, cone)
+    @time Alfonso.solve!(alf)
+    @test Alfonso.get_niters(alf) <= 20
+    @test Alfonso.get_status(alf) == :Optimal
+    @test Alfonso.get_pobj(alf) ≈ (2*exp(1/2)+3) atol=1e-4 rtol=1e-4
+    @test Alfonso.get_dobj(alf) ≈ Alfonso.get_pobj(alf) atol=1e-4 rtol=1e-4
+    @test dot(Alfonso.get_y(alf), b) ≈ -Alfonso.get_pobj(alf) atol=1e-4 rtol=1e-4
+    @test Alfonso.get_x(alf) ≈ [1, 2, 2*exp(1/2)] atol=1e-4 rtol=1e-4
+    @test Alfonso.get_y(alf) ≈ -[1+exp(1/2)/2, 1+exp(1/2)] atol=1e-4 rtol=1e-4
+    @test Alfonso.get_z(alf) ≈ (c + A'*Alfonso.get_y(alf)) atol=1e-4 rtol=1e-4
+end
+
+# @testset "small power cone problem" begin
+#     alf = Alfonso.AlfonsoOpt(verbose=verbflag)
+#     c = Float64[1, 0, 0, -1, -1, 0]
+#     A = Float64[1 1 1/2 0 0 0; 0 0 0 0 0 1]
+#     b = Float64[2, 1]
+#     cone = Alfonso.Cone([Alfonso.PowerCone(0.2), Alfonso.PowerCone(0.4)], [[1, 2, 4], [3, 6, 5]])
+#     Alfonso.load_data!(alf, A, b, c, cone)
+#     @time Alfonso.solve!(alf)
+#     @test Alfonso.get_status(alf) == :Optimal
+#     @test Alfonso.get_pobj(alf) ≈ -1.80734 atol=1e-4 rtol=1e-4
+#     @test Alfonso.get_dobj(alf) ≈ Alfonso.get_pobj(alf) atol=1e-4 rtol=1e-4
+#     @test Alfonso.get_x(alf)[1:3] ≈ [0.0639314, 0.783361, 2.30542] atol=1e-4 rtol=1e-4
+# end
+
 @testset "small dense lp example (dense vs sparse A)" begin
     # dense methods
     d_alf = Alfonso.AlfonsoOpt(verbose=verbflag)
     build_lp!(d_alf, 50, 100, dense=true, tosparse=false)
     @time Alfonso.solve!(d_alf)
+    @test Alfonso.get_niters(d_alf) <= 40
     @test Alfonso.get_status(d_alf) == :Optimal
 
     # sparse methods
     s_alf = Alfonso.AlfonsoOpt(verbose=verbflag)
     build_lp!(s_alf, 50, 100, dense=true, tosparse=true)
     @time Alfonso.solve!(s_alf)
+    @test Alfonso.get_niters(s_alf) <= 40
     @test Alfonso.get_status(s_alf) == :Optimal
 
     @test Alfonso.get_pobj(d_alf) ≈ Alfonso.get_pobj(s_alf) atol=1e-4 rtol=1e-4
@@ -38,6 +189,7 @@ end
     d_alf = Alfonso.AlfonsoOpt(verbose=verbflag)
     build_envelope!(d_alf, 2, 5, 1, 5, use_data=true, dense=true)
     @time Alfonso.solve!(d_alf)
+    @test Alfonso.get_niters(d_alf) <= 30
     @test Alfonso.get_status(d_alf) == :Optimal
     @test Alfonso.get_pobj(d_alf) ≈ -25.502777 atol=1e-4 rtol=1e-4
     @test Alfonso.get_dobj(d_alf) ≈ -25.502777 atol=1e-4 rtol=1e-4
@@ -46,67 +198,38 @@ end
     s_alf = Alfonso.AlfonsoOpt(verbose=verbflag)
     build_envelope!(s_alf, 2, 5, 1, 5, use_data=true, dense=false)
     @time Alfonso.solve!(s_alf)
+    @test Alfonso.get_niters(s_alf) <= 30
     @test Alfonso.get_status(s_alf) == :Optimal
     @test Alfonso.get_pobj(s_alf) ≈ -25.502777 atol=1e-4 rtol=1e-4
     @test Alfonso.get_dobj(s_alf) ≈ -25.502777 atol=1e-4 rtol=1e-4
 end
 
-@testset "2D poly envelope example (dense vs sparse A)" begin
-    # dense methods
-    d_alf = Alfonso.AlfonsoOpt(verbose=verbflag)
-    build_envelope!(d_alf, 2, 4, 2, 7, dense=true)
-    @time Alfonso.solve!(d_alf)
-    @test Alfonso.get_status(d_alf) == :Optimal
-
-    # sparse methods
-    s_alf = Alfonso.AlfonsoOpt(verbose=verbflag)
-    build_envelope!(s_alf, 2, 4, 2, 7, dense=false)
-    @time Alfonso.solve!(s_alf)
-    @test Alfonso.get_status(s_alf) == :Optimal
-
-    @test Alfonso.get_pobj(d_alf) ≈ Alfonso.get_pobj(s_alf) atol=1e-4 rtol=1e-4
-    @test Alfonso.get_dobj(d_alf) ≈ Alfonso.get_dobj(s_alf) atol=1e-4 rtol=1e-4
-end
-
-@testset "3D poly envelope example (sparse A)" begin
-    alf = Alfonso.AlfonsoOpt(verbose=verbflag)
-    build_envelope!(alf, 2, 3, 3, 5, dense=false)
-    @time Alfonso.solve!(alf)
-    @test Alfonso.get_status(alf) == :Optimal
-    @test Alfonso.get_pobj(alf) ≈ Alfonso.get_dobj(alf) atol=1e-4 rtol=1e-4
-end
-
-@testset "4D poly envelope example (sparse A)" begin
-    alf = Alfonso.AlfonsoOpt(verbose=verbflag)
-    build_envelope!(alf, 2, 3, 4, 4, dense=false)
-    @time Alfonso.solve!(alf)
-    @test Alfonso.get_status(alf) == :Optimal
-    @test Alfonso.get_pobj(alf) ≈ Alfonso.get_dobj(alf) atol=1e-4 rtol=1e-4
-end
-
 # most values taken from https://people.sc.fsu.edu/~jburkardt/py_src/polynomials/polynomials.html
 @testset "Butcher" begin
     alf = Alfonso.AlfonsoOpt(verbose=verbflag)
     build_namedpoly!(alf, :butcher, 2)
     @time Alfonso.solve!(alf)
+    @test Alfonso.get_niters(alf) <= 40
     @test Alfonso.get_status(alf) == :Optimal
     @test Alfonso.get_pobj(alf) ≈ -1.4393333333 atol=1e-4 rtol=1e-4
     @test Alfonso.get_dobj(alf) ≈ -1.4393333333 atol=1e-4 rtol=1e-4
 end
 
 @testset "Caprasse" begin
-    alf = Alfonso.AlfonsoOpt(verbose=verbflag)
+    alf = Alfonso.AlfonsoOpt(verbose=verbflag, tolfeas=5e-7)
     build_namedpoly!(alf, :caprasse, 4)
     @time Alfonso.solve!(alf)
+    @test Alfonso.get_niters(alf) <= 45
     @test Alfonso.get_status(alf) == :Optimal
     @test Alfonso.get_pobj(alf) ≈ -3.1800966258 atol=1e-4 rtol=1e-4
     @test Alfonso.get_dobj(alf) ≈ -3.1800966258 atol=1e-4 rtol=1e-4
 end
 
 @testset "Goldstein-Price" begin
-    alf = Alfonso.AlfonsoOpt(verbose=verbflag)
+    alf = Alfonso.AlfonsoOpt(verbose=verbflag, tolfeas=1e-10)
     build_namedpoly!(alf, :goldsteinprice, 7)
     @time Alfonso.solve!(alf)
+    @test Alfonso.get_niters(alf) <= 60
     @test Alfonso.get_status(alf) == :Optimal
     @test Alfonso.get_pobj(alf) ≈ 3 atol=1e-4 rtol=1e-4
     @test Alfonso.get_dobj(alf) ≈ 3 atol=1e-4 rtol=1e-4
@@ -126,6 +249,7 @@ end
     alf = Alfonso.AlfonsoOpt(verbose=verbflag)
     build_namedpoly!(alf, :lotkavolterra, 3)
     @time Alfonso.solve!(alf)
+    @test Alfonso.get_niters(alf) <= 35
     @test Alfonso.get_status(alf) == :Optimal
     @test Alfonso.get_pobj(alf) ≈ -20.8 atol=1e-4 rtol=1e-4
     @test Alfonso.get_dobj(alf) ≈ -20.8 atol=1e-4 rtol=1e-4
@@ -142,9 +266,10 @@ end
 # end
 
 @testset "Motzkin" begin
-    alf = Alfonso.AlfonsoOpt(verbose=verbflag, optimtol=1e-5)
+    alf = Alfonso.AlfonsoOpt(verbose=verbflag, tolrelopt=1e-5, tolabsopt=1e-6, tolfeas=1e-6)
     build_namedpoly!(alf, :motzkin, 7)
     @time Alfonso.solve!(alf)
+    @test Alfonso.get_niters(alf) <= 35
     @test Alfonso.get_status(alf) == :Optimal
     @test Alfonso.get_pobj(alf) ≈ 0 atol=1e-4 rtol=1e-4
     @test Alfonso.get_dobj(alf) ≈ 0 atol=1e-4 rtol=1e-4
@@ -154,138 +279,92 @@ end
     alf = Alfonso.AlfonsoOpt(verbose=verbflag)
     build_namedpoly!(alf, :reactiondiffusion, 4)
     @time Alfonso.solve!(alf)
+    @test Alfonso.get_niters(alf) <= 35
     @test Alfonso.get_status(alf) == :Optimal
     @test Alfonso.get_pobj(alf) ≈ -36.71269068 atol=1e-4 rtol=1e-4
     @test Alfonso.get_dobj(alf) ≈ -36.71269068 atol=1e-4 rtol=1e-4
 end
 
 @testset "Robinson" begin
-    alf = Alfonso.AlfonsoOpt(verbose=verbflag, optimtol=1e-5)
+    alf = Alfonso.AlfonsoOpt(verbose=verbflag)
     build_namedpoly!(alf, :robinson, 8)
     @time Alfonso.solve!(alf)
+    @test Alfonso.get_niters(alf) <= 40
     @test Alfonso.get_status(alf) == :Optimal
     @test Alfonso.get_pobj(alf) ≈ 0.814814 atol=1e-4 rtol=1e-4
     @test Alfonso.get_dobj(alf) ≈ 0.814814 atol=1e-4 rtol=1e-4
 end
 
-# tolerances not satisfied
 @testset "Rosenbrock" begin
-    alf = Alfonso.AlfonsoOpt(verbose=verbflag, optimtol=1e-5, maxpredsmallsteps=20)
+    alf = Alfonso.AlfonsoOpt(verbose=verbflag, tolfeas=1.1e-8)
     build_namedpoly!(alf, :rosenbrock, 3)
     @time Alfonso.solve!(alf)
-    # @test Alfonso.get_status(alf) == :Optimal
-    @test Alfonso.get_pobj(alf) ≈ 0 atol=1e-3 rtol=1e-3
+    @test Alfonso.get_niters(alf) <= 65
+    @test Alfonso.get_status(alf) == :Optimal
+    @test Alfonso.get_pobj(alf) ≈ 0 atol=1e-2 rtol=1e-2
     @test Alfonso.get_dobj(alf) ≈ 0 atol=1e-3 rtol=1e-3
 end
 
-# tolerances not satisfied
 @testset "Schwefel" begin
-    alf = Alfonso.AlfonsoOpt(verbose=verbflag, optimtol=1e-5, maxpredsmallsteps=25)
+    alf = Alfonso.AlfonsoOpt(verbose=verbflag)
     build_namedpoly!(alf, :schwefel, 4)
     @time Alfonso.solve!(alf)
+    @test Alfonso.get_niters(alf) <= 50
     @test Alfonso.get_status(alf) == :Optimal
     @test Alfonso.get_pobj(alf) ≈ 0 atol=1e-3 rtol=1e-3
     @test Alfonso.get_dobj(alf) ≈ 0 atol=1e-3 rtol=1e-3
 end
 
-@testset "small second-order cone problem" begin
+@testset "large dense lp example (dense A)" begin
     alf = Alfonso.AlfonsoOpt(verbose=verbflag)
-    c = Float64[0, -1, -1]
-    A = Float64[1 0 0; 0 1 0]
-    b = Float64[1, 1/sqrt(2)]
-    cone = Alfonso.Cone([Alfonso.SecondOrderCone(3)], [1:3])
-    Alfonso.load_data!(alf, A, b, c, cone)
+    build_lp!(alf, 500, 1000, use_data=true, dense=true)
     @time Alfonso.solve!(alf)
+    @test Alfonso.get_niters(alf) <= 75
     @test Alfonso.get_status(alf) == :Optimal
-    @test Alfonso.get_pobj(alf) ≈ -sqrt(2) atol=1e-4 rtol=1e-4
-    @test Alfonso.get_dobj(alf) ≈ Alfonso.get_pobj(alf) atol=1e-4 rtol=1e-4
-    @test Alfonso.get_y(alf) ≈ [-sqrt(2), 0] atol=1e-4 rtol=1e-4
-    @test Alfonso.get_x(alf) ≈ [1, 1/sqrt(2), 1/sqrt(2)] atol=1e-4 rtol=1e-4
+    @test Alfonso.get_pobj(alf) ≈ 2055.807 atol=1e-4 rtol=1e-4
+    @test Alfonso.get_dobj(alf) ≈ 2055.807 atol=1e-4 rtol=1e-4
 end
 
-@testset "small exponential cone problem" begin
+@testset "large sparse lp example (sparse A)" begin
     alf = Alfonso.AlfonsoOpt(verbose=verbflag)
-    c = Float64[1, 1, 1]
-    A = Float64[0 1 0; 1 0 0]
-    b = Float64[2, 1]
-    cone = Alfonso.Cone([Alfonso.ExponentialCone()], [1:3])
-    Alfonso.load_data!(alf, A, b, c, cone)
+    build_lp!(alf, 500, 1000, dense=false, nzfrac=10/1000)
     @time Alfonso.solve!(alf)
+    @test Alfonso.get_niters(alf) <= 70
     @test Alfonso.get_status(alf) == :Optimal
-    @test Alfonso.get_pobj(alf) ≈ (2*exp(1/2)+3) atol=1e-4 rtol=1e-4
-    @test Alfonso.get_dobj(alf) ≈ Alfonso.get_pobj(alf) atol=1e-4 rtol=1e-4
-    @test dot(Alfonso.get_y(alf), b) ≈ Alfonso.get_pobj(alf) atol=1e-4 rtol=1e-4
-    @test Alfonso.get_x(alf) ≈ [1, 2, 2*exp(1/2)] atol=1e-4 rtol=1e-4
-    @test Alfonso.get_y(alf) ≈ [1+exp(1/2)/2, 1+exp(1/2)] atol=1e-4 rtol=1e-4
-    @test Alfonso.get_s(alf) ≈ (c - A'*Alfonso.get_y(alf)) atol=1e-4 rtol=1e-4
+    @test Alfonso.get_pobj(alf) ≈ Alfonso.get_dobj(alf) atol=1e-4 rtol=1e-4
 end
 
-@testset "small power cone problem" begin
-    alf = Alfonso.AlfonsoOpt(verbose=verbflag)
-    c = Float64[1, 0, 0, -1, -1, 0]
-    A = Float64[1 1 1/2 0 0 0; 0 0 0 0 0 1]
-    b = Float64[2, 1]
-    cone = Alfonso.Cone([Alfonso.PowerCone(0.2), Alfonso.PowerCone(0.4)], [[1, 2, 4], [3, 6, 5]])
-    Alfonso.load_data!(alf, A, b, c, cone)
-    @time Alfonso.solve!(alf)
-    @test Alfonso.get_status(alf) == :Optimal
-    @test Alfonso.get_pobj(alf) ≈ -1.80734 atol=1e-4 rtol=1e-4
-    @test Alfonso.get_dobj(alf) ≈ Alfonso.get_pobj(alf) atol=1e-4 rtol=1e-4
-    @test Alfonso.get_x(alf)[1:3] ≈ [0.0639314, 0.783361, 2.30542] atol=1e-4 rtol=1e-4
-end
+@testset "2D poly envelope example (dense vs sparse A)" begin
+    # dense methods
+    d_alf = Alfonso.AlfonsoOpt(verbose=verbflag)
+    build_envelope!(d_alf, 2, 4, 2, 7, dense=true)
+    @time Alfonso.solve!(d_alf)
+    @test Alfonso.get_niters(d_alf) <= 55
+    @test Alfonso.get_status(d_alf) == :Optimal
 
-@testset "small rotated second-order cone problem" begin
-    alf = Alfonso.AlfonsoOpt(verbose=verbflag)
-    c = Float64[0, 0, -1, -1]
-    A = Float64[1 0 0 0; 0 1 0 0]
-    b = Float64[1/2, 1]
-    cone = Alfonso.Cone([Alfonso.RotatedSecondOrderCone(4)], [1:4])
-    Alfonso.load_data!(alf, A, b, c, cone)
-    @time Alfonso.solve!(alf)
-    @test Alfonso.get_status(alf) == :Optimal
-    @test Alfonso.get_pobj(alf) ≈ -sqrt(2) atol=1e-4 rtol=1e-4
-    @test Alfonso.get_dobj(alf) ≈ Alfonso.get_pobj(alf) atol=1e-4 rtol=1e-4
-    @test Alfonso.get_x(alf)[3:4] ≈ [1, 1]/sqrt(2) atol=1e-4 rtol=1e-4
-end
+    # sparse methods
+    s_alf = Alfonso.AlfonsoOpt(verbose=verbflag)
+    build_envelope!(s_alf, 2, 4, 2, 7, dense=false)
+    @time Alfonso.solve!(s_alf)
+    @test Alfonso.get_niters(s_alf) <= 55
+    @test Alfonso.get_status(s_alf) == :Optimal
 
-@testset "small rotated second-order cone problem 2" begin
-    alf = Alfonso.AlfonsoOpt(verbose=verbflag)
-    c = Float64[0, 0, -1]
-    A = Float64[1 0 0; 0 1 0]
-    b = Float64[1/2, 1]/sqrt(2)
-    cone = Alfonso.Cone([Alfonso.RotatedSecondOrderCone(3)], [1:3])
-    Alfonso.load_data!(alf, A, b, c, cone)
-    @time Alfonso.solve!(alf)
-    @test Alfonso.get_status(alf) == :Optimal
-    @test Alfonso.get_pobj(alf) ≈ -1/sqrt(2) atol=1e-4 rtol=1e-4
-    @test Alfonso.get_dobj(alf) ≈ Alfonso.get_pobj(alf) atol=1e-4 rtol=1e-4
-    @test Alfonso.get_x(alf)[2] ≈ 1/sqrt(2) atol=1e-4 rtol=1e-4
+    @test Alfonso.get_pobj(d_alf) ≈ Alfonso.get_pobj(s_alf) atol=1e-4 rtol=1e-4
+    @test Alfonso.get_dobj(d_alf) ≈ Alfonso.get_dobj(s_alf) atol=1e-4 rtol=1e-4
 end
 
-@testset "small positive semidefinite cone problem" begin
+@testset "3D poly envelope example (sparse A)" begin
     alf = Alfonso.AlfonsoOpt(verbose=verbflag)
-    c = Float64[0, -1, 0]
-    A = Float64[1 0 0; 0 0 1]
-    b = Float64[1/2, 1]
-    cone = Alfonso.Cone([Alfonso.PositiveSemidefiniteCone(3)], [1:3])
-    Alfonso.load_data!(alf, A, b, c, cone)
+    build_envelope!(alf, 2, 3, 3, 5, dense=false)
     @time Alfonso.solve!(alf)
     @test Alfonso.get_status(alf) == :Optimal
-    @test Alfonso.get_pobj(alf) ≈ -1 atol=1e-4 rtol=1e-4
-    @test Alfonso.get_dobj(alf) ≈ Alfonso.get_pobj(alf) atol=1e-4 rtol=1e-4
-    @test Alfonso.get_x(alf)[2] ≈ 1 atol=1e-4 rtol=1e-4
+    @test Alfonso.get_pobj(alf) ≈ Alfonso.get_dobj(alf) atol=1e-4 rtol=1e-4
 end
 
-@testset "small positive semidefinite cone problem 2" begin
-    alf = Alfonso.AlfonsoOpt(verbose=verbflag)
-    c = Float64[1, 0, 1, 0, 0, 1]
-    A = Float64[1 2 3 4 5 6; 1 1 1 1 1 1]
-    b = Float64[10, 3]
-    cone = Alfonso.Cone([Alfonso.PositiveSemidefiniteCone(6)], [1:6])
-    Alfonso.load_data!(alf, A, b, c, cone)
+@testset "4D poly envelope example (sparse A)" begin
+    alf = Alfonso.AlfonsoOpt(verbose=verbflag, tolrelopt=1e-5, tolabsopt=1e-6, tolfeas=1e-6)
+    build_envelope!(alf, 2, 3, 4, 4, dense=false)
     @time Alfonso.solve!(alf)
     @test Alfonso.get_status(alf) == :Optimal
-    @test Alfonso.get_pobj(alf) ≈ 1.249632 atol=1e-4 rtol=1e-4
-    @test Alfonso.get_dobj(alf) ≈ Alfonso.get_pobj(alf) atol=1e-4 rtol=1e-4
-    @test Alfonso.get_x(alf) ≈ [0.491545, 0.647333, 0.426249, 0.571161, 0.531874, 0.331838] atol=1e-4 rtol=1e-4
+    @test Alfonso.get_pobj(alf) ≈ Alfonso.get_dobj(alf) atol=1e-4 rtol=1e-4
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index e18934a2c..9f658f650 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -32,3 +32,5 @@ include(joinpath(@__DIR__, "nativeexamples.jl"))
 # @testset "Continuous linear problems" begin
 #     MOIT.contlineartest(MOIB.SplitInterval{Float64}(optimizer), config)
 # end
+
+return nothing