mfalt · mfalt · Sep 29, 2017 · Oct 3, 2017 · Jan 8, 2018 · Jan 10, 2018
diff --git a/examples/data/snp500_extended.csv b/examples/data/snp500_extended.csv
diff --git a/src/DynamicApproximations.jl b/src/DynamicApproximations.jl
@@ -23,6 +23,9 @@ global const DEBUG = false
 global const DEBUG2 = false
 global const COUNTER_TEST = false
 global const OPTIMIZE = true
+# TODO Tweak this accuracy, sqrt(eps()) was not good enough for snp 500, M = 12 test
+global const accuracy = sqrt(eps())/100000
+
 
 include(joinpath("types","QuadraticPolynomial.jl"))
 include(joinpath("types","PiecewiseQuadratic.jl"))

diff --git a/src/solve.jl b/src/solve.jl
@@ -33,7 +33,7 @@ function add_quadratic!{T}(Λ::PiecewiseQuadratic{T}, ρ::QuadraticPolynomial{T}
         b2_minus_4ac =  Δb^2 - 4*Δa*Δc
 
         if Δa > 0 # ρ has greater curvature, i.e., ρ is smallest in the middle if intersect
-            if b2_minus_4ac <= 0
+            if b2_minus_4ac <= accuracy
                 # Zero (or one) intersections, old quadratic is smallest, just step forward
                 DEBUG && println("No intersections, old quadratic is smallest, Δa > 0, breaking.")
                 break
@@ -75,7 +75,7 @@ function add_quadratic!{T}(Λ::PiecewiseQuadratic{T}, ρ::QuadraticPolynomial{T}
             end
 
         elseif Δa < 0 # ρ has lower curvature, i.e., ρ is smallest on the sides
-            if b2_minus_4ac <= 0
+            if b2_minus_4ac <= accuracy
                 # Zero (or one) roots
                 λ_prev, λ_curr = update_segment_new(λ_prev, λ_curr, ρ)
             else
@@ -308,24 +308,47 @@ function find_optimal_fit{T}(ℓ::AbstractTransitionCost{T}, V_0N::QuadraticPoly
     return Λ
 end
 
-function  fit_pwl_reguralized(g::AbstractArray, ζ; lazy=true)
-    ℓ = lazy ? TransitionCostDiscrete{Float64}(g) :
-               compute_discrete_transition_costs(g)
+"""
+    I, Y, v = fit_pwl_reguralized(g::AbstractArray, ζ; t=1:length(g), lazy=true)
+    I, Y, v = fit_pwl_reguralized(g, t, ζ, tol=1e-3; lazy=true)
+
+Approximate `g[k]` (or `g(t)`) with a continuous piecewise linear function `f` according to
+    v = min_f `||f-g||₂^2 + ζ⋅(length(I)-2)`
+where the norm is `sum((f[k]-g[k])²)` (or integral over `(f(t)-g(t))²` from `t[1]` to `t[end]`).
+
+Returns:
+    `I`: Vector of length `M`
+    `Y`: Vector of length `M`
+    `v`: Float64
+such that the optimal function has breakpoints in `I` (or `t[I]`) and satisfies
+`f(I) .= Y` (or `f(t[I]) .= Y`).
+
+Kwargs:
+`t` is optional parameter in the discrete case, restricting the set of possible gridpoints,
+i.e. so that `f[t[I]] .= Y`.
+
+`lazy` = true, means that the internal transition costs `ℓ[i,j]` will be calculated when needed.
+
+`tol` specifies the relative tolerance sent to `quadg` kused when calculating the integrals (continuous case).
+"""
+function  fit_pwl_reguralized(g::AbstractArray, ζ; t=1:length(g), lazy=true, guess=nothing)
+    ℓ = lazy ? TransitionCostDiscrete{Float64}(g, t=t) :
+               compute_discrete_transition_costs(g, t=t)
     # Discrete case, so cost at endpoint is quadratic
     cost_last = QuadraticPolynomial(1.0, -2*g[end], g[end]^2)
-    fit_pwl_reguralized_internal(ℓ, cost_last, ζ)
+    fit_pwl_reguralized_internal(ℓ, cost_last, ζ, guess=guess)
 end
 
-function  fit_pwl_reguralized(g, t, ζ, tol=1e-3; lazy=true)
+function  fit_pwl_reguralized(g, t, ζ, tol=1e-3; lazy=true, guess=nothing)
     ℓ = lazy ? TransitionCostContinuous{Float64}(g, t, tol) :
                compute_transition_costs(g, t, tol)
     # Continouous case, no cost at endpoint
     cost_last = zero(QuadraticPolynomial{Float64})
-    fit_pwl_reguralized_internal(ℓ, cost_last, ζ)
+    fit_pwl_reguralized_internal(ℓ, cost_last, ζ, guess=guess)
 end
 
-function  fit_pwl_reguralized_internal(ℓ, cost_last, ζ)
-    Λ_reg = regularize(ℓ, cost_last, ζ)
+function  fit_pwl_reguralized_internal(ℓ, cost_last, ζ; guess=nothing)
+    Λ_reg = regularize(ℓ, cost_last, ζ, guess=guess)
     #Get solution that starts at first index
     I, _, f_reg = recover_optimal_index_set(Λ_reg[1])
     Y, f = find_optimal_y_values(ℓ, cost_last, I)
@@ -337,10 +360,33 @@ end
 # revober optimal solution does not do so. It is arguably more interesting
 # to test cost including regularization.
 
+"""
+    I, Y, v = fit_pwl_constrained(g::AbstractArray, M; t=1:length(g), lazy=true)
+    I, Y, v = fit_pwl_constrained(g, t, M, tol=1e-3; lazy=true)
+
+Approximate `g[k]` (or `g(t)`) with a continuous piecewise linear function `f` according to
+    v = min_f `||f-g||₂^2`
+    s.t. length(I)-2 = M
+where the norm is `sum((f[k]-g[k])²)` (or integral over `(f(t)-g(t))²` from `t[1]` to `t[end]`).
+
+Returns:
+    `I`: Vector of length `M`
+    `Y`: Vector of length `M`
+    `v`: Float64
+such that the optimal function has breakpoints in `I` (or `t[I]`) and satisfies
+`f(I) .= Y` (or `f(t[I]) .= Y`).
+
+Kwargs:
+`t` is optional parameter in the discrete case, restricting the set of possible gridpoints,
+i.e. so that `f[grid[I]] .= Y`.
 
-function  fit_pwl_constrained(g::AbstractArray, M; lazy=false)
-    ℓ = lazy ? TransitionCostDiscrete{Float64}(g) :
-               compute_discrete_transition_costs(g)
+`lazy` = true, means that the internal transition costs `ℓ[i,j]` will be calculated when needed.
+
+`tol` specifies the relative tolerance sent to `quadg` kused when calculating the integrals (continuous case).
+"""
+function  fit_pwl_constrained(g::AbstractArray, M::Integer; t=1:length(g), lazy=false)
+    ℓ = lazy ? TransitionCostDiscrete{Float64}(g, t=t) :
+               compute_discrete_transition_costs(g, t=t)
     cost_last = QuadraticPolynomial(1.0, -2*g[end], g[end]^2)
     fit_pwl_constrained_internal(ℓ, cost_last, M)
 end
@@ -366,40 +412,122 @@ function  fit_pwl_constrained_internal(ℓ, cost_last, M)
     return Ivec, Yvec, fvec
 end
 
+function get_guess{T}(guess, ℓ::AbstractTransitionCost{T}, V_0N::QuadraticPolynomial{T})
+    if isa(guess,Void)
+        return Inf, NaN, NaN
+    else
+        I_guess, Y_guess = guess
+        @assert I_guess[1] == 1
+        @assert I_guess[end] == size(ℓ,2)
+        #Cost guess is sum of ell from I_guess[k] to end, including end cost
+        cost = zeros(T,size(I_guess))
+        # cost is 0.0 at first pont
+        cost[end] = V_0N(Y_guess[end])
+        for k = length(I_guess)-1:-1:1
+            cost[k] = cost[k+1] +
+                ℓ[I_guess[k], I_guess[k+1]](Y_guess[k], Y_guess[k+1])
+        end
+        return cost, I_guess, Y_guess
+    end
+end
+
+function get_guess_cost(i, i_next_guess, cost_guess, ℓ, ζ, I_guess, Y_guess)
+    if i_next_guess > length(I_guess)
+        return Inf
+    end
+    i_guess = I_guess[i_next_guess]
+    y_guess = Y_guess[i_next_guess]
+    c_ℓ = ℓ[i, i_guess]
+    # c_y(.) = ℓ[i_guess, i](y_guess, .)
+    cy_a = c_ℓ.P[1]
+    cy_b = 2*y_guess*c_ℓ.P[2] + c_ℓ.q[1]
+    cy_c = y_guess^2*c_ℓ.P[4] + c_ℓ.q[2]*y_guess + c_ℓ.r + (length(I_guess)-i_next_guess+1)*ζ
+    #minimum : Δc - Δb^2/(4*Δa)
+    min_guess = cy_c-cy_b^2/(4*cy_a)
+    #The minimum cost for guess with extra breakpoint at i:
+    # min ℓ(i,ip) + rest of cost from ip
+    min_guess += cost_guess[i_next_guess]
+    return min_guess
+end
 """
 Solves the regularization problem
 minimzie ∫ (g - y)^2 dt + ζ⋅card(d^2/dt^2 y)
 """
-function regularize{T}(ℓ::AbstractTransitionCost{T}, V_0N::QuadraticPolynomial{T}, ζ::T)
+function regularize{T}(ℓ::AbstractTransitionCost{T}, V_0N::QuadraticPolynomial{T}, ζ::T; guess=nothing)
     N = size(ℓ, 2)
 
+    # Used if guess is provided
+    cost_guess, I_guess, Y_guess = get_guess(guess, ℓ, V_0N)
+
+    #println("cost_guess: $cost_guess")
     Λ = Vector{PiecewiseQuadratic{T}}(N)
+    Λ_min = Vector{Float64}(N)
 
     V_0N = deepcopy(V_0N)
     V_0N.time_index = -1
     Λ[N] = create_new_pwq(V_0N)
+    Λ_min[N] = find_minimum(V_0N)[2]
 
     ρ = QuadraticPolynomial{T}()
-
+    nskip = 0
     for i=N-1:-1:1
         Λ_new = create_new_pwq()
+        # Following only used with guesses
+        min_guess = Inf
+        if !isa(guess,Void)
+            ind_next = findfirst(v -> v > i, I_guess)
+            min_guess  = get_guess_cost(i, ind_next  , cost_guess, ℓ, ζ, I_guess, Y_guess)
+            min_guess2 = get_guess_cost(i, ind_next+1, cost_guess, ℓ, ζ, I_guess, Y_guess)
+            #min_guess > min_guess2 && println("$min_guess, $min_guess2")
+            min_guess = min(min_guess, min_guess2)
+        end
+        ddebug = (i == 2791)
         for ip=i+1:N
-
-
             ζ_level_insertion = false
+            ℓiip = ℓ[i,ip]
+
+            # Early check if guess exists
+            if !isa(guess,Void)
+                min_possible = find_minimum_value(ℓiip) + Λ_min[ip] + ζ
+                #TODO more exact than 10sqrt(sqrt(eps()))
+                if min_possible > min_guess + 10*sqrt(sqrt(eps()))
+                    nskip += 1
+                    #ddebug && print("skip $i,$ip ")
+                    #ddebug && println("min_pos: $(find_minimum_value(ℓiip)) + $(Λ_min[ip]) + $ζ > $min_guess")
+                    continue
+                else
+                    #println("Not skipping at i=$i, ip=$ip, Δa=$Δa")
+                    #ddebug && print("Not skip $i,$ip ")
+                    #ddebug && println("min_pos: $(find_minimum_value(ℓiip)) + $(Λ_min[ip]) + $ζ < $min_guess")
+                end
+            end
+            ddebug = if i == 214 && ip == 215
+                true
+            else
+                false
+            end
+
+            #ddebug && println(Λ[ip])
+            #ddebug && sleep(10)
+            counter = 0
             for λ in Λ[ip]
+                counter += 1
+                if counter == 10 && ddebug
+                    #return Λ
+                end
                 p = λ.p
                 #counter1 += 1
-
-                minimize_wrt_x2(ℓ[i,ip], p, ρ)
+                #ddebug && println("Λ[ip]: $(Λ[ip])")
+                #ddebug && println("p: $p")
+                #ddebug && println("counter: $counter, length: $(length(Λ_new))")
+                DynamicApproximations.minimize_wrt_x2(ℓiip, p, ρ)
                 ρ.c += ζ # add cost for break point
 
-
-                add_quadratic!(Λ_new, ρ)
+                DynamicApproximations.add_quadratic!(Λ_new, ρ)
 
 
                 if ζ_level_insertion == false
-                    if !poly_minus_constant_is_greater(Λ_new, ρ, ζ)
+                    if !DynamicApproximations.poly_minus_constant_is_greater(Λ_new, ρ, ζ)
                         ζ_level_insertion = true
                     end
                 end
@@ -418,12 +546,13 @@ function regularize{T}(ℓ::AbstractTransitionCost{T}, V_0N::QuadraticPolynomial
             end
 
             if ζ_level_insertion == false
-                break
+                #break
             end
         end
         Λ[i] = Λ_new
+        Λ_min[i] = find_minimum_value(Λ_new)
     end
-
+    println("Nskip: $nskip")
     return Λ
 end
 

diff --git a/src/transition_cost_computation.jl b/src/transition_cost_computation.jl
@@ -7,105 +7,39 @@ function `g` and a time sequence `t`
 """
 function compute_transition_costs(g, t::AbstractVector, tol=1e-3)
     T = Float64
-    # Find primitive functions to g, t*g, and g^2
-    # and evaluate them at the break points
-    #I_g = polyint(g).(t)
-    #I_g2 = polyint(g^2).(t)
-    #I_tg = polyint(Poly([0,1]) * g).(t)
-
+    ℓ_lazy = TransitionCostContinuous{T}(g, t)
     N = length(t)
-
-    # Find primitive functions to g, t*g, and g^2 at the break points
-    I_g = zeros(N)
-    I_g2 = zeros(N)
-    I_tg = zeros(N)
-
-    for i=2:N
-        I_g[i] = I_g[i-1] + quadgk(g, t[i-1], t[i], reltol=tol)[1]
-        I_g2[i] = I_g2[i-1] + quadgk(τ -> g(τ)^2, t[i-1], t[i], reltol=tol)[1]
-        I_tg[i] = I_tg[i-1] + quadgk(τ -> τ*g(τ), t[i-1], t[i], reltol=tol)[1]
-    end
-
     ℓ = Array{QuadraticForm{T}}(N-1,N)
-
-    for i=1:N-1
-        for ip=i+1:N
-
-            P = (t[ip] - t[i]) * @SMatrix [1/3 1/6; 1/6 1/3]
-
-            q = -2* 1/(t[ip]-t[i]) *
-            @SVector [-(I_tg[ip] - I_tg[i]) + t[ip]*(I_g[ip] - I_g[i]),
-            (I_tg[ip] - I_tg[i]) - t[i]*(I_g[ip] - I_g[i])]
-
-            r =  I_g2[ip] - I_g2[i]
-
-            ℓ[i,ip] = QuadraticForm(P, q, r)
+    for i = 1:N-1
+        for ip = i+1:N
+            ℓ[i,ip] = ℓ_lazy[i,ip]
         end
     end
-
+    # TODO move/add tests to Lazy, maybe only i to i+1
     for i = 1:N-1, ip = i+1:N
         P, q, r = ℓ[i,ip].P, ℓ[i,ip].q, ℓ[i,ip].r
         minval = -q⋅(P\q)/4+r
+        #TODO add epsilon on RHS?
         minval < 0 && error("Transition cost is negative for some values in `compute_transition_costs`, try decreasing rtol.")
     end
+
     return ℓ
 end
 
-
-
-
 """
     ℓ = compute_transition_costs(g::AbstractArray)
 Computes the transition costs `ℓ` given a discrete
 function `g`.
 """
-function compute_discrete_transition_costs(g::AbstractArray)
+function compute_discrete_transition_costs(g::AbstractArray; t=1:length(g))
     T = Float64
-
-    N = length(g)
-
-    # Find sums of g, k*g, and g^2
-    G1 = zeros(T, N)
-    G2 = zeros(T, N)
-    G3 = zeros(T, N)
-
-    # The sums corresponding to transitioning from i to ip
-    # i.e. not including the cost at ip
-    for k=2:N
-        G1[k] = G1[k-1] + g[k-1]
-        G2[k] = G2[k-1] + (k-1)*g[k-1]
-        G3[k] = G3[k-1] + g[k-1]^2
-    end
-
-    # The P-matrices only depend on the distance d=ip-i
-    P_mats  = Vector{SMatrix{2,2,Float64,4}}(N-1)
-    P_mats[1] = @SMatrix [1.0 0; 0 0]
-    for d=2:N-1
-        off_diag_elems = sum([k*(d - k) for k=0:d-1])
-        P_mats[d] = @SMatrix [P_mats[d-1][1,1] + d^2    off_diag_elems;
-        off_diag_elems            P_mats[d-1][1,1]]
-    end
-
-    P_mats = P_mats ./ (1.0:N-1).^2 # FIXME: Why can't this be done above in the loop?
-
-    #P_invs = inv.(P_mats)
-
+    ℓ_lazy = TransitionCostDiscrete{T}(g; t=t)
+    N = length(t)
     ℓ = Array{QuadraticForm{T}}(N-1,N)
-
-    for i=1:N-1
-        for ip=i+1:N
-
-            P = P_mats[ip-i]
-
-            q = -2* 1/(ip-i) *
-            @SVector [-(G2[ip] - G2[i]) + ip*(G1[ip] - G1[i]),
-            (G2[ip] - G2[i]) - i*(G1[ip] - G1[i])]
-
-            r =  G3[ip] - G3[i]
-
-            ℓ[i,ip] = QuadraticForm(P, q, r)
+    for i = 1:N-1
+        for ip = i+1:N
+            ℓ[i,ip] = ℓ_lazy[i,ip]
         end
     end
-
     return ℓ
 end