diff --git a/SciLean/Meta/Notation/Let'.lean b/SciLean/Meta/Notation/Let'.lean index f4647fad..66c147f8 100644 --- a/SciLean/Meta/Notation/Let'.lean +++ b/SciLean/Meta/Notation/Let'.lean @@ -87,3 +87,55 @@ macro_rules (kind :=let'_syntax') let $y := x.1.2 let $z := x.2 $b) + + +/-- Let binding that deconstructs structure into its fields. + +The notation +``` +let ⟨..⟩ := s +b +``` +expands to +``` +let ⟨x₁,...,xₙ⟩ := s +b +``` +where `x₁` are field names of struct `s`. + +For example, `Prod` has field `fst` and `snd` therefore +``` +let ⟨..⟩ := (1,2) +fst + snd +``` +as it expands to +``` +let ⟨fst,snd⟩ := (1,2) +fst + snd +``` + -/ +syntax (name:=let_struct_syntax) withPosition("let" "⟨..⟩" ":=" term) optSemicolon(term) : term + +open Lean Elab Term Syntax Meta +elab_rules (kind:=let_struct_syntax) : term +| `(let ⟨..⟩ := $x:term + $b) => do + + let X ← inferType (← elabTerm x none) + let .const struct _ := X.getAppFn' | throwError "structure expected" + let info := getStructureInfo (← getEnv) struct + let ids := info.fieldNames.map (fun n => mkIdent n) + let stx ← `(let ⟨$ids,*⟩ := $x; $b) + elabTerm stx none + + +/-- Structure field assigment, allows for `s.x := x'` notation in `do` block. + +`s.x := x'` expands into `s := {s with x := x'}` -/ +macro_rules +| `(doElem| $x:ident := $val) => do + let .str n f := x.getId | Macro.throwUnsupported + if n == .anonymous then Macro.throwUnsupported + let o := mkIdentFrom x n + let field := mkIdentFrom x (Name.mkSimple f) + `(doElem| $o:ident := {$o with $field:ident := $val}) diff --git a/SciLean/Numerics/Optimization/BFGS.lean b/SciLean/Numerics/Optimization/BFGS.lean deleted file mode 100644 index ff2defde..00000000 --- a/SciLean/Numerics/Optimization/BFGS.lean +++ /dev/null @@ -1,158 +0,0 @@ -import SciLean.Util.Approx.Basic -import SciLean.Logic.Function.Argmin -import SciLean.Tactic.DataSynth.HasRevFDerivUpdate -import SciLean.Tactic.DataSynth.ArrayOperations -import SciLean.Tactic.DataSynth.DefRevDeriv -import SciLean.Data.DataArray -import SciLean.Analysis.Calculus.Notation.Gradient - -namespace SciLean - -/-! - -Based on implementation in Optim.jl -https://github.com/JuliaNLSolvers/Optim.jl/blob/711dfec61acf5dbed677e1af15f2a3347d5a88ad/src/multivariate/solvers/first_order/bfgs.jl - --/ - -instance {R} [RealScalar R] : WellFoundedLT R := sorry_proof - -variable - {R : Type} [RealScalar R] [PlainDataType R] [ToString R] - {n : ℕ} - - -- generalize it to - -- todo: define class `MatrixType M X` saying that `M` is matrix associated with `X` - -- {X : Type} [ArrayType X I R] - -- {M : Type} [ArrayType M (I×I) R] -- [MatrixType M X] - -set_default_scalar R - -namespace BFGS - -abbrev OptimM := IO - -variable (R n) -structure State where - /-- previous position -/ - x₀ : R^[n] - /-- current position -/ - x : R^[n] - /-- gradient `∇ f x` -/ - df₀ : R^[n] -- ?? - fx₀ : R - /-- change in the position `xₙ₊₁ - xₙ` -/ - Δx : R^[n] - /-- change in the gradient `∇ f xₙ₊₁ - ∇ f xₙ` -/ - Δdf : R^[n] -- ?? - u : R^[n] - /-- approximation to inverse hessian `H⁻¹` -/ - invH : R^[n,n] - /-- search direction, equals to `- H⁻¹ * ∇ f x` -/ - p : R^[n] -- ?? -variable {R n} - - -/-- Liner search -/ -def lineSearch (f : R^[n] → R) (x p : R^[n]) (m : R) : OptimM R := sorry - -def update (state : State R n) (f : R^[n] → R) (hf : HasRevFDeriv R f f') : OptimM (State R n) := do - - -- todo: add this notation!!! - -- with_struct state do - let mut ⟨x₀,x,df₀,fx₀,Δx,Δdf,u,invH,p⟩ := state - - let df := (f' x).2 1 -- ∇! 
f x - p := - (invH * df) - - let α ← lineSearch f x p ⟪p,df⟫ - - Δx := α • p - x₀ := x - x := x + Δx - df₀ := df - - return ⟨x₀,x,df₀,fx₀,Δx,Δdf,u,invH,p⟩ - - -def updateH (state : State R n) (f : R^[n] → R) (hf : HasRevFDeriv R f f') : OptimM (State R n) := do - - let mut ⟨x₀,x,df₀,fx₀,Δx,Δdf,u,invH,p⟩ := state - - let df := (f' x).2 1 -- ∇! f x - Δdf := df - df₀ - - let s := ⟪Δx, Δdf⟫ - - -- update `H⁻¹` only if we can guarangee positive definitness - if s > 0 then - - -- todo: I would like the implementation to look like this: - -- invH := - -- let H := invH⁻¹ - -- (H + s⁻¹ • Δdf.outerprod Δdf - ⟪Δx,H*Δx⟫ • (H*Δx).outerprod (H*Δx))⁻¹ - -- rewrite_by .... somehow apply Sherman–Morrison formula - - u := invH*Δdf - let c1 := (s + ⟪Δdf,u⟫)/s^2 - let c2 := s⁻¹ - invH := invH + c1 • Δx.outerprod Δx - - c2 • (u.outerprod Δx + Δx.outerprod u) - - - return sorry - - -end BFGS - - -def bfgs (f : R^[n] → R) {f'} (hf : HasRevFDerivUpdate R f f') (x₀ : R^[n] := 0) : R^[n] := Id.run do - - let mut xₙ := x₀ - let (fx', updateFun) := f' xₙ - let df' := updateFun 1 0 - let mut fxₙ := fx' - let mut dfₙ := df' - let mut Hₙ := 𝐈 n - - let mut firstRun := true - for n in [0:10] do - - let pₙ := - (Hₙ * df') - - let α := (argmin (α : R), f (xₙ + α • pₙ)) - -- approx_by - -- simp only [linese_search_with_wolfe_condition] - - let sₙ := α • pₙ - let x' := xₙ + sₙ - - let (fx', updateFun) := f' xₙ - let df' := updateFun 1 0 - let yₙ := df' - dfₙ - - Hₙ := Hₙ + ((⟪sₙ,yₙ⟫ + ⟪yₙ,Hₙ*yₙ⟫)/⟪sₙ,yₙ⟫^2) • sₙ.outerprod sₙ - - ⟪sₙ,yₙ⟫⁻¹ • ((Hₙ*yₙ).outerprod sₙ + sₙ.outerprod (Hₙᵀ*yₙ)) - -- rewrite_by optimize_array_expr - -- todo: simplify/optimize this, add function (A.addOuterprod x y) and use that - - -- compute errors - let Δx := ‖x'-xₙ‖₂ - let Δxᵣ := Δx / ‖xₙ‖₂ - let Δf := ‖fx'-fxₙ‖₂ - let Δfᵣ := Δf / ‖fxₙ‖₂ - - dbg_trace s!"\ - ‖xₙ₊₁-xₙ‖ = {Δx}\ - ‖xₙ₊₁-xₙ‖ / ‖xₙ‖ = {Δxᵣ}\ - ‖f(xₙ₊₁) - f(xₙ)‖ = {Δf}\ - ‖f(xₙ₊₁) - f(xₙ)‖ / ‖f(xₙ)‖ = {Δfᵣ}" - - fxₙ := fx' - xₙ := x' - - return xₙ - - - --- def cg (f : R^[n] → R) {f' f''} (hf : HasRevFDeriv R f f') (x₀ : R^[n] := 0) : R^[n] := Id.run do diff --git a/SciLean/Numerics/Optimization/BacktrackingLinerSearch.lean b/SciLean/Numerics/Optimization/BacktrackingLinerSearch.lean deleted file mode 100644 index fd9135c3..00000000 --- a/SciLean/Numerics/Optimization/BacktrackingLinerSearch.lean +++ /dev/null @@ -1,46 +0,0 @@ -import SciLean.Util.Approx.Basic -import SciLean.Logic.Function.Argmin -import SciLean.Tactic.DataSynth.HasRevFDerivUpdate -import SciLean.Tactic.DataSynth.HasFwdFDeriv -import SciLean.Tactic.DataSynth.ArrayOperations -import SciLean.Tactic.DataSynth.DefRevDeriv -import SciLean.Data.DataArray - -namespace SciLean - -instance {R} [RealScalar R] : WellFoundedLT R := sorry_proof - -variable - {R : Type} [RealScalar R] [PlainDataType R] [ToString R] - {X : Type} [NormedAddCommGroup X] [AdjointSpace R X] [CompleteSpace X] - -set_default_scalar R - - - -variable (R) -structure BacktrackingLineSearch.Config where - /-- Step reduction factor -/ - τ : R := 0.5 - c : R := 0.5 - maxSteps : ℕ := 100 -variable {R} - - -open BacktrackingLineSearch in -def backtrackingLineSearch (cfg : Config R) (f : X → R) (x p : X) (m : R) (α₀ : R) : R := Id.run do - - let mut fxₙ := f x - let mut αₙ := α₀ - - let t := -cfg.c*m - for _ in [0:cfg.maxSteps] do - - let fx' := f (x + αₙ•p) - if fxₙ - fx' ≥ αₙ * t then - break - - fxₙ := fx' - αₙ := cfg.τ * αₙ - - return αₙ diff --git a/SciLean/Numerics/Optimization/GradientDescent.lean b/SciLean/Numerics/Optimization/GradientDescent.lean deleted 
file mode 100644 index 909371de..00000000 --- a/SciLean/Numerics/Optimization/GradientDescent.lean +++ /dev/null @@ -1,101 +0,0 @@ -import SciLean.Util.Approx.Basic -import SciLean.Logic.Function.Argmin -import SciLean.Tactic.DataSynth.HasRevFDerivUpdate -import SciLean.Tactic.DataSynth.ArrayOperations -import SciLean.Tactic.DataSynth.DefRevDeriv -import SciLean.Data.DataArray - -namespace SciLean - - - - -structure GradientDescent.Config (R) [RealScalar R] where - maxSteps : ℕ - rate : R - absError : R - - - -variable - {R : Type} [RealScalar R] - {X : Type} [NormedAddCommGroup X] [AdjointSpace R X] [CompleteSpace X] - -set_default_scalar R - - -structure Optimization.State (R) [RealScalar R] where - step : ℕ := 0 - Δx : R := 0 - Δx_rel : R := 0 - Δf : R := 0 - Δf_rel : R := 0 - -open Lean - - -abbrev OptimizationM (R : Type) [RealScalar R] := StateM (Optimization.State R) - - -open GradientDescent Optimization in -def gradientDescent [ToString R] [ToString X] (cfg : Config R) - (f : X → R) {f'} (hf : HasRevFDerivUpdate R f f') (x₀ : X := 0) : OptimizationM R X := do - - let mut val : R := 0 - let mut xₙ := x₀ - - dbg_trace "current point: {xₙ}" - dbg_trace "f(xₙ): {val}" - - let mut firstRun := true - for i in [0:cfg.maxSteps] do - let (val',updateFun) := f' xₙ - let x' := updateFun (-cfg.rate) xₙ - - let Δx := ‖x'-xₙ‖₂ - let Δxᵣ := Δx / ‖xₙ‖₂ - let Δf := ‖val'-val‖₂ - let Δfᵣ := Δf / ‖val‖₂ - - xₙ := x' - val := val' - - dbg_trace "current point: {xₙ}" - dbg_trace "f(xₙ): {val}" - -- todo: set state - - return xₙ - --- using Optimization, Zygote --- rosenbrock(u, p) = (p[1] - u[1])^2 + p[2] * (u[2] - u[1]^2)^2 --- u0 = zeros(2) --- p = [1.0, 100.0] - --- optf = OptimizationFunction(rosenbrock, AutoZygote()) --- prob = OptimizationProblem(optf, u0, p) - --- sol = solve(prob, Optimization.LBFGS()) - -variable [PlainDataType R] - -def rosenbrock (a b : R) (u : R^[2]) := - let x := u[0] - let y := u[1] - (a - x)^2 + b * (y - x^2)^2 - -set_option trace.Meta.Tactic.data_synth true in -def_rev_deriv rosenbrock in u by - unfold rosenbrock - data_synth => lsimp - -#eval - gradientDescent - {maxSteps := 300, rate := 1e-2, absError := 1e-6} - (rosenbrock 1.0 10.0) - (by data_synth) - ⊞[0,3] - |>.run {} - |>.fst - - -variable (a b : R) diff --git a/SciLean/Numerics/Optimization/Optimize.lean b/SciLean/Numerics/Optimization/Optimize.lean deleted file mode 100644 index 6b65dad4..00000000 --- a/SciLean/Numerics/Optimization/Optimize.lean +++ /dev/null @@ -1,246 +0,0 @@ -import SciLean.Data.DataArray -import SciLean.Tactic.DataSynth.HasRevFDerivUpdate -import SciLean.Meta.Notation.Do - -/-! 
- -Port of Optim.jl - -This is a port of: Optim.jl/src/multivariate/optimize/optimize.jl -https://github.com/JuliaNLSolvers/Optim.jl/blob/711dfec61acf5dbed677e1af15f2a3347d5a88ad/src/multivariate/optimize/optimize.jl - --/ - -namespace SciLean.Optimjl - -variable - {R : Type} [RealScalar R] - {X : Type} [NormedAddCommGroup X] [AdjointSpace R X] [CompleteSpace X] - - -open Lean - -variable (R X) -structure Method where - isNewton : Bool - isNewtonTrustRegion : Bool - -structure ObjectiveFunction where - f : X → R - f' : X → R × (R → X) - hf : HasRevFDeriv R f f' - -structure Options where - x_abstol : R := 0 - x_reltol : R := 0 - f_abstol : R := 0 - f_reltol : R := 0 - g_abstol : R := 1e-8 - g_reltol : R := 1e-8 - outer_x_abstol : R := 0 - outer_x_reltol : R := 0 - outer_f_abstol : R := 0 - outer_f_reltol : R := 0 - outer_g_abstol : R := 1e-8 - outer_g_reltol : R := 1e-8 - f_calls_limit : ℕ := 0 - g_calls_limit : ℕ := 0 - h_calls_limit : ℕ := 0 - allow_f_increases : Bool := true - allow_outer_f_increases : Bool := true - successive_f_tol : ℕ := 1 - iterations : ℕ := 1000 - outer_iterations : ℕ := 1000 - store_trace : Bool := false - trace_simplex : Bool := false - show_trace : Bool := false - extended_trace : Bool := false - show_warnings : Bool := true - show_every: ℕ := 1 - callback : Unit → IO Unit := fun _ => pure () - time_limit? : Option ℕ := none - -class AbstractOptimizerState (S : Type) where - initialConvergence (d : ObjectiveFunction R X) (state : S) (x₀ : X) (options : Options R) : (Bool×Bool) - assessConvergence (state : S) (d : ObjectiveFunction R X) (options : Options R) : (Bool×Bool×Bool×Bool) - - updateState (d : ObjectiveFunction R X) (state : S) (method : Method) : (S×Bool) - updateG (d : ObjectiveFunction R X) (state : S) (method : Method) : S - updateH (d : ObjectiveFunction R X) (state : S) (method : Method) : S - - pick_best_x (f_inc_pick : Bool) (state : S) : X - pick_best_f (f_inc_pick : Bool) (state : S) (d : ObjectiveFunction R X) : R - x_abschange (state : S) : R - x_relchange (state : S) : R - f_abschange (d : ObjectiveFunction R X) (state : S) : R - f_relchange (d : ObjectiveFunction R X) (state : S) : R - g_residual (d : ObjectiveFunction R X) (state : S) : R - - f_calls (d : ObjectiveFunction R X) (state : S) : ℕ - g_calls (d : ObjectiveFunction R X) (state : S) : ℕ - h_calls (d : ObjectiveFunction R X) (state : S) : ℕ - -export AbstractOptimizerState (initialConvergence assessConvergence updateState updateG updateH - pick_best_x pick_best_f x_abschange x_relchange f_abschange f_relchange g_residual - f_calls g_calls h_calls) - -variable {R X} - - -variable (R X) -structure MultivariateOptimizationResults where - method : Method - initial_x : X - minimizer : X - minimum : R - iterations : ℕ - iteration_converged : Bool - x_converged : Bool - x_abstol : R - x_reltol : R - x_abschange : R -- Tc - x_relchange : R -- Tc - f_converged : Bool - f_abstol : R - f_reltol : R - f_abschange : R -- Tc - f_relchange : R -- Tc - g_converged : Bool - g_abstol : R - g_residual : R -- Tc - f_increased : Bool - -- trace::M - f_calls : ℕ - g_calls : ℕ - h_calls : ℕ - ls_success : Bool - time_limit? 
: Option ℕ - time_run : ℕ - -- stopped_by::Tsb - - -variable {R X} - - -variable [AbstractOptimizerState R X S] - -def optimize - (d : ObjectiveFunction R X) (x₀ : X) - (method : Method) - (options : Options R) - (state₀ : S) : - IO (MultivariateOptimizationResults R X) := do - - let t₀ ← IO.monoNanosNow - -- tr = OptimizationTrace{typeof(value(d)), typeof(method)}() - -- tracing = options.store_trace || options.show_trace || options.extended_trace || options.callback !== nothing - let mut stopped := false - let mut stopped_by_callback := false - let mut stopped_by_time_limit := false - let mut f_limit_reached := false - let mut g_limit_reached := false - let mut h_limit_reached := false - let mut x_converged := false - let mut f_converged := false - let mut g_converged := false - let mut f_increased := false - let mut counter_f_tol := 0 - - let mut state := state₀ - - (f_converged, g_converged) := initialConvergence d state x₀ options - let mut converged := f_converged || g_converged - - let mut iteration := 0 - - -- options.show_trace && print_header(method) - let mut _time ← IO.monoNanosNow - -- trace!(tr, d, state, iteration, method, options, _time-t0) - let mut ls_success := true - - while !converged && !stopped && iteration < options.iterations do - iteration += 1 - (state, ls_success) := updateState d state method - if !ls_success then - break - state := updateG d state method - (x_converged, f_converged, g_converged, f_increased) := assessConvergence state d options - counter_f_tol := if f_converged then counter_f_tol+1 else 0 - converged := x_converged || g_converged || (counter_f_tol > options.successive_f_tol) - if !(converged && method.isNewton) && !(method.isNewtonTrustRegion) then - state := updateH d state method - -- if tracing then - -- -- update trace; callbacks can stop routine early by returning true - -- stopped_by_callback = trace!(tr, d, state, iteration, method, options, time()-t0) - _time ← IO.monoNanosNow - if let some time_limit := options.time_limit? then - stopped_by_time_limit := _time - t₀ > time_limit - -- f_limit_reached = options.f_calls_limit > 0 && f_calls(d) >= options.f_calls_limit ? true : false - -- g_limit_reached = options.g_calls_limit > 0 && g_calls(d) >= options.g_calls_limit ? true : false - -- h_limit_reached = options.h_calls_limit > 0 && h_calls(d) >= options.h_calls_limit ? true : false - - if (f_increased && !options.allow_f_increases) || stopped_by_callback || - stopped_by_time_limit || f_limit_reached || g_limit_reached || h_limit_reached then - stopped := true - - -- if method isa NewtonTrustRegion - -- # If the trust region radius keeps on reducing we need to stop - -- # because something is wrong. Wrong gradients or a non-differentiability - -- # at the solution could be explanations. - -- if state.delta ≤ method.delta_min - -- stopped = true - -- end - -- end - - -- if g_calls(d) > 0 && !all(isfinite, gradient(d)) - -- options.show_warnings && @warn "Terminated early due to NaN in gradient." - -- break - -- end - - -- if h_calls(d) > 0 && !(d isa TwiceDifferentiableHV) && !all(isfinite, hessian(d)) - -- options.show_warnings && @warn "Terminated early due to NaN in Hessian." 
- -- break - - - pure () - - -- Tf = typeof(value(d)) - let f_incr_pick := f_increased && !options.allow_f_increases - -- Tf = typeof(value(d)) - -- f_incr_pick = f_increased && !options.allow_f_increases - -- stopped_by =(f_limit_reached=f_limit_reached, - -- g_limit_reached=g_limit_reached, - -- h_limit_reached=h_limit_reached, - -- time_limit=stopped_by_time_limit, - -- callback=stopped_by_callback, - -- f_increased=f_incr_pick) - - - return { - method := method - initial_x := x₀ - minimizer := pick_best_x R f_incr_pick state - minimum := pick_best_f f_incr_pick state d - iterations := iteration - iteration_converged := iteration == options.iterations - x_converged := x_converged - x_abstol := options.x_abstol - x_reltol := options.x_reltol - x_abschange := x_abschange X state - x_relchange := x_relchange X state - f_converged := f_converged - f_abstol := options.f_abstol - f_reltol := options.f_reltol - f_abschange := f_abschange d state - f_relchange := f_relchange d state - g_converged := g_converged - g_abstol := options.g_abstol - g_residual := g_residual d state - f_increased := f_increased - f_calls := f_calls d state - g_calls := g_calls d state - h_calls := h_calls d state - ls_success := ls_success - time_limit? := options.time_limit? - time_run := _time - t₀ - } diff --git a/SciLean/Numerics/Optimization/Optimjl/Multivariate/Optimize/Optimize.lean b/SciLean/Numerics/Optimization/Optimjl/Multivariate/Optimize/Optimize.lean index c49ebea1..2762e28f 100644 --- a/SciLean/Numerics/Optimization/Optimjl/Multivariate/Optimize/Optimize.lean +++ b/SciLean/Numerics/Optimization/Optimjl/Multivariate/Optimize/Optimize.lean @@ -48,8 +48,7 @@ def optimize let mut iteration := 0 if options.show_trace then - IO.println s!"{iteration}\t{printStateHeader Method R X}" - IO.println s!"{iteration}\t{printState Method R X state}" + IO.println s!"n\t{printStateHeader Method R X}" -- options.show_trace && print_header(method) let mut _time ← IO.monoNanosNow @@ -78,7 +77,7 @@ def optimize state := updateH method state d if options.show_trace then - IO.println s!"{iteration}\t{printState Method R X state}" + IO.println s!"{iteration-1}\t{printState Method R X state}" -- -- update trace; callbacks can stop routine early by returning true -- stopped_by_callback = trace!(tr, d, state, iteration, method, options, time()-t0) diff --git a/SciLean/Numerics/Optimization/Optimjl/Multivariate/Solvers/FirstOrder/BFGS.lean b/SciLean/Numerics/Optimization/Optimjl/Multivariate/Solvers/FirstOrder/BFGS.lean index f920661e..b527eb4f 100644 --- a/SciLean/Numerics/Optimization/Optimjl/Multivariate/Solvers/FirstOrder/BFGS.lean +++ b/SciLean/Numerics/Optimization/Optimjl/Multivariate/Solvers/FirstOrder/BFGS.lean @@ -12,58 +12,6 @@ https://github.com/JuliaNLSolvers/Optim.jl/blob/711dfec61acf5dbed677e1af15f2a334 namespace SciLean.Optimjl -/-- Let binding that deconstructs structure into its fields. - -The notation -``` -let ⟨..⟩ := s -b -``` -expands to -``` -let ⟨x₁,...,xₙ⟩ := s -b -``` -where `x₁` are field names of struct `s`. 
- -For example, `Prod` has field `fst` and `snd` therefore -``` -let ⟨..⟩ := (1,2) -fst + snd -``` -as it expands to -``` -let ⟨fst,snd⟩ := (1,2) -fst + snd -``` - -/ -syntax (name:=let_struct_syntax) withPosition("let" "⟨..⟩" ":=" term) optSemicolon(term) : term - -open Lean Elab Term Syntax Meta -elab_rules (kind:=let_struct_syntax) : term -| `(let ⟨..⟩ := $x:term - $b) => do - - let X ← inferType (← elabTerm x none) - let .const struct _ := X.getAppFn' | throwError "structure expected" - let info := getStructureInfo (← getEnv) struct - let ids := info.fieldNames.map (fun n => mkIdent n) - let stx ← `(let ⟨$ids,*⟩ := $x; $b) - elabTerm stx none - - -/-- Structure field assigment, allows for `s.x := x'` notation in `do` block. - -`s.x := x'` expands into `s := {s with x := x'}` -/ -macro_rules -| `(doElem| $x:ident := $val) => do - let .str n f := x.getId | Macro.throwUnsupported - if n == .anonymous then Macro.throwUnsupported - let o := mkIdentFrom x n - let field := mkIdentFrom x (Name.mkSimple f) - `(doElem| $o:ident := {$o with $field:ident := $val}) - - variable {R : Type} [RealScalar R] [PlainDataType R] [ToString R] @@ -95,15 +43,6 @@ set_default_scalar R namespace BFGS -/-- BFGS configuration -/ -structure Method (R : Type) (n : ℕ) [RealScalar R] [PlainDataType R] where - alphaguess (φ₀ dφ₀ : R) (d : ObjectiveFunction R (R^[n])) : R - linesearch (d : ObjectiveFunction R (R^[n])) (x s x_ls : R^[n]) (α₀ φ₀ dφ₀ : R) : Option (R × R) - initial_invH (x : R^[n]) : Option (R^[n,n]) := none - initial_stepnorm : Option R := none - -- manifold : Manifold - - structure State (R : Type) (n : ℕ) [RealScalar R] [PlainDataType R] where /-- current position `xₙ` -/ x : R^[n] @@ -152,7 +91,7 @@ def reset_search_direction (method : BFGS R) (state : State R n) def perform_linesearch (method : BFGS R) (state : State R n) (d : ObjectiveFunction R (R^[n])) : - (Except LineSearchError (State R n)) := Id.run do + (Except LineSearchError (R×R)) := Id.run do let mut state := state let mut dφ₀ := ⟪state.g, state.s⟫ @@ -174,9 +113,8 @@ def perform_linesearch (method : BFGS R) (state : State R n) (d : ObjectiveFunct -- WARNING! Here we run IO code in pure code, the last `()` is `IO.RealWorld` -- This hould be fixed, eiter remove LineSearch.call from IO or make this function in IO match method.lineSearch.call φ φ₀ dφ₀ state.alpha () () with - | .ok ((α, φα),_) _ => - state.alpha := α - return .ok state + | .ok (αφα,_) _ => + return .ok αφα | .error e _ => return .error e @@ -191,15 +129,20 @@ def updateState (method : BFGS R) (state : State R n) (d : ObjectiveFunction R ( match perform_linesearch method state d with | .error e => return .error e - | .ok state' => - state := state' + | .ok (α,φα) => + + state.alpha := α + -- update position state.dx := state.alpha • state.s state.x_previous := state.x state.x := state.x + state.dx - state.f_x_previous := state.f_x - -- dbg_trace s!" 
done\tαₙ := {state.alpha}\txₙ := {state.x}\tf(xₙ) := {d.f state.x}"
+    -- do not update `f_x` as it will be updated in `updateFG`
+
+    -- TODO: update f_calls and g_calls
+    -- this information should be somehow given by line search
+
     return .ok state
diff --git a/SciLean/Numerics/Optimization/Optimjl/Multivariate/Solvers/FirstOrder/LBFGS.lean b/SciLean/Numerics/Optimization/Optimjl/Multivariate/Solvers/FirstOrder/LBFGS.lean
new file mode 100644
index 00000000..ba962963
--- /dev/null
+++ b/SciLean/Numerics/Optimization/Optimjl/Multivariate/Solvers/FirstOrder/LBFGS.lean
@@ -0,0 +1,378 @@
+import SciLean.Numerics.Optimization.Optimjl.Utilities.Types
+import SciLean.Numerics.Optimization.Optimjl.LinerSearches.Types
+import SciLean.Numerics.Optimization.Optimjl.LinerSearches.BackTracking
+
+set_option linter.unusedVariables false
+
+/-! Port of Optim.jl, file src/multivariate/solvers/first_order/l_bfgs.jl
+
+github link:
+https://github.com/JuliaNLSolvers/Optim.jl/blob/711dfec61acf5dbed677e1af15f2a3347d5a88ad/src/multivariate/solvers/first_order/l_bfgs.jl
+
+-/
+
+namespace SciLean.Optimjl
+
+
+variable
+  {R : Type} [RealScalar R] [PlainDataType R] [ToString R]
+
+
+variable (R)
+inductive LBFGS.InitialInvH (n : ℕ) where
+/-- Initialize inverse Hessian to this specified value -/
+| invH (invH : R^[n,n])
+/-- Initialize inverse Hessian such that the step length is the specified `stepnorm` -/
+| stepnorm (stepnorm : R)
+/-- Initialize inverse Hessian to identity matrix -/
+| identity
+
+open LBFGS in
+structure LBFGS (m : ℕ) extends Options R where
+  /-- Line search that finds an appropriate `α` in `xₙ₊₁ = xₙ + α • sₙ` -/
+  lineSearch : LineSearch0Obj R := .mk (BackTracking R) {}
+  /-- Guess initial `α` to try given function value and gradient -/
+  alphaguess (φ₀ dφ₀ : R) (d : ObjectiveFunction R (R^[n])) : R := 1
+  -- P : T
+  -- precondprep!::Tprep
+  -- scaleinvH0 : Bool := true
+variable {R}
+
+set_default_scalar R
+
+namespace LBFGS
+
+
+structure State (R : Type) (n m : ℕ) [RealScalar R] [PlainDataType R] where
+  /-- current position `xₙ₊₁` -/
+  x : R^[n]
+  /-- previous position `xₙ`-/
+  x_previous : R^[n] := x
+  /-- current gradient `∇f(xₙ₊₁)` -/
+  g : R^[n] := 0
+  /-- previous gradient `∇f(xₙ)` -/
+  g_previous : R^[n] := g
+  /-- current value `f(xₙ₊₁)` -/
+  f_x : R
+  /-- previous value `f(xₙ)` -/
+  f_x_previous : R := f_x
+  /-- difference between positions `xₙ₊₁ - xₙ` -/
+  dx : R^[n] := 0
+  /-- difference between gradients `∇f(xₙ₊₁) - ∇f(xₙ)` -/
+  dg : R^[n] := 0
+  /-- step direction `- (∇²f)⁻¹ ∇f` -/
+  s : R^[n] := - g
+  /-- position difference `xₙ₊₁-xₙ` -/
+  dx_history : R^[n]^[m] := 0
+  /-- gradient difference `∇f(xₙ₊₁)-∇f(xₙ)`-/
+  dg_history : R^[n]^[m] := 0
+  /-- ρₙ := 1 / ⟪∇f(xₙ₊₁) - ∇f(xₙ), xₙ₊₁ - xₙ⟫ -/
+  ρ : R^[m] := 0
+  /-- pseudo iteration counter, used modulo `m` to index the history buffers -/
+  pseudo_iteration : ℤ := 0
+  -- /-- preconditioner scale -/
+  -- precon : R := 1
+  /-- line search scale `dx := α • s` -/
+  alpha : R := 1
+  f_calls : ℕ := 0
+  g_calls : ℕ := 0
+  h_calls : ℕ := 0
+
+
+open Set in
+/-- This function updates the search direction `s` from the gradient `g` by the so-called "two-loop recursion". -/
+def twoloop (g : R^[n]) (k : ℤ) (m : ℕ)
+    (ρ : Icc (k-m) (k-1) → R) (dx dg : Icc (k-m) (k-1) → R^[n])
+    /- (scaleinvH0 : Bool) (precon : R) -/ : R^[n] := Id.run do
+  if h : m = 0 then
+    -g
+  else if k-m < 0 then
+    return -g
+  else
+
+    let mut α : R^[m] := 0
+
+    -- initialize `q`
+    let mut q := g
+
+    -- Backward pass
+    for h : j in [0:m] do
+      have : 0 ≤ j := h.1
+      have : j < m := h.2
+      -- indices going in reverse order
+      let iᵣ : Icc (k-m) (k-1) := ⟨k-1-j, by constructor <;> omega⟩
+      let jᵣ : Fin m := ⟨m-j-1, by omega⟩
+
+      -- skip iterations with negative index
+      if iᵣ.1 < 0 then
+        continue
+
+      let dxi := dx iᵣ
+      let dgi := dg iᵣ
+      let α' := ρ iᵣ * ⟪dxi, q⟫
+      α[jᵣ] := α'
+      q -= α' • dgi
+
+    let mut s :=
+      if k > 0 then
+        let i₀ : Icc (k-m) (k-1) := ⟨k-1, by constructor <;> omega⟩
+        let γ := ⟪dx i₀, dg i₀⟫ / ‖dg i₀‖₂²
+        γ • q
+      else
+        q
+
+    -- forward pass
+    for h : j in [0:m] do
+      have : 0 ≤ j := h.1
+      have : j < m := h.2
+      -- indices going in forward order
+      let i : Icc (k-m) (k-1) := ⟨k-m+j, by constructor <;> omega⟩
+      let j : Fin m := ⟨j, by omega⟩
+
+      -- skip iterations with negative index
+      if i.1 < 0 then
+        continue
+
+      let dxi := dx i
+      let dgi := dg i
+      let β := ρ i * ⟪dgi,s⟫
+      s += (α[j] - β) • dxi
+
+    return - s
+
+
+open Set in
+def updateSearchDirection (method : LBFGS R m) (state : State R n m) : State R n m := Id.run do
+  let mut state := state
+
+  let k : ℤ := state.pseudo_iteration
+
+  let ρ : Icc (k-m) (k-1) → R := fun i =>
+    let i : Fin m := ⟨(i.1%m).toNat, by sorry_proof⟩
+    state.ρ[i]
+
+  let dx : Icc (k-m) (k-1) → R^[n] := fun i =>
+    let i : Fin m := ⟨(i.1%m).toNat, by sorry_proof⟩
+    state.dx_history[i]
+
+  let dg : Icc (k-m) (k-1) → R^[n] := fun i =>
+    let i : Fin m := ⟨(i.1%m).toNat, by sorry_proof⟩
+    state.dg_history[i]
+
+  state.s := twoloop state.g k m ρ dx dg -- method.scaleinvH0 precon
+
+  return state
+
+
+def reset_search_direction (state : State R n m)
+    : State R n m := Id.run do
+
+  let mut state := state
+
+  -- we do not have to reset this data because of the `iᵣ.1 < 0` and `i.1 < 0` checks in `twoloop`
+  -- state.dx_history := 0
+  -- state.dg_history := 0
+  -- state.ρ := 0
+
+  state.pseudo_iteration := 0
+
+  state.s := - state.g
+
+  return state
+
+
+/-- Find appropriate step length `α`. Resets the search direction if it is invalid. -/
+def perform_linesearch (method : LBFGS R m) (state : State R n m) (d : ObjectiveFunction R (R^[n])) :
+    (Except LineSearchError (State R n m)) := Id.run do
+
+  let mut state := state
+  let mut dφ₀ := ⟪state.g, state.s⟫
+
+  let φ₀ := state.f_x
+
+  -- not a descent direction, we have to reset the search direction
+  if dφ₀ >= 0 then
+    state := reset_search_direction state
+    dφ₀ := ⟪state.g, state.s⟫
+
+  state.alpha := method.alphaguess φ₀ dφ₀ d
+
+  state.f_x_previous := φ₀
+  state.x_previous := state.x
+
+  let φ := fun α => d.f (state.x + α • state.s)
+
+  -- WARNING! Here we run IO code in pure code, the last `()` is `IO.RealWorld`
+  -- This should be fixed, either remove LineSearch.call from IO or make this function run in IO
+  match method.lineSearch.call φ φ₀ dφ₀ state.alpha () () with
+  | .ok ((α,φα),_) _ =>
+    state.alpha := α
+    return .ok state
+  | .error e _ =>
+    return .error e
+
+
+def updateState (method : LBFGS R m) (state : State R n m) (d : ObjectiveFunction R (R^[n])) :
+    (Except LineSearchError (State R n m)) := Id.run do
+
+  let mut state := state
+
+  -- unlike Optim.jl we update pseudo_iteration at the end of updateH
+  -- I think this is because Julia uses 1-based indexing but we use 0-based
+  -- so incrementing pseudo_iteration is more natural at the end of `updateH`
+  -- state.pseudo_iteration := state.pseudo_iteration + 1
+
+  -- update the preconditioner
+  -- method.precon := precondPrep state.x
+
+  -- Determine the L-BFGS search direction
+  state := updateSearchDirection method state
+
+  -- -- store old gradient
+  -- state.g_previous := state.g
+
+  match perform_linesearch method state d with
+  | .error e => return .error e
+  | .ok state' =>
+
+    state := state'
+
+    -- update position
+    state.dx := state.alpha • state.s
+    state.x_previous := state.x
+    state.x := state.x + state.dx
+
+    -- do not update `f_x` as it will be updated in `updateFG`
+
+    -- TODO: update f_calls and g_calls
+    -- this information should be somehow given by line search
+
+    return .ok state
+
+
+def updateFG (state : State R n m) (d : ObjectiveFunction R (R^[n])) :
+    State R n m := Id.run do
+
+  let mut state := state
+
+  state.f_x_previous := state.f_x
+  state.g_previous := state.g
+
+  let (f_x, updateFun) := d.f' state.x
+  state.f_x := f_x
+  state.g := updateFun 1
+
+  state.f_calls += 1
+  state.g_calls += 1
+
+  return state
+
+
+def updateH (state : State R n m) :
+    State R n m := Id.run do
+
+  let mut state := state
+
+  state.dg := state.g - state.g_previous
+
+  let ρ := 1 / ⟪state.dg, state.dx⟫
+  if ¬(Scalar.isFinite ρ) then
+    -- Optim.jl resets to zero which does not make sense to me
+    -- shouldn't we just ignore this step? i.e. reduce `pseudo_iteration` by one?
+    state.pseudo_iteration := 0
+    return state
+
+  let k := state.pseudo_iteration
+  let i : Fin m := ⟨(k%m).toNat, by sorry_proof⟩
+  state.dx_history[i] := state.dx
+  state.dg_history[i] := state.dg
+  state.ρ[i] := ρ
+
+  state.pseudo_iteration += 1
+
+  return state
+
+
+def assessConvergence (method : LBFGS R m) (state : State R n m) :=
+
+  let ⟨..⟩ := state
+  let ⟨..⟩ := method.toOptions
+
+  Id.run do
+
+  let mut x_converged := false
+  let mut f_converged := false
+  let mut f_increased := false
+  let mut g_converged := false
+
+  if (x - x_previous).abs.max ≤ x_abstol then
+    x_converged := true
+
+  if (x - x_previous).abs.max ≤ x_reltol * x.abs.max then
+    x_converged := true
+
+  if Scalar.abs (f_x - f_x_previous) ≤ f_abstol then
+    f_converged := true
+
+  if Scalar.abs (f_x - f_x_previous) ≤ f_reltol * Scalar.abs f_x then
+    f_converged := true
+
+  if f_x > f_x_previous then
+    f_increased := true
+
+  g_converged := g.abs.max ≤ g_abstol
+
+  return (x_converged, f_converged, g_converged, f_increased)
+
+
+def initState (method : LBFGS R m) (d : ObjectiveFunction R (R^[n])) (x₀ : R^[n]) : LBFGS.State R n m := Id.run do
+
+  let (fx,df) := d.f' x₀
+  let g := df 1
+
+  let mut state : LBFGS.State R n m := {
+    x := x₀
+    f_x := fx
+    f_x_previous := fx
+    g := g
+    f_calls := 1
+    g_calls := 1
+  }
+
+  state := reset_search_direction state
+
+  return state
+
+end LBFGS
+
+
+instance {n} : AbstractOptimizer (LBFGS R m) (LBFGS.State R n m) R (R^[n]) where
+
+  getOptions m := m.toOptions
+  getPosition s := s.x
+  getGradient s := s.g
+
+  initialConvergence method state := (false,false)
+  assessConvergence method state := LBFGS.assessConvergence method state
+
+  printStateHeader := s!"xₙ\txₙ₊₁\tf(xₙ)\t∇f(xₙ)\tsₙ\tα"
+  printState state := s!"{state.x_previous}\t{state.x}\t{state.f_x_previous}\t{state.g_previous}\t{state.s}\t{state.alpha}"
+
+  initState m d x₀ := LBFGS.initState m d x₀
+
+  updateState method state d := LBFGS.updateState method state d
+  updateFG method state d := LBFGS.updateFG state d
+  updateH method state d := LBFGS.updateH state
+
+  pick_best_x take_prev state := if take_prev then state.x_previous else state.x
+  pick_best_f take_prev state d := if take_prev then state.f_x_previous else state.f_x
+
+  x_abschange state := (state.x - state.x_previous).abs.max
+  x_relchange state := (state.x - state.x_previous).abs.max / state.x.abs.max
+  f_abschange d state := Scalar.abs (state.f_x - state.f_x_previous)
+  f_relchange d state := Scalar.abs (state.f_x - state.f_x_previous) / Scalar.abs (state.f_x)
+  g_residual d state := state.g.abs.max
+
+  f_calls d state := state.f_calls
+  g_calls d state := state.g_calls
+  h_calls d state := state.h_calls
diff --git a/SciLean/Numerics/Optimization/Optimjl/Utilities/PerformLineSearch.lean b/SciLean/Numerics/Optimization/Optimjl/Utilities/PerformLineSearch.lean
deleted file mode 100644
index 5ee6b7c2..00000000
--- a/SciLean/Numerics/Optimization/Optimjl/Utilities/PerformLineSearch.lean
+++ /dev/null
@@ -1,25 +0,0 @@
-import SciLean.Numerics.Optimization.Optimjl.Utilities.Types
-
-/-!
Port of Optim.jl, file src/utilities/perform_linesearch.jl - -github link: -https://github.com/JuliaNLSolvers/Optim.jl/blob/711dfec61acf5dbed677e1af15f2a3347d5a88ad/src/utilities/perform_linesearch.jl - --/ - -namespace SciLean.Optimjl - - - -variable - {R : Type} [RealScalar R] [PlainDataType R] - {X : Type} [NormedAddCommGroup X] [AdjointSpace R X] [CompleteSpace X] - -variable [AbstractOptimizerState R X S] - -variable (R) - - - - -end diff --git a/SciLean/Numerics/Optimization/Optimjl/Utilities/Types.lean b/SciLean/Numerics/Optimization/Optimjl/Utilities/Types.lean index 29d034c4..90fe140b 100644 --- a/SciLean/Numerics/Optimization/Optimjl/Utilities/Types.lean +++ b/SciLean/Numerics/Optimization/Optimjl/Utilities/Types.lean @@ -18,16 +18,16 @@ structure Options (R : Type) [RealScalar R] where f_abstol : R := 0 f_reltol : R := 0 g_abstol : R := 1e-8 - g_reltol : R := 1e-8 + -- g_reltol : R := 1e-8 outer_x_abstol : R := 0 outer_x_reltol : R := 0 outer_f_abstol : R := 0 outer_f_reltol : R := 0 outer_g_abstol : R := 1e-8 outer_g_reltol : R := 1e-8 - f_calls_limit : ℕ := 0 - g_calls_limit : ℕ := 0 - h_calls_limit : ℕ := 0 + f_calls_limit : Option ℕ := none + g_calls_limit : Option ℕ := none + h_calls_limit : Option ℕ := none allow_f_increases : Bool := true allow_outer_f_increases : Bool := true successive_f_tol : ℕ := 1 diff --git a/SciLean/Tactic/DataSynth/Main.lean b/SciLean/Tactic/DataSynth/Main.lean index ccec7f44..c0d8e5d9 100644 --- a/SciLean/Tactic/DataSynth/Main.lean +++ b/SciLean/Tactic/DataSynth/Main.lean @@ -4,6 +4,8 @@ import Batteries.Tactic.Exact import Lean.Meta.Transform +-- set_option linter.unusedVariables false + namespace SciLean.Tactic.DataSynth open Lean Meta @@ -50,13 +52,13 @@ def normalizeLet' (e : Expr) : CoreM Expr := Lean.Core.transform e (post := fun e => match e with - | mkApp3 (.const ``Prod.fst _) _ _ (mkApp4 (.const ``Prod.mk _) _ _ x y) => + | mkApp3 (.const ``Prod.fst _) _ _ (mkApp4 (.const ``Prod.mk _) _ _ x _y) => return .done x - | mkApp3 (.const ``Prod.snd _) _ _ (mkApp4 (.const ``Prod.mk _) _ _ x y) => + | mkApp3 (.const ``Prod.snd _) _ _ (mkApp4 (.const ``Prod.mk _) _ _ _x y) => return .done y - | .proj ``Prod 0 (mkApp4 (.const ``Prod.mk _) _ _ x y) => + | .proj ``Prod 0 (mkApp4 (.const ``Prod.mk _) _ _ x _y) => return .done x - | .proj ``Prod 1 (mkApp4 (.const ``Prod.mk _) _ _ x y) => + | .proj ``Prod 1 (mkApp4 (.const ``Prod.mk _) _ _ _x y) => return .done y | _ => return .done e) @@ -288,7 +290,7 @@ where def Goal.assumption? (goal : Goal) : DataSynthM (Option Result) := do withProfileTrace "assumption?" do (← getLCtx).findDeclRevM? 
fun localDecl => do - forallTelescope localDecl.type fun xs type => do + forallTelescope localDecl.type fun _xs type => do if localDecl.isImplementationDetail then return none else if type.isAppOf' goal.dataSynthDecl.name then @@ -485,13 +487,13 @@ def letGoals (fgGoal : Goal) (f g : Expr) : DataSynthM (Option (Goal×Goal)) := try withMainTrace (fun _ => return m!"assigning data") do xs[gId]!.mvarId!.assignIfDefeq g - catch e => + catch _e => throwError s!"{← ppExpr (xs[gId]!)} : {← ppExpr (← inferType xs[gId]!)} := {← ppExpr g}" try withMainTrace (fun _ => return m!"assigning data") do xs[fId]!.mvarId!.assignIfDefeq f - catch e => + catch _e => throwError s!"{← ppExpr (xs[fId]!)} : {← ppExpr (← inferType xs[fId]!)} := {← ppExpr f}" @@ -522,9 +524,12 @@ def letResults (fgGoal : Goal) (f g : Expr) (hf hg : Result) : DataSynthM (Optio let r ← fgGoal.getResultFrom proof return r + +set_option linter.unusedVariables false in /-- Given goal for composition `fun x i => f x i` and given free var `i` and `f` return goal for `(f · i)` -/ def piGoal (fGoal : DataSyntGoal) (i : Expr) (fi : Expr) : DataSynthM (Option Goal) := return none +set_option linter.unusedVariables false in /-- Given result for `(f · i)` and free variable `i` return result for `f`-/ def piResult (hf : Result) (i : Expr) : DataSynthM (Option Result) := return none @@ -632,7 +637,7 @@ def letCase (goal : Goal) (f g : FunData) : DataSynthM (Option Result) := do def lamCase (goal : Goal) (f : FunData) : DataSynthM (Option Result) := do withProfileTrace "lamCase" do - lambdaBoundedTelescope f.body 1 fun is b => do + lambdaBoundedTelescope f.body 1 fun is _b => do let i := is[0]! let fi := {f with body := f.body.beta is} let some figoal ← piGoal goal i (← fi.toExpr) | return none @@ -675,7 +680,7 @@ def mainFunCached (goal : Goal) (f : FunData) : DataSynthM (Option Result) := do withMainTrace (fun r => match r with - | .ok (some r) => return m!"[✅] {← goal.pp}" + | .ok (some _r) => return m!"[✅] {← goal.pp}" | .ok none => return m!"[❌] {← goal.pp}" | .error e => return m!"[💥️] {← goal.pp}\n{e.toMessageData}") do diff --git a/Test/optimjl/bfgs.lean b/Test/optimjl/bfgs.lean new file mode 100644 index 00000000..1b102ea7 --- /dev/null +++ b/Test/optimjl/bfgs.lean @@ -0,0 +1,20 @@ +import SciLean.Numerics.Optimization.Optimjl.Multivariate.Solvers.FirstOrder.BFGS +import SciLean.Numerics.Optimization.Optimjl.Multivariate.Optimize.Optimize +import SciLean.Tactic.DataSynth.ArrayOperations + + +open SciLean Optimjl + +def rosenbrock (a b x y : Float) := (a - x)^2 + b * (y - x^2)^2 + +def f : ObjectiveFunction Float (Float^[2]) where + f x := rosenbrock 1 100 x[0] x[1] + hf := by + unfold rosenbrock + apply HasRevFDeriv_from_HasRevFDerivUpdate + data_synth => lsimp + f' := _ + +def main : IO Unit := do + let r ← optimize f {g_abstol:=1e-4, show_trace:=true : BFGS Float} ⊞[-10.0,-10.0] + r.print diff --git a/Test/optimjl/lbfgs.lean b/Test/optimjl/lbfgs.lean new file mode 100644 index 00000000..306a1208 --- /dev/null +++ b/Test/optimjl/lbfgs.lean @@ -0,0 +1,20 @@ +import SciLean.Numerics.Optimization.Optimjl.Multivariate.Solvers.FirstOrder.LBFGS +import SciLean.Numerics.Optimization.Optimjl.Multivariate.Optimize.Optimize +import SciLean.Tactic.DataSynth.ArrayOperations + + +open SciLean Optimjl + +def rosenbrock (a b x y : Float) := (a - x)^2 + b * (y - x^2)^2 + +def f : ObjectiveFunction Float (Float^[2]) where + f x := rosenbrock 1 100 x[0] x[1] + hf := by + unfold rosenbrock + apply HasRevFDeriv_from_HasRevFDerivUpdate + data_synth => lsimp + 
f' := _ + +def main : IO Unit := do + let r ← optimize f {show_trace:=true : LBFGS Float 1} ⊞[0.0,0] + r.print diff --git a/lakefile.lean b/lakefile.lean index bf812bf7..6d1dc6df 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -1,3 +1,4 @@ + import Lake open Lake DSL System @@ -43,6 +44,15 @@ lean_exe WalkOnSpheres { root := `examples.WalkOnSpheres } +lean_exe BFGS { + root := `Test.optimjl.bfgs +} + +lean_exe LBFGS { + root := `Test.optimjl.lbfgs +} + + lean_exe ForLoopTest { buildType := .release
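
A minimal usage sketch (not part of the patch) of the two notations added in `SciLean/Meta/Notation/Let'.lean` above. The structure `Particle` and the functions `speed` and `nudge` are hypothetical names used only for illustration; the example assumes the notation file is imported as shown.

```lean
import SciLean.Meta.Notation.Let'

-- `Particle` is a hypothetical example structure, not part of SciLean
structure Particle where
  pos : Float
  vel : Float

-- `let ⟨..⟩ := p` expands to `let ⟨pos, vel⟩ := p`, bringing the field names into scope
def speed (p : Particle) : Float :=
  let ⟨..⟩ := p
  vel

-- inside a `do` block, `p.pos := x` expands to `p := {p with pos := x}`
def nudge (p : Particle) (d : Float) : Particle := Id.run do
  let mut p := p
  p.pos := p.pos + d
  return p
```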