diff --git a/NEWS.md b/NEWS.md
index 9b0ef1caf..86cd981f7 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,9 +1,7 @@
 # v4 Breaking changes
 
-1. The main change in this breaking release has been the way mini-batching is handled. The data argument in the solve call and the implicit iteration of that in the callback has been removed,
-the stochastic solvers (Optimisers.jl and Sophia) now handle it explicitly. You would now pass in a DataLoader to OptimziationProblem as the second argument to the objective etc (p) if you
-want to do minibatching, else for full batch just pass in the full data.
+ 1. The main change in this breaking release is how mini-batching is handled. The data argument to the solve call and the implicit iteration over it in the callback have been removed;
+    the stochastic solvers (Optimisers.jl and Sophia) now handle it explicitly. To do mini-batching, pass a DataLoader to OptimizationProblem as the second argument to the objective (p);
+    for full batch, simply pass in the full data.
 
-2. The support for extra returns from objective function has been removed. Now the objective should only return a scalar loss value, hence callback doesn't take extra arguments other than the state and loss value.
-
-
+ 2. Support for extra return values from the objective function has been removed. The objective should now return only a scalar loss value, so the callback no longer takes extra arguments beyond the state and the loss value.
diff --git a/Project.toml b/Project.toml
index a7f52a394..e484ab75c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "Optimization"
 uuid = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
-version = "4.0.2"
+version = "4.0.3"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
@@ -11,7 +11,6 @@ LBFGSB = "5be7bae1-8223-5378-bac3-9e7378a2f6e6"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36"
-MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
 OptimizationBase = "bca83a33-5cc9-4baa-983d-23429ab6bcbb"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c"
@@ -29,16 +28,11 @@ LBFGSB = "0.4.1"
 LinearAlgebra = "1.10"
 Logging = "1.10"
 LoggingExtras = "0.4, 1"
-MLUtils = "0.4.4"
 OptimizationBase = "2"
 Printf = "1.10"
 ProgressLogging = "0.1"
 Reexport = "1.2"
 SciMLBase = "2.39.0"
 SparseArrays = "1.10"
-Symbolics = "5.12"
 TerminalLoggers = "0.1"
 julia = "1.9"
-
-[extras]
-Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
diff --git a/docs/src/index.md b/docs/src/index.md
index 95473e8e8..34f3edd07 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -54,21 +54,21 @@ to add the specific wrapper packages.
 ```@raw html
BlackBoxOptim - - **Global Methods** + - Global Methods - Zeroth order - Unconstrained - Box Constraints
CMAEvolutionaryStrategy - - **Global Methods** + - Global Methods - Zeroth order - Unconstrained - Box Constraints
Evolutionary - - **Global Methods** + - Global Methods - Zeroth order - Unconstrained - Box Constraints @@ -76,38 +76,38 @@ to add the specific wrapper packages.
GCMAES - - **Global Methods** + - Global Methods - First order - Box Constraints - Unconstrained
Manopt - - **Local Methods** + - Local Methods - First order - Second order - Zeroth order - Box Constraints - Constrained 🟡 - - **Global Methods** + - Global Methods - Zeroth order - Unconstrained
MathOptInterface - - **Local Methods** + - Local Methods - First order - Second order - Box Constraints - Constrained - - **Global Methods** + - Global Methods - First order - Second order - Constrained
MultistartOptimization - - **Global Methods** + - Global Methods - Zeroth order - First order - Second order @@ -115,14 +115,14 @@ to add the specific wrapper packages.
Metaheuristics - - **Global Methods** + - Global Methods - Zeroth order - Unconstrained - Box Constraints
NOMAD - - **Global Methods** + - Global Methods - Zeroth order - Unconstrained - Box Constraints @@ -130,13 +130,13 @@ to add the specific wrapper packages.
NLopt - - **Local Methods** + - Local Methods - First order - Zeroth order - Second order 🟡 - Box Constraints - Local Constrained 🟡 - - **Global Methods** + - Global Methods - Zeroth order - First order - Unconstrained @@ -144,20 +144,20 @@ to add the specific wrapper packages.
Optim - - **Local Methods** + - Local Methods - Zeroth order - First order - Second order - Box Constraints - Constrained - - **Global Methods** + - Global Methods - Zeroth order - Unconstrained - Box Constraints
PRIMA - - **Local Methods** + - Local Methods - Derivative-Free: ✅ - **Constraints** - Box Constraints: ✅ @@ -167,13 +167,15 @@ to add the specific wrapper packages. QuadDIRECT - **Constraints** - Box Constraints: ✅ - - **Global Methods** + - Global Methods - Unconstrained: ✅
``` + 🟡 = supported in downstream library but not yet implemented in `Optimization.jl`; PR to add this functionality are welcome ## Citation + ``` @software{vaibhav_kumar_dixit_2023_7738525, author = {Vaibhav Kumar Dixit and Christopher Rackauckas}, @@ -185,37 +187,48 @@ to add the specific wrapper packages. url = {https://doi.org/10.5281/zenodo.7738525}, year = 2023} ``` + ## Reproducibility + ```@raw html
The documentation of this SciML package was built using these direct dependencies, ``` + ```@example using Pkg # hide Pkg.status() # hide ``` + ```@raw html
``` + ```@raw html
and using this machine and Julia version. ``` + ```@example using InteractiveUtils # hide versioninfo() # hide ``` + ```@raw html
``` + ```@raw html
A more complete overview of all dependencies and their versions is also provided. ``` + ```@example using Pkg # hide Pkg.status(; mode = PKGMODE_MANIFEST) # hide ``` + ```@raw html
``` + ```@eval using TOML using Markdown diff --git a/docs/src/tutorials/certification.md b/docs/src/tutorials/certification.md index 9ecdc0c35..09132c2f3 100644 --- a/docs/src/tutorials/certification.md +++ b/docs/src/tutorials/certification.md @@ -7,7 +7,7 @@ This works with the `structural_analysis` keyword argument to `OptimizationProbl We'll use a simple example to illustrate the convexity structure certification process. ```@example symanalysis -using SymbolicAnalysis, Zygote, LinearAlgebra, Optimization, OptimizationMOI +using SymbolicAnalysis, Zygote, LinearAlgebra, Optimization function f(x, p = nothing) return exp(x[1]) + x[1]^2 diff --git a/docs/src/tutorials/minibatch.md b/docs/src/tutorials/minibatch.md index 08f362f71..8748bd066 100644 --- a/docs/src/tutorials/minibatch.md +++ b/docs/src/tutorials/minibatch.md @@ -54,7 +54,7 @@ end function loss_adjoint(fullp, data) batch, time_batch = data pred = predict_adjoint(fullp, time_batch) - sum(abs2, batch .- pred), pred + sum(abs2, batch .- pred) end k = 10 diff --git a/lib/OptimizationOptimJL/Project.toml b/lib/OptimizationOptimJL/Project.toml index b8bcedb5c..5349f7885 100644 --- a/lib/OptimizationOptimJL/Project.toml +++ b/lib/OptimizationOptimJL/Project.toml @@ -1,7 +1,7 @@ name = "OptimizationOptimJL" uuid = "36348300-93cb-4f02-beb5-3c3902f8871e" authors = ["Vaibhav Dixit and contributors"] -version = "0.4.0" +version = "0.4.1" [deps] Optim = "429524aa-4258-5aef-a3af-852621145aeb" diff --git a/lib/OptimizationOptimJL/src/OptimizationOptimJL.jl b/lib/OptimizationOptimJL/src/OptimizationOptimJL.jl index aea9ada02..b2d6db4f8 100644 --- a/lib/OptimizationOptimJL/src/OptimizationOptimJL.jl +++ b/lib/OptimizationOptimJL/src/OptimizationOptimJL.jl @@ -26,6 +26,7 @@ function SciMLBase.requireshessian(opt::Union{ true end SciMLBase.requiresgradient(opt::Optim.Fminbox) = true +# SciMLBase.allowsfg(opt::Union{Optim.AbstractOptimizer, Optim.ConstrainedOptimizer, Optim.Fminbox, Optim.SAMIN}) = true function __map_optimizer_args(cache::OptimizationCache, opt::Union{Optim.AbstractOptimizer, Optim.Fminbox, @@ -142,11 +143,11 @@ function SciMLBase.__solve(cache::OptimizationCache{ θ = metadata[cache.opt isa Optim.NelderMead ? "centroid" : "x"] opt_state = Optimization.OptimizationState(iter = trace.iteration, u = θ, - objective = x[1], + objective = trace.value, grad = get(metadata, "g(x)", nothing), hess = get(metadata, "h(x)", nothing), original = trace) - cb_call = cache.callback(opt_state, x...) + cb_call = cache.callback(opt_state, trace.value) if !(cb_call isa Bool) error("The callback should return a boolean `halt` for whether to stop the optimization process.") end @@ -261,11 +262,11 @@ function SciMLBase.__solve(cache::OptimizationCache{ metadata["x"] opt_state = Optimization.OptimizationState(iter = trace.iteration, u = θ, - objective = x[1], + objective = trace.value, grad = get(metadata, "g(x)", nothing), hess = get(metadata, "h(x)", nothing), original = trace) - cb_call = cache.callback(opt_state, x...) + cb_call = cache.callback(opt_state, trace.value) if !(cb_call isa Bool) error("The callback should return a boolean `halt` for whether to stop the optimization process.") end @@ -277,14 +278,19 @@ function SciMLBase.__solve(cache::OptimizationCache{ __x = first(x) return cache.sense === Optimization.MaxSense ? -__x : __x end - fg! = function (G, θ) - if G !== nothing - cache.f.grad(G, θ) - if cache.sense === Optimization.MaxSense - G .*= -one(eltype(G)) + + if cache.f.fg === nothing + fg! 
= function (G, θ) + if G !== nothing + cache.f.grad(G, θ) + if cache.sense === Optimization.MaxSense + G .*= -one(eltype(G)) + end end + return _loss(θ) end - return _loss(θ) + else + fg! = cache.f.fg end gg = function (G, θ) @@ -344,9 +350,9 @@ function SciMLBase.__solve(cache::OptimizationCache{ u = metadata["x"], grad = get(metadata, "g(x)", nothing), hess = get(metadata, "h(x)", nothing), - objective = x[1], + objective = trace.value, original = trace) - cb_call = cache.callback(opt_state, x...) + cb_call = cache.callback(opt_state, trace.value) if !(cb_call isa Bool) error("The callback should return a boolean `halt` for whether to stop the optimization process.") end @@ -358,15 +364,21 @@ function SciMLBase.__solve(cache::OptimizationCache{ __x = first(x) return cache.sense === Optimization.MaxSense ? -__x : __x end - fg! = function (G, θ) - if G !== nothing - cache.f.grad(G, θ) - if cache.sense === Optimization.MaxSense - G .*= -one(eltype(G)) + + if cache.f.fg === nothing + fg! = function (G, θ) + if G !== nothing + cache.f.grad(G, θ) + if cache.sense === Optimization.MaxSense + G .*= -one(eltype(G)) + end end + return _loss(θ) end - return _loss(θ) + else + fg! = cache.f.fg end + gg = function (G, θ) cache.f.grad(G, θ) if cache.sense === Optimization.MaxSense @@ -434,7 +446,7 @@ PrecompileTools.@compile_workload begin function obj_f(x, p) A = p[1] b = p[2] - return sum((A * x - b) .^ 2) + return sum((A * x .- b) .^ 2) end function solve_nonnegative_least_squares(A, b, solver) diff --git a/lib/OptimizationOptimisers/Project.toml b/lib/OptimizationOptimisers/Project.toml index e03709fd9..b0e763c2f 100644 --- a/lib/OptimizationOptimisers/Project.toml +++ b/lib/OptimizationOptimisers/Project.toml @@ -1,7 +1,7 @@ name = "OptimizationOptimisers" uuid = "42dfb2eb-d2b4-4451-abcd-913932933ac1" authors = ["Vaibhav Dixit and contributors"] -version = "0.3.2" +version = "0.3.3" [deps] Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" @@ -10,17 +10,7 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" -[weakdeps] -MLDataDevices = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40" -MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" - -[extensions] -OptimizationOptimisersMLDataDevicesExt = "MLDataDevices" -OptimizationOptimisersMLUtilsExt = "MLUtils" - [compat] -MLDataDevices = "1.1" -MLUtils = "0.4.4" Optimisers = "0.2, 0.3" Optimization = "4" ProgressLogging = "0.1" diff --git a/lib/OptimizationOptimisers/ext/OptimizationOptimisersMLDataDevicesExt.jl b/lib/OptimizationOptimisers/ext/OptimizationOptimisersMLDataDevicesExt.jl deleted file mode 100644 index 545f73c6c..000000000 --- a/lib/OptimizationOptimisers/ext/OptimizationOptimisersMLDataDevicesExt.jl +++ /dev/null @@ -1,8 +0,0 @@ -module OptimizationOptimisersMLDataDevicesExt - -using MLDataDevices -using OptimizationOptimisers - -OptimizationOptimisers.isa_dataiterator(::DeviceIterator) = true - -end diff --git a/lib/OptimizationOptimisers/ext/OptimizationOptimisersMLUtilsExt.jl b/lib/OptimizationOptimisers/ext/OptimizationOptimisersMLUtilsExt.jl deleted file mode 100644 index 1790d7aea..000000000 --- a/lib/OptimizationOptimisers/ext/OptimizationOptimisersMLUtilsExt.jl +++ /dev/null @@ -1,8 +0,0 @@ -module OptimizationOptimisersMLUtilsExt - -using MLUtils -using OptimizationOptimisers - -OptimizationOptimisers.isa_dataiterator(::MLUtils.DataLoader) = true - -end diff --git a/lib/OptimizationOptimisers/src/OptimizationOptimisers.jl 
b/lib/OptimizationOptimisers/src/OptimizationOptimisers.jl index 67583ce1c..99743d24d 100644 --- a/lib/OptimizationOptimisers/src/OptimizationOptimisers.jl +++ b/lib/OptimizationOptimisers/src/OptimizationOptimisers.jl @@ -2,7 +2,7 @@ module OptimizationOptimisers using Reexport, Printf, ProgressLogging @reexport using Optimisers, Optimization -using Optimization.SciMLBase +using Optimization.SciMLBase, Optimization.OptimizationBase SciMLBase.supports_opt_cache_interface(opt::AbstractRule) = true SciMLBase.requiresgradient(opt::AbstractRule) = true @@ -16,8 +16,6 @@ function SciMLBase.__init( kwargs...) end -isa_dataiterator(data) = false - function SciMLBase.__solve(cache::OptimizationCache{ F, RC, @@ -59,7 +57,7 @@ function SciMLBase.__solve(cache::OptimizationCache{ throw(ArgumentError("The number of epochs must be specified as the epochs or maxiters kwarg.")) end - if isa_dataiterator(cache.p) + if OptimizationBase.isa_dataiterator(cache.p) data = cache.p dataiterate = true else diff --git a/src/sophia.jl b/src/sophia.jl index 5419b87d7..88b0812c3 100644 --- a/src/sophia.jl +++ b/src/sophia.jl @@ -1,5 +1,3 @@ -using Optimization.LinearAlgebra, MLUtils - struct Sophia η::Float64 βs::Tuple{Float64, Float64} @@ -64,7 +62,7 @@ function SciMLBase.__solve(cache::OptimizationCache{ maxiters = Optimization._check_and_convert_maxiters(cache.solver_args.maxiters) - if cache.p isa MLUtils.DataLoader + if OptimizationBase.isa_dataiterator(cache.p) data = cache.p dataiterate = true else diff --git a/test/diffeqfluxtests.jl b/test/diffeqfluxtests.jl index 243027246..2e5142991 100644 --- a/test/diffeqfluxtests.jl +++ b/test/diffeqfluxtests.jl @@ -31,11 +31,11 @@ end function loss_adjoint(p) prediction = predict_adjoint(p) loss = sum(abs2, x - 1 for x in prediction) - return loss, prediction + return loss end iter = 0 -callback = function (state, l, pred) +callback = function (state, l) display(l) # using `remake` to re-create our `prob` with current parameters `p` @@ -81,11 +81,11 @@ end function loss_neuralode(p) pred = predict_neuralode(p) loss = sum(abs2, ode_data .- pred) - return loss, pred + return loss end iter = 0 -callback = function (st, l, pred...) +callback = function (st, l) global iter iter += 1 diff --git a/test/minibatch.jl b/test/minibatch.jl index a1b08a439..4e0ca6ce8 100644 --- a/test/minibatch.jl +++ b/test/minibatch.jl @@ -45,7 +45,7 @@ end function loss_adjoint(fullp, p) (batch, time_batch) = p pred = predict_adjoint(fullp, time_batch) - sum(abs2, batch .- pred), pred + sum(abs2, batch .- pred) end k = 10
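To make the new v4 calling convention concrete, here is a minimal usage sketch matching the NEWS.md entry and the updated minibatch tutorial and tests in this diff: the objective returns only a scalar loss, the callback takes just the state and that loss, and mini-batching is requested by passing an `MLUtils.DataLoader` as the `p` argument of `OptimizationProblem`. The toy regression data, parameter names, Adam step size, and the `AutoZygote`/Zygote choice below are illustrative assumptions, not part of this PR.

```julia
using Optimization, OptimizationOptimisers, MLUtils, Zygote

# Illustrative data: recover w ≈ 2 from mini-batches of (x, y) pairs.
x = collect(range(0.0, 1.0, length = 100))
y = 2.0 .* x
loader = MLUtils.DataLoader((x, y), batchsize = 10)

# v4: the objective returns only a scalar loss; the current batch arrives as the second argument.
function loss(w, data)
    xb, yb = data
    return sum(abs2, yb .- w[1] .* xb)
end

# v4: the callback receives the optimization state and the scalar loss, nothing else.
callback = function (state, l)
    return false  # return true to halt early
end

optf = OptimizationFunction(loss, Optimization.AutoZygote())
prob = OptimizationProblem(optf, [0.0], loader)  # DataLoader passed where `p` used to go
sol = solve(prob, Optimisers.Adam(0.05); callback = callback, epochs = 100)
```

For full-batch optimization, the same code applies with the raw data tuple passed as `p` in place of the `DataLoader`.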