From 2c84b19e9649136b68fac34a752ebc55822d07f8 Mon Sep 17 00:00:00 2001
From: Mirek Kratochvil <exa.exa@gmail.com>
Date: Thu, 14 Sep 2023 00:15:39 +0200
Subject: [PATCH] add a few missing things and finish the metabolism example

---
 docs/src/metabolic-modeling.jl | 202 ++++++++++++++++++++++++++++++++-
 src/constraint_tree.jl         |  40 +++++++
 src/solution.jl                |   5 +-
 src/value.jl                   |   1 +
 4 files changed, 244 insertions(+), 4 deletions(-)

diff --git a/docs/src/metabolic-modeling.jl b/docs/src/metabolic-modeling.jl
index d65b742..10b9133 100644
--- a/docs/src/metabolic-modeling.jl
+++ b/docs/src/metabolic-modeling.jl
@@ -9,7 +9,6 @@
 # First, let's import some packages:
 
 import ConstraintTrees as C
-import JuMP, SBML
 
 # We will need a constraint-based metabolic model; for this test we will use
 # the usual "E. Coli core metabolism" model as available from BiGG:
@@ -18,12 +17,15 @@ import Downloads: download
 
 download("http://bigg.ucsd.edu/static/models/e_coli_core.xml", "e_coli_core.xml")
 
+import SBML
 ecoli = SBML.readSBML("e_coli_core.xml")
 
+# ## Allocating and constraining variables
+#
 # Let's first build the constrained representation of the problem. First, we
-# will need a variable for each of the reactions in the model:
+# will need a variable for each of the reactions in the model.
 
-c = C.allocate_variables(keys = Symbol.(keys(ecoli.reactions)));
+c = C.allocate_variables(keys = Symbol.(keys(ecoli.reactions)))
 
 @test length(C.elems(c)) == length(ecoli.reactions) #src
 
@@ -34,6 +36,11 @@ C.elems(c)
 # ...or much more conveniently using the record dot syntax as properties:
 c.R_PFK
 
+# The individual `Value`s in constraint behave like sparse vectors that refer
+# to variables: The first field represents the referenced variable indexes, and
+# the second field represents the coefficients. Compared to the sparse vectors,
+# information about the total number of variables is not stored explicitly.
+
 # Operator `^` is used to name individual constraints and directories in the
 # hierarchy. Let us name our constraints as "fluxes" (which is a common name in
 # metabolic modeling) and explore the result:
@@ -49,3 +56,192 @@ collect(keys(C.elems(c)))
 
 # ...which can be explored with the dot access again:
 c.fluxes.R_PFK
+
+# Indexing via values is possible via the usual bracket notation, and can be
+# freely combined with the dot notation:
+c[:fluxes][:R_PFK]
+
+@test c[:fluxes].R_PFK === c.fluxes[:R_PFK] #src
+
+# ## Adding single-variable constraints
+
+# Each element in the constraint tree consists of a linear combination of the
+# variables, which can be freely used to construct (and constraint) new linear
+# combinations of variables. As the simplest use, we can constraint the
+# variables to their valid bounds as defined by the model:
+rxn_constraints =
+    let rxn_bounds = Symbol.(keys(ecoli.reactions)) .=> zip(SBML.flux_bounds(ecoli)...)
+        C.make_constraint_tree(
+            r => C.Constraint(value = c.fluxes[r].value, bound = (lb, ub)) for
+            (r, ((lb, _), (ub, _))) in rxn_bounds # SBML units are ignored for simplicity
+        )
+    end
+
+# To combine the constraint trees, we can make a nice directory for the
+# constraints and add them to the tree using operator `*`. Making "products" of
+# constraint trees combines the trees in a way that they _share_ their
+# variables. In particular, using the values from `c.fluxes` in the constraints
+# within `rxn_constraints` here will constraint precisely the same variables
+# (and thus values) as the ones in the original system.
+c = c * :constraints^rxn_constraints;
+
+# Our model representation now contains 2 "directories":
+collect(keys(c))
+
+@test 2 == length((keys(c)))#src
+
+# ## Adding combined constraints
+
+# Values may be combined additively and multiplied by real constants; which
+# allows us to easily create more complex linear combination of any values
+# already occurring in the model:
+c.fluxes.R_PFK - 2 * C.fluxes.R_ACALD
+
+# Metabolic modeling relies on the fact that the total rates of any metabolite
+# getting created and consumed by the reaction equals to zero (which
+# corresponds to conservation of mass). We can now add corresponding
+# "stoichiometric" network constraints by following the reactants and products
+# in the SBML structure:
+stoi_constraints = C.make_constraint_tree(
+    Symbol(m) => C.Constraint(
+        value = -sum(
+            (
+                sr.stoichiometry * c.fluxes[Symbol(rid)].value for
+                (rid, r) in ecoli.reactions for sr in r.reactants if sr.species == m
+            ),
+            init = zero(C.Value), # sometimes the sums are empty
+        ) + sum(
+            (
+                sr.stoichiometry * c.fluxes[Symbol(rid)].value for
+                (rid, r) in ecoli.reactions for sr in r.products if sr.species == m
+            ),
+            init = zero(C.Value),
+        ),
+        bound = 0.0,
+    ) for m in keys(ecoli.species)
+)
+
+# Again, we can label the stoichiometry properly and add it to the bigger model
+# representation:
+c = c * :stoichiometry^stoi_constraints;
+
+# ## Saving the objective
+#
+# Constraint based models typically optimize a certain linear formula.
+# Constraint trees do not support setting objectives (they are not
+# constraints), but we can save the objective as a harmless unconstrained
+# "constraint" that can be used later to refer to the objective more easily.
+# We can save that information into the constraint system immediately:
+c *=
+    :objective^C.Constraint(
+        value = sum(
+            c.fluxes[Symbol(rid)].value * coeff for
+            (rid, coeff) in (keys(ecoli.reactions) .=> SBML.flux_objective(ecoli)) if
+            coeff != 0.0
+        ),
+    );
+
+# ## Solving the constraint system using JuMP
+#
+# We can make a small function that throws our model into JuMP, optimizes it,
+# and gives us back a variable assignment vector. This vector can then be used
+# to determine and browse the values of constraints and variables using
+# `SolutionTree`.
+import JuMP
+function optimized_vars(cs::C.ConstraintTree, objective::C.Value, optimizer)
+    model = JuMP.Model(optimizer)
+    JuMP.@variable(model, x[1:C.var_count(cs)])
+    JuMP.@objective(model, JuMP.MAX_SENSE, C.value_product(objective, x))
+    #+
+    function add_constraint(c::C.Constraint)
+        if c.bound isa Float64
+            JuMP.@constraint(model, C.value_product(c.value, x) == c.bound)
+        elseif c.bound isa Tuple{Float64,Float64}
+            val = C.value_product(c.value, x)
+            isinf(c.bound[1]) || JuMP.@constraint(model, val >= c.bound[1])
+            isinf(c.bound[2]) || JuMP.@constraint(model, val <= c.bound[2])
+        end
+    end
+    #+
+    function add_constraint(c::C.ConstraintTree)
+        add_constraint.(values(c))
+    end
+    #+
+    add_constraint(cs)
+    #+
+    JuMP.optimize!(model)
+    #+
+    JuMP.value.(model[:x])
+end
+
+# With this in hand, we can use an external linear problem solver to find the
+# optimum of the constrained system:
+import GLPK
+optimal_variable_assignment = optimized_vars(c, c.objective.value, GLPK.Optimizer)
+
+# To explore the solution more easily, we can make a solution tree with values
+# that correspond to ones in our constraint tree:
+result = C.solution_tree(c, optimal_variable_assignment);
+result.fluxes.R_BIOMASS_Ecoli_core_w_GAM
+
+result.fluxes.R_PFK
+
+result.objective
+
+# Sometimes it is unnecessary to recover the values for all constraints, so we are better off selecting just a subtree:
+C.elems(C.solution_tree(c.fluxes, optimal_variable_assignment))
+
+C.solution_tree(c.objective, optimal_variable_assignment)
+
+# ## Combining and extending constraint systems
+#
+# Constraint trees can be extended with new variables from another constraint
+# trees using the `+` operator. Contrary to the `*` operator, adding the
+# constraint trees does _not_ share the variables between operands, and the
+# resulting constraint tree will basically contain two disconnected trees that
+# solve independently. The user is expected to create additional constraints to
+# connect the independent parts.
+#
+# Here, we demonstrate this by creating a community of two slightly different
+# E. Coli species: First, we disable functionality of a different reaction in
+# each of the models to create a diverse group of differently handicapped
+# organisms:
+c =
+    :community^(
+        :species1^(c * :handicap^C.Constraint(value = c.fluxes.R_PFK.value, bound = 0.0)) +
+        :species2^(c * :handicap^C.Constraint(value = c.fluxes.R_ACALD.value, bound = 0.0))
+    )
+
+# We can create additional variables that represent total community intake of
+# oxygen, and total community production of biomass:
+c +=
+    :exchanges^C.allocate_variables(
+        keys = [:oxygen, :biomass],
+        bounds = [(-10.0, 10.0), nothing],
+    )
+
+# These can be constrained so that the total influx (or outflux) of each of the
+# registered metabolites is in fact equal to total consumption or production by
+# each of the species:
+c *=
+    :exchange_constraints^C.make_constraint_tree(
+        :oxygen => C.Constraint(
+            value = c.exchanges.oxygen.value - c.community.species1.fluxes.R_EX_o2_e.value -
+                    c.community.species2.fluxes.R_EX_o2_e.value,
+            bound = 0.0,
+        ),
+        :biomass => C.Constraint(
+            value = c.exchanges.biomass.value -
+                    c.community.species1.fluxes.R_BIOMASS_Ecoli_core_w_GAM.value -
+                    c.community.species2.fluxes.R_BIOMASS_Ecoli_core_w_GAM.value,
+            bound = 0.0,
+        ),
+    )
+
+# Let's see how much biomass are the two species capable of producing together:
+result = C.solution_tree(c, optimized_vars(c, c.exchanges.biomass.value, GLPK.Optimizer));
+C.elems(result.exchanges)
+
+# Finally, we can iterate over all species in the small community and see how
+# much biomass was actually contributed by each:
+[k => v.fluxes.R_BIOMASS_Ecoli_core_w_GAM for (k, v) in result.community]
diff --git a/src/constraint_tree.jl b/src/constraint_tree.jl
index 6234f76..dbc6461 100644
--- a/src/constraint_tree.jl
+++ b/src/constraint_tree.jl
@@ -105,6 +105,9 @@ Base.keys(x::ConstraintTree) = keys(elems(x))
 
 Base.values(x::ConstraintTree) = values(elems(x))
 
+Base.length(x::ConstraintTree) = length(elems(x))
+
+Base.iterate(x::ConstraintTree) = iterate(elems(x))
 Base.iterate(x::ConstraintTree, st) = iterate(elems(x), st)
 
 Base.eltype(x::ConstraintTree) = eltype(elems(x))
@@ -113,6 +116,43 @@ Base.propertynames(x::ConstraintTree) = keys(x)
 
 Base.getindex(x::ConstraintTree, sym::Symbol) = getindex(elems(x), sym)
 
+#
+# Tree-wide operations with variables
+#
+
+"""
+$(TYPEDSIGNATURES)
+
+Find the expected count of variables in a [`Constraint`](@ref).
+"""
+var_count(x::Constraint) = isempty(x.value.idxs) ? 0 : last(x.value.idxs)
+
+"""
+$(TYPEDSIGNATURES)
+
+Find the expected count of variables in a [`ConstraintTree`](@ref).
+"""
+var_count(x::ConstraintTree) = isempty(elems(x)) ? 0 : maximum(var_count.(values(elems(x))))
+
+"""
+$(TYPEDSIGNATURES)
+
+Offset all variable indexes in a [`Constraint`](@ref) by the given increment.
+"""
+incr_var_idxs(x::Constraint, incr::Int) = Constraint(
+    value = Value(idxs = x.value.idxs .+ incr, weights = x.value.weights),
+    bound = x.bound,
+)
+
+"""
+$(TYPEDSIGNATURES)
+
+Offset all variable indexes in a [`ConstraintTree`](@ref) by the given
+increment.
+"""
+incr_var_idxs(x::ConstraintTree, incr::Int) =
+    ConstraintTree(elems = SortedDict(k => incr_var_idxs(v, incr) for (k, v) in elems(x)))
+
 #
 # Algebraic construction
 #
diff --git a/src/solution.jl b/src/solution.jl
index 08e2694..a9a9de7 100644
--- a/src/solution.jl
+++ b/src/solution.jl
@@ -40,6 +40,9 @@ Base.keys(x::SolutionTree) = keys(elems(x))
 
 Base.values(x::SolutionTree) = values(elems(x))
 
+Base.length(x::SolutionTree) = length(elems(x))
+
+Base.iterate(x::SolutionTree) = iterate(elems(x))
 Base.iterate(x::SolutionTree, st) = iterate(elems(x), st)
 
 Base.eltype(x::SolutionTree) = eltype(elems(x))
@@ -67,6 +70,6 @@ assignment.
 """
 solution_tree(x::ConstraintTree, vars::AbstractVector{Float64}) = SolutionTree(
     elems = SortedDict{Symbol,SolutionTreeElem}(
-        keys(elems(x)) .=> solution_tree.(values(elems(x)), Ref(ov)),
+        keys(x) .=> solution_tree.(values(x), Ref(vars)),
     ),
 )
diff --git a/src/value.jl b/src/value.jl
index 1dfc6a7..f14909a 100644
--- a/src/value.jl
+++ b/src/value.jl
@@ -22,6 +22,7 @@ Base.@kwdef struct Value
     weights::Vector{Float64}
 end
 
+Base.zero(::Type{Value}) = Value(idxs = [], weights = [])
 Base.:*(a::Real, b::Value) = b * a
 Base.:*(a::Value, b::Real) = Value(idxs = a.idxs, weights = b .* a.weights)
 Base.:-(a::Value, b::Value) = a + (-1 * b)