diff --git a/Project.toml b/Project.toml index 4121522..d4520f0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "FBCModelTests" uuid = "1060cd45-1c33-4a14-be81-8fa38fdd82bf" authors = ["The developers of FBCModelTests.jl"] -version = "0.2.5" +version = "0.3.0" [deps] COBREXA = "babc4406-5200-4a30-9033-bf5ae714c842" diff --git a/src/memote/checks/Annotation.jl b/src/memote/checks/Annotation.jl index 2d6dd1b..aa9be13 100644 --- a/src/memote/checks/Annotation.jl +++ b/src/memote/checks/Annotation.jl @@ -19,20 +19,20 @@ is_annotated_gene(model, gid) = !isempty(gene_annotations(model, gid)) """ $(TYPEDSIGNATURES) -Helper function to find all the annotations that are missing for a component in a model. +Helper function to find all the annotations for a component in a model. """ -function _find_unannotated_components( +function _find_annotated_components( model::MetabolicModel, id_accessor::String, annotation_accessor, annotation_kws, ) - missing_annos = String[] + found_annos = String[] for anno_kw in annotation_kws annos = parse_annotations(annotation_accessor(model, id_accessor)) - !haskey(annos, anno_kw) && push!(missing_annos, anno_kw) + haskey(annos, anno_kw) && push!(found_annos, anno_kw) end - return missing_annos + return found_annos end """ @@ -40,10 +40,10 @@ $(TYPEDSIGNATURES) Checks if the databases listed in `config.annotation.gene_annotation_keywords` are present in the gene annotations. Returns a vector of annotation keywords -that were not found. +that were found. """ -findall_unannotated_gene_databases(model, gid::String; config = Config.memote_config) = - _find_unannotated_components( +findall_annotated_gene_databases(model, gid::String; config = Config.memote_config) = + _find_annotated_components( model, gid, gene_annotations, @@ -55,28 +55,25 @@ $(TYPEDSIGNATURES) Checks if the databases listed in `config.annotation.metabolite_annotation_keywords` are present in the metabolite annotations. Returns a vector of annotation keywords -that were not found. +that were found. """ -findall_unannotated_metabolite_databases( - model, - mid::String; - config = Config.memote_config, -) = _find_unannotated_components( - model, - mid, - metabolite_annotations, - config.annotation.metabolite_annotation_keywords, -) +findall_annotated_metabolite_databases(model, mid::String; config = Config.memote_config) = + _find_annotated_components( + model, + mid, + metabolite_annotations, + config.annotation.metabolite_annotation_keywords, + ) """ $(TYPEDSIGNATURES) Checks if the databases listed in `config.annotation.reaction_annotation_keywords` are present in the reaction annotations. Returns a vector of annotation keywords -that were not found. +that were found. """ -findall_unannotated_reaction_databases(model, rid::String; config = Config.memote_config) = - _find_unannotated_components( +findall_annotated_reaction_databases(model, rid::String; config = Config.memote_config) = + _find_annotated_components( model, rid, reaction_annotations, @@ -86,23 +83,24 @@ findall_unannotated_reaction_databases(model, rid::String; config = Config.memot """ $(TYPEDSIGNATURES) -Helper function to find all the annotations for a component that do not conform in the model. +Helper function to find all the annotations for a component that conform to +recognized patterns in the model. """ -function _find_nonconformal_annotations( +function _find_conformal_annotations( model::MetabolicModel, id_accessor::String, annotation_accessor, annotation_regex, ) - no_annos = String[] + conform_annos = String[] for (anno_kw, anno_regex) in annotation_regex annos = parse_annotations(annotation_accessor(model, id_accessor)) if haskey(annos, anno_kw) - in.(nothing, Ref(match.(anno_regex, annos[anno_kw]))) && - push!(no_annos, anno_kw) + in.(nothing, Ref(match.(anno_regex, annos[anno_kw]))) || + push!(conform_annos, anno_kw) end end - return no_annos + return conform_annos end """ @@ -111,10 +109,10 @@ $(TYPEDSIGNATURES) Check if the gene annotation entry conforms to commonly recognized formats of annotation database using regex patterns. Uses the database formats listed in `config.annotation.gene_annotation_regexes` to test the conformity. Returns a -string vector of database ids do not conform. +string vector of database ids that do conform. """ -findall_nonconformal_gene_annotations(model, gid::String; config = Config.memote_config) = - _find_nonconformal_annotations( +findall_conformal_gene_annotations(model, gid::String; config = Config.memote_config) = + _find_conformal_annotations( model, gid, gene_annotations, @@ -127,13 +125,13 @@ $(TYPEDSIGNATURES) Check if the metabolite annotation entry conforms to commonly recognized formats of annotation database using regex patterns. Uses the database formats listed in `config.annotation.metabolite_annotation_regexes` to test the conformity. -Returns a string vector of database ids do not conform. +Returns a string vector of database ids that do conform. """ -findall_nonconformal_metabolite_annotations( +findall_conformal_metabolite_annotations( model, mid::String; config = Config.memote_config, -) = _find_nonconformal_annotations( +) = _find_conformal_annotations( model, mid, metabolite_annotations, @@ -146,17 +144,14 @@ $(TYPEDSIGNATURES) Check if the reaction annotation entry conforms to commonly recognized formats of annotation database using regex patterns. Uses the database formats listed in `config.annotation.reaction_annotation_regexes` to test the conformity. Returns -a string vector of database ids do not conform. +a string vector of database ids that do conform. """ -findall_nonconformal_reaction_annotations( - model, - rid::String; - config = Config.memote_config, -) = _find_nonconformal_annotations( - model, - rid, - reaction_annotations, - config.annotation.reaction_annotation_regexes, -) +findall_conformal_reaction_annotations(model, rid::String; config = Config.memote_config) = + _find_conformal_annotations( + model, + rid, + reaction_annotations, + config.annotation.reaction_annotation_regexes, + ) end # module diff --git a/src/memote/checks/Metabolites.jl b/src/memote/checks/Metabolites.jl index 3a7bb01..a2757f8 100644 --- a/src/memote/checks/Metabolites.jl +++ b/src/memote/checks/Metabolites.jl @@ -18,33 +18,24 @@ $(TYPEDSIGNATURES) Test if metabolites `m1` and `m2` are different by comparing their `config.metabolite.test_annotation` field in the annotations of each metabolite. Note, if no annotations are present for one or both of the -metabolites, then return `true`. +metabolites, then return `false`. """ -function metabolites_are_duplicated( - model::MetabolicModel, - m1, - m2; - config = Config.memote_config, -) - k1s = get( - parse_annotations(metabolite_annotations(model, m1)), - config.metabolite.test_annotation, - nothing, - ) - isnothing(k1s) && return true - k2s = get( - parse_annotations(metabolite_annotations(model, m2)), - config.metabolite.test_annotation, - nothing, - ) - isnothing(k2s) && return true +function metabolites_are_duplicated(model::MetabolicModel, m1, m2, test_annotation) + k1s = + get(parse_annotations(metabolite_annotations(model, m1)), test_annotation, nothing) + isnothing(k1s) && return false + k2s = + get(parse_annotations(metabolite_annotations(model, m2)), test_annotation, nothing) + isnothing(k2s) && return false any(in.(k1s, Ref(k2s))) end """ $(TYPEDSIGNATURES) -Return a dictionary of metabolites that are duplicated in their compartment. +Return a dictionary of metabolites that are duplicated in their compartment. If +any of the test annotations, stored in `config.metabolite.test_annotations`, are +repeated, then the metabolite is counted as duplicated. Missing annotations are ignored. """ function metabolites_duplicated_in_compartment( model::MetabolicModel; @@ -55,7 +46,12 @@ function metabolites_duplicated_in_compartment( c1 = metabolite_compartment(model, m1) for m2 in metabolites(model) c2 = metabolite_compartment(model, m2) - if c1 == c2 && m1 != m2 && metabolites_are_duplicated(model, m1, m2; config) + if c1 == c2 && + m1 != m2 && + any( + metabolites_are_duplicated(model, m1, m2, test_annotation) for + test_annotation in config.metabolite.test_annotations + ) push!(unique_metabolites, m1) end end @@ -66,28 +62,34 @@ end """ $(TYPEDSIGNATURES) -Helper function to find orphan or deadend metabolites. Specify `consumed=true` -to consider orphan metabolites or `false` to consider deadend metabolites. Set -`complete_medium=true` to open all boundary reactions to simulate a complete -medium. +Helper function to find orphan or deadend metabolites. Specify +`only_consumed=true` to consider orphan metabolites or `false` to consider +deadend metabolites. """ -function _find_orphan_or_deadend_metabolites(model::MetabolicModel; consumed = true) +function _find_orphan_or_deadend_metabolites(model::MetabolicModel; only_consumed = true) mids = metabolites(model) + rids = reactions(model) mets = String[] S = stoichiometry(model) lbs, ubs = bounds(model) for idx in axes(S, 1) - rids, vals = findnz(S[idx, :]) - if length(vals) == 1 && (consumed ? first(vals) < 0 : first(vals) > 0) - ridx = first(rids) - rid = reactions(model)[ridx] - met = mids[idx] - v = reaction_stoichiometry(model, rid)[met] - # check if reaction can actually make or consume the metabolite - lbs[ridx] < 0 < ubs[ridx] && continue # ignore reversible reactions - consumed && lbs[ridx] * v <= 0 && push!(mets, met) - !consumed && ubs[ridx] * v >= 0 && push!(mets, met) + mid = mids[idx] + one_sided_metabolite = true + for (ridx, _) in zip(findnz(S[idx, :])...) + lbs[ridx] < 0 < ubs[ridx] && (one_sided_metabolite = false; break) # can be produced or consumed + rid = rids[ridx] + rs = reaction_stoichiometry(model, rid)[mid] + if only_consumed # consumed only + (rs < 0 && lbs[ridx] >= 0) || + (rs > 0 && ubs[ridx] <= 0) || + (one_sided_metabolite = false; break) + else # produced only + (rs > 0 && lbs[ridx] >= 0) || + (rs < 0 && ubs[ridx] <= 0) || + (one_sided_metabolite = false; break) + end end + one_sided_metabolite && push!(mets, mid) end return mets end @@ -96,20 +98,19 @@ end $(TYPEDSIGNATURES) Find all metabolites that can only (excludes reversible reactions) be consumed -in the `model` by inspecting the stoichiometric matrix. +in the `model` by inspecting the stoichiometric matrix and reaction bounds. """ find_orphan_metabolites(model::MetabolicModel) = - _find_orphan_or_deadend_metabolites(model, consumed = true) + _find_orphan_or_deadend_metabolites(model, only_consumed = true) """ $(TYPEDSIGNATURES) Find all metabolites that can only (excludes reversible reactions) be produced -in the `model` by inspecting the stoichiometric matrix. +in the `model` by inspecting the stoichiometric matrix and reaction bounds. """ find_deadend_metabolites(model::MetabolicModel) = - _find_orphan_or_deadend_metabolites(model, consumed = false) - + _find_orphan_or_deadend_metabolites(model, only_consumed = false) """ $(TYPEDSIGNATURES) @@ -119,5 +120,4 @@ Returns a list of all metabolites that aren't part of any reactions. find_disconnected_metabolites(model::MetabolicModel) = metabolites(model)[all(stoichiometry(model) .== 0, dims = 2)[:]] - end # module diff --git a/src/memote/checks/Network.jl b/src/memote/checks/Network.jl index baec31d..770158c 100644 --- a/src/memote/checks/Network.jl +++ b/src/memote/checks/Network.jl @@ -108,7 +108,7 @@ function find_all_universally_blocked_reactions( fvas = flux_variability_analysis( stdmodel, - collect(1:n_reactions(model)), + collect(1:n_reactions(stdmodel)), optimizer; modifications = config.network.optimizer_modifications, workers, @@ -121,7 +121,7 @@ function find_all_universally_blocked_reactions( isnothing(vs[2]) && continue abs(vs[1]) <= config.network.blocked_tol && abs(vs[2]) <= config.network.blocked_tol && - (push!(blocked_rxns, rid); break) + push!(blocked_rxns, rid) end blocked_rxns diff --git a/src/memote/config.jl b/src/memote/config.jl index 1b446cb..62ab659 100644 --- a/src/memote/config.jl +++ b/src/memote/config.jl @@ -108,8 +108,8 @@ Base.@kwdef mutable struct AnnotationConfig metabolite_annotation_regexes::Dict{String,Regex} reaction_annotation_keywords::Vector{String} reaction_annotation_regexes::Dict{String,Regex} - maximum_nonconformal_references::Int64 - maximum_missing_databases::Int64 + minimum_conformal_crossreferences::Int64 + minimum_crossreferences::Int64 end annotation_config = AnnotationConfig( @@ -190,8 +190,8 @@ annotation_config = AnnotationConfig( r"^\d+\.-\.-\.-|\d+\.\d+\.-\.-|\d+\.\d+\.\d+\.-|\d+\.\d+\.\d+\.(n)?\d+$", "biocyc" => r"^[A-Z-0-9]+(? 0 "At least 1 biomass reaction is in the model." + @testset "Biomass reaction(s)" begin + @atest length(Biomass.findall_biomass_reactions(model)) > 0 "At least 1 biomass reaction is in the model" - @testset "ATP burn included" begin + @testset "ATP + H2O -> ADP + Pi + H included" begin for rid in Biomass.findall_biomass_reactions(model) - @atest Biomass.atp_present_in_biomass_reaction(model, rid) "$rid (biomass reaction) hydrolyses ATP into ADP." + @atest Biomass.atp_present_in_biomass_reaction(model, rid) "$rid (biomass reaction) hydrolyses ATP into ADP" end end @testset "Molar mass consistent" begin for rid in Biomass.findall_biomass_reactions(model) - @atest Biomass.biomass_reaction_is_consistent(model, rid) "$rid (biomass reaction) is consistent." + @atest Biomass.biomass_reaction_is_consistent(model, rid) "$rid (biomass reaction) is consistent" end end - @testset "Missing common precursors" begin + @testset "Has all common precursors" begin for rid in Biomass.findall_biomass_reactions(model) - @atest Biomass.biomass_missing_essential_precursors(model, rid; config) "$rid (biomass reaction) contains all the common precursors." + @atest Biomass.biomass_missing_essential_precursors(model, rid; config) "$rid (biomass reaction) contains all the common precursors" end end end @info "Testing the stoichiometric consistency of the model..." - @testset "Stoichiometrically consistent" begin - @atest Consistency.model_is_consistent(model, optimizer; config) "The model is stoichiometrically consistent." + @testset "Stoichiometric consistency" begin + @atest Consistency.model_is_consistent(model, optimizer; config) "The model is stoichiometrically consistent" end @info "Testing the energy metabolism..." @@ -219,17 +229,17 @@ function run_tests( @testset "GPR associations" begin @testset "Metabolic reactions have GPRs" begin for rid in filter(x -> is_metabolic_reaction(model, x), reactions(model)) - @atest GPRAssociation.reaction_has_sensible_gpr(model, rid) "$rid (metabolic reaction) has at least 1 GPRs." + @atest GPRAssociation.reaction_has_sensible_gpr(model, rid) "$rid (metabolic reaction) has at least 1 GPRs" end end @testset "Transport reactions have GPRs" begin for rid in filter(x -> is_transport_reaction(model, x), reactions(model)) - @atest GPRAssociation.reaction_has_sensible_gpr(model, rid) "$rid (transport reaction) has at least 1 GPRs." + @atest GPRAssociation.reaction_has_sensible_gpr(model, rid) "$rid (transport reaction) has at least 1 GPRs" end end - @atest length(GPRAssociation.reactions_with_complexes(model)) != 0 "At least 1 enzyme complex exists in the model." + @atest length(GPRAssociation.reactions_with_complexes(model)) != 0 "At least 1 enzyme complex exists in the model" end @info "Testing the reaction information..." @@ -244,7 +254,7 @@ function run_tests( if rid in config.reaction.charge_ignored_reactions @test_skip Reactions.reaction_is_charge_balanced(model, rid) else - @atest Reactions.reaction_is_charge_balanced(model, rid) "$rid is charge balanced." + @atest Reactions.reaction_is_charge_balanced(model, rid) "$rid is charge balanced" end end end @@ -253,54 +263,54 @@ function run_tests( if rid in config.reaction.mass_ignored_reactions @test_skip Reactions.reaction_is_mass_balanced(model, rid) else - @atest Reactions.reaction_is_mass_balanced(model, rid) "$rid is mass balanced." + @atest Reactions.reaction_is_mass_balanced(model, rid) "$rid is mass balanced" end end end @testset "No duplicated reactions" begin dup_rxns = Reactions.findall_duplicated_reactions(model) for rid in reactions(model) - @atest rid ∉ dup_rxns "$rid is not duplicated." + @atest rid ∉ dup_rxns "$rid is not duplicated" end end - @atest Reactions.model_has_atpm_reaction(model) "There is an ATP maintenance reaction in the model." + @atest Reactions.model_has_atpm_reaction(model) "There is an ATP maintenance reaction in the model" end @info "Testing the metabolite information..." @testset "Metabolite information" begin @testset "No missing formulas" begin for mid in metabolites(model) - @atest !isnothing(metabolite_formula(model, mid)) "$mid has a formula." + @atest !isnothing(metabolite_formula(model, mid)) "$mid has a formula" end end @testset "No missing charges" begin for mid in metabolites(model) - @atest !isnothing(metabolite_charge(model, mid)) "$mid has a charge." + @atest !isnothing(metabolite_charge(model, mid)) "$mid has a charge" end end - @testset "Duplicated metabolites" begin + @testset "No duplicated metabolites" begin dup_mids = Metabolites.metabolites_duplicated_in_compartment(model; config) for mid in metabolites(model) - @atest mid ∉ dup_mids "$mid is not duplicated." + @atest mid ∉ dup_mids "$mid is not duplicated" end end @testset "No orphan metabolites" begin orphans = Metabolites.find_orphan_metabolites(model) for mid in metabolites(model) - @atest mid ∉ orphans "$mid is not an orphan metabolite." + @atest mid ∉ orphans "$mid is not an orphan metabolite" end end @testset "No deadend metabolites" begin deadends = Metabolites.find_deadend_metabolites(model) for mid in metabolites(model) - @atest mid ∉ deadends "$mid is not a deadend metabolite." + @atest mid ∉ deadends "$mid is not a deadend metabolite" end end @testset "No disconnected metabolites" begin disconnected = Metabolites.find_disconnected_metabolites(model) for mid in metabolites(model) - @atest mid ∉ disconnected "$mid is not disconnected." + @atest mid ∉ disconnected "$mid is not disconnected" end end end @@ -308,13 +318,13 @@ function run_tests( @info "Testing the network information..." @testset "Network information" begin @atest Network.stoichiometric_max_min_ratio(model) < - config.network.condition_number "The stoichiometric matrix is well conditioned." + config.network.condition_number "The stoichiometric matrix is well conditioned" @testset "No stoichiometrically balanced cycles (reactions)" begin rxns_in_cycles = Network.find_cycle_reactions(model, optimizer; config, workers) for rid in reactions(model) - @atest rid ∉ rxns_in_cycles "$rid does not take part in a stoichiometrically balanced cycle." + @atest rid ∉ rxns_in_cycles "$rid does not take part in a stoichiometrically balanced cycle" end end @testset "No universally blocked reactions" begin @@ -325,7 +335,7 @@ function run_tests( workers, ) for rid in reactions(model) - @atest rid ∉ blocked_rxns "$rid is not a universally blocked reaction." + @atest rid ∉ blocked_rxns "$rid is not a universally blocked reaction" end end end diff --git a/test/data.jl b/test/data.jl index 6b29375..7618714 100644 --- a/test/data.jl +++ b/test/data.jl @@ -44,6 +44,11 @@ model_file = Dict( "http://bigg.ucsd.edu/static/models/iML1515.json", "b0f9199f048779bb08a14dfa6c09ec56d35b8750d2f99681980d0f098355fbf5", ), + ( + "yeast-GEM.xml", + "https://raw.githubusercontent.com/SysBioChalmers/yeast-GEM/main/model/yeast-GEM.xml", + "c728b09d849b744ec7640cbf15776d40fb2d9cbd0b76a840a8661b626c1bd4be", + ), ] ) @@ -51,3 +56,4 @@ model_file = Dict( model = load_model(model_file["e_coli_core.json"]) iJN746 = load_model(model_file["iJN746.json"]) iML1515 = load_model(StandardModel, model_file["iML1515.json"]) +yeastgem = load_model(StandardModel, model_file["yeast-GEM.xml"]) diff --git a/test/memote.jl b/test/memote.jl index b67cc44..6b2a2e2 100644 --- a/test/memote.jl +++ b/test/memote.jl @@ -21,8 +21,8 @@ using FBCModelTests.Memote.Metabolites ) end - @test result.passes == 1783 - @test result.fails == 12 + @test result.passes == 1857 + @test result.fails == 33 @test result.errs == 1 # broken test is the skipped test: model_has_no_erroneous_energy_generating_cycles end @@ -60,14 +60,14 @@ end @test Annotation.is_annotated_metabolite(model, mid) @test Annotation.is_annotated_gene(model, gid) - @test "ncbiprotein" in Annotation.findall_unannotated_gene_databases(model, gid) - @test "inchi" in Annotation.findall_unannotated_metabolite_databases(model, mid) - @test "brenda" in Annotation.findall_unannotated_reaction_databases(model, rid) + @test "ncbiprotein" ∉ Annotation.findall_annotated_gene_databases(model, gid) + @test "inchi" ∉ Annotation.findall_annotated_metabolite_databases(model, mid) + @test "brenda" ∉ Annotation.findall_annotated_reaction_databases(model, rid) - @test "ncbigi" in Annotation.findall_nonconformal_gene_annotations(model, gid) - @test "reactome.compound" in - Annotation.findall_nonconformal_metabolite_annotations(model, mid) - @test isempty(Annotation.findall_nonconformal_reaction_annotations(model, rid)) + @test "ncbigi" ∉ Annotation.findall_conformal_gene_annotations(model, gid) + @test "reactome.compound" ∉ + Annotation.findall_conformal_metabolite_annotations(model, mid) + @test length(Annotation.findall_conformal_reaction_annotations(model, rid)) == 7 end @testset "Biomass" begin @@ -129,9 +129,11 @@ end @testset "Metabolite" begin @test isempty(Metabolites.metabolites_duplicated_in_compartment(model)) - @test isempty(Metabolites.find_orphan_metabolites(model)) + @test "mal__L_e" in Metabolites.find_orphan_metabolites(model) @test isempty(Metabolites.find_deadend_metabolites(model)) @test isempty(Metabolites.find_disconnected_metabolites(model)) + @test length(Metabolites.find_orphan_metabolites(iML1515)) == 65 + @test length(Metabolites.find_deadend_metabolites(iML1515)) == 63 negative_model = convert(StandardModel, deepcopy(model)) dup = deepcopy(negative_model.metabolites["atp_c"])