diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 255099f6..90c2fc8f 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -5,7 +5,7 @@ version: 2
sphinx:
configuration: doc/conf.py
- fail_on_warning: true
+ fail_on_warning: false
build:
os: "ubuntu-22.04"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b944ab53..56ec211a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,47 @@
# PEtab changelog
+## 0.4 series
+
+### 0.4.0
+
+**Prepare for PEtab v2**
+
+To enable ongoing support for PEtab v1, while "forking" the v1 code for PEtab v2, the old code base is now available at `petab.v1`, and the new code base will be at `petab.v2`. For now, old `import petab.*` statements still work, but are marked as deprecated, and `import petab.v1.*` should be used instead. In the future, `petablint` will also be designed for use with full PEtab problems only, rather than with individual tables; partial problems will be supported for validating individual tables.
+
+* Add PEtab math parser and sympy converter by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/260
+* Deprecate petablint with individual tables by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/274
+* Introduce petab.v1 package by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/282
+* Separate v1 and v2 tests by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/284
+* Add petab.v2.Problem by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/285
+* PEtab v1 to v2 converter by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/281
+* Fix imports related to v1 subpackage by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/293
+
+**Validation**
+
+Validation will become increasingly atomic and OOP, to support extension-specific validation in PEtab v2.
+
+* Validator: check for positive bounds for log-scaled parameter by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/278
+* Validator: check prior parameters by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/279
+* Fix validation for remote files by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/287
+* New validation API by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/288
+
+**Documentation**
+
+* Fixed formatting / missing type annotations by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/292
+* Added versioning and deprecation policy by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/277
+
+**Other changes**
+
+* Simplify yaml schema by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/264
+* Handle numpy types in sympify_petab by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/294
+* New `get_path_prefix` method to get the base path for relative paths in PEtab problem YAML by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/280
+
+**New Contributors**
+
+* @dependabot made their first contribution in https://github.com/PEtab-dev/libpetab-python/pull/267
+
+**Full Changelog**: https://github.com/PEtab-dev/libpetab-python/compare/v0.3.0...v0.4.0
+
## 0.3 series
### 0.3.0
diff --git a/MANIFEST.in b/MANIFEST.in
index 72717189..dbdebd53 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,2 +1,3 @@
recursive-include petab/schemas *.yaml
-recursive-include petab/visualize/templates *
+recursive-include petab/v1/visualize/templates *
+recursive-exclude tests *
diff --git a/README.md b/README.md
index 167b336e..7873928b 100644
--- a/README.md
+++ b/README.md
@@ -39,11 +39,11 @@ be:
entrypoints to be available as a shell command from anywhere, called
`petablint`
- - [`petab.create_parameter_df`](https://petab.readthedocs.io/projects/libpetab-python/en/latest/build/_autosummary/petab.parameters.html#petab.parameters.create_parameter_df)
+ - [petab.create_parameter_df](https://petab.readthedocs.io/projects/libpetab-python/en/latest/build/_autosummary/petab.parameters.html#petab.parameters.create_parameter_df)
to create the parameter table, once you have set up the model,
condition table, observable table and measurement table
- - [`petab.create_combine_archive`](https://petab.readthedocs.io/projects/libpetab-python/en/latest/build/_autosummary/petab.core.html#petab.core.create_combine_archive)
+ - [petab.create_combine_archive](https://petab.readthedocs.io/projects/libpetab-python/en/latest/build/_autosummary/petab.core.html#petab.core.create_combine_archive)
to create a [COMBINE Archive](https://combinearchive.org/index/) from PEtab
files
diff --git a/doc/development.rst b/doc/development.rst
new file mode 100644
index 00000000..df4edf55
--- /dev/null
+++ b/doc/development.rst
@@ -0,0 +1,26 @@
+Development
+===========
+
+Versioning
+----------
+
+We use `Semantic Versioning <https://semver.org/>`_ with the modifications
+described under :ref:`deprecation_policy`.
+
+.. _deprecation_policy:
+
+Deprecation policy
+------------------
+
+petab aims to provide a stable API for users. However, not all features can be
+maintained indefinitely. We will deprecate features in minor releases and,
+where possible, issue a warning when they are used. We will keep deprecated
+features for at least six months after the release that includes the
+respective deprecation warning and remove them no earlier than the next minor
+or major release. If a deprecated feature is the source of a major bug, we may
+remove it earlier.
+
+Python compatibility
+--------------------
+
+We follow `numpy's Python support policy <https://numpy.org/neps/nep-0029-deprecation_policy.html>`_.
diff --git a/doc/index.rst b/doc/index.rst
index f4abafc7..be67867d 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -14,6 +14,7 @@
Changelog
how_to_cite
license
+ development
.. toctree::
:maxdepth: 2
diff --git a/doc/modules.rst b/doc/modules.rst
index 1454a36a..a227fafa 100644
--- a/doc/modules.rst
+++ b/doc/modules.rst
@@ -1,28 +1,32 @@
API Reference
=============
-.. automodule:: petab
-
.. rubric:: Modules
.. autosummary::
:toctree: build/_autosummary
:recursive:
- petab.C
- petab.calculate
- petab.composite_problem
- petab.conditions
- petab.core
- petab.lint
- petab.measurements
- petab.observables
- petab.parameter_mapping
- petab.parameters
- petab.problem
- petab.sampling
- petab.sbml
- petab.simulate
- petab.simplify
- petab.visualize
- petab.yaml
+ petab
+ petab.v1
+ petab.v1.C
+ petab.v1.calculate
+ petab.v1.composite_problem
+ petab.v1.conditions
+ petab.v1.core
+ petab.v1.lint
+ petab.v1.measurements
+ petab.v1.models
+ petab.v1.observables
+ petab.v1.parameter_mapping
+ petab.v1.parameters
+ petab.v1.problem
+ petab.v1.sampling
+ petab.v1.sbml
+ petab.v1.simulate
+ petab.v1.simplify
+ petab.v1.visualize
+ petab.v1.yaml
+ petab.v2
+ petab.v2.lint
+ petab.v2.problem
diff --git a/petab/C.py b/petab/C.py
index 2e3616ee..77f11abb 100644
--- a/petab/C.py
+++ b/petab/C.py
@@ -1,365 +1,5 @@
-# pylint: disable:invalid-name
-"""
-This file contains constant definitions.
-"""
+"""Deprecated module. Use petab.v1.C instead."""
+from petab import _deprecated_import_v1
+from petab.v1.C import * # noqa: F403, F401, E402
-import math as _math
-
-# MEASUREMENTS
-
-#:
-OBSERVABLE_ID = "observableId"
-
-#:
-PREEQUILIBRATION_CONDITION_ID = "preequilibrationConditionId"
-
-#:
-SIMULATION_CONDITION_ID = "simulationConditionId"
-
-#:
-MEASUREMENT = "measurement"
-
-#:
-TIME = "time"
-
-#: Time value that indicates steady-state measurements
-TIME_STEADY_STATE = _math.inf
-
-#:
-OBSERVABLE_PARAMETERS = "observableParameters"
-
-#:
-NOISE_PARAMETERS = "noiseParameters"
-
-#:
-DATASET_ID = "datasetId"
-
-#:
-REPLICATE_ID = "replicateId"
-
-#: Mandatory columns of measurement table
-MEASUREMENT_DF_REQUIRED_COLS = [
- OBSERVABLE_ID,
- SIMULATION_CONDITION_ID,
- MEASUREMENT,
- TIME,
-]
-
-#: Optional columns of measurement table
-MEASUREMENT_DF_OPTIONAL_COLS = [
- PREEQUILIBRATION_CONDITION_ID,
- OBSERVABLE_PARAMETERS,
- NOISE_PARAMETERS,
- DATASET_ID,
- REPLICATE_ID,
-]
-
-#: Measurement table columns
-MEASUREMENT_DF_COLS = [
- MEASUREMENT_DF_REQUIRED_COLS[0],
- MEASUREMENT_DF_OPTIONAL_COLS[0],
- *MEASUREMENT_DF_REQUIRED_COLS[1:],
- *MEASUREMENT_DF_OPTIONAL_COLS[1:],
-]
-
-
-# PARAMETERS
-
-#:
-PARAMETER_ID = "parameterId"
-#:
-PARAMETER_NAME = "parameterName"
-#:
-PARAMETER_SCALE = "parameterScale"
-#:
-LOWER_BOUND = "lowerBound"
-#:
-UPPER_BOUND = "upperBound"
-#:
-NOMINAL_VALUE = "nominalValue"
-#:
-ESTIMATE = "estimate"
-#:
-INITIALIZATION_PRIOR_TYPE = "initializationPriorType"
-#:
-INITIALIZATION_PRIOR_PARAMETERS = "initializationPriorParameters"
-#:
-OBJECTIVE_PRIOR_TYPE = "objectivePriorType"
-#:
-OBJECTIVE_PRIOR_PARAMETERS = "objectivePriorParameters"
-
-#: Mandatory columns of parameter table
-PARAMETER_DF_REQUIRED_COLS = [
- PARAMETER_ID,
- PARAMETER_SCALE,
- LOWER_BOUND,
- UPPER_BOUND,
- ESTIMATE,
-]
-
-#: Optional columns of parameter table
-PARAMETER_DF_OPTIONAL_COLS = [
- PARAMETER_NAME,
- NOMINAL_VALUE,
- INITIALIZATION_PRIOR_TYPE,
- INITIALIZATION_PRIOR_PARAMETERS,
- OBJECTIVE_PRIOR_TYPE,
- OBJECTIVE_PRIOR_PARAMETERS,
-]
-
-#: Parameter table columns
-PARAMETER_DF_COLS = [
- PARAMETER_DF_REQUIRED_COLS[0],
- PARAMETER_DF_OPTIONAL_COLS[0],
- *PARAMETER_DF_REQUIRED_COLS[1:],
- *PARAMETER_DF_OPTIONAL_COLS[1:],
-]
-
-#:
-INITIALIZATION = "initialization"
-#:
-OBJECTIVE = "objective"
-
-
-# CONDITIONS
-
-#:
-CONDITION_ID = "conditionId"
-#:
-CONDITION_NAME = "conditionName"
-
-
-# OBSERVABLES
-
-#:
-OBSERVABLE_NAME = "observableName"
-#:
-OBSERVABLE_FORMULA = "observableFormula"
-#:
-NOISE_FORMULA = "noiseFormula"
-#:
-OBSERVABLE_TRANSFORMATION = "observableTransformation"
-#:
-NOISE_DISTRIBUTION = "noiseDistribution"
-
-#: Mandatory columns of observables table
-OBSERVABLE_DF_REQUIRED_COLS = [
- OBSERVABLE_ID,
- OBSERVABLE_FORMULA,
- NOISE_FORMULA,
-]
-
-#: Optional columns of observables table
-OBSERVABLE_DF_OPTIONAL_COLS = [
- OBSERVABLE_NAME,
- OBSERVABLE_TRANSFORMATION,
- NOISE_DISTRIBUTION,
-]
-
-#: Observables table columns
-OBSERVABLE_DF_COLS = [
- *OBSERVABLE_DF_REQUIRED_COLS,
- *OBSERVABLE_DF_OPTIONAL_COLS,
-]
-
-
-# TRANSFORMATIONS
-
-#:
-LIN = "lin"
-#:
-LOG = "log"
-#:
-LOG10 = "log10"
-#: Supported observable transformations
-OBSERVABLE_TRANSFORMATIONS = [LIN, LOG, LOG10]
-
-
-# NOISE MODELS
-
-#:
-UNIFORM = "uniform"
-#:
-PARAMETER_SCALE_UNIFORM = "parameterScaleUniform"
-#:
-NORMAL = "normal"
-#:
-PARAMETER_SCALE_NORMAL = "parameterScaleNormal"
-#:
-LAPLACE = "laplace"
-#:
-PARAMETER_SCALE_LAPLACE = "parameterScaleLaplace"
-#:
-LOG_NORMAL = "logNormal"
-#:
-LOG_LAPLACE = "logLaplace"
-
-#: Supported prior types
-PRIOR_TYPES = [
- UNIFORM,
- NORMAL,
- LAPLACE,
- LOG_NORMAL,
- LOG_LAPLACE,
- PARAMETER_SCALE_UNIFORM,
- PARAMETER_SCALE_NORMAL,
- PARAMETER_SCALE_LAPLACE,
-]
-
-#: Supported noise distributions
-NOISE_MODELS = [NORMAL, LAPLACE]
-
-
-# VISUALIZATION
-
-#:
-PLOT_ID = "plotId"
-#:
-PLOT_NAME = "plotName"
-#:
-PLOT_TYPE_SIMULATION = "plotTypeSimulation"
-#:
-PLOT_TYPE_DATA = "plotTypeData"
-#:
-X_VALUES = "xValues"
-#:
-X_OFFSET = "xOffset"
-#:
-X_LABEL = "xLabel"
-#:
-X_SCALE = "xScale"
-#:
-Y_VALUES = "yValues"
-#:
-Y_OFFSET = "yOffset"
-#:
-Y_LABEL = "yLabel"
-#:
-Y_SCALE = "yScale"
-#:
-LEGEND_ENTRY = "legendEntry"
-
-#: Mandatory columns of visualization table
-VISUALIZATION_DF_REQUIRED_COLS = [PLOT_ID]
-
-#: Optional columns of visualization table
-VISUALIZATION_DF_OPTIONAL_COLS = [
- PLOT_NAME,
- PLOT_TYPE_SIMULATION,
- PLOT_TYPE_DATA,
- X_VALUES,
- X_OFFSET,
- X_LABEL,
- X_SCALE,
- Y_VALUES,
- Y_OFFSET,
- Y_LABEL,
- Y_SCALE,
- LEGEND_ENTRY,
- DATASET_ID,
-]
-
-#: Visualization table columns
-VISUALIZATION_DF_COLS = [
- *VISUALIZATION_DF_REQUIRED_COLS,
- *VISUALIZATION_DF_OPTIONAL_COLS,
-]
-
-#: Visualization table columns that contain subplot specifications
-VISUALIZATION_DF_SUBPLOT_LEVEL_COLS = [
- PLOT_ID,
- PLOT_NAME,
- PLOT_TYPE_SIMULATION,
- PLOT_TYPE_DATA,
- X_LABEL,
- X_SCALE,
- Y_LABEL,
- Y_SCALE,
-]
-
-#: Visualization table columns that contain single plot specifications
-VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS = [
- X_VALUES,
- X_OFFSET,
- Y_VALUES,
- Y_OFFSET,
- LEGEND_ENTRY,
- DATASET_ID,
-]
-
-#:
-LINE_PLOT = "LinePlot"
-#:
-BAR_PLOT = "BarPlot"
-#:
-SCATTER_PLOT = "ScatterPlot"
-#: Supported plot types
-PLOT_TYPES_SIMULATION = [LINE_PLOT, BAR_PLOT, SCATTER_PLOT]
-
-#: Supported xScales
-X_SCALES = [LIN, LOG, LOG10]
-
-#: Supported yScales
-Y_SCALES = [LIN, LOG, LOG10]
-
-
-#:
-MEAN_AND_SD = "MeanAndSD"
-#:
-MEAN_AND_SEM = "MeanAndSEM"
-#:
-REPLICATE = "replicate"
-#:
-PROVIDED = "provided"
-#: Supported settings for handling replicates
-PLOT_TYPES_DATA = [MEAN_AND_SD, MEAN_AND_SEM, REPLICATE, PROVIDED]
-
-
-# YAML
-#:
-FORMAT_VERSION = "format_version"
-#:
-PARAMETER_FILE = "parameter_file"
-#:
-PROBLEMS = "problems"
-#:
-SBML_FILES = "sbml_files"
-#:
-MODEL_FILES = "model_files"
-#:
-MODEL_LOCATION = "location"
-#:
-MODEL_LANGUAGE = "language"
-#:
-CONDITION_FILES = "condition_files"
-#:
-MEASUREMENT_FILES = "measurement_files"
-#:
-OBSERVABLE_FILES = "observable_files"
-#:
-VISUALIZATION_FILES = "visualization_files"
-#:
-MAPPING_FILES = "mapping_files"
-#:
-EXTENSIONS = "extensions"
-
-
-# MAPPING
-#:
-PETAB_ENTITY_ID = "petabEntityId"
-#:
-MODEL_ENTITY_ID = "modelEntityId"
-#:
-MAPPING_DF_REQUIRED_COLS = [PETAB_ENTITY_ID, MODEL_ENTITY_ID]
-
-# MORE
-
-#:
-SIMULATION = "simulation"
-#:
-RESIDUAL = "residual"
-#:
-NOISE_VALUE = "noiseValue"
-
-# separator for multiple parameter values (bounds, observableParameters, ...)
-PARAMETER_SEPARATOR = ";"
+_deprecated_import_v1(__name__)
diff --git a/petab/__init__.py b/petab/__init__.py
index 16cff24b..3dd30598 100644
--- a/petab/__init__.py
+++ b/petab/__init__.py
@@ -2,30 +2,74 @@
PEtab global
============
+.. warning::
+
+ All functions in here are deprecated. Use the respective functions from
+ :mod:`petab.v1` instead.
+
Attributes:
ENV_NUM_THREADS:
Name of environment variable to set number of threads or processes
PEtab should use for operations that can be performed in parallel.
By default, all operations are performed sequentially.
"""
+import functools
+import inspect
+import sys
+import warnings
+from warnings import warn
+
+# deprecated imports
+from petab.v1 import * # noqa: F403, F401, E402
+
+from .v1.format_version import __format_version__ # noqa: F401, E402
+
+# __all__ = [
+# 'ENV_NUM_THREADS',
+# ]
ENV_NUM_THREADS = "PETAB_NUM_THREADS"
-from .C import * # noqa: F403, F401, E402
-from .calculate import * # noqa: F403, F401, E402
-from .composite_problem import * # noqa: F403, F401, E402
-from .conditions import * # noqa: F403, F401, E402
-from .core import * # noqa: F403, F401, E402
-from .format_version import __format_version__ # noqa: F401, E402
-from .lint import * # noqa: F403, F401, E402
-from .mapping import * # noqa: F403, F401, E402
-from .measurements import * # noqa: F403, F401, E402
-from .observables import * # noqa: F403, F401, E402
-from .parameter_mapping import * # noqa: F403, F401, E402
-from .parameters import * # noqa: F403, F401, E402
-from .problem import * # noqa: F403, F401, E402
-from .sampling import * # noqa: F403, F401, E402
-from .sbml import * # noqa: F403, F401, E402
-from .simulate import * # noqa: F403, F401, E402
-from .version import __version__ # noqa: F401, E402
-from .yaml import * # noqa: F403, F401, E402
+
+def _deprecated_v1(func):
+ """Decorator for deprecation warnings for functions."""
+
+ @functools.wraps(func)
+ def new_func(*args, **kwargs):
+ warnings.warn(
+ f"petab.{func.__name__} is deprecated, "
+ f"please use petab.v1.{func.__name__} instead.",
+ category=DeprecationWarning,
+ stacklevel=2,
+ )
+ return func(*args, **kwargs)
+
+ return new_func
+
+
+def _deprecated_import_v1(module_name: str):
+ """Emit a deprecation warning for importing a deprecated petab module."""
+ warn(
+ f"The '{module_name}' module is deprecated and will be removed "
+ f"in the next major release. Please use "
+ f"'petab.v1.{module_name.removeprefix('petab.')}' "
+ "instead.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+
+
+__all__ = [
+ x
+ for x in dir(sys.modules[__name__])
+ if not x.startswith("_")
+ and x not in {"sys", "warnings", "functools", "warn", "inspect"}
+]
+
+
+# apply decorator to all functions in the module
+for name in __all__:
+ obj = globals().get(name)
+ if callable(obj) and inspect.isfunction(obj):
+ globals()[name] = _deprecated_v1(obj)
+del name, obj
diff --git a/petab/calculate.py b/petab/calculate.py
index f5258fc6..ca4c224f 100644
--- a/petab/calculate.py
+++ b/petab/calculate.py
@@ -1,421 +1,7 @@
-"""Functions performing various calculations."""
+"""Deprecated module for calculating residuals and log-likelihoods.
-import numbers
-from functools import reduce
-from typing import Dict, List, Union
+Use petab.v1.calculate instead."""
+from petab import _deprecated_import_v1
+from petab.v1.calculate import * # noqa: F403, F401, E402
-import numpy as np
-import pandas as pd
-import sympy
-
-import petab
-
-from .C import *
-from .math import sympify_petab
-
-__all__ = [
- "calculate_residuals",
- "calculate_residuals_for_table",
- "get_symbolic_noise_formulas",
- "evaluate_noise_formula",
- "calculate_chi2",
- "calculate_chi2_for_table_from_residuals",
- "calculate_llh",
- "calculate_llh_for_table",
- "calculate_single_llh",
-]
-
-
-def calculate_residuals(
- measurement_dfs: Union[List[pd.DataFrame], pd.DataFrame],
- simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame],
- observable_dfs: Union[List[pd.DataFrame], pd.DataFrame],
- parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame],
- normalize: bool = True,
- scale: bool = True,
-) -> List[pd.DataFrame]:
- """Calculate residuals.
-
- Arguments:
- measurement_dfs:
- The problem measurement tables.
- simulation_dfs:
- Simulation tables corresponding to the measurement tables.
- observable_dfs:
- The problem observable tables.
- parameter_dfs:
- The problem parameter tables.
- normalize:
- Whether to normalize residuals by the noise standard deviation
- terms.
- scale:
- Whether to calculate residuals of scaled values.
-
- Returns:
- List of DataFrames in the same structure as `measurement_dfs`
- with a field `residual` instead of measurement.
- """
- # convenience
- if isinstance(measurement_dfs, pd.DataFrame):
- measurement_dfs = [measurement_dfs]
- if isinstance(simulation_dfs, pd.DataFrame):
- simulation_dfs = [simulation_dfs]
- if isinstance(observable_dfs, pd.DataFrame):
- observable_dfs = [observable_dfs]
- if isinstance(parameter_dfs, pd.DataFrame):
- parameter_dfs = [parameter_dfs]
-
- # iterate over data frames
- residual_dfs = []
- for measurement_df, simulation_df, observable_df, parameter_df in zip(
- measurement_dfs, simulation_dfs, observable_dfs, parameter_dfs
- ):
- residual_df = calculate_residuals_for_table(
- measurement_df,
- simulation_df,
- observable_df,
- parameter_df,
- normalize,
- scale,
- )
- residual_dfs.append(residual_df)
- return residual_dfs
-
-
-def calculate_residuals_for_table(
- measurement_df: pd.DataFrame,
- simulation_df: pd.DataFrame,
- observable_df: pd.DataFrame,
- parameter_df: pd.DataFrame,
- normalize: bool = True,
- scale: bool = True,
-) -> pd.DataFrame:
- """
- Calculate residuals for a single measurement table.
- For the arguments, see `calculate_residuals`.
- """
- # create residual df as copy of measurement df, change column
- residual_df = measurement_df.copy(deep=True).rename(
- columns={MEASUREMENT: RESIDUAL}
- )
- residual_df[RESIDUAL] = residual_df[RESIDUAL].astype("float64")
- # matching columns
- compared_cols = set(MEASUREMENT_DF_COLS)
- compared_cols -= {MEASUREMENT}
- compared_cols &= set(measurement_df.columns)
- compared_cols &= set(simulation_df.columns)
-
- # compute noise formulas for observables
- noise_formulas = get_symbolic_noise_formulas(observable_df)
-
- # iterate over measurements, find corresponding simulations
- for irow, row in measurement_df.iterrows():
- measurement = row[MEASUREMENT]
- # look up in simulation df
- masks = [
- (simulation_df[col] == row[col]) | petab.is_empty(row[col])
- for col in compared_cols
- ]
- mask = reduce(lambda x, y: x & y, masks)
- simulation = simulation_df.loc[mask][SIMULATION].iloc[0]
- if scale:
- # apply scaling
- observable = observable_df.loc[row[OBSERVABLE_ID]]
- trafo = observable.get(OBSERVABLE_TRANSFORMATION, LIN)
- simulation = petab.scale(simulation, trafo)
- measurement = petab.scale(measurement, trafo)
-
- # non-normalized residual is just the difference
- residual = simulation - measurement
-
- noise_value = 1
- if normalize:
- # look up noise standard deviation
- noise_value = evaluate_noise_formula(
- row, noise_formulas, parameter_df, simulation
- )
- residual /= noise_value
-
- # fill in value
- residual_df.loc[irow, RESIDUAL] = residual
- return residual_df
-
-
-def get_symbolic_noise_formulas(observable_df) -> Dict[str, sympy.Expr]:
- """Sympify noise formulas.
-
- Arguments:
- observable_df: The observable table.
-
- Returns:
- Dictionary of {observable_id}: {noise_formula}.
- """
- noise_formulas = {}
- # iterate over observables
- for row in observable_df.itertuples():
- observable_id = row.Index
- if NOISE_FORMULA not in observable_df.columns:
- noise_formula = None
- else:
- noise_formula = sympify_petab(row.noiseFormula)
- noise_formulas[observable_id] = noise_formula
- return noise_formulas
-
-
-def evaluate_noise_formula(
- measurement: pd.Series,
- noise_formulas: Dict[str, sympy.Expr],
- parameter_df: pd.DataFrame,
- simulation: numbers.Number,
-) -> float:
- """Fill in parameters for `measurement` and evaluate noise_formula.
-
- Arguments:
- measurement: A measurement table row.
- noise_formulas: The noise formulas as computed by
- `get_symbolic_noise_formulas`.
- parameter_df: The parameter table.
- simulation: The simulation corresponding to the measurement, scaled.
-
- Returns:
- The noise value.
- """
- # the observable id
- observable_id = measurement[OBSERVABLE_ID]
-
- # extract measurement specific overrides
- observable_parameter_overrides = petab.split_parameter_replacement_list(
- measurement.get(NOISE_PARAMETERS, None)
- )
- # fill in measurement specific parameters
- overrides = {
- f"noiseParameter{i_obs_par + 1}_{observable_id}": obs_par
- for i_obs_par, obs_par in enumerate(observable_parameter_overrides)
- }
-
- # fill in observables
- overrides[observable_id] = simulation
-
- # fill in general parameters
- for row in parameter_df.itertuples():
- overrides[row.Index] = row.nominalValue
-
- # replace parametric measurement specific parameters
- for key, value in overrides.items():
- if not isinstance(value, numbers.Number):
- # is parameter
- overrides[key] = parameter_df.loc[value, NOMINAL_VALUE]
-
- # replace parameters by values in formula
- noise_formula = noise_formulas[observable_id]
- noise_value = noise_formula.subs(overrides)
-
- # conversion is possible if all parameters are replaced
- try:
- noise_value = float(noise_value)
- except TypeError as e:
- raise ValueError(
- f"Cannot replace all parameters in noise formula {noise_value} "
- f"for observable {observable_id}. "
- f"Missing {noise_formula.free_symbols}. Note that model states "
- "are currently not supported."
- ) from e
- return noise_value
-
-
-def calculate_chi2(
- measurement_dfs: Union[List[pd.DataFrame], pd.DataFrame],
- simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame],
- observable_dfs: Union[List[pd.DataFrame], pd.DataFrame],
- parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame],
- normalize: bool = True,
- scale: bool = True,
-) -> float:
- """Calculate the chi2 value.
-
- Arguments:
- measurement_dfs:
- The problem measurement tables.
- simulation_dfs:
- Simulation tables corresponding to the measurement tables.
- observable_dfs:
- The problem observable tables.
- parameter_dfs:
- The problem parameter tables.
- normalize:
- Whether to normalize residuals by the noise standard deviation
- terms.
- scale:
- Whether to calculate residuals of scaled values.
-
- Returns:
- The aggregated chi2 value.
- """
- residual_dfs = calculate_residuals(
- measurement_dfs,
- simulation_dfs,
- observable_dfs,
- parameter_dfs,
- normalize,
- scale,
- )
- chi2s = [
- calculate_chi2_for_table_from_residuals(df) for df in residual_dfs
- ]
- return sum(chi2s)
-
-
-def calculate_chi2_for_table_from_residuals(
- residual_df: pd.DataFrame,
-) -> float:
- """Compute chi2 value for a single residual table."""
- return (np.array(residual_df[RESIDUAL]) ** 2).sum()
-
-
-def calculate_llh(
- measurement_dfs: Union[List[pd.DataFrame], pd.DataFrame],
- simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame],
- observable_dfs: Union[List[pd.DataFrame], pd.DataFrame],
- parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame],
-) -> float:
- """Calculate total log likelihood.
-
- Arguments:
- measurement_dfs:
- The problem measurement tables.
- simulation_dfs:
- Simulation tables corresponding to the measurement tables.
- observable_dfs:
- The problem observable tables.
- parameter_dfs:
- The problem parameter tables.
-
- Returns:
- The log-likelihood.
- """
- # convenience
- if isinstance(measurement_dfs, pd.DataFrame):
- measurement_dfs = [measurement_dfs]
- if isinstance(simulation_dfs, pd.DataFrame):
- simulation_dfs = [simulation_dfs]
- if isinstance(observable_dfs, pd.DataFrame):
- observable_dfs = [observable_dfs]
- if isinstance(parameter_dfs, pd.DataFrame):
- parameter_dfs = [parameter_dfs]
-
- # iterate over data frames
- llhs = []
- for measurement_df, simulation_df, observable_df, parameter_df in zip(
- measurement_dfs, simulation_dfs, observable_dfs, parameter_dfs
- ):
- _llh = calculate_llh_for_table(
- measurement_df, simulation_df, observable_df, parameter_df
- )
- llhs.append(_llh)
- return sum(llhs)
-
-
-def calculate_llh_for_table(
- measurement_df: pd.DataFrame,
- simulation_df: pd.DataFrame,
- observable_df: pd.DataFrame,
- parameter_df: pd.DataFrame,
-) -> float:
- """Calculate log-likelihood for one set of tables. For the arguments, see
- `calculate_llh`.
- """
- llhs = []
-
- # matching columns
- compared_cols = set(MEASUREMENT_DF_COLS)
- compared_cols -= {MEASUREMENT}
- compared_cols &= set(measurement_df.columns)
- compared_cols &= set(simulation_df.columns)
-
- # compute noise formulas for observables
- noise_formulas = get_symbolic_noise_formulas(observable_df)
-
- # iterate over measurements, find corresponding simulations
- for _, row in measurement_df.iterrows():
- measurement = row[MEASUREMENT]
-
- # look up in simulation df
- masks = [
- (simulation_df[col] == row[col]) | petab.is_empty(row[col])
- for col in compared_cols
- ]
- mask = reduce(lambda x, y: x & y, masks)
-
- simulation = simulation_df.loc[mask][SIMULATION].iloc[0]
-
- observable = observable_df.loc[row[OBSERVABLE_ID]]
-
- # get scale
- scale = observable.get(OBSERVABLE_TRANSFORMATION, LIN)
-
- # get noise standard deviation
- noise_value = evaluate_noise_formula(
- row, noise_formulas, parameter_df, petab.scale(simulation, scale)
- )
-
- # get noise distribution
- noise_distribution = observable.get(NOISE_DISTRIBUTION, NORMAL)
-
- llh = calculate_single_llh(
- measurement, simulation, scale, noise_distribution, noise_value
- )
- llhs.append(llh)
- return sum(llhs)
-
-
-def calculate_single_llh(
- measurement: float,
- simulation: float,
- scale: str,
- noise_distribution: str,
- noise_value: float,
-) -> float:
- """Calculate a single log likelihood.
-
- Arguments:
- measurement: The measurement value.
- simulation: The simulated value.
- scale: The scale on which the noise model is to be applied.
- noise_distribution: The noise distribution.
- noise_value: The considered noise models possess a single noise
- parameter, e.g. the normal standard deviation.
-
- Returns:
- The computed likelihood for the given values.
- """
- # short-hand
- m, s, sigma = measurement, simulation, noise_value
- pi, log, log10 = np.pi, np.log, np.log10
-
- # go over the possible cases
- if noise_distribution == NORMAL and scale == LIN:
- nllh = 0.5 * log(2 * pi * sigma**2) + 0.5 * ((s - m) / sigma) ** 2
- elif noise_distribution == NORMAL and scale == LOG:
- nllh = (
- 0.5 * log(2 * pi * sigma**2 * m**2)
- + 0.5 * ((log(s) - log(m)) / sigma) ** 2
- )
- elif noise_distribution == NORMAL and scale == LOG10:
- nllh = (
- 0.5 * log(2 * pi * sigma**2 * m**2 * log(10) ** 2)
- + 0.5 * ((log10(s) - log10(m)) / sigma) ** 2
- )
- elif noise_distribution == LAPLACE and scale == LIN:
- nllh = log(2 * sigma) + abs((s - m) / sigma)
- elif noise_distribution == LAPLACE and scale == LOG:
- nllh = log(2 * sigma * m) + abs((log(s) - log(m)) / sigma)
- elif noise_distribution == LAPLACE and scale == LOG10:
- nllh = log(2 * sigma * m * log(10)) + abs(
- (log10(s) - log10(m)) / sigma
- )
- else:
- raise NotImplementedError(
- "Unsupported combination of noise_distribution and scale "
- f"specified: {noise_distribution}, {scale}."
- )
- return -nllh
+_deprecated_import_v1(__name__)
diff --git a/petab/composite_problem.py b/petab/composite_problem.py
index b8569cb5..51d30a20 100644
--- a/petab/composite_problem.py
+++ b/petab/composite_problem.py
@@ -1,86 +1,7 @@
-"""PEtab problems consisting of multiple models"""
-import os
-from typing import Dict, List, Union
+"""Deprecated module for composite problems.
-import pandas as pd
+Use petab.v1.composite_problem instead."""
+from petab import _deprecated_import_v1
+from petab.v1.composite_problem import * # noqa: F403, F401, E402
-from . import parameters, problem, yaml
-from .C import * # noqa: F403
-
-__all__ = ["CompositeProblem"]
-
-
-class CompositeProblem:
- """Representation of a PEtab problem consisting of multiple models
-
- Attributes:
- problems:
- List of :py:class:`petab.Problem` s
- parameter_df:
- PEtab parameter DataFrame
- """
-
- def __init__(
- self,
- parameter_df: pd.DataFrame = None,
- problems: List[problem.Problem] = None,
- ):
- """Constructor
-
- Arguments:
- parameter_df:
- see CompositeProblem.parameter_df
- problems:
- see CompositeProblem.problems
- """
- self.problems: List[problem.Problem] = problems
- self.parameter_df: pd.DataFrame = parameter_df
-
- @staticmethod
- def from_yaml(yaml_config: Union[Dict, str]) -> "CompositeProblem":
- """Create from YAML file
-
- Factory method to create a CompositeProblem instance from a PEtab
- YAML config file
-
- Arguments:
- yaml_config: PEtab configuration as dictionary or YAML file name
- """
- if isinstance(yaml_config, str):
- path_prefix = os.path.dirname(yaml_config)
- yaml_config = yaml.load_yaml(yaml_config)
- else:
- path_prefix = ""
-
- parameter_df = parameters.get_parameter_df(
- os.path.join(path_prefix, yaml_config[PARAMETER_FILE])
- )
-
- problems = []
- for problem_config in yaml_config[PROBLEMS]:
- yaml.assert_single_condition_and_sbml_file(problem_config)
-
- # don't set parameter file if we have multiple models
- cur_problem = problem.Problem.from_files(
- sbml_file=os.path.join(
- path_prefix, problem_config[SBML_FILES][0]
- ),
- measurement_file=[
- os.path.join(path_prefix, f)
- for f in problem_config[MEASUREMENT_FILES]
- ],
- condition_file=os.path.join(
- path_prefix, problem_config[CONDITION_FILES][0]
- ),
- visualization_files=[
- os.path.join(path_prefix, f)
- for f in problem_config[VISUALIZATION_FILES]
- ],
- observable_files=[
- os.path.join(path_prefix, f)
- for f in problem_config[OBSERVABLE_FILES]
- ],
- )
- problems.append(cur_problem)
-
- return CompositeProblem(parameter_df=parameter_df, problems=problems)
+_deprecated_import_v1(__name__)
diff --git a/petab/conditions.py b/petab/conditions.py
index deef08f7..cd00e466 100644
--- a/petab/conditions.py
+++ b/petab/conditions.py
@@ -1,118 +1,8 @@
-"""Functions operating on the PEtab condition table"""
+"""Deprecated module for condition tables.
-from pathlib import Path
-from typing import Iterable, List, Optional, Union
+Use petab.v1.conditions instead.
+"""
+from petab import _deprecated_import_v1
+from petab.v1.conditions import * # noqa: F403, F401, E402
-import numpy as np
-import pandas as pd
-
-from . import core, lint
-from .C import *
-
-__all__ = [
- "get_condition_df",
- "write_condition_df",
- "create_condition_df",
- "get_parametric_overrides",
-]
-
-
-def get_condition_df(
- condition_file: Union[str, pd.DataFrame, Path, None],
-) -> pd.DataFrame:
- """Read the provided condition file into a ``pandas.Dataframe``
-
- Conditions are rows, parameters are columns, conditionId is index.
-
- Arguments:
- condition_file: File name of PEtab condition file or pandas.Dataframe
- """
- if condition_file is None:
- return condition_file
-
- if isinstance(condition_file, (str, Path)):
- condition_file = pd.read_csv(
- condition_file, sep="\t", float_precision="round_trip"
- )
-
- lint.assert_no_leading_trailing_whitespace(
- condition_file.columns.values, "condition"
- )
-
- if not isinstance(condition_file.index, pd.RangeIndex):
- condition_file.reset_index(
- drop=condition_file.index.name != CONDITION_ID,
- inplace=True,
- )
-
- try:
- condition_file.set_index([CONDITION_ID], inplace=True)
- except KeyError:
- raise KeyError(
- f"Condition table missing mandatory field {CONDITION_ID}."
- ) from None
-
- return condition_file
-
-
-def write_condition_df(df: pd.DataFrame, filename: Union[str, Path]) -> None:
- """Write PEtab condition table
-
- Arguments:
- df: PEtab condition table
- filename: Destination file name
- """
- df = get_condition_df(df)
- df.to_csv(filename, sep="\t", index=True)
-
-
-def create_condition_df(
- parameter_ids: Iterable[str], condition_ids: Optional[Iterable[str]] = None
-) -> pd.DataFrame:
- """Create empty condition DataFrame
-
- Arguments:
- parameter_ids: the columns
- condition_ids: the rows
- Returns:
- A :py:class:`pandas.DataFrame` with empty given rows and columns and
- all nan values
- """
- condition_ids = [] if condition_ids is None else list(condition_ids)
-
- data = {CONDITION_ID: condition_ids}
- df = pd.DataFrame(data)
-
- for p in parameter_ids:
- if not lint.is_valid_identifier(p):
- raise ValueError("Invalid parameter ID: " + p)
- df[p] = np.nan
-
- df.set_index(CONDITION_ID, inplace=True)
-
- return df
-
-
-def get_parametric_overrides(condition_df: pd.DataFrame) -> List[str]:
- """Get parametric overrides from condition table
-
- Arguments:
- condition_df: PEtab condition table
-
- Returns:
- List of parameter IDs that are mapped in a condition-specific way
- """
- constant_parameters = set(condition_df.columns.values.tolist()) - {
- CONDITION_ID,
- CONDITION_NAME,
- }
- result = []
-
- for column in constant_parameters:
- if np.issubdtype(condition_df[column].dtype, np.number):
- continue
-
- floatified = condition_df.loc[:, column].apply(core.to_float_if_float)
-
- result.extend(x for x in floatified if not isinstance(x, float))
- return result
+_deprecated_import_v1(__name__)
diff --git a/petab/core.py b/petab/core.py
index 97b002d2..2668111c 100644
--- a/petab/core.py
+++ b/petab/core.py
@@ -1,545 +1,7 @@
-"""PEtab core functions (or functions that don't fit anywhere else)"""
-import logging
-import os
-import re
-from pathlib import Path
-from typing import (
- Any,
- Callable,
- Dict,
- Iterable,
- List,
- Optional,
- Sequence,
- Union,
-)
-from warnings import warn
+"""Deprecated module for PEtab core classes and functions.
-import numpy as np
-import pandas as pd
-from pandas.api.types import is_string_dtype
+Use petab.v1.core instead."""
+from petab import _deprecated_import_v1
+from petab.v1.core import * # noqa: F403, F401, E402
-from . import yaml
-from .C import * # noqa: F403
-
-logger = logging.getLogger(__name__)
-__all__ = [
- "get_simulation_df",
- "write_simulation_df",
- "get_visualization_df",
- "write_visualization_df",
- "get_notnull_columns",
- "flatten_timepoint_specific_output_overrides",
- "concat_tables",
- "to_float_if_float",
- "is_empty",
- "create_combine_archive",
- "unique_preserve_order",
- "unflatten_simulation_df",
-]
-
-POSSIBLE_GROUPVARS_FLATTENED_PROBLEM = [
- OBSERVABLE_ID,
- OBSERVABLE_PARAMETERS,
- NOISE_PARAMETERS,
- SIMULATION_CONDITION_ID,
- PREEQUILIBRATION_CONDITION_ID,
-]
-
-
-def get_simulation_df(simulation_file: Union[str, Path]) -> pd.DataFrame:
- """Read PEtab simulation table
-
- Arguments:
- simulation_file: URL or filename of PEtab simulation table
-
- Returns:
- Simulation DataFrame
- """
- return pd.read_csv(
- simulation_file, sep="\t", index_col=None, float_precision="round_trip"
- )
-
-
-def write_simulation_df(df: pd.DataFrame, filename: Union[str, Path]) -> None:
- """Write PEtab simulation table
-
- Arguments:
- df: PEtab simulation table
- filename: Destination file name
- """
- df.to_csv(filename, sep="\t", index=False)
-
-
-def get_visualization_df(
- visualization_file: Union[str, Path, pd.DataFrame, None],
-) -> Union[pd.DataFrame, None]:
- """Read PEtab visualization table
-
- Arguments:
- visualization_file:
- URL or filename of PEtab visualization table to read from,
- or a DataFrame or None that will be returned as is.
-
- Returns:
- Visualization DataFrame
- """
- if visualization_file is None:
- return None
-
- if isinstance(visualization_file, pd.DataFrame):
- return visualization_file
-
- try:
- types = {PLOT_NAME: str}
- vis_spec = pd.read_csv(
- visualization_file,
- sep="\t",
- index_col=None,
- converters=types,
- float_precision="round_trip",
- )
- except pd.errors.EmptyDataError:
- warn(
- "Visualization table is empty. Defaults will be used. "
- "Refer to the documentation for details.",
- stacklevel=2,
- )
- vis_spec = pd.DataFrame()
- return vis_spec
-
-
-def write_visualization_df(
- df: pd.DataFrame, filename: Union[str, Path]
-) -> None:
- """Write PEtab visualization table
-
- Arguments:
- df: PEtab visualization table
- filename: Destination file name
- """
- df.to_csv(filename, sep="\t", index=False)
-
-
-def get_notnull_columns(df: pd.DataFrame, candidates: Iterable):
- """
- Return list of ``df``-columns in ``candidates`` which are not all null/nan.
-
- The output can e.g. be used as input for ``pandas.DataFrame.groupby``.
-
- Arguments:
- df:
- Dataframe
- candidates:
- Columns of ``df`` to consider
- """
- return [
- col for col in candidates if col in df and not np.all(df[col].isnull())
- ]
-
-
-def get_observable_replacement_id(groupvars, groupvar) -> str:
- """Get the replacement ID for an observable.
-
- Arguments:
- groupvars:
- The columns of a PEtab measurement table that should be unique
- between observables in a flattened PEtab problem.
- groupvar:
- A specific grouping of `groupvars`.
-
- Returns:
- The observable replacement ID.
- """
- replacement_id = ""
- for field in POSSIBLE_GROUPVARS_FLATTENED_PROBLEM:
- if field in groupvars:
- val = (
- str(groupvar[groupvars.index(field)])
- .replace(PARAMETER_SEPARATOR, "_")
- .replace(".", "_")
- )
- if replacement_id == "":
- replacement_id = val
- elif val != "":
- replacement_id += f"__{val}"
- return replacement_id
-
-
-def get_hyperparameter_replacement_id(
- hyperparameter_type,
- observable_replacement_id,
-):
- """Get the full ID for a replaced hyperparameter.
-
- Arguments:
- hyperparameter_type:
- The type of hyperparameter, e.g. `noiseParameter`.
- observable_replacement_id:
- The observable replacement ID, e.g. the output of
- `get_observable_replacement_id`.
-
- Returns:
- The hyperparameter replacement ID, with a field that will be replaced
- by the first matched substring in a regex substitution.
- """
- return f"{hyperparameter_type}\\1_{observable_replacement_id}"
-
-
-def get_flattened_id_mappings(
- petab_problem: "petab.problem.Problem",
-) -> Dict[str, Dict[str, str]]:
- """Get mapping from unflattened to flattened observable IDs.
-
- Arguments:
- petab_problem:
- The unflattened PEtab problem.
-
- Returns:
- A dictionary of dictionaries. Each inner dictionary is a mapping
- from original ID to flattened ID. Each outer dictionary is the mapping
- for either: observable IDs; noise parameter IDs; or, observable
- parameter IDs.
- """
- groupvars = get_notnull_columns(
- petab_problem.measurement_df, POSSIBLE_GROUPVARS_FLATTENED_PROBLEM
- )
- mappings = {
- OBSERVABLE_ID: {},
- NOISE_PARAMETERS: {},
- OBSERVABLE_PARAMETERS: {},
- }
- for groupvar, measurements in petab_problem.measurement_df.groupby(
- groupvars, dropna=False
- ):
- observable_id = groupvar[groupvars.index(OBSERVABLE_ID)]
- observable_replacement_id = get_observable_replacement_id(
- groupvars, groupvar
- )
-
- logger.debug(f"Creating synthetic observable {observable_id}")
- if observable_replacement_id in petab_problem.observable_df.index:
- raise RuntimeError(
- "could not create synthetic observables "
- f"since {observable_replacement_id} was "
- "already present in observable table"
- )
-
- mappings[OBSERVABLE_ID][observable_replacement_id] = observable_id
-
- for field, hyperparameter_type in [
- (NOISE_PARAMETERS, "noiseParameter"),
- (OBSERVABLE_PARAMETERS, "observableParameter"),
- ]:
- if field in measurements:
- mappings[field][
- get_hyperparameter_replacement_id(
- hyperparameter_type=hyperparameter_type,
- observable_replacement_id=observable_replacement_id,
- )
- ] = rf"{hyperparameter_type}([0-9]+)_{observable_id}"
- return mappings
-
-
-def flatten_timepoint_specific_output_overrides(
- petab_problem: "petab.problem.Problem",
-) -> None:
- """Flatten timepoint-specific output parameter overrides.
-
- If the PEtab problem definition has timepoint-specific
- `observableParameters` or `noiseParameters` for the same observable,
- replace those by replicating the respective observable.
-
- This is a helper function for some tools which may not support such
- timepoint-specific mappings. The observable table and measurement table
- are modified in place.
-
- Arguments:
- petab_problem:
- PEtab problem to work on. Modified in place.
- """
- new_measurement_dfs = []
- new_observable_dfs = []
- groupvars = get_notnull_columns(
- petab_problem.measurement_df, POSSIBLE_GROUPVARS_FLATTENED_PROBLEM
- )
-
- mappings = get_flattened_id_mappings(petab_problem)
-
- for groupvar, measurements in petab_problem.measurement_df.groupby(
- groupvars, dropna=False
- ):
- obs_id = groupvar[groupvars.index(OBSERVABLE_ID)]
- observable_replacement_id = get_observable_replacement_id(
- groupvars, groupvar
- )
-
- observable = petab_problem.observable_df.loc[obs_id].copy()
- observable.name = observable_replacement_id
- for field, hyperparameter_type, target in [
- (NOISE_PARAMETERS, "noiseParameter", NOISE_FORMULA),
- (OBSERVABLE_PARAMETERS, "observableParameter", OBSERVABLE_FORMULA),
- (OBSERVABLE_PARAMETERS, "observableParameter", NOISE_FORMULA),
- ]:
- if field not in measurements:
- continue
-
- if not is_string_dtype(type(observable[target])):
- # if not a string, we don't have to substitute anything
- continue
-
- hyperparameter_replacement_id = get_hyperparameter_replacement_id(
- hyperparameter_type=hyperparameter_type,
- observable_replacement_id=observable_replacement_id,
- )
- hyperparameter_id = mappings[field][hyperparameter_replacement_id]
- observable[target] = re.sub(
- hyperparameter_id,
- hyperparameter_replacement_id,
- observable[target],
- )
-
- measurements[OBSERVABLE_ID] = observable_replacement_id
- new_measurement_dfs.append(measurements)
- new_observable_dfs.append(observable)
-
- petab_problem.observable_df = pd.concat(new_observable_dfs, axis=1).T
- petab_problem.observable_df.index.name = OBSERVABLE_ID
- petab_problem.measurement_df = pd.concat(new_measurement_dfs)
-
-
-def unflatten_simulation_df(
- simulation_df: pd.DataFrame,
- petab_problem: "petab.problem.Problem",
-) -> pd.DataFrame:
- """Unflatten simulations from a flattened PEtab problem.
-
- A flattened PEtab problem is the output of applying
- :func:`flatten_timepoint_specific_output_overrides` to a PEtab problem.
-
- Arguments:
- simulation_df:
- The simulation dataframe. A dataframe in the same format as a PEtab
- measurements table, but with the ``measurement`` column switched
- with a ``simulation`` column.
- petab_problem:
- The unflattened PEtab problem.
-
- Returns:
- The simulation dataframe for the unflattened PEtab problem.
- """
- mappings = get_flattened_id_mappings(petab_problem)
- original_observable_ids = simulation_df[OBSERVABLE_ID].replace(
- mappings[OBSERVABLE_ID]
- )
- unflattened_simulation_df = simulation_df.assign(
- **{
- OBSERVABLE_ID: original_observable_ids,
- }
- )
- return unflattened_simulation_df
-
-
-def concat_tables(
- tables: Union[
- str, Path, pd.DataFrame, Iterable[Union[pd.DataFrame, str, Path]]
- ],
- file_parser: Optional[Callable] = None,
-) -> pd.DataFrame:
- """Concatenate DataFrames provided as DataFrames or filenames, and a parser
-
- Arguments:
- tables:
- Iterable of tables to join, as DataFrame or filename.
- file_parser:
- Function used to read the table in case filenames are provided,
- accepting a filename as only argument.
-
- Returns:
- The concatenated DataFrames
- """
- if isinstance(tables, pd.DataFrame):
- return tables
-
- if isinstance(tables, (str, Path)):
- return file_parser(tables)
-
- df = pd.DataFrame()
-
- for tmp_df in tables:
- # load from file, if necessary
- if isinstance(tmp_df, (str, Path)):
- tmp_df = file_parser(tmp_df)
-
- df = pd.concat(
- [df, tmp_df],
- sort=False,
- ignore_index=isinstance(tmp_df.index, pd.RangeIndex),
- )
-
- return df
-
-
-def to_float_if_float(x: Any) -> Any:
- """Return input as float if possible, otherwise return as is
-
- Arguments:
- x: Anything
-
- Returns:
- ``x`` as float if possible, otherwise ``x``
- """
- try:
- return float(x)
- except (ValueError, TypeError):
- return x
-
-
-def is_empty(val) -> bool:
- """Check if the value `val`, e.g. a table entry, is empty.
-
- Arguments:
- val: The value to check.
-
- Returns:
- Whether the field is to be considered empty.
- """
- return val == "" or pd.isnull(val)
-
-
-def create_combine_archive(
- yaml_file: Union[str, Path],
- filename: Union[str, Path],
- family_name: Optional[str] = None,
- given_name: Optional[str] = None,
- email: Optional[str] = None,
- organization: Optional[str] = None,
-) -> None:
- """Create COMBINE archive (https://co.mbine.org/documents/archive) based
- on PEtab YAML file.
-
- Arguments:
- yaml_file: Path to PEtab YAML file
- filename: Destination file name
- family_name: Family name of archive creator
- given_name: Given name of archive creator
- email: E-mail address of archive creator
- organization: Organization of archive creator
- """
- path_prefix = os.path.dirname(str(yaml_file))
- yaml_config = yaml.load_yaml(yaml_file)
-
- # function-level import, because module-level import interfered with
- # other SWIG interfaces
- try:
- import libcombine
- except ImportError as err:
- raise ImportError(
- "To use PEtab's COMBINE functionality, libcombine "
- "(python-libcombine) must be installed."
- ) from err
-
- def _add_file_metadata(location: str, description: str = ""):
- """Add metadata to the added file"""
- omex_description = libcombine.OmexDescription()
- omex_description.setAbout(location)
- omex_description.setDescription(description)
- omex_description.setCreated(
- libcombine.OmexDescription.getCurrentDateAndTime()
- )
- archive.addMetadata(location, omex_description)
-
- archive = libcombine.CombineArchive()
-
- # Add PEtab files and metadata
- archive.addFile(
- str(yaml_file),
- os.path.basename(yaml_file),
- "http://identifiers.org/combine.specifications/petab.version-1",
- True,
- )
- _add_file_metadata(
- location=os.path.basename(yaml_file), description="PEtab YAML file"
- )
-
- # Add parameter file(s) that describe a single parameter table.
- # Works for a single file name, or a list of file names.
- for parameter_subset_file in list(
- np.array(yaml_config[PARAMETER_FILE]).flat
- ):
- archive.addFile(
- os.path.join(path_prefix, parameter_subset_file),
- parameter_subset_file,
- libcombine.KnownFormats.lookupFormat("tsv"),
- False,
- )
- _add_file_metadata(
- location=parameter_subset_file, description="PEtab parameter file"
- )
-
- for problem in yaml_config[PROBLEMS]:
- for sbml_file in problem[SBML_FILES]:
- archive.addFile(
- os.path.join(path_prefix, sbml_file),
- sbml_file,
- libcombine.KnownFormats.lookupFormat("sbml"),
- False,
- )
- _add_file_metadata(location=sbml_file, description="SBML model")
-
- for field in [
- MEASUREMENT_FILES,
- OBSERVABLE_FILES,
- VISUALIZATION_FILES,
- CONDITION_FILES,
- ]:
- if field not in problem:
- continue
-
- for file in problem[field]:
- archive.addFile(
- os.path.join(path_prefix, file),
- file,
- libcombine.KnownFormats.lookupFormat("tsv"),
- False,
- )
- desc = field.split("_")[0]
- _add_file_metadata(
- location=file, description=f"PEtab {desc} file"
- )
-
- # Add archive metadata
- description = libcombine.OmexDescription()
- description.setAbout(".")
- description.setDescription("PEtab archive")
- description.setCreated(libcombine.OmexDescription.getCurrentDateAndTime())
-
- # Add creator info
- creator = libcombine.VCard()
- if family_name:
- creator.setFamilyName(family_name)
- if given_name:
- creator.setGivenName(given_name)
- if email:
- creator.setEmail(email)
- if organization:
- creator.setOrganization(organization)
- description.addCreator(creator)
-
- archive.addMetadata(".", description)
- archive.writeToFile(str(filename))
-
-
-def unique_preserve_order(seq: Sequence) -> List:
- """Return a list of unique elements in Sequence, keeping only the first
- occurrence of each element
-
- Parameters:
- seq: Sequence to prune
-
- Returns:
- List of unique elements in ``seq``
- """
- seen = set()
- seen_add = seen.add
- return [x for x in seq if not (x in seen or seen_add(x))]
+_deprecated_import_v1(__name__)
diff --git a/petab/lint.py b/petab/lint.py
index 07c1990b..a7461ca3 100644
--- a/petab/lint.py
+++ b/petab/lint.py
@@ -1,1191 +1,9 @@
-"""Integrity checks and tests for specific features used"""
+"""Deprecated module for linting PEtab files.
-import copy
-import logging
-import numbers
-import re
-from collections import Counter
-from typing import Any, Iterable, Optional
+Use petab.v1.lint instead.
+"""
-import numpy as np
-import pandas as pd
-import sympy as sp
+from petab import _deprecated_import_v1
+from petab.v1.lint import * # noqa: F403, F401, E402
-import petab
-
-from . import core, measurements, parameters
-from .C import * # noqa: F403
-from .math import sympify_petab
-from .models import Model
-
-logger = logging.getLogger(__name__)
-__all__ = [
- "assert_all_parameters_present_in_parameter_df",
- "assert_measured_observables_defined",
- "assert_measurement_conditions_present_in_condition_table",
- "assert_measurements_not_null",
- "assert_measurements_numeric",
- "assert_model_parameters_in_condition_or_parameter_table",
- "assert_no_leading_trailing_whitespace",
- "assert_noise_distributions_valid",
- "assert_parameter_bounds_are_numeric",
- "assert_parameter_estimate_is_boolean",
- "assert_parameter_id_is_string",
- "assert_parameter_prior_parameters_are_valid",
- "assert_parameter_prior_type_is_valid",
- "assert_parameter_scale_is_valid",
- "assert_unique_observable_ids",
- "assert_unique_parameter_ids",
- "check_condition_df",
- "check_ids",
- "check_measurement_df",
- "check_observable_df",
- "check_parameter_bounds",
- "check_parameter_df",
- "condition_table_is_parameter_free",
- "get_non_unique",
- "is_scalar_float",
- "is_valid_identifier",
- "lint_problem",
- "measurement_table_has_observable_parameter_numeric_overrides",
- "measurement_table_has_timepoint_specific_mappings",
- "observable_table_has_nontrivial_noise_formula",
-]
-
-
-def _check_df(df: pd.DataFrame, req_cols: Iterable, name: str) -> None:
- """Check if given columns are present in DataFrame
-
- Arguments:
- df: Dataframe to check
- req_cols: Column names which have to be present
- name: Name of the DataFrame to be included in error message
-
- Raises:
- AssertionError: if a column is missing
- """
- if missing_cols := set(req_cols) - set(df.columns.values):
- raise AssertionError(
- f"DataFrame {name} requires the columns {missing_cols}."
- )
-
-
-def assert_no_leading_trailing_whitespace(
- names_list: Iterable[str], name: str
-) -> None:
- """Check that there is no trailing whitespace in elements of Iterable
-
- Arguments:
- names_list: strings to check for whitespace
- name: name of `names_list` for error messages
-
- Raises:
- AssertionError: if there is trailing whitespace
- """
- r = re.compile(r"(?:^\s)|(?:\s$)")
- for i, x in enumerate(names_list):
- if isinstance(x, str) and r.search(x):
- raise AssertionError(f"Whitespace around {name}[{i}] = '{x}'.")
-
-
-def check_condition_df(
- df: pd.DataFrame,
- model: Optional[Model] = None,
- observable_df: Optional[pd.DataFrame] = None,
- mapping_df: Optional[pd.DataFrame] = None,
-) -> None:
- """Run sanity checks on PEtab condition table
-
- Arguments:
- df: PEtab condition DataFrame
- model: Model for additional checking of parameter IDs
- observable_df: PEtab observables DataFrame
- mapping_df: PEtab mapping DataFrame
-
- Raises:
- AssertionError: in case of problems
- """
- # Check required columns are present
- req_cols = []
- _check_df(df, req_cols, "condition")
-
- # Check for correct index
- if df.index.name != CONDITION_ID:
- raise AssertionError(
- f"Condition table has wrong index {df.index.name}."
- f"expected {CONDITION_ID}."
- )
-
- check_ids(df.index.values, kind="condition")
-
- if not df.index.is_unique:
- raise AssertionError(
- "Non-unique condition IDs: "
- f"{df.index.values[df.index.duplicated()]}"
- )
-
- for column_name in req_cols:
- if not np.issubdtype(df[column_name].dtype, np.number):
- assert_no_leading_trailing_whitespace(
- df[column_name].values, column_name
- )
-
- if model is not None:
- allowed_cols = set(model.get_valid_ids_for_condition_table())
- if observable_df is not None:
- allowed_cols |= set(
- petab.get_output_parameters(
- model=model,
- observable_df=observable_df,
- mapping_df=mapping_df,
- )
- )
- if mapping_df is not None:
- allowed_cols |= set(mapping_df.index.values)
- for column_name in df.columns:
- if (
- column_name != CONDITION_NAME
- and column_name not in allowed_cols
- ):
- raise AssertionError(
- "Condition table contains column for unknown entity '"
- f"{column_name}'."
- )
-
-
-def check_measurement_df(
- df: pd.DataFrame, observable_df: Optional[pd.DataFrame] = None
-) -> None:
- """Run sanity checks on PEtab measurement table
-
- Arguments:
- df: PEtab measurement DataFrame
- observable_df: PEtab observable DataFrame for checking if measurements
- are compatible with observable transformations.
-
- Raises:
- AssertionError, ValueError: in case of problems
- """
- _check_df(df, MEASUREMENT_DF_REQUIRED_COLS, "measurement")
-
- for column_name in MEASUREMENT_DF_REQUIRED_COLS:
- if not np.issubdtype(df[column_name].dtype, np.number):
- assert_no_leading_trailing_whitespace(
- df[column_name].values, column_name
- )
-
- for column_name in MEASUREMENT_DF_OPTIONAL_COLS:
- if column_name in df and not np.issubdtype(
- df[column_name].dtype, np.number
- ):
- assert_no_leading_trailing_whitespace(
- df[column_name].values, column_name
- )
-
- if observable_df is not None:
- assert_measured_observables_defined(df, observable_df)
- measurements.assert_overrides_match_parameter_count(df, observable_df)
-
- if OBSERVABLE_TRANSFORMATION in observable_df:
- # Check for positivity of measurements in case of
- # log-transformation
- assert_unique_observable_ids(observable_df)
- # If the above is not checked, in the following loop
- # trafo may become a pandas Series
- for measurement, obs_id in zip(df[MEASUREMENT], df[OBSERVABLE_ID]):
- trafo = observable_df.loc[obs_id, OBSERVABLE_TRANSFORMATION]
- if measurement <= 0.0 and trafo in [LOG, LOG10]:
- raise ValueError(
- "Measurements with observable "
- f"transformation {trafo} must be "
- f"positive, but {measurement} <= 0."
- )
-
- assert_measurements_not_null(df)
- assert_measurements_numeric(df)
-
-
-def check_parameter_df(
- df: pd.DataFrame,
- model: Optional[Model] = None,
- observable_df: Optional[pd.DataFrame] = None,
- measurement_df: Optional[pd.DataFrame] = None,
- condition_df: Optional[pd.DataFrame] = None,
- mapping_df: Optional[pd.DataFrame] = None,
-) -> None:
- """Run sanity checks on PEtab parameter table
-
- Arguments:
- df: PEtab parameter DataFrame
- model: Model for additional checking of parameter IDs
- observable_df: PEtab observable table for additional checks
- measurement_df: PEtab measurement table for additional checks
- condition_df: PEtab condition table for additional checks
- mapping_df: PEtab mapping table for additional checks
-
- Raises:
- AssertionError: in case of problems
- """
- _check_df(df, PARAMETER_DF_REQUIRED_COLS[1:], "parameter")
-
- if df.index.name != PARAMETER_ID:
- raise AssertionError(
- f"Parameter table has wrong index {df.index.name}."
- f"expected {PARAMETER_ID}."
- )
-
- check_ids(df.index.values, kind="parameter")
-
- for column_name in PARAMETER_DF_REQUIRED_COLS[1:]: # 0 is PARAMETER_ID
- if not np.issubdtype(df[column_name].dtype, np.number):
- assert_no_leading_trailing_whitespace(
- df[column_name].values, column_name
- )
-
- # nominal value is generally optional, but required if any for any
- # parameter estimate != 1
- non_estimated_par_ids = list(
- df.index[
- (df[ESTIMATE] != 1)
- | (
- pd.api.types.is_string_dtype(df[ESTIMATE])
- and df[ESTIMATE] != "1"
- )
- ]
- )
- if non_estimated_par_ids:
- if NOMINAL_VALUE not in df:
- raise AssertionError(
- "Parameter table contains parameters "
- f"{non_estimated_par_ids} that are not "
- "specified to be estimated, "
- f"but column {NOMINAL_VALUE} is missing."
- )
- try:
- df.loc[non_estimated_par_ids, NOMINAL_VALUE].apply(float)
- except ValueError as e:
- raise AssertionError(
- f"Expected numeric values for `{NOMINAL_VALUE}` in parameter "
- "table for all non-estimated parameters."
- ) from e
-
- assert_parameter_id_is_string(df)
- assert_parameter_scale_is_valid(df)
- assert_parameter_bounds_are_numeric(df)
- assert_parameter_estimate_is_boolean(df)
- assert_unique_parameter_ids(df)
- check_parameter_bounds(df)
- assert_parameter_prior_type_is_valid(df)
-
- if model and measurement_df is not None and condition_df is not None:
- assert_all_parameters_present_in_parameter_df(
- df, model, observable_df, measurement_df, condition_df, mapping_df
- )
-
-
-def check_observable_df(observable_df: pd.DataFrame) -> None:
- """Check validity of observable table
-
- Arguments:
- observable_df: PEtab observable DataFrame
-
- Raises:
- AssertionError: in case of problems
- """
- _check_df(observable_df, OBSERVABLE_DF_REQUIRED_COLS[1:], "observable")
-
- check_ids(observable_df.index.values, kind="observable")
-
- for column_name in OBSERVABLE_DF_REQUIRED_COLS[1:]:
- if not np.issubdtype(observable_df[column_name].dtype, np.number):
- assert_no_leading_trailing_whitespace(
- observable_df[column_name].values, column_name
- )
-
- for column_name in OBSERVABLE_DF_OPTIONAL_COLS:
- if column_name in observable_df and not np.issubdtype(
- observable_df[column_name].dtype, np.number
- ):
- assert_no_leading_trailing_whitespace(
- observable_df[column_name].values, column_name
- )
-
- assert_noise_distributions_valid(observable_df)
- assert_unique_observable_ids(observable_df)
-
- # Check that formulas are parsable
- for row in observable_df.itertuples():
- obs = getattr(row, OBSERVABLE_FORMULA)
- try:
- sympify_petab(obs)
- except sp.SympifyError as e:
- raise AssertionError(
- f"Cannot parse expression '{obs}' "
- f"for observable {row.Index}: {e}"
- ) from e
-
- noise = getattr(row, NOISE_FORMULA)
- try:
- sympified_noise = sympify_petab(noise)
- if sympified_noise is None or (
- sympified_noise.is_Number and not sympified_noise.is_finite
- ):
- raise AssertionError(
- f"No or non-finite {NOISE_FORMULA} "
- f"given for observable {row.Index}."
- )
- except sp.SympifyError as e:
- raise AssertionError(
- f"Cannot parse expression '{noise}' "
- f"for noise model for observable "
- f"{row.Index}: {e}"
- ) from e
-
-
-def assert_all_parameters_present_in_parameter_df(
- parameter_df: pd.DataFrame,
- model: Model,
- observable_df: pd.DataFrame,
- measurement_df: pd.DataFrame,
- condition_df: pd.DataFrame,
- mapping_df: pd.DataFrame = None,
-) -> None:
- """Ensure all required parameters are contained in the parameter table
- with no additional ones
-
- Arguments:
- parameter_df: PEtab parameter DataFrame
- model: model
- observable_df: PEtab observable table
- measurement_df: PEtab measurement table
- condition_df: PEtab condition table
- mapping_df: PEtab mapping table for additional checks
-
- Raises:
- AssertionError: in case of problems
- """
- required = parameters.get_required_parameters_for_parameter_table(
- model=model,
- condition_df=condition_df,
- observable_df=observable_df,
- measurement_df=measurement_df,
- mapping_df=mapping_df,
- )
-
- allowed = parameters.get_valid_parameters_for_parameter_table(
- model=model,
- condition_df=condition_df,
- observable_df=observable_df,
- measurement_df=measurement_df,
- mapping_df=mapping_df,
- )
-
- actual = set(parameter_df.index)
- missing = required - actual
- extraneous = actual - allowed
-
- # missing parameters might be present under a different name based on
- # the mapping table
- if missing and mapping_df is not None:
- model_to_petab_mapping = {}
- for map_from, map_to in zip(
- mapping_df.index.values, mapping_df[MODEL_ENTITY_ID]
- ):
- if map_to in model_to_petab_mapping:
- model_to_petab_mapping[map_to].append(map_from)
- else:
- model_to_petab_mapping[map_to] = [map_from]
- missing = {
- missing_id
- for missing_id in missing
- if missing_id not in model_to_petab_mapping
- or all(
- mapping_parameter not in actual
- for mapping_parameter in model_to_petab_mapping[missing_id]
- )
- }
-
- if missing:
- raise AssertionError(
- "Missing parameter(s) in the model or the "
- "parameters table: " + str(missing)
- )
-
- if extraneous:
- raise AssertionError(
- "Extraneous parameter(s) in parameter table: " + str(extraneous)
- )
-
-
-def assert_measured_observables_defined(
- measurement_df: pd.DataFrame, observable_df: pd.DataFrame
-) -> None:
- """Check if all observables in the measurement table have been defined in
- the observable table
-
- Arguments:
- measurement_df: PEtab measurement table
- observable_df: PEtab observable table
-
- Raises:
- AssertionError: in case of problems
- """
- used_observables = set(measurement_df[OBSERVABLE_ID].values)
- defined_observables = set(observable_df.index.values)
- if undefined_observables := (used_observables - defined_observables):
- raise AssertionError(
- f"Observables {undefined_observables} used in "
- "measurement table but not defined in observables table."
- )
-
-
-def condition_table_is_parameter_free(condition_df: pd.DataFrame) -> bool:
- """Check if all entries in the condition table are numeric
- (no parameter IDs)
-
- Arguments:
- condition_df: PEtab condition table
-
- Returns:
- ``True`` if there are no parameter overrides in the condition table,
- ``False`` otherwise.
- """
- return len(petab.get_parametric_overrides(condition_df)) == 0
-
-
-def assert_parameter_id_is_string(parameter_df: pd.DataFrame) -> None:
- """
- Check if all entries in the parameterId column of the parameter table
- are string and not empty.
-
- Arguments:
- parameter_df: PEtab parameter DataFrame
-
- Raises:
- AssertionError: in case of problems
- """
- for parameter_id in parameter_df:
- if isinstance(parameter_id, str):
- if parameter_id[0].isdigit():
- raise AssertionError(
- f"{PARAMETER_ID} {parameter_id} starts with integer."
- )
- else:
- raise AssertionError(f"Empty {PARAMETER_ID} found.")
-
-
-def assert_unique_parameter_ids(parameter_df: pd.DataFrame) -> None:
- """
- Check if the parameterId column of the parameter table is unique.
-
- Arguments:
- parameter_df: PEtab parameter DataFrame
-
- Raises:
- AssertionError: in case of problems
- """
- non_unique_ids = get_non_unique(parameter_df.index)
- if len(non_unique_ids) > 0:
- raise AssertionError(
- f"Non-unique values found in the {PARAMETER_ID} column"
- " of the parameter table: " + str(non_unique_ids)
- )
-
-
-def assert_parameter_scale_is_valid(parameter_df: pd.DataFrame) -> None:
- """
- Check if all entries in the parameterScale column of the parameter table
- are 'lin' for linear, 'log' for natural logarithm or 'log10' for base 10
- logarithm.
-
- Arguments:
- parameter_df: PEtab parameter DataFrame
-
- Raises:
- AssertionError: in case of problems
- """
- for parameter_scale in parameter_df[PARAMETER_SCALE]:
- if parameter_scale not in [LIN, LOG, LOG10]:
- raise AssertionError(
- f"Expected {LIN}, {LOG}, or {LOG10}, but "
- f"got {parameter_scale}."
- )
-
-
-def assert_parameter_bounds_are_numeric(parameter_df: pd.DataFrame) -> None:
- """
- Check if all entries in the lowerBound and upperBound columns of the
- parameter table are numeric.
-
- Arguments:
- parameter_df: PEtab parameter DataFrame
-
- Raises:
- AssertionError: in case of problems
- """
- parameter_df[LOWER_BOUND].apply(float).all()
- parameter_df[UPPER_BOUND].apply(float).all()
-
-
-def check_parameter_bounds(parameter_df: pd.DataFrame) -> None:
- """
- Check if all entries in the lowerBound are smaller than upperBound column
- in the parameter table and that bounds are positive for parameterScale
- log|log10.
-
- Arguments:
- parameter_df: PEtab parameter DataFrame
-
- Raises:
- AssertionError: in case of problems
-
- """
- for _, row in parameter_df.iterrows():
- if int(row[ESTIMATE]):
- if not row[LOWER_BOUND] <= row[UPPER_BOUND]:
- raise AssertionError(
- f"{LOWER_BOUND} greater than {UPPER_BOUND} for "
- f"{PARAMETER_ID} {row.name}."
- )
- if (row[LOWER_BOUND] < 0.0 or row[UPPER_BOUND] < 0.0) and row[
- PARAMETER_SCALE
- ] in [LOG, LOG10]:
- raise AssertionError(
- f"Bounds for {row[PARAMETER_SCALE]} scaled parameter "
- f"{ row.name} must be positive."
- )
-
-
-def assert_parameter_prior_type_is_valid(parameter_df: pd.DataFrame) -> None:
- """Check that valid prior types have been selected
-
- Arguments:
- parameter_df: PEtab parameter table
-
- Raises:
- AssertionError: in case of invalid prior
- """
- for col in [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE]:
- if col not in parameter_df.columns:
- continue
- for _, row in parameter_df.iterrows():
- if row[col] not in PRIOR_TYPES and not core.is_empty(row[col]):
- raise AssertionError(
- f"{col} must be one of {PRIOR_TYPES} but is "
- f"'{row[col]}'."
- )
-
-
-def assert_parameter_prior_parameters_are_valid(
- parameter_df: pd.DataFrame,
-) -> None:
- """Check that the prior parameters are valid.
-
- Arguments:
- parameter_df: PEtab parameter table
-
- Raises:
- AssertionError: in case of invalid prior parameters
- """
- prior_type_cols = [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE]
- prior_par_cols = [
- INITIALIZATION_PRIOR_PARAMETERS,
- OBJECTIVE_PRIOR_PARAMETERS,
- ]
-
- # perform test for both priors
- for type_col, par_col in zip(prior_type_cols, prior_par_cols):
- # iterate over rows
- for _, row in parameter_df.iterrows():
- # get type
- if type_col not in row or core.is_empty(row[type_col]):
- type_ = PARAMETER_SCALE_UNIFORM
- else:
- type_ = row[type_col]
- # get parameters
- pars_str = row.get(par_col, "")
- with_default_parameters = [PARAMETER_SCALE_UNIFORM]
- # check if parameters are empty
- if core.is_empty(pars_str):
- if type_ not in with_default_parameters:
- raise AssertionError(
- f"An empty {par_col} is only permitted with "
- f"{type_col} in {with_default_parameters}."
- )
- # empty parameters fine
- continue
- # parse parameters
- try:
- pars = tuple(
- float(val) for val in pars_str.split(PARAMETER_SEPARATOR)
- )
- except ValueError as e:
- raise AssertionError(
- f"Could not parse prior parameters '{pars_str}'."
- ) from e
-
- # all distributions take 2 parameters
- if len(pars) != 2:
- raise AssertionError(
- f"The prior parameters '{pars}' do not contain the "
- "expected number of entries (currently 'par1"
- f"{PARAMETER_SEPARATOR}par2' for all prior types)."
- )
-
-
-def assert_parameter_estimate_is_boolean(parameter_df: pd.DataFrame) -> None:
- """
- Check if all entries in the estimate column of the parameter table are
- 0 or 1.
-
- Arguments:
- parameter_df: PEtab parameter DataFrame
-
- Raises:
- AssertionError: in case of problems
- """
- for estimate in parameter_df[ESTIMATE]:
- if int(estimate) not in [True, False]:
- raise AssertionError(
- f"Expected 0 or 1 but got {estimate} in {ESTIMATE} column."
- )
-
-
-def is_scalar_float(x: Any):
- """
- Checks whether input is a number or can be transformed into a number
- via float
-
- :param x:
- input
- :return:
- ``True`` if is or can be converted to number, ``False`` otherwise.
- """
- if isinstance(x, numbers.Number):
- return True
- try:
- float(x)
- return True
- except (ValueError, TypeError):
- return False
-
-
-def measurement_table_has_timepoint_specific_mappings(
- measurement_df: Optional[pd.DataFrame],
- allow_scalar_numeric_noise_parameters: bool = False,
- allow_scalar_numeric_observable_parameters: bool = False,
-) -> bool:
- """
- Are there time-point or replicate specific parameter assignments in the
- measurement table.
-
- Arguments:
- measurement_df:
- PEtab measurement table
-
- allow_scalar_numeric_noise_parameters:
- ignore scalar numeric assignments to noiseParameter placeholders
-
- allow_scalar_numeric_observable_parameters:
- ignore scalar numeric assignments to observableParameter
- placeholders
-
- Returns:
- True if there are time-point or replicate specific (non-numeric)
- parameter assignments in the measurement table, False otherwise.
- """
- if measurement_df is None:
- return False
-
- # since we edit it, copy it first
- measurement_df = copy.deepcopy(measurement_df)
-
- # mask numeric values
- for col, allow_scalar_numeric in [
- (OBSERVABLE_PARAMETERS, allow_scalar_numeric_observable_parameters),
- (NOISE_PARAMETERS, allow_scalar_numeric_noise_parameters),
- ]:
- if col not in measurement_df:
- continue
-
- measurement_df[col] = measurement_df[col].apply(str)
-
- if allow_scalar_numeric:
- measurement_df.loc[
- measurement_df[col].apply(is_scalar_float), col
- ] = np.nan
-
- grouping_cols = core.get_notnull_columns(
- measurement_df,
- [
- OBSERVABLE_ID,
- SIMULATION_CONDITION_ID,
- PREEQUILIBRATION_CONDITION_ID,
- OBSERVABLE_PARAMETERS,
- NOISE_PARAMETERS,
- ],
- )
- grouped_df = measurement_df.groupby(grouping_cols, dropna=False)
-
- grouping_cols = core.get_notnull_columns(
- measurement_df,
- [
- OBSERVABLE_ID,
- SIMULATION_CONDITION_ID,
- PREEQUILIBRATION_CONDITION_ID,
- ],
- )
- grouped_df2 = measurement_df.groupby(grouping_cols)
- # data frame has timepoint specific overrides if grouping by noise
- # parameters and observable parameters in addition to observable,
- # condition and preeq id yields more groups
- return len(grouped_df) != len(grouped_df2)
-
-
-def observable_table_has_nontrivial_noise_formula(
- observable_df: Optional[pd.DataFrame],
-) -> bool:
- """
- Does any observable have a noise formula that is not just a single
- parameter?
-
- Arguments:
- observable_df: PEtab observable table
-
- Returns:
- ``True`` if any noise formula does not consist of a single identifier,
- ``False`` otherwise.
- """
- if observable_df is None:
- return False
-
- return (
- not observable_df[NOISE_FORMULA]
- .apply(
- lambda x: is_scalar_float(x)
- or re.match(r"^[\w]+$", str(x)) is not None
- )
- .all()
- )
-
-
-def measurement_table_has_observable_parameter_numeric_overrides(
- measurement_df: pd.DataFrame,
-) -> bool:
- """Are there any numbers to override observable parameters?
-
- Arguments:
- measurement_df: PEtab measurement table
-
- Returns:
- ``True`` if there are any numbers to override observable/noise
- parameters, ``False`` otherwise.
- """
- if OBSERVABLE_PARAMETERS not in measurement_df:
- return False
-
- for _, row in measurement_df.iterrows():
- for override in measurements.split_parameter_replacement_list(
- row.get(OBSERVABLE_PARAMETERS, None)
- ):
- if isinstance(override, numbers.Number):
- return True
-
- return False
-
-
-def assert_noise_distributions_valid(observable_df: pd.DataFrame) -> None:
- """
- Ensure that noise distributions and transformations for observables are
- valid.
-
- Arguments:
- observable_df: PEtab observable table
-
- Raises:
- AssertionError: in case of problems
- """
- if OBSERVABLE_TRANSFORMATION in observable_df:
- # check for valid values
- for trafo in observable_df[OBSERVABLE_TRANSFORMATION]:
- if trafo not in ["", *OBSERVABLE_TRANSFORMATIONS] and not (
- isinstance(trafo, numbers.Number) and np.isnan(trafo)
- ):
- raise ValueError(
- f"Unrecognized observable transformation in observable "
- f"table: {trafo}."
- )
-
- if NOISE_DISTRIBUTION in observable_df:
- for distr in observable_df[NOISE_DISTRIBUTION]:
- if distr not in ["", *NOISE_MODELS] and not (
- isinstance(distr, numbers.Number) and np.isnan(distr)
- ):
- raise ValueError(
- f"Unrecognized noise distribution in observable "
- f"table: {distr}."
- )
-
-
-def assert_unique_observable_ids(observable_df: pd.DataFrame) -> None:
- """
- Check if the observableId column of the observable table is unique.
-
- Arguments:
- observable_df: PEtab observable DataFrame
-
- Raises:
- AssertionError: in case of problems
- """
- non_unique_ids = get_non_unique(observable_df.index)
- if len(non_unique_ids) > 0:
- raise AssertionError(
- f"Non-unique values found in the {OBSERVABLE_ID} column"
- " of the observable table: " + str(non_unique_ids)
- )
-
-
-def get_non_unique(values):
- counter = Counter(values)
- return [value for (value, count) in counter.items() if count > 1]
-
-
-def lint_problem(problem: "petab.Problem") -> bool:
- """Run PEtab validation on problem
-
- Arguments:
- problem: PEtab problem to check
-
- Returns:
- ``True`` if errors occurred, ``False`` otherwise
- """
- # pylint: disable=too-many-statements
- errors_occurred = False
-
- if problem.extensions_config:
- logger.warning(
- "Validation of PEtab extensions is not yet implemented, "
- "but the given problem uses the following extensions: "
- f"{'', ''.join(problem.extensions_config.keys())}"
- )
-
- # Run checks on individual files
- if problem.model is not None:
- logger.info("Checking model...")
- errors_occurred |= not problem.model.is_valid()
- else:
- logger.warning("Model not available. Skipping.")
-
- if problem.measurement_df is not None:
- logger.info("Checking measurement table...")
- try:
- check_measurement_df(problem.measurement_df, problem.observable_df)
-
- if problem.condition_df is not None:
- assert_measurement_conditions_present_in_condition_table(
- problem.measurement_df, problem.condition_df
- )
- except AssertionError as e:
- logger.error(e)
- errors_occurred = True
- else:
- logger.warning("Measurement table not available. Skipping.")
-
- if problem.condition_df is not None:
- logger.info("Checking condition table...")
- try:
- check_condition_df(
- problem.condition_df,
- model=problem.model,
- observable_df=problem.observable_df,
- mapping_df=problem.mapping_df,
- )
- except AssertionError as e:
- logger.error(e)
- errors_occurred = True
- else:
- logger.warning("Condition table not available. Skipping.")
-
- if problem.observable_df is not None:
- logger.info("Checking observable table...")
- try:
- check_observable_df(problem.observable_df)
- except AssertionError as e:
- logger.error(e)
- errors_occurred = True
- if problem.model is not None:
- for obs_id in problem.observable_df.index:
- if problem.model.has_entity_with_id(obs_id):
- logger.error(
- f"Observable ID {obs_id} shadows model " "entity."
- )
- errors_occurred = True
- else:
- logger.warning("Observable table not available. Skipping.")
-
- if problem.parameter_df is not None:
- logger.info("Checking parameter table...")
- try:
- check_parameter_df(
- problem.parameter_df,
- problem.model,
- problem.observable_df,
- problem.measurement_df,
- problem.condition_df,
- problem.mapping_df,
- )
- except AssertionError as e:
- logger.error(e)
- errors_occurred = True
- else:
- logger.warning("Parameter table not available. Skipping.")
-
- if (
- problem.model is not None
- and problem.condition_df is not None
- and problem.parameter_df is not None
- ):
- try:
- assert_model_parameters_in_condition_or_parameter_table(
- problem.model,
- problem.condition_df,
- problem.parameter_df,
- problem.mapping_df,
- )
- except AssertionError as e:
- logger.error(e)
- errors_occurred = True
-
- if problem.visualization_df is not None:
- logger.info("Checking visualization table...")
- from petab.visualize.lint import validate_visualization_df
-
- errors_occurred |= validate_visualization_df(problem)
- else:
- logger.warning("Visualization table not available. Skipping.")
-
- if errors_occurred:
- logger.error("Not OK")
- elif (
- problem.measurement_df is None
- or problem.condition_df is None
- or problem.model is None
- or problem.parameter_df is None
- or problem.observable_df is None
- ):
- logger.warning(
- "Not all files of the PEtab problem definition could "
- "be checked."
- )
- else:
- logger.info("PEtab format check completed successfully.")
-
- return errors_occurred
-
-
-def assert_model_parameters_in_condition_or_parameter_table(
- model: Model,
- condition_df: pd.DataFrame,
- parameter_df: pd.DataFrame,
- mapping_df: pd.DataFrame = None,
- observable_df: pd.DataFrame = None,
- measurement_df: pd.DataFrame = None,
-) -> None:
- """Model parameters that are rule targets must not be present in the
- parameter table. Other parameters must only be present in either in
- parameter table or condition table columns. Check that.
-
- Arguments:
- parameter_df: PEtab parameter DataFrame
- model: PEtab model
- condition_df: PEtab condition table
- mapping_df: PEtab mapping table
- observable_df: PEtab observable table
- measurement_df: PEtab measurement table
-
- Raises:
- AssertionError: in case of problems
- """
- allowed_in_condition_cols = set(model.get_valid_ids_for_condition_table())
- if mapping_df is not None:
- allowed_in_condition_cols |= {
- from_id
- for from_id, to_id in zip(
- mapping_df.index.values, mapping_df[MODEL_ENTITY_ID]
- )
- # mapping table entities mapping to already allowed parameters
- if to_id in allowed_in_condition_cols
- # mapping table entities mapping to species
- or model.is_state_variable(to_id)
- }
-
- allowed_in_parameter_table = (
- parameters.get_valid_parameters_for_parameter_table(
- model=model,
- condition_df=condition_df,
- observable_df=observable_df,
- measurement_df=measurement_df,
- mapping_df=mapping_df,
- )
- )
-
- entities_in_condition_table = set(condition_df.columns) - {CONDITION_NAME}
- entities_in_parameter_table = set(parameter_df.index.values)
-
- disallowed_in_condition = {
- x
- for x in (entities_in_condition_table - allowed_in_condition_cols)
- # we only check model entities here, not output parameters
- if model.has_entity_with_id(x)
- }
- if disallowed_in_condition:
- is_or_are = "is" if len(disallowed_in_condition) == 1 else "are"
- raise AssertionError(
- f"{disallowed_in_condition} {is_or_are} not "
- "allowed to occur in condition table "
- "columns."
- )
-
- disallowed_in_parameters = {
- x
- for x in (entities_in_parameter_table - allowed_in_parameter_table)
- # we only check model entities here, not output parameters
- if model.has_entity_with_id(x)
- }
-
- if disallowed_in_parameters:
- is_or_are = "is" if len(disallowed_in_parameters) == 1 else "are"
- raise AssertionError(
- f"{disallowed_in_parameters} {is_or_are} not "
- "allowed to occur in the parameters table."
- )
-
- in_both = entities_in_condition_table & entities_in_parameter_table
- if in_both:
- is_or_are = "is" if len(in_both) == 1 else "are"
- raise AssertionError(
- f"{in_both} {is_or_are} present in both "
- "the condition table and the parameter table."
- )
-
-
-def assert_measurement_conditions_present_in_condition_table(
- measurement_df: pd.DataFrame, condition_df: pd.DataFrame
-) -> None:
- """Ensure that all entries from measurement_df.simulationConditionId and
- measurement_df.preequilibrationConditionId are present in
- condition_df.index.
-
- Arguments:
- measurement_df: PEtab measurement table
- condition_df: PEtab condition table
-
- Raises:
- AssertionError: in case of problems
- """
- used_conditions = set(measurement_df[SIMULATION_CONDITION_ID].values)
- if PREEQUILIBRATION_CONDITION_ID in measurement_df:
- used_conditions |= set(
- measurement_df[PREEQUILIBRATION_CONDITION_ID].dropna().values
- )
- available_conditions = set(condition_df.index.values)
- if missing_conditions := (used_conditions - available_conditions):
- raise AssertionError(
- "Measurement table references conditions that "
- "are not specified in the condition table: "
- + str(missing_conditions)
- )
-
-
-def assert_measurements_not_null(
- measurement_df: pd.DataFrame,
-) -> None:
- """Check whether all measurements are not null.
-
- Arguments:
- measurement_df:
- PEtab measurement table.
-
- Raises:
- AssertionError:
- Some measurement value(s) are null (missing).
- """
- if measurement_df[MEASUREMENT].isnull().any():
- raise AssertionError("Some measurement(s) are null (missing).")
-
-
-def assert_measurements_numeric(
- measurement_df: pd.DataFrame,
-) -> None:
- """Check whether all measurements are numeric.
-
- Note that null (missing) measurements are ignored.
-
- Arguments:
- measurement_df:
- PEtab measurement table.
-
- Raises:
- AssertionError:
- Some measurement value(s) are not numeric.
- """
- not_null_measurement_values = measurement_df[MEASUREMENT].dropna()
- all_measurements_are_numeric = (
- pd.to_numeric(not_null_measurement_values, errors="coerce")
- .notnull()
- .all()
- )
- if not all_measurements_are_numeric:
- raise AssertionError(
- "Some values in the `petab.C.MEASUREMENT` column of the PEtab "
- "measurements table are not numeric."
- )
-
-
-def is_valid_identifier(x: str) -> bool:
- """Check whether `x` is a valid identifier
-
- Check whether `x` is a valid identifier for conditions, parameters,
- observables... . Identifiers may contain upper and lower case letters,
- digits and underscores, but must not start with a digit.
-
- Arguments:
- x: string to check
-
- Returns:
- ``True`` if valid, ``False`` otherwise
- """
- if pd.isna(x):
- return False
-
- return re.match(r"^[a-zA-Z_]\w*$", x) is not None
-
-
-def check_ids(ids: Iterable[str], kind: str = "") -> None:
- """Check IDs are valid
-
- Arguments:
- ids: Iterable of IDs to check
- kind: Kind of IDs, for more informative error message
-
- Raises:
- ValueError: in case of invalid IDs
- """
- invalids = [
- (index, _id)
- for index, _id in enumerate(ids)
- if not is_valid_identifier(_id)
- ]
-
- if invalids:
- # The first row is the header row, and Python lists are zero-indexed,
- # hence need to add 2 for the correct line number.
- offset = 2
- error_output = "\n".join(
- [
- f"Line {index+offset}: "
- + ("Missing ID" if pd.isna(_id) else _id)
- for index, _id in invalids
- ]
- )
- raise ValueError(f"Invalid {kind} ID(s):\n{error_output}")
+_deprecated_import_v1(__name__)
diff --git a/petab/mapping.py b/petab/mapping.py
index a345ca88..ca6cdd3f 100644
--- a/petab/mapping.py
+++ b/petab/mapping.py
@@ -1,118 +1,7 @@
-"""Functionality related to the PEtab entity mapping table"""
-from pathlib import Path
-from typing import Optional, Union
+"""Deprecated module for mapping tables.
-import pandas as pd
+Use petab.v1.mapping instead."""
+from petab import _deprecated_import_v1
+from petab.v1.mapping import * # noqa: F403, F401, E402
-from . import lint
-from .C import * # noqa: F403
-from .models import Model
-
-__all__ = [
- "get_mapping_df",
- "write_mapping_df",
- "check_mapping_df",
-]
-
-
-def get_mapping_df(
- mapping_file: Union[None, str, Path, pd.DataFrame],
-) -> pd.DataFrame:
- """
- Read the provided mapping file into a ``pandas.Dataframe``.
-
- Arguments:
- mapping_file: Name of file to read from or pandas.Dataframe
-
- Returns:
- Mapping DataFrame
- """
- if mapping_file is None:
- return mapping_file
-
- if isinstance(mapping_file, (str, Path)):
- mapping_file = pd.read_csv(
- mapping_file, sep="\t", float_precision="round_trip"
- )
-
- if not isinstance(mapping_file.index, pd.RangeIndex):
- mapping_file.reset_index(
- drop=mapping_file.index.name != PETAB_ENTITY_ID,
- inplace=True,
- )
-
- for col in MAPPING_DF_REQUIRED_COLS:
- if col not in mapping_file.columns:
- raise KeyError(
- f"Mapping table missing mandatory field {PETAB_ENTITY_ID}."
- )
-
- lint.assert_no_leading_trailing_whitespace(
- mapping_file.reset_index()[col].values, col
- )
-
- mapping_file.set_index([PETAB_ENTITY_ID], inplace=True)
-
- return mapping_file
-
-
-def write_mapping_df(df: pd.DataFrame, filename: Union[str, Path]) -> None:
- """Write PEtab mapping table
-
- Arguments:
- df: PEtab mapping table
- filename: Destination file name
- """
- df = get_mapping_df(df)
- df.to_csv(filename, sep="\t", index=True)
-
-
-def check_mapping_df(
- df: pd.DataFrame,
- model: Optional[Model] = None,
-) -> None:
- """Run sanity checks on PEtab mapping table
-
- Arguments:
- df: PEtab mapping DataFrame
- model: Model for additional checking of parameter IDs
-
- Raises:
- AssertionError: in case of problems
- """
- lint._check_df(df, MAPPING_DF_REQUIRED_COLS[1:], "mapping")
-
- if df.index.name != PETAB_ENTITY_ID:
- raise AssertionError(
- f"Mapping table has wrong index {df.index.name}. "
- f"Expected {PETAB_ENTITY_ID}."
- )
-
- lint.check_ids(df.index.values, kind=PETAB_ENTITY_ID)
-
- if model:
- for model_entity_id in df[MODEL_ENTITY_ID]:
- if not model.has_entity_with_id(model_entity_id):
- raise AssertionError(
- "Mapping table maps to unknown "
- f"model entity ID {model_entity_id}."
- )
-
-
-def resolve_mapping(mapping_df: Optional[pd.DataFrame], element: str) -> str:
- """Resolve mapping for a given element.
-
- :param element:
- Element to resolve.
-
- :param mapping_df:
- Mapping table.
-
- :return:
- Resolved element.
- """
- if mapping_df is None:
- return element
- if element in mapping_df.index:
- return mapping_df.loc[element, MODEL_ENTITY_ID]
- return element
+_deprecated_import_v1(__name__)
diff --git a/petab/math/__init__.py b/petab/math/__init__.py
index 27ebacd2..bc857377 100644
--- a/petab/math/__init__.py
+++ b/petab/math/__init__.py
@@ -1,2 +1,9 @@
-"""Functions for parsing and evaluating mathematical expressions."""
+"""Deprecated module for math handling.
+
+Use petab.v1.math instead."""
+from petab import _deprecated_import_v1
+from petab.v1.math import * # noqa: F403, F401, E402
+
from .sympify import sympify_petab # noqa: F401
+
+_deprecated_import_v1(__name__)
diff --git a/petab/math/sympify.py b/petab/math/sympify.py
index 9227c51d..d85b8e1b 100644
--- a/petab/math/sympify.py
+++ b/petab/math/sympify.py
@@ -1,20 +1,5 @@
-"""PEtab math to sympy conversion."""
+"""Deprecated module. Use petab.v1.math.sympify instead."""
+from petab import _deprecated_import_v1
+from petab.v1.math.sympify import * # noqa: F403, F401, E402
-import sympy as sp
-from sympy.abc import _clash
-
-
-def sympify_petab(expr: str) -> sp.Expr:
- """
- Convert a PEtab math expression to a sympy expression.
-
- Parameters
- ----------
- expr:
- The PEtab math expression.
-
- Returns
- -------
- The sympy expression corresponding to ``expr``.
- """
- return sp.sympify(expr, locals=_clash)
+_deprecated_import_v1(__name__)
diff --git a/petab/measurements.py b/petab/measurements.py
index caa32047..fcc0ac8e 100644
--- a/petab/measurements.py
+++ b/petab/measurements.py
@@ -1,348 +1,7 @@
-"""Functions operating on the PEtab measurement table"""
-# noqa: F405
+"""Deprecated module for measurement tables.
-import itertools
-import math
-import numbers
-from pathlib import Path
-from typing import Dict, List, Union
+Use petab.v1.measurements instead."""
+from petab import _deprecated_import_v1
+from petab.v1.measurements import * # noqa: F403, F401, E402
-import numpy as np
-import pandas as pd
-
-from . import core, lint, observables
-from .C import * # noqa: F403
-
-__all__ = [
- "assert_overrides_match_parameter_count",
- "create_measurement_df",
- "get_measurement_df",
- "get_measurement_parameter_ids",
- "get_rows_for_condition",
- "get_simulation_conditions",
- "measurements_have_replicates",
- "measurement_is_at_steady_state",
- "split_parameter_replacement_list",
- "write_measurement_df",
-]
-
-
-def get_measurement_df(
- measurement_file: Union[None, str, Path, pd.DataFrame],
-) -> pd.DataFrame:
- """
- Read the provided measurement file into a ``pandas.Dataframe``.
-
- Arguments:
- measurement_file: Name of file to read from or pandas.Dataframe
-
- Returns:
- Measurement DataFrame
- """
- if measurement_file is None:
- return measurement_file
-
- if isinstance(measurement_file, (str, Path)):
- measurement_file = pd.read_csv(
- measurement_file, sep="\t", float_precision="round_trip"
- )
-
- lint.assert_no_leading_trailing_whitespace(
- measurement_file.columns.values, MEASUREMENT
- )
-
- return measurement_file
-
-
-def write_measurement_df(df: pd.DataFrame, filename: Union[str, Path]) -> None:
- """Write PEtab measurement table
-
- Arguments:
- df: PEtab measurement table
- filename: Destination file name
- """
- df = get_measurement_df(df)
- df.to_csv(filename, sep="\t", index=False)
-
-
-def get_simulation_conditions(measurement_df: pd.DataFrame) -> pd.DataFrame:
- """
- Create a table of separate simulation conditions. A simulation condition
- is a specific combination of simulationConditionId and
- preequilibrationConditionId.
-
- Arguments:
- measurement_df: PEtab measurement table
-
- Returns:
- Dataframe with columns 'simulationConditionId' and
- 'preequilibrationConditionId'. All-null columns will be omitted.
- Missing 'preequilibrationConditionId's will be set to '' (empty
- string).
- """
- if measurement_df.empty:
- return pd.DataFrame(data={SIMULATION_CONDITION_ID: []})
- # find columns to group by (i.e. if not all nans).
- # can be improved by checking for identical condition vectors
- grouping_cols = core.get_notnull_columns(
- measurement_df,
- [SIMULATION_CONDITION_ID, PREEQUILIBRATION_CONDITION_ID],
- )
-
- # group by cols and return dataframe containing each combination
- # of those rows only once (and an additional counting row)
- # We require NaN-containing rows, but they are ignored by `groupby`,
- # therefore replace them before
- simulation_conditions = (
- measurement_df.fillna("")
- .groupby(grouping_cols)
- .size()
- .reset_index()[grouping_cols]
- )
- # sort to be really sure that we always get the same order
- return simulation_conditions.sort_values(grouping_cols, ignore_index=True)
-
-
-def get_rows_for_condition(
- measurement_df: pd.DataFrame,
- condition: Union[pd.Series, pd.DataFrame, Dict],
-) -> pd.DataFrame:
- """
- Extract rows in `measurement_df` for `condition` according
- to 'preequilibrationConditionId' and 'simulationConditionId' in
- `condition`.
-
- Arguments:
- measurement_df:
- PEtab measurement DataFrame
- condition:
- DataFrame with single row (or Series) and columns
- 'preequilibrationConditionId' and 'simulationConditionId'.
- Or dictionary with those keys.
-
- Returns:
- The subselection of rows in ``measurement_df`` for the condition
- ``condition``.
- """
- # filter rows for condition
- row_filter = 1
- # check for equality in all grouping cols
- if PREEQUILIBRATION_CONDITION_ID in condition:
- row_filter = (
- measurement_df[PREEQUILIBRATION_CONDITION_ID].fillna("")
- == condition[PREEQUILIBRATION_CONDITION_ID]
- ) & row_filter
- if SIMULATION_CONDITION_ID in condition:
- row_filter = (
- measurement_df[SIMULATION_CONDITION_ID]
- == condition[SIMULATION_CONDITION_ID]
- ) & row_filter
- # apply filter
- cur_measurement_df = measurement_df.loc[row_filter, :]
-
- return cur_measurement_df
-
-
-def get_measurement_parameter_ids(measurement_df: pd.DataFrame) -> List[str]:
- """
- Return list of ID of parameters which occur in measurement table as
- observable or noise parameter overrides.
-
- Arguments:
- measurement_df:
- PEtab measurement DataFrame
-
- Returns:
- List of parameter IDs
- """
-
- def get_unique_parameters(series):
- return core.unique_preserve_order(
- itertools.chain.from_iterable(
- series.apply(split_parameter_replacement_list)
- )
- )
-
- return core.unique_preserve_order(
- get_unique_parameters(measurement_df[OBSERVABLE_PARAMETERS])
- + get_unique_parameters(measurement_df[NOISE_PARAMETERS])
- )
-
-
-def split_parameter_replacement_list(
- list_string: Union[str, numbers.Number], delim: str = PARAMETER_SEPARATOR
-) -> List[Union[str, numbers.Number]]:
- """
- Split values in observableParameters and noiseParameters in measurement
- table.
-
- Arguments:
- list_string: delim-separated stringified list
- delim: delimiter
-
- Returns:
- List of split values. Numeric values may be converted to `float`,
- and parameter IDs are kept as strings.
- """
- if list_string is None or list_string == "":
- return []
-
- if isinstance(list_string, numbers.Number):
- # Empty cells in pandas might be turned into nan
- # We might want to allow nan as replacement...
- if np.isnan(list_string):
- return []
- return [list_string]
-
- result = [x.strip() for x in list_string.split(delim)]
-
- def convert_and_check(x):
- x = core.to_float_if_float(x)
- if isinstance(x, float):
- return x
- if lint.is_valid_identifier(x):
- return x
-
- raise ValueError(
- f"The value '{x}' in the parameter replacement list "
- f"'{list_string}' is neither a number, nor a valid parameter ID."
- )
-
- return list(map(convert_and_check, result))
-
-
-def create_measurement_df() -> pd.DataFrame:
- """Create empty measurement dataframe
-
- Returns:
- Created DataFrame
- """
- return pd.DataFrame(
- data={
- OBSERVABLE_ID: [],
- PREEQUILIBRATION_CONDITION_ID: [],
- SIMULATION_CONDITION_ID: [],
- MEASUREMENT: [],
- TIME: [],
- OBSERVABLE_PARAMETERS: [],
- NOISE_PARAMETERS: [],
- DATASET_ID: [],
- REPLICATE_ID: [],
- }
- )
-
-
-def measurements_have_replicates(measurement_df: pd.DataFrame) -> bool:
- """Tests whether the measurements come with replicates
-
- Arguments:
- measurement_df: Measurement table
-
- Returns:
- ``True`` if there are replicates, ``False`` otherwise
- """
- grouping_cols = core.get_notnull_columns(
- measurement_df,
- [
- OBSERVABLE_ID,
- SIMULATION_CONDITION_ID,
- PREEQUILIBRATION_CONDITION_ID,
- TIME,
- ],
- )
- return np.any(
- measurement_df.fillna("").groupby(grouping_cols).size().values - 1
- )
-
-
-def assert_overrides_match_parameter_count(
- measurement_df: pd.DataFrame, observable_df: pd.DataFrame
-) -> None:
- """Ensure that number of parameters in the observable definition matches
- the number of overrides in ``measurement_df``
-
- Arguments:
- measurement_df: PEtab measurement table
- observable_df: PEtab observable table
- """
- # sympify only once and save number of parameters
- observable_parameters_count = {
- obs_id: len(
- observables.get_formula_placeholders(formula, obs_id, "observable")
- )
- for obs_id, formula in zip(
- observable_df.index.values, observable_df[OBSERVABLE_FORMULA]
- )
- }
- noise_parameters_count = {
- obs_id: len(
- observables.get_formula_placeholders(formula, obs_id, "noise")
- )
- for obs_id, formula in zip(
- observable_df.index.values, observable_df[NOISE_FORMULA]
- )
- }
-
- for _, row in measurement_df.iterrows():
- # check observable parameters
- try:
- expected = observable_parameters_count[row[OBSERVABLE_ID]]
- except KeyError as e:
- raise ValueError(
- f"Observable {row[OBSERVABLE_ID]} used in measurement table "
- f"is not defined."
- ) from e
-
- actual = len(
- split_parameter_replacement_list(
- row.get(OBSERVABLE_PARAMETERS, None)
- )
- )
- # No overrides are also allowed
- if actual != expected:
- formula = observable_df.loc[row[OBSERVABLE_ID], OBSERVABLE_FORMULA]
- raise AssertionError(
- f"Mismatch of observable parameter overrides for "
- f"{row[OBSERVABLE_ID]} ({formula})"
- f"in:\n{row}\n"
- f"Expected {expected} but got {actual}"
- )
-
- # check noise parameters
- replacements = split_parameter_replacement_list(
- row.get(NOISE_PARAMETERS, None)
- )
- try:
- expected = noise_parameters_count[row[OBSERVABLE_ID]]
-
- # No overrides are also allowed
- if len(replacements) != expected:
- raise AssertionError(
- f"Mismatch of noise parameter overrides in:\n{row}\n"
- f"Expected {expected} but got {len(replacements)}"
- )
- except KeyError as err:
- # no overrides defined, but a numerical sigma can be provided
- # anyways
- if len(replacements) != 1 or not isinstance(
- replacements[0], numbers.Number
- ):
- raise AssertionError(
- f"No placeholders have been specified in the noise model "
- f"for observable {row[OBSERVABLE_ID]}, but parameter ID "
- "or multiple overrides were specified in the "
- "noiseParameters column."
- ) from err
-
-
-def measurement_is_at_steady_state(time: float) -> bool:
- """Check whether a measurement is at steady state.
-
- Arguments:
- time:
- The time.
-
- Returns:
- Whether the measurement is at steady state.
- """
- return math.isinf(time)
+_deprecated_import_v1(__name__)
diff --git a/petab/models/__init__.py b/petab/models/__init__.py
index a5628aaa..4b8c87d3 100644
--- a/petab/models/__init__.py
+++ b/petab/models/__init__.py
@@ -1,9 +1,7 @@
-MODEL_TYPE_SBML = "sbml"
-MODEL_TYPE_PYSB = "pysb"
+"""Deprecated module for PEtab models.
-known_model_types = {
- MODEL_TYPE_SBML,
- MODEL_TYPE_PYSB,
-}
+Use petab.v1.models instead."""
+from petab import _deprecated_import_v1
+from petab.v1.models import * # noqa: F403, F401, E402
-from .model import Model # noqa F401
+_deprecated_import_v1(__name__)
diff --git a/petab/models/model.py b/petab/models/model.py
index 5d2f63ad..72387313 100644
--- a/petab/models/model.py
+++ b/petab/models/model.py
@@ -1,154 +1,7 @@
-"""PEtab model abstraction"""
-from __future__ import annotations
+"""Deprecated module for PEtab models.
-import abc
-from pathlib import Path
-from typing import Any, Iterable
+Use petab.v1.models.model instead."""
+from petab import _deprecated_import_v1
+from petab.v1.models.model import * # noqa: F403, F401, E402
-
-class Model(abc.ABC):
- """Base class for wrappers for any PEtab-supported model type"""
-
- @abc.abstractmethod
- def __init__(self):
- ...
-
- @staticmethod
- @abc.abstractmethod
- def from_file(filepath_or_buffer: Any, model_id: str) -> Model:
- """Load the model from the given path/URL
-
- :param filepath_or_buffer: URL or path of the model
- :param model_id: Model ID
- :returns: A ``Model`` instance holding the given model
- """
- ...
-
- @abc.abstractmethod
- def to_file(self, filename: [str, Path]):
- """Save the model to the given file
-
- :param filename: Destination filename
- """
- ...
-
- @classmethod
- @property
- @abc.abstractmethod
- def type_id(cls):
- ...
-
- @property
- @abc.abstractmethod
- def model_id(self):
- ...
-
- @abc.abstractmethod
- def get_parameter_value(self, id_: str) -> float:
- """Get a parameter value
-
- :param id_: ID of the parameter whose value is to be returned
- :raises ValueError: If no parameter with the given ID exists
- :returns: The value of the given parameter as specified in the model
- """
- ...
-
- @abc.abstractmethod
- def get_free_parameter_ids_with_values(
- self,
- ) -> Iterable[tuple[str, float]]:
- """Get free model parameters along with their values
-
- Returns:
- Iterator over tuples of (parameter_id, parameter_value)
- """
- ...
-
- @abc.abstractmethod
- def get_parameter_ids(self) -> Iterable[str]:
- """Get all parameter IDs from this model
-
- :returns: Iterator over model parameter IDs
- """
- ...
-
- @abc.abstractmethod
- def has_entity_with_id(self, entity_id) -> bool:
- """Check if there is a model entity with the given ID
-
- :param entity_id: ID to check for
- :returns: ``True``, if there is an entity with the given ID,
- ``False`` otherwise
- """
- ...
-
- @abc.abstractmethod
- def get_valid_parameters_for_parameter_table(self) -> Iterable[str]:
- """Get IDs of all parameters that are allowed to occur in the PEtab
- parameters table
-
- :returns: Iterator over parameter IDs
- """
- ...
-
- @abc.abstractmethod
- def get_valid_ids_for_condition_table(self) -> Iterable[str]:
- """Get IDs of all model entities that are allowed to occur as columns
- in the PEtab conditions table.
-
- :returns: Iterator over model entity IDs
- """
- ...
-
- @abc.abstractmethod
- def symbol_allowed_in_observable_formula(self, id_: str) -> bool:
- """Check if the given ID is allowed to be used in observable and noise
- formulas
-
- :returns: ``True``, if allowed, ``False`` otherwise
- """
- ...
-
- @abc.abstractmethod
- def is_valid(self) -> bool:
- """Validate this model
-
- :returns: `True` if the model is valid, `False` if there are errors in
- this model
- """
- ...
-
- @abc.abstractmethod
- def is_state_variable(self, id_: str) -> bool:
- """Check whether the given ID corresponds to a model state variable"""
- ...
-
-
-def model_factory(
- filepath_or_buffer: Any, model_language: str, model_id: str = None
-) -> Model:
- """Create a PEtab model instance from the given model
-
- :param filepath_or_buffer: Path/URL of the model
- :param model_language: PEtab model language ID for the given model
- :param model_id: PEtab model ID for the given model
- :returns: A :py:class:`Model` instance representing the given model
- """
- from . import MODEL_TYPE_PYSB, MODEL_TYPE_SBML, known_model_types
-
- if model_language == MODEL_TYPE_SBML:
- from .sbml_model import SbmlModel
-
- return SbmlModel.from_file(filepath_or_buffer, model_id=model_id)
-
- if model_language == MODEL_TYPE_PYSB:
- from .pysb_model import PySBModel
-
- return PySBModel.from_file(filepath_or_buffer, model_id=model_id)
-
- if model_language in known_model_types:
- raise NotImplementedError(
- f"Unsupported model format: {model_language}"
- )
-
- raise ValueError(f"Unknown model format: {model_language}")
+_deprecated_import_v1(__name__)
diff --git a/petab/models/pysb_model.py b/petab/models/pysb_model.py
index 9dfd9512..f60945f4 100644
--- a/petab/models/pysb_model.py
+++ b/petab/models/pysb_model.py
@@ -1,227 +1,7 @@
-"""Functions for handling PySB models"""
+"""Deprecated module for PySB models.
-import itertools
-import re
-import sys
-from pathlib import Path
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
+Use petab.v1.models.pysb_model instead."""
+from petab import _deprecated_import_v1
+from petab.v1.models.pysb_model import * # noqa: F403, F401, E402
-import pysb
-
-from . import MODEL_TYPE_PYSB
-from .model import Model
-
-
-def _pysb_model_from_path(pysb_model_file: Union[str, Path]) -> pysb.Model:
- """Load a pysb model module and return the :class:`pysb.Model` instance
-
- :param pysb_model_file: Full or relative path to the PySB model module
- :return: The pysb Model instance
- """
- pysb_model_file = Path(pysb_model_file)
- pysb_model_module_name = pysb_model_file.with_suffix("").name
-
- import importlib.util
-
- spec = importlib.util.spec_from_file_location(
- pysb_model_module_name, pysb_model_file
- )
- module = importlib.util.module_from_spec(spec)
- sys.modules[pysb_model_module_name] = module
- spec.loader.exec_module(module)
-
- # find a pysb.Model instance in the module
- # 1) check if module.model exists and is a pysb.Model
- model = getattr(module, "model", None)
- if model:
- return model
-
- # 2) check if there is any other pysb.Model instance
- for x in dir(module):
- attr = getattr(module, x)
- if isinstance(attr, pysb.Model):
- return attr
-
- raise ValueError(f"Could not find any pysb.Model in {pysb_model_file}.")
-
-
-class PySBModel(Model):
- """PEtab wrapper for PySB models"""
-
- type_id = MODEL_TYPE_PYSB
-
- def __init__(self, model: pysb.Model, model_id: str):
- super().__init__()
-
- self.model = model
- self._model_id = model_id
-
- @staticmethod
- def from_file(filepath_or_buffer, model_id: str):
- return PySBModel(
- model=_pysb_model_from_path(filepath_or_buffer), model_id=model_id
- )
-
- def to_file(self, filename: [str, Path]):
- from pysb.export import export
-
- model_source = export(self.model, "pysb_flat")
- with open(filename, "w") as f:
- f.write(model_source)
-
- @property
- def model_id(self):
- return self._model_id
-
- @model_id.setter
- def model_id(self, model_id):
- self._model_id = model_id
-
- def get_parameter_ids(self) -> Iterable[str]:
- return (p.name for p in self.model.parameters)
-
- def get_parameter_value(self, id_: str) -> float:
- try:
- return self.model.parameters[id_].value
- except KeyError as e:
- raise ValueError(f"Parameter {id_} does not exist.") from e
-
- def get_free_parameter_ids_with_values(
- self,
- ) -> Iterable[Tuple[str, float]]:
- return ((p.name, p.value) for p in self.model.parameters)
-
- def has_entity_with_id(self, entity_id) -> bool:
- try:
- _ = self.model.components[entity_id]
- return True
- except KeyError:
- return False
-
- def get_valid_parameters_for_parameter_table(self) -> Iterable[str]:
- # all parameters are allowed in the parameter table
- return self.get_parameter_ids()
-
- def get_valid_ids_for_condition_table(self) -> Iterable[str]:
- return itertools.chain(
- self.get_parameter_ids(), self.get_compartment_ids()
- )
-
- def symbol_allowed_in_observable_formula(self, id_: str) -> bool:
- return id_ in (
- x.name
- for x in itertools.chain(
- self.model.parameters,
- self.model.observables,
- self.model.expressions,
- )
- )
-
- def is_valid(self) -> bool:
- # PySB models are always valid
- return True
-
- def is_state_variable(self, id_: str) -> bool:
- # If there is a component with that name, it's not a state variable
- # (there are no dynamically-sized compartments)
- if self.model.components.get(id_, None):
- return False
-
- # Try parsing the ID
- try:
- result = parse_species_name(id_)
- except ValueError:
- return False
- else:
- # check if the ID is plausible
- for monomer, compartment, site_config in result:
- pysb_monomer: pysb.Monomer = self.model.monomers.get(monomer)
- if pysb_monomer is None:
- return False
- if compartment:
- pysb_compartment = self.model.compartments.get(compartment)
- if pysb_compartment is None:
- return False
- for site, state in site_config.items():
- if site not in pysb_monomer.sites:
- return False
- if state not in pysb_monomer.site_states[site]:
- return False
- if set(pysb_monomer.sites) - set(site_config.keys()):
- # There are undefined sites
- return False
- return True
-
- def get_compartment_ids(self) -> Iterable[str]:
- return (compartment.name for compartment in self.model.compartments)
-
-
-def parse_species_name(
- name: str,
-) -> List[Tuple[str, Optional[str], Dict[str, Any]]]:
- """Parse a PySB species name
-
- :param name: Species name to parse
- :returns: List of species, representing complex constituents, each as
- a tuple of the monomer name, the compartment name, and a dict of sites
- mapping to site states.
- :raises ValueError: In case this is not a valid ID
- """
- if "=MultiState(" in name:
- raise NotImplementedError("MultiState is not yet supported.")
-
- complex_constituent_pattern = re.compile(
- r"^(?P<monomer>\w+)\((?P<site_config>.*)\)"
- r"( \*\* (?P<compartment>.*))?$"
- )
- result = []
- complex_constituents = name.split(" % ")
-
- for complex_constituent in complex_constituents:
- match = complex_constituent_pattern.match(complex_constituent)
- if not match:
- raise ValueError(
- f"Invalid species name: '{name}' " f"('{complex_constituent}')"
- )
- monomer = match.groupdict()["monomer"]
- site_config_str = match.groupdict()["site_config"]
- compartment = match.groupdict()["compartment"]
-
- site_config = {}
- for site_str in site_config_str.split(", "):
- if not site_str:
- continue
- site, config = site_str.split("=")
- if config == "None":
- config = None
- elif config.startswith("'"):
- if not config.endswith("'"):
- raise ValueError(
- f"Invalid species name: '{name}' " f"('{config}')"
- )
- # strip quotes
- config = config[1:-1]
- else:
- config = int(config)
- site_config[site] = config
- result.append(
- (monomer, compartment, site_config),
- )
-
- return result
-
-
-def pattern_from_string(string: str, model: pysb.Model) -> pysb.ComplexPattern:
- """Convert a pattern string to a Pattern instance"""
- parts = parse_species_name(string)
- patterns = []
- for part in parts:
- patterns.append(
- pysb.MonomerPattern(
- monomer=model.monomers.get(part[0]),
- compartment=model.compartments.get(part[1], None),
- site_conditions=part[2],
- )
- )
-
- return pysb.ComplexPattern(patterns, compartment=None)
+_deprecated_import_v1(__name__)
diff --git a/petab/models/sbml_model.py b/petab/models/sbml_model.py
index d68884fd..e754e903 100644
--- a/petab/models/sbml_model.py
+++ b/petab/models/sbml_model.py
@@ -1,222 +1,5 @@
-"""Functions for handling SBML models"""
+"""Deprecated module. Use petab.v1.models.sbml_model instead."""
+from petab import _deprecated_import_v1
+from petab.v1.models.sbml_model import * # noqa: F403, F401, E402
-import itertools
-from pathlib import Path
-from typing import Iterable, Optional, Tuple
-
-import libsbml
-import sympy as sp
-from sympy.abc import _clash
-
-from ..sbml import (
- get_sbml_model,
- is_sbml_consistent,
- load_sbml_from_string,
- write_sbml,
-)
-from . import MODEL_TYPE_SBML
-from .model import Model
-
-
-class SbmlModel(Model):
- """PEtab wrapper for SBML models"""
-
- type_id = MODEL_TYPE_SBML
-
- def __init__(
- self,
- sbml_model: libsbml.Model = None,
- sbml_reader: libsbml.SBMLReader = None,
- sbml_document: libsbml.SBMLDocument = None,
- model_id: str = None,
- ):
- super().__init__()
-
- self.sbml_reader: Optional[libsbml.SBMLReader] = sbml_reader
- self.sbml_document: Optional[libsbml.SBMLDocument] = sbml_document
- self.sbml_model: Optional[libsbml.Model] = sbml_model
-
- self._model_id = model_id or sbml_model.getIdAttribute()
-
- def __getstate__(self):
- """Return state for pickling"""
- state = self.__dict__.copy()
-
- # libsbml stuff cannot be serialized directly
- if self.sbml_model:
- sbml_document = self.sbml_model.getSBMLDocument()
- sbml_writer = libsbml.SBMLWriter()
- state["sbml_string"] = sbml_writer.writeSBMLToString(sbml_document)
-
- exclude = ["sbml_reader", "sbml_document", "sbml_model"]
- for key in exclude:
- state.pop(key)
-
- return state
-
- def __setstate__(self, state):
- """Set state after unpickling"""
- # load SBML model from pickled string
- sbml_string = state.pop("sbml_string", None)
- if sbml_string:
- (
- self.sbml_reader,
- self.sbml_document,
- self.sbml_model,
- ) = load_sbml_from_string(sbml_string)
-
- self.__dict__.update(state)
-
- @staticmethod
- def from_file(filepath_or_buffer, model_id: str = None):
- sbml_reader, sbml_document, sbml_model = get_sbml_model(
- filepath_or_buffer
- )
- return SbmlModel(
- sbml_model=sbml_model,
- sbml_reader=sbml_reader,
- sbml_document=sbml_document,
- model_id=model_id,
- )
-
- @property
- def model_id(self):
- return self._model_id
-
- @model_id.setter
- def model_id(self, model_id):
- self._model_id = model_id
-
- def to_file(self, filename: [str, Path]):
- write_sbml(
- self.sbml_document or self.sbml_model.getSBMLDocument(), filename
- )
-
- def get_parameter_value(self, id_: str) -> float:
- parameter = self.sbml_model.getParameter(id_)
- if not parameter:
- raise ValueError(f"Parameter {id_} does not exist.")
- return parameter.getValue()
-
- def get_free_parameter_ids_with_values(
- self,
- ) -> Iterable[Tuple[str, float]]:
- rule_targets = {
- ar.getVariable() for ar in self.sbml_model.getListOfRules()
- }
-
- def get_initial(p):
- # return the initial assignment value if there is one, and it is a
- # number; `None`, if there is a non-numeric initial assignment;
- # otherwise, the parameter value
- if ia := self.sbml_model.getInitialAssignmentBySymbol(p.getId()):
- sym_expr = sympify_sbml(ia.getMath())
- return (
- float(sym_expr.evalf())
- if sym_expr.evalf().is_Number
- else None
- )
- return p.getValue()
-
- return (
- (p.getId(), initial)
- for p in self.sbml_model.getListOfParameters()
- if p.getId() not in rule_targets
- and (initial := get_initial(p)) is not None
- )
-
- def get_parameter_ids(self) -> Iterable[str]:
- rule_targets = {
- ar.getVariable() for ar in self.sbml_model.getListOfRules()
- }
-
- return (
- p.getId()
- for p in self.sbml_model.getListOfParameters()
- if p.getId() not in rule_targets
- )
-
- def get_parameter_ids_with_values(self) -> Iterable[Tuple[str, float]]:
- rule_targets = {
- ar.getVariable() for ar in self.sbml_model.getListOfRules()
- }
-
- return (
- (p.getId(), p.getValue())
- for p in self.sbml_model.getListOfParameters()
- if p.getId() not in rule_targets
- )
-
- def has_entity_with_id(self, entity_id) -> bool:
- return self.sbml_model.getElementBySId(entity_id) is not None
-
- def get_valid_parameters_for_parameter_table(self) -> Iterable[str]:
- # All parameters except rule-targets
- disallowed_set = {
- ar.getVariable() for ar in self.sbml_model.getListOfRules()
- }
-
- return (
- p.getId()
- for p in self.sbml_model.getListOfParameters()
- if p.getId() not in disallowed_set
- )
-
- def get_valid_ids_for_condition_table(self) -> Iterable[str]:
- return (
- x.getId()
- for x in itertools.chain(
- self.sbml_model.getListOfParameters(),
- self.sbml_model.getListOfSpecies(),
- self.sbml_model.getListOfCompartments(),
- )
- )
-
- def symbol_allowed_in_observable_formula(self, id_: str) -> bool:
- return self.sbml_model.getElementBySId(id_) or id_ == "time"
-
- def is_valid(self) -> bool:
- return is_sbml_consistent(self.sbml_model.getSBMLDocument())
-
- def is_state_variable(self, id_: str) -> bool:
- return (
- self.sbml_model.getSpecies(id_) is not None
- or self.sbml_model.getCompartment(id_) is not None
- or self.sbml_model.getRuleByVariable(id_) is not None
- )
-
-
-def sympify_sbml(sbml_obj: libsbml.ASTNode | libsbml.SBase) -> sp.Expr:
- """Convert SBML math expression to sympy expression.
-
- Parameters
- ----------
- sbml_obj:
- SBML math element or an SBML object with a math element.
-
- Returns
- -------
- The sympy expression corresponding to ``sbml_obj``.
- """
- ast_node = (
- sbml_obj
- if isinstance(sbml_obj, libsbml.ASTNode)
- else sbml_obj.getMath()
- )
-
- parser_settings = libsbml.L3ParserSettings(
- ast_node.getParentSBMLObject().getModel(),
- libsbml.L3P_PARSE_LOG_AS_LOG10,
- libsbml.L3P_EXPAND_UNARY_MINUS,
- libsbml.L3P_NO_UNITS,
- libsbml.L3P_AVOGADRO_IS_CSYMBOL,
- libsbml.L3P_COMPARE_BUILTINS_CASE_INSENSITIVE,
- None,
- libsbml.L3P_MODULO_IS_PIECEWISE,
- )
-
- formula_str = libsbml.formulaToL3StringWithSettings(
- ast_node, parser_settings
- )
-
- return sp.sympify(formula_str, locals=_clash)
+_deprecated_import_v1(__name__)
diff --git a/petab/observables.py b/petab/observables.py
index 35cbd705..0d94736b 100644
--- a/petab/observables.py
+++ b/petab/observables.py
@@ -1,228 +1,8 @@
-"""Functions for working with the PEtab observables table"""
+"""Deprecated module for observable tables.
-import re
-from collections import OrderedDict
-from pathlib import Path
-from typing import List, Literal, Union
+Use petab.v1.observables instead.
+"""
+from petab import _deprecated_import_v1
+from petab.v1.observables import * # noqa: F403, F401, E402
-import pandas as pd
-
-from . import core, lint
-from .C import * # noqa: F403
-from .math import sympify_petab
-from .models import Model
-
-__all__ = [
- "create_observable_df",
- "get_formula_placeholders",
- "get_observable_df",
- "get_output_parameters",
- "get_placeholders",
- "write_observable_df",
-]
-
-
-def get_observable_df(
- observable_file: Union[str, pd.DataFrame, Path, None],
-) -> Union[pd.DataFrame, None]:
- """
- Read the provided observable file into a ``pandas.Dataframe``.
-
- Arguments:
- observable_file: Name of the file to read from or pandas.Dataframe.
-
- Returns:
- Observable DataFrame
- """
- if observable_file is None:
- return observable_file
-
- if isinstance(observable_file, (str, Path)):
- observable_file = pd.read_csv(
- observable_file, sep="\t", float_precision="round_trip"
- )
-
- lint.assert_no_leading_trailing_whitespace(
- observable_file.columns.values, "observable"
- )
-
- if not isinstance(observable_file.index, pd.RangeIndex):
- observable_file.reset_index(
- drop=observable_file.index.name != OBSERVABLE_ID,
- inplace=True,
- )
-
- try:
- observable_file.set_index([OBSERVABLE_ID], inplace=True)
- except KeyError:
- raise KeyError(
- f"Observable table missing mandatory field {OBSERVABLE_ID}."
- ) from None
-
- return observable_file
-
-
-def write_observable_df(df: pd.DataFrame, filename: Union[str, Path]) -> None:
- """Write PEtab observable table
-
- Arguments:
- df: PEtab observable table
- filename: Destination file name
- """
- df = get_observable_df(df)
- df.to_csv(filename, sep="\t", index=True)
-
-
-def get_output_parameters(
- observable_df: pd.DataFrame,
- model: Model,
- observables: bool = True,
- noise: bool = True,
- mapping_df: pd.DataFrame = None,
-) -> List[str]:
- """Get output parameters
-
- Returns IDs of parameters used in observable and noise formulas that are
- not defined in the model.
-
- Arguments:
- observable_df: PEtab observable table
- model: The underlying model
- observables: Include parameters from observableFormulas
- noise: Include parameters from noiseFormulas
- mapping_df: PEtab mapping table
-
- Returns:
- List of output parameter IDs
- """
- formulas = []
- if observables:
- formulas.extend(observable_df[OBSERVABLE_FORMULA])
- if noise and NOISE_FORMULA in observable_df:
- formulas.extend(observable_df[NOISE_FORMULA])
- output_parameters = OrderedDict()
-
- for formula in formulas:
- free_syms = sorted(
- sympify_petab(formula).free_symbols,
- key=lambda symbol: symbol.name,
- )
- for free_sym in free_syms:
- sym = str(free_sym)
- if model.symbol_allowed_in_observable_formula(sym):
- continue
-
- # does it map to a model entity?
- if (
- mapping_df is not None
- and sym in mapping_df.index
- and model.symbol_allowed_in_observable_formula(
- mapping_df.loc[sym, MODEL_ENTITY_ID]
- )
- ):
- continue
-
- output_parameters[sym] = None
-
- return list(output_parameters.keys())
-
-
-def get_formula_placeholders(
- formula_string: str,
- observable_id: str,
- override_type: Literal["observable", "noise"],
-) -> List[str]:
- """
- Get placeholder variables in noise or observable definition for the
- given observable ID.
-
- Arguments:
- formula_string: observable formula
- observable_id: ID of current observable
- override_type: ``'observable'`` or ``'noise'``, depending on whether
- ``formula`` is for observable or for noise model
-
- Returns:
- List of placeholder parameter IDs in the order expected in the
- observableParameter column of the measurement table.
- """
- if not formula_string:
- return []
-
- if not isinstance(formula_string, str):
- return []
-
- pattern = re.compile(
- r"(?:^|\W)("
- + re.escape(override_type)
- + r"Parameter\d+_"
- + re.escape(observable_id)
- + r")(?=\W|$)"
- )
- placeholder_set = set(pattern.findall(formula_string))
-
- # need to sort and check that there are no gaps in numbering
- placeholders = [
- f"{override_type}Parameter{i}_{observable_id}"
- for i in range(1, len(placeholder_set) + 1)
- ]
-
- if placeholder_set != set(placeholders):
- raise AssertionError(
- "Non-consecutive numbering of placeholder "
- f"parameter for {placeholder_set}"
- )
-
- return placeholders
-
-
-def get_placeholders(
- observable_df: pd.DataFrame,
- observables: bool = True,
- noise: bool = True,
-) -> List[str]:
- """Get all placeholder parameters from observable table observableFormulas
- and noiseFormulas
-
- Arguments:
- observable_df: PEtab observable table
- observables: Include parameters from observableFormulas
- noise: Include parameters from noiseFormulas
-
- Returns:
- List of placeholder parameters from observable table observableFormulas
- and noiseFormulas.
- """
- # collect placeholder parameters overwritten by
- # {observable,noise}Parameters
- placeholder_types = []
- formula_columns = []
- if observables:
- placeholder_types.append("observable")
- formula_columns.append(OBSERVABLE_FORMULA)
- if noise:
- placeholder_types.append("noise")
- formula_columns.append(NOISE_FORMULA)
-
- placeholders = []
- for _, row in observable_df.iterrows():
- for placeholder_type, formula_column in zip(
- placeholder_types, formula_columns
- ):
- if formula_column not in row:
- continue
-
- cur_placeholders = get_formula_placeholders(
- row[formula_column], row.name, placeholder_type
- )
- placeholders.extend(cur_placeholders)
- return core.unique_preserve_order(placeholders)
-
-
-def create_observable_df() -> pd.DataFrame:
- """Create empty observable dataframe
-
- Returns:
- Created DataFrame
- """
- return pd.DataFrame(data={col: [] for col in OBSERVABLE_DF_COLS})
+_deprecated_import_v1(__name__)
diff --git a/petab/parameter_mapping.py b/petab/parameter_mapping.py
index 4fa3115d..79598380 100644
--- a/petab/parameter_mapping.py
+++ b/petab/parameter_mapping.py
@@ -1,801 +1,7 @@
-"""Functions related to mapping parameter from model to parameter estimation
-problem
-"""
+"""Deprecated module for parameter mapping.
-import logging
-import numbers
-import os
-import re
-import warnings
-from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, Union
+Use petab.v1.parameter_mapping instead."""
+from petab import _deprecated_import_v1
+from petab.v1.parameter_mapping import * # noqa: F403, F401, E402
-import libsbml
-import numpy as np
-import pandas as pd
-
-from . import (
- ENV_NUM_THREADS,
- core,
- lint,
- measurements,
- observables,
- parameters,
-)
-from .C import * # noqa: F403
-from .mapping import resolve_mapping
-from .models import Model
-
-logger = logging.getLogger(__name__)
-__all__ = [
- "get_optimization_to_simulation_parameter_mapping",
- "get_parameter_mapping_for_condition",
- "handle_missing_overrides",
- "merge_preeq_and_sim_pars",
- "merge_preeq_and_sim_pars_condition",
- "ParMappingDict",
- "ParMappingDictTuple",
- "ScaleMappingDict",
- "ScaleMappingDictTuple",
- "ParMappingDictQuadruple",
-]
-
-
-# Parameter mapping for condition
-ParMappingDict = Dict[str, Union[str, numbers.Number]]
-# Parameter mapping for combination of preequilibration and simulation
-# condition
-ParMappingDictTuple = Tuple[ParMappingDict, ParMappingDict]
-# Same for scale mapping
-ScaleMappingDict = Dict[str, str]
-ScaleMappingDictTuple = Tuple[ScaleMappingDict, ScaleMappingDict]
-# Parameter mapping for combination of preequilibration and simulation
-# conditions, for parameter and scale mapping
-ParMappingDictQuadruple = Tuple[
- ParMappingDict, ParMappingDict, ScaleMappingDict, ScaleMappingDict
-]
-
-
-def get_optimization_to_simulation_parameter_mapping(
- condition_df: pd.DataFrame,
- measurement_df: pd.DataFrame,
- parameter_df: Optional[pd.DataFrame] = None,
- observable_df: Optional[pd.DataFrame] = None,
- mapping_df: Optional[pd.DataFrame] = None,
- sbml_model: libsbml.Model = None,
- simulation_conditions: Optional[pd.DataFrame] = None,
- warn_unmapped: Optional[bool] = True,
- scaled_parameters: bool = False,
- fill_fixed_parameters: bool = True,
- allow_timepoint_specific_numeric_noise_parameters: bool = False,
- model: Model = None,
-) -> List[ParMappingDictQuadruple]:
- """
- Create list of mapping dicts from PEtab-problem to model parameters.
-
- Mapping can be performed in parallel. The number of threads is controlled
- by the environment variable with the name of
- :py:data:`petab.ENV_NUM_THREADS`.
-
- Parameters:
- condition_df, measurement_df, parameter_df, observable_df:
- The dataframes in the PEtab format.
- sbml_model:
- The SBML model (deprecated)
- model:
- The model.
- simulation_conditions:
- Table of simulation conditions as created by
- ``petab.get_simulation_conditions``.
- warn_unmapped:
- If ``True``, log warning regarding unmapped parameters
- scaled_parameters:
- Whether parameter values should be scaled.
- fill_fixed_parameters:
- Whether to fill in nominal values for fixed parameters
- (estimate=0 in parameters table).
- allow_timepoint_specific_numeric_noise_parameters:
- Mapping of timepoint-specific parameters overrides is generally
- not supported. If this option is set to True, this function will
- not fail in case of timepoint-specific fixed noise parameters,
- if the noise formula consists only of one single parameter.
- It is expected that the respective mapping is performed elsewhere.
- The value mapped to the respective parameter here is undefined.
-
- Returns:
- Parameter value and parameter scale mapping for all conditions.
-
- The length of the returned array is the number of unique combinations
- of ``simulationConditionId`` s and ``preequilibrationConditionId`` s
- from the measurement table. Each entry is a tuple of four dicts of
- length equal to the number of model parameters.
- The first two dicts map simulation parameter IDs to optimization
- parameter IDs or values (where values are fixed) for preequilibration
- and simulation condition, respectively.
- The last two dicts map simulation parameter IDs to the parameter scale
- of the respective parameter, again for preequilibration and simulation
- condition.
- If no preequilibration condition is defined, the respective dicts will
- be empty. ``NaN`` is used where no mapping exists.
- """
- if sbml_model:
- warnings.warn(
- "Passing a model via the `sbml_model` argument is "
- "deprecated, use `model=petab.models.sbml_model."
- "SbmlModel(...)` instead.",
- DeprecationWarning,
- stacklevel=2,
- )
- from petab.models.sbml_model import SbmlModel
-
- if model:
- raise ValueError(
- "Arguments `model` and `sbml_model` are " "mutually exclusive."
- )
- model = SbmlModel(sbml_model=sbml_model)
-
- # Ensure inputs are okay
- _perform_mapping_checks(
- measurement_df,
- allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501
- )
-
- if simulation_conditions is None:
- simulation_conditions = measurements.get_simulation_conditions(
- measurement_df
- )
-
- simulation_parameters = dict(model.get_free_parameter_ids_with_values())
- # Add output parameters that are not already defined in the model
- if observable_df is not None:
- output_parameters = observables.get_output_parameters(
- observable_df=observable_df, model=model, mapping_df=mapping_df
- )
- for par_id in output_parameters:
- simulation_parameters[par_id] = np.nan
-
- num_threads = int(os.environ.get(ENV_NUM_THREADS, 1))
-
- # If sequential execution is requested, let's not create any
- # thread-allocation overhead
- if num_threads == 1:
- mapping = map(
- _map_condition,
- _map_condition_arg_packer(
- simulation_conditions,
- measurement_df,
- condition_df,
- parameter_df,
- mapping_df,
- model,
- simulation_parameters,
- warn_unmapped,
- scaled_parameters,
- fill_fixed_parameters,
- allow_timepoint_specific_numeric_noise_parameters,
- ),
- )
- return list(mapping)
-
- # Run multi-threaded
- from concurrent.futures import ThreadPoolExecutor
-
- with ThreadPoolExecutor(max_workers=num_threads) as executor:
- mapping = executor.map(
- _map_condition,
- _map_condition_arg_packer(
- simulation_conditions,
- measurement_df,
- condition_df,
- parameter_df,
- mapping_df,
- model,
- simulation_parameters,
- warn_unmapped,
- scaled_parameters,
- fill_fixed_parameters,
- allow_timepoint_specific_numeric_noise_parameters,
- ),
- )
- return list(mapping)
-
-
-def _map_condition_arg_packer(
- simulation_conditions,
- measurement_df,
- condition_df,
- parameter_df,
- mapping_df,
- model,
- simulation_parameters,
- warn_unmapped,
- scaled_parameters,
- fill_fixed_parameters,
- allow_timepoint_specific_numeric_noise_parameters,
-):
- """Helper function to pack extra arguments for _map_condition"""
- for _, condition in simulation_conditions.iterrows():
- yield (
- condition,
- measurement_df,
- condition_df,
- parameter_df,
- mapping_df,
- model,
- simulation_parameters,
- warn_unmapped,
- scaled_parameters,
- fill_fixed_parameters,
- allow_timepoint_specific_numeric_noise_parameters,
- )
-
-
-def _map_condition(packed_args):
- """Helper function for parallel condition mapping.
-
- For arguments see
- :py:func:`get_optimization_to_simulation_parameter_mapping`.
- """
- (
- condition,
- measurement_df,
- condition_df,
- parameter_df,
- mapping_df,
- model,
- simulation_parameters,
- warn_unmapped,
- scaled_parameters,
- fill_fixed_parameters,
- allow_timepoint_specific_numeric_noise_parameters,
- ) = packed_args
-
- cur_measurement_df = None
- # Get the condition specific measurements for the current condition, but
- # only if relevant for parameter mapping
- if (
- OBSERVABLE_PARAMETERS in measurement_df
- and measurement_df[OBSERVABLE_PARAMETERS].notna().any()
- ) or (
- NOISE_PARAMETERS in measurement_df
- and measurement_df[NOISE_PARAMETERS].notna().any()
- ):
- cur_measurement_df = measurements.get_rows_for_condition(
- measurement_df, condition
- )
-
- if (
- PREEQUILIBRATION_CONDITION_ID not in condition
- or not isinstance(condition[PREEQUILIBRATION_CONDITION_ID], str)
- or not condition[PREEQUILIBRATION_CONDITION_ID]
- ):
- par_map_preeq = {}
- scale_map_preeq = {}
- else:
- par_map_preeq, scale_map_preeq = get_parameter_mapping_for_condition(
- condition_id=condition[PREEQUILIBRATION_CONDITION_ID],
- is_preeq=True,
- cur_measurement_df=cur_measurement_df,
- model=model,
- condition_df=condition_df,
- parameter_df=parameter_df,
- mapping_df=mapping_df,
- simulation_parameters=simulation_parameters,
- warn_unmapped=warn_unmapped,
- scaled_parameters=scaled_parameters,
- fill_fixed_parameters=fill_fixed_parameters,
- allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501
- )
-
- par_map_sim, scale_map_sim = get_parameter_mapping_for_condition(
- condition_id=condition[SIMULATION_CONDITION_ID],
- is_preeq=False,
- cur_measurement_df=cur_measurement_df,
- model=model,
- condition_df=condition_df,
- parameter_df=parameter_df,
- mapping_df=mapping_df,
- simulation_parameters=simulation_parameters,
- warn_unmapped=warn_unmapped,
- scaled_parameters=scaled_parameters,
- fill_fixed_parameters=fill_fixed_parameters,
- allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501
- )
-
- return par_map_preeq, par_map_sim, scale_map_preeq, scale_map_sim
-
-
-def get_parameter_mapping_for_condition(
- condition_id: str,
- is_preeq: bool,
- cur_measurement_df: Optional[pd.DataFrame] = None,
- sbml_model: libsbml.Model = None,
- condition_df: pd.DataFrame = None,
- parameter_df: pd.DataFrame = None,
- mapping_df: Optional[pd.DataFrame] = None,
- simulation_parameters: Optional[Dict[str, str]] = None,
- warn_unmapped: bool = True,
- scaled_parameters: bool = False,
- fill_fixed_parameters: bool = True,
- allow_timepoint_specific_numeric_noise_parameters: bool = False,
- model: Model = None,
-) -> Tuple[ParMappingDict, ScaleMappingDict]:
- """
- Create dictionary of parameter value and parameter scale mappings from
- PEtab-problem to SBML parameters for the given condition.
-
- Parameters:
- condition_id:
- Condition ID for which to perform mapping
- is_preeq:
- If ``True``, output parameters will not be mapped
- cur_measurement_df:
- Measurement sub-table for current condition, can be ``None`` if
- not relevant for parameter mapping
- condition_df:
- PEtab condition DataFrame
- parameter_df:
- PEtab parameter DataFrame
- mapping_df:
- PEtab mapping DataFrame
- sbml_model:
- The SBML model (deprecated)
- model:
- The model.
- simulation_parameters:
- Model simulation parameter IDs mapped to parameter values (output
- of ``petab.sbml.get_model_parameters(.., with_values=True)``).
- Optional, saves time if precomputed.
- warn_unmapped:
- If ``True``, log warning regarding unmapped parameters
- scaled_parameters:
- Whether parameter values should be scaled.
- fill_fixed_parameters:
- Whether to fill in nominal values for fixed parameters
- (estimate=0 in parameters table).
- allow_timepoint_specific_numeric_noise_parameters:
- Mapping of timepoint-specific parameters overrides is generally
- not supported. If this option is set to True, this function will
- not fail in case of timepoint-specific fixed noise parameters,
- if the noise formula consists only of one single parameter.
- It is expected that the respective mapping is performed elsewhere.
- The value mapped to the respective parameter here is undefined.
-
- Returns:
- Tuple of two dictionaries. First dictionary mapping model parameter IDs
- to mapped parameters IDs to be estimated or to filled-in values in case
- of non-estimated parameters.
- Second dictionary mapping model parameter IDs to their scale.
- ``NaN`` is used where no mapping exists.
- """
- if sbml_model:
- warnings.warn(
- "Passing a model via the `sbml_model` argument is "
- "deprecated, use `model=petab.models.sbml_model."
- "SbmlModel(...)` instead.",
- DeprecationWarning,
- stacklevel=2,
- )
- from petab.models.sbml_model import SbmlModel
-
- if model:
- raise ValueError(
- "Arguments `model` and `sbml_model` are " "mutually exclusive."
- )
- model = SbmlModel(sbml_model=sbml_model)
-
- if cur_measurement_df is not None:
- _perform_mapping_checks(
- cur_measurement_df,
- allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501
- )
-
- if simulation_parameters is None:
- simulation_parameters = dict(
- model.get_free_parameter_ids_with_values()
- )
-
- # NOTE: order matters here - the former is overwritten by the latter:
- # model < condition table < measurement < table parameter table
-
- # initialize mapping dicts
- # for the case of matching simulation and optimization parameter vector
- par_mapping = simulation_parameters.copy()
- scale_mapping = {par_id: LIN for par_id in par_mapping.keys()}
- _output_parameters_to_nan(par_mapping)
-
- # not strictly necessary for preequilibration, be we do it to have
- # same length of parameter vectors
- if cur_measurement_df is not None:
- _apply_output_parameter_overrides(par_mapping, cur_measurement_df)
-
- if not is_preeq:
- handle_missing_overrides(par_mapping, warn=warn_unmapped)
-
- _apply_condition_parameters(
- par_mapping,
- scale_mapping,
- condition_id,
- condition_df,
- model,
- mapping_df,
- )
- _apply_parameter_table(
- par_mapping,
- scale_mapping,
- parameter_df,
- scaled_parameters,
- fill_fixed_parameters,
- )
-
- return par_mapping, scale_mapping
-
-
-def _output_parameters_to_nan(mapping: ParMappingDict) -> None:
- """Set output parameters in mapping dictionary to nan"""
- rex = re.compile("^(noise|observable)Parameter[0-9]+_")
- for key in mapping.keys():
- try:
- matches = rex.match(key)
- except TypeError:
- continue
-
- if matches:
- mapping[key] = np.nan
-
-
-def _apply_output_parameter_overrides(
- mapping: ParMappingDict, cur_measurement_df: pd.DataFrame
-) -> None:
- """
- Apply output parameter overrides to the parameter mapping dict for a given
- condition as defined in the measurement table (``observableParameter``,
- ``noiseParameters``).
-
- Arguments:
- mapping: parameter mapping dict as obtained from
- :py:func:`get_parameter_mapping_for_condition`.
- cur_measurement_df:
- Subset of the measurement table for the current condition
- """
- for _, row in cur_measurement_df.iterrows():
- # we trust that the number of overrides matches (see above)
- overrides = measurements.split_parameter_replacement_list(
- row.get(OBSERVABLE_PARAMETERS, None)
- )
- _apply_overrides_for_observable(
- mapping, row[OBSERVABLE_ID], "observable", overrides
- )
-
- overrides = measurements.split_parameter_replacement_list(
- row.get(NOISE_PARAMETERS, None)
- )
- _apply_overrides_for_observable(
- mapping, row[OBSERVABLE_ID], "noise", overrides
- )
-
-
-def _apply_overrides_for_observable(
- mapping: ParMappingDict,
- observable_id: str,
- override_type: Literal["observable", "noise"],
- overrides: List[str],
-) -> None:
- """
- Apply parameter-overrides for observables and noises to mapping
- matrix.
-
- Arguments:
- mapping: mapping dict to which to apply overrides
- observable_id: observable ID
- override_type: ``'observable'`` or ``'noise'``
- overrides: list of overrides for noise or observable parameters
- """
- for i, override in enumerate(overrides):
- overridee_id = f"{override_type}Parameter{i+1}_{observable_id}"
- mapping[overridee_id] = override
-
-
-def _apply_condition_parameters(
- par_mapping: ParMappingDict,
- scale_mapping: ScaleMappingDict,
- condition_id: str,
- condition_df: pd.DataFrame,
- model: Model,
- mapping_df: Optional[pd.DataFrame] = None,
-) -> None:
- """Replace parameter IDs in parameter mapping dictionary by condition
- table parameter values (in-place).
-
- Arguments:
- par_mapping: see :py:func:`get_parameter_mapping_for_condition`
- condition_id: ID of condition to work on
- condition_df: PEtab condition table
- """
- for overridee_id in condition_df.columns:
- if overridee_id == CONDITION_NAME:
- continue
-
- overridee_id = resolve_mapping(mapping_df, overridee_id)
-
- # Species, compartments, and rule targets are handled elsewhere
- if model.is_state_variable(overridee_id):
- continue
-
- par_mapping[overridee_id] = core.to_float_if_float(
- condition_df.loc[condition_id, overridee_id]
- )
-
- if isinstance(par_mapping[overridee_id], numbers.Number) and np.isnan(
- par_mapping[overridee_id]
- ):
- # NaN in the condition table for an entity without time derivative
- # indicates that the model value should be used
- try:
- par_mapping[overridee_id] = model.get_parameter_value(
- overridee_id
- )
- except ValueError as e:
- raise NotImplementedError(
- "Not sure how to handle NaN in condition table for "
- f"{overridee_id}."
- ) from e
-
- scale_mapping[overridee_id] = LIN
-
-
-def _apply_parameter_table(
- par_mapping: ParMappingDict,
- scale_mapping: ScaleMappingDict,
- parameter_df: Optional[pd.DataFrame] = None,
- scaled_parameters: bool = False,
- fill_fixed_parameters: bool = True,
-) -> None:
- """Replace parameters from parameter table in mapping list for a given
- condition and set the corresponding scale.
-
- Replace non-estimated parameters by ``nominalValues``
- (un-scaled / lin-scaled), replace estimated parameters by the respective
- ID.
-
- Arguments:
- par_mapping:
- mapping dict obtained from
- :py:func:`get_parameter_mapping_for_condition`
- parameter_df:
- PEtab parameter table
- """
- if parameter_df is None:
- return
-
- for row in parameter_df.itertuples():
- if row.Index not in par_mapping:
- # The current parameter is not required for this condition
- continue
-
- scale = getattr(row, PARAMETER_SCALE, LIN)
- scale_mapping[row.Index] = scale
- if fill_fixed_parameters and getattr(row, ESTIMATE) == 0:
- val = getattr(row, NOMINAL_VALUE)
- if scaled_parameters:
- val = parameters.scale(val, scale)
- else:
- scale_mapping[row.Index] = LIN
- par_mapping[row.Index] = val
- else:
- par_mapping[row.Index] = row.Index
-
- # Replace any leftover mapped parameter coming from condition table
- for problem_par, sim_par in par_mapping.items():
- # string indicates unmapped
- if not isinstance(sim_par, str):
- continue
-
- try:
- # the overridee is a model parameter
- par_mapping[problem_par] = par_mapping[sim_par]
- scale_mapping[problem_par] = scale_mapping[sim_par]
- except KeyError:
- if parameter_df is None:
- raise
-
- # or the overridee is only defined in the parameter table
- scale = (
- parameter_df.loc[sim_par, PARAMETER_SCALE]
- if PARAMETER_SCALE in parameter_df
- else LIN
- )
-
- if (
- fill_fixed_parameters
- and ESTIMATE in parameter_df
- and parameter_df.loc[sim_par, ESTIMATE] == 0
- ):
- val = parameter_df.loc[sim_par, NOMINAL_VALUE]
- if scaled_parameters:
- val = parameters.scale(val, scale)
- else:
- scale = LIN
- par_mapping[problem_par] = val
-
- scale_mapping[problem_par] = scale
-
-
-def _perform_mapping_checks(
- measurement_df: pd.DataFrame,
- allow_timepoint_specific_numeric_noise_parameters: bool = False,
-) -> None:
- """Check for PEtab features which we can't account for during parameter
- mapping.
- """
- if lint.measurement_table_has_timepoint_specific_mappings(
- measurement_df,
- allow_scalar_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501
- ):
- # we could allow that for floats, since they don't matter in this
- # function and would be simply ignored
- raise ValueError(
- "Timepoint-specific parameter overrides currently unsupported."
- )
-
-
-def handle_missing_overrides(
- mapping_par_opt_to_par_sim: ParMappingDict,
- warn: bool = True,
- condition_id: str = None,
-) -> None:
- """
- Find all observable parameters and noise parameters that were not mapped
- and set their mapping to np.nan.
-
- Assumes that parameters matching the regular expression
- ``(noise|observable)Parameter[0-9]+_`` were all supposed to be overwritten.
-
- Parameters:
- mapping_par_opt_to_par_sim:
- Output of :py:func:`get_parameter_mapping_for_condition`
- warn:
- If True, log warning regarding unmapped parameters
- condition_id:
- Optional condition ID for more informative output
- """
- _missed_vals = []
- rex = re.compile("^(noise|observable)Parameter[0-9]+_")
- for key, val in mapping_par_opt_to_par_sim.items():
- try:
- matches = rex.match(val)
- except TypeError:
- continue
-
- if matches:
- mapping_par_opt_to_par_sim[key] = np.nan
- _missed_vals.append(key)
-
- if _missed_vals and warn:
- logger.warning(
- f"Could not map the following overrides for condition "
- f"{condition_id}: "
- f"{_missed_vals}. Usually, this is just due to missing "
- f"data points."
- )
-
-
-def merge_preeq_and_sim_pars_condition(
- condition_map_preeq: ParMappingDict,
- condition_map_sim: ParMappingDict,
- condition_scale_map_preeq: ScaleMappingDict,
- condition_scale_map_sim: ScaleMappingDict,
- condition: Any,
-) -> None:
- """Merge preequilibration and simulation parameters and scales for a single
- condition while checking for compatibility.
-
- This function is meant for the case where we cannot have different
- parameters (and scales) for preequilibration and simulation. Therefore,
- merge both and ensure matching scales and parameters.
- ``condition_map_sim`` and ``condition_scale_map_sim`` will be modified in
- place.
-
- Arguments:
- condition_map_preeq, condition_map_sim:
- Parameter mapping as obtained from
- :py:func:`get_parameter_mapping_for_condition`
- condition_scale_map_preeq, condition_scale_map_sim:
- Parameter scale mapping as obtained from
- :py:func:`get_parameter_mapping_for_condition`
- condition: Condition identifier for more informative error messages
- """
- if not condition_map_preeq:
- # nothing to do
- return
-
- all_par_ids = set(condition_map_sim.keys()) | set(
- condition_map_preeq.keys()
- )
-
- for par_id in all_par_ids:
- if par_id not in condition_map_preeq:
- # nothing to do
- continue
-
- if par_id not in condition_map_sim:
- # unmapped for simulation -> just use preeq values
- condition_map_sim[par_id] = condition_map_preeq[par_id]
- condition_scale_map_sim[par_id] = condition_scale_map_preeq[par_id]
- continue
-
- # present in both
- par_preeq = condition_map_preeq[par_id]
- par_sim = condition_map_sim[par_id]
- if par_preeq != par_sim and not (
- core.is_empty(par_sim) and core.is_empty(par_preeq)
- ):
- # both identical or both nan is okay
- if core.is_empty(par_sim):
- # unmapped for simulation
- condition_map_sim[par_id] = par_preeq
- elif core.is_empty(par_preeq):
- # unmapped for preeq is okay
- pass
- else:
- raise ValueError(
- "Cannot handle different values for dynamic "
- f"parameters: for condition {condition} "
- f"parameter {par_id} is {par_preeq} for preeq "
- f"and {par_sim} for simulation."
- )
-
- scale_preeq = condition_scale_map_preeq[par_id]
- scale_sim = condition_scale_map_sim[par_id]
-
- if scale_preeq != scale_sim:
- # both identical is okay
- if core.is_empty(par_sim):
- # unmapped for simulation
- condition_scale_map_sim[par_id] = scale_preeq
- elif core.is_empty(par_preeq):
- # unmapped for preeq is okay
- pass
- else:
- raise ValueError(
- "Cannot handle different parameter scales "
- f"parameters: for condition {condition} "
- f"scale for parameter {par_id} is {scale_preeq} for preeq "
- f"and {scale_sim} for simulation."
- )
-
-
-def merge_preeq_and_sim_pars(
- parameter_mappings: Iterable[ParMappingDictTuple],
- scale_mappings: Iterable[ScaleMappingDictTuple],
-) -> Tuple[List[ParMappingDictTuple], List[ScaleMappingDictTuple]]:
- """Merge preequilibration and simulation parameters and scales for a list
- of conditions while checking for compatibility.
-
- Parameters:
- parameter_mappings:
- As returned by
- :py:func:`petab.get_optimization_to_simulation_parameter_mapping`.
- scale_mappings:
- As returned by
- :py:func:`petab.get_optimization_to_simulation_parameter_mapping`.
-
- Returns:
- The parameter and scale simulation mappings, modified and checked.
- """
- parameter_mapping = []
- scale_mapping = []
- for ic, (
- (map_preeq, map_sim),
- (scale_map_preeq, scale_map_sim),
- ) in enumerate(zip(parameter_mappings, scale_mappings)):
- merge_preeq_and_sim_pars_condition(
- condition_map_preeq=map_preeq,
- condition_map_sim=map_sim,
- condition_scale_map_preeq=scale_map_preeq,
- condition_scale_map_sim=scale_map_sim,
- condition=ic,
- )
- parameter_mapping.append(map_sim)
- scale_mapping.append(scale_map_sim)
-
- return parameter_mapping, scale_mapping
+_deprecated_import_v1(__name__)
diff --git a/petab/parameters.py b/petab/parameters.py
index 3339ef63..39e66fe3 100644
--- a/petab/parameters.py
+++ b/petab/parameters.py
@@ -1,639 +1,7 @@
-"""Functions operating on the PEtab parameter table"""
+"""Deprecated module for parameter table handling.
-import numbers
-import warnings
-from collections import OrderedDict
-from pathlib import Path
-from typing import (
- Dict,
- Iterable,
- List,
- Literal,
- Optional,
- Sequence,
- Set,
- Tuple,
- Union,
-)
+Use petab.v1.parameters instead."""
+from petab import _deprecated_import_v1
+from petab.v1.parameters import * # noqa: F403, F401, E402
-import libsbml
-import numpy as np
-import pandas as pd
-
-from . import conditions, core, lint, measurements, observables
-from .C import * # noqa: F403
-from .models import Model
-
-__all__ = [
- "create_parameter_df",
- "get_optimization_parameter_scaling",
- "get_optimization_parameters",
- "get_parameter_df",
- "get_priors_from_df",
- "get_valid_parameters_for_parameter_table",
- "map_scale",
- "map_unscale",
- "normalize_parameter_df",
- "scale",
- "unscale",
- "write_parameter_df",
-]
-
-PARAMETER_SCALE_ARGS = Literal["", "lin", "log", "log10"]
-
-
-def get_parameter_df(
- parameter_file: Union[
- str, Path, pd.DataFrame, Iterable[Union[str, Path, pd.DataFrame]], None
- ],
-) -> Union[pd.DataFrame, None]:
- """
- Read the provided parameter file into a ``pandas.Dataframe``.
-
- Arguments:
- parameter_file: Name of the file to read from or pandas.Dataframe,
- or an Iterable.
-
- Returns:
- Parameter ``DataFrame``, or ``None`` if ``None`` was passed.
- """
- if parameter_file is None:
- return None
- if isinstance(parameter_file, pd.DataFrame):
- parameter_df = parameter_file
- elif isinstance(parameter_file, (str, Path)):
- parameter_df = pd.read_csv(
- parameter_file, sep="\t", float_precision="round_trip"
- )
- elif isinstance(parameter_file, Iterable):
- dfs = [get_parameter_df(x) for x in parameter_file if x]
-
- if not dfs:
- return None
-
- parameter_df = pd.concat(dfs)
- # Check for contradicting parameter definitions
- _check_for_contradicting_parameter_definitions(parameter_df)
-
- return parameter_df
-
- lint.assert_no_leading_trailing_whitespace(
- parameter_df.columns.values, "parameter"
- )
-
- if not isinstance(parameter_df.index, pd.RangeIndex):
- parameter_df.reset_index(
- drop=parameter_file.index.name != PARAMETER_ID,
- inplace=True,
- )
-
- try:
- parameter_df.set_index([PARAMETER_ID], inplace=True)
- except KeyError as e:
- raise KeyError(
- f"Parameter table missing mandatory field {PARAMETER_ID}."
- ) from e
- _check_for_contradicting_parameter_definitions(parameter_df)
-
- return parameter_df
-
-
-def _check_for_contradicting_parameter_definitions(parameter_df: pd.DataFrame):
- """
- Raises a ValueError for non-unique parameter IDs
- """
- parameter_duplicates = set(
- parameter_df.index.values[parameter_df.index.duplicated()]
- )
- if parameter_duplicates:
- raise ValueError(
- f"The values of `{PARAMETER_ID}` must be unique. The "
- f"following duplicates were found:\n{parameter_duplicates}"
- )
-
-
-def write_parameter_df(df: pd.DataFrame, filename: Union[str, Path]) -> None:
- """Write PEtab parameter table
-
- Arguments:
- df: PEtab parameter table
- filename: Destination file name
- """
- df = get_parameter_df(df)
- df.to_csv(filename, sep="\t", index=True)
-
-
-def get_optimization_parameters(parameter_df: pd.DataFrame) -> List[str]:
- """
- Get list of optimization parameter IDs from parameter table.
-
- Arguments:
- parameter_df: PEtab parameter DataFrame
-
- Returns:
- List of IDs of parameters selected for optimization.
- """
- return list(parameter_df.index[parameter_df[ESTIMATE] == 1])
-
-
-def get_optimization_parameter_scaling(
- parameter_df: pd.DataFrame,
-) -> Dict[str, str]:
- """
- Get Dictionary with optimization parameter IDs mapped to parameter scaling
- strings.
-
- Arguments:
- parameter_df: PEtab parameter DataFrame
-
- Returns:
- Dictionary with optimization parameter IDs mapped to parameter scaling
- strings.
- """
- estimated_df = parameter_df.loc[parameter_df[ESTIMATE] == 1]
- return dict(zip(estimated_df.index, estimated_df[PARAMETER_SCALE]))
-
-
-def create_parameter_df(
- sbml_model: Optional[libsbml.Model] = None,
- condition_df: Optional[pd.DataFrame] = None,
- observable_df: Optional[pd.DataFrame] = None,
- measurement_df: Optional[pd.DataFrame] = None,
- model: Optional[Model] = None,
- include_optional: bool = False,
- parameter_scale: str = LOG10,
- lower_bound: Iterable = None,
- upper_bound: Iterable = None,
- mapping_df: Optional[pd.DataFrame] = None,
-) -> pd.DataFrame:
- """Create a new PEtab parameter table
-
- All table entries can be provided as string or list-like with length
- matching the number of parameters
-
- Arguments:
- sbml_model: SBML Model (deprecated, mutually exclusive with ``model``)
- model: PEtab model (mutually exclusive with ``sbml_model``)
- condition_df: PEtab condition DataFrame
- observable_df: PEtab observable DataFrame
- measurement_df: PEtab measurement DataFrame
- include_optional: By default this only returns parameters that are
- required to be present in the parameter table. If set to ``True``,
- this returns all parameters that are allowed to be present in the
- parameter table (i.e. also including parameters specified in the
- model).
- parameter_scale: parameter scaling
- lower_bound: lower bound for parameter value
- upper_bound: upper bound for parameter value
- mapping_df: PEtab mapping DataFrame
-
- Returns:
- The created parameter DataFrame
- """
- if sbml_model:
- warnings.warn(
- "Passing a model via the `sbml_model` argument is "
- "deprecated, use `model=petab.models.sbml_model."
- "SbmlModel(...)` instead.",
- DeprecationWarning,
- stacklevel=2,
- )
- from petab.models.sbml_model import SbmlModel
-
- if model:
- raise ValueError(
- "Arguments `model` and `sbml_model` are " "mutually exclusive."
- )
- model = SbmlModel(sbml_model=sbml_model)
- if include_optional:
- parameter_ids = list(
- get_valid_parameters_for_parameter_table(
- model=model,
- condition_df=condition_df,
- observable_df=observable_df,
- measurement_df=measurement_df,
- )
- )
- else:
- parameter_ids = list(
- get_required_parameters_for_parameter_table(
- model=model,
- condition_df=condition_df,
- observable_df=observable_df,
- measurement_df=measurement_df,
- mapping_df=mapping_df,
- )
- )
-
- df = pd.DataFrame(
- data={
- PARAMETER_ID: parameter_ids,
- PARAMETER_NAME: parameter_ids,
- PARAMETER_SCALE: parameter_scale,
- LOWER_BOUND: lower_bound,
- UPPER_BOUND: upper_bound,
- NOMINAL_VALUE: np.nan,
- ESTIMATE: 1,
- INITIALIZATION_PRIOR_TYPE: "",
- INITIALIZATION_PRIOR_PARAMETERS: "",
- OBJECTIVE_PRIOR_TYPE: "",
- OBJECTIVE_PRIOR_PARAMETERS: "",
- }
- )
- df.set_index([PARAMETER_ID], inplace=True)
-
- # For model parameters, set nominal values as defined in the model
- for parameter_id in df.index:
- try:
- df.loc[parameter_id, NOMINAL_VALUE] = model.get_parameter_value(
- parameter_id
- )
- except ValueError:
- # parameter was introduced as condition-specific override and
- # is potentially not present in the model
- pass
- return df
-
-
-def get_required_parameters_for_parameter_table(
- model: Model,
- condition_df: pd.DataFrame,
- observable_df: pd.DataFrame,
- measurement_df: pd.DataFrame,
- mapping_df: pd.DataFrame = None,
-) -> Set[str]:
- """
- Get set of parameters which need to go into the parameter table
-
- Arguments:
- model: PEtab model
- condition_df: PEtab condition table
- observable_df: PEtab observable table
- measurement_df: PEtab measurement table
- mapping_df: PEtab mapping table
-
- Returns:
- Set of parameter IDs which PEtab requires to be present in the
- parameter table. That is all {observable,noise}Parameters from the
- measurement table as well as all parametric condition table overrides
- that are not defined in the model.
- """
- # use ordered dict as proxy for ordered set
- parameter_ids = OrderedDict()
-
- # Add parameters from measurement table, unless they are fixed parameters
- def append_overrides(overrides):
- for p in overrides:
- if isinstance(p, str) and p not in condition_df.columns:
- parameter_ids[p] = None
-
- for _, row in measurement_df.iterrows():
- # we trust that the number of overrides matches
- append_overrides(
- measurements.split_parameter_replacement_list(
- row.get(OBSERVABLE_PARAMETERS, None)
- )
- )
- append_overrides(
- measurements.split_parameter_replacement_list(
- row.get(NOISE_PARAMETERS, None)
- )
- )
-
- # Add output parameters except for placeholders
- for formula_type, placeholder_sources in (
- (
- # Observable formulae
- {"observables": True, "noise": False},
- # can only contain observable placeholders
- {"noise": False, "observables": True},
- ),
- (
- # Noise formulae
- {"observables": False, "noise": True},
- # can contain noise and observable placeholders
- {"noise": True, "observables": True},
- ),
- ):
- output_parameters = observables.get_output_parameters(
- observable_df,
- model,
- mapping_df=mapping_df,
- **formula_type,
- )
- placeholders = observables.get_placeholders(
- observable_df,
- **placeholder_sources,
- )
- for p in output_parameters:
- if p not in placeholders:
- parameter_ids[p] = None
-
- # Add condition table parametric overrides unless already defined in the
- # model
- for p in conditions.get_parametric_overrides(condition_df):
- if not model.has_entity_with_id(p):
- parameter_ids[p] = None
-
- # remove parameters that occur in the condition table and are overridden
- # for ALL conditions
- for p in condition_df.columns[~condition_df.isnull().any()]:
- try:
- del parameter_ids[p]
- except KeyError:
- pass
- return parameter_ids.keys()
-
-
-def get_valid_parameters_for_parameter_table(
- model: Model,
- condition_df: pd.DataFrame,
- observable_df: pd.DataFrame,
- measurement_df: pd.DataFrame,
- mapping_df: pd.DataFrame = None,
-) -> Set[str]:
- """
- Get set of parameters which may be present inside the parameter table
-
- Arguments:
- model: PEtab model
- condition_df: PEtab condition table
- observable_df: PEtab observable table
- measurement_df: PEtab measurement table
- mapping_df: PEtab mapping table for additional checks
-
- Returns:
- Set of parameter IDs which PEtab allows to be present in the
- parameter table.
- """
- # - grab all allowed model parameters
- # - grab corresponding names from mapping table
- # - grab all output parameters defined in {observable,noise}Formula
- # - grab all parameters from measurement table
- # - grab all parametric overrides from condition table
- # - remove parameters for which condition table columns exist
- # - remove placeholder parameters
- # (only partial overrides are not supported)
-
- # must not go into parameter table
- blackset = set()
-
- if observable_df is not None:
- placeholders = set(observables.get_placeholders(observable_df))
-
- # collect assignment targets
- blackset |= placeholders
-
- if condition_df is not None:
- blackset |= set(condition_df.columns.values) - {CONDITION_NAME}
-
- # don't use sets here, to have deterministic ordering,
- # e.g. for creating parameter tables
- parameter_ids = OrderedDict.fromkeys(
- p
- for p in model.get_valid_parameters_for_parameter_table()
- if p not in blackset
- )
-
- if mapping_df is not None:
- for from_id, to_id in zip(
- mapping_df.index.values, mapping_df[MODEL_ENTITY_ID]
- ):
- if to_id in parameter_ids.keys():
- parameter_ids[from_id] = None
-
- if observable_df is not None:
- # add output parameters from observables table
- output_parameters = observables.get_output_parameters(
- observable_df=observable_df, model=model
- )
- for p in output_parameters:
- if p not in blackset:
- parameter_ids[p] = None
-
- # Append parameters from measurement table, unless they occur as condition
- # table columns
- def append_overrides(overrides):
- for p in overrides:
- if isinstance(p, str) and p not in blackset:
- parameter_ids[p] = None
-
- if measurement_df is not None:
- for _, row in measurement_df.iterrows():
- # we trust that the number of overrides matches
- append_overrides(
- measurements.split_parameter_replacement_list(
- row.get(OBSERVABLE_PARAMETERS, None)
- )
- )
- append_overrides(
- measurements.split_parameter_replacement_list(
- row.get(NOISE_PARAMETERS, None)
- )
- )
-
- # Append parameter overrides from condition table
- if condition_df is not None:
- for p in conditions.get_parametric_overrides(condition_df):
- parameter_ids[p] = None
-
- return parameter_ids.keys()
-
-
-def get_priors_from_df(
- parameter_df: pd.DataFrame,
- mode: Literal["initialization", "objective"],
- parameter_ids: Sequence[str] = None,
-) -> List[Tuple]:
- """Create list with information about the parameter priors
-
- Arguments:
- parameter_df: PEtab parameter table
- mode: ``'initialization'`` or ``'objective'``
- parameter_ids: A sequence of parameter IDs for which to sample starting
- points.
- For subsetting or reordering the parameters.
- Defaults to all estimated parameters.
-
- Returns:
- List with prior information.
- """
- # get types and parameters of priors from dataframe
- par_to_estimate = parameter_df.loc[parameter_df[ESTIMATE] == 1]
-
- if parameter_ids:
- try:
- par_to_estimate = par_to_estimate.loc[parameter_ids, :]
- except KeyError as e:
- missing_ids = set(parameter_ids) - set(par_to_estimate.index)
- raise KeyError(
- "Parameter table does not contain estimated parameter(s) "
- f"{missing_ids}."
- ) from e
-
- prior_list = []
- for _, row in par_to_estimate.iterrows():
- # retrieve info about type
- prior_type = str(row.get(f"{mode}PriorType", ""))
- if core.is_empty(prior_type):
- prior_type = PARAMETER_SCALE_UNIFORM
-
- # retrieve info about parameters of priors, make it a tuple of floats
- pars_str = str(row.get(f"{mode}PriorParameters", ""))
- if core.is_empty(pars_str):
- lb, ub = map_scale(
- [row[LOWER_BOUND], row[UPPER_BOUND]],
- [row[PARAMETER_SCALE]] * 2,
- )
- pars_str = f"{lb}{PARAMETER_SEPARATOR}{ub}"
- prior_pars = tuple(
- float(entry) for entry in pars_str.split(PARAMETER_SEPARATOR)
- )
-
- # add parameter scale and bounds, as this may be needed
- par_scale = row[PARAMETER_SCALE]
- par_bounds = (row[LOWER_BOUND], row[UPPER_BOUND])
-
- # if no prior is specified, we assume a non-informative (uniform) one
- if prior_type == "nan":
- prior_type = PARAMETER_SCALE_UNIFORM
- prior_pars = (
- scale(row[LOWER_BOUND], par_scale),
- scale(row[UPPER_BOUND], par_scale),
- )
-
- prior_list.append((prior_type, prior_pars, par_scale, par_bounds))
-
- return prior_list
-
-
-def scale(
- parameter: numbers.Number,
- scale_str: PARAMETER_SCALE_ARGS,
-) -> numbers.Number:
- """Scale parameter according to ``scale_str``.
-
- Arguments:
- parameter:
- Parameter to be scaled.
- scale_str:
- One of ``'lin'`` (synonymous with ``''``), ``'log'``, ``'log10'``.
-
- Returns:
- The scaled parameter.
- """
- if scale_str == LIN or not scale_str:
- return parameter
- if scale_str == LOG:
- return np.log(parameter)
- if scale_str == LOG10:
- return np.log10(parameter)
- raise ValueError(f"Invalid parameter scaling: {scale_str}")
-
-
-def unscale(
- parameter: numbers.Number,
- scale_str: PARAMETER_SCALE_ARGS,
-) -> numbers.Number:
- """Unscale parameter according to ``scale_str``.
-
- Arguments:
- parameter:
- Parameter to be unscaled.
- scale_str:
- One of ``'lin'`` (synonymous with ``''``), ``'log'``, ``'log10'``.
-
- Returns:
- The unscaled parameter.
- """
- if scale_str == LIN or not scale_str:
- return parameter
- if scale_str == LOG:
- return np.exp(parameter)
- if scale_str == LOG10:
- return 10**parameter
- raise ValueError(f"Invalid parameter scaling: {scale_str}")
-
-
-def map_scale(
- parameters: Sequence[numbers.Number],
- scale_strs: Union[Iterable[PARAMETER_SCALE_ARGS], PARAMETER_SCALE_ARGS],
-) -> Iterable[numbers.Number]:
- """Scale the parameters, i.e. as :func:`scale`, but for Sequences.
-
- Arguments:
- parameters:
- Parameters to be scaled.
- scale_strs:
- Scales to apply. Broadcast if a single string.
-
- Returns:
- The scaled parameters.
- """
- if isinstance(scale_strs, str):
- scale_strs = [scale_strs] * len(parameters)
- return (
- scale(par_val, scale_str)
- for par_val, scale_str in zip(parameters, scale_strs)
- )
-
-
-def map_unscale(
- parameters: Sequence[numbers.Number],
- scale_strs: Union[Iterable[PARAMETER_SCALE_ARGS], PARAMETER_SCALE_ARGS],
-) -> Iterable[numbers.Number]:
- """Unscale the parameters, i.e. as :func:`unscale`, but for Sequences.
-
- Arguments:
- parameters:
- Parameters to be unscaled.
- scale_strs:
- Scales that the parameters are currently on.
- Broadcast if a single string.
-
- Returns:
- The unscaled parameters.
- """
- if isinstance(scale_strs, str):
- scale_strs = [scale_strs] * len(parameters)
- return (
- unscale(par_val, scale_str)
- for par_val, scale_str in zip(parameters, scale_strs)
- )
-
-
-def normalize_parameter_df(parameter_df: pd.DataFrame) -> pd.DataFrame:
- """Add missing columns and fill in default values."""
- df = parameter_df.copy(deep=True)
-
- if PARAMETER_NAME not in df:
- df[PARAMETER_NAME] = df.reset_index()[PARAMETER_ID]
-
- prior_type_cols = [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE]
- prior_par_cols = [
- INITIALIZATION_PRIOR_PARAMETERS,
- OBJECTIVE_PRIOR_PARAMETERS,
- ]
- # iterate over initialization and objective priors
- for prior_type_col, prior_par_col in zip(prior_type_cols, prior_par_cols):
- # fill in default values for prior type
- if prior_type_col not in df:
- df[prior_type_col] = PARAMETER_SCALE_UNIFORM
- else:
- for irow, row in df.iterrows():
- if core.is_empty(row[prior_type_col]):
- df.loc[irow, prior_type_col] = PARAMETER_SCALE_UNIFORM
- if prior_par_col not in df:
- df[prior_par_col] = None
- for irow, row in df.iterrows():
- if (
- core.is_empty(row[prior_par_col])
- and row[prior_type_col] == PARAMETER_SCALE_UNIFORM
- ):
- lb, ub = map_scale(
- [row[LOWER_BOUND], row[UPPER_BOUND]],
- [row[PARAMETER_SCALE]] * 2,
- )
- df.loc[irow, prior_par_col] = f"{lb}{PARAMETER_SEPARATOR}{ub}"
-
- return df
+_deprecated_import_v1(__name__)
diff --git a/petab/petablint.py b/petab/petablint.py
index 45995602..f8228d42 100755
--- a/petab/petablint.py
+++ b/petab/petablint.py
@@ -8,8 +8,13 @@
from colorama import Fore
from colorama import init as init_colorama
+from jsonschema.exceptions import ValidationError as SchemaValidationError
-import petab
+import petab.v1 as petab
+from petab.v1.C import FORMAT_VERSION
+from petab.v2.lint import lint_problem
+from petab.versions import get_major_version
+from petab.yaml import validate
logger = logging.getLogger(__name__)
@@ -49,34 +54,39 @@ def parse_cli_args():
)
# Call with set of files
- parser.add_argument(
+ group = parser.add_argument_group(
+ "Check individual files *DEPRECATED*. Please contact us via "
+ "https://github.com/PEtab-dev/libpetab-python/issues, "
+ "if you need this."
+ )
+ group.add_argument(
"-s", "--sbml", dest="sbml_file_name", help="SBML model filename"
)
- parser.add_argument(
+ group.add_argument(
"-o",
"--observables",
dest="observable_file_name",
help="Observable table",
)
- parser.add_argument(
+ group.add_argument(
"-m",
"--measurements",
dest="measurement_file_name",
help="Measurement table",
)
- parser.add_argument(
+ group.add_argument(
"-c",
"--conditions",
dest="condition_file_name",
help="Conditions table",
)
- parser.add_argument(
+ group.add_argument(
"-p",
"--parameters",
dest="parameter_file_name",
help="Parameter table",
)
- parser.add_argument(
+ group.add_argument(
"--vis",
"--visualizations",
dest="visualization_file_name",
@@ -87,13 +97,18 @@ def parse_cli_args():
group.add_argument(
"-y",
"--yaml",
+ dest="yaml_file_name_deprecated",
+ help="PEtab YAML problem filename. "
+ "*DEPRECATED* pass the file name as positional argument instead.",
+ )
+ group.add_argument(
dest="yaml_file_name",
help="PEtab YAML problem filename",
+ nargs="?",
)
args = parser.parse_args()
-
- if args.yaml_file_name and any(
+ if any(
(
args.sbml_file_name,
args.condition_file_name,
@@ -101,34 +116,51 @@ def parse_cli_args():
args.parameter_file_name,
)
):
- parser.error(
- "When providing a yaml file, no other files may " "be specified."
+ logger.warning(
+ "Passing individual tables to petablint is deprecated, please "
+ "provide a PEtab YAML file instead. "
+ "Please contact us via "
+ "https://github.com/PEtab-dev/libpetab-python/issues, "
+ "if you need this."
)
+ if args.yaml_file_name or args.yaml_file_name_deprecated:
+ parser.error(
+ "When providing a yaml file, no other files may be specified."
+ )
+
+ if args.yaml_file_name_deprecated:
+ logger.warning(
+ "The -y/--yaml option is deprecated. "
+ "Please provide the YAML file as a positional argument."
+ )
+ if args.yaml_file_name:
+ parser.error(
+ "Please provide only one of --yaml or positional argument."
+ )
+
+ args.yaml_file_name = args.yaml_file_name or args.yaml_file_name_deprecated
return args
def main():
"""Run PEtab validator"""
- args = parse_cli_args()
init_colorama(autoreset=True)
-
ch = logging.StreamHandler()
+ ch.setFormatter(LintFormatter())
+ logging.basicConfig(level=logging.DEBUG, handlers=[ch])
+
+ args = parse_cli_args()
+
if args.verbose:
ch.setLevel(logging.DEBUG)
else:
ch.setLevel(logging.WARN)
- ch.setFormatter(LintFormatter())
- logging.basicConfig(level=logging.DEBUG, handlers=[ch])
if args.yaml_file_name:
- from jsonschema.exceptions import ValidationError
-
- from petab.yaml import validate
-
try:
validate(args.yaml_file_name)
- except ValidationError as e:
+ except SchemaValidationError as e:
logger.error(
"Provided YAML file does not adhere to PEtab " f"schema: {e}"
)
@@ -140,37 +172,54 @@ def main():
# problem = petab.CompositeProblem.from_yaml(args.yaml_file_name)
return
- problem = petab.Problem.from_yaml(args.yaml_file_name)
-
- else:
- logger.debug("Looking for...")
- if args.sbml_file_name:
- logger.debug(f"\tSBML model: {args.sbml_file_name}")
- if args.condition_file_name:
- logger.debug(f"\tCondition table: {args.condition_file_name}")
- if args.observable_file_name:
- logger.debug(f"\tObservable table: {args.observable_file_name}")
- if args.measurement_file_name:
- logger.debug(f"\tMeasurement table: {args.measurement_file_name}")
- if args.parameter_file_name:
- logger.debug(f"\tParameter table: {args.parameter_file_name}")
- if args.visualization_file_name:
- logger.debug(
- "\tVisualization table: " f"{args.visualization_file_name}"
- )
+ match get_major_version(args.yaml_file_name):
+ case 1:
+ problem = petab.Problem.from_yaml(args.yaml_file_name)
+ ret = petab.lint.lint_problem(problem)
+ sys.exit(ret)
+ case 2:
+ validation_issues = lint_problem(args.yaml_file_name)
+ if validation_issues:
+ validation_issues.log(logger=logger)
+ sys.exit(1)
+ logger.info("PEtab format check completed successfully.")
+ sys.exit(0)
+ case _:
+ logger.error(
+ "The provided PEtab files are of unsupported version "
+ f"or the `{FORMAT_VERSION}` field is missing in the yaml "
+ "file."
+ )
+ sys.exit(1)  # abort: must not fall through to the deprecated per-file path
+ # DEPRECATED - only supported for v1
+ logger.debug("Looking for...")
+ if args.sbml_file_name:
+ logger.debug(f"\tSBML model: {args.sbml_file_name}")
+ if args.condition_file_name:
+ logger.debug(f"\tCondition table: {args.condition_file_name}")
+ if args.observable_file_name:
+ logger.debug(f"\tObservable table: {args.observable_file_name}")
+ if args.measurement_file_name:
+ logger.debug(f"\tMeasurement table: {args.measurement_file_name}")
+ if args.parameter_file_name:
+ logger.debug(f"\tParameter table: {args.parameter_file_name}")
+ if args.visualization_file_name:
+ logger.debug(
+ "\tVisualization table: " f"{args.visualization_file_name}"
+ )
- try:
- problem = petab.Problem.from_files(
- sbml_file=args.sbml_file_name,
- condition_file=args.condition_file_name,
- measurement_file=args.measurement_file_name,
- parameter_file=args.parameter_file_name,
- observable_files=args.observable_file_name,
- visualization_files=args.visualization_file_name,
- )
- except FileNotFoundError as e:
- logger.error(e)
- sys.exit(1)
+ try:
+ problem = petab.Problem.from_files(
+ sbml_file=args.sbml_file_name,
+ condition_file=args.condition_file_name,
+ measurement_file=args.measurement_file_name,
+ parameter_file=args.parameter_file_name,
+ observable_files=args.observable_file_name,
+ visualization_files=args.visualization_file_name,
+ )
+ except FileNotFoundError as e:
+ logger.error(e)
+ sys.exit(1)
ret = petab.lint.lint_problem(problem)
sys.exit(ret)
diff --git a/petab/schemas/petab_schema.v1.0.0.yaml b/petab/schemas/petab_schema.v1.0.0.yaml
index bf012e57..255fbb04 100644
--- a/petab/schemas/petab_schema.v1.0.0.yaml
+++ b/petab/schemas/petab_schema.v1.0.0.yaml
@@ -1,12 +1,26 @@
-# For syntax see: https://json-schema.org/understanding-json-schema/index.html
+# For syntax see: https://json-schema.org/understanding-json-schema
#$schema: "https://json-schema.org/draft/2019-09/meta/core"
$schema: "http://json-schema.org/draft-06/schema"
description: PEtab parameter estimation problem config file schema
+definitions:
+ list_of_files:
+ type: array
+ description: List of files.
+ items:
+ type: string
+ description: File name or URL.
+ version_number:
+ type: string
+ pattern: ^([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?$
+ description: Version number (corresponding to PEP 440).
+
properties:
format_version:
- type: integer
+ anyOf:
+ - $ref: "#/definitions/version_number"
+ - type: integer
description: Version of the PEtab format (e.g. 1).
parameter_file:
@@ -35,45 +49,25 @@ properties:
properties:
sbml_files:
- type: array
- description: List of PEtab SBML files.
-
- items:
- type: string
- description: PEtab SBML file name or URL.
+ $ref: "#/definitions/list_of_files"
+ description: List of SBML model files.
measurement_files:
- type: array
+ $ref: "#/definitions/list_of_files"
description: List of PEtab measurement files.
- items:
- type: string
- description: PEtab measurement file name or URL.
-
condition_files:
- type: array
+ $ref: "#/definitions/list_of_files"
description: List of PEtab condition files.
- items:
- type: string
- description: PEtab condition file name or URL.
-
observable_files:
- type: array
+ $ref: "#/definitions/list_of_files"
description: List of PEtab observable files.
- items:
- type: string
- description: PEtab observable file name or URL.
-
visualization_files:
- type: array
+ $ref: "#/definitions/list_of_files"
description: List of PEtab visualization files.
- items:
- type: string
- description: PEtab visualization file name or URL.
-
required:
- sbml_files
- observable_files
diff --git a/petab/schemas/petab_schema.v2.0.0.yaml b/petab/schemas/petab_schema.v2.0.0.yaml
index 416f0c4a..ddeb428a 100644
--- a/petab/schemas/petab_schema.v2.0.0.yaml
+++ b/petab/schemas/petab_schema.v2.0.0.yaml
@@ -1,17 +1,26 @@
-# For syntax see: https://json-schema.org/understanding-json-schema/index.html
+# For syntax see: https://json-schema.org/understanding-json-schema
#$schema: "https://json-schema.org/draft/2019-09/meta/core"
$schema: "http://json-schema.org/draft-06/schema"
description: PEtab parameter estimation problem config file schema
+definitions:
+ list_of_files:
+ type: array
+ description: List of files.
+ items:
+ type: string
+ description: File name or URL.
+ version_number:
+ type: string
+ pattern: ^([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?$
+ description: Version number (corresponding to PEP 440).
+
properties:
format_version:
anyof:
- - type: string
- # (corresponding to PEP 440).
- pattern: ^([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?$
+ - $ref: "#/definitions/version_number"
- type: integer
-
description: Version of the PEtab format
parameter_file:
@@ -60,36 +69,20 @@ properties:
additionalProperties: false
measurement_files:
- type: array
description: List of PEtab measurement files.
-
- items:
- type: string
- description: PEtab measurement file name or URL.
+ $ref: "#/definitions/list_of_files"
condition_files:
- type: array
description: List of PEtab condition files.
-
- items:
- type: string
- description: PEtab condition file name or URL.
+ $ref: "#/definitions/list_of_files"
observable_files:
- type: array
description: List of PEtab observable files.
-
- items:
- type: string
- description: PEtab observable file name or URL.
+ $ref: "#/definitions/list_of_files"
visualization_files:
- type: array
description: List of PEtab visualization files.
-
- items:
- type: string
- description: PEtab visualization file name or URL.
+ $ref: "#/definitions/list_of_files"
mapping_file:
type: string
@@ -113,8 +106,7 @@ properties:
Information on a specific extension
properties:
version:
- type: string
- pattern: ^([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?$
+ $ref: "#/definitions/version_number"
required:
- version
diff --git a/petab/simplify.py b/petab/simplify.py
index 5946be05..cd7ba25e 100644
--- a/petab/simplify.py
+++ b/petab/simplify.py
@@ -1,115 +1,7 @@
-"""Functionality for simplifying PEtab problems"""
-from math import nan
+"""Deprecated module for simplifying PEtab problems.
-import pandas as pd
+Use petab.v1.simplify instead."""
+from petab import _deprecated_import_v1
+from petab.v1.simplify import * # noqa: F403, F401, E402
-import petab
-
-from . import Problem
-from .C import * # noqa: F403
-from .lint import lint_problem
-
-__all__ = [
- "remove_nan_measurements",
- "remove_unused_observables",
- "remove_unused_conditions",
- "simplify_problem",
- "condition_parameters_to_parameter_table",
-]
-
-
-def remove_nan_measurements(problem: Problem):
- """Drop any measurements that are NaN"""
- problem.measurement_df = problem.measurement_df[
- ~problem.measurement_df[MEASUREMENT].isna()
- ]
- problem.measurement_df.reset_index(inplace=True, drop=True)
-
-
-def remove_unused_observables(problem: Problem):
- """Remove observables that have no measurements"""
- measured_observables = set(problem.measurement_df[OBSERVABLE_ID].unique())
- problem.observable_df = problem.observable_df[
- problem.observable_df.index.isin(measured_observables)
- ]
-
-
-def remove_unused_conditions(problem: Problem):
- """Remove conditions that have no measurements"""
- measured_conditions = set(
- problem.measurement_df[SIMULATION_CONDITION_ID].unique()
- )
- if PREEQUILIBRATION_CONDITION_ID in problem.measurement_df:
- measured_conditions |= set(
- problem.measurement_df[PREEQUILIBRATION_CONDITION_ID].unique()
- )
-
- problem.condition_df = problem.condition_df[
- problem.condition_df.index.isin(measured_conditions)
- ]
-
-
-def simplify_problem(problem: Problem):
- if lint_problem(problem):
- raise ValueError("Invalid PEtab problem supplied.")
-
- remove_unused_observables(problem)
- remove_unused_conditions(problem)
- condition_parameters_to_parameter_table(problem)
-
- if lint_problem(problem):
- raise AssertionError("Invalid PEtab problem generated.")
-
-
-def condition_parameters_to_parameter_table(problem: Problem):
- """Move parameters from the condition table to the parameters table, if
- the same parameter value is used for all conditions.
- """
- if (
- problem.condition_df is None
- or problem.condition_df.empty
- or problem.model is None
- ):
- return
-
- replacements = {}
- for parameter_id in problem.condition_df:
- if parameter_id == CONDITION_NAME:
- continue
-
- if problem.model.is_state_variable(parameter_id):
- # initial states can't go the parameters table
- continue
-
- series = problem.condition_df[parameter_id]
- value = petab.to_float_if_float(series.iloc[0])
-
- # same value for all conditions and no parametric overrides (str)?
- if isinstance(value, float) and len(series.unique()) == 1:
- replacements[parameter_id] = series.iloc[0]
-
- if not replacements:
- return
-
- rows = [
- {
- PARAMETER_ID: parameter_id,
- PARAMETER_SCALE: LIN,
- LOWER_BOUND: nan,
- UPPER_BOUND: nan,
- NOMINAL_VALUE: value,
- ESTIMATE: 0,
- }
- for parameter_id, value in replacements.items()
- ]
- rows = pd.DataFrame(rows)
- rows.set_index(PARAMETER_ID, inplace=True)
-
- if problem.parameter_df is None:
- problem.parameter_df = rows
- else:
- problem.parameter_df = pd.concat([problem.parameter_df, rows])
-
- problem.condition_df = problem.condition_df.drop(
- columns=replacements.keys()
- )
+_deprecated_import_v1(__name__)
diff --git a/petab/simulate.py b/petab/simulate.py
index 59aa46cf..afa866a6 100644
--- a/petab/simulate.py
+++ b/petab/simulate.py
@@ -1,261 +1,8 @@
-"""PEtab simulator base class and related functions."""
+"""Deprecated module for simulating PEtab models.
-import abc
-import pathlib
-import shutil
-import tempfile
-from typing import Dict, Optional, Union
-from warnings import warn
+Use petab.v1.simulate instead."""
-import numpy as np
-import pandas as pd
-import sympy as sp
+from petab import _deprecated_import_v1
+from petab.v1.simulate import * # noqa: F403, F401, E402
-import petab
-
-__all__ = ["Simulator", "sample_noise"]
-
-
-class Simulator(abc.ABC):
- """Base class that specific simulators should inherit.
-
- Specific simulators should minimally implement the
- :meth:`petab.simulate.Simulator.simulate_without_noise` method.
- Example (AMICI): https://bit.ly/33SUSG4
-
- Attributes:
- noise_formulas:
- The formulae that will be used to calculate the scale of noise
- distributions.
- petab_problem:
- A PEtab problem, which will be simulated.
- rng:
- A NumPy random generator, used to sample from noise distributions.
- temporary_working_dir:
- Whether ``working_dir`` is a temporary directory, which can be
- deleted without significant consequence.
- working_dir:
- All simulator-specific output files will be saved here. This
- directory and its contents may be modified and deleted, and
- should be considered ephemeral.
- """
-
- def __init__(
- self,
- petab_problem: petab.Problem,
- working_dir: Optional[Union[pathlib.Path, str]] = None,
- ):
- """Initialize the simulator.
-
- Initialize the simulator with sufficient information to perform a
- simulation. If no working directory is specified, a temporary one is
- created.
-
- Arguments:
- petab_problem:
- A PEtab problem.
- working_dir:
- All simulator-specific output files will be saved here. This
- directory and its contents may be modified and deleted, and
- should be considered ephemeral.
- """
- self.petab_problem = petab_problem
-
- self.temporary_working_dir = False
- if working_dir is None:
- working_dir = tempfile.mkdtemp()
- self.temporary_working_dir = True
- if not isinstance(working_dir, pathlib.Path):
- working_dir = pathlib.Path(working_dir)
- self.working_dir = working_dir
- self.working_dir.mkdir(parents=True, exist_ok=True)
-
- self.noise_formulas = petab.calculate.get_symbolic_noise_formulas(
- self.petab_problem.observable_df
- )
- self.rng = np.random.default_rng()
-
- def remove_working_dir(self, force: bool = False, **kwargs) -> None:
- """Remove the simulator working directory, and all files within.
-
- See the :meth:`petab.simulate.Simulator.__init__` method arguments.
-
- Arguments:
- force:
- If ``True``, the working directory is removed regardless of
- whether it is a temporary directory.
- **kwargs:
- Additional keyword arguments are passed to
- :func:`shutil.rmtree`.
- """
- if force or self.temporary_working_dir:
- shutil.rmtree(self.working_dir, **kwargs)
- if self.working_dir.is_dir():
- warn(
- "Failed to remove the working directory: "
- + str(self.working_dir),
- stacklevel=2,
- )
- else:
- warn(
- "By default, specified working directories are not removed. "
- "Please call this method with `force=True`, or manually "
- f"delete the working directory: {self.working_dir}",
- stacklevel=2,
- )
-
- @abc.abstractmethod
- def simulate_without_noise(self) -> pd.DataFrame:
- """Simulate the PEtab problem.
-
- This is an abstract method that should be implemented with a simulation
- package. Examples of this are referenced in the class docstring.
-
- Returns:
- Simulated data, as a PEtab measurements table, which should be
- equivalent to replacing all values in the
- :const:`petab.C.MEASUREMENT` column of the measurements table (of
- the PEtab problem supplied to the
- :meth:`petab.simulate.Simulator.__init__` method), with
- simulated values.
- """
- raise NotImplementedError()
-
- def simulate(
- self,
- noise: bool = False,
- noise_scaling_factor: float = 1,
- as_measurement: bool = False,
- **kwargs,
- ) -> pd.DataFrame:
- """Simulate a PEtab problem, optionally with noise.
-
- Arguments:
- noise: If True, noise is added to simulated data.
- noise_scaling_factor:
- A multiplier of the scale of the noise distribution.
- as_measurement:
- Whether the data column is named :const:`petab.C.MEASUREMENT`
- (`True`) or :const:`petab.C.SIMULATION` (`False`).
- **kwargs:
- Additional keyword arguments are passed to
- :meth:`petab.simulate.Simulator.simulate_without_noise`.
-
- Returns:
- Simulated data, as a PEtab measurements table.
- """
- simulation_df = self.simulate_without_noise(**kwargs)
- if noise:
- simulation_df = self.add_noise(simulation_df, noise_scaling_factor)
-
- columns = {petab.C.MEASUREMENT: petab.C.SIMULATION}
- if as_measurement:
- columns = {petab.C.SIMULATION: petab.C.MEASUREMENT}
- simulation_df = simulation_df.rename(columns=columns)
-
- return simulation_df
-
- def add_noise(
- self,
- simulation_df: pd.DataFrame,
- noise_scaling_factor: float = 1,
- **kwargs,
- ) -> pd.DataFrame:
- """Add noise to simulated data.
-
- Arguments:
- simulation_df:
- A PEtab measurements table that contains simulated data.
- noise_scaling_factor:
- A multiplier of the scale of the noise distribution.
- **kwargs:
- Additional keyword arguments are passed to
- :func:`sample_noise`.
-
- Returns:
- Simulated data with noise, as a PEtab measurements table.
- """
- simulation_df_with_noise = simulation_df.copy()
- simulation_df_with_noise[petab.C.MEASUREMENT] = [
- sample_noise(
- self.petab_problem,
- row,
- row[petab.C.MEASUREMENT],
- self.noise_formulas,
- self.rng,
- noise_scaling_factor,
- **kwargs,
- )
- for _, row in simulation_df_with_noise.iterrows()
- ]
- return simulation_df_with_noise
-
-
-def sample_noise(
- petab_problem: petab.Problem,
- measurement_row: pd.Series,
- simulated_value: float,
- noise_formulas: Optional[Dict[str, sp.Expr]] = None,
- rng: Optional[np.random.Generator] = None,
- noise_scaling_factor: float = 1,
- zero_bounded: bool = False,
-) -> float:
- """Generate a sample from a PEtab noise distribution.
-
- Arguments:
- petab_problem:
- The PEtab problem used to generate the simulated value.
- Instance of :class:`petab.Problem`.
- measurement_row:
- The row in the PEtab problem measurement table that corresponds
- to the simulated value.
- simulated_value:
- A simulated value without noise.
- noise_formulas:
- Processed noise formulas from the PEtab observables table, in the
- form output by :func:`petab.calculate.get_symbolic_noise_formulas`.
- rng:
- A NumPy random generator.
- noise_scaling_factor:
- A multiplier of the scale of the noise distribution.
- zero_bounded:
- Return zero if the sign of the return value and ``simulated_value``
- differ. Can be used to ensure non-negative and non-positive values,
- if the sign of ``simulated_value`` should not change.
-
- Returns:
- The sample from the PEtab noise distribution.
- """
- if noise_formulas is None:
- noise_formulas = petab.calculate.get_symbolic_noise_formulas(
- petab_problem.observable_df
- )
- if rng is None:
- rng = np.random.default_rng()
-
- noise_value = petab.calculate.evaluate_noise_formula(
- measurement_row,
- noise_formulas,
- petab_problem.parameter_df,
- simulated_value,
- )
-
- # default noise distribution is petab.C.NORMAL
- noise_distribution = petab_problem.observable_df.loc[
- measurement_row[petab.C.OBSERVABLE_ID]
- ].get(petab.C.NOISE_DISTRIBUTION, petab.C.NORMAL)
- # an empty noise distribution column in an observables table can result in
- # `noise_distribution == float('nan')`
- if pd.isna(noise_distribution):
- noise_distribution = petab.C.NORMAL
-
- # below is e.g.: `np.random.normal(loc=simulation, scale=noise_value)`
- simulated_value_with_noise = getattr(rng, noise_distribution)(
- loc=simulated_value, scale=noise_value * noise_scaling_factor
- )
-
- if zero_bounded and np.sign(simulated_value) != np.sign(
- simulated_value_with_noise
- ):
- return 0.0
- return simulated_value_with_noise
+_deprecated_import_v1(__name__)
diff --git a/petab/v1/C.py b/petab/v1/C.py
new file mode 100644
index 00000000..70ce22c3
--- /dev/null
+++ b/petab/v1/C.py
@@ -0,0 +1,372 @@
+# pylint: disable=invalid-name
+"""
+This file contains constant definitions.
+"""
+import math as _math
+import sys
+
+# MEASUREMENTS
+
+#:
+OBSERVABLE_ID = "observableId"
+
+#:
+PREEQUILIBRATION_CONDITION_ID = "preequilibrationConditionId"
+
+#:
+SIMULATION_CONDITION_ID = "simulationConditionId"
+
+#:
+MEASUREMENT = "measurement"
+
+#:
+TIME = "time"
+
+#: Time value that indicates steady-state measurements
+TIME_STEADY_STATE = _math.inf
+
+#:
+OBSERVABLE_PARAMETERS = "observableParameters"
+
+#:
+NOISE_PARAMETERS = "noiseParameters"
+
+#:
+DATASET_ID = "datasetId"
+
+#:
+REPLICATE_ID = "replicateId"
+
+#: Mandatory columns of measurement table
+MEASUREMENT_DF_REQUIRED_COLS = [
+ OBSERVABLE_ID,
+ SIMULATION_CONDITION_ID,
+ MEASUREMENT,
+ TIME,
+]
+
+#: Optional columns of measurement table
+MEASUREMENT_DF_OPTIONAL_COLS = [
+ PREEQUILIBRATION_CONDITION_ID,
+ OBSERVABLE_PARAMETERS,
+ NOISE_PARAMETERS,
+ DATASET_ID,
+ REPLICATE_ID,
+]
+
+#: Measurement table columns
+MEASUREMENT_DF_COLS = [
+ MEASUREMENT_DF_REQUIRED_COLS[0],
+ MEASUREMENT_DF_OPTIONAL_COLS[0],
+ *MEASUREMENT_DF_REQUIRED_COLS[1:],
+ *MEASUREMENT_DF_OPTIONAL_COLS[1:],
+]
+
+
+# PARAMETERS
+
+#:
+PARAMETER_ID = "parameterId"
+#:
+PARAMETER_NAME = "parameterName"
+#:
+PARAMETER_SCALE = "parameterScale"
+#:
+LOWER_BOUND = "lowerBound"
+#:
+UPPER_BOUND = "upperBound"
+#:
+NOMINAL_VALUE = "nominalValue"
+#:
+ESTIMATE = "estimate"
+#:
+INITIALIZATION_PRIOR_TYPE = "initializationPriorType"
+#:
+INITIALIZATION_PRIOR_PARAMETERS = "initializationPriorParameters"
+#:
+OBJECTIVE_PRIOR_TYPE = "objectivePriorType"
+#:
+OBJECTIVE_PRIOR_PARAMETERS = "objectivePriorParameters"
+
+#: Mandatory columns of parameter table
+PARAMETER_DF_REQUIRED_COLS = [
+ PARAMETER_ID,
+ PARAMETER_SCALE,
+ LOWER_BOUND,
+ UPPER_BOUND,
+ ESTIMATE,
+]
+
+#: Optional columns of parameter table
+PARAMETER_DF_OPTIONAL_COLS = [
+ PARAMETER_NAME,
+ NOMINAL_VALUE,
+ INITIALIZATION_PRIOR_TYPE,
+ INITIALIZATION_PRIOR_PARAMETERS,
+ OBJECTIVE_PRIOR_TYPE,
+ OBJECTIVE_PRIOR_PARAMETERS,
+]
+
+#: Parameter table columns
+PARAMETER_DF_COLS = [
+ PARAMETER_DF_REQUIRED_COLS[0],
+ PARAMETER_DF_OPTIONAL_COLS[0],
+ *PARAMETER_DF_REQUIRED_COLS[1:],
+ *PARAMETER_DF_OPTIONAL_COLS[1:],
+]
+
+#:
+INITIALIZATION = "initialization"
+#:
+OBJECTIVE = "objective"
+
+
+# CONDITIONS
+
+#:
+CONDITION_ID = "conditionId"
+#:
+CONDITION_NAME = "conditionName"
+
+
+# OBSERVABLES
+
+#:
+OBSERVABLE_NAME = "observableName"
+#:
+OBSERVABLE_FORMULA = "observableFormula"
+#:
+NOISE_FORMULA = "noiseFormula"
+#:
+OBSERVABLE_TRANSFORMATION = "observableTransformation"
+#:
+NOISE_DISTRIBUTION = "noiseDistribution"
+
+#: Mandatory columns of observables table
+OBSERVABLE_DF_REQUIRED_COLS = [
+ OBSERVABLE_ID,
+ OBSERVABLE_FORMULA,
+ NOISE_FORMULA,
+]
+
+#: Optional columns of observables table
+OBSERVABLE_DF_OPTIONAL_COLS = [
+ OBSERVABLE_NAME,
+ OBSERVABLE_TRANSFORMATION,
+ NOISE_DISTRIBUTION,
+]
+
+#: Observables table columns
+OBSERVABLE_DF_COLS = [
+ *OBSERVABLE_DF_REQUIRED_COLS,
+ *OBSERVABLE_DF_OPTIONAL_COLS,
+]
+
+
+# TRANSFORMATIONS
+
+#:
+LIN = "lin"
+#:
+LOG = "log"
+#:
+LOG10 = "log10"
+#: Supported observable transformations
+OBSERVABLE_TRANSFORMATIONS = [LIN, LOG, LOG10]
+
+
+# NOISE MODELS
+
+#:
+UNIFORM = "uniform"
+#:
+PARAMETER_SCALE_UNIFORM = "parameterScaleUniform"
+#:
+NORMAL = "normal"
+#:
+PARAMETER_SCALE_NORMAL = "parameterScaleNormal"
+#:
+LAPLACE = "laplace"
+#:
+PARAMETER_SCALE_LAPLACE = "parameterScaleLaplace"
+#:
+LOG_NORMAL = "logNormal"
+#:
+LOG_LAPLACE = "logLaplace"
+
+#: Supported prior types
+PRIOR_TYPES = [
+ UNIFORM,
+ NORMAL,
+ LAPLACE,
+ LOG_NORMAL,
+ LOG_LAPLACE,
+ PARAMETER_SCALE_UNIFORM,
+ PARAMETER_SCALE_NORMAL,
+ PARAMETER_SCALE_LAPLACE,
+]
+
+#: Supported noise distributions
+NOISE_MODELS = [NORMAL, LAPLACE]
+
+
+# VISUALIZATION
+
+#:
+PLOT_ID = "plotId"
+#:
+PLOT_NAME = "plotName"
+#:
+PLOT_TYPE_SIMULATION = "plotTypeSimulation"
+#:
+PLOT_TYPE_DATA = "plotTypeData"
+#:
+X_VALUES = "xValues"
+#:
+X_OFFSET = "xOffset"
+#:
+X_LABEL = "xLabel"
+#:
+X_SCALE = "xScale"
+#:
+Y_VALUES = "yValues"
+#:
+Y_OFFSET = "yOffset"
+#:
+Y_LABEL = "yLabel"
+#:
+Y_SCALE = "yScale"
+#:
+LEGEND_ENTRY = "legendEntry"
+
+#: Mandatory columns of visualization table
+VISUALIZATION_DF_REQUIRED_COLS = [PLOT_ID]
+
+#: Optional columns of visualization table
+VISUALIZATION_DF_OPTIONAL_COLS = [
+ PLOT_NAME,
+ PLOT_TYPE_SIMULATION,
+ PLOT_TYPE_DATA,
+ X_VALUES,
+ X_OFFSET,
+ X_LABEL,
+ X_SCALE,
+ Y_VALUES,
+ Y_OFFSET,
+ Y_LABEL,
+ Y_SCALE,
+ LEGEND_ENTRY,
+ DATASET_ID,
+]
+
+#: Visualization table columns
+VISUALIZATION_DF_COLS = [
+ *VISUALIZATION_DF_REQUIRED_COLS,
+ *VISUALIZATION_DF_OPTIONAL_COLS,
+]
+
+#: Visualization table columns that contain subplot specifications
+VISUALIZATION_DF_SUBPLOT_LEVEL_COLS = [
+ PLOT_ID,
+ PLOT_NAME,
+ PLOT_TYPE_SIMULATION,
+ PLOT_TYPE_DATA,
+ X_LABEL,
+ X_SCALE,
+ Y_LABEL,
+ Y_SCALE,
+]
+
+#: Visualization table columns that contain single plot specifications
+VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS = [
+ X_VALUES,
+ X_OFFSET,
+ Y_VALUES,
+ Y_OFFSET,
+ LEGEND_ENTRY,
+ DATASET_ID,
+]
+
+#:
+LINE_PLOT = "LinePlot"
+#:
+BAR_PLOT = "BarPlot"
+#:
+SCATTER_PLOT = "ScatterPlot"
+#: Supported plot types
+PLOT_TYPES_SIMULATION = [LINE_PLOT, BAR_PLOT, SCATTER_PLOT]
+
+#: Supported xScales
+X_SCALES = [LIN, LOG, LOG10]
+
+#: Supported yScales
+Y_SCALES = [LIN, LOG, LOG10]
+
+
+#:
+MEAN_AND_SD = "MeanAndSD"
+#:
+MEAN_AND_SEM = "MeanAndSEM"
+#:
+REPLICATE = "replicate"
+#:
+PROVIDED = "provided"
+#: Supported settings for handling replicates
+PLOT_TYPES_DATA = [MEAN_AND_SD, MEAN_AND_SEM, REPLICATE, PROVIDED]
+
+
+# YAML
+#:
+FORMAT_VERSION = "format_version"
+#:
+PARAMETER_FILE = "parameter_file"
+#:
+PROBLEMS = "problems"
+#:
+SBML_FILES = "sbml_files"
+#:
+MODEL_FILES = "model_files"
+#:
+MODEL_LOCATION = "location"
+#:
+MODEL_LANGUAGE = "language"
+#:
+CONDITION_FILES = "condition_files"
+#:
+MEASUREMENT_FILES = "measurement_files"
+#:
+OBSERVABLE_FILES = "observable_files"
+#:
+VISUALIZATION_FILES = "visualization_files"
+#:
+MAPPING_FILES = "mapping_files"
+#:
+EXTENSIONS = "extensions"
+
+
+# MAPPING
+#:
+PETAB_ENTITY_ID = "petabEntityId"
+#:
+MODEL_ENTITY_ID = "modelEntityId"
+#:
+MAPPING_DF_REQUIRED_COLS = [PETAB_ENTITY_ID, MODEL_ENTITY_ID]
+
+# MORE
+
+#:
+SIMULATION = "simulation"
+#:
+RESIDUAL = "residual"
+#:
+NOISE_VALUE = "noiseValue"
+
+# separator for multiple parameter values (bounds, observableParameters, ...)
+PARAMETER_SEPARATOR = ";"
+
+
+__all__ = [
+ x
+ for x in dir(sys.modules[__name__])
+ if not x.startswith("_") and x not in {"sys", "math"}
+]
diff --git a/petab/v1/__init__.py b/petab/v1/__init__.py
new file mode 100644
index 00000000..a8609621
--- /dev/null
+++ b/petab/v1/__init__.py
@@ -0,0 +1,23 @@
+"""The PEtab 1.0 subpackage.
+
+Contains all functionality related to handling PEtab 1.0 problems.
+"""
+
+from ..version import __version__ # noqa: F401, E402
+from .C import * # noqa: F403, F401, E402
+from .calculate import * # noqa: F403, F401, E402
+from .composite_problem import * # noqa: F403, F401, E402
+from .conditions import * # noqa: F403, F401, E402
+from .core import * # noqa: F403, F401, E402
+from .format_version import __format_version__ # noqa: F401, E402
+from .lint import * # noqa: F403, F401, E402
+from .mapping import * # noqa: F403, F401, E402
+from .measurements import * # noqa: F403, F401, E402
+from .observables import * # noqa: F403, F401, E402
+from .parameter_mapping import * # noqa: F403, F401, E402
+from .parameters import * # noqa: F403, F401, E402
+from .problem import * # noqa: F403, F401, E402
+from .sampling import * # noqa: F403, F401, E402
+from .sbml import * # noqa: F403, F401, E402
+from .simulate import * # noqa: F403, F401, E402
+from .yaml import * # noqa: F403, F401, E402
diff --git a/petab/v1/calculate.py b/petab/v1/calculate.py
new file mode 100644
index 00000000..3cc86f73
--- /dev/null
+++ b/petab/v1/calculate.py
@@ -0,0 +1,430 @@
+"""Functions performing various calculations."""
+
+import numbers
+from functools import reduce
+
+import numpy as np
+import pandas as pd
+import sympy as sp
+
+import petab.v1 as petab
+
+from .C import *
+from .math import sympify_petab
+
+__all__ = [
+ "calculate_residuals",
+ "calculate_residuals_for_table",
+ "get_symbolic_noise_formulas",
+ "evaluate_noise_formula",
+ "calculate_chi2",
+ "calculate_chi2_for_table_from_residuals",
+ "calculate_llh",
+ "calculate_llh_for_table",
+ "calculate_single_llh",
+]
+
+
+def calculate_residuals(
+ measurement_dfs: list[pd.DataFrame] | pd.DataFrame,
+ simulation_dfs: list[pd.DataFrame] | pd.DataFrame,
+ observable_dfs: list[pd.DataFrame] | pd.DataFrame,
+ parameter_dfs: list[pd.DataFrame] | pd.DataFrame,
+ normalize: bool = True,
+ scale: bool = True,
+) -> list[pd.DataFrame]:
+ """Calculate residuals.
+
+ Arguments:
+ measurement_dfs:
+ The problem measurement tables.
+ simulation_dfs:
+ Simulation tables corresponding to the measurement tables.
+ observable_dfs:
+ The problem observable tables.
+ parameter_dfs:
+ The problem parameter tables.
+ normalize:
+ Whether to normalize residuals by the noise standard deviation
+ terms.
+ scale:
+ Whether to calculate residuals of scaled values.
+
+ Returns:
+ List of DataFrames in the same structure as `measurement_dfs`
+ with a field `residual` instead of measurement.
+ """
+ # convenience
+ if isinstance(measurement_dfs, pd.DataFrame):
+ measurement_dfs = [measurement_dfs]
+ if isinstance(simulation_dfs, pd.DataFrame):
+ simulation_dfs = [simulation_dfs]
+ if isinstance(observable_dfs, pd.DataFrame):
+ observable_dfs = [observable_dfs]
+ if isinstance(parameter_dfs, pd.DataFrame):
+ parameter_dfs = [parameter_dfs]
+
+ # iterate over data frames
+ residual_dfs = []
+ for measurement_df, simulation_df, observable_df, parameter_df in zip(
+ measurement_dfs,
+ simulation_dfs,
+ observable_dfs,
+ parameter_dfs,
+ strict=True,
+ ):
+ residual_df = calculate_residuals_for_table(
+ measurement_df,
+ simulation_df,
+ observable_df,
+ parameter_df,
+ normalize,
+ scale,
+ )
+ residual_dfs.append(residual_df)
+ return residual_dfs
+
+
+def calculate_residuals_for_table(
+ measurement_df: pd.DataFrame,
+ simulation_df: pd.DataFrame,
+ observable_df: pd.DataFrame,
+ parameter_df: pd.DataFrame,
+ normalize: bool = True,
+ scale: bool = True,
+) -> pd.DataFrame:
+ """
+ Calculate residuals for a single measurement table.
+ For the arguments, see `calculate_residuals`.
+ """
+ # create residual df as copy of measurement df, change column
+ residual_df = measurement_df.copy(deep=True).rename(
+ columns={MEASUREMENT: RESIDUAL}
+ )
+ residual_df[RESIDUAL] = residual_df[RESIDUAL].astype("float64")
+ # matching columns
+ compared_cols = set(MEASUREMENT_DF_COLS)
+ compared_cols -= {MEASUREMENT}
+ compared_cols &= set(measurement_df.columns)
+ compared_cols &= set(simulation_df.columns)
+
+ # compute noise formulas for observables
+ noise_formulas = get_symbolic_noise_formulas(observable_df)
+
+ # iterate over measurements, find corresponding simulations
+ for irow, row in measurement_df.iterrows():
+ measurement = row[MEASUREMENT]
+ # look up in simulation df
+ masks = [
+ (simulation_df[col] == row[col]) | petab.is_empty(row[col])
+ for col in compared_cols
+ ]
+ mask = reduce(lambda x, y: x & y, masks)
+ simulation = simulation_df.loc[mask][SIMULATION].iloc[0]
+ if scale:
+ # apply scaling
+ observable = observable_df.loc[row[OBSERVABLE_ID]]
+ trafo = observable.get(OBSERVABLE_TRANSFORMATION, LIN)
+ simulation = petab.scale(simulation, trafo)
+ measurement = petab.scale(measurement, trafo)
+
+ # non-normalized residual is just the difference
+ residual = simulation - measurement
+
+ noise_value = 1
+ if normalize:
+ # look up noise standard deviation
+ noise_value = evaluate_noise_formula(
+ row, noise_formulas, parameter_df, simulation
+ )
+ residual /= noise_value
+
+ # fill in value
+ residual_df.loc[irow, RESIDUAL] = residual
+ return residual_df
+
+
+def get_symbolic_noise_formulas(observable_df) -> dict[str, sp.Expr]:
+ """Sympify noise formulas.
+
+ Arguments:
+ observable_df: The observable table.
+
+ Returns:
+ Dictionary of {observable_id}: {noise_formula}.
+ """
+ noise_formulas = {}
+ # iterate over observables
+ for row in observable_df.itertuples():
+ observable_id = row.Index
+ if NOISE_FORMULA not in observable_df.columns:
+ noise_formula = None
+ else:
+ noise_formula = sympify_petab(row.noiseFormula)
+ noise_formulas[observable_id] = noise_formula
+ return noise_formulas
+
+
+def evaluate_noise_formula(
+ measurement: pd.Series,
+ noise_formulas: dict[str, sp.Expr],
+ parameter_df: pd.DataFrame,
+ simulation: numbers.Number,
+) -> float:
+ """Fill in parameters for `measurement` and evaluate noise_formula.
+
+ Arguments:
+ measurement: A measurement table row.
+ noise_formulas: The noise formulas as computed by
+ `get_symbolic_noise_formulas`.
+ parameter_df: The parameter table.
+ simulation: The simulation corresponding to the measurement, scaled.
+
+ Returns:
+ The noise value.
+ """
+ # the observable id
+ observable_id = measurement[OBSERVABLE_ID]
+
+ # extract measurement specific overrides
+ observable_parameter_overrides = petab.split_parameter_replacement_list(
+ measurement.get(NOISE_PARAMETERS, None)
+ )
+ # fill in measurement specific parameters
+ overrides = {
+ sp.Symbol(
+ f"noiseParameter{i_obs_par + 1}_{observable_id}", real=True
+ ): obs_par
+ for i_obs_par, obs_par in enumerate(observable_parameter_overrides)
+ }
+
+ # fill in observables
+ overrides[sp.Symbol(observable_id, real=True)] = simulation
+
+ # fill in general parameters
+ for row in parameter_df.itertuples():
+ overrides[sp.Symbol(row.Index, real=True)] = row.nominalValue
+
+ # replace parametric measurement specific parameters
+ for key, value in overrides.items():
+ if not isinstance(value, numbers.Number):
+ # is parameter
+ overrides[key] = parameter_df.loc[value, NOMINAL_VALUE]
+
+ # replace parameters by values in formula
+ noise_formula = noise_formulas[observable_id]
+ noise_value = noise_formula.subs(overrides)
+
+ # conversion is possible if all parameters are replaced
+ try:
+ noise_value = float(noise_value)
+ except TypeError as e:
+ raise ValueError(
+ f"Cannot replace all parameters in noise formula {noise_value} "
+ f"for observable {observable_id}. "
+ f"Missing {noise_formula.free_symbols}. Note that model states "
+ "are currently not supported."
+ ) from e
+ return noise_value
+
+
+def calculate_chi2(
+ measurement_dfs: list[pd.DataFrame] | pd.DataFrame,
+ simulation_dfs: list[pd.DataFrame] | pd.DataFrame,
+ observable_dfs: list[pd.DataFrame] | pd.DataFrame,
+ parameter_dfs: list[pd.DataFrame] | pd.DataFrame,
+ normalize: bool = True,
+ scale: bool = True,
+) -> float:
+ """Calculate the chi2 value.
+
+ Arguments:
+ measurement_dfs:
+ The problem measurement tables.
+ simulation_dfs:
+ Simulation tables corresponding to the measurement tables.
+ observable_dfs:
+ The problem observable tables.
+ parameter_dfs:
+ The problem parameter tables.
+ normalize:
+ Whether to normalize residuals by the noise standard deviation
+ terms.
+ scale:
+ Whether to calculate residuals of scaled values.
+
+ Returns:
+ The aggregated chi2 value.
+ """
+ residual_dfs = calculate_residuals(
+ measurement_dfs,
+ simulation_dfs,
+ observable_dfs,
+ parameter_dfs,
+ normalize,
+ scale,
+ )
+ chi2s = [
+ calculate_chi2_for_table_from_residuals(df) for df in residual_dfs
+ ]
+ return sum(chi2s)
+
+
+def calculate_chi2_for_table_from_residuals(
+ residual_df: pd.DataFrame,
+) -> float:
+ """Compute chi2 value for a single residual table."""
+ return (np.array(residual_df[RESIDUAL]) ** 2).sum()
+
+
+def calculate_llh(
+ measurement_dfs: list[pd.DataFrame] | pd.DataFrame,
+ simulation_dfs: list[pd.DataFrame] | pd.DataFrame,
+ observable_dfs: list[pd.DataFrame] | pd.DataFrame,
+ parameter_dfs: list[pd.DataFrame] | pd.DataFrame,
+) -> float:
+ """Calculate total log likelihood.
+
+ Arguments:
+ measurement_dfs:
+ The problem measurement tables.
+ simulation_dfs:
+ Simulation tables corresponding to the measurement tables.
+ observable_dfs:
+ The problem observable tables.
+ parameter_dfs:
+ The problem parameter tables.
+
+ Returns:
+ The log-likelihood.
+ """
+ # convenience
+ if isinstance(measurement_dfs, pd.DataFrame):
+ measurement_dfs = [measurement_dfs]
+ if isinstance(simulation_dfs, pd.DataFrame):
+ simulation_dfs = [simulation_dfs]
+ if isinstance(observable_dfs, pd.DataFrame):
+ observable_dfs = [observable_dfs]
+ if isinstance(parameter_dfs, pd.DataFrame):
+ parameter_dfs = [parameter_dfs]
+
+ # iterate over data frames
+ llhs = []
+ for measurement_df, simulation_df, observable_df, parameter_df in zip(
+ measurement_dfs,
+ simulation_dfs,
+ observable_dfs,
+ parameter_dfs,
+ strict=True,
+ ):
+ _llh = calculate_llh_for_table(
+ measurement_df, simulation_df, observable_df, parameter_df
+ )
+ llhs.append(_llh)
+ return sum(llhs)
+
+
+def calculate_llh_for_table(
+ measurement_df: pd.DataFrame,
+ simulation_df: pd.DataFrame,
+ observable_df: pd.DataFrame,
+ parameter_df: pd.DataFrame,
+) -> float:
+ """Calculate log-likelihood for one set of tables. For the arguments, see
+ `calculate_llh`.
+ """
+ llhs = []
+
+ # matching columns
+ compared_cols = set(MEASUREMENT_DF_COLS)
+ compared_cols -= {MEASUREMENT}
+ compared_cols &= set(measurement_df.columns)
+ compared_cols &= set(simulation_df.columns)
+
+ # compute noise formulas for observables
+ noise_formulas = get_symbolic_noise_formulas(observable_df)
+
+ # iterate over measurements, find corresponding simulations
+ for _, row in measurement_df.iterrows():
+ measurement = row[MEASUREMENT]
+
+ # look up in simulation df
+ masks = [
+ (simulation_df[col] == row[col]) | petab.is_empty(row[col])
+ for col in compared_cols
+ ]
+ mask = reduce(lambda x, y: x & y, masks)
+
+ simulation = simulation_df.loc[mask][SIMULATION].iloc[0]
+
+ observable = observable_df.loc[row[OBSERVABLE_ID]]
+
+ # get scale
+ scale = observable.get(OBSERVABLE_TRANSFORMATION, LIN)
+
+ # get noise standard deviation
+ noise_value = evaluate_noise_formula(
+ row, noise_formulas, parameter_df, petab.scale(simulation, scale)
+ )
+
+ # get noise distribution
+ noise_distribution = observable.get(NOISE_DISTRIBUTION, NORMAL)
+
+ llh = calculate_single_llh(
+ measurement, simulation, scale, noise_distribution, noise_value
+ )
+ llhs.append(llh)
+ return sum(llhs)
+
+
+def calculate_single_llh(
+ measurement: float,
+ simulation: float,
+ scale: str,
+ noise_distribution: str,
+ noise_value: float,
+) -> float:
+ """Calculate a single log likelihood.
+
+ Arguments:
+ measurement: The measurement value.
+ simulation: The simulated value.
+ scale: The scale on which the noise model is to be applied.
+ noise_distribution: The noise distribution.
+ noise_value: The considered noise models possess a single noise
+ parameter, e.g. the normal standard deviation.
+
+ Returns:
+ The computed log-likelihood for the given values.
+ """
+ # short-hand
+ m, s, sigma = measurement, simulation, noise_value
+ pi, log, log10 = np.pi, np.log, np.log10
+
+ # go over the possible cases
+ if noise_distribution == NORMAL and scale == LIN:
+ nllh = 0.5 * log(2 * pi * sigma**2) + 0.5 * ((s - m) / sigma) ** 2
+ elif noise_distribution == NORMAL and scale == LOG:
+ nllh = (
+ 0.5 * log(2 * pi * sigma**2 * m**2)
+ + 0.5 * ((log(s) - log(m)) / sigma) ** 2
+ )
+ elif noise_distribution == NORMAL and scale == LOG10:
+ nllh = (
+ 0.5 * log(2 * pi * sigma**2 * m**2 * log(10) ** 2)
+ + 0.5 * ((log10(s) - log10(m)) / sigma) ** 2
+ )
+ elif noise_distribution == LAPLACE and scale == LIN:
+ nllh = log(2 * sigma) + abs((s - m) / sigma)
+ elif noise_distribution == LAPLACE and scale == LOG:
+ nllh = log(2 * sigma * m) + abs((log(s) - log(m)) / sigma)
+ elif noise_distribution == LAPLACE and scale == LOG10:
+ nllh = log(2 * sigma * m * log(10)) + abs(
+ (log10(s) - log10(m)) / sigma
+ )
+ else:
+ raise NotImplementedError(
+ "Unsupported combination of noise_distribution and scale "
+ f"specified: {noise_distribution}, {scale}."
+ )
+ return -nllh
diff --git a/petab/v1/composite_problem.py b/petab/v1/composite_problem.py
new file mode 100644
index 00000000..5f07d523
--- /dev/null
+++ b/petab/v1/composite_problem.py
@@ -0,0 +1,85 @@
+"""PEtab problems consisting of multiple models"""
+import os
+
+import pandas as pd
+
+from . import parameters, problem, yaml
+from .C import * # noqa: F403
+
+__all__ = ["CompositeProblem"]
+
+
+class CompositeProblem:
+ """Representation of a PEtab problem consisting of multiple models
+
+ Attributes:
+ problems:
+ List of :py:class:`petab.Problem` s
+ parameter_df:
+ PEtab parameter DataFrame
+ """
+
+ def __init__(
+ self,
+ parameter_df: pd.DataFrame = None,
+ problems: list[problem.Problem] = None,
+ ):
+ """Constructor
+
+ Arguments:
+ parameter_df:
+ see CompositeProblem.parameter_df
+ problems:
+ see CompositeProblem.problems
+ """
+ self.problems: list[problem.Problem] = problems
+ self.parameter_df: pd.DataFrame = parameter_df
+
+ @staticmethod
+ def from_yaml(yaml_config: dict | str) -> "CompositeProblem":
+ """Create from YAML file
+
+ Factory method to create a CompositeProblem instance from a PEtab
+ YAML config file
+
+ Arguments:
+ yaml_config: PEtab configuration as dictionary or YAML file name
+ """
+ if isinstance(yaml_config, str):
+ path_prefix = os.path.dirname(yaml_config)
+ yaml_config = yaml.load_yaml(yaml_config)
+ else:
+ path_prefix = ""
+
+ parameter_df = parameters.get_parameter_df(
+ os.path.join(path_prefix, yaml_config[PARAMETER_FILE])
+ )
+
+ problems = []
+ for problem_config in yaml_config[PROBLEMS]:
+ yaml.assert_single_condition_and_sbml_file(problem_config)
+
+ # don't set parameter file if we have multiple models
+ cur_problem = problem.Problem.from_files(
+ sbml_file=os.path.join(
+ path_prefix, problem_config[SBML_FILES][0]
+ ),
+ measurement_file=[
+ os.path.join(path_prefix, f)
+ for f in problem_config[MEASUREMENT_FILES]
+ ],
+ condition_file=os.path.join(
+ path_prefix, problem_config[CONDITION_FILES][0]
+ ),
+ visualization_files=[
+ os.path.join(path_prefix, f)
+ for f in problem_config[VISUALIZATION_FILES]
+ ],
+ observable_files=[
+ os.path.join(path_prefix, f)
+ for f in problem_config[OBSERVABLE_FILES]
+ ],
+ )
+ problems.append(cur_problem)
+
+ return CompositeProblem(parameter_df=parameter_df, problems=problems)
diff --git a/petab/v1/conditions.py b/petab/v1/conditions.py
new file mode 100644
index 00000000..4e691d62
--- /dev/null
+++ b/petab/v1/conditions.py
@@ -0,0 +1,118 @@
+"""Functions operating on the PEtab condition table"""
+
+from collections.abc import Iterable
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+from . import core, lint
+from .C import *
+
+__all__ = [
+ "get_condition_df",
+ "write_condition_df",
+ "create_condition_df",
+ "get_parametric_overrides",
+]
+
+
+def get_condition_df(
+ condition_file: str | pd.DataFrame | Path | None,
+) -> pd.DataFrame:
+ """Read the provided condition file into a ``pandas.DataFrame``
+
+ Conditions are rows, parameters are columns, conditionId is index.
+
+ Arguments:
+ condition_file: File name of PEtab condition file or pandas.DataFrame
+ """
+ if condition_file is None:
+ return condition_file
+
+ if isinstance(condition_file, str | Path):
+ condition_file = pd.read_csv(
+ condition_file, sep="\t", float_precision="round_trip"
+ )
+
+ lint.assert_no_leading_trailing_whitespace(
+ condition_file.columns.values, "condition"
+ )
+
+ if not isinstance(condition_file.index, pd.RangeIndex):
+ condition_file.reset_index(
+ drop=condition_file.index.name != CONDITION_ID,
+ inplace=True,
+ )
+
+ try:
+ condition_file.set_index([CONDITION_ID], inplace=True)
+ except KeyError:
+ raise KeyError(
+ f"Condition table missing mandatory field {CONDITION_ID}."
+ ) from None
+
+ return condition_file
+
+
+def write_condition_df(df: pd.DataFrame, filename: str | Path) -> None:
+ """Write PEtab condition table
+
+ Arguments:
+ df: PEtab condition table
+ filename: Destination file name
+ """
+ df = get_condition_df(df)
+ df.to_csv(filename, sep="\t", index=True)
+
+
+def create_condition_df(
+ parameter_ids: Iterable[str], condition_ids: Iterable[str] | None = None
+) -> pd.DataFrame:
+ """Create empty condition DataFrame
+
+ Arguments:
+ parameter_ids: the columns
+ condition_ids: the rows
+ Returns:
+ A :py:class:`pandas.DataFrame` with the given rows and columns,
+ filled with NaN values
+ """
+ condition_ids = [] if condition_ids is None else list(condition_ids)
+
+ data = {CONDITION_ID: condition_ids}
+ df = pd.DataFrame(data)
+
+ for p in parameter_ids:
+ if not lint.is_valid_identifier(p):
+ raise ValueError("Invalid parameter ID: " + p)
+ df[p] = np.nan
+
+ df.set_index(CONDITION_ID, inplace=True)
+
+ return df
+
+
+def get_parametric_overrides(condition_df: pd.DataFrame) -> list[str]:
+ """Get parametric overrides from condition table
+
+ Arguments:
+ condition_df: PEtab condition table
+
+ Returns:
+ List of parameter IDs that are mapped in a condition-specific way
+ """
+ constant_parameters = set(condition_df.columns.values.tolist()) - {
+ CONDITION_ID,
+ CONDITION_NAME,
+ }
+ result = []
+
+ for column in constant_parameters:
+ if np.issubdtype(condition_df[column].dtype, np.number):
+ continue
+
+ floatified = condition_df.loc[:, column].apply(core.to_float_if_float)
+
+ result.extend(x for x in floatified if not isinstance(x, float))
+ return result
diff --git a/petab/v1/core.py b/petab/v1/core.py
new file mode 100644
index 00000000..5004141f
--- /dev/null
+++ b/petab/v1/core.py
@@ -0,0 +1,535 @@
+"""PEtab core functions (or functions that don't fit anywhere else)"""
+import logging
+import os
+import re
+from collections.abc import Callable, Iterable, Sequence
+from pathlib import Path
+from typing import (
+ Any,
+)
+from warnings import warn
+
+import numpy as np
+import pandas as pd
+from pandas.api.types import is_string_dtype
+
+from . import yaml
+from .C import * # noqa: F403
+
+logger = logging.getLogger(__name__)
+__all__ = [
+ "get_simulation_df",
+ "write_simulation_df",
+ "get_visualization_df",
+ "write_visualization_df",
+ "get_notnull_columns",
+ "flatten_timepoint_specific_output_overrides",
+ "concat_tables",
+ "to_float_if_float",
+ "is_empty",
+ "create_combine_archive",
+ "unique_preserve_order",
+ "unflatten_simulation_df",
+]
+
+POSSIBLE_GROUPVARS_FLATTENED_PROBLEM = [
+ OBSERVABLE_ID,
+ OBSERVABLE_PARAMETERS,
+ NOISE_PARAMETERS,
+ SIMULATION_CONDITION_ID,
+ PREEQUILIBRATION_CONDITION_ID,
+]
+
+
+def get_simulation_df(simulation_file: str | Path) -> pd.DataFrame:
+ """Read PEtab simulation table
+
+ Arguments:
+ simulation_file: URL or filename of PEtab simulation table
+
+ Returns:
+ Simulation DataFrame
+ """
+ return pd.read_csv(
+ simulation_file, sep="\t", index_col=None, float_precision="round_trip"
+ )
+
+
+def write_simulation_df(df: pd.DataFrame, filename: str | Path) -> None:
+ """Write PEtab simulation table
+
+ Arguments:
+ df: PEtab simulation table
+ filename: Destination file name
+ """
+ df.to_csv(filename, sep="\t", index=False)
+
+
+def get_visualization_df(
+ visualization_file: str | Path | pd.DataFrame | None,
+) -> pd.DataFrame | None:
+ """Read PEtab visualization table
+
+ Arguments:
+ visualization_file:
+ URL or filename of PEtab visualization table to read from,
+ or a DataFrame or None that will be returned as is.
+
+ Returns:
+ Visualization DataFrame
+ """
+ if visualization_file is None:
+ return None
+
+ if isinstance(visualization_file, pd.DataFrame):
+ return visualization_file
+
+ try:
+ types = {PLOT_NAME: str}
+ vis_spec = pd.read_csv(
+ visualization_file,
+ sep="\t",
+ index_col=None,
+ converters=types,
+ float_precision="round_trip",
+ )
+ except pd.errors.EmptyDataError:
+ warn(
+ "Visualization table is empty. Defaults will be used. "
+ "Refer to the documentation for details.",
+ stacklevel=2,
+ )
+ vis_spec = pd.DataFrame()
+ return vis_spec
+
+
+def write_visualization_df(df: pd.DataFrame, filename: str | Path) -> None:
+ """Write PEtab visualization table
+
+ Arguments:
+ df: PEtab visualization table
+ filename: Destination file name
+ """
+ df.to_csv(filename, sep="\t", index=False)
+
+
+def get_notnull_columns(df: pd.DataFrame, candidates: Iterable):
+ """
+ Return list of ``df``-columns in ``candidates`` which are not all null/nan.
+
+ The output can e.g. be used as input for ``pandas.DataFrame.groupby``.
+
+ Arguments:
+ df:
+ Dataframe
+ candidates:
+ Columns of ``df`` to consider
+ """
+ return [
+ col for col in candidates if col in df and not np.all(df[col].isnull())
+ ]
+
+
+def get_observable_replacement_id(groupvars, groupvar) -> str:
+ """Get the replacement ID for an observable.
+
+ Arguments:
+ groupvars:
+ The columns of a PEtab measurement table that should be unique
+ between observables in a flattened PEtab problem.
+ groupvar:
+ A specific grouping of `groupvars`.
+
+ Returns:
+ The observable replacement ID.
+ """
+ replacement_id = ""
+ for field in POSSIBLE_GROUPVARS_FLATTENED_PROBLEM:
+ if field in groupvars:
+ val = (
+ str(groupvar[groupvars.index(field)])
+ .replace(PARAMETER_SEPARATOR, "_")
+ .replace(".", "_")
+ )
+ if replacement_id == "":
+ replacement_id = val
+ elif val != "":
+ replacement_id += f"__{val}"
+ return replacement_id
+
+
+def get_hyperparameter_replacement_id(
+ hyperparameter_type,
+ observable_replacement_id,
+):
+ """Get the full ID for a replaced hyperparameter.
+
+ Arguments:
+ hyperparameter_type:
+ The type of hyperparameter, e.g. `noiseParameter`.
+ observable_replacement_id:
+ The observable replacement ID, e.g. the output of
+ `get_observable_replacement_id`.
+
+ Returns:
+ The hyperparameter replacement ID, with a field that will be replaced
+ by the first matched substring in a regex substitution.
+ """
+ return f"{hyperparameter_type}\\1_{observable_replacement_id}"
+
+
+def get_flattened_id_mappings(
+ petab_problem: "petab.problem.Problem",
+) -> dict[str, dict[str, str]]:
+ """Get mappings from flattened IDs to original (unflattened) IDs.
+
+ Arguments:
+ petab_problem:
+ The unflattened PEtab problem.
+
+ Returns:
+ A dictionary of dictionaries. Each inner dictionary is a mapping
+ from flattened ID to original ID. The outer dictionary holds one
+ such mapping each for: observable IDs; noise parameter IDs; and
+ observable parameter IDs.
+ """
+ groupvars = get_notnull_columns(
+ petab_problem.measurement_df, POSSIBLE_GROUPVARS_FLATTENED_PROBLEM
+ )
+ mappings = {
+ OBSERVABLE_ID: {},
+ NOISE_PARAMETERS: {},
+ OBSERVABLE_PARAMETERS: {},
+ }
+ for groupvar, measurements in petab_problem.measurement_df.groupby(
+ groupvars, dropna=False
+ ):
+ observable_id = groupvar[groupvars.index(OBSERVABLE_ID)]
+ observable_replacement_id = get_observable_replacement_id(
+ groupvars, groupvar
+ )
+
+ logger.debug(f"Creating synthetic observable {observable_id}")
+ if observable_replacement_id in petab_problem.observable_df.index:
+ raise RuntimeError(
+ "could not create synthetic observables "
+ f"since {observable_replacement_id} was "
+ "already present in observable table"
+ )
+
+ mappings[OBSERVABLE_ID][observable_replacement_id] = observable_id
+
+ for field, hyperparameter_type in [
+ (NOISE_PARAMETERS, "noiseParameter"),
+ (OBSERVABLE_PARAMETERS, "observableParameter"),
+ ]:
+ if field in measurements:
+ mappings[field][
+ get_hyperparameter_replacement_id(
+ hyperparameter_type=hyperparameter_type,
+ observable_replacement_id=observable_replacement_id,
+ )
+ ] = rf"{hyperparameter_type}([0-9]+)_{observable_id}"
+ return mappings
+
+
+def flatten_timepoint_specific_output_overrides(
+ petab_problem: "petab.problem.Problem",
+) -> None:
+ """Flatten timepoint-specific output parameter overrides.
+
+ If the PEtab problem definition has timepoint-specific
+ `observableParameters` or `noiseParameters` for the same observable,
+ replace those by replicating the respective observable.
+
+ This is a helper function for some tools which may not support such
+ timepoint-specific mappings. The observable table and measurement table
+ are modified in place.
+
+ Arguments:
+ petab_problem:
+ PEtab problem to work on. Modified in place.
+ """
+ new_measurement_dfs = []
+ new_observable_dfs = []
+ groupvars = get_notnull_columns(
+ petab_problem.measurement_df, POSSIBLE_GROUPVARS_FLATTENED_PROBLEM
+ )
+
+ mappings = get_flattened_id_mappings(petab_problem)
+
+ for groupvar, measurements in petab_problem.measurement_df.groupby(
+ groupvars, dropna=False
+ ):
+ obs_id = groupvar[groupvars.index(OBSERVABLE_ID)]
+ observable_replacement_id = get_observable_replacement_id(
+ groupvars, groupvar
+ )
+
+ observable = petab_problem.observable_df.loc[obs_id].copy()
+ observable.name = observable_replacement_id
+ for field, hyperparameter_type, target in [
+ (NOISE_PARAMETERS, "noiseParameter", NOISE_FORMULA),
+ (OBSERVABLE_PARAMETERS, "observableParameter", OBSERVABLE_FORMULA),
+ (OBSERVABLE_PARAMETERS, "observableParameter", NOISE_FORMULA),
+ ]:
+ if field not in measurements:
+ continue
+
+ if not is_string_dtype(type(observable[target])):
+ # if not a string, we don't have to substitute anything
+ continue
+
+ hyperparameter_replacement_id = get_hyperparameter_replacement_id(
+ hyperparameter_type=hyperparameter_type,
+ observable_replacement_id=observable_replacement_id,
+ )
+ hyperparameter_id = mappings[field][hyperparameter_replacement_id]
+ observable[target] = re.sub(
+ hyperparameter_id,
+ hyperparameter_replacement_id,
+ observable[target],
+ )
+
+ measurements[OBSERVABLE_ID] = observable_replacement_id
+ new_measurement_dfs.append(measurements)
+ new_observable_dfs.append(observable)
+
+ petab_problem.observable_df = pd.concat(new_observable_dfs, axis=1).T
+ petab_problem.observable_df.index.name = OBSERVABLE_ID
+ petab_problem.measurement_df = pd.concat(new_measurement_dfs)
+
+
+def unflatten_simulation_df(
+ simulation_df: pd.DataFrame,
+ petab_problem: "petab.problem.Problem",
+) -> pd.DataFrame:
+ """Unflatten simulations from a flattened PEtab problem.
+
+ A flattened PEtab problem is the output of applying
+ :func:`flatten_timepoint_specific_output_overrides` to a PEtab problem.
+
+ Arguments:
+ simulation_df:
+ The simulation dataframe. A dataframe in the same format as a PEtab
+ measurements table, but with the ``measurement`` column switched
+ with a ``simulation`` column.
+ petab_problem:
+ The unflattened PEtab problem.
+
+ Returns:
+ The simulation dataframe for the unflattened PEtab problem.
+ """
+ mappings = get_flattened_id_mappings(petab_problem)
+ original_observable_ids = simulation_df[OBSERVABLE_ID].replace(
+ mappings[OBSERVABLE_ID]
+ )
+ unflattened_simulation_df = simulation_df.assign(
+ **{
+ OBSERVABLE_ID: original_observable_ids,
+ }
+ )
+ return unflattened_simulation_df
+
+
+def concat_tables(
+ tables: str | Path | pd.DataFrame | Iterable[pd.DataFrame | str | Path],
+ file_parser: Callable | None = None,
+) -> pd.DataFrame:
+ """Concatenate tables given as DataFrames or as filenames plus a parser
+
+ Arguments:
+ tables:
+ Iterable of tables to join, as DataFrame or filename.
+ file_parser:
+ Function used to read the table in case filenames are provided,
+ accepting a filename as only argument.
+
+ Returns:
+ The concatenated DataFrames
+ """
+ if isinstance(tables, pd.DataFrame):
+ return tables
+
+ if isinstance(tables, str | Path):
+ return file_parser(tables)
+
+ df = pd.DataFrame()
+
+ for tmp_df in tables:
+ # load from file, if necessary
+ if isinstance(tmp_df, str | Path):
+ tmp_df = file_parser(tmp_df)
+
+ df = pd.concat(
+ [df, tmp_df],
+ sort=False,
+ ignore_index=isinstance(tmp_df.index, pd.RangeIndex),
+ )
+
+ return df
+
+
+def to_float_if_float(x: Any) -> Any:
+ """Return input as float if possible, otherwise return as is
+
+ Arguments:
+ x: Anything
+
+ Returns:
+ ``x`` as float if possible, otherwise ``x``
+ """
+ try:
+ return float(x)
+ except (ValueError, TypeError):
+ return x
+
+
+def is_empty(val) -> bool:
+ """Check if the value `val`, e.g. a table entry, is empty.
+
+ Arguments:
+ val: The value to check.
+
+ Returns:
+ Whether the field is to be considered empty.
+ """
+ return val == "" or pd.isnull(val)
+
+
+def create_combine_archive(
+ yaml_file: str | Path,
+ filename: str | Path,
+ family_name: str | None = None,
+ given_name: str | None = None,
+ email: str | None = None,
+ organization: str | None = None,
+) -> None:
+ """Create COMBINE archive (https://co.mbine.org/documents/archive) based
+ on PEtab YAML file.
+
+ Arguments:
+ yaml_file: Path to PEtab YAML file
+ filename: Destination file name
+ family_name: Family name of archive creator
+ given_name: Given name of archive creator
+ email: E-mail address of archive creator
+ organization: Organization of archive creator
+ """
+ path_prefix = os.path.dirname(str(yaml_file))
+ yaml_config = yaml.load_yaml(yaml_file)
+
+ # function-level import, because module-level import interfered with
+ # other SWIG interfaces
+ try:
+ import libcombine
+ except ImportError as err:
+ raise ImportError(
+ "To use PEtab's COMBINE functionality, libcombine "
+ "(python-libcombine) must be installed."
+ ) from err
+
+ def _add_file_metadata(location: str, description: str = ""):
+ """Add metadata to the added file"""
+ omex_description = libcombine.OmexDescription()
+ omex_description.setAbout(location)
+ omex_description.setDescription(description)
+ omex_description.setCreated(
+ libcombine.OmexDescription.getCurrentDateAndTime()
+ )
+ archive.addMetadata(location, omex_description)
+
+ archive = libcombine.CombineArchive()
+
+ # Add PEtab files and metadata
+ archive.addFile(
+ str(yaml_file),
+ os.path.basename(yaml_file),
+ "http://identifiers.org/combine.specifications/petab.version-1",
+ True,
+ )
+ _add_file_metadata(
+ location=os.path.basename(yaml_file), description="PEtab YAML file"
+ )
+
+ # Add parameter file(s) that describe a single parameter table.
+ # Works for a single file name, or a list of file names.
+ for parameter_subset_file in list(
+ np.array(yaml_config[PARAMETER_FILE]).flat
+ ):
+ archive.addFile(
+ os.path.join(path_prefix, parameter_subset_file),
+ parameter_subset_file,
+ libcombine.KnownFormats.lookupFormat("tsv"),
+ False,
+ )
+ _add_file_metadata(
+ location=parameter_subset_file, description="PEtab parameter file"
+ )
+
+ for problem in yaml_config[PROBLEMS]:
+ for sbml_file in problem[SBML_FILES]:
+ archive.addFile(
+ os.path.join(path_prefix, sbml_file),
+ sbml_file,
+ libcombine.KnownFormats.lookupFormat("sbml"),
+ False,
+ )
+ _add_file_metadata(location=sbml_file, description="SBML model")
+
+ for field in [
+ MEASUREMENT_FILES,
+ OBSERVABLE_FILES,
+ VISUALIZATION_FILES,
+ CONDITION_FILES,
+ ]:
+ if field not in problem:
+ continue
+
+ for file in problem[field]:
+ archive.addFile(
+ os.path.join(path_prefix, file),
+ file,
+ libcombine.KnownFormats.lookupFormat("tsv"),
+ False,
+ )
+ desc = field.split("_")[0]
+ _add_file_metadata(
+ location=file, description=f"PEtab {desc} file"
+ )
+
+ # Add archive metadata
+ description = libcombine.OmexDescription()
+ description.setAbout(".")
+ description.setDescription("PEtab archive")
+ description.setCreated(libcombine.OmexDescription.getCurrentDateAndTime())
+
+ # Add creator info
+ creator = libcombine.VCard()
+ if family_name:
+ creator.setFamilyName(family_name)
+ if given_name:
+ creator.setGivenName(given_name)
+ if email:
+ creator.setEmail(email)
+ if organization:
+ creator.setOrganization(organization)
+ description.addCreator(creator)
+
+ archive.addMetadata(".", description)
+ archive.writeToFile(str(filename))
+
+
+def unique_preserve_order(seq: Sequence) -> list:
+ """Return a list of unique elements in Sequence, keeping only the first
+ occurrence of each element
+
+ Parameters:
+ seq: Sequence to prune
+
+ Returns:
+ List of unique elements in ``seq``
+ """
+ seen = set()
+ seen_add = seen.add
+ return [x for x in seq if not (x in seen or seen_add(x))]
diff --git a/petab/format_version.py b/petab/v1/format_version.py
similarity index 100%
rename from petab/format_version.py
rename to petab/v1/format_version.py
diff --git a/petab/v1/lint.py b/petab/v1/lint.py
new file mode 100644
index 00000000..6f70520b
--- /dev/null
+++ b/petab/v1/lint.py
@@ -0,0 +1,1221 @@
+"""Integrity checks and tests for specific features used"""
+
+import copy
+import logging
+import numbers
+import re
+from collections import Counter
+from collections.abc import Iterable
+from typing import Any
+
+import numpy as np
+import pandas as pd
+import sympy as sp
+
+import petab.v1 as petab
+
+from . import core, measurements, parameters
+from .C import * # noqa: F403
+from .math import sympify_petab
+from .models import Model
+
+logger = logging.getLogger(__name__)
+__all__ = [
+ "assert_all_parameters_present_in_parameter_df",
+ "assert_measured_observables_defined",
+ "assert_measurement_conditions_present_in_condition_table",
+ "assert_measurements_not_null",
+ "assert_measurements_numeric",
+ "assert_model_parameters_in_condition_or_parameter_table",
+ "assert_no_leading_trailing_whitespace",
+ "assert_noise_distributions_valid",
+ "assert_parameter_bounds_are_numeric",
+ "assert_parameter_estimate_is_boolean",
+ "assert_parameter_id_is_string",
+ "assert_parameter_prior_parameters_are_valid",
+ "assert_parameter_prior_type_is_valid",
+ "assert_parameter_scale_is_valid",
+ "assert_unique_observable_ids",
+ "assert_unique_parameter_ids",
+ "check_condition_df",
+ "check_ids",
+ "check_measurement_df",
+ "check_observable_df",
+ "check_parameter_bounds",
+ "check_parameter_df",
+ "condition_table_is_parameter_free",
+ "get_non_unique",
+ "is_scalar_float",
+ "is_valid_identifier",
+ "lint_problem",
+ "measurement_table_has_observable_parameter_numeric_overrides",
+ "measurement_table_has_timepoint_specific_mappings",
+ "observable_table_has_nontrivial_noise_formula",
+]
+
+
+def _check_df(df: pd.DataFrame, req_cols: Iterable, name: str) -> None:
+ """Check if given columns are present in DataFrame
+
+ Arguments:
+ df: Dataframe to check
+ req_cols: Column names which have to be present
+ name: Name of the DataFrame to be included in error message
+
+ Raises:
+ AssertionError: if a column is missing
+ """
+ if missing_cols := set(req_cols) - set(df.columns.values):
+ raise AssertionError(
+ f"DataFrame {name} requires the columns {missing_cols}."
+ )
+
+
+def assert_no_leading_trailing_whitespace(
+ names_list: Iterable[str], name: str
+) -> None:
+ """Check that no element of Iterable has leading/trailing whitespace
+
+ Arguments:
+ names_list: strings to check for whitespace
+ name: name of `names_list` for error messages
+
+ Raises:
+ AssertionError: if there is leading or trailing whitespace
+ """
+ r = re.compile(r"(?:^\s)|(?:\s$)")
+ for i, x in enumerate(names_list):
+ if isinstance(x, str) and r.search(x):
+ raise AssertionError(f"Whitespace around {name}[{i}] = '{x}'.")
+
+
+def check_condition_df(
+    df: pd.DataFrame,
+    model: Model | None = None,
+    observable_df: pd.DataFrame | None = None,
+    mapping_df: pd.DataFrame | None = None,
+) -> None:
+    """Run sanity checks on PEtab condition table
+
+    Arguments:
+        df: PEtab condition DataFrame
+        model: Model for additional checking of parameter IDs
+        observable_df: PEtab observables DataFrame
+        mapping_df: PEtab mapping DataFrame
+
+    Raises:
+        AssertionError: in case of problems
+    """
+    # Check required columns are present
+    req_cols = []  # currently no column besides the index is required
+    _check_df(df, req_cols, "condition")
+
+    # Check for correct index
+    if df.index.name != CONDITION_ID:
+        raise AssertionError(
+            f"Condition table has wrong index {df.index.name}. "
+            f"expected {CONDITION_ID}."
+        )
+
+    check_ids(df.index.values, kind="condition")
+
+    if not df.index.is_unique:
+        raise AssertionError(
+            "Non-unique condition IDs: "
+            f"{df.index.values[df.index.duplicated()]}"
+        )
+
+    for column_name in req_cols:
+        if not np.issubdtype(df[column_name].dtype, np.number):
+            assert_no_leading_trailing_whitespace(
+                df[column_name].values, column_name
+            )
+
+    if model is not None:
+        allowed_cols = set(model.get_valid_ids_for_condition_table())
+        if observable_df is not None:
+            allowed_cols |= set(
+                petab.get_output_parameters(
+                    model=model,
+                    observable_df=observable_df,
+                    mapping_df=mapping_df,
+                )
+            )
+        if mapping_df is not None:
+            allowed_cols |= set(mapping_df.index.values)
+        for column_name in df.columns:
+            if (
+                column_name != CONDITION_NAME
+                and column_name not in allowed_cols
+            ):
+                raise AssertionError(
+                    "Condition table contains column for unknown entity '"
+                    f"{column_name}'."
+                )
+
+
+def check_measurement_df(
+ df: pd.DataFrame, observable_df: pd.DataFrame | None = None
+) -> None:
+ """Run sanity checks on PEtab measurement table
+
+ Arguments:
+ df: PEtab measurement DataFrame
+ observable_df: PEtab observable DataFrame for checking if measurements
+ are compatible with observable transformations.
+
+ Raises:
+ AssertionError, ValueError: in case of problems
+ """
+ _check_df(df, MEASUREMENT_DF_REQUIRED_COLS, "measurement")
+
+ for column_name in MEASUREMENT_DF_REQUIRED_COLS:
+ if not np.issubdtype(df[column_name].dtype, np.number):
+ assert_no_leading_trailing_whitespace(
+ df[column_name].values, column_name
+ )
+
+ for column_name in MEASUREMENT_DF_OPTIONAL_COLS:
+ if column_name in df and not np.issubdtype(
+ df[column_name].dtype, np.number
+ ):
+ assert_no_leading_trailing_whitespace(
+ df[column_name].values, column_name
+ )
+
+ if observable_df is not None:
+ assert_measured_observables_defined(df, observable_df)
+ measurements.assert_overrides_match_parameter_count(df, observable_df)
+
+ if OBSERVABLE_TRANSFORMATION in observable_df:
+ # Check for positivity of measurements in case of
+ # log-transformation
+ assert_unique_observable_ids(observable_df)
+ # If the above is not checked, in the following loop
+ # trafo may become a pandas Series
+ for measurement, obs_id in zip(
+ df[MEASUREMENT], df[OBSERVABLE_ID], strict=True
+ ):
+ trafo = observable_df.loc[obs_id, OBSERVABLE_TRANSFORMATION]
+ if measurement <= 0.0 and trafo in [LOG, LOG10]:
+ raise ValueError(
+ "Measurements with observable "
+ f"transformation {trafo} must be "
+ f"positive, but {measurement} <= 0."
+ )
+
+ assert_measurements_not_null(df)
+ assert_measurements_numeric(df)
+
+
+def check_parameter_df(
+    df: pd.DataFrame,
+    model: Model | None = None,
+    observable_df: pd.DataFrame | None = None,
+    measurement_df: pd.DataFrame | None = None,
+    condition_df: pd.DataFrame | None = None,
+    mapping_df: pd.DataFrame | None = None,
+) -> None:
+    """Run sanity checks on PEtab parameter table
+
+    Arguments:
+        df: PEtab parameter DataFrame
+        model: Model for additional checking of parameter IDs
+        observable_df: PEtab observable table for additional checks
+        measurement_df: PEtab measurement table for additional checks
+        condition_df: PEtab condition table for additional checks
+        mapping_df: PEtab mapping table for additional checks
+
+    Raises:
+        AssertionError: in case of problems
+    """
+    _check_df(df, PARAMETER_DF_REQUIRED_COLS[1:], "parameter")
+
+    if df.index.name != PARAMETER_ID:
+        raise AssertionError(
+            f"Parameter table has wrong index {df.index.name}. "
+            f"expected {PARAMETER_ID}."
+        )
+
+    check_ids(df.index.values, kind="parameter")
+
+    for column_name in PARAMETER_DF_REQUIRED_COLS[1:]:  # 0 is PARAMETER_ID
+        if not np.issubdtype(df[column_name].dtype, np.number):
+            assert_no_leading_trailing_whitespace(
+                df[column_name].values, column_name
+            )
+
+    # nominal value is generally optional, but required if `estimate` != 1
+    # for any parameter.
+    # The `estimate` column may hold numbers or strings ("0"/"1"), so it is
+    # converted to numeric before comparing; OR-ing `!= 1` with `!= "1"`
+    # would flag every row of a string-typed column as non-estimated.
+    # Unparsable entries become NaN and count as non-estimated here.
+    estimate_numeric = pd.to_numeric(
+        df[ESTIMATE], errors="coerce"
+    )
+    non_estimated_mask = estimate_numeric != 1
+    non_estimated_par_ids = list(df.index[non_estimated_mask])
+    if non_estimated_par_ids:
+        if NOMINAL_VALUE not in df:
+            raise AssertionError(
+                "Parameter table contains parameters "
+                f"{non_estimated_par_ids} that are not "
+                "specified to be estimated, "
+                f"but column {NOMINAL_VALUE} is missing."
+            )
+        try:
+            df.loc[non_estimated_par_ids, NOMINAL_VALUE].apply(float)
+        except ValueError as e:
+            raise AssertionError(
+                f"Expected numeric values for `{NOMINAL_VALUE}` in parameter "
+                "table for all non-estimated parameters."
+            ) from e
+
+    assert_parameter_id_is_string(df)
+    assert_parameter_scale_is_valid(df)
+    assert_parameter_bounds_are_numeric(df)
+    assert_parameter_estimate_is_boolean(df)
+    assert_unique_parameter_ids(df)
+    check_parameter_bounds(df)
+    assert_parameter_prior_type_is_valid(df)
+    assert_parameter_prior_parameters_are_valid(df)
+
+    if model and measurement_df is not None and condition_df is not None:
+        assert_all_parameters_present_in_parameter_df(
+            df, model, observable_df, measurement_df, condition_df, mapping_df
+        )
+
+
+def check_observable_df(observable_df: pd.DataFrame) -> None:
+ """Check validity of observable table
+
+ Arguments:
+ observable_df: PEtab observable DataFrame
+
+ Raises:
+ AssertionError: in case of problems
+ """
+ _check_df(observable_df, OBSERVABLE_DF_REQUIRED_COLS[1:], "observable")
+
+ check_ids(observable_df.index.values, kind="observable")
+
+ for column_name in OBSERVABLE_DF_REQUIRED_COLS[1:]:
+ if not np.issubdtype(observable_df[column_name].dtype, np.number):
+ assert_no_leading_trailing_whitespace(
+ observable_df[column_name].values, column_name
+ )
+
+ for column_name in OBSERVABLE_DF_OPTIONAL_COLS:
+ if column_name in observable_df and not np.issubdtype(
+ observable_df[column_name].dtype, np.number
+ ):
+ assert_no_leading_trailing_whitespace(
+ observable_df[column_name].values, column_name
+ )
+
+ assert_noise_distributions_valid(observable_df)
+ assert_unique_observable_ids(observable_df)
+
+ # Check that formulas are parsable
+ for row in observable_df.itertuples():
+ obs = getattr(row, OBSERVABLE_FORMULA)
+ try:
+ sympify_petab(obs)
+ except sp.SympifyError as e:
+ raise AssertionError(
+ f"Cannot parse expression '{obs}' "
+ f"for observable {row.Index}: {e}"
+ ) from e
+
+ noise = getattr(row, NOISE_FORMULA)
+ try:
+ sympified_noise = sympify_petab(noise)
+ if sympified_noise is None or (
+ sympified_noise.is_Number and not sympified_noise.is_finite
+ ):
+ raise AssertionError(
+ f"No or non-finite {NOISE_FORMULA} "
+ f"given for observable {row.Index}."
+ )
+ except sp.SympifyError as e:
+ raise AssertionError(
+ f"Cannot parse expression '{noise}' "
+ f"for noise model for observable "
+ f"{row.Index}: {e}"
+ ) from e
+
+
+def assert_all_parameters_present_in_parameter_df(
+ parameter_df: pd.DataFrame,
+ model: Model,
+ observable_df: pd.DataFrame,
+ measurement_df: pd.DataFrame,
+ condition_df: pd.DataFrame,
+ mapping_df: pd.DataFrame = None,
+) -> None:
+ """Ensure all required parameters are contained in the parameter table
+ with no additional ones
+
+ Arguments:
+ parameter_df: PEtab parameter DataFrame
+ model: model
+ observable_df: PEtab observable table
+ measurement_df: PEtab measurement table
+ condition_df: PEtab condition table
+ mapping_df: PEtab mapping table for additional checks
+
+ Raises:
+ AssertionError: in case of problems
+ """
+ required = parameters.get_required_parameters_for_parameter_table(
+ model=model,
+ condition_df=condition_df,
+ observable_df=observable_df,
+ measurement_df=measurement_df,
+ mapping_df=mapping_df,
+ )
+
+ allowed = parameters.get_valid_parameters_for_parameter_table(
+ model=model,
+ condition_df=condition_df,
+ observable_df=observable_df,
+ measurement_df=measurement_df,
+ mapping_df=mapping_df,
+ )
+
+ actual = set(parameter_df.index)
+ missing = required - actual
+ extraneous = actual - allowed
+
+ # missing parameters might be present under a different name based on
+ # the mapping table
+ if missing and mapping_df is not None:
+ model_to_petab_mapping = {}
+ for map_from, map_to in zip(
+ mapping_df.index.values, mapping_df[MODEL_ENTITY_ID], strict=True
+ ):
+ if map_to in model_to_petab_mapping:
+ model_to_petab_mapping[map_to].append(map_from)
+ else:
+ model_to_petab_mapping[map_to] = [map_from]
+ missing = {
+ missing_id
+ for missing_id in missing
+ if missing_id not in model_to_petab_mapping
+ or all(
+ mapping_parameter not in actual
+ for mapping_parameter in model_to_petab_mapping[missing_id]
+ )
+ }
+
+ if missing:
+ raise AssertionError(
+ "Missing parameter(s) in the model or the "
+ "parameters table: " + str(missing)
+ )
+
+ if extraneous:
+ raise AssertionError(
+ "Extraneous parameter(s) in parameter table: " + str(extraneous)
+ )
+
+
+def assert_measured_observables_defined(
+ measurement_df: pd.DataFrame, observable_df: pd.DataFrame
+) -> None:
+ """Check if all observables in the measurement table have been defined in
+ the observable table
+
+ Arguments:
+ measurement_df: PEtab measurement table
+ observable_df: PEtab observable table
+
+ Raises:
+ AssertionError: in case of problems
+ """
+ used_observables = set(measurement_df[OBSERVABLE_ID].values)
+ defined_observables = set(observable_df.index.values)
+ if undefined_observables := (used_observables - defined_observables):
+ raise AssertionError(
+ f"Observables {undefined_observables} used in "
+ "measurement table but not defined in observables table."
+ )
+
+
+def condition_table_is_parameter_free(condition_df: pd.DataFrame) -> bool:
+ """Check if all entries in the condition table are numeric
+ (no parameter IDs)
+
+ Arguments:
+ condition_df: PEtab condition table
+
+ Returns:
+ ``True`` if there are no parameter overrides in the condition table,
+ ``False`` otherwise.
+ """
+ return len(petab.get_parametric_overrides(condition_df)) == 0
+
+
+def assert_parameter_id_is_string(parameter_df: pd.DataFrame) -> None:
+    """
+    Check if all entries in the parameterId column (the index) of the
+    parameter table are non-empty strings that do not start with a digit.
+
+    Arguments:
+        parameter_df: PEtab parameter DataFrame
+
+    Raises:
+        AssertionError: in case of problems
+    """
+    for parameter_id in parameter_df.index:
+        if isinstance(parameter_id, str) and parameter_id:
+            if parameter_id[0].isdigit():
+                raise AssertionError(
+                    f"{PARAMETER_ID} {parameter_id} starts with integer."
+                )
+        else:
+            raise AssertionError(f"Empty {PARAMETER_ID} found.")
+
+
+def assert_unique_parameter_ids(parameter_df: pd.DataFrame) -> None:
+ """
+ Check if the parameterId column of the parameter table is unique.
+
+ Arguments:
+ parameter_df: PEtab parameter DataFrame
+
+ Raises:
+ AssertionError: in case of problems
+ """
+ non_unique_ids = get_non_unique(parameter_df.index)
+ if len(non_unique_ids) > 0:
+ raise AssertionError(
+ f"Non-unique values found in the {PARAMETER_ID} column"
+ " of the parameter table: " + str(non_unique_ids)
+ )
+
+
+def assert_parameter_scale_is_valid(parameter_df: pd.DataFrame) -> None:
+ """
+ Check if all entries in the parameterScale column of the parameter table
+ are 'lin' for linear, 'log' for natural logarithm or 'log10' for base 10
+ logarithm.
+
+ Arguments:
+ parameter_df: PEtab parameter DataFrame
+
+ Raises:
+ AssertionError: in case of problems
+ """
+ for parameter_scale in parameter_df[PARAMETER_SCALE]:
+ if parameter_scale not in [LIN, LOG, LOG10]:
+ raise AssertionError(
+ f"Expected {LIN}, {LOG}, or {LOG10}, but "
+ f"got {parameter_scale}."
+ )
+
+
+def assert_parameter_bounds_are_numeric(parameter_df: pd.DataFrame) -> None:
+ """
+ Check if all entries in the lowerBound and upperBound columns of the
+ parameter table are numeric.
+
+ Arguments:
+ parameter_df: PEtab parameter DataFrame
+
+ Raises:
+ ValueError: if a bound is a string that cannot be parsed as float
+ """
+ parameter_df[LOWER_BOUND].apply(float).all()
+ parameter_df[UPPER_BOUND].apply(float).all()
+
+
+def check_parameter_bounds(parameter_df: pd.DataFrame) -> None:
+ """
+ Check if all entries in the lowerBound are smaller than upperBound column
+ in the parameter table and that bounds are positive for parameterScale
+ log|log10.
+
+ Arguments:
+ parameter_df: PEtab parameter DataFrame
+
+ Raises:
+ AssertionError: in case of problems
+
+ """
+ for _, row in parameter_df.iterrows():
+ if int(row[ESTIMATE]):
+ if not row[LOWER_BOUND] <= row[UPPER_BOUND]:
+ raise AssertionError(
+ f"{LOWER_BOUND} greater than {UPPER_BOUND} for "
+ f"{PARAMETER_ID} {row.name}."
+ )
+ if (row[LOWER_BOUND] < 0.0 or row[UPPER_BOUND] < 0.0) and row[
+ PARAMETER_SCALE
+ ] in [LOG, LOG10]:
+ raise AssertionError(
+ f"Bounds for {row[PARAMETER_SCALE]} scaled parameter "
+ f"{ row.name} must be positive."
+ )
+ if (
+ row.get(PARAMETER_SCALE, LIN) in [LOG, LOG10]
+ and (row[LOWER_BOUND] == 0.0 or row[UPPER_BOUND] == 0.0)
+ and not row.get(INITIALIZATION_PRIOR_TYPE)
+ ):
+ raise AssertionError(
+ f"Bounds for {row[PARAMETER_SCALE]} scaled parameter "
+ f"{row.name} must be positive if no "
+ f"{INITIALIZATION_PRIOR_TYPE} is provided. "
+ "Cannot sample from unbounded interval."
+ )
+
+
+def assert_parameter_prior_type_is_valid(parameter_df: pd.DataFrame) -> None:
+ """Check that valid prior types have been selected
+
+ Arguments:
+ parameter_df: PEtab parameter table
+
+ Raises:
+ AssertionError: in case of invalid prior
+ """
+ for col in [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE]:
+ if col not in parameter_df.columns:
+ continue
+ for _, row in parameter_df.iterrows():
+ if row[col] not in PRIOR_TYPES and not core.is_empty(row[col]):
+ raise AssertionError(
+ f"{col} must be one of {PRIOR_TYPES} but is "
+ f"'{row[col]}'."
+ )
+
+
+def assert_parameter_prior_parameters_are_valid(
+    parameter_df: pd.DataFrame,
+) -> None:
+    """Check that the prior parameters are valid.
+
+    Arguments:
+        parameter_df: PEtab parameter table
+
+    Raises:
+        AssertionError: in case of invalid prior parameters
+    """
+    prior_type_cols = [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE]
+    prior_par_cols = [
+        INITIALIZATION_PRIOR_PARAMETERS,
+        OBJECTIVE_PRIOR_PARAMETERS,
+    ]
+
+    # perform test for both priors
+    for type_col, par_col in zip(prior_type_cols, prior_par_cols, strict=True):
+        # iterate over rows
+        for _, row in parameter_df.iterrows():
+            # get type; a missing/empty type falls back to the default
+            if type_col not in row or core.is_empty(row[type_col]):
+                type_ = PARAMETER_SCALE_UNIFORM
+            else:
+                type_ = row[type_col]
+            # get parameters
+            pars_str = row.get(par_col, "")
+            with_default_parameters = [PARAMETER_SCALE_UNIFORM]
+            # check if parameters are empty
+            if core.is_empty(pars_str):
+                if type_ not in with_default_parameters:
+                    raise AssertionError(
+                        f"An empty {par_col} is only permitted with "
+                        f"{type_col} in {with_default_parameters}."
+                    )
+                # empty parameters fine
+                continue
+            # parse parameters
+            try:
+                pars = tuple(
+                    float(val) for val in pars_str.split(PARAMETER_SEPARATOR)
+                )
+            except ValueError as e:
+                raise AssertionError(
+                    f"Could not parse prior parameters '{pars_str}'."
+                ) from e
+
+            # all distributions take 2 parameters
+            if len(pars) != 2:
+                raise AssertionError(
+                    f"The prior parameters '{pars}' do not contain the "
+                    "expected number of entries (currently 'par1"
+                    f"{PARAMETER_SEPARATOR}par2' for all prior types)."
+                )
+
+            # we can't sample uniformly from [log(0)=-inf, ...]
+            if (
+                type_col == INITIALIZATION_PRIOR_TYPE
+                and row.get(type_col, "") == PARAMETER_SCALE_UNIFORM
+                and row.get(PARAMETER_SCALE, LIN) in [LOG, LOG10]
+                and (pars[0] == 0.0 or pars[1] == 0.0)
+            ):
+                raise AssertionError(
+                    f"{par_col} for {row[PARAMETER_SCALE]} scaled "
+                    f"parameter {row.name} must be positive if "
+                    f"{type_col}={PARAMETER_SCALE_UNIFORM}."
+                )
+
+
+def assert_parameter_estimate_is_boolean(parameter_df: pd.DataFrame) -> None:
+ """
+ Check if all entries in the estimate column of the parameter table are
+ 0 or 1.
+
+ Arguments:
+ parameter_df: PEtab parameter DataFrame
+
+ Raises:
+ AssertionError: in case of problems
+ """
+ for estimate in parameter_df[ESTIMATE]:
+ if int(estimate) not in [True, False]:
+ raise AssertionError(
+ f"Expected 0 or 1 but got {estimate} in {ESTIMATE} column."
+ )
+
+
+def is_scalar_float(x: Any):
+ """
+ Checks whether input is a number or can be transformed into a number
+ via float
+
+ :param x:
+ input
+ :return:
+ ``True`` if is or can be converted to number, ``False`` otherwise.
+ """
+ if isinstance(x, numbers.Number):
+ return True
+ try:
+ float(x)
+ return True
+ except (ValueError, TypeError):
+ return False
+
+
+def measurement_table_has_timepoint_specific_mappings(
+ measurement_df: pd.DataFrame | None,
+ allow_scalar_numeric_noise_parameters: bool = False,
+ allow_scalar_numeric_observable_parameters: bool = False,
+) -> bool:
+ """
+ Are there time-point or replicate specific parameter assignments in the
+ measurement table.
+
+ Arguments:
+ measurement_df:
+ PEtab measurement table
+
+ allow_scalar_numeric_noise_parameters:
+ ignore scalar numeric assignments to noiseParameter placeholders
+
+ allow_scalar_numeric_observable_parameters:
+ ignore scalar numeric assignments to observableParameter
+ placeholders
+
+ Returns:
+ True if there are time-point or replicate specific (non-numeric)
+ parameter assignments in the measurement table, False otherwise.
+ """
+ if measurement_df is None:
+ return False
+
+ # since we edit it, copy it first
+ measurement_df = copy.deepcopy(measurement_df)
+
+ # mask numeric values
+ for col, allow_scalar_numeric in [
+ (OBSERVABLE_PARAMETERS, allow_scalar_numeric_observable_parameters),
+ (NOISE_PARAMETERS, allow_scalar_numeric_noise_parameters),
+ ]:
+ if col not in measurement_df:
+ continue
+
+ measurement_df[col] = measurement_df[col].apply(str)
+
+ if allow_scalar_numeric:
+ measurement_df.loc[
+ measurement_df[col].apply(is_scalar_float), col
+ ] = np.nan
+
+ grouping_cols = core.get_notnull_columns(
+ measurement_df,
+ [
+ OBSERVABLE_ID,
+ SIMULATION_CONDITION_ID,
+ PREEQUILIBRATION_CONDITION_ID,
+ OBSERVABLE_PARAMETERS,
+ NOISE_PARAMETERS,
+ ],
+ )
+ grouped_df = measurement_df.groupby(grouping_cols, dropna=False)
+
+ grouping_cols = core.get_notnull_columns(
+ measurement_df,
+ [
+ OBSERVABLE_ID,
+ SIMULATION_CONDITION_ID,
+ PREEQUILIBRATION_CONDITION_ID,
+ ],
+ )
+ grouped_df2 = measurement_df.groupby(grouping_cols)
+ # data frame has timepoint specific overrides if grouping by noise
+ # parameters and observable parameters in addition to observable,
+ # condition and preeq id yields more groups
+ return len(grouped_df) != len(grouped_df2)
+
+
+def observable_table_has_nontrivial_noise_formula(
+ observable_df: pd.DataFrame | None,
+) -> bool:
+ """
+ Does any observable have a noise formula that is not just a single
+ parameter?
+
+ Arguments:
+ observable_df: PEtab observable table
+
+ Returns:
+ ``True`` if any noise formula does not consist of a single identifier,
+ ``False`` otherwise.
+ """
+ if observable_df is None:
+ return False
+
+ return (
+ not observable_df[NOISE_FORMULA]
+ .apply(
+ lambda x: is_scalar_float(x)
+ or re.match(r"^[\w]+$", str(x)) is not None
+ )
+ .all()
+ )
+
+
+def measurement_table_has_observable_parameter_numeric_overrides(
+ measurement_df: pd.DataFrame,
+) -> bool:
+ """Are there any numbers to override observable parameters?
+
+ Arguments:
+ measurement_df: PEtab measurement table
+
+ Returns:
+ ``True`` if there are any numbers to override observable
+ parameters, ``False`` otherwise.
+ """
+ if OBSERVABLE_PARAMETERS not in measurement_df:
+ return False
+
+ for _, row in measurement_df.iterrows():
+ for override in measurements.split_parameter_replacement_list(
+ row.get(OBSERVABLE_PARAMETERS, None)
+ ):
+ if isinstance(override, numbers.Number):
+ return True
+
+ return False
+
+
+def assert_noise_distributions_valid(observable_df: pd.DataFrame) -> None:
+ """
+ Ensure that noise distributions and transformations for observables are
+ valid.
+
+ Arguments:
+ observable_df: PEtab observable table
+
+ Raises:
+ AssertionError: in case of problems
+ """
+ if OBSERVABLE_TRANSFORMATION in observable_df:
+ # check for valid values
+ for trafo in observable_df[OBSERVABLE_TRANSFORMATION]:
+ if trafo not in ["", *OBSERVABLE_TRANSFORMATIONS] and not (
+ isinstance(trafo, numbers.Number) and np.isnan(trafo)
+ ):
+ raise ValueError(
+ f"Unrecognized observable transformation in observable "
+ f"table: {trafo}."
+ )
+
+ if NOISE_DISTRIBUTION in observable_df:
+ for distr in observable_df[NOISE_DISTRIBUTION]:
+ if distr not in ["", *NOISE_MODELS] and not (
+ isinstance(distr, numbers.Number) and np.isnan(distr)
+ ):
+ raise ValueError(
+ f"Unrecognized noise distribution in observable "
+ f"table: {distr}."
+ )
+
+
+def assert_unique_observable_ids(observable_df: pd.DataFrame) -> None:
+ """
+ Check if the observableId column of the observable table is unique.
+
+ Arguments:
+ observable_df: PEtab observable DataFrame
+
+ Raises:
+ AssertionError: in case of problems
+ """
+ non_unique_ids = get_non_unique(observable_df.index)
+ if len(non_unique_ids) > 0:
+ raise AssertionError(
+ f"Non-unique values found in the {OBSERVABLE_ID} column"
+ " of the observable table: " + str(non_unique_ids)
+ )
+
+
+def get_non_unique(values):
+ counter = Counter(values)
+ return [value for (value, count) in counter.items() if count > 1]
+
+
+def lint_problem(problem: "petab.Problem") -> bool:
+    """Run PEtab validation on problem
+
+    Arguments:
+        problem: PEtab problem to check
+
+    Returns:
+        ``True`` if errors occurred, ``False`` otherwise
+    """
+    # pylint: disable=too-many-statements
+    errors_occurred = False
+
+    if problem.extensions_config:
+        logger.warning(
+            "Validation of PEtab extensions is not yet implemented, "
+            "but the given problem uses the following extensions: "
+            f"{', '.join(problem.extensions_config.keys())}"
+        )
+
+    # Run checks on individual files
+    if problem.model is not None:
+        logger.info("Checking model...")
+        errors_occurred |= not problem.model.is_valid()
+    else:
+        logger.warning("Model not available. Skipping.")
+
+    if problem.measurement_df is not None:
+        logger.info("Checking measurement table...")
+        try:
+            check_measurement_df(problem.measurement_df, problem.observable_df)
+
+            if problem.condition_df is not None:
+                assert_measurement_conditions_present_in_condition_table(
+                    problem.measurement_df, problem.condition_df
+                )
+        except AssertionError as e:
+            logger.error(e)
+            errors_occurred = True
+    else:
+        logger.warning("Measurement table not available. Skipping.")
+
+    if problem.condition_df is not None:
+        logger.info("Checking condition table...")
+        try:
+            check_condition_df(
+                problem.condition_df,
+                model=problem.model,
+                observable_df=problem.observable_df,
+                mapping_df=problem.mapping_df,
+            )
+        except AssertionError as e:
+            logger.error(e)
+            errors_occurred = True
+    else:
+        logger.warning("Condition table not available. Skipping.")
+
+    if problem.observable_df is not None:
+        logger.info("Checking observable table...")
+        try:
+            check_observable_df(problem.observable_df)
+        except AssertionError as e:
+            logger.error(e)
+            errors_occurred = True
+        if problem.model is not None:
+            for obs_id in problem.observable_df.index:
+                if problem.model.has_entity_with_id(obs_id):
+                    logger.error(
+                        f"Observable ID {obs_id} shadows model " "entity."
+                    )
+                    errors_occurred = True
+    else:
+        logger.warning("Observable table not available. Skipping.")
+
+    if problem.parameter_df is not None:
+        logger.info("Checking parameter table...")
+        try:
+            check_parameter_df(
+                problem.parameter_df,
+                problem.model,
+                problem.observable_df,
+                problem.measurement_df,
+                problem.condition_df,
+                problem.mapping_df,
+            )
+        except AssertionError as e:
+            logger.error(e)
+            errors_occurred = True
+    else:
+        logger.warning("Parameter table not available. Skipping.")
+
+    if (
+        problem.model is not None
+        and problem.condition_df is not None
+        and problem.parameter_df is not None
+    ):
+        try:
+            assert_model_parameters_in_condition_or_parameter_table(
+                problem.model,
+                problem.condition_df,
+                problem.parameter_df,
+                problem.mapping_df,
+            )
+        except AssertionError as e:
+            logger.error(e)
+            errors_occurred = True
+
+    if problem.visualization_df is not None:
+        logger.info("Checking visualization table...")
+        from petab.v1.visualize.lint import validate_visualization_df
+
+        errors_occurred |= validate_visualization_df(problem)
+    else:
+        logger.warning("Visualization table not available. Skipping.")
+
+    if errors_occurred:
+        logger.error("Not OK")
+    elif (
+        problem.measurement_df is None
+        or problem.condition_df is None
+        or problem.model is None
+        or problem.parameter_df is None
+        or problem.observable_df is None
+    ):
+        logger.warning(
+            "Not all files of the PEtab problem definition could "
+            "be checked."
+        )
+    else:
+        logger.info("PEtab format check completed successfully.")
+
+    return errors_occurred
+
+
+def assert_model_parameters_in_condition_or_parameter_table(
+ model: Model,
+ condition_df: pd.DataFrame,
+ parameter_df: pd.DataFrame,
+ mapping_df: pd.DataFrame = None,
+ observable_df: pd.DataFrame = None,
+ measurement_df: pd.DataFrame = None,
+) -> None:
+ """Model parameters that are rule targets must not be present in the
+ parameter table. Other parameters may be present in either the parameter
+ table or the condition table columns, but not in both. Check that.
+
+ Arguments:
+ parameter_df: PEtab parameter DataFrame
+ model: PEtab model
+ condition_df: PEtab condition table
+ mapping_df: PEtab mapping table
+ observable_df: PEtab observable table
+ measurement_df: PEtab measurement table
+
+ Raises:
+ AssertionError: in case of problems
+ """
+ allowed_in_condition_cols = set(model.get_valid_ids_for_condition_table())
+ if mapping_df is not None:
+ allowed_in_condition_cols |= {
+ from_id
+ for from_id, to_id in zip(
+ mapping_df.index.values,
+ mapping_df[MODEL_ENTITY_ID],
+ strict=True,
+ )
+ # mapping table entities mapping to already allowed parameters
+ if to_id in allowed_in_condition_cols
+ # mapping table entities mapping to species
+ or model.is_state_variable(to_id)
+ }
+
+ allowed_in_parameter_table = (
+ parameters.get_valid_parameters_for_parameter_table(
+ model=model,
+ condition_df=condition_df,
+ observable_df=observable_df,
+ measurement_df=measurement_df,
+ mapping_df=mapping_df,
+ )
+ )
+
+ entities_in_condition_table = set(condition_df.columns) - {CONDITION_NAME}
+ entities_in_parameter_table = set(parameter_df.index.values)
+
+ disallowed_in_condition = {
+ x
+ for x in (entities_in_condition_table - allowed_in_condition_cols)
+ # we only check model entities here, not output parameters
+ if model.has_entity_with_id(x)
+ }
+ if disallowed_in_condition:
+ is_or_are = "is" if len(disallowed_in_condition) == 1 else "are"
+ raise AssertionError(
+ f"{disallowed_in_condition} {is_or_are} not "
+ "allowed to occur in condition table "
+ "columns."
+ )
+
+ disallowed_in_parameters = {
+ x
+ for x in (entities_in_parameter_table - allowed_in_parameter_table)
+ # we only check model entities here, not output parameters
+ if model.has_entity_with_id(x)
+ }
+
+ if disallowed_in_parameters:
+ is_or_are = "is" if len(disallowed_in_parameters) == 1 else "are"
+ raise AssertionError(
+ f"{disallowed_in_parameters} {is_or_are} not "
+ "allowed to occur in the parameters table."
+ )
+
+ in_both = entities_in_condition_table & entities_in_parameter_table
+ if in_both:
+ is_or_are = "is" if len(in_both) == 1 else "are"
+ raise AssertionError(
+ f"{in_both} {is_or_are} present in both "
+ "the condition table and the parameter table."
+ )
+
+
+def assert_measurement_conditions_present_in_condition_table(
+ measurement_df: pd.DataFrame, condition_df: pd.DataFrame
+) -> None:
+ """Ensure that all entries from measurement_df.simulationConditionId and
+ measurement_df.preequilibrationConditionId are present in
+ condition_df.index.
+
+ Arguments:
+ measurement_df: PEtab measurement table
+ condition_df: PEtab condition table
+
+ Raises:
+ AssertionError: in case of problems
+ """
+ used_conditions = set(measurement_df[SIMULATION_CONDITION_ID].values)
+ if PREEQUILIBRATION_CONDITION_ID in measurement_df:
+ used_conditions |= set(
+ measurement_df[PREEQUILIBRATION_CONDITION_ID].dropna().values
+ )
+ available_conditions = set(condition_df.index.values)
+ if missing_conditions := (used_conditions - available_conditions):
+ raise AssertionError(
+ "Measurement table references conditions that "
+ "are not specified in the condition table: "
+ + str(missing_conditions)
+ )
+
+
+def assert_measurements_not_null(
+ measurement_df: pd.DataFrame,
+) -> None:
+ """Check whether all measurements are not null.
+
+ Arguments:
+ measurement_df:
+ PEtab measurement table.
+
+ Raises:
+ AssertionError:
+ Some measurement value(s) are null (missing).
+ """
+ if measurement_df[MEASUREMENT].isnull().any():
+ raise AssertionError("Some measurement(s) are null (missing).")
+
+
+def assert_measurements_numeric(
+ measurement_df: pd.DataFrame,
+) -> None:
+ """Check whether all measurements are numeric.
+
+ Note that null (missing) measurements are ignored.
+
+ Arguments:
+ measurement_df:
+ PEtab measurement table.
+
+ Raises:
+ AssertionError:
+ Some measurement value(s) are not numeric.
+ """
+ not_null_measurement_values = measurement_df[MEASUREMENT].dropna()
+ all_measurements_are_numeric = (
+ pd.to_numeric(not_null_measurement_values, errors="coerce")
+ .notnull()
+ .all()
+ )
+ if not all_measurements_are_numeric:
+ raise AssertionError(
+ "Some values in the `petab.C.MEASUREMENT` column of the PEtab "
+ "measurements table are not numeric."
+ )
+
+
+def is_valid_identifier(x: str) -> bool:
+ """Check whether `x` is a valid identifier
+
+ Check whether `x` is a valid identifier for conditions, parameters,
+ observables... . Identifiers may contain upper and lower case letters,
+ digits and underscores, but must not start with a digit.
+
+ Arguments:
+ x: string to check
+
+ Returns:
+ ``True`` if valid, ``False`` otherwise
+ """
+ if pd.isna(x):
+ return False
+
+ return re.match(r"^[a-zA-Z_]\w*$", x) is not None
+
+
+def check_ids(ids: Iterable[str], kind: str = "") -> None:
+ """Check IDs are valid
+
+ Arguments:
+ ids: Iterable of IDs to check
+ kind: Kind of IDs, for more informative error message
+
+ Raises:
+ ValueError: in case of invalid IDs
+ """
+ invalids = [
+ (index, _id)
+ for index, _id in enumerate(ids)
+ if not is_valid_identifier(_id)
+ ]
+
+ if invalids:
+ # The first row is the header row, and Python lists are zero-indexed,
+ # hence need to add 2 for the correct line number.
+ offset = 2
+ error_output = "\n".join(
+ [
+ f"Line {index+offset}: "
+ + ("Missing ID" if pd.isna(_id) else _id)
+ for index, _id in invalids
+ ]
+ )
+ raise ValueError(f"Invalid {kind} ID(s):\n{error_output}")
diff --git a/petab/v1/mapping.py b/petab/v1/mapping.py
new file mode 100644
index 00000000..80c71c68
--- /dev/null
+++ b/petab/v1/mapping.py
@@ -0,0 +1,118 @@
+"""Functionality related to the PEtab entity mapping table"""
+from pathlib import Path
+
+import pandas as pd
+
+from . import lint
+from .C import * # noqa: F403
+from .models import Model
+
+__all__ = [
+ "get_mapping_df",
+ "write_mapping_df",
+ "check_mapping_df",
+ "resolve_mapping",
+]
+
+
+def get_mapping_df(
+    mapping_file: None | str | Path | pd.DataFrame,
+) -> pd.DataFrame | None:
+    """
+    Read the provided mapping file into a ``pandas.DataFrame``.
+
+    Arguments:
+        mapping_file: Name of file to read from or ``pandas.DataFrame``.
+            A DataFrame argument is modified in place (its index is reset
+            if necessary and then set to the PEtab entity ID column).
+
+    Returns:
+        Mapping DataFrame indexed by the PEtab entity ID, or ``None`` if
+        ``mapping_file`` is ``None``.
+
+    Raises:
+        KeyError: if a mandatory column is missing.
+    """
+    if mapping_file is None:
+        return mapping_file
+
+    if isinstance(mapping_file, str | Path):
+        mapping_file = pd.read_csv(
+            mapping_file, sep="\t", float_precision="round_trip"
+        )
+
+    if not isinstance(mapping_file.index, pd.RangeIndex):
+        # Turn a pre-existing index back into a regular column, but only
+        # keep it if it is the PEtab entity ID; any other index is dropped.
+        mapping_file.reset_index(
+            drop=mapping_file.index.name != PETAB_ENTITY_ID,
+            inplace=True,
+        )
+
+    for col in MAPPING_DF_REQUIRED_COLS:
+        if col not in mapping_file.columns:
+            # Name the column that is actually missing, not always the ID.
+            raise KeyError(f"Mapping table missing mandatory field {col}.")
+
+        lint.assert_no_leading_trailing_whitespace(
+            mapping_file.reset_index()[col].values, col
+        )
+
+    mapping_file.set_index([PETAB_ENTITY_ID], inplace=True)
+
+    return mapping_file
+
+
+def write_mapping_df(df: pd.DataFrame, filename: str | Path) -> None:
+    """Save a PEtab mapping table as a tab-separated file.
+
+    The table is first passed through :func:`get_mapping_df`; the index of
+    the resulting table is written along with the columns.
+
+    Arguments:
+        df: PEtab mapping table
+        filename: Destination file name
+    """
+    normalized = get_mapping_df(df)
+    normalized.to_csv(filename, index=True, sep="\t")
+
+
+def check_mapping_df(
+    df: pd.DataFrame,
+    model: Model | None = None,
+) -> None:
+    """Sanity-check a PEtab mapping table.
+
+    Arguments:
+        df: PEtab mapping DataFrame
+        model: Optional model; if given, every mapped model entity ID must
+            be known to it
+
+    Raises:
+        AssertionError: if the index is not the PEtab entity ID or a model
+            entity ID is unknown to ``model``
+    """
+    # The first required column is skipped here; presumably it is the
+    # entity ID, which is expected as the index and checked below.
+    lint._check_df(df, MAPPING_DF_REQUIRED_COLS[1:], "mapping")
+
+    index_name = df.index.name
+    if index_name != PETAB_ENTITY_ID:
+        raise AssertionError(
+            f"Mapping table has wrong index {index_name}. "
+            f"Expected {PETAB_ENTITY_ID}."
+        )
+
+    lint.check_ids(df.index.values, kind=PETAB_ENTITY_ID)
+
+    if not model:
+        return
+
+    for target_id in df[MODEL_ENTITY_ID]:
+        if model.has_entity_with_id(target_id):
+            continue
+        raise AssertionError(
+            "Mapping table maps to unknown "
+            f"model entity ID {target_id}."
+        )
+
+
+def resolve_mapping(mapping_df: pd.DataFrame | None, element: str) -> str:
+    """Look up the model entity ID that an element is mapped to.
+
+    :param mapping_df:
+        Mapping table, or ``None`` if there is no mapping.
+
+    :param element:
+        Element to resolve.
+
+    :return:
+        The ``MODEL_ENTITY_ID`` entry for ``element`` if it is listed in the
+        index of the mapping table, otherwise ``element`` unchanged.
+    """
+    if mapping_df is not None and element in mapping_df.index:
+        return mapping_df.loc[element, MODEL_ENTITY_ID]
+    return element
diff --git a/petab/v1/math/PetabMathExprLexer.g4 b/petab/v1/math/PetabMathExprLexer.g4
new file mode 100644
index 00000000..69504e0a
--- /dev/null
+++ b/petab/v1/math/PetabMathExprLexer.g4
@@ -0,0 +1,35 @@
+// Lexer grammar for PEtab math expressions
+// run `regenerate.sh` to regenerate the lexer
+lexer grammar PetabMathExprLexer;
+
+
+// Numeric literals. NUMBER is the only numeric token referenced by the
+// parser grammar (rule `number`); it also covers the literal `inf`.
+// NOTE(review): INTEGER, EXPONENT_FLOAT, POINT_FLOAT and FLOAT_NUMBER are
+// declared as regular tokens rather than fragments; since NUMBER is listed
+// first and matches the same input, they presumably never reach the parser.
+// Turning them into fragments would renumber the generated tokens -- confirm
+// before changing.
+NUMBER : EXPONENT_FLOAT | INTEGER | POINT_FLOAT | INF;
+INTEGER : DIGITS ;
+EXPONENT_FLOAT : (INTEGER | POINT_FLOAT) EXPONENT ;
+POINT_FLOAT : DIGITS '.' DIGITS ;
+fragment EXPONENT: ('e' | 'E') ('+' | '-')? DIGITS ;
+FLOAT_NUMBER: POINT_FLOAT | EXPONENT_FLOAT;
+fragment DIGITS : [0-9]+ ;
+
+// Whitespace is discarded.
+WS : [ \t\r\n]+ -> skip ;
+// Keywords; declared before NAME, which would otherwise match the same text.
+TRUE : 'true' ;
+FALSE : 'false' ;
+INF : 'inf' ;
+// Identifiers: a letter or underscore, then letters, digits or underscores.
+NAME : [a-zA-Z_][a-zA-Z0-9_]* ;
+// Grouping
+OPEN_PAREN : '(' ;
+CLOSE_PAREN : ')' ;
+// Boolean operators
+BOOLEAN_OR : '||' ;
+BOOLEAN_AND : '&&' ;
+// Comparison operators
+GT : '>' ;
+LT : '<' ;
+GTE : '>=' ;
+LTE : '<=' ;
+EQ : '==' ;
+NEQ : '!=' ;
+// Arithmetic operators
+PLUS : '+' ;
+MINUS : '-' ;
+ASTERISK : '*' ;
+SLASH : '/' ;
+CARET: '^';
+// Boolean negation
+EXCLAMATION_MARK: '!';
+// Separator of function arguments
+COMMA: ',';
diff --git a/petab/v1/math/PetabMathExprParser.g4 b/petab/v1/math/PetabMathExprParser.g4
new file mode 100644
index 00000000..543c67e8
--- /dev/null
+++ b/petab/v1/math/PetabMathExprParser.g4
@@ -0,0 +1,42 @@
+// Parser grammar for PEtab math expressions
+// run `regenerate.sh` to regenerate the parser
+parser grammar PetabMathExprParser;
+
+options { tokenVocab=PetabMathExprLexer; }
+
+// Entry point: a single expression that must span the whole input.
+petabExpression:
+    expr EOF ;
+
+// Operator alternatives are listed from highest to lowest precedence.
+// `^` is right-associative (a ^ b ^ c is a ^ (b ^ c)), which is what the
+// checked-in generated parser implements; the other binary operators are
+// left-associative.
+expr:
+    <assoc=right> expr '^' expr             # PowerExpr
+    | ('+'|'-') expr                        # UnaryExpr
+    | '!' expr                              # BooleanNotExpr
+    | expr ('*'|'/') expr                   # MultExpr
+    | expr ('+'|'-') expr                   # AddExpr
+    | '(' expr ')'                          # ParenExpr
+    | expr comp_op expr                     # ComparisonExpr
+    | expr (BOOLEAN_AND | BOOLEAN_OR) expr  # BooleanAndOrExpr
+    | number                                # Number_
+    | booleanLiteral                        # BooleanLiteral_
+    | functionCall                          # functionCall_
+    | var                                   # VarExpr_
+    ;
+
+// Comparison operators
+comp_op:
+    GT
+    | LT
+    | GTE
+    | LTE
+    | EQ
+    | NEQ
+    ;
+
+// Function calls take one or more comma-separated arguments.
+argumentList: expr (',' expr)* ;
+functionCall: NAME OPEN_PAREN argumentList CLOSE_PAREN ;
+
+booleanLiteral:
+    TRUE
+    | FALSE
+    ;
+number: NUMBER ;
+var: NAME ;
diff --git a/petab/v1/math/SympyVisitor.py b/petab/v1/math/SympyVisitor.py
new file mode 100644
index 00000000..016e872c
--- /dev/null
+++ b/petab/v1/math/SympyVisitor.py
@@ -0,0 +1,303 @@
+"""PEtab-math to sympy conversion."""
+import sympy as sp
+from sympy.logic.boolalg import Boolean, BooleanFalse, BooleanTrue
+
+from ._generated.PetabMathExprParser import PetabMathExprParser
+from ._generated.PetabMathExprParserVisitor import PetabMathExprParserVisitor
+
+__all__ = ["MathVisitorSympy"]
+
+# Mappings of PEtab math functions to sympy functions
+
+# trigonometric and hyperbolic functions and their inverses
+# (the visitor requires exactly one argument for each of them)
+_trig_funcs = {
+    "sin": sp.sin,
+    "cos": sp.cos,
+    "tan": sp.tan,
+    "sec": sp.sec,
+    "csc": sp.csc,
+    "cot": sp.cot,
+    "sinh": sp.sinh,
+    "cosh": sp.cosh,
+    "tanh": sp.tanh,
+    "sech": sp.sech,
+    "csch": sp.csch,
+    "coth": sp.coth,
+    "arccos": sp.acos,
+    "arcsin": sp.asin,
+    "arctan": sp.atan,
+    "arcsec": sp.asec,
+    "arccsc": sp.acsc,
+    "arccot": sp.acot,
+    "arcsinh": sp.asinh,
+    "arccosh": sp.acosh,
+    "arctanh": sp.atanh,
+    "arcsech": sp.asech,
+    "arccsch": sp.acsch,
+    "arccoth": sp.acoth,
+}
+# other functions for which the visitor requires exactly one argument
+_unary_funcs = {
+    "exp": sp.exp,
+    # log10/log2 of an argument that is known to be zero yield -inf
+    # explicitly instead of being delegated to sympy
+    "log10": lambda x: -sp.oo if x.is_zero is True else sp.log(x, 10),
+    "log2": lambda x: -sp.oo if x.is_zero is True else sp.log(x, 2),
+    "ln": sp.log,
+    "sqrt": sp.sqrt,
+    "abs": sp.Abs,
+    "sign": sp.sign,
+}
+# functions for which the visitor requires exactly two arguments
+_binary_funcs = {
+    "pow": sp.Pow,
+    "min": sp.Min,
+    "max": sp.Max,
+}
+
+# reserved names that cannot be used as variable names
+# (the visitor lower-cases an identifier before looking it up here)
+_reserved_names = {
+    "inf",
+    "nan",
+    "true",
+    "false",
+}
+
+
+class MathVisitorSympy(PetabMathExprParserVisitor):
+    """
+    ANTLR4 visitor that turns a PEtab math parse tree into a sympy expression.
+
+    Each ``visit*`` method handles one node type of the tree produced by the
+    generated ``PetabMathExprParser`` and returns its sympy counterpart.
+    Where an operation needs numbers, boolean literals are converted via
+    :func:`bool2num`; where it needs booleans, numbers are converted via
+    :func:`num2bool`.
+
+    Most users will not need to interact with this class directly, but rather
+    use :func:`petab.math.sympify_petab`.
+
+    Evaluation of any sub-expressions currently relies on sympy's defaults.
+
+    For a general introduction to ANTLR4 visitors, see:
+    https://github.com/antlr/antlr4/blob/7d4cea92bc3f7d709f09c3f1ac77c5bbc71a6749/doc/python-target.md
+    """
+
+    def visitPetabExpression(
+        self, ctx: PetabMathExprParser.PetabExpressionContext
+    ) -> sp.Expr | sp.Basic:
+        """Convert the root node by converting its first child."""
+        root_expr = ctx.getChild(0)
+        return self.visit(root_expr)
+
+    def visitNumber(self, ctx: PetabMathExprParser.NumberContext) -> sp.Float:
+        """Create a sympy Float from the text of a number node."""
+        literal = ctx.getText()
+        return sp.Float(literal)
+
+    def visitVar(self, ctx: PetabMathExprParser.VarContext) -> sp.Symbol:
+        """Create a real-valued sympy Symbol named after the identifier.
+
+        Raises:
+            ValueError: if the identifier is a reserved name (the comparison
+                ignores case).
+        """
+        name = ctx.getText()
+        if name.lower() in _reserved_names:
+            raise ValueError(f"Use of reserved name {name!r}")
+        return sp.Symbol(name, real=True)
+
+    def visitMultExpr(
+        self, ctx: PetabMathExprParser.MultExprContext
+    ) -> sp.Expr:
+        """Translate ``a * b`` and ``a / b``.
+
+        Boolean literals among the operands are converted to numbers first.
+        """
+        if ctx.getChildCount() != 3:
+            raise AssertionError(f"Unexpected expression: {ctx.getText()}")
+
+        left = bool2num(self.visit(ctx.getChild(0)))
+        right = bool2num(self.visit(ctx.getChild(2)))
+        if ctx.ASTERISK():
+            return left * right
+        if ctx.SLASH():
+            return left / right
+
+        raise AssertionError(f"Unexpected expression: {ctx.getText()}")
+
+    def visitAddExpr(self, ctx: PetabMathExprParser.AddExprContext) -> sp.Expr:
+        """Translate ``a + b`` and ``a - b``.
+
+        Boolean literals among the operands are converted to numbers first.
+        """
+        left = bool2num(self.visit(ctx.getChild(0)))
+        right = bool2num(self.visit(ctx.getChild(2)))
+        if ctx.PLUS():
+            return left + right
+        if ctx.MINUS():
+            return left - right
+
+        operator_text = ctx.getChild(1).getText()
+        raise AssertionError(
+            f"Unexpected operator: {operator_text} " f"in {ctx.getText()}"
+        )
+
+    def visitArgumentList(
+        self, ctx: PetabMathExprParser.ArgumentListContext
+    ) -> list[sp.Basic | sp.Expr]:
+        """Convert every argument of a function call, in order."""
+        # every second child is a separating comma and is skipped
+        argument_nodes = ctx.children[::2]
+        return list(map(self.visit, argument_nodes))
+
+    def visitFunctionCall(
+        self, ctx: PetabMathExprParser.FunctionCallContext
+    ) -> sp.Expr:
+        """Translate a function call into the matching sympy expression.
+
+        Raises:
+            AssertionError: if the node has fewer than four children or the
+                number of arguments does not fit the function.
+            ValueError: if the function name is not known.
+        """
+        if ctx.getChildCount() < 4:
+            raise AssertionError(f"Unexpected expression: {ctx.getText()}")
+        func_name = ctx.getChild(0).getText()
+        args = self.visit(ctx.getChild(2))
+        is_piecewise = func_name == "piecewise"
+
+        if not is_piecewise:
+            # all functions except piecewise expect numerical arguments
+            args = [bool2num(arg) for arg in args]
+
+        def arity_error() -> AssertionError:
+            return AssertionError(
+                f"Unexpected number of arguments: {len(args)} "
+                f"in {ctx.getText()}"
+            )
+
+        # fixed-arity functions, looked up in the module-level tables
+        for table, arity in (
+            (_trig_funcs, 1),
+            (_unary_funcs, 1),
+            (_binary_funcs, 2),
+        ):
+            if func_name in table:
+                if len(args) != arity:
+                    raise arity_error()
+                return table[func_name](*args)
+
+        if func_name == "log":
+            if len(args) not in [1, 2]:
+                raise arity_error()
+            # the logarithm of an argument known to be zero is -inf
+            if args[0].is_zero is True:
+                return -sp.oo
+            return sp.log(*args)
+
+        if is_piecewise:
+            if (len(args) - 1) % 2 != 0:
+                raise arity_error()
+            # sympy's Piecewise requires an explicit condition for the final
+            # `else` case
+            args.append(sp.true)
+            values, conditions = args[::2], args[1::2]
+            pairs = [
+                (value, num2bool(condition))
+                for value, condition in zip(values, conditions, strict=True)
+            ]
+            return sp.Piecewise(*pairs)
+
+        raise ValueError(f"Unknown function: {ctx.getText()}")
+
+    def visitParenExpr(self, ctx: PetabMathExprParser.ParenExprContext):
+        """Convert the expression enclosed by the parentheses."""
+        inner = ctx.getChild(1)
+        return self.visit(inner)
+
+    def visitPowerExpr(
+        self, ctx: PetabMathExprParser.PowerExprContext
+    ) -> sp.Pow:
+        """Translate ``base ^ exponent`` into ``sympy.Pow``."""
+        child_count = ctx.getChildCount()
+        if child_count != 3:
+            raise AssertionError(
+                f"Unexpected number of children: {child_count} "
+                f"in {ctx.getText()}"
+            )
+        base = bool2num(self.visit(ctx.getChild(0)))
+        exponent = bool2num(self.visit(ctx.getChild(2)))
+        return sp.Pow(base, exponent)
+
+    def visitUnaryExpr(
+        self, ctx: PetabMathExprParser.UnaryExprContext
+    ) -> sp.Basic | sp.Expr:
+        """Translate unary plus and minus."""
+        if ctx.getChildCount() == 2:
+            operand = bool2num(self.visit(ctx.getChild(1)))
+            sign = ctx.getChild(0).getText()
+            if sign == "-":
+                return -operand
+            if sign == "+":
+                return operand
+
+        raise AssertionError(f"Unexpected expression: {ctx.getText()}")
+
+    def visitComparisonExpr(
+        self, ctx: PetabMathExprParser.ComparisonExprContext
+    ) -> sp.Basic | sp.Expr:
+        """Translate a comparison into the matching sympy relation."""
+        if ctx.getChildCount() != 3:
+            raise AssertionError(f"Unexpected expression: {ctx.getText()}")
+        lhs = self.visit(ctx.getChild(0))
+        op = ctx.getChild(1).getText()
+        rhs = self.visit(ctx.getChild(2))
+
+        relation = {
+            "==": sp.Equality,
+            "!=": sp.Unequality,
+            "<": sp.StrictLessThan,
+            ">": sp.StrictGreaterThan,
+            "<=": sp.LessThan,
+            ">=": sp.GreaterThan,
+        }.get(op)
+        if relation is None:
+            raise AssertionError(f"Unexpected operator: {op}")
+
+        # boolean literals are compared by their numeric value
+        return relation(bool2num(lhs), bool2num(rhs))
+
+    def visitBooleanNotExpr(
+        self, ctx: PetabMathExprParser.BooleanNotExprContext
+    ) -> sp.Basic | sp.Expr:
+        """Translate ``!x`` into a sympy negation."""
+        if ctx.getChildCount() != 2:
+            raise AssertionError(f"Unexpected expression: {ctx.getText()}")
+
+        return ~num2bool(self.visit(ctx.getChild(1)))
+
+    def visitBooleanAndOrExpr(
+        self, ctx: PetabMathExprParser.BooleanAndOrExprContext
+    ) -> sp.Basic | sp.Expr:
+        """Translate ``a && b`` and ``a || b``."""
+        if ctx.getChildCount() != 3:
+            raise AssertionError(f"Unexpected expression: {ctx.getText()}")
+
+        left = num2bool(self.visit(ctx.getChild(0)))
+        right = num2bool(self.visit(ctx.getChild(2)))
+
+        if ctx.BOOLEAN_AND():
+            return left & right
+        if ctx.BOOLEAN_OR():
+            return left | right
+
+        raise AssertionError(f"Unexpected expression: {ctx.getText()}")
+
+    def visitBooleanLiteral(
+        self, ctx: PetabMathExprParser.BooleanLiteralContext
+    ) -> Boolean:
+        """Translate ``true``/``false`` into sympy's boolean constants."""
+        if ctx.TRUE():
+            return sp.true
+        if ctx.FALSE():
+            return sp.false
+
+        raise AssertionError(f"Unexpected boolean literal: {ctx.getText()}")
+
+
+def bool2num(x: sp.Basic | sp.Expr) -> sp.Basic | sp.Expr:
+    """Map sympy's ``true``/``false`` to ``Float(1)``/``Float(0)``.
+
+    Anything else is returned unchanged.
+    """
+    if isinstance(x, BooleanTrue):
+        return sp.Float(1)
+    if isinstance(x, BooleanFalse):
+        return sp.Float(0)
+    return x
+
+
+def num2bool(x: sp.Basic | sp.Expr) -> sp.Basic | sp.Expr:
+    """Convert a numeric sympy expression to a boolean one.
+
+    Boolean expressions are returned unchanged. Expressions known to be zero
+    become ``false``, expressions known to be non-zero become ``true``, and
+    anything that cannot be decided yet becomes the symbolic relation
+    ``x != 0``.
+    """
+    # Symbols are excluded here: sympy's Symbol is itself a Boolean subclass,
+    # but the visitor only creates real-valued symbols, i.e. numbers.
+    if isinstance(x, Boolean) and not isinstance(x, sp.Symbol):
+        return x
+    # Note: sp.Float(0) == 0 is False in sympy>=1.13
+    if x.is_zero is True:
+        return sp.false
+    if x.is_zero is False:
+        return sp.true
+    # `x != 0.0` would be a *structural* comparison, which is always True for
+    # a non-constant expression; `sp.Ne` builds the symbolic relation instead.
+    return sp.Ne(x, 0)
diff --git a/petab/v1/math/__init__.py b/petab/v1/math/__init__.py
new file mode 100644
index 00000000..27ebacd2
--- /dev/null
+++ b/petab/v1/math/__init__.py
@@ -0,0 +1,2 @@
+"""Functions for parsing and evaluating mathematical expressions."""
+from .sympify import sympify_petab # noqa: F401
diff --git a/petab/v1/math/_generated/PetabMathExprLexer.interp b/petab/v1/math/_generated/PetabMathExprLexer.interp
new file mode 100644
index 00000000..85ffff54
--- /dev/null
+++ b/petab/v1/math/_generated/PetabMathExprLexer.interp
@@ -0,0 +1,100 @@
+token literal names:
+null
+null
+null
+null
+null
+null
+null
+'true'
+'false'
+'inf'
+null
+'('
+')'
+'||'
+'&&'
+'>'
+'<'
+'>='
+'<='
+'=='
+'!='
+'+'
+'-'
+'*'
+'/'
+'^'
+'!'
+','
+
+token symbolic names:
+null
+NUMBER
+INTEGER
+EXPONENT_FLOAT
+POINT_FLOAT
+FLOAT_NUMBER
+WS
+TRUE
+FALSE
+INF
+NAME
+OPEN_PAREN
+CLOSE_PAREN
+BOOLEAN_OR
+BOOLEAN_AND
+GT
+LT
+GTE
+LTE
+EQ
+NEQ
+PLUS
+MINUS
+ASTERISK
+SLASH
+CARET
+EXCLAMATION_MARK
+COMMA
+
+rule names:
+NUMBER
+INTEGER
+EXPONENT_FLOAT
+POINT_FLOAT
+EXPONENT
+FLOAT_NUMBER
+DIGITS
+WS
+TRUE
+FALSE
+INF
+NAME
+OPEN_PAREN
+CLOSE_PAREN
+BOOLEAN_OR
+BOOLEAN_AND
+GT
+LT
+GTE
+LTE
+EQ
+NEQ
+PLUS
+MINUS
+ASTERISK
+SLASH
+CARET
+EXCLAMATION_MARK
+COMMA
+
+channel names:
+DEFAULT_TOKEN_CHANNEL
+HIDDEN
+
+mode names:
+DEFAULT_MODE
+
+atn:
+[4, 0, 27, 161, 6, -1, 2, 0, 7, 0, 2, 1, 7, 1, 2, 2, 7, 2, 2, 3, 7, 3, 2, 4, 7, 4, 2, 5, 7, 5, 2, 6, 7, 6, 2, 7, 7, 7, 2, 8, 7, 8, 2, 9, 7, 9, 2, 10, 7, 10, 2, 11, 7, 11, 2, 12, 7, 12, 2, 13, 7, 13, 2, 14, 7, 14, 2, 15, 7, 15, 2, 16, 7, 16, 2, 17, 7, 17, 2, 18, 7, 18, 2, 19, 7, 19, 2, 20, 7, 20, 2, 21, 7, 21, 2, 22, 7, 22, 2, 23, 7, 23, 2, 24, 7, 24, 2, 25, 7, 25, 2, 26, 7, 26, 2, 27, 7, 27, 2, 28, 7, 28, 1, 0, 1, 0, 1, 0, 1, 0, 3, 0, 64, 8, 0, 1, 1, 1, 1, 1, 2, 1, 2, 3, 2, 70, 8, 2, 1, 2, 1, 2, 1, 3, 1, 3, 1, 3, 1, 3, 1, 4, 1, 4, 3, 4, 80, 8, 4, 1, 4, 1, 4, 1, 5, 1, 5, 3, 5, 86, 8, 5, 1, 6, 4, 6, 89, 8, 6, 11, 6, 12, 6, 90, 1, 7, 4, 7, 94, 8, 7, 11, 7, 12, 7, 95, 1, 7, 1, 7, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 9, 1, 9, 1, 9, 1, 9, 1, 9, 1, 9, 1, 10, 1, 10, 1, 10, 1, 10, 1, 11, 1, 11, 5, 11, 117, 8, 11, 10, 11, 12, 11, 120, 9, 11, 1, 12, 1, 12, 1, 13, 1, 13, 1, 14, 1, 14, 1, 14, 1, 15, 1, 15, 1, 15, 1, 16, 1, 16, 1, 17, 1, 17, 1, 18, 1, 18, 1, 18, 1, 19, 1, 19, 1, 19, 1, 20, 1, 20, 1, 20, 1, 21, 1, 21, 1, 21, 1, 22, 1, 22, 1, 23, 1, 23, 1, 24, 1, 24, 1, 25, 1, 25, 1, 26, 1, 26, 1, 27, 1, 27, 1, 28, 1, 28, 0, 0, 29, 1, 1, 3, 2, 5, 3, 7, 4, 9, 0, 11, 5, 13, 0, 15, 6, 17, 7, 19, 8, 21, 9, 23, 10, 25, 11, 27, 12, 29, 13, 31, 14, 33, 15, 35, 16, 37, 17, 39, 18, 41, 19, 43, 20, 45, 21, 47, 22, 49, 23, 51, 24, 53, 25, 55, 26, 57, 27, 1, 0, 6, 2, 0, 69, 69, 101, 101, 2, 0, 43, 43, 45, 45, 1, 0, 48, 57, 3, 0, 9, 10, 13, 13, 32, 32, 3, 0, 65, 90, 95, 95, 97, 122, 4, 0, 48, 57, 65, 90, 95, 95, 97, 122, 167, 0, 1, 1, 0, 0, 0, 0, 3, 1, 0, 0, 0, 0, 5, 1, 0, 0, 0, 0, 7, 1, 0, 0, 0, 0, 11, 1, 0, 0, 0, 0, 15, 1, 0, 0, 0, 0, 17, 1, 0, 0, 0, 0, 19, 1, 0, 0, 0, 0, 21, 1, 0, 0, 0, 0, 23, 1, 0, 0, 0, 0, 25, 1, 0, 0, 0, 0, 27, 1, 0, 0, 0, 0, 29, 1, 0, 0, 0, 0, 31, 1, 0, 0, 0, 0, 33, 1, 0, 0, 0, 0, 35, 1, 0, 0, 0, 0, 37, 1, 0, 0, 0, 0, 39, 1, 0, 0, 0, 0, 41, 1, 0, 0, 0, 0, 43, 1, 0, 0, 0, 0, 45, 1, 0, 0, 0, 0, 47, 1, 0, 0, 0, 0, 49, 1, 0, 0, 0, 0, 51, 1, 0, 0, 0, 0, 53, 1, 0, 0, 0, 0, 55, 
1, 0, 0, 0, 0, 57, 1, 0, 0, 0, 1, 63, 1, 0, 0, 0, 3, 65, 1, 0, 0, 0, 5, 69, 1, 0, 0, 0, 7, 73, 1, 0, 0, 0, 9, 77, 1, 0, 0, 0, 11, 85, 1, 0, 0, 0, 13, 88, 1, 0, 0, 0, 15, 93, 1, 0, 0, 0, 17, 99, 1, 0, 0, 0, 19, 104, 1, 0, 0, 0, 21, 110, 1, 0, 0, 0, 23, 114, 1, 0, 0, 0, 25, 121, 1, 0, 0, 0, 27, 123, 1, 0, 0, 0, 29, 125, 1, 0, 0, 0, 31, 128, 1, 0, 0, 0, 33, 131, 1, 0, 0, 0, 35, 133, 1, 0, 0, 0, 37, 135, 1, 0, 0, 0, 39, 138, 1, 0, 0, 0, 41, 141, 1, 0, 0, 0, 43, 144, 1, 0, 0, 0, 45, 147, 1, 0, 0, 0, 47, 149, 1, 0, 0, 0, 49, 151, 1, 0, 0, 0, 51, 153, 1, 0, 0, 0, 53, 155, 1, 0, 0, 0, 55, 157, 1, 0, 0, 0, 57, 159, 1, 0, 0, 0, 59, 64, 3, 5, 2, 0, 60, 64, 3, 3, 1, 0, 61, 64, 3, 7, 3, 0, 62, 64, 3, 21, 10, 0, 63, 59, 1, 0, 0, 0, 63, 60, 1, 0, 0, 0, 63, 61, 1, 0, 0, 0, 63, 62, 1, 0, 0, 0, 64, 2, 1, 0, 0, 0, 65, 66, 3, 13, 6, 0, 66, 4, 1, 0, 0, 0, 67, 70, 3, 3, 1, 0, 68, 70, 3, 7, 3, 0, 69, 67, 1, 0, 0, 0, 69, 68, 1, 0, 0, 0, 70, 71, 1, 0, 0, 0, 71, 72, 3, 9, 4, 0, 72, 6, 1, 0, 0, 0, 73, 74, 3, 13, 6, 0, 74, 75, 5, 46, 0, 0, 75, 76, 3, 13, 6, 0, 76, 8, 1, 0, 0, 0, 77, 79, 7, 0, 0, 0, 78, 80, 7, 1, 0, 0, 79, 78, 1, 0, 0, 0, 79, 80, 1, 0, 0, 0, 80, 81, 1, 0, 0, 0, 81, 82, 3, 13, 6, 0, 82, 10, 1, 0, 0, 0, 83, 86, 3, 7, 3, 0, 84, 86, 3, 5, 2, 0, 85, 83, 1, 0, 0, 0, 85, 84, 1, 0, 0, 0, 86, 12, 1, 0, 0, 0, 87, 89, 7, 2, 0, 0, 88, 87, 1, 0, 0, 0, 89, 90, 1, 0, 0, 0, 90, 88, 1, 0, 0, 0, 90, 91, 1, 0, 0, 0, 91, 14, 1, 0, 0, 0, 92, 94, 7, 3, 0, 0, 93, 92, 1, 0, 0, 0, 94, 95, 1, 0, 0, 0, 95, 93, 1, 0, 0, 0, 95, 96, 1, 0, 0, 0, 96, 97, 1, 0, 0, 0, 97, 98, 6, 7, 0, 0, 98, 16, 1, 0, 0, 0, 99, 100, 5, 116, 0, 0, 100, 101, 5, 114, 0, 0, 101, 102, 5, 117, 0, 0, 102, 103, 5, 101, 0, 0, 103, 18, 1, 0, 0, 0, 104, 105, 5, 102, 0, 0, 105, 106, 5, 97, 0, 0, 106, 107, 5, 108, 0, 0, 107, 108, 5, 115, 0, 0, 108, 109, 5, 101, 0, 0, 109, 20, 1, 0, 0, 0, 110, 111, 5, 105, 0, 0, 111, 112, 5, 110, 0, 0, 112, 113, 5, 102, 0, 0, 113, 22, 1, 0, 0, 0, 114, 118, 7, 4, 0, 0, 115, 117, 7, 5, 0, 0, 116, 115, 1, 0, 
0, 0, 117, 120, 1, 0, 0, 0, 118, 116, 1, 0, 0, 0, 118, 119, 1, 0, 0, 0, 119, 24, 1, 0, 0, 0, 120, 118, 1, 0, 0, 0, 121, 122, 5, 40, 0, 0, 122, 26, 1, 0, 0, 0, 123, 124, 5, 41, 0, 0, 124, 28, 1, 0, 0, 0, 125, 126, 5, 124, 0, 0, 126, 127, 5, 124, 0, 0, 127, 30, 1, 0, 0, 0, 128, 129, 5, 38, 0, 0, 129, 130, 5, 38, 0, 0, 130, 32, 1, 0, 0, 0, 131, 132, 5, 62, 0, 0, 132, 34, 1, 0, 0, 0, 133, 134, 5, 60, 0, 0, 134, 36, 1, 0, 0, 0, 135, 136, 5, 62, 0, 0, 136, 137, 5, 61, 0, 0, 137, 38, 1, 0, 0, 0, 138, 139, 5, 60, 0, 0, 139, 140, 5, 61, 0, 0, 140, 40, 1, 0, 0, 0, 141, 142, 5, 61, 0, 0, 142, 143, 5, 61, 0, 0, 143, 42, 1, 0, 0, 0, 144, 145, 5, 33, 0, 0, 145, 146, 5, 61, 0, 0, 146, 44, 1, 0, 0, 0, 147, 148, 5, 43, 0, 0, 148, 46, 1, 0, 0, 0, 149, 150, 5, 45, 0, 0, 150, 48, 1, 0, 0, 0, 151, 152, 5, 42, 0, 0, 152, 50, 1, 0, 0, 0, 153, 154, 5, 47, 0, 0, 154, 52, 1, 0, 0, 0, 155, 156, 5, 94, 0, 0, 156, 54, 1, 0, 0, 0, 157, 158, 5, 33, 0, 0, 158, 56, 1, 0, 0, 0, 159, 160, 5, 44, 0, 0, 160, 58, 1, 0, 0, 0, 8, 0, 63, 69, 79, 85, 90, 95, 118, 1, 6, 0, 0]
diff --git a/petab/v1/math/_generated/PetabMathExprLexer.py b/petab/v1/math/_generated/PetabMathExprLexer.py
new file mode 100644
index 00000000..4b16f1e8
--- /dev/null
+++ b/petab/v1/math/_generated/PetabMathExprLexer.py
@@ -0,0 +1,1608 @@
+# Generated from PetabMathExprLexer.g4 by ANTLR 4.13.1
+import sys
+
+from antlr4 import *
+
+if sys.version_info[1] > 5:
+ from typing import TextIO
+else:
+ from typing.io import TextIO
+
+
+def serializedATN():
+ return [
+ 4,
+ 0,
+ 27,
+ 161,
+ 6,
+ -1,
+ 2,
+ 0,
+ 7,
+ 0,
+ 2,
+ 1,
+ 7,
+ 1,
+ 2,
+ 2,
+ 7,
+ 2,
+ 2,
+ 3,
+ 7,
+ 3,
+ 2,
+ 4,
+ 7,
+ 4,
+ 2,
+ 5,
+ 7,
+ 5,
+ 2,
+ 6,
+ 7,
+ 6,
+ 2,
+ 7,
+ 7,
+ 7,
+ 2,
+ 8,
+ 7,
+ 8,
+ 2,
+ 9,
+ 7,
+ 9,
+ 2,
+ 10,
+ 7,
+ 10,
+ 2,
+ 11,
+ 7,
+ 11,
+ 2,
+ 12,
+ 7,
+ 12,
+ 2,
+ 13,
+ 7,
+ 13,
+ 2,
+ 14,
+ 7,
+ 14,
+ 2,
+ 15,
+ 7,
+ 15,
+ 2,
+ 16,
+ 7,
+ 16,
+ 2,
+ 17,
+ 7,
+ 17,
+ 2,
+ 18,
+ 7,
+ 18,
+ 2,
+ 19,
+ 7,
+ 19,
+ 2,
+ 20,
+ 7,
+ 20,
+ 2,
+ 21,
+ 7,
+ 21,
+ 2,
+ 22,
+ 7,
+ 22,
+ 2,
+ 23,
+ 7,
+ 23,
+ 2,
+ 24,
+ 7,
+ 24,
+ 2,
+ 25,
+ 7,
+ 25,
+ 2,
+ 26,
+ 7,
+ 26,
+ 2,
+ 27,
+ 7,
+ 27,
+ 2,
+ 28,
+ 7,
+ 28,
+ 1,
+ 0,
+ 1,
+ 0,
+ 1,
+ 0,
+ 1,
+ 0,
+ 3,
+ 0,
+ 64,
+ 8,
+ 0,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 2,
+ 1,
+ 2,
+ 3,
+ 2,
+ 70,
+ 8,
+ 2,
+ 1,
+ 2,
+ 1,
+ 2,
+ 1,
+ 3,
+ 1,
+ 3,
+ 1,
+ 3,
+ 1,
+ 3,
+ 1,
+ 4,
+ 1,
+ 4,
+ 3,
+ 4,
+ 80,
+ 8,
+ 4,
+ 1,
+ 4,
+ 1,
+ 4,
+ 1,
+ 5,
+ 1,
+ 5,
+ 3,
+ 5,
+ 86,
+ 8,
+ 5,
+ 1,
+ 6,
+ 4,
+ 6,
+ 89,
+ 8,
+ 6,
+ 11,
+ 6,
+ 12,
+ 6,
+ 90,
+ 1,
+ 7,
+ 4,
+ 7,
+ 94,
+ 8,
+ 7,
+ 11,
+ 7,
+ 12,
+ 7,
+ 95,
+ 1,
+ 7,
+ 1,
+ 7,
+ 1,
+ 8,
+ 1,
+ 8,
+ 1,
+ 8,
+ 1,
+ 8,
+ 1,
+ 8,
+ 1,
+ 9,
+ 1,
+ 9,
+ 1,
+ 9,
+ 1,
+ 9,
+ 1,
+ 9,
+ 1,
+ 9,
+ 1,
+ 10,
+ 1,
+ 10,
+ 1,
+ 10,
+ 1,
+ 10,
+ 1,
+ 11,
+ 1,
+ 11,
+ 5,
+ 11,
+ 117,
+ 8,
+ 11,
+ 10,
+ 11,
+ 12,
+ 11,
+ 120,
+ 9,
+ 11,
+ 1,
+ 12,
+ 1,
+ 12,
+ 1,
+ 13,
+ 1,
+ 13,
+ 1,
+ 14,
+ 1,
+ 14,
+ 1,
+ 14,
+ 1,
+ 15,
+ 1,
+ 15,
+ 1,
+ 15,
+ 1,
+ 16,
+ 1,
+ 16,
+ 1,
+ 17,
+ 1,
+ 17,
+ 1,
+ 18,
+ 1,
+ 18,
+ 1,
+ 18,
+ 1,
+ 19,
+ 1,
+ 19,
+ 1,
+ 19,
+ 1,
+ 20,
+ 1,
+ 20,
+ 1,
+ 20,
+ 1,
+ 21,
+ 1,
+ 21,
+ 1,
+ 21,
+ 1,
+ 22,
+ 1,
+ 22,
+ 1,
+ 23,
+ 1,
+ 23,
+ 1,
+ 24,
+ 1,
+ 24,
+ 1,
+ 25,
+ 1,
+ 25,
+ 1,
+ 26,
+ 1,
+ 26,
+ 1,
+ 27,
+ 1,
+ 27,
+ 1,
+ 28,
+ 1,
+ 28,
+ 0,
+ 0,
+ 29,
+ 1,
+ 1,
+ 3,
+ 2,
+ 5,
+ 3,
+ 7,
+ 4,
+ 9,
+ 0,
+ 11,
+ 5,
+ 13,
+ 0,
+ 15,
+ 6,
+ 17,
+ 7,
+ 19,
+ 8,
+ 21,
+ 9,
+ 23,
+ 10,
+ 25,
+ 11,
+ 27,
+ 12,
+ 29,
+ 13,
+ 31,
+ 14,
+ 33,
+ 15,
+ 35,
+ 16,
+ 37,
+ 17,
+ 39,
+ 18,
+ 41,
+ 19,
+ 43,
+ 20,
+ 45,
+ 21,
+ 47,
+ 22,
+ 49,
+ 23,
+ 51,
+ 24,
+ 53,
+ 25,
+ 55,
+ 26,
+ 57,
+ 27,
+ 1,
+ 0,
+ 6,
+ 2,
+ 0,
+ 69,
+ 69,
+ 101,
+ 101,
+ 2,
+ 0,
+ 43,
+ 43,
+ 45,
+ 45,
+ 1,
+ 0,
+ 48,
+ 57,
+ 3,
+ 0,
+ 9,
+ 10,
+ 13,
+ 13,
+ 32,
+ 32,
+ 3,
+ 0,
+ 65,
+ 90,
+ 95,
+ 95,
+ 97,
+ 122,
+ 4,
+ 0,
+ 48,
+ 57,
+ 65,
+ 90,
+ 95,
+ 95,
+ 97,
+ 122,
+ 167,
+ 0,
+ 1,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 3,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 7,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 11,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 15,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 17,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 19,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 21,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 23,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 25,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 27,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 29,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 31,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 33,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 35,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 37,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 39,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 41,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 43,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 45,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 47,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 49,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 51,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 53,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 55,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 57,
+ 1,
+ 0,
+ 0,
+ 0,
+ 1,
+ 63,
+ 1,
+ 0,
+ 0,
+ 0,
+ 3,
+ 65,
+ 1,
+ 0,
+ 0,
+ 0,
+ 5,
+ 69,
+ 1,
+ 0,
+ 0,
+ 0,
+ 7,
+ 73,
+ 1,
+ 0,
+ 0,
+ 0,
+ 9,
+ 77,
+ 1,
+ 0,
+ 0,
+ 0,
+ 11,
+ 85,
+ 1,
+ 0,
+ 0,
+ 0,
+ 13,
+ 88,
+ 1,
+ 0,
+ 0,
+ 0,
+ 15,
+ 93,
+ 1,
+ 0,
+ 0,
+ 0,
+ 17,
+ 99,
+ 1,
+ 0,
+ 0,
+ 0,
+ 19,
+ 104,
+ 1,
+ 0,
+ 0,
+ 0,
+ 21,
+ 110,
+ 1,
+ 0,
+ 0,
+ 0,
+ 23,
+ 114,
+ 1,
+ 0,
+ 0,
+ 0,
+ 25,
+ 121,
+ 1,
+ 0,
+ 0,
+ 0,
+ 27,
+ 123,
+ 1,
+ 0,
+ 0,
+ 0,
+ 29,
+ 125,
+ 1,
+ 0,
+ 0,
+ 0,
+ 31,
+ 128,
+ 1,
+ 0,
+ 0,
+ 0,
+ 33,
+ 131,
+ 1,
+ 0,
+ 0,
+ 0,
+ 35,
+ 133,
+ 1,
+ 0,
+ 0,
+ 0,
+ 37,
+ 135,
+ 1,
+ 0,
+ 0,
+ 0,
+ 39,
+ 138,
+ 1,
+ 0,
+ 0,
+ 0,
+ 41,
+ 141,
+ 1,
+ 0,
+ 0,
+ 0,
+ 43,
+ 144,
+ 1,
+ 0,
+ 0,
+ 0,
+ 45,
+ 147,
+ 1,
+ 0,
+ 0,
+ 0,
+ 47,
+ 149,
+ 1,
+ 0,
+ 0,
+ 0,
+ 49,
+ 151,
+ 1,
+ 0,
+ 0,
+ 0,
+ 51,
+ 153,
+ 1,
+ 0,
+ 0,
+ 0,
+ 53,
+ 155,
+ 1,
+ 0,
+ 0,
+ 0,
+ 55,
+ 157,
+ 1,
+ 0,
+ 0,
+ 0,
+ 57,
+ 159,
+ 1,
+ 0,
+ 0,
+ 0,
+ 59,
+ 64,
+ 3,
+ 5,
+ 2,
+ 0,
+ 60,
+ 64,
+ 3,
+ 3,
+ 1,
+ 0,
+ 61,
+ 64,
+ 3,
+ 7,
+ 3,
+ 0,
+ 62,
+ 64,
+ 3,
+ 21,
+ 10,
+ 0,
+ 63,
+ 59,
+ 1,
+ 0,
+ 0,
+ 0,
+ 63,
+ 60,
+ 1,
+ 0,
+ 0,
+ 0,
+ 63,
+ 61,
+ 1,
+ 0,
+ 0,
+ 0,
+ 63,
+ 62,
+ 1,
+ 0,
+ 0,
+ 0,
+ 64,
+ 2,
+ 1,
+ 0,
+ 0,
+ 0,
+ 65,
+ 66,
+ 3,
+ 13,
+ 6,
+ 0,
+ 66,
+ 4,
+ 1,
+ 0,
+ 0,
+ 0,
+ 67,
+ 70,
+ 3,
+ 3,
+ 1,
+ 0,
+ 68,
+ 70,
+ 3,
+ 7,
+ 3,
+ 0,
+ 69,
+ 67,
+ 1,
+ 0,
+ 0,
+ 0,
+ 69,
+ 68,
+ 1,
+ 0,
+ 0,
+ 0,
+ 70,
+ 71,
+ 1,
+ 0,
+ 0,
+ 0,
+ 71,
+ 72,
+ 3,
+ 9,
+ 4,
+ 0,
+ 72,
+ 6,
+ 1,
+ 0,
+ 0,
+ 0,
+ 73,
+ 74,
+ 3,
+ 13,
+ 6,
+ 0,
+ 74,
+ 75,
+ 5,
+ 46,
+ 0,
+ 0,
+ 75,
+ 76,
+ 3,
+ 13,
+ 6,
+ 0,
+ 76,
+ 8,
+ 1,
+ 0,
+ 0,
+ 0,
+ 77,
+ 79,
+ 7,
+ 0,
+ 0,
+ 0,
+ 78,
+ 80,
+ 7,
+ 1,
+ 0,
+ 0,
+ 79,
+ 78,
+ 1,
+ 0,
+ 0,
+ 0,
+ 79,
+ 80,
+ 1,
+ 0,
+ 0,
+ 0,
+ 80,
+ 81,
+ 1,
+ 0,
+ 0,
+ 0,
+ 81,
+ 82,
+ 3,
+ 13,
+ 6,
+ 0,
+ 82,
+ 10,
+ 1,
+ 0,
+ 0,
+ 0,
+ 83,
+ 86,
+ 3,
+ 7,
+ 3,
+ 0,
+ 84,
+ 86,
+ 3,
+ 5,
+ 2,
+ 0,
+ 85,
+ 83,
+ 1,
+ 0,
+ 0,
+ 0,
+ 85,
+ 84,
+ 1,
+ 0,
+ 0,
+ 0,
+ 86,
+ 12,
+ 1,
+ 0,
+ 0,
+ 0,
+ 87,
+ 89,
+ 7,
+ 2,
+ 0,
+ 0,
+ 88,
+ 87,
+ 1,
+ 0,
+ 0,
+ 0,
+ 89,
+ 90,
+ 1,
+ 0,
+ 0,
+ 0,
+ 90,
+ 88,
+ 1,
+ 0,
+ 0,
+ 0,
+ 90,
+ 91,
+ 1,
+ 0,
+ 0,
+ 0,
+ 91,
+ 14,
+ 1,
+ 0,
+ 0,
+ 0,
+ 92,
+ 94,
+ 7,
+ 3,
+ 0,
+ 0,
+ 93,
+ 92,
+ 1,
+ 0,
+ 0,
+ 0,
+ 94,
+ 95,
+ 1,
+ 0,
+ 0,
+ 0,
+ 95,
+ 93,
+ 1,
+ 0,
+ 0,
+ 0,
+ 95,
+ 96,
+ 1,
+ 0,
+ 0,
+ 0,
+ 96,
+ 97,
+ 1,
+ 0,
+ 0,
+ 0,
+ 97,
+ 98,
+ 6,
+ 7,
+ 0,
+ 0,
+ 98,
+ 16,
+ 1,
+ 0,
+ 0,
+ 0,
+ 99,
+ 100,
+ 5,
+ 116,
+ 0,
+ 0,
+ 100,
+ 101,
+ 5,
+ 114,
+ 0,
+ 0,
+ 101,
+ 102,
+ 5,
+ 117,
+ 0,
+ 0,
+ 102,
+ 103,
+ 5,
+ 101,
+ 0,
+ 0,
+ 103,
+ 18,
+ 1,
+ 0,
+ 0,
+ 0,
+ 104,
+ 105,
+ 5,
+ 102,
+ 0,
+ 0,
+ 105,
+ 106,
+ 5,
+ 97,
+ 0,
+ 0,
+ 106,
+ 107,
+ 5,
+ 108,
+ 0,
+ 0,
+ 107,
+ 108,
+ 5,
+ 115,
+ 0,
+ 0,
+ 108,
+ 109,
+ 5,
+ 101,
+ 0,
+ 0,
+ 109,
+ 20,
+ 1,
+ 0,
+ 0,
+ 0,
+ 110,
+ 111,
+ 5,
+ 105,
+ 0,
+ 0,
+ 111,
+ 112,
+ 5,
+ 110,
+ 0,
+ 0,
+ 112,
+ 113,
+ 5,
+ 102,
+ 0,
+ 0,
+ 113,
+ 22,
+ 1,
+ 0,
+ 0,
+ 0,
+ 114,
+ 118,
+ 7,
+ 4,
+ 0,
+ 0,
+ 115,
+ 117,
+ 7,
+ 5,
+ 0,
+ 0,
+ 116,
+ 115,
+ 1,
+ 0,
+ 0,
+ 0,
+ 117,
+ 120,
+ 1,
+ 0,
+ 0,
+ 0,
+ 118,
+ 116,
+ 1,
+ 0,
+ 0,
+ 0,
+ 118,
+ 119,
+ 1,
+ 0,
+ 0,
+ 0,
+ 119,
+ 24,
+ 1,
+ 0,
+ 0,
+ 0,
+ 120,
+ 118,
+ 1,
+ 0,
+ 0,
+ 0,
+ 121,
+ 122,
+ 5,
+ 40,
+ 0,
+ 0,
+ 122,
+ 26,
+ 1,
+ 0,
+ 0,
+ 0,
+ 123,
+ 124,
+ 5,
+ 41,
+ 0,
+ 0,
+ 124,
+ 28,
+ 1,
+ 0,
+ 0,
+ 0,
+ 125,
+ 126,
+ 5,
+ 124,
+ 0,
+ 0,
+ 126,
+ 127,
+ 5,
+ 124,
+ 0,
+ 0,
+ 127,
+ 30,
+ 1,
+ 0,
+ 0,
+ 0,
+ 128,
+ 129,
+ 5,
+ 38,
+ 0,
+ 0,
+ 129,
+ 130,
+ 5,
+ 38,
+ 0,
+ 0,
+ 130,
+ 32,
+ 1,
+ 0,
+ 0,
+ 0,
+ 131,
+ 132,
+ 5,
+ 62,
+ 0,
+ 0,
+ 132,
+ 34,
+ 1,
+ 0,
+ 0,
+ 0,
+ 133,
+ 134,
+ 5,
+ 60,
+ 0,
+ 0,
+ 134,
+ 36,
+ 1,
+ 0,
+ 0,
+ 0,
+ 135,
+ 136,
+ 5,
+ 62,
+ 0,
+ 0,
+ 136,
+ 137,
+ 5,
+ 61,
+ 0,
+ 0,
+ 137,
+ 38,
+ 1,
+ 0,
+ 0,
+ 0,
+ 138,
+ 139,
+ 5,
+ 60,
+ 0,
+ 0,
+ 139,
+ 140,
+ 5,
+ 61,
+ 0,
+ 0,
+ 140,
+ 40,
+ 1,
+ 0,
+ 0,
+ 0,
+ 141,
+ 142,
+ 5,
+ 61,
+ 0,
+ 0,
+ 142,
+ 143,
+ 5,
+ 61,
+ 0,
+ 0,
+ 143,
+ 42,
+ 1,
+ 0,
+ 0,
+ 0,
+ 144,
+ 145,
+ 5,
+ 33,
+ 0,
+ 0,
+ 145,
+ 146,
+ 5,
+ 61,
+ 0,
+ 0,
+ 146,
+ 44,
+ 1,
+ 0,
+ 0,
+ 0,
+ 147,
+ 148,
+ 5,
+ 43,
+ 0,
+ 0,
+ 148,
+ 46,
+ 1,
+ 0,
+ 0,
+ 0,
+ 149,
+ 150,
+ 5,
+ 45,
+ 0,
+ 0,
+ 150,
+ 48,
+ 1,
+ 0,
+ 0,
+ 0,
+ 151,
+ 152,
+ 5,
+ 42,
+ 0,
+ 0,
+ 152,
+ 50,
+ 1,
+ 0,
+ 0,
+ 0,
+ 153,
+ 154,
+ 5,
+ 47,
+ 0,
+ 0,
+ 154,
+ 52,
+ 1,
+ 0,
+ 0,
+ 0,
+ 155,
+ 156,
+ 5,
+ 94,
+ 0,
+ 0,
+ 156,
+ 54,
+ 1,
+ 0,
+ 0,
+ 0,
+ 157,
+ 158,
+ 5,
+ 33,
+ 0,
+ 0,
+ 158,
+ 56,
+ 1,
+ 0,
+ 0,
+ 0,
+ 159,
+ 160,
+ 5,
+ 44,
+ 0,
+ 0,
+ 160,
+ 58,
+ 1,
+ 0,
+ 0,
+ 0,
+ 8,
+ 0,
+ 63,
+ 69,
+ 79,
+ 85,
+ 90,
+ 95,
+ 118,
+ 1,
+ 6,
+ 0,
+ 0,
+ ]
+
+
+class PetabMathExprLexer(Lexer):
+ atn = ATNDeserializer().deserialize(serializedATN())
+
+ decisionsToDFA = [DFA(ds, i) for i, ds in enumerate(atn.decisionToState)]
+
+ NUMBER = 1
+ INTEGER = 2
+ EXPONENT_FLOAT = 3
+ POINT_FLOAT = 4
+ FLOAT_NUMBER = 5
+ WS = 6
+ TRUE = 7
+ FALSE = 8
+ INF = 9
+ NAME = 10
+ OPEN_PAREN = 11
+ CLOSE_PAREN = 12
+ BOOLEAN_OR = 13
+ BOOLEAN_AND = 14
+ GT = 15
+ LT = 16
+ GTE = 17
+ LTE = 18
+ EQ = 19
+ NEQ = 20
+ PLUS = 21
+ MINUS = 22
+ ASTERISK = 23
+ SLASH = 24
+ CARET = 25
+ EXCLAMATION_MARK = 26
+ COMMA = 27
+
+ channelNames = ["DEFAULT_TOKEN_CHANNEL", "HIDDEN"]
+
+ modeNames = ["DEFAULT_MODE"]
+
+ literalNames = [
+ "",
+ "'true'",
+ "'false'",
+ "'inf'",
+ "'('",
+ "')'",
+ "'||'",
+ "'&&'",
+ "'>'",
+ "'<'",
+ "'>='",
+ "'<='",
+ "'=='",
+ "'!='",
+ "'+'",
+ "'-'",
+ "'*'",
+ "'/'",
+ "'^'",
+ "'!'",
+ "','",
+ ]
+
+ symbolicNames = [
+ "",
+ "NUMBER",
+ "INTEGER",
+ "EXPONENT_FLOAT",
+ "POINT_FLOAT",
+ "FLOAT_NUMBER",
+ "WS",
+ "TRUE",
+ "FALSE",
+ "INF",
+ "NAME",
+ "OPEN_PAREN",
+ "CLOSE_PAREN",
+ "BOOLEAN_OR",
+ "BOOLEAN_AND",
+ "GT",
+ "LT",
+ "GTE",
+ "LTE",
+ "EQ",
+ "NEQ",
+ "PLUS",
+ "MINUS",
+ "ASTERISK",
+ "SLASH",
+ "CARET",
+ "EXCLAMATION_MARK",
+ "COMMA",
+ ]
+
+ ruleNames = [
+ "NUMBER",
+ "INTEGER",
+ "EXPONENT_FLOAT",
+ "POINT_FLOAT",
+ "EXPONENT",
+ "FLOAT_NUMBER",
+ "DIGITS",
+ "WS",
+ "TRUE",
+ "FALSE",
+ "INF",
+ "NAME",
+ "OPEN_PAREN",
+ "CLOSE_PAREN",
+ "BOOLEAN_OR",
+ "BOOLEAN_AND",
+ "GT",
+ "LT",
+ "GTE",
+ "LTE",
+ "EQ",
+ "NEQ",
+ "PLUS",
+ "MINUS",
+ "ASTERISK",
+ "SLASH",
+ "CARET",
+ "EXCLAMATION_MARK",
+ "COMMA",
+ ]
+
+ grammarFileName = "PetabMathExprLexer.g4"
+
+ def __init__(self, input=None, output: TextIO = sys.stdout):
+ super().__init__(input, output)
+ self.checkVersion("4.13.1")
+ self._interp = LexerATNSimulator(
+ self, self.atn, self.decisionsToDFA, PredictionContextCache()
+ )
+ self._actions = None
+ self._predicates = None
diff --git a/petab/v1/math/_generated/PetabMathExprLexer.tokens b/petab/v1/math/_generated/PetabMathExprLexer.tokens
new file mode 100644
index 00000000..bfa04b53
--- /dev/null
+++ b/petab/v1/math/_generated/PetabMathExprLexer.tokens
@@ -0,0 +1,47 @@
+NUMBER=1
+INTEGER=2
+EXPONENT_FLOAT=3
+POINT_FLOAT=4
+FLOAT_NUMBER=5
+WS=6
+TRUE=7
+FALSE=8
+INF=9
+NAME=10
+OPEN_PAREN=11
+CLOSE_PAREN=12
+BOOLEAN_OR=13
+BOOLEAN_AND=14
+GT=15
+LT=16
+GTE=17
+LTE=18
+EQ=19
+NEQ=20
+PLUS=21
+MINUS=22
+ASTERISK=23
+SLASH=24
+CARET=25
+EXCLAMATION_MARK=26
+COMMA=27
+'true'=7
+'false'=8
+'inf'=9
+'('=11
+')'=12
+'||'=13
+'&&'=14
+'>'=15
+'<'=16
+'>='=17
+'<='=18
+'=='=19
+'!='=20
+'+'=21
+'-'=22
+'*'=23
+'/'=24
+'^'=25
+'!'=26
+','=27
diff --git a/petab/v1/math/_generated/PetabMathExprParser.interp b/petab/v1/math/_generated/PetabMathExprParser.interp
new file mode 100644
index 00000000..0d3f8f5b
--- /dev/null
+++ b/petab/v1/math/_generated/PetabMathExprParser.interp
@@ -0,0 +1,73 @@
+token literal names:
+null
+null
+null
+null
+null
+null
+null
+'true'
+'false'
+'inf'
+null
+'('
+')'
+'||'
+'&&'
+'>'
+'<'
+'>='
+'<='
+'=='
+'!='
+'+'
+'-'
+'*'
+'/'
+'^'
+'!'
+','
+
+token symbolic names:
+null
+NUMBER
+INTEGER
+EXPONENT_FLOAT
+POINT_FLOAT
+FLOAT_NUMBER
+WS
+TRUE
+FALSE
+INF
+NAME
+OPEN_PAREN
+CLOSE_PAREN
+BOOLEAN_OR
+BOOLEAN_AND
+GT
+LT
+GTE
+LTE
+EQ
+NEQ
+PLUS
+MINUS
+ASTERISK
+SLASH
+CARET
+EXCLAMATION_MARK
+COMMA
+
+rule names:
+petabExpression
+expr
+comp_op
+argumentList
+functionCall
+booleanLiteral
+number
+var
+
+
+atn:
+[4, 1, 27, 77, 2, 0, 7, 0, 2, 1, 7, 1, 2, 2, 7, 2, 2, 3, 7, 3, 2, 4, 7, 4, 2, 5, 7, 5, 2, 6, 7, 6, 2, 7, 7, 7, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 33, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 51, 8, 1, 10, 1, 12, 1, 54, 9, 1, 1, 2, 1, 2, 1, 3, 1, 3, 1, 3, 5, 3, 61, 8, 3, 10, 3, 12, 3, 64, 9, 3, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 5, 1, 5, 1, 6, 1, 6, 1, 7, 1, 7, 1, 7, 0, 1, 2, 8, 0, 2, 4, 6, 8, 10, 12, 14, 0, 5, 1, 0, 21, 22, 1, 0, 23, 24, 1, 0, 13, 14, 1, 0, 15, 20, 1, 0, 7, 8, 80, 0, 16, 1, 0, 0, 0, 2, 32, 1, 0, 0, 0, 4, 55, 1, 0, 0, 0, 6, 57, 1, 0, 0, 0, 8, 65, 1, 0, 0, 0, 10, 70, 1, 0, 0, 0, 12, 72, 1, 0, 0, 0, 14, 74, 1, 0, 0, 0, 16, 17, 3, 2, 1, 0, 17, 18, 5, 0, 0, 1, 18, 1, 1, 0, 0, 0, 19, 20, 6, 1, -1, 0, 20, 21, 7, 0, 0, 0, 21, 33, 3, 2, 1, 11, 22, 23, 5, 26, 0, 0, 23, 33, 3, 2, 1, 10, 24, 25, 5, 11, 0, 0, 25, 26, 3, 2, 1, 0, 26, 27, 5, 12, 0, 0, 27, 33, 1, 0, 0, 0, 28, 33, 3, 12, 6, 0, 29, 33, 3, 10, 5, 0, 30, 33, 3, 8, 4, 0, 31, 33, 3, 14, 7, 0, 32, 19, 1, 0, 0, 0, 32, 22, 1, 0, 0, 0, 32, 24, 1, 0, 0, 0, 32, 28, 1, 0, 0, 0, 32, 29, 1, 0, 0, 0, 32, 30, 1, 0, 0, 0, 32, 31, 1, 0, 0, 0, 33, 52, 1, 0, 0, 0, 34, 35, 10, 12, 0, 0, 35, 36, 5, 25, 0, 0, 36, 51, 3, 2, 1, 12, 37, 38, 10, 9, 0, 0, 38, 39, 7, 1, 0, 0, 39, 51, 3, 2, 1, 10, 40, 41, 10, 8, 0, 0, 41, 42, 7, 0, 0, 0, 42, 51, 3, 2, 1, 9, 43, 44, 10, 6, 0, 0, 44, 45, 3, 4, 2, 0, 45, 46, 3, 2, 1, 7, 46, 51, 1, 0, 0, 0, 47, 48, 10, 5, 0, 0, 48, 49, 7, 2, 0, 0, 49, 51, 3, 2, 1, 6, 50, 34, 1, 0, 0, 0, 50, 37, 1, 0, 0, 0, 50, 40, 1, 0, 0, 0, 50, 43, 1, 0, 0, 0, 50, 47, 1, 0, 0, 0, 51, 54, 1, 0, 0, 0, 52, 50, 1, 0, 0, 0, 52, 53, 1, 0, 0, 0, 53, 3, 1, 0, 0, 0, 54, 52, 1, 0, 0, 0, 55, 56, 7, 3, 0, 0, 56, 5, 1, 0, 0, 0, 57, 62, 3, 2, 1, 0, 58, 59, 5, 27, 0, 0, 59, 61, 3, 2, 1, 0, 60, 58, 1, 0, 0, 0, 61, 64, 1, 0, 0, 0, 62, 60, 1, 0, 0, 0, 62, 63, 1, 0, 0, 0, 63, 7, 1, 0, 0, 0, 64, 62, 1, 
0, 0, 0, 65, 66, 5, 10, 0, 0, 66, 67, 5, 11, 0, 0, 67, 68, 3, 6, 3, 0, 68, 69, 5, 12, 0, 0, 69, 9, 1, 0, 0, 0, 70, 71, 7, 4, 0, 0, 71, 11, 1, 0, 0, 0, 72, 73, 5, 1, 0, 0, 73, 13, 1, 0, 0, 0, 74, 75, 5, 10, 0, 0, 75, 15, 1, 0, 0, 0, 4, 32, 50, 52, 62]
diff --git a/petab/v1/math/_generated/PetabMathExprParser.py b/petab/v1/math/_generated/PetabMathExprParser.py
new file mode 100644
index 00000000..6341a56b
--- /dev/null
+++ b/petab/v1/math/_generated/PetabMathExprParser.py
@@ -0,0 +1,1764 @@
+# Generated from PetabMathExprParser.g4 by ANTLR 4.13.1
+import sys
+
+from antlr4 import *
+
+if sys.version_info[1] > 5:
+ from typing import TextIO
+else:
+ from typing.io import TextIO
+
+
+def serializedATN():
+ return [
+ 4,
+ 1,
+ 27,
+ 77,
+ 2,
+ 0,
+ 7,
+ 0,
+ 2,
+ 1,
+ 7,
+ 1,
+ 2,
+ 2,
+ 7,
+ 2,
+ 2,
+ 3,
+ 7,
+ 3,
+ 2,
+ 4,
+ 7,
+ 4,
+ 2,
+ 5,
+ 7,
+ 5,
+ 2,
+ 6,
+ 7,
+ 6,
+ 2,
+ 7,
+ 7,
+ 7,
+ 1,
+ 0,
+ 1,
+ 0,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 3,
+ 1,
+ 33,
+ 8,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 5,
+ 1,
+ 51,
+ 8,
+ 1,
+ 10,
+ 1,
+ 12,
+ 1,
+ 54,
+ 9,
+ 1,
+ 1,
+ 2,
+ 1,
+ 2,
+ 1,
+ 3,
+ 1,
+ 3,
+ 1,
+ 3,
+ 5,
+ 3,
+ 61,
+ 8,
+ 3,
+ 10,
+ 3,
+ 12,
+ 3,
+ 64,
+ 9,
+ 3,
+ 1,
+ 4,
+ 1,
+ 4,
+ 1,
+ 4,
+ 1,
+ 4,
+ 1,
+ 4,
+ 1,
+ 5,
+ 1,
+ 5,
+ 1,
+ 6,
+ 1,
+ 6,
+ 1,
+ 7,
+ 1,
+ 7,
+ 1,
+ 7,
+ 0,
+ 1,
+ 2,
+ 8,
+ 0,
+ 2,
+ 4,
+ 6,
+ 8,
+ 10,
+ 12,
+ 14,
+ 0,
+ 5,
+ 1,
+ 0,
+ 21,
+ 22,
+ 1,
+ 0,
+ 23,
+ 24,
+ 1,
+ 0,
+ 13,
+ 14,
+ 1,
+ 0,
+ 15,
+ 20,
+ 1,
+ 0,
+ 7,
+ 8,
+ 80,
+ 0,
+ 16,
+ 1,
+ 0,
+ 0,
+ 0,
+ 2,
+ 32,
+ 1,
+ 0,
+ 0,
+ 0,
+ 4,
+ 55,
+ 1,
+ 0,
+ 0,
+ 0,
+ 6,
+ 57,
+ 1,
+ 0,
+ 0,
+ 0,
+ 8,
+ 65,
+ 1,
+ 0,
+ 0,
+ 0,
+ 10,
+ 70,
+ 1,
+ 0,
+ 0,
+ 0,
+ 12,
+ 72,
+ 1,
+ 0,
+ 0,
+ 0,
+ 14,
+ 74,
+ 1,
+ 0,
+ 0,
+ 0,
+ 16,
+ 17,
+ 3,
+ 2,
+ 1,
+ 0,
+ 17,
+ 18,
+ 5,
+ 0,
+ 0,
+ 1,
+ 18,
+ 1,
+ 1,
+ 0,
+ 0,
+ 0,
+ 19,
+ 20,
+ 6,
+ 1,
+ -1,
+ 0,
+ 20,
+ 21,
+ 7,
+ 0,
+ 0,
+ 0,
+ 21,
+ 33,
+ 3,
+ 2,
+ 1,
+ 11,
+ 22,
+ 23,
+ 5,
+ 26,
+ 0,
+ 0,
+ 23,
+ 33,
+ 3,
+ 2,
+ 1,
+ 10,
+ 24,
+ 25,
+ 5,
+ 11,
+ 0,
+ 0,
+ 25,
+ 26,
+ 3,
+ 2,
+ 1,
+ 0,
+ 26,
+ 27,
+ 5,
+ 12,
+ 0,
+ 0,
+ 27,
+ 33,
+ 1,
+ 0,
+ 0,
+ 0,
+ 28,
+ 33,
+ 3,
+ 12,
+ 6,
+ 0,
+ 29,
+ 33,
+ 3,
+ 10,
+ 5,
+ 0,
+ 30,
+ 33,
+ 3,
+ 8,
+ 4,
+ 0,
+ 31,
+ 33,
+ 3,
+ 14,
+ 7,
+ 0,
+ 32,
+ 19,
+ 1,
+ 0,
+ 0,
+ 0,
+ 32,
+ 22,
+ 1,
+ 0,
+ 0,
+ 0,
+ 32,
+ 24,
+ 1,
+ 0,
+ 0,
+ 0,
+ 32,
+ 28,
+ 1,
+ 0,
+ 0,
+ 0,
+ 32,
+ 29,
+ 1,
+ 0,
+ 0,
+ 0,
+ 32,
+ 30,
+ 1,
+ 0,
+ 0,
+ 0,
+ 32,
+ 31,
+ 1,
+ 0,
+ 0,
+ 0,
+ 33,
+ 52,
+ 1,
+ 0,
+ 0,
+ 0,
+ 34,
+ 35,
+ 10,
+ 12,
+ 0,
+ 0,
+ 35,
+ 36,
+ 5,
+ 25,
+ 0,
+ 0,
+ 36,
+ 51,
+ 3,
+ 2,
+ 1,
+ 12,
+ 37,
+ 38,
+ 10,
+ 9,
+ 0,
+ 0,
+ 38,
+ 39,
+ 7,
+ 1,
+ 0,
+ 0,
+ 39,
+ 51,
+ 3,
+ 2,
+ 1,
+ 10,
+ 40,
+ 41,
+ 10,
+ 8,
+ 0,
+ 0,
+ 41,
+ 42,
+ 7,
+ 0,
+ 0,
+ 0,
+ 42,
+ 51,
+ 3,
+ 2,
+ 1,
+ 9,
+ 43,
+ 44,
+ 10,
+ 6,
+ 0,
+ 0,
+ 44,
+ 45,
+ 3,
+ 4,
+ 2,
+ 0,
+ 45,
+ 46,
+ 3,
+ 2,
+ 1,
+ 7,
+ 46,
+ 51,
+ 1,
+ 0,
+ 0,
+ 0,
+ 47,
+ 48,
+ 10,
+ 5,
+ 0,
+ 0,
+ 48,
+ 49,
+ 7,
+ 2,
+ 0,
+ 0,
+ 49,
+ 51,
+ 3,
+ 2,
+ 1,
+ 6,
+ 50,
+ 34,
+ 1,
+ 0,
+ 0,
+ 0,
+ 50,
+ 37,
+ 1,
+ 0,
+ 0,
+ 0,
+ 50,
+ 40,
+ 1,
+ 0,
+ 0,
+ 0,
+ 50,
+ 43,
+ 1,
+ 0,
+ 0,
+ 0,
+ 50,
+ 47,
+ 1,
+ 0,
+ 0,
+ 0,
+ 51,
+ 54,
+ 1,
+ 0,
+ 0,
+ 0,
+ 52,
+ 50,
+ 1,
+ 0,
+ 0,
+ 0,
+ 52,
+ 53,
+ 1,
+ 0,
+ 0,
+ 0,
+ 53,
+ 3,
+ 1,
+ 0,
+ 0,
+ 0,
+ 54,
+ 52,
+ 1,
+ 0,
+ 0,
+ 0,
+ 55,
+ 56,
+ 7,
+ 3,
+ 0,
+ 0,
+ 56,
+ 5,
+ 1,
+ 0,
+ 0,
+ 0,
+ 57,
+ 62,
+ 3,
+ 2,
+ 1,
+ 0,
+ 58,
+ 59,
+ 5,
+ 27,
+ 0,
+ 0,
+ 59,
+ 61,
+ 3,
+ 2,
+ 1,
+ 0,
+ 60,
+ 58,
+ 1,
+ 0,
+ 0,
+ 0,
+ 61,
+ 64,
+ 1,
+ 0,
+ 0,
+ 0,
+ 62,
+ 60,
+ 1,
+ 0,
+ 0,
+ 0,
+ 62,
+ 63,
+ 1,
+ 0,
+ 0,
+ 0,
+ 63,
+ 7,
+ 1,
+ 0,
+ 0,
+ 0,
+ 64,
+ 62,
+ 1,
+ 0,
+ 0,
+ 0,
+ 65,
+ 66,
+ 5,
+ 10,
+ 0,
+ 0,
+ 66,
+ 67,
+ 5,
+ 11,
+ 0,
+ 0,
+ 67,
+ 68,
+ 3,
+ 6,
+ 3,
+ 0,
+ 68,
+ 69,
+ 5,
+ 12,
+ 0,
+ 0,
+ 69,
+ 9,
+ 1,
+ 0,
+ 0,
+ 0,
+ 70,
+ 71,
+ 7,
+ 4,
+ 0,
+ 0,
+ 71,
+ 11,
+ 1,
+ 0,
+ 0,
+ 0,
+ 72,
+ 73,
+ 5,
+ 1,
+ 0,
+ 0,
+ 73,
+ 13,
+ 1,
+ 0,
+ 0,
+ 0,
+ 74,
+ 75,
+ 5,
+ 10,
+ 0,
+ 0,
+ 75,
+ 15,
+ 1,
+ 0,
+ 0,
+ 0,
+ 4,
+ 32,
+ 50,
+ 52,
+ 62,
+ ]
+
+
+class PetabMathExprParser(Parser):
+ grammarFileName = "PetabMathExprParser.g4"
+
+ atn = ATNDeserializer().deserialize(serializedATN())
+
+ decisionsToDFA = [DFA(ds, i) for i, ds in enumerate(atn.decisionToState)]
+
+ sharedContextCache = PredictionContextCache()
+
+ literalNames = [
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "'true'",
+ "'false'",
+ "'inf'",
+ "",
+ "'('",
+ "')'",
+ "'||'",
+ "'&&'",
+ "'>'",
+ "'<'",
+ "'>='",
+ "'<='",
+ "'=='",
+ "'!='",
+ "'+'",
+ "'-'",
+ "'*'",
+ "'/'",
+ "'^'",
+ "'!'",
+ "','",
+ ]
+
+ symbolicNames = [
+ "",
+ "NUMBER",
+ "INTEGER",
+ "EXPONENT_FLOAT",
+ "POINT_FLOAT",
+ "FLOAT_NUMBER",
+ "WS",
+ "TRUE",
+ "FALSE",
+ "INF",
+ "NAME",
+ "OPEN_PAREN",
+ "CLOSE_PAREN",
+ "BOOLEAN_OR",
+ "BOOLEAN_AND",
+ "GT",
+ "LT",
+ "GTE",
+ "LTE",
+ "EQ",
+ "NEQ",
+ "PLUS",
+ "MINUS",
+ "ASTERISK",
+ "SLASH",
+ "CARET",
+ "EXCLAMATION_MARK",
+ "COMMA",
+ ]
+
+ RULE_petabExpression = 0
+ RULE_expr = 1
+ RULE_comp_op = 2
+ RULE_argumentList = 3
+ RULE_functionCall = 4
+ RULE_booleanLiteral = 5
+ RULE_number = 6
+ RULE_var = 7
+
+ ruleNames = [
+ "petabExpression",
+ "expr",
+ "comp_op",
+ "argumentList",
+ "functionCall",
+ "booleanLiteral",
+ "number",
+ "var",
+ ]
+
+ EOF = Token.EOF
+ NUMBER = 1
+ INTEGER = 2
+ EXPONENT_FLOAT = 3
+ POINT_FLOAT = 4
+ FLOAT_NUMBER = 5
+ WS = 6
+ TRUE = 7
+ FALSE = 8
+ INF = 9
+ NAME = 10
+ OPEN_PAREN = 11
+ CLOSE_PAREN = 12
+ BOOLEAN_OR = 13
+ BOOLEAN_AND = 14
+ GT = 15
+ LT = 16
+ GTE = 17
+ LTE = 18
+ EQ = 19
+ NEQ = 20
+ PLUS = 21
+ MINUS = 22
+ ASTERISK = 23
+ SLASH = 24
+ CARET = 25
+ EXCLAMATION_MARK = 26
+ COMMA = 27
+
+ def __init__(self, input: TokenStream, output: TextIO = sys.stdout):
+ super().__init__(input, output)
+ self.checkVersion("4.13.1")
+ self._interp = ParserATNSimulator(
+ self, self.atn, self.decisionsToDFA, self.sharedContextCache
+ )
+ self._predicates = None
+
+ class PetabExpressionContext(ParserRuleContext):
+ __slots__ = "parser"
+
+ def __init__(
+ self,
+ parser,
+ parent: ParserRuleContext = None,
+ invokingState: int = -1,
+ ):
+ super().__init__(parent, invokingState)
+ self.parser = parser
+
+ def expr(self):
+ return self.getTypedRuleContext(PetabMathExprParser.ExprContext, 0)
+
+ def EOF(self):
+ return self.getToken(PetabMathExprParser.EOF, 0)
+
+ def getRuleIndex(self):
+ return PetabMathExprParser.RULE_petabExpression
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitPetabExpression"):
+ return visitor.visitPetabExpression(self)
+ else:
+ return visitor.visitChildren(self)
+
+ def petabExpression(self):
+ localctx = PetabMathExprParser.PetabExpressionContext(
+ self, self._ctx, self.state
+ )
+ self.enterRule(localctx, 0, self.RULE_petabExpression)
+ try:
+ self.enterOuterAlt(localctx, 1)
+ self.state = 16
+ self.expr(0)
+ self.state = 17
+ self.match(PetabMathExprParser.EOF)
+ except RecognitionException as re:
+ localctx.exception = re
+ self._errHandler.reportError(self, re)
+ self._errHandler.recover(self, re)
+ finally:
+ self.exitRule()
+ return localctx
+
+ class ExprContext(ParserRuleContext):
+ __slots__ = "parser"
+
+ def __init__(
+ self,
+ parser,
+ parent: ParserRuleContext = None,
+ invokingState: int = -1,
+ ):
+ super().__init__(parent, invokingState)
+ self.parser = parser
+
+ def getRuleIndex(self):
+ return PetabMathExprParser.RULE_expr
+
+ def copyFrom(self, ctx: ParserRuleContext):
+ super().copyFrom(ctx)
+
+ class PowerExprContext(ExprContext):
+ def __init__(
+ self, parser, ctx: ParserRuleContext
+ ): # actually a PetabMathExprParser.ExprContext
+ super().__init__(parser)
+ self.copyFrom(ctx)
+
+ def expr(self, i: int = None):
+ if i is None:
+ return self.getTypedRuleContexts(
+ PetabMathExprParser.ExprContext
+ )
+ else:
+ return self.getTypedRuleContext(
+ PetabMathExprParser.ExprContext, i
+ )
+
+ def CARET(self):
+ return self.getToken(PetabMathExprParser.CARET, 0)
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitPowerExpr"):
+ return visitor.visitPowerExpr(self)
+ else:
+ return visitor.visitChildren(self)
+
+ class BooleanAndOrExprContext(ExprContext):
+ def __init__(
+ self, parser, ctx: ParserRuleContext
+ ): # actually a PetabMathExprParser.ExprContext
+ super().__init__(parser)
+ self.copyFrom(ctx)
+
+ def expr(self, i: int = None):
+ if i is None:
+ return self.getTypedRuleContexts(
+ PetabMathExprParser.ExprContext
+ )
+ else:
+ return self.getTypedRuleContext(
+ PetabMathExprParser.ExprContext, i
+ )
+
+ def BOOLEAN_AND(self):
+ return self.getToken(PetabMathExprParser.BOOLEAN_AND, 0)
+
+ def BOOLEAN_OR(self):
+ return self.getToken(PetabMathExprParser.BOOLEAN_OR, 0)
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitBooleanAndOrExpr"):
+ return visitor.visitBooleanAndOrExpr(self)
+ else:
+ return visitor.visitChildren(self)
+
+ class ComparisonExprContext(ExprContext):
+ def __init__(
+ self, parser, ctx: ParserRuleContext
+ ): # actually a PetabMathExprParser.ExprContext
+ super().__init__(parser)
+ self.copyFrom(ctx)
+
+ def expr(self, i: int = None):
+ if i is None:
+ return self.getTypedRuleContexts(
+ PetabMathExprParser.ExprContext
+ )
+ else:
+ return self.getTypedRuleContext(
+ PetabMathExprParser.ExprContext, i
+ )
+
+ def comp_op(self):
+ return self.getTypedRuleContext(
+ PetabMathExprParser.Comp_opContext, 0
+ )
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitComparisonExpr"):
+ return visitor.visitComparisonExpr(self)
+ else:
+ return visitor.visitChildren(self)
+
+ class MultExprContext(ExprContext):
+ def __init__(
+ self, parser, ctx: ParserRuleContext
+ ): # actually a PetabMathExprParser.ExprContext
+ super().__init__(parser)
+ self.copyFrom(ctx)
+
+ def expr(self, i: int = None):
+ if i is None:
+ return self.getTypedRuleContexts(
+ PetabMathExprParser.ExprContext
+ )
+ else:
+ return self.getTypedRuleContext(
+ PetabMathExprParser.ExprContext, i
+ )
+
+ def ASTERISK(self):
+ return self.getToken(PetabMathExprParser.ASTERISK, 0)
+
+ def SLASH(self):
+ return self.getToken(PetabMathExprParser.SLASH, 0)
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitMultExpr"):
+ return visitor.visitMultExpr(self)
+ else:
+ return visitor.visitChildren(self)
+
+ class BooleanLiteral_Context(ExprContext):
+ def __init__(
+ self, parser, ctx: ParserRuleContext
+ ): # actually a PetabMathExprParser.ExprContext
+ super().__init__(parser)
+ self.copyFrom(ctx)
+
+ def booleanLiteral(self):
+ return self.getTypedRuleContext(
+ PetabMathExprParser.BooleanLiteralContext, 0
+ )
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitBooleanLiteral_"):
+ return visitor.visitBooleanLiteral_(self)
+ else:
+ return visitor.visitChildren(self)
+
+ class AddExprContext(ExprContext):
+ def __init__(
+ self, parser, ctx: ParserRuleContext
+ ): # actually a PetabMathExprParser.ExprContext
+ super().__init__(parser)
+ self.copyFrom(ctx)
+
+ def expr(self, i: int = None):
+ if i is None:
+ return self.getTypedRuleContexts(
+ PetabMathExprParser.ExprContext
+ )
+ else:
+ return self.getTypedRuleContext(
+ PetabMathExprParser.ExprContext, i
+ )
+
+ def PLUS(self):
+ return self.getToken(PetabMathExprParser.PLUS, 0)
+
+ def MINUS(self):
+ return self.getToken(PetabMathExprParser.MINUS, 0)
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitAddExpr"):
+ return visitor.visitAddExpr(self)
+ else:
+ return visitor.visitChildren(self)
+
+ class BooleanNotExprContext(ExprContext):
+ def __init__(
+ self, parser, ctx: ParserRuleContext
+ ): # actually a PetabMathExprParser.ExprContext
+ super().__init__(parser)
+ self.copyFrom(ctx)
+
+ def EXCLAMATION_MARK(self):
+ return self.getToken(PetabMathExprParser.EXCLAMATION_MARK, 0)
+
+ def expr(self):
+ return self.getTypedRuleContext(PetabMathExprParser.ExprContext, 0)
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitBooleanNotExpr"):
+ return visitor.visitBooleanNotExpr(self)
+ else:
+ return visitor.visitChildren(self)
+
+ class ParenExprContext(ExprContext):
+ def __init__(
+ self, parser, ctx: ParserRuleContext
+ ): # actually a PetabMathExprParser.ExprContext
+ super().__init__(parser)
+ self.copyFrom(ctx)
+
+ def OPEN_PAREN(self):
+ return self.getToken(PetabMathExprParser.OPEN_PAREN, 0)
+
+ def expr(self):
+ return self.getTypedRuleContext(PetabMathExprParser.ExprContext, 0)
+
+ def CLOSE_PAREN(self):
+ return self.getToken(PetabMathExprParser.CLOSE_PAREN, 0)
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitParenExpr"):
+ return visitor.visitParenExpr(self)
+ else:
+ return visitor.visitChildren(self)
+
+ class FunctionCall_Context(ExprContext):
+ def __init__(
+ self, parser, ctx: ParserRuleContext
+ ): # actually a PetabMathExprParser.ExprContext
+ super().__init__(parser)
+ self.copyFrom(ctx)
+
+ def functionCall(self):
+ return self.getTypedRuleContext(
+ PetabMathExprParser.FunctionCallContext, 0
+ )
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitFunctionCall_"):
+ return visitor.visitFunctionCall_(self)
+ else:
+ return visitor.visitChildren(self)
+
+ class UnaryExprContext(ExprContext):
+ def __init__(
+ self, parser, ctx: ParserRuleContext
+ ): # actually a PetabMathExprParser.ExprContext
+ super().__init__(parser)
+ self.copyFrom(ctx)
+
+ def expr(self):
+ return self.getTypedRuleContext(PetabMathExprParser.ExprContext, 0)
+
+ def PLUS(self):
+ return self.getToken(PetabMathExprParser.PLUS, 0)
+
+ def MINUS(self):
+ return self.getToken(PetabMathExprParser.MINUS, 0)
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitUnaryExpr"):
+ return visitor.visitUnaryExpr(self)
+ else:
+ return visitor.visitChildren(self)
+
+ class Number_Context(ExprContext):
+ def __init__(
+ self, parser, ctx: ParserRuleContext
+ ): # actually a PetabMathExprParser.ExprContext
+ super().__init__(parser)
+ self.copyFrom(ctx)
+
+ def number(self):
+ return self.getTypedRuleContext(
+ PetabMathExprParser.NumberContext, 0
+ )
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitNumber_"):
+ return visitor.visitNumber_(self)
+ else:
+ return visitor.visitChildren(self)
+
+ class VarExpr_Context(ExprContext):
+ def __init__(
+ self, parser, ctx: ParserRuleContext
+ ): # actually a PetabMathExprParser.ExprContext
+ super().__init__(parser)
+ self.copyFrom(ctx)
+
+ def var(self):
+ return self.getTypedRuleContext(PetabMathExprParser.VarContext, 0)
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitVarExpr_"):
+ return visitor.visitVarExpr_(self)
+ else:
+ return visitor.visitChildren(self)
+
+ def expr(self, _p: int = 0):
+ _parentctx = self._ctx
+ _parentState = self.state
+ localctx = PetabMathExprParser.ExprContext(
+ self, self._ctx, _parentState
+ )
+ _prevctx = localctx
+ _startState = 2
+ self.enterRecursionRule(localctx, 2, self.RULE_expr, _p)
+ self._la = 0 # Token type
+ try:
+ self.enterOuterAlt(localctx, 1)
+ self.state = 32
+ self._errHandler.sync(self)
+ la_ = self._interp.adaptivePredict(self._input, 0, self._ctx)
+ if la_ == 1:
+ localctx = PetabMathExprParser.UnaryExprContext(self, localctx)
+ self._ctx = localctx
+ _prevctx = localctx
+
+ self.state = 20
+ _la = self._input.LA(1)
+ if not (_la == 21 or _la == 22):
+ self._errHandler.recoverInline(self)
+ else:
+ self._errHandler.reportMatch(self)
+ self.consume()
+ self.state = 21
+ self.expr(11)
+ pass
+
+ elif la_ == 2:
+ localctx = PetabMathExprParser.BooleanNotExprContext(
+ self, localctx
+ )
+ self._ctx = localctx
+ _prevctx = localctx
+ self.state = 22
+ self.match(PetabMathExprParser.EXCLAMATION_MARK)
+ self.state = 23
+ self.expr(10)
+ pass
+
+ elif la_ == 3:
+ localctx = PetabMathExprParser.ParenExprContext(self, localctx)
+ self._ctx = localctx
+ _prevctx = localctx
+ self.state = 24
+ self.match(PetabMathExprParser.OPEN_PAREN)
+ self.state = 25
+ self.expr(0)
+ self.state = 26
+ self.match(PetabMathExprParser.CLOSE_PAREN)
+ pass
+
+ elif la_ == 4:
+ localctx = PetabMathExprParser.Number_Context(self, localctx)
+ self._ctx = localctx
+ _prevctx = localctx
+ self.state = 28
+ self.number()
+ pass
+
+ elif la_ == 5:
+ localctx = PetabMathExprParser.BooleanLiteral_Context(
+ self, localctx
+ )
+ self._ctx = localctx
+ _prevctx = localctx
+ self.state = 29
+ self.booleanLiteral()
+ pass
+
+ elif la_ == 6:
+ localctx = PetabMathExprParser.FunctionCall_Context(
+ self, localctx
+ )
+ self._ctx = localctx
+ _prevctx = localctx
+ self.state = 30
+ self.functionCall()
+ pass
+
+ elif la_ == 7:
+ localctx = PetabMathExprParser.VarExpr_Context(self, localctx)
+ self._ctx = localctx
+ _prevctx = localctx
+ self.state = 31
+ self.var()
+ pass
+
+ self._ctx.stop = self._input.LT(-1)
+ self.state = 52
+ self._errHandler.sync(self)
+ _alt = self._interp.adaptivePredict(self._input, 2, self._ctx)
+ while _alt != 2 and _alt != ATN.INVALID_ALT_NUMBER:
+ if _alt == 1:
+ if self._parseListeners is not None:
+ self.triggerExitRuleEvent()
+ _prevctx = localctx
+ self.state = 50
+ self._errHandler.sync(self)
+ la_ = self._interp.adaptivePredict(
+ self._input, 1, self._ctx
+ )
+ if la_ == 1:
+ localctx = PetabMathExprParser.PowerExprContext(
+ self,
+ PetabMathExprParser.ExprContext(
+ self, _parentctx, _parentState
+ ),
+ )
+ self.pushNewRecursionContext(
+ localctx, _startState, self.RULE_expr
+ )
+ self.state = 34
+ if not self.precpred(self._ctx, 12):
+ from antlr4.error.Errors import (
+ FailedPredicateException,
+ )
+
+ raise FailedPredicateException(
+ self, "self.precpred(self._ctx, 12)"
+ )
+ self.state = 35
+ self.match(PetabMathExprParser.CARET)
+ self.state = 36
+ self.expr(12)
+ pass
+
+ elif la_ == 2:
+ localctx = PetabMathExprParser.MultExprContext(
+ self,
+ PetabMathExprParser.ExprContext(
+ self, _parentctx, _parentState
+ ),
+ )
+ self.pushNewRecursionContext(
+ localctx, _startState, self.RULE_expr
+ )
+ self.state = 37
+ if not self.precpred(self._ctx, 9):
+ from antlr4.error.Errors import (
+ FailedPredicateException,
+ )
+
+ raise FailedPredicateException(
+ self, "self.precpred(self._ctx, 9)"
+ )
+ self.state = 38
+ _la = self._input.LA(1)
+ if not (_la == 23 or _la == 24):
+ self._errHandler.recoverInline(self)
+ else:
+ self._errHandler.reportMatch(self)
+ self.consume()
+ self.state = 39
+ self.expr(10)
+ pass
+
+ elif la_ == 3:
+ localctx = PetabMathExprParser.AddExprContext(
+ self,
+ PetabMathExprParser.ExprContext(
+ self, _parentctx, _parentState
+ ),
+ )
+ self.pushNewRecursionContext(
+ localctx, _startState, self.RULE_expr
+ )
+ self.state = 40
+ if not self.precpred(self._ctx, 8):
+ from antlr4.error.Errors import (
+ FailedPredicateException,
+ )
+
+ raise FailedPredicateException(
+ self, "self.precpred(self._ctx, 8)"
+ )
+ self.state = 41
+ _la = self._input.LA(1)
+ if not (_la == 21 or _la == 22):
+ self._errHandler.recoverInline(self)
+ else:
+ self._errHandler.reportMatch(self)
+ self.consume()
+ self.state = 42
+ self.expr(9)
+ pass
+
+ elif la_ == 4:
+ localctx = PetabMathExprParser.ComparisonExprContext(
+ self,
+ PetabMathExprParser.ExprContext(
+ self, _parentctx, _parentState
+ ),
+ )
+ self.pushNewRecursionContext(
+ localctx, _startState, self.RULE_expr
+ )
+ self.state = 43
+ if not self.precpred(self._ctx, 6):
+ from antlr4.error.Errors import (
+ FailedPredicateException,
+ )
+
+ raise FailedPredicateException(
+ self, "self.precpred(self._ctx, 6)"
+ )
+ self.state = 44
+ self.comp_op()
+ self.state = 45
+ self.expr(7)
+ pass
+
+ elif la_ == 5:
+ localctx = PetabMathExprParser.BooleanAndOrExprContext(
+ self,
+ PetabMathExprParser.ExprContext(
+ self, _parentctx, _parentState
+ ),
+ )
+ self.pushNewRecursionContext(
+ localctx, _startState, self.RULE_expr
+ )
+ self.state = 47
+ if not self.precpred(self._ctx, 5):
+ from antlr4.error.Errors import (
+ FailedPredicateException,
+ )
+
+ raise FailedPredicateException(
+ self, "self.precpred(self._ctx, 5)"
+ )
+ self.state = 48
+ _la = self._input.LA(1)
+ if not (_la == 13 or _la == 14):
+ self._errHandler.recoverInline(self)
+ else:
+ self._errHandler.reportMatch(self)
+ self.consume()
+ self.state = 49
+ self.expr(6)
+ pass
+
+ self.state = 54
+ self._errHandler.sync(self)
+ _alt = self._interp.adaptivePredict(self._input, 2, self._ctx)
+
+ except RecognitionException as re:
+ localctx.exception = re
+ self._errHandler.reportError(self, re)
+ self._errHandler.recover(self, re)
+ finally:
+ self.unrollRecursionContexts(_parentctx)
+ return localctx
+
+ class Comp_opContext(ParserRuleContext):
+ __slots__ = "parser"
+
+ def __init__(
+ self,
+ parser,
+ parent: ParserRuleContext = None,
+ invokingState: int = -1,
+ ):
+ super().__init__(parent, invokingState)
+ self.parser = parser
+
+ def GT(self):
+ return self.getToken(PetabMathExprParser.GT, 0)
+
+ def LT(self):
+ return self.getToken(PetabMathExprParser.LT, 0)
+
+ def GTE(self):
+ return self.getToken(PetabMathExprParser.GTE, 0)
+
+ def LTE(self):
+ return self.getToken(PetabMathExprParser.LTE, 0)
+
+ def EQ(self):
+ return self.getToken(PetabMathExprParser.EQ, 0)
+
+ def NEQ(self):
+ return self.getToken(PetabMathExprParser.NEQ, 0)
+
+ def getRuleIndex(self):
+ return PetabMathExprParser.RULE_comp_op
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitComp_op"):
+ return visitor.visitComp_op(self)
+ else:
+ return visitor.visitChildren(self)
+
+ def comp_op(self):
+ localctx = PetabMathExprParser.Comp_opContext(
+ self, self._ctx, self.state
+ )
+ self.enterRule(localctx, 4, self.RULE_comp_op)
+ self._la = 0 # Token type
+ try:
+ self.enterOuterAlt(localctx, 1)
+ self.state = 55
+ _la = self._input.LA(1)
+ if not (((_la) & ~0x3F) == 0 and ((1 << _la) & 2064384) != 0):
+ self._errHandler.recoverInline(self)
+ else:
+ self._errHandler.reportMatch(self)
+ self.consume()
+ except RecognitionException as re:
+ localctx.exception = re
+ self._errHandler.reportError(self, re)
+ self._errHandler.recover(self, re)
+ finally:
+ self.exitRule()
+ return localctx
+
+ class ArgumentListContext(ParserRuleContext):
+ __slots__ = "parser"
+
+ def __init__(
+ self,
+ parser,
+ parent: ParserRuleContext = None,
+ invokingState: int = -1,
+ ):
+ super().__init__(parent, invokingState)
+ self.parser = parser
+
+ def expr(self, i: int = None):
+ if i is None:
+ return self.getTypedRuleContexts(
+ PetabMathExprParser.ExprContext
+ )
+ else:
+ return self.getTypedRuleContext(
+ PetabMathExprParser.ExprContext, i
+ )
+
+ def COMMA(self, i: int = None):
+ if i is None:
+ return self.getTokens(PetabMathExprParser.COMMA)
+ else:
+ return self.getToken(PetabMathExprParser.COMMA, i)
+
+ def getRuleIndex(self):
+ return PetabMathExprParser.RULE_argumentList
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitArgumentList"):
+ return visitor.visitArgumentList(self)
+ else:
+ return visitor.visitChildren(self)
+
+ def argumentList(self):
+ localctx = PetabMathExprParser.ArgumentListContext(
+ self, self._ctx, self.state
+ )
+ self.enterRule(localctx, 6, self.RULE_argumentList)
+ self._la = 0 # Token type
+ try:
+ self.enterOuterAlt(localctx, 1)
+ self.state = 57
+ self.expr(0)
+ self.state = 62
+ self._errHandler.sync(self)
+ _la = self._input.LA(1)
+ while _la == 27:
+ self.state = 58
+ self.match(PetabMathExprParser.COMMA)
+ self.state = 59
+ self.expr(0)
+ self.state = 64
+ self._errHandler.sync(self)
+ _la = self._input.LA(1)
+
+ except RecognitionException as re:
+ localctx.exception = re
+ self._errHandler.reportError(self, re)
+ self._errHandler.recover(self, re)
+ finally:
+ self.exitRule()
+ return localctx
+
+ class FunctionCallContext(ParserRuleContext):
+ __slots__ = "parser"
+
+ def __init__(
+ self,
+ parser,
+ parent: ParserRuleContext = None,
+ invokingState: int = -1,
+ ):
+ super().__init__(parent, invokingState)
+ self.parser = parser
+
+ def NAME(self):
+ return self.getToken(PetabMathExprParser.NAME, 0)
+
+ def OPEN_PAREN(self):
+ return self.getToken(PetabMathExprParser.OPEN_PAREN, 0)
+
+ def argumentList(self):
+ return self.getTypedRuleContext(
+ PetabMathExprParser.ArgumentListContext, 0
+ )
+
+ def CLOSE_PAREN(self):
+ return self.getToken(PetabMathExprParser.CLOSE_PAREN, 0)
+
+ def getRuleIndex(self):
+ return PetabMathExprParser.RULE_functionCall
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitFunctionCall"):
+ return visitor.visitFunctionCall(self)
+ else:
+ return visitor.visitChildren(self)
+
+ def functionCall(self):
+ localctx = PetabMathExprParser.FunctionCallContext(
+ self, self._ctx, self.state
+ )
+ self.enterRule(localctx, 8, self.RULE_functionCall)
+ try:
+ self.enterOuterAlt(localctx, 1)
+ self.state = 65
+ self.match(PetabMathExprParser.NAME)
+ self.state = 66
+ self.match(PetabMathExprParser.OPEN_PAREN)
+ self.state = 67
+ self.argumentList()
+ self.state = 68
+ self.match(PetabMathExprParser.CLOSE_PAREN)
+ except RecognitionException as re:
+ localctx.exception = re
+ self._errHandler.reportError(self, re)
+ self._errHandler.recover(self, re)
+ finally:
+ self.exitRule()
+ return localctx
+
+ class BooleanLiteralContext(ParserRuleContext):
+ __slots__ = "parser"
+
+ def __init__(
+ self,
+ parser,
+ parent: ParserRuleContext = None,
+ invokingState: int = -1,
+ ):
+ super().__init__(parent, invokingState)
+ self.parser = parser
+
+ def TRUE(self):
+ return self.getToken(PetabMathExprParser.TRUE, 0)
+
+ def FALSE(self):
+ return self.getToken(PetabMathExprParser.FALSE, 0)
+
+ def getRuleIndex(self):
+ return PetabMathExprParser.RULE_booleanLiteral
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitBooleanLiteral"):
+ return visitor.visitBooleanLiteral(self)
+ else:
+ return visitor.visitChildren(self)
+
+ def booleanLiteral(self):
+ localctx = PetabMathExprParser.BooleanLiteralContext(
+ self, self._ctx, self.state
+ )
+ self.enterRule(localctx, 10, self.RULE_booleanLiteral)
+ self._la = 0 # Token type
+ try:
+ self.enterOuterAlt(localctx, 1)
+ self.state = 70
+ _la = self._input.LA(1)
+ if not (_la == 7 or _la == 8):
+ self._errHandler.recoverInline(self)
+ else:
+ self._errHandler.reportMatch(self)
+ self.consume()
+ except RecognitionException as re:
+ localctx.exception = re
+ self._errHandler.reportError(self, re)
+ self._errHandler.recover(self, re)
+ finally:
+ self.exitRule()
+ return localctx
+
+ class NumberContext(ParserRuleContext):
+ __slots__ = "parser"
+
+ def __init__(
+ self,
+ parser,
+ parent: ParserRuleContext = None,
+ invokingState: int = -1,
+ ):
+ super().__init__(parent, invokingState)
+ self.parser = parser
+
+ def NUMBER(self):
+ return self.getToken(PetabMathExprParser.NUMBER, 0)
+
+ def getRuleIndex(self):
+ return PetabMathExprParser.RULE_number
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitNumber"):
+ return visitor.visitNumber(self)
+ else:
+ return visitor.visitChildren(self)
+
+ def number(self):
+ localctx = PetabMathExprParser.NumberContext(
+ self, self._ctx, self.state
+ )
+ self.enterRule(localctx, 12, self.RULE_number)
+ try:
+ self.enterOuterAlt(localctx, 1)
+ self.state = 72
+ self.match(PetabMathExprParser.NUMBER)
+ except RecognitionException as re:
+ localctx.exception = re
+ self._errHandler.reportError(self, re)
+ self._errHandler.recover(self, re)
+ finally:
+ self.exitRule()
+ return localctx
+
+ class VarContext(ParserRuleContext):
+ __slots__ = "parser"
+
+ def __init__(
+ self,
+ parser,
+ parent: ParserRuleContext = None,
+ invokingState: int = -1,
+ ):
+ super().__init__(parent, invokingState)
+ self.parser = parser
+
+ def NAME(self):
+ return self.getToken(PetabMathExprParser.NAME, 0)
+
+ def getRuleIndex(self):
+ return PetabMathExprParser.RULE_var
+
+ def accept(self, visitor: ParseTreeVisitor):
+ if hasattr(visitor, "visitVar"):
+ return visitor.visitVar(self)
+ else:
+ return visitor.visitChildren(self)
+
+ def var(self):
+ localctx = PetabMathExprParser.VarContext(self, self._ctx, self.state)
+ self.enterRule(localctx, 14, self.RULE_var)
+ try:
+ self.enterOuterAlt(localctx, 1)
+ self.state = 74
+ self.match(PetabMathExprParser.NAME)
+ except RecognitionException as re:
+ localctx.exception = re
+ self._errHandler.reportError(self, re)
+ self._errHandler.recover(self, re)
+ finally:
+ self.exitRule()
+ return localctx
+
+ def sempred(self, localctx: RuleContext, ruleIndex: int, predIndex: int):
+ if self._predicates == None:
+ self._predicates = dict()
+ self._predicates[1] = self.expr_sempred
+ pred = self._predicates.get(ruleIndex, None)
+ if pred is None:
+ raise Exception("No predicate with index:" + str(ruleIndex))
+ else:
+ return pred(localctx, predIndex)
+
+ def expr_sempred(self, localctx: ExprContext, predIndex: int):
+ if predIndex == 0:
+ return self.precpred(self._ctx, 12)
+
+ if predIndex == 1:
+ return self.precpred(self._ctx, 9)
+
+ if predIndex == 2:
+ return self.precpred(self._ctx, 8)
+
+ if predIndex == 3:
+ return self.precpred(self._ctx, 6)
+
+ if predIndex == 4:
+ return self.precpred(self._ctx, 5)
diff --git a/petab/v1/math/_generated/PetabMathExprParser.tokens b/petab/v1/math/_generated/PetabMathExprParser.tokens
new file mode 100644
index 00000000..bfa04b53
--- /dev/null
+++ b/petab/v1/math/_generated/PetabMathExprParser.tokens
@@ -0,0 +1,47 @@
+NUMBER=1
+INTEGER=2
+EXPONENT_FLOAT=3
+POINT_FLOAT=4
+FLOAT_NUMBER=5
+WS=6
+TRUE=7
+FALSE=8
+INF=9
+NAME=10
+OPEN_PAREN=11
+CLOSE_PAREN=12
+BOOLEAN_OR=13
+BOOLEAN_AND=14
+GT=15
+LT=16
+GTE=17
+LTE=18
+EQ=19
+NEQ=20
+PLUS=21
+MINUS=22
+ASTERISK=23
+SLASH=24
+CARET=25
+EXCLAMATION_MARK=26
+COMMA=27
+'true'=7
+'false'=8
+'inf'=9
+'('=11
+')'=12
+'||'=13
+'&&'=14
+'>'=15
+'<'=16
+'>='=17
+'<='=18
+'=='=19
+'!='=20
+'+'=21
+'-'=22
+'*'=23
+'/'=24
+'^'=25
+'!'=26
+','=27
diff --git a/petab/v1/math/_generated/PetabMathExprParserVisitor.py b/petab/v1/math/_generated/PetabMathExprParserVisitor.py
new file mode 100644
index 00000000..0d3e2de2
--- /dev/null
+++ b/petab/v1/math/_generated/PetabMathExprParserVisitor.py
@@ -0,0 +1,104 @@
+# Generated from PetabMathExprParser.g4 by ANTLR 4.13.1
+from antlr4 import *
+
+if "." in __name__:
+ from .PetabMathExprParser import PetabMathExprParser
+else:
+ from PetabMathExprParser import PetabMathExprParser
+
+# This class defines a complete generic visitor for a parse tree produced by PetabMathExprParser.
+
+
+class PetabMathExprParserVisitor(ParseTreeVisitor):
+ # Visit a parse tree produced by PetabMathExprParser#petabExpression.
+ def visitPetabExpression(
+ self, ctx: PetabMathExprParser.PetabExpressionContext
+ ):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#PowerExpr.
+ def visitPowerExpr(self, ctx: PetabMathExprParser.PowerExprContext):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#BooleanAndOrExpr.
+ def visitBooleanAndOrExpr(
+ self, ctx: PetabMathExprParser.BooleanAndOrExprContext
+ ):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#ComparisonExpr.
+ def visitComparisonExpr(
+ self, ctx: PetabMathExprParser.ComparisonExprContext
+ ):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#MultExpr.
+ def visitMultExpr(self, ctx: PetabMathExprParser.MultExprContext):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#BooleanLiteral_.
+ def visitBooleanLiteral_(
+ self, ctx: PetabMathExprParser.BooleanLiteral_Context
+ ):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#AddExpr.
+ def visitAddExpr(self, ctx: PetabMathExprParser.AddExprContext):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#BooleanNotExpr.
+ def visitBooleanNotExpr(
+ self, ctx: PetabMathExprParser.BooleanNotExprContext
+ ):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#ParenExpr.
+ def visitParenExpr(self, ctx: PetabMathExprParser.ParenExprContext):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#functionCall_.
+ def visitFunctionCall_(
+ self, ctx: PetabMathExprParser.FunctionCall_Context
+ ):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#UnaryExpr.
+ def visitUnaryExpr(self, ctx: PetabMathExprParser.UnaryExprContext):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#Number_.
+ def visitNumber_(self, ctx: PetabMathExprParser.Number_Context):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#VarExpr_.
+ def visitVarExpr_(self, ctx: PetabMathExprParser.VarExpr_Context):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#comp_op.
+ def visitComp_op(self, ctx: PetabMathExprParser.Comp_opContext):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#argumentList.
+ def visitArgumentList(self, ctx: PetabMathExprParser.ArgumentListContext):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#functionCall.
+ def visitFunctionCall(self, ctx: PetabMathExprParser.FunctionCallContext):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#booleanLiteral.
+ def visitBooleanLiteral(
+ self, ctx: PetabMathExprParser.BooleanLiteralContext
+ ):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#number.
+ def visitNumber(self, ctx: PetabMathExprParser.NumberContext):
+ return self.visitChildren(ctx)
+
+ # Visit a parse tree produced by PetabMathExprParser#var.
+ def visitVar(self, ctx: PetabMathExprParser.VarContext):
+ return self.visitChildren(ctx)
+
+
+del PetabMathExprParser
diff --git a/petab/v1/math/_generated/__init__.py b/petab/v1/math/_generated/__init__.py
new file mode 100644
index 00000000..def90ea8
--- /dev/null
+++ b/petab/v1/math/_generated/__init__.py
@@ -0,0 +1 @@
+# auto-generated
diff --git a/petab/v1/math/regenerate.sh b/petab/v1/math/regenerate.sh
new file mode 100755
index 00000000..9b531e93
--- /dev/null
+++ b/petab/v1/math/regenerate.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env sh
+# This script regenerates the ANTLR parser and lexer for PEtab math expressions
+
+# Abort on the first failing command (-e) and on use of unset variables (-u).
+set -eu
+
+# ANTLR version
+# IMPORTANT: when updating this, also update the version for
+# `antlr4-python3-runtime` in `pyproject.toml`
+antlr_version="4.13.1"
+
+# Install antlr4-tools if `pip show` does not find it.
+pip show antlr4-tools > /dev/null || pip3 install antlr4-tools
+
+# Run from the directory containing this script, so that the grammar files
+# and the `_generated` output directory resolve independently of the
+# caller's working directory.
+cd "$(dirname "$0")"
+
+antlr4 -v "$antlr_version" \
+    -Dlanguage=Python3 \
+    -visitor \
+    -no-listener \
+    -o _generated \
+    PetabMathExprParser.g4 \
+    PetabMathExprLexer.g4
+
+# (Re-)create the package marker of the output directory.
+echo "# auto-generated" > _generated/__init__.py
diff --git a/petab/v1/math/sympify.py b/petab/v1/math/sympify.py
new file mode 100644
index 00000000..cc81a000
--- /dev/null
+++ b/petab/v1/math/sympify.py
@@ -0,0 +1,66 @@
+"""PEtab math to sympy conversion."""
+
+import numpy as np
+import sympy as sp
+from antlr4 import CommonTokenStream, InputStream
+from antlr4.error.ErrorListener import ErrorListener
+
+from ._generated.PetabMathExprLexer import PetabMathExprLexer
+from ._generated.PetabMathExprParser import PetabMathExprParser
+from .SympyVisitor import MathVisitorSympy, bool2num
+
+__all__ = ["sympify_petab"]
+
+
+def sympify_petab(expr: str | int | float) -> sp.Expr | sp.Basic:
+    """Translate a PEtab math expression into a sympy expression.
+
+    Args:
+        expr: PEtab math expression, or a plain (Python or numpy) number.
+
+    Raises:
+        ValueError: If the lexer or parser reports a syntax error, or if
+            the resulting expression is known not to be real-valued.
+
+    Returns:
+        The sympy expression corresponding to `expr`.
+        Boolean values are converted to numeric values.
+    """
+    # Plain numbers need no parsing.
+    if isinstance(expr, (int, np.integer)):
+        return sp.Integer(expr)
+    if isinstance(expr, (float, np.floating)):
+        return sp.Float(expr)
+
+    # Replace the default error listeners by one that raises on errors.
+    lexer = PetabMathExprLexer(InputStream(expr))
+    lexer.removeErrorListeners()
+    lexer.addErrorListener(MathErrorListener())
+
+    parser = PetabMathExprParser(CommonTokenStream(lexer))
+    parser.removeErrorListeners()
+    parser.addErrorListener(MathErrorListener())
+
+    try:
+        parse_tree = parser.petabExpression()
+    except ValueError as err:
+        raise ValueError(f"Error parsing {expr!r}: {err.args[0]}") from None
+
+    # Walk the parse tree to build the sympy expression.
+    sym_expr = bool2num(MathVisitorSympy().visit(parse_tree))
+
+    # Only an explicit `False` is rejected; `True` and `None` (unknown) are
+    # both accepted.
+    if sym_expr.is_extended_real is False:
+        raise ValueError(f"Expression {sym_expr} is not real-valued.")
+
+    return sym_expr
+
+
+class MathErrorListener(ErrorListener):
+    """Error listener that turns lexer/parser syntax errors into exceptions."""
+
+    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):  # noqa N803
+        """Raise a ``ValueError`` naming the position and the message."""
+        position = f"{line}:{column}"
+        raise ValueError(f"Syntax error at {position}: {msg}")
diff --git a/petab/v1/measurements.py b/petab/v1/measurements.py
new file mode 100644
index 00000000..757ce9ce
--- /dev/null
+++ b/petab/v1/measurements.py
@@ -0,0 +1,351 @@
+"""Functions operating on the PEtab measurement table"""
+# noqa: F405
+
+import itertools
+import math
+import numbers
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+from . import core, lint, observables
+from .C import * # noqa: F403
+
+__all__ = [
+ "assert_overrides_match_parameter_count",
+ "create_measurement_df",
+ "get_measurement_df",
+ "get_measurement_parameter_ids",
+ "get_rows_for_condition",
+ "get_simulation_conditions",
+ "measurements_have_replicates",
+ "measurement_is_at_steady_state",
+ "split_parameter_replacement_list",
+ "write_measurement_df",
+]
+
+
+def get_measurement_df(
+    measurement_file: None | str | Path | pd.DataFrame,
+) -> pd.DataFrame | None:
+    """
+    Read the provided measurement file into a ``pandas.DataFrame``.
+
+    Arguments:
+        measurement_file:
+            Name of file to read from, or a ``pandas.DataFrame``, or
+            ``None``.
+
+    Returns:
+        Measurement DataFrame, or ``None`` if ``measurement_file`` is
+        ``None``.
+    """
+    if measurement_file is None:
+        return None
+
+    if isinstance(measurement_file, str | Path):
+        measurement_file = pd.read_csv(
+            measurement_file, sep="\t", float_precision="round_trip"
+        )
+
+    # Checked for both freshly read and caller-provided tables.
+    lint.assert_no_leading_trailing_whitespace(
+        measurement_file.columns.values, MEASUREMENT
+    )
+
+    return measurement_file
+
+
+def write_measurement_df(df: pd.DataFrame, filename: str | Path) -> None:
+    """Write a PEtab measurement table to a tab-separated file.
+
+    Arguments:
+        df: PEtab measurement table
+        filename: Destination file name
+    """
+    # Pass the table through the reader first, so that it gets the same
+    # checks as a table loaded from file.
+    checked_df = get_measurement_df(df)
+    checked_df.to_csv(filename, sep="\t", index=False)
+
+
+def get_simulation_conditions(measurement_df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Collect the distinct simulation conditions of a measurement table.
+
+    A simulation condition is a specific combination of
+    simulationConditionId and preequilibrationConditionId.
+
+    Arguments:
+        measurement_df: PEtab measurement table
+
+    Returns:
+        Dataframe with columns 'simulationConditionId' and
+        'preequilibrationConditionId'. All-null columns will be omitted.
+        Missing 'preequilibrationConditionId's will be set to '' (empty
+        string).
+    """
+    if measurement_df.empty:
+        return pd.DataFrame(data={SIMULATION_CONDITION_ID: []})
+
+    # Group only by those condition columns that are not entirely null.
+    # (Could be improved by checking for identical condition vectors.)
+    candidate_cols = [SIMULATION_CONDITION_ID, PREEQUILIBRATION_CONDITION_ID]
+    grouping_cols = core.get_notnull_columns(measurement_df, candidate_cols)
+
+    # `groupby` ignores rows with NaN keys, but those rows are required
+    # here; therefore replace NaN by the empty string first.
+    filled_df = measurement_df.fillna("")
+    group_sizes = filled_df.groupby(grouping_cols).size()
+    # Keep each combination once and drop the size column again.
+    unique_conditions = group_sizes.reset_index()[grouping_cols]
+
+    # Sort to always return the conditions in the same order.
+    return unique_conditions.sort_values(grouping_cols, ignore_index=True)
+
+
+def get_rows_for_condition(
+    measurement_df: pd.DataFrame,
+    condition: pd.Series | pd.DataFrame | dict,
+) -> pd.DataFrame:
+    """
+    Extract rows in `measurement_df` for `condition` according
+    to 'preequilibrationConditionId' and 'simulationConditionId' in
+    `condition`.
+
+    Arguments:
+        measurement_df:
+            PEtab measurement DataFrame
+        condition:
+            DataFrame with single row (or Series) and columns
+            'preequilibrationConditionId' and 'simulationConditionId'.
+            Or dictionary with those keys.
+
+    Returns:
+        The subselection of rows in ``measurement_df`` for the condition
+        ``condition``. If ``condition`` contains neither of the two keys,
+        no filter applies and all rows are returned.
+    """
+    # Start from an all-True boolean mask. The former integer start value
+    # `1` ended up in `.loc[1, :]` when no key was present, which selected
+    # the row *labelled* 1 instead of all rows.
+    row_filter = np.ones(len(measurement_df), dtype=bool)
+
+    if PREEQUILIBRATION_CONDITION_ID in condition:
+        # A missing preequilibration condition is represented as "".
+        row_filter = (
+            measurement_df[PREEQUILIBRATION_CONDITION_ID].fillna("")
+            == condition[PREEQUILIBRATION_CONDITION_ID]
+        ) & row_filter
+    if SIMULATION_CONDITION_ID in condition:
+        row_filter = (
+            measurement_df[SIMULATION_CONDITION_ID]
+            == condition[SIMULATION_CONDITION_ID]
+        ) & row_filter
+
+    return measurement_df.loc[row_filter, :]
+
+
+def get_measurement_parameter_ids(measurement_df: pd.DataFrame) -> list[str]:
+    """
+    Return list of ID of parameters which occur in measurement table as
+    observable or noise parameter overrides.
+
+    A missing override column is treated like a column without overrides.
+
+    Arguments:
+        measurement_df:
+            PEtab measurement DataFrame
+
+    Returns:
+        The distinct override entries, in order of first occurrence:
+        observable parameter overrides first, then noise parameter
+        overrides. Entries are whatever
+        :func:`split_parameter_replacement_list` returns for the cells.
+    """
+
+    def get_unique_parameters(column: str) -> list:
+        # The override columns may be absent from the table.
+        if column not in measurement_df:
+            return []
+        return core.unique_preserve_order(
+            itertools.chain.from_iterable(
+                measurement_df[column].apply(split_parameter_replacement_list)
+            )
+        )
+
+    return core.unique_preserve_order(
+        get_unique_parameters(OBSERVABLE_PARAMETERS)
+        + get_unique_parameters(NOISE_PARAMETERS)
+    )
+
+
+def split_parameter_replacement_list(
+    list_string: str | numbers.Number, delim: str = PARAMETER_SEPARATOR
+) -> list[str | numbers.Number]:
+    """
+    Split an observableParameters or noiseParameters cell of the
+    measurement table into its individual entries.
+
+    Arguments:
+        list_string: delim-separated stringified list
+        delim: delimiter
+
+    Returns:
+        List of split values. Numeric values may be converted to `float`,
+        and parameter IDs are kept as strings.
+    """
+    if list_string is None or list_string == "":
+        return []
+
+    if isinstance(list_string, numbers.Number):
+        # Empty cells in pandas might be turned into nan
+        # We might want to allow nan as replacement...
+        return [] if np.isnan(list_string) else [list_string]
+
+    def convert_and_check(entry):
+        entry = core.to_float_if_float(entry)
+        if isinstance(entry, float) or lint.is_valid_identifier(entry):
+            return entry
+
+        raise ValueError(
+            f"The value '{entry}' in the parameter replacement list "
+            f"'{list_string}' is neither a number, nor a valid parameter ID."
+        )
+
+    stripped_entries = [entry.strip() for entry in list_string.split(delim)]
+    return [convert_and_check(entry) for entry in stripped_entries]
+
+
+def create_measurement_df() -> pd.DataFrame:
+    """Create an empty measurement table with all measurement columns.
+
+    Returns:
+        Created DataFrame
+    """
+    column_names = [
+        OBSERVABLE_ID,
+        PREEQUILIBRATION_CONDITION_ID,
+        SIMULATION_CONDITION_ID,
+        MEASUREMENT,
+        TIME,
+        OBSERVABLE_PARAMETERS,
+        NOISE_PARAMETERS,
+        DATASET_ID,
+        REPLICATE_ID,
+    ]
+    return pd.DataFrame(data={name: [] for name in column_names})
+
+
+def measurements_have_replicates(measurement_df: pd.DataFrame) -> bool:
+    """Tests whether the measurements come with replicates
+
+    Replicates are several rows that share the same observable, simulation
+    condition, preequilibration condition and time point (considering only
+    those of these columns that are not entirely null).
+
+    Arguments:
+        measurement_df: Measurement table
+
+    Returns:
+        ``True`` if there are replicates, ``False`` otherwise
+    """
+    grouping_cols = core.get_notnull_columns(
+        measurement_df,
+        [
+            OBSERVABLE_ID,
+            SIMULATION_CONDITION_ID,
+            PREEQUILIBRATION_CONDITION_ID,
+            TIME,
+        ],
+    )
+    # `groupby` ignores rows with NaN keys, therefore replace NaN first.
+    group_sizes = measurement_df.fillna("").groupby(grouping_cols).size()
+    # Convert explicitly: `.any()` yields `numpy.bool_`, not `bool`.
+    return bool((group_sizes > 1).any())
+
+
+def assert_overrides_match_parameter_count(
+    measurement_df: pd.DataFrame, observable_df: pd.DataFrame
+) -> None:
+    """Ensure that number of parameters in the observable definition matches
+    the number of overrides in ``measurement_df``
+
+    Arguments:
+        measurement_df: PEtab measurement table
+        observable_df: PEtab observable table
+
+    Raises:
+        ValueError:
+            If a measurement refers to an observable that is not in
+            ``observable_df``.
+        AssertionError:
+            If the number of observable or noise parameter overrides of a
+            measurement differs from the number of placeholders in the
+            respective formula.
+    """
+
+    def count_placeholders(formula_column: str, override_type: str) -> dict:
+        # Extract the placeholders only once per observable, instead of
+        # once per measurement.
+        return {
+            obs_id: len(
+                observables.get_formula_placeholders(
+                    formula, obs_id, override_type
+                )
+            )
+            for obs_id, formula in zip(
+                observable_df.index.values,
+                observable_df[formula_column],
+                strict=True,
+            )
+        }
+
+    observable_parameters_count = count_placeholders(
+        OBSERVABLE_FORMULA, "observable"
+    )
+    noise_parameters_count = count_placeholders(NOISE_FORMULA, "noise")
+
+    for _, row in measurement_df.iterrows():
+        # check observable parameters
+        try:
+            expected = observable_parameters_count[row[OBSERVABLE_ID]]
+        except KeyError as e:
+            raise ValueError(
+                f"Observable {row[OBSERVABLE_ID]} used in measurement table "
+                f"is not defined."
+            ) from e
+
+        actual = len(
+            split_parameter_replacement_list(
+                row.get(OBSERVABLE_PARAMETERS, None)
+            )
+        )
+        # No overrides are also allowed
+        if actual != expected:
+            formula = observable_df.loc[row[OBSERVABLE_ID], OBSERVABLE_FORMULA]
+            raise AssertionError(
+                f"Mismatch of observable parameter overrides for "
+                f"{row[OBSERVABLE_ID]} ({formula}) "
+                f"in:\n{row}\n"
+                f"Expected {expected} but got {actual}"
+            )
+
+        # check noise parameters
+        replacements = split_parameter_replacement_list(
+            row.get(NOISE_PARAMETERS, None)
+        )
+        try:
+            expected = noise_parameters_count[row[OBSERVABLE_ID]]
+
+            # No overrides are also allowed
+            if len(replacements) != expected:
+                raise AssertionError(
+                    f"Mismatch of noise parameter overrides in:\n{row}\n"
+                    f"Expected {expected} but got {len(replacements)}"
+                )
+        except KeyError as err:
+            # no overrides defined, but a numerical sigma can be provided
+            # anyways
+            # NOTE(review): both count dicts are keyed by the same
+            # observable IDs and unknown IDs already raise above, so this
+            # branch looks unreachable — confirm before relying on it.
+            if len(replacements) != 1 or not isinstance(
+                replacements[0], numbers.Number
+            ):
+                raise AssertionError(
+                    f"No placeholders have been specified in the noise model "
+                    f"for observable {row[OBSERVABLE_ID]}, but parameter ID "
+                    "or multiple overrides were specified in the "
+                    "noiseParameters column."
+                ) from err
+
+
+def measurement_is_at_steady_state(time: float) -> bool:
+    """Tell whether a measurement time point denotes a steady state.
+
+    Arguments:
+        time:
+            The time.
+
+    Returns:
+        Whether the measurement is at steady state, i.e. whether ``time``
+        is infinite.
+    """
+    time_is_infinite = math.isinf(time)
+    return time_is_infinite
diff --git a/petab/v1/models/__init__.py b/petab/v1/models/__init__.py
new file mode 100644
index 00000000..938f55fb
--- /dev/null
+++ b/petab/v1/models/__init__.py
@@ -0,0 +1,14 @@
+"""Handling of different model types supported by PEtab."""
+#: SBML model type as used in a PEtab v2 yaml file as `language`.
+MODEL_TYPE_SBML = "sbml"
+#: PySB model type as used in a PEtab v2 yaml file as `language`.
+MODEL_TYPE_PYSB = "pysb"
+
+#: Set of all model type IDs defined above.
+known_model_types = {
+    MODEL_TYPE_SBML,
+    MODEL_TYPE_PYSB,
+}
+
+# NOTE(review): this import comes after the constants above, presumably so
+# that submodules importing those constants from this package find them
+# already defined — confirm before moving it to the top of the file.
+from .model import Model  # noqa F401
+
+__all__ = ["MODEL_TYPE_SBML", "MODEL_TYPE_PYSB", "known_model_types", "Model"]
diff --git a/petab/v1/models/model.py b/petab/v1/models/model.py
new file mode 100644
index 00000000..de1ebf3a
--- /dev/null
+++ b/petab/v1/models/model.py
@@ -0,0 +1,159 @@
+"""PEtab model abstraction"""
+from __future__ import annotations
+
+import abc
+from collections.abc import Iterable
+from pathlib import Path
+from typing import Any
+
+__all__ = ["Model", "model_factory"]
+
+
+class Model(abc.ABC):
+    """Base class for wrappers for any PEtab-supported model type"""
+
+    @abc.abstractmethod
+    def __init__(self):
+        ...
+
+    @staticmethod
+    @abc.abstractmethod
+    def from_file(filepath_or_buffer: Any, model_id: str) -> Model:
+        """Load the model from the given path/URL
+
+        :param filepath_or_buffer: URL or path of the model
+        :param model_id: Model ID
+        :returns: A ``Model`` instance holding the given model
+        """
+        ...
+
+    @abc.abstractmethod
+    def to_file(self, filename: str | Path):
+        """Save the model to the given file
+
+        :param filename: Destination filename
+        """
+        ...
+
+    # NOTE: this used to be declared with `@classmethod` stacked on top of
+    # `@property`. That combination is deprecated since Python 3.11 and no
+    # longer works from Python 3.13 on. Subclasses may still override this
+    # with a plain class attribute, which satisfies the abstract property.
+    @property
+    @abc.abstractmethod
+    def type_id(self):
+        """The model type ID of this model wrapper"""
+        ...
+
+    @property
+    @abc.abstractmethod
+    def model_id(self):
+        """The ID of this model"""
+        ...
+
+    @abc.abstractmethod
+    def get_parameter_value(self, id_: str) -> float:
+        """Get a parameter value
+
+        :param id_: ID of the parameter whose value is to be returned
+        :raises ValueError: If no parameter with the given ID exists
+        :returns: The value of the given parameter as specified in the model
+        """
+        ...
+
+    @abc.abstractmethod
+    def get_free_parameter_ids_with_values(
+        self,
+    ) -> Iterable[tuple[str, float]]:
+        """Get free model parameters along with their values
+
+        Returns:
+            Iterator over tuples of (parameter_id, parameter_value)
+        """
+        ...
+
+    @abc.abstractmethod
+    def get_parameter_ids(self) -> Iterable[str]:
+        """Get all parameter IDs from this model
+
+        :returns: Iterator over model parameter IDs
+        """
+        ...
+
+    @abc.abstractmethod
+    def has_entity_with_id(self, entity_id) -> bool:
+        """Check if there is a model entity with the given ID
+
+        :param entity_id: ID to check for
+        :returns:
+            ``True``, if there is an entity with the given ID,
+            ``False`` otherwise
+        """
+        ...
+
+    @abc.abstractmethod
+    def get_valid_parameters_for_parameter_table(self) -> Iterable[str]:
+        """Get IDs of all parameters that are allowed to occur in the PEtab
+        parameters table
+
+        :returns: Iterator over parameter IDs
+        """
+        ...
+
+    @abc.abstractmethod
+    def get_valid_ids_for_condition_table(self) -> Iterable[str]:
+        """Get IDs of all model entities that are allowed to occur as columns
+        in the PEtab conditions table.
+
+        :returns: Iterator over model entity IDs
+        """
+        ...
+
+    @abc.abstractmethod
+    def symbol_allowed_in_observable_formula(self, id_: str) -> bool:
+        """Check if the given ID is allowed to be used in observable and noise
+        formulas
+
+        :param id_: ID to check
+        :returns: ``True``, if allowed, ``False`` otherwise
+        """
+        ...
+
+    @abc.abstractmethod
+    def is_valid(self) -> bool:
+        """Validate this model
+
+        :returns:
+            `True` if the model is valid, `False` if there are errors in
+            this model
+        """
+        ...
+
+    @abc.abstractmethod
+    def is_state_variable(self, id_: str) -> bool:
+        """Check whether the given ID corresponds to a model state variable
+
+        :param id_: ID to check
+        """
+        ...
+
+
+def model_factory(
+    filepath_or_buffer: Any, model_language: str, model_id: str | None = None
+) -> Model:
+    """Create a PEtab model instance from the given model
+
+    :param filepath_or_buffer: Path/URL of the model
+    :param model_language: PEtab model language ID for the given model
+    :param model_id: PEtab model ID for the given model
+    :raises NotImplementedError:
+        If ``model_language`` is a known model type without a wrapper here
+    :raises ValueError: If ``model_language`` is not a known model type
+    :returns: A :py:class:`Model` instance representing the given model
+    """
+    from . import MODEL_TYPE_PYSB, MODEL_TYPE_SBML, known_model_types
+
+    # The wrapper modules are imported only when needed, so the libraries
+    # of the other model type do not have to be importable.
+    if model_language == MODEL_TYPE_SBML:
+        from .sbml_model import SbmlModel
+
+        return SbmlModel.from_file(filepath_or_buffer, model_id=model_id)
+
+    if model_language == MODEL_TYPE_PYSB:
+        from .pysb_model import PySBModel
+
+        return PySBModel.from_file(filepath_or_buffer, model_id=model_id)
+
+    if model_language in known_model_types:
+        raise NotImplementedError(
+            f"Unsupported model format: {model_language}"
+        )
+
+    raise ValueError(f"Unknown model format: {model_language}")
diff --git a/petab/v1/models/pysb_model.py b/petab/v1/models/pysb_model.py
new file mode 100644
index 00000000..7355669e
--- /dev/null
+++ b/petab/v1/models/pysb_model.py
@@ -0,0 +1,230 @@
+"""Functions for handling PySB models"""
+
+import itertools
+import re
+import sys
+from collections.abc import Iterable
+from pathlib import Path
+from typing import Any
+
+import pysb
+
+from . import MODEL_TYPE_PYSB
+from .model import Model
+
+__all__ = ["PySBModel", "parse_species_name", "pattern_from_string"]
+
+
+def _pysb_model_from_path(pysb_model_file: str | Path) -> pysb.Model:
+    """Load a pysb model module and return the :class:`pysb.Model` instance
+
+    NOTE: This executes the given file as Python code; only use it with
+    trusted model files.
+
+    :param pysb_model_file: Full or relative path to the PySB model module
+    :raises ValueError:
+        If the file cannot be loaded as a Python module, or if the module
+        does not contain any :class:`pysb.Model`
+    :return: The pysb Model instance
+    """
+    pysb_model_file = Path(pysb_model_file)
+    pysb_model_module_name = pysb_model_file.with_suffix("").name
+
+    import importlib.util
+
+    spec = importlib.util.spec_from_file_location(
+        pysb_model_module_name, pysb_model_file
+    )
+    # `spec_from_file_location` may return `None`; fail with a clear
+    # message instead of an `AttributeError` further down.
+    if spec is None or spec.loader is None:
+        raise ValueError(
+            f"Could not load a Python module from {pysb_model_file}."
+        )
+    module = importlib.util.module_from_spec(spec)
+    # Register the module under its name before executing it.
+    sys.modules[pysb_model_module_name] = module
+    spec.loader.exec_module(module)
+
+    # find a pysb.Model instance in the module
+    # 1) check if module.model exists and is a pysb.Model
+    model = getattr(module, "model", None)
+    if isinstance(model, pysb.Model):
+        return model
+
+    # 2) check if there is any other pysb.Model instance
+    for attr_name in dir(module):
+        attr = getattr(module, attr_name)
+        if isinstance(attr, pysb.Model):
+            return attr
+
+    raise ValueError(f"Could not find any pysb.Model in {pysb_model_file}.")
+
+
+class PySBModel(Model):
+    """PEtab wrapper for PySB models"""
+
+    type_id = MODEL_TYPE_PYSB
+
+    def __init__(self, model: pysb.Model, model_id: str):
+        """
+        :param model: The PySB model to wrap
+        :param model_id: PEtab model ID for the given model
+        """
+        super().__init__()
+
+        self.model = model
+        self._model_id = model_id
+
+    @staticmethod
+    def from_file(filepath_or_buffer, model_id: str):
+        """Load the PySB model module at the given path
+
+        :param filepath_or_buffer: Path of the PySB model module
+        :param model_id: PEtab model ID for the given model
+        :returns: A ``PySBModel`` wrapping the loaded model
+        """
+        return PySBModel(
+            model=_pysb_model_from_path(filepath_or_buffer), model_id=model_id
+        )
+
+    def to_file(self, filename: str | Path):
+        """Write the model to ``filename`` using the ``pysb_flat`` exporter
+
+        :param filename: Destination filename
+        """
+        from pysb.export import export
+
+        model_source = export(self.model, "pysb_flat")
+        with open(filename, "w") as f:
+            f.write(model_source)
+
+    @property
+    def model_id(self):
+        """The PEtab model ID of this model"""
+        return self._model_id
+
+    @model_id.setter
+    def model_id(self, model_id):
+        self._model_id = model_id
+
+    def get_parameter_ids(self) -> Iterable[str]:
+        """Iterate over the names of all parameters of the model"""
+        return (p.name for p in self.model.parameters)
+
+    def get_parameter_value(self, id_: str) -> float:
+        """Get the value of the parameter with the given name
+
+        :raises ValueError: If no parameter with the given name exists
+        """
+        try:
+            return self.model.parameters[id_].value
+        except KeyError as e:
+            raise ValueError(f"Parameter {id_} does not exist.") from e
+
+    def get_free_parameter_ids_with_values(
+        self,
+    ) -> Iterable[tuple[str, float]]:
+        """Iterate over (name, value) of all parameters of the model"""
+        return ((p.name, p.value) for p in self.model.parameters)
+
+    def has_entity_with_id(self, entity_id) -> bool:
+        """Check whether the model has a component with the given name"""
+        try:
+            _ = self.model.components[entity_id]
+            return True
+        except KeyError:
+            return False
+
+    def get_valid_parameters_for_parameter_table(self) -> Iterable[str]:
+        """Iterate over the IDs allowed in the PEtab parameters table"""
+        # all parameters are allowed in the parameter table
+        return self.get_parameter_ids()
+
+    def get_valid_ids_for_condition_table(self) -> Iterable[str]:
+        """Iterate over parameter names, followed by compartment names"""
+        return itertools.chain(
+            self.get_parameter_ids(), self.get_compartment_ids()
+        )
+
+    def symbol_allowed_in_observable_formula(self, id_: str) -> bool:
+        """Check whether ``id_`` names a parameter, observable or expression"""
+        return id_ in (
+            x.name
+            for x in itertools.chain(
+                self.model.parameters,
+                self.model.observables,
+                self.model.expressions,
+            )
+        )
+
+    def is_valid(self) -> bool:
+        """Validate this model"""
+        # PySB models are always valid
+        return True
+
+    def is_state_variable(self, id_: str) -> bool:
+        """Check whether ``id_`` is a plausible species name of this model
+
+        :param id_: The ID to check, in the format understood by
+            :func:`parse_species_name`
+        :returns:
+            ``False`` if ``id_`` names a model component, cannot be parsed,
+            or refers to monomers, compartments, sites or site states that
+            the model does not have, or leaves a monomer site unspecified;
+            ``True`` otherwise
+        """
+        # If there is a component with that name, it's not a state variable
+        # (there are no dynamically-sized compartments)
+        if self.model.components.get(id_, None):
+            return False
+
+        # Try parsing the ID
+        try:
+            result = parse_species_name(id_)
+        except ValueError:
+            return False
+        else:
+            # check if the ID is plausible
+            for monomer, compartment, site_config in result:
+                pysb_monomer: pysb.Monomer = self.model.monomers.get(monomer)
+                if pysb_monomer is None:
+                    return False
+                if compartment:
+                    pysb_compartment = self.model.compartments.get(compartment)
+                    if pysb_compartment is None:
+                        return False
+                for site, state in site_config.items():
+                    if site not in pysb_monomer.sites:
+                        return False
+                    if state not in pysb_monomer.site_states[site]:
+                        return False
+                if set(pysb_monomer.sites) - set(site_config.keys()):
+                    # There are undefined sites
+                    return False
+            return True
+
+    def get_compartment_ids(self) -> Iterable[str]:
+        """Iterate over the names of all compartments of the model"""
+        return (compartment.name for compartment in self.model.compartments)
+
+
+def parse_species_name(
+    name: str,
+) -> list[tuple[str, str | None, dict[str, Any]]]:
+    """Parse a PySB species name
+
+    The constituents of a complex are separated by ``" % "``. Each
+    constituent has the form ``Monomer(site=state, ...)``, optionally
+    followed by ``" ** compartment"``. A state is ``None``, a quoted string
+    or an integer.
+
+    :param name: Species name to parse
+    :returns: List of species, representing complex constituents, each as
+        a tuple of the monomer name, the compartment name, and a dict of sites
+        mapping to site states.
+    :raises ValueError: In case this is not a valid ID
+    :raises NotImplementedError: If the name contains a ``MultiState``
+    """
+    if "=MultiState(" in name:
+        raise NotImplementedError("MultiState is not yet supported.")
+
+    # The group names must match the keys read from the match below.
+    complex_constituent_pattern = re.compile(
+        r"^(?P<monomer>\w+)\((?P<site_config>.*)\)"
+        r"( \*\* (?P<compartment>.*))?$"
+    )
+    result = []
+
+    for complex_constituent in name.split(" % "):
+        match = complex_constituent_pattern.match(complex_constituent)
+        if not match:
+            raise ValueError(
+                f"Invalid species name: '{name}' " f"('{complex_constituent}')"
+            )
+        monomer = match["monomer"]
+        site_config_str = match["site_config"]
+        # `None` if the constituent has no ` ** compartment` suffix
+        compartment = match["compartment"]
+
+        site_config = {}
+        for site_str in site_config_str.split(", "):
+            if not site_str:
+                # monomer without sites: `Monomer()`
+                continue
+            site, config = site_str.split("=")
+            if config == "None":
+                config = None
+            elif config.startswith("'"):
+                if not config.endswith("'"):
+                    raise ValueError(
+                        f"Invalid species name: '{name}' " f"('{config}')"
+                    )
+                # strip quotes
+                config = config[1:-1]
+            else:
+                # anything else has to be an integer
+                config = int(config)
+            site_config[site] = config
+        result.append(
+            (monomer, compartment, site_config),
+        )
+
+    return result
+
+
+def pattern_from_string(string: str, model: pysb.Model) -> pysb.ComplexPattern:
+    """Build a ``pysb.ComplexPattern`` of ``model`` from a pattern string
+
+    :param string: Pattern string, as accepted by :func:`parse_species_name`
+    :param model: The model whose monomers and compartments are referenced
+    :returns: The complex pattern consisting of one monomer pattern per
+        constituent in ``string``
+    """
+    monomer_patterns = [
+        pysb.MonomerPattern(
+            monomer=model.monomers.get(monomer_name),
+            compartment=model.compartments.get(compartment_name, None),
+            site_conditions=site_conditions,
+        )
+        for monomer_name, compartment_name, site_conditions in (
+            parse_species_name(string)
+        )
+    ]
+
+    return pysb.ComplexPattern(monomer_patterns, compartment=None)
diff --git a/petab/v1/models/sbml_model.py b/petab/v1/models/sbml_model.py
new file mode 100644
index 00000000..fd57f2dc
--- /dev/null
+++ b/petab/v1/models/sbml_model.py
@@ -0,0 +1,224 @@
+"""Functions for handling SBML models"""
+
+import itertools
+from collections.abc import Iterable
+from pathlib import Path
+
+import libsbml
+import sympy as sp
+from sympy.abc import _clash
+
+from ..sbml import (
+ get_sbml_model,
+ is_sbml_consistent,
+ load_sbml_from_string,
+ write_sbml,
+)
+from . import MODEL_TYPE_SBML
+from .model import Model
+
+__all__ = ["SbmlModel"]
+
+
+class SbmlModel(Model):
+    """PEtab wrapper for SBML models"""
+
+    type_id = MODEL_TYPE_SBML
+
+    def __init__(
+        self,
+        sbml_model: libsbml.Model | None = None,
+        sbml_reader: libsbml.SBMLReader | None = None,
+        sbml_document: libsbml.SBMLDocument | None = None,
+        model_id: str | None = None,
+    ):
+        """
+        :param sbml_model: The SBML model to wrap
+        :param sbml_reader: The reader that was used to load the model
+        :param sbml_document: The document containing the model
+        :param model_id:
+            PEtab model ID. If not provided, the ID attribute of
+            ``sbml_model`` is used, if there is a model.
+        """
+        super().__init__()
+
+        self.sbml_reader: libsbml.SBMLReader | None = sbml_reader
+        self.sbml_document: libsbml.SBMLDocument | None = sbml_document
+        self.sbml_model: libsbml.Model | None = sbml_model
+
+        # Fall back to the model's own ID; without a model, there is
+        # nothing to fall back to.
+        self._model_id = model_id or (
+            sbml_model.getIdAttribute() if sbml_model is not None else None
+        )
+
+    def __getstate__(self):
+        """Return state for pickling"""
+        state = self.__dict__.copy()
+
+        # libsbml stuff cannot be serialized directly
+        if self.sbml_model:
+            sbml_document = self.sbml_model.getSBMLDocument()
+            sbml_writer = libsbml.SBMLWriter()
+            state["sbml_string"] = sbml_writer.writeSBMLToString(sbml_document)
+
+        exclude = ["sbml_reader", "sbml_document", "sbml_model"]
+        for key in exclude:
+            state.pop(key)
+
+        return state
+
+    def __setstate__(self, state):
+        """Set state after unpickling"""
+        # load SBML model from pickled string
+        sbml_string = state.pop("sbml_string", None)
+        if sbml_string:
+            (
+                self.sbml_reader,
+                self.sbml_document,
+                self.sbml_model,
+            ) = load_sbml_from_string(sbml_string)
+        else:
+            # No model was pickled; still define the attributes.
+            self.sbml_reader = None
+            self.sbml_document = None
+            self.sbml_model = None
+
+        self.__dict__.update(state)
+
+    @staticmethod
+    def from_file(filepath_or_buffer, model_id: str | None = None):
+        """Load the SBML model from the given location
+
+        :param filepath_or_buffer: Passed on to ``get_sbml_model``
+        :param model_id: PEtab model ID; defaults to the SBML model's ID
+        :returns: An ``SbmlModel`` wrapping the loaded model
+        """
+        sbml_reader, sbml_document, sbml_model = get_sbml_model(
+            filepath_or_buffer
+        )
+        return SbmlModel(
+            sbml_model=sbml_model,
+            sbml_reader=sbml_reader,
+            sbml_document=sbml_document,
+            model_id=model_id,
+        )
+
+    @property
+    def model_id(self):
+        """The PEtab model ID of this model"""
+        return self._model_id
+
+    @model_id.setter
+    def model_id(self, model_id):
+        self._model_id = model_id
+
+    def to_file(self, filename: str | Path):
+        """Write the SBML document of this model to ``filename``"""
+        write_sbml(
+            self.sbml_document or self.sbml_model.getSBMLDocument(), filename
+        )
+
+    def _rule_targets(self) -> set[str]:
+        """Collect the ``getVariable()`` values of all rules of the model"""
+        return {ar.getVariable() for ar in self.sbml_model.getListOfRules()}
+
+    def get_parameter_value(self, id_: str) -> float:
+        """Get the value of the SBML parameter with the given ID
+
+        :raises ValueError: If no parameter with the given ID exists
+        """
+        parameter = self.sbml_model.getParameter(id_)
+        if not parameter:
+            raise ValueError(f"Parameter {id_} does not exist.")
+        return parameter.getValue()
+
+    def get_free_parameter_ids_with_values(
+        self,
+    ) -> Iterable[tuple[str, float]]:
+        """Iterate over (ID, value) of the free parameters of the model
+
+        Skipped are parameters that are targets of rules, and parameters
+        with an initial assignment that does not evaluate to a number.
+        """
+        rule_targets = self._rule_targets()
+
+        def get_initial(p):
+            # return the initial assignment value if there is one, and it is a
+            # number; `None`, if there is a non-numeric initial assignment;
+            # otherwise, the parameter value
+            if ia := self.sbml_model.getInitialAssignmentBySymbol(p.getId()):
+                value = sympify_sbml(ia.getMath()).evalf()
+                return float(value) if value.is_Number else None
+            return p.getValue()
+
+        return (
+            (p.getId(), initial)
+            for p in self.sbml_model.getListOfParameters()
+            if p.getId() not in rule_targets
+            and (initial := get_initial(p)) is not None
+        )
+
+    def get_parameter_ids(self) -> Iterable[str]:
+        """Iterate over the IDs of all parameters that are not rule targets"""
+        rule_targets = self._rule_targets()
+
+        return (
+            p.getId()
+            for p in self.sbml_model.getListOfParameters()
+            if p.getId() not in rule_targets
+        )
+
+    def get_parameter_ids_with_values(self) -> Iterable[tuple[str, float]]:
+        """Iterate over (ID, value) of all parameters that are not rule
+        targets"""
+        rule_targets = self._rule_targets()
+
+        return (
+            (p.getId(), p.getValue())
+            for p in self.sbml_model.getListOfParameters()
+            if p.getId() not in rule_targets
+        )
+
+    def has_entity_with_id(self, entity_id) -> bool:
+        """Check whether the model has an element with the given SId"""
+        return self.sbml_model.getElementBySId(entity_id) is not None
+
+    def get_valid_parameters_for_parameter_table(self) -> Iterable[str]:
+        """Iterate over the IDs allowed in the PEtab parameters table"""
+        # All parameters except rule-targets
+        disallowed_set = self._rule_targets()
+
+        return (
+            p.getId()
+            for p in self.sbml_model.getListOfParameters()
+            if p.getId() not in disallowed_set
+        )
+
+    def get_valid_ids_for_condition_table(self) -> Iterable[str]:
+        """Iterate over the IDs of all parameters, species and compartments"""
+        return (
+            x.getId()
+            for x in itertools.chain(
+                self.sbml_model.getListOfParameters(),
+                self.sbml_model.getListOfSpecies(),
+                self.sbml_model.getListOfCompartments(),
+            )
+        )
+
+    def symbol_allowed_in_observable_formula(self, id_: str) -> bool:
+        """Check whether ``id_`` is a model element or the symbol ``time``"""
+        # Compare against `None` to return a `bool`, not the element.
+        return (
+            self.sbml_model.getElementBySId(id_) is not None or id_ == "time"
+        )
+
+    def is_valid(self) -> bool:
+        """Validate this model using ``is_sbml_consistent``"""
+        return is_sbml_consistent(self.sbml_model.getSBMLDocument())
+
+    def is_state_variable(self, id_: str) -> bool:
+        """Check whether ``id_`` is a species, a compartment or a rule
+        target"""
+        return (
+            self.sbml_model.getSpecies(id_) is not None
+            or self.sbml_model.getCompartment(id_) is not None
+            or self.sbml_model.getRuleByVariable(id_) is not None
+        )
+
+
+def sympify_sbml(sbml_obj: libsbml.ASTNode | libsbml.SBase) -> sp.Expr:
+    """Translate an SBML math expression into a sympy expression.
+
+    Parameters
+    ----------
+    sbml_obj:
+        SBML math element or an SBML object with a math element.
+
+    Returns
+    -------
+    The sympy expression corresponding to ``sbml_obj``.
+    """
+    if isinstance(sbml_obj, libsbml.ASTNode):
+        math_node = sbml_obj
+    else:
+        math_node = sbml_obj.getMath()
+
+    # Settings for converting the math element to an L3 formula string
+    owning_model = math_node.getParentSBMLObject().getModel()
+    l3_settings = libsbml.L3ParserSettings(
+        owning_model,
+        libsbml.L3P_PARSE_LOG_AS_LOG10,
+        libsbml.L3P_EXPAND_UNARY_MINUS,
+        libsbml.L3P_NO_UNITS,
+        libsbml.L3P_AVOGADRO_IS_CSYMBOL,
+        libsbml.L3P_COMPARE_BUILTINS_CASE_INSENSITIVE,
+        None,
+        libsbml.L3P_MODULO_IS_PIECEWISE,
+    )
+
+    l3_formula = libsbml.formulaToL3StringWithSettings(math_node, l3_settings)
+
+    return sp.sympify(l3_formula, locals=_clash)
diff --git a/petab/v1/observables.py b/petab/v1/observables.py
new file mode 100644
index 00000000..1485302d
--- /dev/null
+++ b/petab/v1/observables.py
@@ -0,0 +1,228 @@
+"""Functions for working with the PEtab observables table"""
+
+import re
+from collections import OrderedDict
+from pathlib import Path
+from typing import Literal
+
+import pandas as pd
+
+from . import core, lint
+from .C import * # noqa: F403
+from .math import sympify_petab
+from .models import Model
+
+__all__ = [
+ "create_observable_df",
+ "get_formula_placeholders",
+ "get_observable_df",
+ "get_output_parameters",
+ "get_placeholders",
+ "write_observable_df",
+]
+
+
+def get_observable_df(
+    observable_file: str | pd.DataFrame | Path | None,
+) -> pd.DataFrame | None:
+    """
+    Load a PEtab observable table and index it by the observable ID.
+
+    Arguments:
+        observable_file:
+            Path of a tab-separated file to read, an already loaded
+            ``pandas.DataFrame``, or ``None``. A DataFrame passed in is
+            re-indexed in place.
+
+    Returns:
+        The observable DataFrame indexed by the observable ID column, or
+        ``None`` if ``None`` was passed.
+
+    Raises:
+        KeyError: If the table has no observable ID column.
+    """
+    if observable_file is None:
+        return None
+
+    observable_df = (
+        pd.read_csv(observable_file, sep="\t", float_precision="round_trip")
+        if isinstance(observable_file, str | Path)
+        else observable_file
+    )
+
+    lint.assert_no_leading_trailing_whitespace(
+        observable_df.columns.values, "observable"
+    )
+
+    # A non-default index is reset; it is only kept as a column if it
+    # already is the observable ID.
+    if not isinstance(observable_df.index, pd.RangeIndex):
+        discard_index = observable_df.index.name != OBSERVABLE_ID
+        observable_df.reset_index(drop=discard_index, inplace=True)
+
+    try:
+        observable_df.set_index([OBSERVABLE_ID], inplace=True)
+    except KeyError:
+        raise KeyError(
+            f"Observable table missing mandatory field {OBSERVABLE_ID}."
+        ) from None
+
+    return observable_df
+
+
+def write_observable_df(df: pd.DataFrame, filename: str | Path) -> None:
+    """Save a PEtab observable table as a tab-separated file.
+
+    The table is first passed through :py:func:`get_observable_df`, and the
+    index is written as well.
+
+    Arguments:
+        df: PEtab observable table
+        filename: Destination file name
+    """
+    observable_df = get_observable_df(df)
+    observable_df.to_csv(filename, sep="\t", index=True)
+
+
+def get_output_parameters(
+    observable_df: pd.DataFrame,
+    model: Model,
+    observables: bool = True,
+    noise: bool = True,
+    mapping_df: pd.DataFrame = None,
+) -> list[str]:
+    """Collect the output parameters of a PEtab problem.
+
+    Output parameters are the symbols occurring in observable and/or noise
+    formulas that the model does not accept as a formula symbol, neither
+    directly nor via the mapping table.
+
+    Arguments:
+        observable_df: PEtab observable table
+        model: The underlying model
+        observables: Include parameters from observableFormulas
+        noise: Include parameters from noiseFormulas
+        mapping_df: PEtab mapping table
+
+    Returns:
+        List of output parameter IDs, without duplicates, in order of
+        first occurrence (symbols of one formula are sorted by name).
+    """
+    formula_columns = []
+    if observables:
+        formula_columns.append(OBSERVABLE_FORMULA)
+    if noise and NOISE_FORMULA in observable_df:
+        formula_columns.append(NOISE_FORMULA)
+
+    # A dict is used as an insertion-ordered set.
+    found = {}
+    for column in formula_columns:
+        for formula in observable_df[column]:
+            symbols = sympify_petab(formula).free_symbols
+            for symbol in sorted(symbols, key=lambda s: s.name):
+                symbol_id = str(symbol)
+                if model.symbol_allowed_in_observable_formula(symbol_id):
+                    continue
+
+                # does it map to a model entity?
+                maps_to_model_entity = (
+                    mapping_df is not None
+                    and symbol_id in mapping_df.index
+                    and model.symbol_allowed_in_observable_formula(
+                        mapping_df.loc[symbol_id, MODEL_ENTITY_ID]
+                    )
+                )
+                if not maps_to_model_entity:
+                    found[symbol_id] = None
+
+    return list(found)
+
+
+def get_formula_placeholders(
+    formula_string: str,
+    observable_id: str,
+    override_type: Literal["observable", "noise"],
+) -> list[str]:
+    """
+    Find the placeholder parameters of one observable in a formula.
+
+    Placeholders are named
+    ``{override_type}Parameter{number}_{observable_id}``.
+
+    Arguments:
+        formula_string: observable or noise formula
+        observable_id: ID of current observable
+        override_type: ``'observable'`` or ``'noise'``, depending on whether
+            ``formula_string`` is an observable or a noise formula
+
+    Returns:
+        Placeholder parameter IDs ordered by their number, starting at 1.
+        Empty if ``formula_string`` is falsy or not a string.
+
+    Raises:
+        AssertionError: If the placeholder numbers found are not exactly
+            1..n.
+    """
+    if not formula_string or not isinstance(formula_string, str):
+        return []
+
+    placeholder_name = (
+        re.escape(override_type)
+        + r"Parameter\d+_"
+        + re.escape(observable_id)
+    )
+    # The placeholder must not be part of a longer identifier.
+    matcher = re.compile(r"(?:^|\W)(" + placeholder_name + r")(?=\W|$)")
+    found = {match.group(1) for match in matcher.finditer(formula_string)}
+
+    # n distinct placeholders must be numbered exactly 1..n
+    expected = [
+        f"{override_type}Parameter{number}_{observable_id}"
+        for number in range(1, len(found) + 1)
+    ]
+
+    if found != set(expected):
+        raise AssertionError(
+            "Non-consecutive numbering of placeholder "
+            f"parameter for {found}"
+        )
+
+    return expected
+
+
+def get_placeholders(
+    observable_df: pd.DataFrame,
+    observables: bool = True,
+    noise: bool = True,
+) -> list[str]:
+    """Collect the placeholder parameters used in the observable table.
+
+    Arguments:
+        observable_df: PEtab observable table
+        observables: Include parameters from observableFormulas
+        noise: Include parameters from noiseFormulas
+
+    Returns:
+        Placeholder parameters found in the selected formula columns,
+        without duplicates, in order of first occurrence.
+    """
+    # (placeholder type, formula column) pairs to scan in every row
+    sources = []
+    if observables:
+        sources.append(("observable", OBSERVABLE_FORMULA))
+    if noise:
+        sources.append(("noise", NOISE_FORMULA))
+
+    collected = []
+    for observable_id, row in observable_df.iterrows():
+        for placeholder_type, formula_column in sources:
+            if formula_column not in row:
+                continue
+
+            collected.extend(
+                get_formula_placeholders(
+                    row[formula_column], observable_id, placeholder_type
+                )
+            )
+    return core.unique_preserve_order(collected)
+
+
+def create_observable_df() -> pd.DataFrame:
+    """Create an observable table without any rows.
+
+    Returns:
+        Empty DataFrame with one column per entry of
+        ``OBSERVABLE_DF_COLS``
+    """
+    empty_columns = {column: [] for column in OBSERVABLE_DF_COLS}
+    return pd.DataFrame(data=empty_columns)
diff --git a/petab/v1/parameter_mapping.py b/petab/v1/parameter_mapping.py
new file mode 100644
index 00000000..014b4a8e
--- /dev/null
+++ b/petab/v1/parameter_mapping.py
@@ -0,0 +1,805 @@
+"""Functions related to mapping parameter from model to parameter estimation
+problem
+"""
+
+import logging
+import numbers
+import os
+import re
+import warnings
+from collections.abc import Iterable
+from typing import Any, Literal
+
+import libsbml
+import numpy as np
+import pandas as pd
+
+from . import (
+ core,
+ lint,
+ measurements,
+ observables,
+ parameters,
+)
+from .C import * # noqa: F403
+from .mapping import resolve_mapping
+from .models import Model
+
+# FIXME import from petab.ENV_NUM_THREADS
+ENV_NUM_THREADS = "PETAB_NUM_THREADS"
+
+
+logger = logging.getLogger(__name__)
+__all__ = [
+ "get_optimization_to_simulation_parameter_mapping",
+ "get_parameter_mapping_for_condition",
+ "handle_missing_overrides",
+ "merge_preeq_and_sim_pars",
+ "merge_preeq_and_sim_pars_condition",
+ "ParMappingDict",
+ "ParMappingDictTuple",
+ "ScaleMappingDict",
+ "ScaleMappingDictTuple",
+ "ParMappingDictQuadruple",
+]
+
+
+# Parameter mapping for condition
+ParMappingDict = dict[str, str | numbers.Number]
+# Parameter mapping for combination of preequilibration and simulation
+# condition
+ParMappingDictTuple = tuple[ParMappingDict, ParMappingDict]
+# Same for scale mapping
+ScaleMappingDict = dict[str, str]
+ScaleMappingDictTuple = tuple[ScaleMappingDict, ScaleMappingDict]
+# Parameter mapping for combination of preequilibration and simulation
+# conditions, for parameter and scale mapping
+ParMappingDictQuadruple = tuple[
+ ParMappingDict, ParMappingDict, ScaleMappingDict, ScaleMappingDict
+]
+
+
+def get_optimization_to_simulation_parameter_mapping(
+    condition_df: pd.DataFrame,
+    measurement_df: pd.DataFrame,
+    parameter_df: pd.DataFrame | None = None,
+    observable_df: pd.DataFrame | None = None,
+    mapping_df: pd.DataFrame | None = None,
+    sbml_model: libsbml.Model = None,
+    simulation_conditions: pd.DataFrame | None = None,
+    warn_unmapped: bool | None = True,
+    scaled_parameters: bool = False,
+    fill_fixed_parameters: bool = True,
+    allow_timepoint_specific_numeric_noise_parameters: bool = False,
+    model: Model = None,
+) -> list[ParMappingDictQuadruple]:
+    """
+    Build the parameter and scale mappings for all simulation conditions.
+
+    One mapping is computed per row of ``simulation_conditions``. The
+    number of threads used is read from the environment variable named by
+    :py:data:`petab.ENV_NUM_THREADS`; with a value of 1 (the default), the
+    rows are processed sequentially without a thread pool.
+
+    Parameters:
+        condition_df, measurement_df, parameter_df, observable_df:
+            The dataframes in the PEtab format.
+        mapping_df:
+            The PEtab mapping table, if any.
+        sbml_model:
+            The SBML model (deprecated, mutually exclusive with ``model``)
+        model:
+            The model.
+        simulation_conditions:
+            Table of simulation conditions as created by
+            ``petab.get_simulation_conditions``. Derived from
+            ``measurement_df`` if not given.
+        warn_unmapped:
+            If ``True``, log warning regarding unmapped parameters
+        scaled_parameters:
+            Whether parameter values should be scaled.
+        fill_fixed_parameters:
+            Whether to fill in nominal values for fixed parameters
+            (estimate=0 in parameters table).
+        allow_timepoint_specific_numeric_noise_parameters:
+            Mapping of timepoint-specific parameters overrides is generally
+            not supported. If this option is set to True, this function will
+            not fail in case of timepoint-specific fixed noise parameters,
+            if the noise formula consists only of one single parameter.
+            It is expected that the respective mapping is performed
+            elsewhere. The value mapped to the respective parameter here is
+            undefined.
+
+    Returns:
+        One tuple of four dicts per simulation condition row: the parameter
+        mapping for preequilibration, the parameter mapping for simulation,
+        the scale mapping for preequilibration and the scale mapping for
+        simulation. Parameter mappings map simulation parameter IDs to
+        optimization parameter IDs or to values; scale mappings map them to
+        the parameter scale. The preequilibration dicts are empty if the
+        row has no preequilibration condition. ``NaN`` is used where no
+        mapping exists.
+
+    Raises:
+        ValueError: If both ``model`` and ``sbml_model`` are passed, or if
+            the measurement table contains unsupported timepoint-specific
+            overrides.
+    """
+    if sbml_model:
+        warnings.warn(
+            "Passing a model via the `sbml_model` argument is "
+            "deprecated, use `model=petab.models.sbml_model."
+            "SbmlModel(...)` instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        from .models.sbml_model import SbmlModel
+
+        if model:
+            raise ValueError(
+                "Arguments `model` and `sbml_model` are " "mutually exclusive."
+            )
+        model = SbmlModel(sbml_model=sbml_model)
+
+    # Ensure inputs are okay
+    _perform_mapping_checks(
+        measurement_df,
+        allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters,  # noqa: E251,E501
+    )
+
+    if simulation_conditions is None:
+        simulation_conditions = measurements.get_simulation_conditions(
+            measurement_df
+        )
+
+    simulation_parameters = dict(model.get_free_parameter_ids_with_values())
+    # Output parameters are added with NaN as value
+    if observable_df is not None:
+        output_parameters = observables.get_output_parameters(
+            observable_df=observable_df, model=model, mapping_df=mapping_df
+        )
+        simulation_parameters.update(
+            dict.fromkeys(output_parameters, np.nan)
+        )
+
+    num_threads = int(os.environ.get(ENV_NUM_THREADS, 1))
+
+    # One packed argument tuple per simulation condition, produced lazily
+    packed_args = _map_condition_arg_packer(
+        simulation_conditions,
+        measurement_df,
+        condition_df,
+        parameter_df,
+        mapping_df,
+        model,
+        simulation_parameters,
+        warn_unmapped,
+        scaled_parameters,
+        fill_fixed_parameters,
+        allow_timepoint_specific_numeric_noise_parameters,
+    )
+
+    # If sequential execution is requested, let's not create any
+    # thread-allocation overhead
+    if num_threads == 1:
+        return list(map(_map_condition, packed_args))
+
+    # Run multi-threaded
+    from concurrent.futures import ThreadPoolExecutor
+
+    with ThreadPoolExecutor(max_workers=num_threads) as executor:
+        mapping = executor.map(_map_condition, packed_args)
+    return list(mapping)
+
+
+def _map_condition_arg_packer(
+    simulation_conditions,
+    measurement_df,
+    condition_df,
+    parameter_df,
+    mapping_df,
+    model,
+    simulation_parameters,
+    warn_unmapped,
+    scaled_parameters,
+    fill_fixed_parameters,
+    allow_timepoint_specific_numeric_noise_parameters,
+):
+    """Yield one argument tuple for ``_map_condition`` per row of
+    ``simulation_conditions``: the row, followed by the remaining arguments
+    in the order in which they were passed.
+    """
+    shared_args = (
+        measurement_df,
+        condition_df,
+        parameter_df,
+        mapping_df,
+        model,
+        simulation_parameters,
+        warn_unmapped,
+        scaled_parameters,
+        fill_fixed_parameters,
+        allow_timepoint_specific_numeric_noise_parameters,
+    )
+    for _, condition in simulation_conditions.iterrows():
+        yield (condition, *shared_args)
+
+
+def _map_condition(packed_args):
+    """Compute the mappings for a single simulation condition row.
+
+    Takes one tuple as produced by ``_map_condition_arg_packer`` so that it
+    can be used with ``map``. For the meaning of the tuple entries see
+    :py:func:`get_optimization_to_simulation_parameter_mapping`.
+
+    Returns the tuple ``(par_map_preeq, par_map_sim, scale_map_preeq,
+    scale_map_sim)``; the preequilibration dicts are empty if the row has
+    no non-empty string as preequilibration condition ID.
+    """
+    (
+        condition,
+        measurement_df,
+        condition_df,
+        parameter_df,
+        mapping_df,
+        model,
+        simulation_parameters,
+        warn_unmapped,
+        scaled_parameters,
+        fill_fixed_parameters,
+        allow_timepoint_specific_numeric_noise_parameters,
+    ) = packed_args
+
+    def has_overrides(column):
+        return column in measurement_df and measurement_df[column].notna().any()
+
+    # The measurements of the current condition are only needed if there
+    # are any observable or noise parameter overrides at all
+    if has_overrides(OBSERVABLE_PARAMETERS) or has_overrides(NOISE_PARAMETERS):
+        cur_measurement_df = measurements.get_rows_for_condition(
+            measurement_df, condition
+        )
+    else:
+        cur_measurement_df = None
+
+    # Arguments shared by the preequilibration and the simulation mapping
+    mapping_kwargs = {
+        "cur_measurement_df": cur_measurement_df,
+        "model": model,
+        "condition_df": condition_df,
+        "parameter_df": parameter_df,
+        "mapping_df": mapping_df,
+        "simulation_parameters": simulation_parameters,
+        "warn_unmapped": warn_unmapped,
+        "scaled_parameters": scaled_parameters,
+        "fill_fixed_parameters": fill_fixed_parameters,
+        "allow_timepoint_specific_numeric_noise_parameters": allow_timepoint_specific_numeric_noise_parameters,  # noqa: E501
+    }
+
+    has_preeq = (
+        PREEQUILIBRATION_CONDITION_ID in condition
+        and isinstance(condition[PREEQUILIBRATION_CONDITION_ID], str)
+        and condition[PREEQUILIBRATION_CONDITION_ID]
+    )
+    if has_preeq:
+        par_map_preeq, scale_map_preeq = get_parameter_mapping_for_condition(
+            condition_id=condition[PREEQUILIBRATION_CONDITION_ID],
+            is_preeq=True,
+            **mapping_kwargs,
+        )
+    else:
+        par_map_preeq = {}
+        scale_map_preeq = {}
+
+    par_map_sim, scale_map_sim = get_parameter_mapping_for_condition(
+        condition_id=condition[SIMULATION_CONDITION_ID],
+        is_preeq=False,
+        **mapping_kwargs,
+    )
+
+    return par_map_preeq, par_map_sim, scale_map_preeq, scale_map_sim
+
+
+def get_parameter_mapping_for_condition(
+    condition_id: str,
+    is_preeq: bool,
+    cur_measurement_df: pd.DataFrame | None = None,
+    sbml_model: libsbml.Model = None,
+    condition_df: pd.DataFrame = None,
+    parameter_df: pd.DataFrame = None,
+    mapping_df: pd.DataFrame | None = None,
+    simulation_parameters: dict[str, str] | None = None,
+    warn_unmapped: bool = True,
+    scaled_parameters: bool = False,
+    fill_fixed_parameters: bool = True,
+    allow_timepoint_specific_numeric_noise_parameters: bool = False,
+    model: Model = None,
+) -> tuple[ParMappingDict, ScaleMappingDict]:
+    """
+    Create dictionary of parameter value and parameter scale mappings from
+    PEtab-problem to SBML parameters for the given condition.
+
+    Parameters:
+        condition_id:
+            Condition ID for which to perform mapping
+        is_preeq:
+            If ``False``, output parameter placeholders that are still
+            unmapped after applying the measurement table overrides are set
+            to ``NaN`` (and reported if ``warn_unmapped`` is set). If
+            ``True``, this step is skipped.
+        cur_measurement_df:
+            Measurement sub-table for current condition, can be ``None`` if
+            not relevant for parameter mapping
+        condition_df:
+            PEtab condition DataFrame
+        parameter_df:
+            PEtab parameter DataFrame
+        mapping_df:
+            PEtab mapping DataFrame
+        sbml_model:
+            The SBML model (deprecated, mutually exclusive with ``model``)
+        model:
+            The model.
+        simulation_parameters:
+            Model simulation parameter IDs mapped to parameter values (output
+            of ``petab.sbml.get_model_parameters(.., with_values=True)``).
+            Optional, saves time if precomputed.
+        warn_unmapped:
+            If ``True``, log warning regarding unmapped parameters
+        scaled_parameters:
+            Whether parameter values should be scaled.
+        fill_fixed_parameters:
+            Whether to fill in nominal values for fixed parameters
+            (estimate=0 in parameters table).
+        allow_timepoint_specific_numeric_noise_parameters:
+            Mapping of timepoint-specific parameters overrides is generally
+            not supported. If this option is set to True, this function will
+            not fail in case of timepoint-specific fixed noise parameters,
+            if the noise formula consists only of one single parameter.
+            It is expected that the respective mapping is performed elsewhere.
+            The value mapped to the respective parameter here is undefined.
+
+    Returns:
+        Tuple of two dictionaries. First dictionary mapping model parameter IDs
+        to mapped parameters IDs to be estimated or to filled-in values in case
+        of non-estimated parameters.
+        Second dictionary mapping model parameter IDs to their scale.
+        ``NaN`` is used where no mapping exists.
+
+    Raises:
+        ValueError: If both ``model`` and ``sbml_model`` are passed, or if
+            ``cur_measurement_df`` contains unsupported timepoint-specific
+            overrides.
+    """
+    if sbml_model:
+        warnings.warn(
+            "Passing a model via the `sbml_model` argument is "
+            "deprecated, use `model=petab.models.sbml_model."
+            "SbmlModel(...)` instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        from .models.sbml_model import SbmlModel
+
+        if model:
+            raise ValueError(
+                "Arguments `model` and `sbml_model` are " "mutually exclusive."
+            )
+        model = SbmlModel(sbml_model=sbml_model)
+
+    if cur_measurement_df is not None:
+        _perform_mapping_checks(
+            cur_measurement_df,
+            allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters,  # noqa: E251,E501
+        )
+
+    if simulation_parameters is None:
+        simulation_parameters = dict(
+            model.get_free_parameter_ids_with_values()
+        )
+
+    # NOTE: order matters here - the former is overwritten by the latter:
+    #  model < measurement table < condition table < parameter table
+
+    # initialize mapping dicts
+    # for the case of matching simulation and optimization parameter vector
+    par_mapping = simulation_parameters.copy()
+    scale_mapping = {par_id: LIN for par_id in par_mapping.keys()}
+    _output_parameters_to_nan(par_mapping)
+
+    # not strictly necessary for preequilibration, but we do it to have
+    # same length of parameter vectors
+    if cur_measurement_df is not None:
+        _apply_output_parameter_overrides(par_mapping, cur_measurement_df)
+
+    if not is_preeq:
+        # Pass the condition ID along so that the warning names the
+        # affected condition instead of ``None``.
+        handle_missing_overrides(
+            par_mapping, warn=warn_unmapped, condition_id=condition_id
+        )
+
+    _apply_condition_parameters(
+        par_mapping,
+        scale_mapping,
+        condition_id,
+        condition_df,
+        model,
+        mapping_df,
+    )
+    _apply_parameter_table(
+        par_mapping,
+        scale_mapping,
+        parameter_df,
+        scaled_parameters,
+        fill_fixed_parameters,
+    )
+
+    return par_mapping, scale_mapping
+
+
+def _output_parameters_to_nan(mapping: ParMappingDict) -> None:
+    """Set the value of every entry whose key looks like an observable or
+    noise parameter placeholder to NaN (in-place).
+
+    Keys that cannot be matched against the pattern are left untouched.
+    """
+    placeholder_pattern = re.compile("^(noise|observable)Parameter[0-9]+_")
+    for par_id in mapping:
+        try:
+            is_placeholder = placeholder_pattern.match(par_id)
+        except TypeError:
+            # not a string
+            continue
+
+        if is_placeholder:
+            mapping[par_id] = np.nan
+
+
+def _apply_output_parameter_overrides(
+    mapping: ParMappingDict, cur_measurement_df: pd.DataFrame
+) -> None:
+    """
+    Write the observable and noise parameter overrides of each measurement
+    row into the parameter mapping dict (in-place).
+
+    Arguments:
+        mapping: parameter mapping dict as obtained from
+            :py:func:`get_parameter_mapping_for_condition`.
+        cur_measurement_df:
+            Subset of the measurement table for the current condition
+    """
+    override_columns = (
+        ("observable", OBSERVABLE_PARAMETERS),
+        ("noise", NOISE_PARAMETERS),
+    )
+    for _, measurement in cur_measurement_df.iterrows():
+        for override_type, column in override_columns:
+            # the number of overrides is not validated here
+            overrides = measurements.split_parameter_replacement_list(
+                measurement.get(column, None)
+            )
+            _apply_overrides_for_observable(
+                mapping, measurement[OBSERVABLE_ID], override_type, overrides
+            )
+
+
+def _apply_overrides_for_observable(
+    mapping: ParMappingDict,
+    observable_id: str,
+    override_type: Literal["observable", "noise"],
+    overrides: list[str],
+) -> None:
+    """
+    Store the overrides of one observable in the mapping dict (in-place).
+
+    The i-th override (counting from 1) is stored under the key
+    ``{override_type}Parameter{i}_{observable_id}``.
+
+    Arguments:
+        mapping: mapping dict to which to apply overrides
+        observable_id: observable ID
+        override_type: ``'observable'`` or ``'noise'``
+        overrides: list of overrides for noise or observable parameters
+    """
+    for position, override in enumerate(overrides, start=1):
+        placeholder_id = f"{override_type}Parameter{position}_{observable_id}"
+        mapping[placeholder_id] = override
+
+
+def _apply_condition_parameters(
+    par_mapping: ParMappingDict,
+    scale_mapping: ScaleMappingDict,
+    condition_id: str,
+    condition_df: pd.DataFrame,
+    model: Model,
+    mapping_df: pd.DataFrame | None = None,
+) -> None:
+    """Replace parameter IDs in parameter mapping dictionary by condition
+    table parameter values (in-place).
+
+    Arguments:
+        par_mapping: see :py:func:`get_parameter_mapping_for_condition`
+        scale_mapping: scale mapping dict updated alongside
+            ``par_mapping``; every entry written here gets the scale
+            ``LIN``
+        condition_id: ID of condition to work on
+        condition_df: PEtab condition table
+        model: model used to skip state variables and to look up the
+            parameter value for NaN entries
+        mapping_df: PEtab mapping table used to resolve the condition table
+            column names, or ``None``
+
+    Raises:
+        NotImplementedError: If a condition table entry is NaN and the
+            model cannot provide a parameter value for it.
+    """
+    for column_id in condition_df.columns:
+        if column_id == CONDITION_NAME:
+            continue
+
+        # The resolved ID is the key in the mapping dicts, whereas the
+        # condition table has to be accessed by its own column label.
+        overridee_id = resolve_mapping(mapping_df, column_id)
+
+        # Species, compartments, and rule targets are handled elsewhere
+        if model.is_state_variable(overridee_id):
+            continue
+
+        par_mapping[overridee_id] = core.to_float_if_float(
+            condition_df.loc[condition_id, column_id]
+        )
+
+        if isinstance(par_mapping[overridee_id], numbers.Number) and np.isnan(
+            par_mapping[overridee_id]
+        ):
+            # NaN in the condition table for an entity without time derivative
+            # indicates that the model value should be used
+            try:
+                par_mapping[overridee_id] = model.get_parameter_value(
+                    overridee_id
+                )
+            except ValueError as e:
+                raise NotImplementedError(
+                    "Not sure how to handle NaN in condition table for "
+                    f"{overridee_id}."
+                ) from e
+
+        scale_mapping[overridee_id] = LIN
+
+
+def _apply_parameter_table(
+    par_mapping: ParMappingDict,
+    scale_mapping: ScaleMappingDict,
+    parameter_df: pd.DataFrame | None = None,
+    scaled_parameters: bool = False,
+    fill_fixed_parameters: bool = True,
+) -> None:
+    """Replace parameters from parameter table in mapping list for a given
+    condition and set the corresponding scale.
+
+    Replace non-estimated parameters by ``nominalValues``
+    (un-scaled / lin-scaled), replace estimated parameters by the respective
+    ID. Both dicts are modified in place; nothing is done if
+    ``parameter_df`` is ``None``.
+
+    Arguments:
+        par_mapping:
+            mapping dict obtained from
+            :py:func:`get_parameter_mapping_for_condition`
+        scale_mapping:
+            scale mapping dict belonging to ``par_mapping``
+        parameter_df:
+            PEtab parameter table
+        scaled_parameters:
+            If ``True``, filled-in nominal values are scaled according to
+            the parameter scale. Otherwise they are filled in as they are
+            and the scale is set to ``LIN``.
+        fill_fixed_parameters:
+            If ``True``, parameters with ``estimate == 0`` are replaced by
+            their nominal value. Otherwise they are treated like estimated
+            parameters.
+    """
+    if parameter_df is None:
+        return
+
+    # First pass: parameter table rows that are keys of the mapping
+    for row in parameter_df.itertuples():
+        if row.Index not in par_mapping:
+            # The current parameter is not required for this condition
+            continue
+
+        scale = getattr(row, PARAMETER_SCALE, LIN)
+        scale_mapping[row.Index] = scale
+        if fill_fixed_parameters and getattr(row, ESTIMATE) == 0:
+            val = getattr(row, NOMINAL_VALUE)
+            if scaled_parameters:
+                val = parameters.scale(val, scale)
+            else:
+                # unscaled nominal value is filled in -> linear scale
+                scale_mapping[row.Index] = LIN
+            par_mapping[row.Index] = val
+        else:
+            par_mapping[row.Index] = row.Index
+
+    # Replace any leftover mapped parameter coming from condition table
+    # (only values are reassigned here, so iterating over the dict is safe)
+    for problem_par, sim_par in par_mapping.items():
+        # only string values can refer to another parameter
+        if not isinstance(sim_par, str):
+            continue
+
+        try:
+            # the overridee is a model parameter
+            par_mapping[problem_par] = par_mapping[sim_par]
+            scale_mapping[problem_par] = scale_mapping[sim_par]
+        except KeyError:
+            # NOTE: cannot trigger, ``None`` already returned early above
+            if parameter_df is None:
+                raise
+
+            # or the overridee is only defined in the parameter table
+            scale = (
+                parameter_df.loc[sim_par, PARAMETER_SCALE]
+                if PARAMETER_SCALE in parameter_df
+                else LIN
+            )
+
+            if (
+                fill_fixed_parameters
+                and ESTIMATE in parameter_df
+                and parameter_df.loc[sim_par, ESTIMATE] == 0
+            ):
+                val = parameter_df.loc[sim_par, NOMINAL_VALUE]
+                if scaled_parameters:
+                    val = parameters.scale(val, scale)
+                else:
+                    scale = LIN
+                par_mapping[problem_par] = val
+
+            scale_mapping[problem_par] = scale
+
+
+def _perform_mapping_checks(
+    measurement_df: pd.DataFrame,
+    allow_timepoint_specific_numeric_noise_parameters: bool = False,
+) -> None:
+    """Reject measurement tables with features that parameter mapping
+    cannot handle.
+
+    Raises:
+        ValueError: If the measurement table has timepoint-specific
+            parameter overrides.
+    """
+    has_timepoint_specific_overrides = lint.measurement_table_has_timepoint_specific_mappings(  # noqa: E501
+        measurement_df,
+        allow_scalar_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters,  # noqa: E251,E501
+    )
+    if not has_timepoint_specific_overrides:
+        return
+
+    # we could allow that for floats, since they don't matter in this
+    # function and would be simply ignored
+    raise ValueError(
+        "Timepoint-specific parameter overrides currently unsupported."
+    )
+
+
+def handle_missing_overrides(
+    mapping_par_opt_to_par_sim: ParMappingDict,
+    warn: bool = True,
+    condition_id: str = None,
+) -> None:
+    """
+    Reset observable and noise parameter placeholders that were not
+    overridden to ``np.nan`` (in-place).
+
+    An entry counts as not overridden if its value is a string matching the
+    regular expression ``(noise|observable)Parameter[0-9]+_``.
+
+    Parameters:
+        mapping_par_opt_to_par_sim:
+            Output of :py:func:`get_parameter_mapping_for_condition`
+        warn:
+            If True, log warning regarding unmapped parameters
+        condition_id:
+            Optional condition ID for more informative output
+    """
+    placeholder_pattern = re.compile("^(noise|observable)Parameter[0-9]+_")
+    unmapped = []
+    for par_id, mapped_to in mapping_par_opt_to_par_sim.items():
+        try:
+            is_placeholder = placeholder_pattern.match(mapped_to)
+        except TypeError:
+            # not a string, i.e. already mapped to a value
+            continue
+
+        if not is_placeholder:
+            continue
+
+        mapping_par_opt_to_par_sim[par_id] = np.nan
+        unmapped.append(par_id)
+
+    if warn and unmapped:
+        logger.warning(
+            f"Could not map the following overrides for condition "
+            f"{condition_id}: "
+            f"{unmapped}. Usually, this is just due to missing "
+            f"data points."
+        )
+
+
+def merge_preeq_and_sim_pars_condition(
+    condition_map_preeq: ParMappingDict,
+    condition_map_sim: ParMappingDict,
+    condition_scale_map_preeq: ScaleMappingDict,
+    condition_scale_map_sim: ScaleMappingDict,
+    condition: Any,
+) -> None:
+    """Merge preequilibration and simulation parameters and scales for a single
+    condition while checking for compatibility.
+
+    This function is meant for the case where we cannot have different
+    parameters (and scales) for preequilibration and simulation. Therefore,
+    merge both and ensure matching scales and parameters.
+    ``condition_map_sim`` and ``condition_scale_map_sim`` will be modified in
+    place.
+
+    Arguments:
+        condition_map_preeq, condition_map_sim:
+            Parameter mapping as obtained from
+            :py:func:`get_parameter_mapping_for_condition`
+        condition_scale_map_preeq, condition_scale_map_sim:
+            Parameter scale mapping as obtained from
+            :py:func:`get_parameter_mapping_for_condition`
+        condition: Condition identifier for more informative error messages
+
+    Raises:
+        ValueError: If a parameter has different non-empty values, or
+            different scales with both values non-empty, for
+            preequilibration and simulation.
+    """
+    if not condition_map_preeq:
+        # nothing to do
+        return
+
+    all_par_ids = set(condition_map_sim.keys()) | set(
+        condition_map_preeq.keys()
+    )
+
+    for par_id in all_par_ids:
+        if par_id not in condition_map_preeq:
+            # nothing to do
+            continue
+
+        if par_id not in condition_map_sim:
+            # unmapped for simulation -> just use preeq values
+            condition_map_sim[par_id] = condition_map_preeq[par_id]
+            condition_scale_map_sim[par_id] = condition_scale_map_preeq[par_id]
+            continue
+
+        # present in both
+        par_preeq = condition_map_preeq[par_id]
+        par_sim = condition_map_sim[par_id]
+        if par_preeq != par_sim and not (
+            core.is_empty(par_sim) and core.is_empty(par_preeq)
+        ):
+            # values differ and at least one of them is not empty
+            # (both identical or both empty would have been okay)
+            if core.is_empty(par_sim):
+                # unmapped for simulation
+                condition_map_sim[par_id] = par_preeq
+            elif core.is_empty(par_preeq):
+                # unmapped for preeq is okay
+                pass
+            else:
+                raise ValueError(
+                    "Cannot handle different values for dynamic "
+                    f"parameters: for condition {condition} "
+                    f"parameter {par_id} is {par_preeq} for preeq "
+                    f"and {par_sim} for simulation."
+                )
+
+        scale_preeq = condition_scale_map_preeq[par_id]
+        scale_sim = condition_scale_map_sim[par_id]
+
+        if scale_preeq != scale_sim:
+            # both identical is okay
+            # NOTE: ``par_sim`` is still the value read before the merge above
+            if core.is_empty(par_sim):
+                # unmapped for simulation
+                condition_scale_map_sim[par_id] = scale_preeq
+            elif core.is_empty(par_preeq):
+                # unmapped for preeq is okay
+                pass
+            else:
+                raise ValueError(
+                    "Cannot handle different parameter scales "
+                    f"parameters: for condition {condition} "
+                    f"scale for parameter {par_id} is {scale_preeq} for preeq "
+                    f"and {scale_sim} for simulation."
+                )
+
+
+def merge_preeq_and_sim_pars(
+    parameter_mappings: Iterable[ParMappingDictTuple],
+    scale_mappings: Iterable[ScaleMappingDictTuple],
+) -> tuple[list[ParMappingDict], list[ScaleMappingDict]]:
+    """Merge preequilibration and simulation parameters and scales for a list
+    of conditions while checking for compatibility.
+
+    The simulation dicts passed in are modified in place, see
+    :py:func:`merge_preeq_and_sim_pars_condition`.
+
+    Parameters:
+        parameter_mappings:
+            One ``(preequilibration, simulation)`` tuple of parameter
+            mapping dicts per condition.
+        scale_mappings:
+            One ``(preequilibration, simulation)`` tuple of scale mapping
+            dicts per condition, in the same order and of the same length
+            as ``parameter_mappings``.
+
+    Returns:
+        The merged simulation parameter mappings and the merged simulation
+        scale mappings, one dict per condition each.
+
+    Raises:
+        ValueError: If the two iterables differ in length, or if the
+            mappings of a condition are incompatible.
+    """
+    parameter_mapping = []
+    scale_mapping = []
+    # The position in the list serves as condition identifier in error
+    # messages.
+    for ic, (
+        (map_preeq, map_sim),
+        (scale_map_preeq, scale_map_sim),
+    ) in enumerate(zip(parameter_mappings, scale_mappings, strict=True)):
+        merge_preeq_and_sim_pars_condition(
+            condition_map_preeq=map_preeq,
+            condition_map_sim=map_sim,
+            condition_scale_map_preeq=scale_map_preeq,
+            condition_scale_map_sim=scale_map_sim,
+            condition=ic,
+        )
+        parameter_mapping.append(map_sim)
+        scale_mapping.append(scale_map_sim)
+
+    return parameter_mapping, scale_mapping
diff --git a/petab/v1/parameters.py b/petab/v1/parameters.py
new file mode 100644
index 00000000..382e6b57
--- /dev/null
+++ b/petab/v1/parameters.py
@@ -0,0 +1,638 @@
+"""Functions operating on the PEtab parameter table"""
+
+import numbers
+import warnings
+from collections import OrderedDict
+from collections.abc import Iterable, Sequence
+from pathlib import Path
+from typing import (
+ Literal,
+)
+
+import libsbml
+import numpy as np
+import pandas as pd
+
+from . import conditions, core, lint, measurements, observables
+from .C import * # noqa: F403
+from .models import Model
+
+__all__ = [
+ "create_parameter_df",
+ "get_optimization_parameter_scaling",
+ "get_optimization_parameters",
+ "get_parameter_df",
+ "get_priors_from_df",
+ "get_valid_parameters_for_parameter_table",
+ "map_scale",
+ "map_unscale",
+ "normalize_parameter_df",
+ "scale",
+ "unscale",
+ "write_parameter_df",
+]
+
+PARAMETER_SCALE_ARGS = Literal["", "lin", "log", "log10"]
+
+
+def get_parameter_df(
+    parameter_file: str
+    | Path
+    | pd.DataFrame
+    | Iterable[str | Path | pd.DataFrame]
+    | None,
+) -> pd.DataFrame | None:
+    """
+    Read the provided parameter file into a ``pandas.Dataframe``.
+
+    Arguments:
+        parameter_file: Name of the file to read from or pandas.Dataframe,
+            or an Iterable of those. Empty entries of an Iterable
+            (``None`` or ``""``) are skipped. A DataFrame that is passed
+            directly is re-indexed in place, not copied.
+
+    Returns:
+        Parameter ``DataFrame`` indexed by parameter ID, or ``None`` if
+        ``None`` was passed or the Iterable had no non-empty entries.
+
+    Raises:
+        KeyError: If the table has no parameter ID column.
+        ValueError: If a parameter ID occurs more than once.
+    """
+    if parameter_file is None:
+        return None
+    if isinstance(parameter_file, pd.DataFrame):
+        parameter_df = parameter_file
+    elif isinstance(parameter_file, str | Path):
+        parameter_df = pd.read_csv(
+            parameter_file, sep="\t", float_precision="round_trip"
+        )
+    elif isinstance(parameter_file, Iterable):
+        # Skip empty entries. DataFrames have to be recognized by type:
+        # their truth value is ambiguous and ``if x`` raises ValueError.
+        dfs = [
+            get_parameter_df(x)
+            for x in parameter_file
+            if isinstance(x, pd.DataFrame) or x
+        ]
+
+        if not dfs:
+            return None
+
+        parameter_df = pd.concat(dfs)
+        # Check for contradicting parameter definitions
+        _check_for_contradicting_parameter_definitions(parameter_df)
+
+        return parameter_df
+
+    lint.assert_no_leading_trailing_whitespace(
+        parameter_df.columns.values, "parameter"
+    )
+
+    if not isinstance(parameter_df.index, pd.RangeIndex):
+        # Keep the current index as a column only if it already holds the
+        # parameter IDs; any other custom index is discarded. The name is
+        # read from the resolved frame, not from the raw argument.
+        parameter_df.reset_index(
+            drop=parameter_df.index.name != PARAMETER_ID,
+            inplace=True,
+        )
+
+    try:
+        parameter_df.set_index([PARAMETER_ID], inplace=True)
+    except KeyError as e:
+        raise KeyError(
+            f"Parameter table missing mandatory field {PARAMETER_ID}."
+        ) from e
+    _check_for_contradicting_parameter_definitions(parameter_df)
+
+    return parameter_df
+
+
+def _check_for_contradicting_parameter_definitions(parameter_df: pd.DataFrame):
+    """
+    Ensure that every index entry of the parameter table occurs only once.
+
+    Raises:
+        ValueError: If the index contains duplicates; the message lists
+            the duplicated entries.
+    """
+    index = parameter_df.index
+    duplicated_ids = set(index.values[index.duplicated()])
+    if not duplicated_ids:
+        return
+    raise ValueError(
+        f"The values of `{PARAMETER_ID}` must be unique. The "
+        f"following duplicates were found:\n{duplicated_ids}"
+    )
+
+
+def write_parameter_df(df: pd.DataFrame, filename: str | Path) -> None:
+    """Write PEtab parameter table
+
+    The table is first passed through :func:`get_parameter_df` and then
+    written tab-separated, including its index.
+
+    Arguments:
+        df: PEtab parameter table
+        filename: Destination file name
+    """
+    get_parameter_df(df).to_csv(filename, sep="\t", index=True)
+
+
+def get_optimization_parameters(parameter_df: pd.DataFrame) -> list[str]:
+    """
+    Get list of optimization parameter IDs from parameter table.
+
+    Arguments:
+        parameter_df: PEtab parameter DataFrame
+
+    Returns:
+        Index entries of all rows whose estimate flag equals 1, in table
+        order.
+    """
+    is_estimated = parameter_df[ESTIMATE] == 1
+    return list(parameter_df.index[is_estimated])
+
+
+def get_optimization_parameter_scaling(
+    parameter_df: pd.DataFrame,
+) -> dict[str, str]:
+    """
+    Map the ID of every optimization parameter to its parameter scale.
+
+    Arguments:
+        parameter_df: PEtab parameter DataFrame
+
+    Returns:
+        Dictionary with the index entry of each row whose estimate flag
+        equals 1 as key and that row's parameter scaling string as value.
+    """
+    estimated = parameter_df.loc[parameter_df[ESTIMATE] == 1]
+    return {
+        parameter_id: parameter_scale
+        for parameter_id, parameter_scale in zip(
+            estimated.index, estimated[PARAMETER_SCALE], strict=True
+        )
+    }
+
+
+def create_parameter_df(
+    sbml_model: libsbml.Model | None = None,
+    condition_df: pd.DataFrame | None = None,
+    observable_df: pd.DataFrame | None = None,
+    measurement_df: pd.DataFrame | None = None,
+    model: Model | None = None,
+    include_optional: bool = False,
+    parameter_scale: str = LOG10,
+    lower_bound: Iterable = None,
+    upper_bound: Iterable = None,
+    mapping_df: pd.DataFrame | None = None,
+) -> pd.DataFrame:
+    """Create a new PEtab parameter table
+
+    All table entries can be provided as string or list-like with length
+    matching the number of parameters
+
+    Arguments:
+        sbml_model: SBML Model (deprecated, mutually exclusive with ``model``)
+        model: PEtab model (mutually exclusive with ``sbml_model``)
+        condition_df: PEtab condition DataFrame
+        observable_df: PEtab observable DataFrame
+        measurement_df: PEtab measurement DataFrame
+        include_optional: By default this only returns parameters that are
+            required to be present in the parameter table. If set to ``True``,
+            this returns all parameters that are allowed to be present in the
+            parameter table (i.e. also including parameters specified in the
+            model).
+        parameter_scale: parameter scaling
+        lower_bound: lower bound for parameter value
+        upper_bound: upper bound for parameter value
+        mapping_df: PEtab mapping DataFrame
+
+    Returns:
+        The created parameter DataFrame, indexed by parameter ID. Every
+        parameter is marked as estimated and gets empty prior columns; the
+        nominal value is taken from the model where the model provides one
+        and is NaN otherwise.
+
+    Raises:
+        ValueError: If both ``model`` and ``sbml_model`` are given.
+    """
+    if sbml_model:
+        warnings.warn(
+            "Passing a model via the `sbml_model` argument is "
+            "deprecated, use `model=petab.models.sbml_model."
+            "SbmlModel(...)` instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        from .models.sbml_model import SbmlModel
+
+        if model:
+            raise ValueError(
+                "Arguments `model` and `sbml_model` are mutually exclusive."
+            )
+        model = SbmlModel(sbml_model=sbml_model)
+
+    # arguments shared by both ID collectors
+    table_kwargs = {
+        "model": model,
+        "condition_df": condition_df,
+        "observable_df": observable_df,
+        "measurement_df": measurement_df,
+    }
+    if include_optional:
+        # NOTE(review): mapping_df is not forwarded in this branch although
+        # get_valid_parameters_for_parameter_table accepts it -- confirm
+        # that this is intended.
+        candidates = get_valid_parameters_for_parameter_table(**table_kwargs)
+    else:
+        candidates = get_required_parameters_for_parameter_table(
+            **table_kwargs, mapping_df=mapping_df
+        )
+    parameter_ids = list(candidates)
+
+    columns = {
+        PARAMETER_ID: parameter_ids,
+        PARAMETER_NAME: parameter_ids,
+        PARAMETER_SCALE: parameter_scale,
+        LOWER_BOUND: lower_bound,
+        UPPER_BOUND: upper_bound,
+        NOMINAL_VALUE: np.nan,
+        ESTIMATE: 1,
+        INITIALIZATION_PRIOR_TYPE: "",
+        INITIALIZATION_PRIOR_PARAMETERS: "",
+        OBJECTIVE_PRIOR_TYPE: "",
+        OBJECTIVE_PRIOR_PARAMETERS: "",
+    }
+    df = pd.DataFrame(data=columns).set_index([PARAMETER_ID])
+
+    # For model parameters, set nominal values as defined in the model
+    for parameter_id in df.index:
+        try:
+            nominal_value = model.get_parameter_value(parameter_id)
+        except ValueError:
+            # parameter was introduced as condition-specific override and
+            # is potentially not present in the model
+            continue
+        df.loc[parameter_id, NOMINAL_VALUE] = nominal_value
+    return df
+
+
+def get_required_parameters_for_parameter_table(
+    model: Model,
+    condition_df: pd.DataFrame,
+    observable_df: pd.DataFrame,
+    measurement_df: pd.DataFrame,
+    mapping_df: pd.DataFrame = None,
+) -> set[str]:
+    """
+    Get set of parameters which need to go into the parameter table
+
+    Arguments:
+        model: PEtab model
+        condition_df: PEtab condition table
+        observable_df: PEtab observable table
+        measurement_df: PEtab measurement table
+        mapping_df: PEtab mapping table
+
+    Returns:
+        Set of parameter IDs which PEtab requires to be present in the
+        parameter table. That is all {observable,noise}Parameters from the
+        measurement table as well as all parametric condition table overrides
+        that are not defined in the model.
+        The result is the key view of an ordered dict, so the IDs are
+        reported in the order in which they were collected.
+    """
+    # use ordered dict as proxy for ordered set
+    parameter_ids = OrderedDict()
+
+    # Add parameters from measurement table, unless they occur as a column
+    # of the condition table. Non-string (numeric) overrides are ignored.
+    def append_overrides(overrides):
+        for p in overrides:
+            if isinstance(p, str) and p not in condition_df.columns:
+                parameter_ids[p] = None
+
+    for _, row in measurement_df.iterrows():
+        # we trust that the number of overrides matches
+        append_overrides(
+            measurements.split_parameter_replacement_list(
+                row.get(OBSERVABLE_PARAMETERS, None)
+            )
+        )
+        append_overrides(
+            measurements.split_parameter_replacement_list(
+                row.get(NOISE_PARAMETERS, None)
+            )
+        )
+
+    # Add output parameters except for placeholders.
+    # Each pair holds the keyword arguments selecting the formula type and
+    # the keyword arguments selecting which placeholders to exclude for it.
+    for formula_type, placeholder_sources in (
+        (
+            # Observable formulae
+            {"observables": True, "noise": False},
+            # can only contain observable placeholders
+            {"noise": False, "observables": True},
+        ),
+        (
+            # Noise formulae
+            {"observables": False, "noise": True},
+            # can contain noise and observable placeholders
+            {"noise": True, "observables": True},
+        ),
+    ):
+        output_parameters = observables.get_output_parameters(
+            observable_df,
+            model,
+            mapping_df=mapping_df,
+            **formula_type,
+        )
+        placeholders = observables.get_placeholders(
+            observable_df,
+            **placeholder_sources,
+        )
+        for p in output_parameters:
+            if p not in placeholders:
+                parameter_ids[p] = None
+
+    # Add condition table parametric overrides unless already defined in the
+    # model
+    for p in conditions.get_parametric_overrides(condition_df):
+        if not model.has_entity_with_id(p):
+            parameter_ids[p] = None
+
+    # remove parameters that occur in the condition table and are overridden
+    # for ALL conditions (i.e. columns without any missing value)
+    for p in condition_df.columns[~condition_df.isnull().any()]:
+        try:
+            del parameter_ids[p]
+        except KeyError:
+            # column was never collected as a required parameter
+            pass
+    return parameter_ids.keys()
+
+
+def get_valid_parameters_for_parameter_table(
+    model: Model,
+    condition_df: pd.DataFrame,
+    observable_df: pd.DataFrame,
+    measurement_df: pd.DataFrame,
+    mapping_df: pd.DataFrame = None,
+) -> set[str]:
+    """
+    Get set of parameters which may be present inside the parameter table
+
+    Arguments:
+        model: PEtab model
+        condition_df: PEtab condition table
+        observable_df: PEtab observable table
+        measurement_df: PEtab measurement table
+        mapping_df: PEtab mapping table for additional checks
+
+    Returns:
+        Set of parameter IDs which PEtab allows to be present in the
+        parameter table.
+        The result is the key view of an ordered dict, so the IDs are
+        reported in the order in which they were collected.
+    """
+    # - grab all allowed model parameters
+    # - grab corresponding names from mapping table
+    # - grab all output parameters defined in {observable,noise}Formula
+    # - grab all parameters from measurement table
+    # - grab all parametric overrides from condition table
+    # - remove parameters for which condition table columns exist
+    # - remove placeholder parameters
+    #   (only partial overrides are not supported)
+
+    # must not go into parameter table
+    blackset = set()
+
+    if observable_df is not None:
+        placeholders = set(observables.get_placeholders(observable_df))
+
+        # collect assignment targets
+        blackset |= placeholders
+
+    if condition_df is not None:
+        # every condition table column except the condition name
+        blackset |= set(condition_df.columns.values) - {CONDITION_NAME}
+
+    # don't use sets here, to have deterministic ordering,
+    # e.g. for creating parameter tables
+    parameter_ids = OrderedDict.fromkeys(
+        p
+        for p in model.get_valid_parameters_for_parameter_table()
+        if p not in blackset
+    )
+
+    if mapping_df is not None:
+        # a mapping table ID is allowed if the model entity it maps to is
+        for from_id, to_id in zip(
+            mapping_df.index.values, mapping_df[MODEL_ENTITY_ID], strict=True
+        ):
+            if to_id in parameter_ids.keys():
+                parameter_ids[from_id] = None
+
+    if observable_df is not None:
+        # add output parameters from observables table
+        output_parameters = observables.get_output_parameters(
+            observable_df=observable_df, model=model
+        )
+        for p in output_parameters:
+            if p not in blackset:
+                parameter_ids[p] = None
+
+    # Append parameters from measurement table, unless they occur as condition
+    # table columns (or are placeholders). Non-string overrides are ignored.
+    def append_overrides(overrides):
+        for p in overrides:
+            if isinstance(p, str) and p not in blackset:
+                parameter_ids[p] = None
+
+    if measurement_df is not None:
+        for _, row in measurement_df.iterrows():
+            # we trust that the number of overrides matches
+            append_overrides(
+                measurements.split_parameter_replacement_list(
+                    row.get(OBSERVABLE_PARAMETERS, None)
+                )
+            )
+            append_overrides(
+                measurements.split_parameter_replacement_list(
+                    row.get(NOISE_PARAMETERS, None)
+                )
+            )
+
+    # Append parameter overrides from condition table
+    # (these are added without consulting the blackset)
+    if condition_df is not None:
+        for p in conditions.get_parametric_overrides(condition_df):
+            parameter_ids[p] = None
+
+    return parameter_ids.keys()
+
+
+def get_priors_from_df(
+    parameter_df: pd.DataFrame,
+    mode: Literal["initialization", "objective"],
+    parameter_ids: Sequence[str] = None,
+) -> list[tuple]:
+    """Collect the prior information of the estimated parameters
+
+    Arguments:
+        parameter_df: PEtab parameter table
+        mode: ``'initialization'`` or ``'objective'``
+        parameter_ids: A sequence of parameter IDs for which to sample starting
+            points.
+            For subsetting or reordering the parameters.
+            Defaults to all estimated parameters.
+
+    Returns:
+        One tuple ``(prior_type, prior_parameters, parameter_scale,
+        (lower_bound, upper_bound))`` per selected parameter.
+
+    Raises:
+        KeyError: If ``parameter_ids`` contains an ID that is not an
+            estimated parameter of the table.
+    """
+    # only estimated parameters carry priors
+    estimated = parameter_df.loc[parameter_df[ESTIMATE] == 1]
+
+    if parameter_ids:
+        try:
+            estimated = estimated.loc[parameter_ids, :]
+        except KeyError as e:
+            missing_ids = set(parameter_ids) - set(estimated.index)
+            raise KeyError(
+                "Parameter table does not contain estimated parameter(s) "
+                f"{missing_ids}."
+            ) from e
+
+    type_column = f"{mode}PriorType"
+    parameters_column = f"{mode}PriorParameters"
+
+    priors = []
+    for _, row in estimated.iterrows():
+        # prior type; an empty entry means a uniform prior on the bounds
+        prior_type = str(row.get(type_column, ""))
+        if core.is_empty(prior_type):
+            prior_type = PARAMETER_SCALE_UNIFORM
+
+        # prior parameters as a tuple of floats; an empty entry falls back
+        # to the scaled parameter bounds
+        pars_str = str(row.get(parameters_column, ""))
+        if core.is_empty(pars_str):
+            lb, ub = map_scale(
+                [row[LOWER_BOUND], row[UPPER_BOUND]],
+                [row[PARAMETER_SCALE]] * 2,
+            )
+            pars_str = f"{lb}{PARAMETER_SEPARATOR}{ub}"
+        prior_pars = tuple(map(float, pars_str.split(PARAMETER_SEPARATOR)))
+
+        # scale and bounds are reported alongside, as they may be needed
+        par_scale = row[PARAMETER_SCALE]
+        par_bounds = (row[LOWER_BOUND], row[UPPER_BOUND])
+
+        # a missing entry converted by str() reads "nan": treat it as
+        # "no prior specified" and assume a non-informative (uniform) one
+        if prior_type == "nan":
+            prior_type = PARAMETER_SCALE_UNIFORM
+            prior_pars = (
+                scale(row[LOWER_BOUND], par_scale),
+                scale(row[UPPER_BOUND], par_scale),
+            )
+
+        priors.append((prior_type, prior_pars, par_scale, par_bounds))
+
+    return priors
+
+
+def scale(
+    parameter: numbers.Number,
+    scale_str: PARAMETER_SCALE_ARGS,
+) -> numbers.Number:
+    """Transform a parameter value to the scale given by ``scale_str``.
+
+    Arguments:
+        parameter:
+            Parameter to be scaled.
+        scale_str:
+            One of ``'lin'`` (synonymous with ``''``), ``'log'``, ``'log10'``.
+
+    Returns:
+        The scaled parameter.
+
+    Raises:
+        ValueError: If ``scale_str`` is none of the supported scales.
+    """
+    if scale_str == LIN or not scale_str:
+        scaled = parameter
+    elif scale_str == LOG:
+        scaled = np.log(parameter)
+    elif scale_str == LOG10:
+        scaled = np.log10(parameter)
+    else:
+        raise ValueError(f"Invalid parameter scaling: {scale_str}")
+    return scaled
+
+
+def unscale(
+    parameter: numbers.Number,
+    scale_str: PARAMETER_SCALE_ARGS,
+) -> numbers.Number:
+    """Transform a parameter value from the scale ``scale_str`` back to
+    linear scale.
+
+    Arguments:
+        parameter:
+            Parameter to be unscaled.
+        scale_str:
+            One of ``'lin'`` (synonymous with ``''``), ``'log'``, ``'log10'``.
+
+    Returns:
+        The unscaled parameter.
+
+    Raises:
+        ValueError: If ``scale_str`` is none of the supported scales.
+    """
+    if scale_str == LIN or not scale_str:
+        unscaled = parameter
+    elif scale_str == LOG:
+        unscaled = np.exp(parameter)
+    elif scale_str == LOG10:
+        unscaled = 10**parameter
+    else:
+        raise ValueError(f"Invalid parameter scaling: {scale_str}")
+    return unscaled
+
+
+def map_scale(
+    parameters: Sequence[numbers.Number],
+    scale_strs: Iterable[PARAMETER_SCALE_ARGS] | PARAMETER_SCALE_ARGS,
+) -> Iterable[numbers.Number]:
+    """Apply :func:`scale` element-wise to a sequence of parameters.
+
+    Arguments:
+        parameters:
+            Parameters to be scaled.
+        scale_strs:
+            Scales to apply. Broadcast if a single string.
+
+    Returns:
+        A lazy generator over the scaled parameters. Each value is only
+        scaled when it is consumed.
+    """
+    if isinstance(scale_strs, str):
+        # one scale for all parameters
+        scale_strs = [scale_strs] * len(parameters)
+    # strict=True: a length mismatch raises ValueError during iteration
+    pairs = zip(parameters, scale_strs, strict=True)
+    return (scale(value, value_scale) for value, value_scale in pairs)
+
+
+def map_unscale(
+    parameters: Sequence[numbers.Number],
+    scale_strs: Iterable[PARAMETER_SCALE_ARGS] | PARAMETER_SCALE_ARGS,
+) -> Iterable[numbers.Number]:
+    """Apply :func:`unscale` element-wise to a sequence of parameters.
+
+    Arguments:
+        parameters:
+            Parameters to be unscaled.
+        scale_strs:
+            Scales that the parameters are currently on.
+            Broadcast if a single string.
+
+    Returns:
+        A lazy generator over the unscaled parameters. Each value is only
+        unscaled when it is consumed.
+    """
+    if isinstance(scale_strs, str):
+        # one scale for all parameters
+        scale_strs = [scale_strs] * len(parameters)
+    # strict=True: a length mismatch raises ValueError during iteration
+    pairs = zip(parameters, scale_strs, strict=True)
+    return (unscale(value, value_scale) for value, value_scale in pairs)
+
+
+def normalize_parameter_df(parameter_df: pd.DataFrame) -> pd.DataFrame:
+    """Add missing columns and fill in default values.
+
+    Works on a deep copy; the passed table is left untouched.
+
+    * A missing parameter name column is filled with the parameter IDs.
+    * Missing or empty prior types become ``PARAMETER_SCALE_UNIFORM``.
+    * Empty prior parameters of such uniform priors are filled with the
+      scaled lower and upper bound.
+
+    Arguments:
+        parameter_df: PEtab parameter table
+
+    Returns:
+        The normalized copy of the parameter table.
+    """
+    df = parameter_df.copy(deep=True)
+
+    if PARAMETER_NAME not in df:
+        # Assign the bare values: a Series would be aligned on the index,
+        # and the RangeIndex produced by reset_index() does not match an
+        # index of parameter IDs, which left the whole column NaN.
+        df[PARAMETER_NAME] = df.reset_index()[PARAMETER_ID].values
+
+    prior_type_cols = [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE]
+    prior_par_cols = [
+        INITIALIZATION_PRIOR_PARAMETERS,
+        OBJECTIVE_PRIOR_PARAMETERS,
+    ]
+    # iterate over initialization and objective priors
+    for prior_type_col, prior_par_col in zip(
+        prior_type_cols, prior_par_cols, strict=True
+    ):
+        # fill in default values for prior type
+        if prior_type_col not in df:
+            df[prior_type_col] = PARAMETER_SCALE_UNIFORM
+        else:
+            for irow, row in df.iterrows():
+                if core.is_empty(row[prior_type_col]):
+                    df.loc[irow, prior_type_col] = PARAMETER_SCALE_UNIFORM
+        if prior_par_col not in df:
+            df[prior_par_col] = None
+        # second pass, so that the defaulted prior types are visible
+        for irow, row in df.iterrows():
+            if (
+                core.is_empty(row[prior_par_col])
+                and row[prior_type_col] == PARAMETER_SCALE_UNIFORM
+            ):
+                lb, ub = map_scale(
+                    [row[LOWER_BOUND], row[UPPER_BOUND]],
+                    [row[PARAMETER_SCALE]] * 2,
+                )
+                df.loc[irow, prior_par_col] = f"{lb}{PARAMETER_SEPARATOR}{ub}"
+
+    return df
diff --git a/petab/problem.py b/petab/v1/problem.py
similarity index 95%
rename from petab/problem.py
rename to petab/v1/problem.py
index 6c5307b2..4a5577eb 100644
--- a/petab/problem.py
+++ b/petab/v1/problem.py
@@ -3,10 +3,10 @@
import os
import tempfile
+from collections.abc import Iterable
from math import nan
from pathlib import Path, PurePosixPath
-from typing import TYPE_CHECKING, Iterable
-from urllib.parse import unquote, urlparse, urlunparse
+from typing import TYPE_CHECKING
from warnings import warn
import pandas as pd
@@ -28,6 +28,7 @@
from .models import MODEL_TYPE_SBML
from .models.model import Model, model_factory
from .models.sbml_model import SbmlModel
+from .yaml import get_path_prefix
if TYPE_CHECKING:
import libsbml
@@ -49,7 +50,7 @@ class Problem:
Optionally it may contain visualization tables.
- Attributes:
+ Parameters:
condition_df: PEtab condition table
measurement_df: PEtab measurement table
parameter_df: PEtab parameter table
@@ -262,34 +263,9 @@ def from_yaml(yaml_config: dict | Path | str) -> Problem:
get_path = lambda filename: filename # noqa: E731
if isinstance(yaml_config, str):
- yaml_path = yaml_config
+ path_prefix = get_path_prefix(yaml_config)
yaml_config = yaml.load_yaml(yaml_config)
-
- # yaml_config may be path or URL
- path_url = urlparse(yaml_path)
- if not path_url.scheme or (
- path_url.scheme != "file" and not path_url.netloc
- ):
- # a regular file path string
- path_prefix = Path(yaml_path).parent
- get_path = lambda filename: path_prefix / filename # noqa: E731
- else:
- # a URL
- # extract parent path from
- url_path = unquote(urlparse(yaml_path).path)
- parent_path = str(PurePosixPath(url_path).parent)
- path_prefix = urlunparse(
- (
- path_url.scheme,
- path_url.netloc,
- parent_path,
- path_url.params,
- path_url.query,
- path_url.fragment,
- )
- )
- # need "/" on windows, not "\"
- get_path = lambda filename: f"{path_prefix}/{filename}" # noqa: E731
+ get_path = lambda filename: f"{path_prefix}/{filename}" # noqa: E731
if yaml.is_composite_problem(yaml_config):
raise ValueError(
@@ -307,6 +283,12 @@ def from_yaml(yaml_config: dict | Path | str) -> Problem:
)
if yaml_config[FORMAT_VERSION] == "2.0.0":
warn("Support for PEtab2.0 is experimental!", stacklevel=2)
+ warn(
+ "Using petab.v1.Problem with PEtab2.0 is deprecated. "
+ "Use petab.v2.Problem instead.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
problem0 = yaml_config["problems"][0]
@@ -503,7 +485,7 @@ def to_files_generic(
if prefix_path is None:
return filenames["yaml_file"]
- return str(prefix_path / filenames["yaml_file"])
+ return str(PurePosixPath(prefix_path, filenames["yaml_file"]))
def to_files(
self,
@@ -647,7 +629,7 @@ def error(name: str) -> ValueError:
mapping_files=mapping_file,
)
- def get_optimization_parameters(self):
+ def get_optimization_parameters(self) -> list[str]:
"""
Return list of optimization parameter IDs.
@@ -655,7 +637,7 @@ def get_optimization_parameters(self):
"""
return parameters.get_optimization_parameters(self.parameter_df)
- def get_optimization_parameter_scales(self):
+ def get_optimization_parameter_scales(self) -> dict[str, str]:
"""
Return list of optimization parameter scaling strings.
@@ -663,7 +645,7 @@ def get_optimization_parameter_scales(self):
"""
return parameters.get_optimization_parameter_scaling(self.parameter_df)
- def get_model_parameters(self):
+ def get_model_parameters(self) -> list[str] | dict[str, float]:
"""See :py:func:`petab.sbml.get_model_parameters`"""
warn(
"petab.Problem.get_model_parameters is deprecated and will be "
@@ -674,7 +656,7 @@ def get_model_parameters(self):
return sbml.get_model_parameters(self.sbml_model)
- def get_observable_ids(self):
+ def get_observable_ids(self) -> list[str]:
"""
Returns dictionary of observable ids.
"""
@@ -945,7 +927,7 @@ def sample_parameter_startpoints_dict(
parameter values.
"""
return [
- dict(zip(self.x_free_ids, parameter_values))
+ dict(zip(self.x_free_ids, parameter_values, strict=True))
for parameter_values in self.sample_parameter_startpoints(
n_starts=n_starts
)
diff --git a/petab/sampling.py b/petab/v1/sampling.py
similarity index 97%
rename from petab/sampling.py
rename to petab/v1/sampling.py
index 466c5284..be154f1c 100644
--- a/petab/sampling.py
+++ b/petab/v1/sampling.py
@@ -1,6 +1,6 @@
"""Functions related to parameter sampling"""
-from typing import Sequence, Tuple
+from collections.abc import Sequence
import numpy as np
import pandas as pd
@@ -12,7 +12,7 @@
def sample_from_prior(
- prior: Tuple[str, list, str, list], n_starts: int
+ prior: tuple[str, list, str, list], n_starts: int
) -> np.array:
"""Creates samples for one parameter based on prior
diff --git a/petab/sbml.py b/petab/v1/sbml.py
similarity index 94%
rename from petab/sbml.py
rename to petab/v1/sbml.py
index b177478e..0a8fd20f 100644
--- a/petab/sbml.py
+++ b/petab/v1/sbml.py
@@ -4,13 +4,12 @@
import logging
from numbers import Number
from pathlib import Path
-from typing import Dict, List, Optional, Tuple, Union
from warnings import warn
import libsbml
from pandas.io.common import get_handle, is_file_like, is_url
-import petab
+import petab.v1 as petab
logger = logging.getLogger(__name__)
__all__ = [
@@ -132,7 +131,7 @@ def globalize_parameters(
def get_model_parameters(
sbml_model: libsbml.Model, with_values=False
-) -> Union[List[str], Dict[str, float]]:
+) -> list[str] | dict[str, float]:
"""Return SBML model parameters which are not Rule targets
Arguments:
@@ -157,9 +156,7 @@ def get_model_parameters(
}
-def write_sbml(
- sbml_doc: libsbml.SBMLDocument, filename: Union[Path, str]
-) -> None:
+def write_sbml(sbml_doc: libsbml.SBMLDocument, filename: Path | str) -> None:
"""Write PEtab visualization table
Arguments:
@@ -177,7 +174,7 @@ def write_sbml(
def get_sbml_model(
filepath_or_buffer,
-) -> Tuple[libsbml.SBMLReader, libsbml.SBMLDocument, libsbml.Model]:
+) -> tuple[libsbml.SBMLReader, libsbml.SBMLDocument, libsbml.Model]:
"""Get an SBML model from file or URL or file handle
:param filepath_or_buffer:
@@ -195,7 +192,7 @@ def get_sbml_model(
def load_sbml_from_string(
sbml_string: str,
-) -> Tuple[libsbml.SBMLReader, libsbml.SBMLDocument, libsbml.Model]:
+) -> tuple[libsbml.SBMLReader, libsbml.SBMLDocument, libsbml.Model]:
"""Load SBML model from string
:param sbml_string: Model as XML string
@@ -210,24 +207,30 @@ def load_sbml_from_string(
def load_sbml_from_file(
sbml_file: str,
-) -> Tuple[libsbml.SBMLReader, libsbml.SBMLDocument, libsbml.Model]:
+) -> tuple[libsbml.SBMLReader, libsbml.SBMLDocument, libsbml.Model]:
"""Load SBML model from file
:param sbml_file: Filename of the SBML file
:return: The SBML reader, document, model
"""
+ if not Path(sbml_file).is_file():
+ raise FileNotFoundError(f"File not found: {sbml_file}")
+
sbml_reader = libsbml.SBMLReader()
sbml_document = sbml_reader.readSBML(sbml_file)
sbml_model = sbml_document.getModel()
+ if sbml_model is None:
+ raise ValueError(f"SBML model could not be loaded from {sbml_file}")
+
return sbml_reader, sbml_document, sbml_model
def get_model_for_condition(
petab_problem: "petab.Problem",
sim_condition_id: str = None,
- preeq_condition_id: Optional[str] = None,
-) -> Tuple[libsbml.SBMLDocument, libsbml.Model]:
+ preeq_condition_id: str | None = None,
+) -> tuple[libsbml.SBMLDocument, libsbml.Model]:
"""Create an SBML model for the given condition.
Creates a copy of the model and updates parameters according to the PEtab
diff --git a/petab/v1/simplify.py b/petab/v1/simplify.py
new file mode 100644
index 00000000..c4cdeb91
--- /dev/null
+++ b/petab/v1/simplify.py
@@ -0,0 +1,115 @@
+"""Functionality for simplifying PEtab problems"""
+from math import nan
+
+import pandas as pd
+
+import petab.v1 as petab
+
+from . import Problem
+from .C import * # noqa: F403
+from .lint import lint_problem
+
+__all__ = [
+ "remove_nan_measurements",
+ "remove_unused_observables",
+ "remove_unused_conditions",
+ "simplify_problem",
+ "condition_parameters_to_parameter_table",
+]
+
+
+def remove_nan_measurements(problem: Problem):
+    """Drop all measurement rows whose measured value is NaN.
+
+    ``problem.measurement_df`` is replaced by the filtered table and its
+    index is renumbered.
+    """
+    has_value = problem.measurement_df[MEASUREMENT].notna()
+    problem.measurement_df = problem.measurement_df[has_value]
+    problem.measurement_df.reset_index(inplace=True, drop=True)
+
+
+def remove_unused_observables(problem: Problem):
+    """Keep only those observables that occur in the measurement table.
+
+    ``problem.observable_df`` is replaced by the filtered table.
+    """
+    used_ids = set(problem.measurement_df[OBSERVABLE_ID].unique())
+    is_used = problem.observable_df.index.isin(used_ids)
+    problem.observable_df = problem.observable_df[is_used]
+
+
+def remove_unused_conditions(problem: Problem):
+    """Keep only those conditions that occur in the measurement table.
+
+    A condition counts as used if it appears as simulation condition or,
+    when that column exists, as preequilibration condition.
+    ``problem.condition_df`` is replaced by the filtered table.
+    """
+    measurement_df = problem.measurement_df
+    used_ids = set(measurement_df[SIMULATION_CONDITION_ID].unique())
+    if PREEQUILIBRATION_CONDITION_ID in measurement_df:
+        used_ids |= set(measurement_df[PREEQUILIBRATION_CONDITION_ID].unique())
+
+    is_used = problem.condition_df.index.isin(used_ids)
+    problem.condition_df = problem.condition_df[is_used]
+
+
+def simplify_problem(problem: Problem):
+    """Simplify a PEtab problem in place.
+
+    Removes unused observables and conditions and moves constant condition
+    table parameters to the parameter table. The problem is linted before
+    and after; a truthy lint result is treated as "invalid".
+
+    Raises:
+        ValueError: If the supplied problem does not pass linting.
+        AssertionError: If the simplified problem does not pass linting.
+    """
+    if lint_problem(problem):
+        raise ValueError("Invalid PEtab problem supplied.")
+
+    for simplification in (
+        remove_unused_observables,
+        remove_unused_conditions,
+        condition_parameters_to_parameter_table,
+    ):
+        simplification(problem)
+
+    if lint_problem(problem):
+        raise AssertionError("Invalid PEtab problem generated.")
+
+
+def condition_parameters_to_parameter_table(problem: Problem):
+    """Move parameters from the condition table to the parameters table, if
+    the same parameter value is used for all conditions.
+
+    Only numeric values are moved; columns holding parametric overrides
+    (parameter IDs) and columns of state variables are left in place.
+    Moved parameters are added to the parameter table as non-estimated,
+    linearly scaled parameters without bounds, and their columns are
+    dropped from the condition table.
+
+    Does nothing if the problem has no model or no (non-empty) condition
+    table.
+    """
+    if (
+        problem.condition_df is None
+        or problem.condition_df.empty
+        or problem.model is None
+    ):
+        return
+
+    replacements = {}
+    for parameter_id in problem.condition_df:
+        if parameter_id == CONDITION_NAME:
+            continue
+
+        if problem.model.is_state_variable(parameter_id):
+            # initial states can't go into the parameters table
+            continue
+
+        series = problem.condition_df[parameter_id]
+        value = petab.to_float_if_float(series.iloc[0])
+
+        # same value for all conditions and no parametric overrides (str)?
+        if isinstance(value, float) and len(series.unique()) == 1:
+            # Store the converted float, not the raw cell: in a column of
+            # object dtype the raw cell can be a numeric string like "1.0",
+            # which must not end up as nominal value.
+            replacements[parameter_id] = value
+
+    if not replacements:
+        return
+
+    rows = [
+        {
+            PARAMETER_ID: parameter_id,
+            PARAMETER_SCALE: LIN,
+            LOWER_BOUND: nan,
+            UPPER_BOUND: nan,
+            NOMINAL_VALUE: value,
+            ESTIMATE: 0,
+        }
+        for parameter_id, value in replacements.items()
+    ]
+    rows = pd.DataFrame(rows)
+    rows.set_index(PARAMETER_ID, inplace=True)
+
+    if problem.parameter_df is None:
+        problem.parameter_df = rows
+    else:
+        problem.parameter_df = pd.concat([problem.parameter_df, rows])
+
+    problem.condition_df = problem.condition_df.drop(
+        columns=replacements.keys()
+    )
diff --git a/petab/v1/simulate.py b/petab/v1/simulate.py
new file mode 100644
index 00000000..682c470f
--- /dev/null
+++ b/petab/v1/simulate.py
@@ -0,0 +1,261 @@
+"""PEtab simulator base class and related functions."""
+from __future__ import annotations
+
+import abc
+import pathlib
+import shutil
+import tempfile
+from warnings import warn
+
+import numpy as np
+import pandas as pd
+import sympy as sp
+
+import petab.v1 as petab
+
+__all__ = ["Simulator", "sample_noise"]
+
+
+ class Simulator(abc.ABC):
+     """Abstract base class for PEtab simulators.
+
+     Subclasses should minimally implement the
+     :meth:`petab.simulate.Simulator.simulate_without_noise` method.
+     Example (AMICI): https://bit.ly/33SUSG4
+
+     Attributes:
+         noise_formulas:
+             The formulae that will be used to calculate the scale of noise
+             distributions.
+         petab_problem:
+             A PEtab problem, which will be simulated.
+         rng:
+             A NumPy random generator, used to sample from noise distributions.
+         temporary_working_dir:
+             Whether ``working_dir`` is a temporary directory, which can be
+             deleted without significant consequence.
+         working_dir:
+             All simulator-specific output files will be saved here. This
+             directory and its contents may be modified and deleted, and
+             should be considered ephemeral.
+     """
+
+     def __init__(
+         self,
+         petab_problem: petab.Problem,
+         working_dir: pathlib.Path | str | None = None,
+     ):
+         """Initialize the simulator.
+
+         Stores the problem, prepares the working directory (a temporary one
+         is created if none is specified), derives the symbolic noise
+         formulas from the observable table and sets up a random generator.
+
+         Arguments:
+             petab_problem:
+                 A PEtab problem.
+             working_dir:
+                 All simulator-specific output files will be saved here. This
+                 directory and its contents may be modified and deleted, and
+                 should be considered ephemeral.
+         """
+         self.petab_problem = petab_problem
+
+         # only directories created here are flagged as temporary
+         self.temporary_working_dir = working_dir is None
+         if self.temporary_working_dir:
+             working_dir = tempfile.mkdtemp()
+         if not isinstance(working_dir, pathlib.Path):
+             working_dir = pathlib.Path(working_dir)
+         self.working_dir = working_dir
+         self.working_dir.mkdir(parents=True, exist_ok=True)
+
+         self.noise_formulas = petab.calculate.get_symbolic_noise_formulas(
+             self.petab_problem.observable_df
+         )
+         self.rng = np.random.default_rng()
+
+     def remove_working_dir(self, force: bool = False, **kwargs) -> None:
+         """Remove the simulator working directory, and all files within.
+
+         A user-specified working directory is only removed with
+         ``force=True``; otherwise a warning is issued and nothing is deleted.
+         See the :meth:`petab.simulate.Simulator.__init__` method arguments.
+
+         Arguments:
+             force:
+                 If ``True``, the working directory is removed regardless of
+                 whether it is a temporary directory.
+             **kwargs:
+                 Additional keyword arguments are passed to
+                 :func:`shutil.rmtree`.
+         """
+         if not (force or self.temporary_working_dir):
+             warn(
+                 "By default, specified working directories are not removed. "
+                 "Please call this method with `force=True`, or manually "
+                 f"delete the working directory: {self.working_dir}",
+                 stacklevel=2,
+             )
+             return
+
+         shutil.rmtree(self.working_dir, **kwargs)
+         # removal may not have happened, e.g. depending on ``kwargs``
+         if self.working_dir.is_dir():
+             warn(
+                 "Failed to remove the working directory: "
+                 + str(self.working_dir),
+                 stacklevel=2,
+             )
+
+     @abc.abstractmethod
+     def simulate_without_noise(self) -> pd.DataFrame:
+         """Simulate the PEtab problem.
+
+         This is an abstract method that should be implemented with a simulation
+         package. Examples of this are referenced in the class docstring.
+
+         Returns:
+             Simulated data, as a PEtab measurements table, which should be
+             equivalent to replacing all values in the
+             :const:`petab.C.MEASUREMENT` column of the measurements table (of
+             the PEtab problem supplied to the
+             :meth:`petab.simulate.Simulator.__init__` method), with
+             simulated values.
+         """
+         raise NotImplementedError()
+
+     def simulate(
+         self,
+         noise: bool = False,
+         noise_scaling_factor: float = 1,
+         as_measurement: bool = False,
+         **kwargs,
+     ) -> pd.DataFrame:
+         """Simulate a PEtab problem, optionally with noise.
+
+         Arguments:
+             noise: If True, noise is added to simulated data.
+             noise_scaling_factor:
+                 A multiplier of the scale of the noise distribution.
+             as_measurement:
+                 Whether the data column is named :const:`petab.C.MEASUREMENT`
+                 (`True`) or :const:`petab.C.SIMULATION` (`False`).
+             **kwargs:
+                 Additional keyword arguments are passed to
+                 :meth:`petab.simulate.Simulator.simulate_without_noise`.
+
+         Returns:
+             Simulated data, as a PEtab measurements table.
+         """
+         result_df = self.simulate_without_noise(**kwargs)
+         if noise:
+             result_df = self.add_noise(result_df, noise_scaling_factor)
+
+         # name the data column as requested
+         if as_measurement:
+             renaming = {petab.C.SIMULATION: petab.C.MEASUREMENT}
+         else:
+             renaming = {petab.C.MEASUREMENT: petab.C.SIMULATION}
+
+         return result_df.rename(columns=renaming)
+
+     def add_noise(
+         self,
+         simulation_df: pd.DataFrame,
+         noise_scaling_factor: float = 1,
+         **kwargs,
+     ) -> pd.DataFrame:
+         """Add noise to simulated data.
+
+         The supplied table is not modified; a copy with noisy values in the
+         :const:`petab.C.MEASUREMENT` column is returned.
+
+         Arguments:
+             simulation_df:
+                 A PEtab measurements table that contains simulated data.
+             noise_scaling_factor:
+                 A multiplier of the scale of the noise distribution.
+             **kwargs:
+                 Additional keyword arguments are passed to
+                 :func:`sample_noise`.
+
+         Returns:
+             Simulated data with noise, as a PEtab measurements table.
+         """
+         noisy_df = simulation_df.copy()
+
+         noisy_values = []
+         for _, measurement_row in noisy_df.iterrows():
+             noisy_values.append(
+                 sample_noise(
+                     self.petab_problem,
+                     measurement_row,
+                     measurement_row[petab.C.MEASUREMENT],
+                     self.noise_formulas,
+                     self.rng,
+                     noise_scaling_factor,
+                     **kwargs,
+                 )
+             )
+
+         noisy_df[petab.C.MEASUREMENT] = noisy_values
+         return noisy_df
+
+
+ def sample_noise(
+     petab_problem: petab.Problem,
+     measurement_row: pd.Series,
+     simulated_value: float,
+     noise_formulas: dict[str, sp.Expr] | None = None,
+     rng: np.random.Generator | None = None,
+     noise_scaling_factor: float = 1,
+     zero_bounded: bool = False,
+ ) -> float:
+     """Draw a noisy value for a simulated value from a PEtab noise
+     distribution.
+
+     Arguments:
+         petab_problem:
+             The PEtab problem used to generate the simulated value.
+             Instance of :class:`petab.Problem`.
+         measurement_row:
+             The row in the PEtab problem measurement table that corresponds
+             to the simulated value.
+         simulated_value:
+             A simulated value without noise.
+         noise_formulas:
+             Processed noise formulas from the PEtab observables table, in the
+             form output by :func:`petab.calculate.get_symbolic_noise_formulas`.
+             Derived from the observable table of ``petab_problem`` if omitted.
+         rng:
+             A NumPy random generator. A new default generator is created if
+             omitted.
+         noise_scaling_factor:
+             A multiplier of the scale of the noise distribution.
+         zero_bounded:
+             Return zero if the sign of the return value and ``simulated_value``
+             differ. Can be used to ensure non-negative and non-positive values,
+             if the sign of ``simulated_value`` should not change.
+
+     Returns:
+         The sample from the PEtab noise distribution.
+     """
+     if noise_formulas is None:
+         noise_formulas = petab.calculate.get_symbolic_noise_formulas(
+             petab_problem.observable_df
+         )
+     rng = np.random.default_rng() if rng is None else rng
+
+     noise_scale = petab.calculate.evaluate_noise_formula(
+         measurement_row,
+         noise_formulas,
+         petab_problem.parameter_df,
+         simulated_value,
+     )
+
+     # the noise distribution defaults to petab.C.NORMAL, both if the column
+     # is missing and if the entry is empty (which shows up as NaN)
+     observable = petab_problem.observable_df.loc[
+         measurement_row[petab.C.OBSERVABLE_ID]
+     ]
+     distribution_name = observable.get(
+         petab.C.NOISE_DISTRIBUTION, petab.C.NORMAL
+     )
+     if pd.isna(distribution_name):
+         distribution_name = petab.C.NORMAL
+
+     # the distribution name is looked up as a sampling method of the
+     # generator, e.g. `rng.normal(loc=simulation, scale=noise_value)`
+     draw = getattr(rng, distribution_name)
+     noisy_value = draw(
+         loc=simulated_value, scale=noise_scale * noise_scaling_factor
+     )
+
+     if zero_bounded and np.sign(simulated_value) != np.sign(noisy_value):
+         return 0.0
+     return noisy_value
diff --git a/petab/v1/visualize/__init__.py b/petab/v1/visualize/__init__.py
new file mode 100644
index 00000000..924be86a
--- /dev/null
+++ b/petab/v1/visualize/__init__.py
@@ -0,0 +1,37 @@
+"""
+Visualize
+=========
+
+ PEtab comes with visualization functionality, which needs to be imported
+ explicitly via ``import petab.visualize``.
+
+"""
+# ruff: noqa: F401
+import importlib.util
+
+from .plotting import DataProvider, Figure
+
+__all__ = ["DataProvider", "Figure"]
+
+if importlib.util.find_spec("matplotlib") is not None:
+ from .plot_data_and_simulation import (
+ plot_problem,
+ plot_with_vis_spec,
+ plot_without_vis_spec,
+ )
+ from .plot_residuals import (
+ plot_goodness_of_fit,
+ plot_residuals_vs_simulation,
+ )
+ from .plotter import MPLPlotter
+
+ __all__.extend(
+ [
+ "plot_without_vis_spec",
+ "plot_with_vis_spec",
+ "plot_problem",
+ "plot_goodness_of_fit",
+ "plot_residuals_vs_simulation",
+ "MPLPlotter",
+ ]
+ )
diff --git a/petab/visualize/cli.py b/petab/v1/visualize/cli.py
similarity index 99%
rename from petab/visualize/cli.py
rename to petab/v1/visualize/cli.py
index d25a6785..72074936 100644
--- a/petab/visualize/cli.py
+++ b/petab/v1/visualize/cli.py
@@ -7,6 +7,8 @@
from .. import Problem, get_simulation_df, get_visualization_df
from .plot_data_and_simulation import plot_problem
+__all__ = []
+
def _parse_cli_args():
"""Parse command-line arguments."""
diff --git a/petab/v1/visualize/data_overview.py b/petab/v1/visualize/data_overview.py
new file mode 100644
index 00000000..349b503c
--- /dev/null
+++ b/petab/v1/visualize/data_overview.py
@@ -0,0 +1,91 @@
+"""
+Functions for creating an overview report of a PEtab problem
+"""
+
+from pathlib import Path
+from shutil import copyfile
+
+import pandas as pd
+
+import petab.v1 as petab
+from petab.v1.C import (
+ MEASUREMENT,
+ OBSERVABLE_ID,
+ PREEQUILIBRATION_CONDITION_ID,
+ SIMULATION_CONDITION_ID,
+)
+
+__all__ = ["create_report"]
+
+
+ def create_report(
+     problem: petab.Problem, model_name: str, output_path: str | Path = ""
+ ) -> None:
+     """Create an HTML data / model overview report
+
+     Renders the ``report.html`` template from the ``templates`` directory
+     next to this module, writes the result to
+     ``<output_path>/<model_name>.html`` and copies ``mystyle.css`` alongside.
+
+     Arguments:
+         problem: PEtab problem
+         model_name: Name of the model, used for file name for report
+         output_path: Output directory
+     """
+     template_dir = Path(__file__).absolute().parent / "templates"
+     output_dir = Path(output_path)
+
+     # everything the template gets to see
+     context = {
+         "problem": problem,
+         "model_name": model_name,
+         "data_per_observable": get_data_per_observable(
+             problem.measurement_df
+         ),
+         "num_conditions": len(problem.condition_df.index),
+     }
+
+     # Setup template engine; imported here so jinja2 is only required when
+     # a report is actually created
+     import jinja2
+
+     environment = jinja2.Environment(
+         loader=jinja2.FileSystemLoader(searchpath=template_dir),
+         autoescape=True,
+     )
+     report_html = environment.get_template("report.html").render(**context)
+
+     # Save report and the style sheet it uses
+     with open(output_dir / f"{model_name}.html", "w") as report_file:
+         report_file.write(report_html)
+     copyfile(template_dir / "mystyle.css", output_dir / "mystyle.css")
+
+
+ def get_data_per_observable(measurement_df: pd.DataFrame) -> pd.DataFrame:
+     """Get table with number of data points per observable and condition
+
+     Arguments:
+         measurement_df: PEtab measurement data frame. It is not modified.
+     Returns:
+         Pivot table with number of data points per observable and condition,
+         including a ``SUM`` row and a ``SUM`` column.
+     """
+     my_measurements = measurement_df.copy()
+
+     index = [SIMULATION_CONDITION_ID]
+     if PREEQUILIBRATION_CONDITION_ID in my_measurements:
+         # Replace missing preequilibration conditions by empty strings, so
+         # that the column can serve as a grouping key.
+         # NOTE: `fillna(..., inplace=True)` returns `None` and must not be
+         # used in an assignment; that would wipe the column.
+         my_measurements[PREEQUILIBRATION_CONDITION_ID] = (
+             my_measurements[PREEQUILIBRATION_CONDITION_ID]
+             .astype("object")
+             .fillna("")
+         )
+         index.append(PREEQUILIBRATION_CONDITION_ID)
+
+     data_per_observable = pd.pivot_table(
+         my_measurements,
+         values=MEASUREMENT,
+         aggfunc="count",
+         index=index,
+         columns=[OBSERVABLE_ID],
+         fill_value=0,
+     )
+
+     # Add row and column sums
+     data_per_observable.loc["SUM", :] = data_per_observable.sum(axis=0).values
+     data_per_observable["SUM"] = data_per_observable.sum(axis=1).values
+
+     # adding the sums may have changed the dtype; counts are integers
+     data_per_observable = data_per_observable.astype(int)
+
+     return data_per_observable
diff --git a/petab/visualize/helper_functions.py b/petab/v1/visualize/helper_functions.py
similarity index 92%
rename from petab/visualize/helper_functions.py
rename to petab/v1/visualize/helper_functions.py
index b48e1ad6..b1a6f1b1 100644
--- a/petab/visualize/helper_functions.py
+++ b/petab/v1/visualize/helper_functions.py
@@ -4,22 +4,21 @@
hence not be directly visible/usable when using `import petab.visualize`.
"""
-from typing import List
import pandas as pd
from ..C import *
# for typehints
-IdsList = List[str]
-NumList = List[int]
+IdsList = list[str]
+NumList = list[int]
__all__ = [
"create_dataset_id_list_new",
"generate_dataset_id_col",
]
-def generate_dataset_id_col(exp_data: pd.DataFrame) -> List[str]:
+def generate_dataset_id_col(exp_data: pd.DataFrame) -> list[str]:
"""
Generate DATASET_ID column from condition_ids and observable_ids.
@@ -49,8 +48,8 @@ def generate_dataset_id_col(exp_data: pd.DataFrame) -> List[str]:
def create_dataset_id_list_new(
- df: pd.DataFrame, group_by: str, id_list: List[IdsList]
-) -> List[IdsList]:
+ df: pd.DataFrame, group_by: str, id_list: list[IdsList]
+) -> list[IdsList]:
"""
Create dataset ID list from a list of simulation condition IDs or
observable IDs.
diff --git a/petab/v1/visualize/lint.py b/petab/v1/visualize/lint.py
new file mode 100644
index 00000000..b5de74bc
--- /dev/null
+++ b/petab/v1/visualize/lint.py
@@ -0,0 +1,179 @@
+"""Validation of PEtab visualization files"""
+from __future__ import annotations
+
+import logging
+
+import pandas as pd
+
+from .. import C, Problem
+from ..C import VISUALIZATION_DF_REQUIRED_COLS
+
+logger = logging.getLogger(__name__)
+
+__all__ = ["validate_visualization_df"]
+
+
+ def validate_visualization_df(problem: Problem) -> bool:
+     """Validate visualization table
+
+     All problems found are reported via the module logger. The visualization
+     table of ``problem`` is not modified; defaults are applied to a copy.
+
+     Arguments:
+         problem: The PEtab problem containing a visualization table
+
+     Returns:
+         ``True`` if errors occurred, ``False`` otherwise
+     """
+
+     def non_empty_ids(series: pd.Series) -> set:
+         """Get the distinct values that are neither missing nor empty."""
+         # `bool(float("nan"))` is `True`, so missing values have to be
+         # excluded explicitly
+         return {
+             value for value in series.unique() if not pd.isna(value) and value
+         }
+
+     vis_df = problem.visualization_df
+     if vis_df is None or vis_df.empty:
+         return False
+
+     errors = False
+
+     if missing_req_cols := (
+         set(VISUALIZATION_DF_REQUIRED_COLS) - set(vis_df.columns)
+     ):
+         logger.error(
+             f"Missing required columns {missing_req_cols} "
+             "in visualization table."
+         )
+         errors = True
+
+     # Set all unspecified optional values to their defaults to simplify
+     # validation
+     vis_df = vis_df.copy()
+     _apply_defaults(vis_df)
+
+     if unknown_types := (
+         set(vis_df[C.PLOT_TYPE_SIMULATION].unique())
+         - set(C.PLOT_TYPES_SIMULATION)
+     ):
+         logger.error(
+             f"Unknown {C.PLOT_TYPE_SIMULATION}: {unknown_types}. "
+             f"Must be one of {C.PLOT_TYPES_SIMULATION}"
+         )
+         errors = True
+
+     if unknown_types := (
+         set(vis_df[C.PLOT_TYPE_DATA].unique()) - set(C.PLOT_TYPES_DATA)
+     ):
+         logger.error(
+             f"Unknown {C.PLOT_TYPE_DATA}: {unknown_types}. "
+             f"Must be one of {C.PLOT_TYPES_DATA}"
+         )
+         errors = True
+
+     if unknown_scale := (set(vis_df[C.X_SCALE].unique()) - set(C.X_SCALES)):
+         logger.error(
+             f"Unknown {C.X_SCALE}: {unknown_scale}. "
+             f"Must be one of {C.X_SCALES}"
+         )
+         errors = True
+
+     if any(
+         (vis_df[C.X_SCALE] == "order")
+         & (vis_df[C.PLOT_TYPE_SIMULATION] != C.LINE_PLOT)
+     ):
+         logger.error(
+             f"{C.X_SCALE}=order is only allowed with "
+             f"{C.PLOT_TYPE_SIMULATION}={C.LINE_PLOT}."
+         )
+         errors = True
+
+     if unknown_scale := (set(vis_df[C.Y_SCALE].unique()) - set(C.Y_SCALES)):
+         logger.error(
+             f"Unknown {C.Y_SCALE}: {unknown_scale}. "
+             f"Must be one of {C.Y_SCALES}"
+         )
+         errors = True
+
+     if problem.condition_df is not None:
+         # `in` on a Series tests the index labels, so collect the actual
+         # values first
+         x_values = set(vis_df[C.X_VALUES].unique())
+
+         # check for ambiguous values
+         reserved_names = {C.TIME, "condition"}
+         for reserved_name in reserved_names:
+             if (
+                 reserved_name in problem.condition_df
+                 and reserved_name in x_values
+             ):
+                 logger.error(
+                     f"Ambiguous value for `{C.X_VALUES}`: "
+                     f"`{reserved_name}` has a special meaning as "
+                     f"`{C.X_VALUES}`, but there exists also a model "
+                     "entity with that name."
+                 )
+                 errors = True
+
+         # check xValues exist in condition table
+         for xvalue in x_values - reserved_names:
+             if xvalue not in problem.condition_df:
+                 logger.error(
+                     f"{C.X_VALUES} was set to `{xvalue}`, but no "
+                     "such column exists in the conditions table."
+                 )
+                 errors = True
+
+     if problem.observable_df is not None:
+         # yValues must be an observable
+         for yvalue in vis_df[C.Y_VALUES].unique():
+             if pd.isna(yvalue):
+                 # if there is only one observable, we default to that
+                 if len(problem.observable_df.index.unique()) != 1:
+                     logger.error(
+                         f"{C.Y_VALUES} must be specified if there is more "
+                         "than one observable."
+                     )
+                     errors = True
+                 # a missing value is no observable ID; don't report it
+                 # as an unknown observable on top
+                 continue
+
+             if yvalue not in problem.observable_df.index:
+                 logger.error(
+                     f"{C.Y_VALUES} was set to `{yvalue}`, but no such "
+                     "observable exists in the observables table."
+                 )
+                 errors = True
+
+     if problem.measurement_df is not None:
+         referenced_datasets = non_empty_ids(vis_df[C.DATASET_ID])
+         if referenced_datasets:
+             # a measurement table without dataset ID column provides no
+             # datasets at all
+             existing_datasets = (
+                 non_empty_ids(problem.measurement_df[C.DATASET_ID])
+                 if C.DATASET_ID in problem.measurement_df
+                 else set()
+             )
+             if not referenced_datasets.issubset(existing_datasets):
+                 logger.error(
+                     f"Visualization table references {C.DATASET_ID}(s) "
+                     f"{referenced_datasets - existing_datasets}, but no such "
+                     "dataset(s) exist in the measurement table."
+                 )
+                 errors = True
+
+     return errors
+
+
+ def _apply_defaults(vis_df: pd.DataFrame):
+     """
+     Fill in default values.
+
+     Modifies the given visualization table in place: missing optional
+     columns are added, and missing entries in existing columns are replaced
+     by the respective default. Some defaults are taken from other columns,
+     hence the order of the calls below matters.
+     """
+
+     def fill_in(column: str, default):
+         # missing column: create it with the default
+         if column not in vis_df:
+             vis_df[column] = default
+             return
+
+         # existing column without a default: nothing to fill in
+         if default is None:
+             return
+
+         if isinstance(default, str):
+             # make sure the column can hold the string default
+             vis_df[column] = vis_df[column].astype("object")
+         vis_df.fillna({column: default}, inplace=True)
+
+     fill_in(C.PLOT_NAME, "")
+     fill_in(C.PLOT_TYPE_SIMULATION, C.LINE_PLOT)
+     fill_in(C.PLOT_TYPE_DATA, C.MEAN_AND_SD)
+     fill_in(C.DATASET_ID, None)
+     fill_in(C.X_VALUES, C.TIME)
+     fill_in(C.X_OFFSET, 0)
+     # labels default to the (already completed) value columns
+     fill_in(C.X_LABEL, vis_df[C.X_VALUES])
+     fill_in(C.X_SCALE, C.LIN)
+     fill_in(C.Y_VALUES, None)
+     fill_in(C.Y_OFFSET, 0)
+     fill_in(C.Y_LABEL, vis_df[C.Y_VALUES])
+     fill_in(C.Y_SCALE, C.LIN)
+     fill_in(C.LEGEND_ENTRY, vis_df[C.DATASET_ID])
diff --git a/petab/v1/visualize/plot_data_and_simulation.py b/petab/v1/visualize/plot_data_and_simulation.py
new file mode 100644
index 00000000..c76bcd43
--- /dev/null
+++ b/petab/v1/visualize/plot_data_and_simulation.py
@@ -0,0 +1,222 @@
+"""Functions for plotting PEtab measurement files and simulation results in
+the same format.
+"""
+
+
+import matplotlib.pyplot as plt
+import pandas as pd
+
+from .. import problem
+from ..C import *
+from .plotter import MPLPlotter
+from .plotting import VisSpecParser
+
+# for typehints
+IdsList = list[str]
+NumList = list[int]
+
+__all__ = ["plot_with_vis_spec", "plot_without_vis_spec", "plot_problem"]
+
+
+ def plot_with_vis_spec(
+     vis_spec_df: str | pd.DataFrame,
+     conditions_df: str | pd.DataFrame,
+     measurements_df: str | pd.DataFrame | None = None,
+     simulations_df: str | pd.DataFrame | None = None,
+     subplot_dir: str | None = None,
+     plotter_type: str = "mpl",
+     format_: str = "png",
+ ) -> dict[str, plt.Subplot] | None:
+     """
+     Plot measurements and/or simulations as specified in a visualization
+     table.
+
+     Parameters
+     ----------
+     vis_spec_df:
+         A visualization table.
+     conditions_df:
+         A condition DataFrame in the PEtab format or path to the condition
+         file.
+     measurements_df:
+         A measurement DataFrame in the PEtab format or path to the data file.
+     simulations_df:
+         A simulation DataFrame in the PEtab format or path to the simulation
+         output data file.
+     subplot_dir:
+         A path to the folder where single subplots should be saved.
+         PlotIDs will be taken as file names.
+     plotter_type:
+         Specifies which library should be used for plot generation. Currently,
+         only matplotlib is supported.
+     format_:
+         File format for the generated figure.
+         (See :py:func:`matplotlib.pyplot.savefig` for supported options).
+
+     Returns
+     -------
+     ax: Axis object of the created plot.
+     None: In case subplots are saved to a file.
+
+     Raises
+     ------
+     TypeError: If neither measurements nor simulations are provided.
+     NotImplementedError: If ``plotter_type`` is not ``"mpl"``.
+     """
+     # at least one of the two data sources is required
+     if all(table is None for table in (measurements_df, simulations_df)):
+         raise TypeError(
+             "Not enough arguments. Either measurements_data "
+             "or simulations_data should be provided."
+         )
+
+     parser = VisSpecParser(conditions_df, measurements_df, simulations_df)
+     figure, dataprovider = parser.parse_from_vis_spec(vis_spec_df)
+
+     if plotter_type != "mpl":
+         raise NotImplementedError(
+             "Currently, only visualization with matplotlib is possible."
+         )
+
+     return MPLPlotter(figure, dataprovider).generate_figure(
+         subplot_dir, format_=format_
+     )
+
+
+ def plot_without_vis_spec(
+     conditions_df: str | pd.DataFrame,
+     grouping_list: list[IdsList] | None = None,
+     group_by: str = "observable",
+     measurements_df: str | pd.DataFrame | None = None,
+     simulations_df: str | pd.DataFrame | None = None,
+     plotted_noise: str = MEAN_AND_SD,
+     subplot_dir: str | None = None,
+     plotter_type: str = "mpl",
+     format_: str = "png",
+ ) -> dict[str, plt.Subplot] | None:
+     """
+     Plot measurements and/or simulations, grouped according to a
+     grouping_list instead of a visualization table.
+     If grouping list is not provided, measurements (simulations) will be
+     grouped by observable, i.e. all measurements for each observable will be
+     visualized on one plot.
+
+     Parameters
+     ----------
+     grouping_list:
+         A list of lists. Each sublist corresponds to a plot, each subplot
+         contains the Ids of datasets or observables or simulation conditions
+         for this plot.
+     group_by:
+         Grouping type.
+         Possible values: 'dataset', 'observable', 'simulation'.
+     conditions_df:
+         A condition DataFrame in the PEtab format or path to the condition
+         file.
+     measurements_df:
+         A measurement DataFrame in the PEtab format or path to the data file.
+     simulations_df:
+         A simulation DataFrame in the PEtab format or path to the simulation
+         output data file.
+     plotted_noise:
+         A string indicating how noise should be visualized:
+         ['MeanAndSD' (default), 'MeanAndSEM', 'replicate', 'provided'].
+     subplot_dir:
+         A path to the folder where single subplots should be saved.
+         PlotIDs will be taken as file names.
+     plotter_type:
+         Specifies which library should be used for plot generation. Currently,
+         only matplotlib is supported.
+     format_:
+         File format for the generated figure.
+         (See :py:func:`matplotlib.pyplot.savefig` for supported options).
+
+     Returns
+     -------
+     ax: Axis object of the created plot.
+     None: In case subplots are saved to a file.
+
+     Raises
+     ------
+     TypeError: If neither measurements nor simulations are provided.
+     NotImplementedError: If ``plotter_type`` is not ``"mpl"``.
+     """
+     # at least one of the two data sources is required
+     if all(table is None for table in (measurements_df, simulations_df)):
+         raise TypeError(
+             "Not enough arguments. Either measurements_data "
+             "or simulations_data should be provided."
+         )
+
+     parser = VisSpecParser(conditions_df, measurements_df, simulations_df)
+     figure, dataprovider = parser.parse_from_id_list(
+         grouping_list, group_by, plotted_noise
+     )
+
+     if plotter_type != "mpl":
+         raise NotImplementedError(
+             "Currently, only visualization with matplotlib is possible."
+         )
+
+     return MPLPlotter(figure, dataprovider).generate_figure(
+         subplot_dir, format_=format_
+     )
+
+
+ def plot_problem(
+     petab_problem: problem.Problem,
+     simulations_df: str | pd.DataFrame | None = None,
+     grouping_list: list[IdsList] | None = None,
+     group_by: str = "observable",
+     plotted_noise: str = MEAN_AND_SD,
+     subplot_dir: str | None = None,
+     plotter_type: str = "mpl",
+     format_: str = "png",
+ ) -> dict[str, plt.Subplot] | None:
+     """
+     Visualization using petab problem.
+     If Visualization table is part of the petab_problem, it will be used for
+     visualization. Otherwise, grouping_list will be used.
+     If neither Visualization table nor grouping_list are available,
+     measurements (simulations) will be grouped by observable, i.e. all
+     measurements for each observable will be visualized on one plot.
+
+     Parameters
+     ----------
+     petab_problem:
+         A PEtab problem.
+     simulations_df:
+         A simulation DataFrame in the PEtab format or path to the simulation
+         output data file.
+     grouping_list:
+         A list of lists. Each sublist corresponds to a plot, each subplot
+         contains the Ids of datasets or observables or simulation conditions
+         for this plot. Only used if the problem has no visualization table.
+     group_by:
+         Possible values: 'dataset', 'observable', 'simulation'.
+         Only used if the problem has no visualization table.
+     plotted_noise:
+         A string indicating how noise should be visualized:
+         ['MeanAndSD' (default), 'MeanAndSEM', 'replicate', 'provided'].
+         Only used if the problem has no visualization table.
+     subplot_dir:
+         A string which is taken as path to the folder where single subplots
+         should be saved. PlotIDs will be taken as file names.
+     plotter_type:
+         Specifies which library should be used for plot generation. Currently,
+         only matplotlib is supported.
+     format_:
+         File format for the generated figure.
+         (See :py:func:`matplotlib.pyplot.savefig` for supported options).
+
+     Returns
+     -------
+     ax: Axis object of the created plot.
+     None: In case subplots are saved to a file.
+     """
+     if petab_problem.visualization_df is not None:
+         return plot_with_vis_spec(
+             vis_spec_df=petab_problem.visualization_df,
+             conditions_df=petab_problem.condition_df,
+             measurements_df=petab_problem.measurement_df,
+             simulations_df=simulations_df,
+             subplot_dir=subplot_dir,
+             plotter_type=plotter_type,
+             format_=format_,
+         )
+
+     # no visualization table: fall back to grouping
+     return plot_without_vis_spec(
+         conditions_df=petab_problem.condition_df,
+         grouping_list=grouping_list,
+         group_by=group_by,
+         measurements_df=petab_problem.measurement_df,
+         simulations_df=simulations_df,
+         plotted_noise=plotted_noise,
+         subplot_dir=subplot_dir,
+         plotter_type=plotter_type,
+         format_=format_,
+     )
diff --git a/petab/v1/visualize/plot_residuals.py b/petab/v1/visualize/plot_residuals.py
new file mode 100644
index 00000000..90298154
--- /dev/null
+++ b/petab/v1/visualize/plot_residuals.py
@@ -0,0 +1,211 @@
+"""
+Functions for plotting residuals.
+"""
+from pathlib import Path
+
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from scipy import stats
+
+from ..C import *
+from ..calculate import calculate_residuals
+from ..core import get_simulation_df
+from ..problem import Problem
+
+__all__ = ["plot_goodness_of_fit", "plot_residuals_vs_simulation"]
+
+
+ def plot_residuals_vs_simulation(
+     petab_problem: Problem,
+     simulations_df: str | Path | pd.DataFrame,
+     size: tuple | None = (10, 7),
+     axes: tuple[plt.Axes, plt.Axes] | None = None,
+ ) -> matplotlib.axes.Axes:
+     """
+     Plot residuals versus simulation values for measurements with normal noise
+     assumption.
+
+     Only observables with normal noise on linear scale are considered.
+     Observables for which the noise distribution or the observable
+     transformation is not set are treated according to the PEtab defaults
+     (normal noise, linear scale).
+
+     Parameters
+     ----------
+     petab_problem:
+         A PEtab problem.
+     simulations_df:
+         A simulation DataFrame in the PEtab format or path to the simulation
+         output data file.
+     size:
+         Figure size. Only used if ``axes`` is not provided.
+     axes:
+         Pair of axis objects; the first one is used for the residuals plot,
+         the second one for the histogram of the residuals. Created if not
+         provided.
+
+     Returns
+     -------
+     ax: The pair of axis objects of the created plot.
+
+     Raises
+     ------
+     ValueError: If there is no observable with normal additive noise.
+     """
+     if isinstance(simulations_df, str | Path):
+         simulations_df = get_simulation_df(simulations_df)
+
+     observable_df = petab_problem.observable_df
+     # select observables with normal noise on linear scale; missing columns
+     # and missing entries (NaN) both mean "default"
+     is_normal_additive = np.ones(len(observable_df), dtype=bool)
+     if NOISE_DISTRIBUTION in observable_df:
+         noise_distribution = observable_df[NOISE_DISTRIBUTION]
+         is_normal_additive &= (
+             noise_distribution.isna() | (noise_distribution == NORMAL)
+         ).to_numpy()
+     if OBSERVABLE_TRANSFORMATION in observable_df:
+         transformation = observable_df[OBSERVABLE_TRANSFORMATION]
+         is_normal_additive &= (
+             transformation.isna() | (transformation == LIN)
+         ).to_numpy()
+     observable_ids = observable_df.index[is_normal_additive]
+
+     if observable_ids.empty:
+         raise ValueError(
+             "Residuals plot is only applicable for normal "
+             "additive noise assumption"
+         )
+
+     if axes is None:
+         fig, axes = plt.subplots(
+             1, 2, sharey=True, figsize=size, width_ratios=[2, 1]
+         )
+         fig.set_layout_engine("tight")
+         fig.suptitle("Residuals")
+
+     residual_df = calculate_residuals(
+         measurement_dfs=petab_problem.measurement_df,
+         simulation_dfs=simulations_df,
+         observable_dfs=petab_problem.observable_df,
+         parameter_dfs=petab_problem.parameter_df,
+     )[0]
+
+     normal_residuals = residual_df[
+         residual_df[OBSERVABLE_ID].isin(observable_ids)
+     ]
+     simulations_normal = simulations_df[
+         simulations_df[OBSERVABLE_ID].isin(observable_ids)
+     ]
+
+     # compare to standard normal distribution
+     ks_result = stats.kstest(normal_residuals[RESIDUAL], stats.norm.cdf)
+
+     # plot the residuals plot
+     axes[0].hlines(
+         y=0,
+         xmin=min(simulations_normal[SIMULATION]),
+         xmax=max(simulations_normal[SIMULATION]),
+         ls="--",
+         color="gray",
+     )
+     axes[0].scatter(simulations_normal[SIMULATION], normal_residuals[RESIDUAL])
+     axes[0].text(
+         0.15,
+         0.85,
+         f"Kolmogorov-Smirnov test results:\n"
+         f"statistic: {ks_result[0]:.2f}\n"
+         f"pvalue: {ks_result[1]:.2e} ",
+         transform=axes[0].transAxes,
+     )
+     axes[0].set_xlabel("simulated values")
+     axes[0].set_ylabel("residuals")
+
+     # plot histogram
+     axes[1].hist(
+         normal_residuals[RESIDUAL], density=True, orientation="horizontal"
+     )
+     axes[1].set_xlabel("distribution")
+
+     # make the y-axis symmetric around zero
+     ymin, ymax = axes[0].get_ylim()
+     ylim = max(abs(ymin), abs(ymax))
+     axes[0].set_ylim(-ylim, ylim)
+     axes[1].tick_params(
+         left=False, labelleft=False, right=True, labelright=True
+     )
+
+     return axes
+
+
+ def plot_goodness_of_fit(
+     petab_problem: Problem,
+     simulations_df: str | Path | pd.DataFrame,
+     size: tuple = (10, 7),
+     ax: plt.Axes | None = None,
+ ) -> matplotlib.axes.Axes:
+     """
+     Plot goodness of fit.
+
+     Creates a scatter plot of simulations (y-axis) over measurements
+     (x-axis), together with the identity line, the line fitted by linear
+     regression and some statistics of the fit.
+
+     Parameters
+     ----------
+     petab_problem:
+         A PEtab problem.
+     simulations_df:
+         A simulation DataFrame in the PEtab format or path to the simulation
+         output data file.
+     size:
+         Figure size. Only used if ``ax`` is not provided.
+     ax:
+         Axis object. Created if not provided.
+
+     Returns
+     -------
+     ax: Axis object of the created plot.
+
+     Raises
+     ------
+     NotImplementedError: If measurements or simulations are missing.
+     """
+     if isinstance(simulations_df, str | Path):
+         simulations_df = get_simulation_df(simulations_df)
+
+     if simulations_df is None or petab_problem.measurement_df is None:
+         raise NotImplementedError(
+             "Both measurements and simulation data "
+             "are needed for goodness_of_fit"
+         )
+
+     residual_df = calculate_residuals(
+         measurement_dfs=petab_problem.measurement_df,
+         simulation_dfs=simulations_df,
+         observable_dfs=petab_problem.observable_df,
+         parameter_dfs=petab_problem.parameter_df,
+     )[0]
+     slope, intercept, r_value, p_value, _ = stats.linregress(
+         petab_problem.measurement_df["measurement"],
+         simulations_df["simulation"],
+     )  # x, y
+
+     if ax is None:
+         fig, ax = plt.subplots(figsize=size)
+         fig.set_layout_engine("tight")
+
+     ax.scatter(
+         petab_problem.measurement_df["measurement"],
+         simulations_df["simulation"],
+     )
+
+     # use the same range on both axes
+     ax.axis("square")
+     xlim = ax.get_xlim()
+     ylim = ax.get_ylim()
+     lim = [min([xlim[0], ylim[0]]), max([xlim[1], ylim[1]])]
+     ax.set_xlim(lim)
+     ax.set_ylim(lim)
+     # 100 points spanning the common axis range
+     x = np.linspace(lim[0], lim[1], 100)
+     ax.plot(x, x, linestyle="--", color="gray")
+     ax.plot(x, intercept + slope * x, "r", label="fitted line")
+
+     mse = np.mean(np.square(residual_df["residual"]))
+     ax.text(
+         0.1,
+         0.70,
+         f"$R^2$: {r_value**2:.2f}\n"
+         f"slope: {slope:.2f}\n"
+         f"intercept: {intercept:.2f}\n"
+         f"pvalue: {p_value:.2e}\n"
+         f"mean squared error: {mse:.2e}\n",
+         transform=ax.transAxes,
+     )
+
+     ax.set_title("Goodness of fit")
+     # measurements are plotted on the x-axis, simulations on the y-axis
+     ax.set_xlabel("measurements")
+     ax.set_ylabel("simulated values")
+     return ax
diff --git a/petab/v1/visualize/plotter.py b/petab/v1/visualize/plotter.py
new file mode 100644
index 00000000..2a1eaaa9
--- /dev/null
+++ b/petab/v1/visualize/plotter.py
@@ -0,0 +1,879 @@
+"""PEtab visualization plotter classes"""
+import os
+from abc import ABC, abstractmethod
+
+import matplotlib.axes
+import matplotlib.ticker as mtick
+import numpy as np
+import pandas as pd
+from matplotlib import pyplot as plt
+from mpl_toolkits.axes_grid1 import make_axes_locatable
+
+from ..C import *
+from .plotting import DataPlot, DataProvider, DataSeries, Figure, Subplot
+
+__all__ = ["Plotter", "MPLPlotter", "SeabornPlotter"]
+
+
+#: Line style (:class:`matplotlib.lines.Line2D` options) for the measurement
+#: data in line plots
+measurement_line_kwargs = {
+ "linestyle": "-.",
+ "marker": "x",
+ "markersize": 10,
+}
+#: Line style (:class:`matplotlib.lines.Line2D` options) for the simulation
+#: data in line plots
+simulation_line_kwargs = {
+ "linestyle": "-",
+ "marker": "o",
+ "markersize": 10,
+}
+
+
+class Plotter(ABC):
+ """
+ Abstract base class of the plotters; subclasses draw ``figure``.
+
+ Attributes
+ ----------
+ figure:
+ Figure instance that serves as a markup for the figure that
+ should be generated
+ data_provider:
+ Data provider the plotter obtains the data to plot from
+ """
+
+ def __init__(self, figure: Figure, data_provider: DataProvider):
+ self.figure = figure
+ self.data_provider = data_provider
+
+ @abstractmethod
+ def generate_figure(
+ self, subplot_dir: str | None = None
+ ) -> dict[str, plt.Subplot] | None:
+ pass  # abstract: subclasses provide the implementation
+
+
+class MPLPlotter(Plotter):
+ """
+ Matplotlib wrapper
+ """
+
+ def __init__(self, figure: Figure, data_provider: DataProvider):
+ super().__init__(figure, data_provider)  # stores both as attributes
+
+ @staticmethod
+ def _error_column_for_plot_type_data(plot_type_data: str) -> str | None:
+ """Map a PEtab plotTypeData value to the matching error column.
+
+ Parameters
+ ----------
+ plot_type_data:
+ PEtab plotTypeData value (the way replicates should be handled).
+
+ Returns
+ -------
+ Name of the error column in the internal data representation, or
+ ``None`` if ``plot_type_data`` is none of the values mapped below.
+ """
+ error_columns = {
+ MEAN_AND_SD: "sd",
+ MEAN_AND_SEM: "sem",
+ PROVIDED: "noise_model",
+ }
+ # any other value has no associated error column
+ return error_columns.get(plot_type_data)
+
+ def generate_lineplot(
+ self,
+ ax: matplotlib.axes.Axes,
+ dataplot: DataPlot,
+ plotTypeData: str,
+ splitaxes_params: dict,
+ ) -> tuple[matplotlib.axes.Axes, matplotlib.axes.Axes]:
+ """
+ Generate line plot of measurements, simulations, or both.
+
+ Returns ``ax`` and the ``"ax_inf"`` entry of ``splitaxes_params``.
+
+ Parameters
+ ----------
+ ax:
+ Axis object.
+ dataplot:
+ Visualization settings for the plot.
+ plotTypeData:
+ Specifies how replicates should be handled.
+ splitaxes_params:
+ Split-axes settings; its ``"ax_inf"`` entry may be replaced here.
+ """
+ simu_color = None
+ (
+ measurements_to_plot,
+ simulations_to_plot,
+ ) = self.data_provider.get_data_to_plot(
+ dataplot, plotTypeData == PROVIDED
+ )
+ noise_col = self._error_column_for_plot_type_data(plotTypeData)
+
+ label_base = dataplot.legendEntry
+
+ # check if t_inf is there
+ # todo: if only t_inf, adjust appearance for that case
+ plot_at_t_inf = (
+ measurements_to_plot is not None and measurements_to_plot.inf_point
+ ) or (
+ simulations_to_plot is not None and simulations_to_plot.inf_point
+ )
+
+ if (
+ measurements_to_plot is not None
+ and not measurements_to_plot.data_to_plot.empty
+ ):
+ # plotting all measurement data
+
+ p = None
+ if plotTypeData == REPLICATE:
+ replicates = np.stack(
+ measurements_to_plot.data_to_plot.repl.values
+ )
+ # sorts according to ascending order of conditions
+ cond, replicates = zip(
+ *sorted(
+ zip(
+ measurements_to_plot.conditions,
+ replicates,
+ strict=True,
+ )
+ ),
+ strict=True,
+ )
+ replicates = np.stack(replicates)
+
+ if replicates.ndim == 1:
+ replicates = np.expand_dims(replicates, axis=1)
+
+ # plot first replicate (the only one that carries the label)
+ p = ax.plot(
+ cond,
+ replicates[:, 0],
+ label=label_base,
+ **measurement_line_kwargs,
+ )
+
+ # plot other replicates with the same color
+ ax.plot(
+ cond,
+ replicates[:, 1:],
+ **measurement_line_kwargs,
+ color=p[0].get_color(),
+ )
+
+ # construct errorbar-plots: noise specified above
+ else:
+ # sorts according to ascending order of conditions
+ scond, smean, snoise = zip(
+ *sorted(
+ zip(
+ measurements_to_plot.conditions,
+ measurements_to_plot.data_to_plot["mean"],
+ measurements_to_plot.data_to_plot[noise_col],
+ strict=True,
+ )
+ ),
+ strict=True,
+ )
+
+ if np.inf in scond:
+ # remove inf point (sorted last); it is drawn on ax_inf
+ scond = scond[:-1]
+ smean = smean[:-1]
+ snoise = snoise[:-1]
+
+ if len(scond) > 0 and len(smean) > 0 and len(snoise) > 0:
+ # if only t=inf there will be nothing to plot
+ p = ax.errorbar(
+ scond,
+ smean,
+ snoise,
+ label=label_base,
+ **measurement_line_kwargs,
+ )
+
+ # simulations should have the same colors if both measurements
+ # and simulations are plotted
+ simu_color = p[0].get_color() if p else None
+
+ # construct simulation plot
+ if (
+ simulations_to_plot is not None
+ and not simulations_to_plot.data_to_plot.empty
+ ):
+ # markers will be displayed only for points that have measurement
+ # counterpart
+ if measurements_to_plot is not None:
+ meas_conditions = (
+ measurements_to_plot.conditions.to_numpy()
+ if isinstance(measurements_to_plot.conditions, pd.Series)
+ else measurements_to_plot.conditions
+ )
+ every = [
+ condition in meas_conditions
+ for condition in simulations_to_plot.conditions
+ ]
+ else:
+ every = None
+
+ # sorts according to ascending order of conditions
+ xs, ys = map(
+ list,
+ zip(
+ *sorted(
+ zip(
+ simulations_to_plot.conditions,
+ simulations_to_plot.data_to_plot["mean"],
+ strict=True,
+ )
+ ),
+ strict=True,
+ ),
+ )
+
+ if np.inf in xs:
+ # remove inf point (sorted last); it is drawn on ax_inf
+ xs = xs[:-1]
+ ys = ys[:-1]
+ every = every[:-1] if every else None
+
+ if len(xs) > 0 and len(ys) > 0:
+ p = ax.plot(
+ xs,
+ ys,
+ markevery=every,
+ label=label_base + " simulation",
+ color=simu_color,
+ **simulation_line_kwargs,
+ )
+ # lines at t=inf should have the same colors also in case
+ # only simulations are plotted
+ simu_color = p[0].get_color()
+
+ # plot inf points
+ if plot_at_t_inf:
+ ax, splitaxes_params["ax_inf"] = self._line_plot_at_t_inf(
+ ax,
+ plotTypeData,
+ measurements_to_plot,
+ simulations_to_plot,
+ noise_col,
+ label_base,
+ splitaxes_params,
+ color=simu_color,
+ )
+
+ return ax, splitaxes_params["ax_inf"]
+
+ def generate_barplot(
+ self,
+ ax: "matplotlib.pyplot.Axes",
+ dataplot: DataPlot,
+ plotTypeData: str,
+ ) -> None:
+ """
+ Generate barplot: one bar for the measurement, one for the simulation.
+
+ Parameters
+ ----------
+ ax:
+ Axis object.
+ dataplot:
+ Visualization settings for the plot.
+ plotTypeData:
+ Specifies how replicates should be handled.
+ """
+ # TODO: for REPLICATE noise_col is None; yerr lookup below likely fails
+ noise_col = self._error_column_for_plot_type_data(plotTypeData)
+
+ (
+ measurements_to_plot,
+ simulations_to_plot,
+ ) = self.data_provider.get_data_to_plot(
+ dataplot, plotTypeData == PROVIDED
+ )
+
+ x_name = dataplot.legendEntry
+
+ if simulations_to_plot is not None:
+ bar_kwargs = {
+ "align": "edge",
+ "width": -1 / 3,  # negative width: bar ends at the tick
+ }
+ else:
+ bar_kwargs = {
+ "align": "center",
+ "width": 2 / 3,
+ }
+
+ color = plt.rcParams["axes.prop_cycle"].by_key()["color"][0]
+
+ if measurements_to_plot is not None:
+ ax.bar(
+ x_name,
+ measurements_to_plot.data_to_plot["mean"],
+ yerr=measurements_to_plot.data_to_plot[noise_col],
+ color=color,
+ **bar_kwargs,
+ label="measurement",
+ )
+
+ if simulations_to_plot is not None:
+ bar_kwargs["width"] = -bar_kwargs["width"]  # bar starts at tick
+ ax.bar(
+ x_name,
+ simulations_to_plot.data_to_plot["mean"],
+ color="white",
+ edgecolor=color,
+ **bar_kwargs,
+ label="simulation",
+ )
+
+ def generate_scatterplot(
+ self,
+ ax: "matplotlib.pyplot.Axes",
+ dataplot: DataPlot,
+ plotTypeData: str,
+ ) -> None:
+ """
+ Generate scatterplot of simulated (y) versus measured (x) mean values.
+
+ Parameters
+ ----------
+ ax:
+ Axis object.
+ dataplot:
+ Visualization settings for the plot.
+ plotTypeData:
+ Specifies how replicates should be handled.
+ """
+ (
+ measurements_to_plot,
+ simulations_to_plot,
+ ) = self.data_provider.get_data_to_plot(
+ dataplot, plotTypeData == PROVIDED
+ )
+
+ if simulations_to_plot is None or measurements_to_plot is None:
+ raise NotImplementedError(
+ "Both measurements and simulation data "
+ "are needed for scatter plots"
+ )
+ ax.scatter(
+ measurements_to_plot.data_to_plot["mean"],
+ simulations_to_plot.data_to_plot["mean"],
+ label=getattr(dataplot, LEGEND_ENTRY),
+ )
+ self._square_plot_equal_ranges(ax)  # same range on both axes
+
+ def generate_subplot(
+ self,
+ fig: matplotlib.figure.Figure,
+ ax: matplotlib.axes.Axes,
+ subplot: Subplot,
+ ) -> None:
+ """
+ Generate subplot based on markup provided by subplot.
+
+ Parameters
+ ----------
+ fig:
+ Figure object.
+ ax:
+ Axis object.
+ subplot:
+ Subplot visualization settings.
+ """
+ # set yScale
+ if subplot.yScale == LIN:
+ ax.set_yscale("linear")
+ elif subplot.yScale == LOG10:
+ ax.set_yscale("log")
+ elif subplot.yScale == LOG:
+ ax.set_yscale("log", base=np.e)
+
+ if subplot.plotTypeSimulation == BAR_PLOT:
+ for data_plot in subplot.data_plots:
+ self.generate_barplot(ax, data_plot, subplot.plotTypeData)
+
+ # get rid of duplicate legends
+ handles, labels = ax.get_legend_handles_labels()
+ by_label = dict(zip(labels, handles, strict=True))
+ ax.legend(by_label.values(), by_label.keys())
+
+ x_names = [x.legendEntry for x in subplot.data_plots]
+ ax.set_xticks(range(len(x_names)))
+ ax.set_xticklabels(x_names)
+
+ for label in ax.get_xmajorticklabels():
+ label.set_rotation(30)
+ label.set_horizontalalignment("right")
+ elif subplot.plotTypeSimulation == SCATTER_PLOT:
+ for data_plot in subplot.data_plots:
+ self.generate_scatterplot(ax, data_plot, subplot.plotTypeData)
+ else:
+ # set xScale
+ if subplot.xScale == LIN:
+ ax.set_xscale("linear")
+ elif subplot.xScale == LOG10:
+ ax.set_xscale("log")
+ elif subplot.xScale == LOG:
+ ax.set_xscale("log", base=np.e)
+ # equidistant: ticks at 0..n-1, labelled with the conditions
+ elif subplot.xScale == "order":
+ ax.set_xscale("linear")
+ # check if conditions are monotone decreasing or increasing
+ if np.all(np.diff(subplot.conditions) < 0):
+ # monot. decreasing -> reverse
+ xlabel = subplot.conditions[::-1]
+ conditions = range(len(subplot.conditions))[::-1]
+ ax.set_xticks(range(len(conditions)), xlabel)
+ elif np.all(np.diff(subplot.conditions) > 0):
+ xlabel = subplot.conditions
+ conditions = range(len(subplot.conditions))
+ ax.set_xticks(range(len(conditions)), xlabel)
+ else:
+ raise ValueError(
+ "Error: x-conditions do not coincide, "
+ "some are mon. increasing, some "
+ "monotonically decreasing"
+ )
+
+ splitaxes_params = self._preprocess_splitaxes(fig, ax, subplot)
+ for data_plot in subplot.data_plots:
+ ax, splitaxes_params["ax_inf"] = self.generate_lineplot(
+ ax,
+ data_plot,
+ subplot.plotTypeData,
+ splitaxes_params=splitaxes_params,
+ )
+ if splitaxes_params["ax_inf"] is not None:
+ self._postprocess_splitaxes(
+ ax, splitaxes_params["ax_inf"], splitaxes_params["t_inf"]
+ )
+
+ # show 'e' as basis not 2.7... in natural log scale cases
+ def ticks(y, _):
+ return rf"$e^{{{np.log(y):.0f}}}$"
+
+ if subplot.xScale == LOG:
+ ax.xaxis.set_major_formatter(mtick.FuncFormatter(ticks))
+ if subplot.yScale == LOG:
+ ax.yaxis.set_major_formatter(mtick.FuncFormatter(ticks))
+
+ if subplot.plotTypeSimulation != BAR_PLOT:
+ ax.legend()
+ ax.set_title(subplot.plotName)
+ if subplot.xlim:
+ ax.set_xlim(subplot.xlim)
+ if subplot.ylim:
+ ax.set_ylim(subplot.ylim)
+ ax.autoscale_view()
+
+ # axis labels
+ ax.set_xlabel(subplot.xLabel)
+ ax.set_ylabel(subplot.yLabel)
+
+ def generate_figure(
+ self,
+ subplot_dir: str | None = None,
+ format_: str = "png",
+ ) -> dict[str, plt.Subplot] | None:
+ """
+ Generate the full figure based on the markup in the figure attribute.
+
+ Parameters
+ ----------
+ subplot_dir:
+ A path to the folder where single subplots should be saved.
+ PlotIDs will be taken as file names.
+ format_:
+ File format for the generated figure.
+ (See :py:func:`matplotlib.pyplot.savefig` for supported options).
+
+ Returns
+ -------
+ axes:
+ Dictionary mapping each plot ID to the axes it was drawn on.
+ None:
+ In case subplots are saved to file.
+ """
+ if subplot_dir is None:
+ # compute, how many rows and columns we need for the subplots
+ num_row = int(np.round(np.sqrt(self.figure.num_subplots)))
+ num_col = int(np.ceil(self.figure.num_subplots / num_row))
+
+ fig, axes = plt.subplots(
+ num_row, num_col, squeeze=False, figsize=self.figure.size
+ )
+ fig.set_layout_engine("tight")
+
+ for ax in axes.flat[self.figure.num_subplots :]:
+ ax.remove()  # drop grid cells that have no subplot
+
+ axes = dict(
+ zip(
+ [plot.plotId for plot in self.figure.subplots],
+ axes.flat,
+ strict=False,
+ )
+ )
+
+ for subplot in self.figure.subplots:
+ if subplot_dir is not None:
+ fig, ax = plt.subplots(figsize=self.figure.size)
+ fig.set_layout_engine("tight")
+ else:
+ ax = axes[subplot.plotId]
+
+ try:
+ self.generate_subplot(fig, ax, subplot)
+ except Exception as e:
+ raise RuntimeError(
+ f"Error plotting {getattr(subplot, PLOT_ID)}."
+ ) from e
+
+ if subplot_dir is not None:
+ # TODO: why this doesn't work?
+ plt.tight_layout()
+ plt.savefig(
+ os.path.join(subplot_dir, f"{subplot.plotId}.{format_}")
+ )
+ plt.close()
+
+ if subplot_dir is None:
+ # TODO: why this doesn't work?
+ plt.tight_layout()
+ return axes
+
+ @staticmethod
+ def _square_plot_equal_ranges(
+ ax: "matplotlib.pyplot.Axes", lim: list | tuple | None = None
+ ) -> "matplotlib.pyplot.Axes":
+ """
+ Square plot with equal range for scatter plots.
+
+ Returns
+ -------
+ Updated axis object.
+ """
+ ax.axis("square")
+
+ if lim is None:  # default: joint range of the current x and y limits
+ xlim = ax.get_xlim()
+ ylim = ax.get_ylim()
+ lim = [np.min([xlim[0], ylim[0]]), np.max([xlim[1], ylim[1]])]
+
+ ax.set_xlim(lim)
+ ax.set_ylim(lim)
+
+ # Same tick mark on x and y
+ ax.yaxis.set_major_locator(ax.xaxis.get_major_locator())
+
+ return ax
+
+ @staticmethod
+ def _line_plot_at_t_inf(
+ ax: matplotlib.axes.Axes,
+ plotTypeData: str,
+ measurements_to_plot: DataSeries | None,
+ simulations_to_plot: DataSeries | None,
+ noise_col: str | None,
+ label_base: str,
+ split_axes_params: dict,
+ color=None,
+ ) -> tuple[matplotlib.axes.Axes, matplotlib.axes.Axes]:
+ """
+ Plot data at t=inf.
+
+ Parameters
+ ----------
+ ax:
+ Axis object for the data corresponding to the finite timepoints.
+ plotTypeData:
+ The way replicates should be handled.
+ measurements_to_plot:
+ Measurements to plot (may be None).
+ simulations_to_plot:
+ Simulations to plot (may be None).
+ noise_col:
+ The name of the error column for plot_type_data.
+ label_base:
+ Measurement label; simulations use it with " simulation" appended.
+ split_axes_params:
+ A dictionary of split axes parameters with
+ - Axis object for the data corresponding to t=inf
+ - Time value that represents t=inf
+ - left and right limits for the axis where the data corresponding
+ to the finite timepoints is plotted
+ color:
+ Line color.
+
+ Returns
+ -------
+ Two axis objects: for the data corresponding to the finite timepoints
+ and for the data corresponding to t=inf
+ """
+ ax_inf = split_axes_params["ax_inf"]
+ t_inf = split_axes_params["t_inf"]
+ ax_finite_right_limit = split_axes_params["ax_finite_right_limit"]
+ ax_left_limit = split_axes_params["ax_left_limit"]
+
+ timepoints_inf = [
+ ax_finite_right_limit,
+ t_inf,
+ ax_finite_right_limit
+ + (ax_finite_right_limit - ax_left_limit) * 0.2,
+ ]
+
+ # plot measurements
+ if measurements_to_plot is not None and measurements_to_plot.inf_point:
+ measurements_data_to_plot_inf = (
+ measurements_to_plot.data_to_plot.loc[np.inf]
+ )
+
+ if plotTypeData == REPLICATE:
+ # one horizontal 3-point line per replicate across ax_inf,
+ # with a single marker at t_inf (markevery=[1])
+ replicates = measurements_data_to_plot_inf.repl
+ if replicates.ndim == 0:
+ replicates = np.expand_dims(replicates, axis=0)
+
+ # plot first replicate
+ p = ax_inf.plot(
+ timepoints_inf,
+ [replicates[0]] * 3,
+ markevery=[1],
+ label=label_base,
+ color=color,
+ **measurement_line_kwargs,
+ )
+
+ # plot other replicates with the same color
+ ax_inf.plot(
+ timepoints_inf,
+ [replicates[1:]] * 3,
+ markevery=[1],
+ color=p[0].get_color(),
+ **measurement_line_kwargs,
+ )
+ else:
+ p = ax_inf.plot(
+ [timepoints_inf[0], timepoints_inf[2]],
+ [
+ measurements_data_to_plot_inf["mean"],
+ measurements_data_to_plot_inf["mean"],
+ ],
+ color=color,
+ **measurement_line_kwargs,
+ )
+ ax_inf.errorbar(
+ t_inf,
+ measurements_data_to_plot_inf["mean"],
+ measurements_data_to_plot_inf[noise_col],
+ label=label_base,
+ color=p[0].get_color(),
+ **measurement_line_kwargs,
+ )
+
+ if color is None:
+ # in case no color was provided from finite time points
+ # plot and measurements are available corresponding
+ # simulation should have the same color
+ color = p[0].get_color()
+
+ # plot simulations
+ if simulations_to_plot is not None and simulations_to_plot.inf_point:
+ simulations_data_to_plot_inf = (
+ simulations_to_plot.data_to_plot.loc[np.inf]
+ )
+
+ if plotTypeData == REPLICATE:
+ replicates = simulations_data_to_plot_inf.repl
+ if replicates.ndim == 0:
+ replicates = np.expand_dims(replicates, axis=0)
+
+ # plot first replicate
+ p = ax_inf.plot(
+ timepoints_inf,
+ [replicates[0]] * 3,
+ markevery=[1],
+ label=label_base + " simulation",
+ color=color,
+ **simulation_line_kwargs,
+ )
+
+ # plot other replicates with the same color
+ ax_inf.plot(
+ timepoints_inf,
+ [replicates[1:]] * 3,
+ markevery=[1],
+ color=p[0].get_color(),
+ **simulation_line_kwargs,
+ )
+ else:
+ ax_inf.plot(
+ timepoints_inf,
+ [simulations_data_to_plot_inf["mean"]] * 3,
+ markevery=[1],
+ color=color,
+ **simulation_line_kwargs,
+ )
+
+ ax.set_xlim(right=ax_finite_right_limit)
+ return ax, ax_inf
+
+ @staticmethod
+ def _postprocess_splitaxes(
+ ax: matplotlib.axes.Axes, ax_inf: matplotlib.axes.Axes, t_inf: float
+ ) -> None:
+ """
+ Postprocess the splitaxes: set axes limits, turn off unnecessary
+ ticks and plot dashed lines highlighting the gap in the x axis.
+
+ Parameters
+ ----------
+ ax:
+ Axis object for the data corresponding to the finite timepoints.
+ ax_inf:
+ Axis object for the data corresponding to t=inf.
+ t_inf:
+ Time value that represents t=inf
+ """
+ ax_inf.tick_params(left=False, labelleft=False)
+ ax_inf.spines["left"].set_visible(False)
+ ax_inf.set_xticks([t_inf])
+ ax_inf.set_xticklabels([r"$t_{\infty}$"])
+
+ bottom, top = ax.get_ylim()
+ left, right = ax.get_xlim()
+ ax.spines["right"].set_visible(False)
+ ax_inf.set_xlim(right, right + (right - left) * 0.2)
+ d = (top - bottom) * 0.02
+ ax_inf.vlines(
+ x=right, ymin=bottom + d, ymax=top - d, ls="--", color="gray"
+ ) # dashed line at the left edge of ax_inf (the right-hand axes)
+ ax.vlines(
+ x=right, ymin=bottom + d, ymax=top - d, ls="--", color="gray"
+ ) # dashed line at the right edge of ax (the left-hand axes)
+ ax_inf.set_ylim(bottom, top)
+ ax.set_ylim(bottom, top)
+
+ def _preprocess_splitaxes(
+ self,
+ fig: matplotlib.figure.Figure,
+ ax: matplotlib.axes.Axes,
+ subplot: Subplot,
+ ) -> dict:
+ """
+ Prepare split axes if data at t=inf should be plotted: compute the x
+ range of the finite time points and the position representing t=inf,
+ and create the extra axes for t=inf. Returns a dict with keys "ax_inf",
+ "t_inf", "ax_finite_right_limit" and "ax_left_limit".
+ """
+
+ def check_data_to_plot(
+ data_to_plot: DataSeries | None,
+ ) -> tuple[bool, float | None, float]:
+ """
+ Check if there is data available at t=inf and compute maximum and
+ minimum finite time points that need to be plotted corresponding
+ to a dataplot.
+ """
+ contains_inf = False
+ max_finite_cond, min_cond = None, np.inf
+ if data_to_plot is not None and len(data_to_plot.conditions):
+ contains_inf = np.inf in data_to_plot.conditions
+ finite_conditions = data_to_plot.conditions[
+ data_to_plot.conditions != np.inf
+ ]
+ max_finite_cond = (
+ np.max(finite_conditions)
+ if finite_conditions.size
+ else None
+ )
+ min_cond = min(data_to_plot.conditions)
+ return contains_inf, max_finite_cond, min_cond
+
+ splitaxes = False
+ ax_inf = None
+ t_inf, ax_finite_right_limit, ax_left_limit = None, None, np.inf
+ for dataplot in subplot.data_plots:
+ (
+ measurements_to_plot,
+ simulations_to_plot,
+ ) = self.data_provider.get_data_to_plot(
+ dataplot, subplot.plotTypeData == PROVIDED
+ )
+
+ contains_inf_m, max_finite_cond_m, min_cond_m = check_data_to_plot(
+ measurements_to_plot
+ )
+ contains_inf_s, max_finite_cond_s, min_cond_s = check_data_to_plot(
+ simulations_to_plot
+ )
+
+ if max_finite_cond_m is not None:
+ ax_finite_right_limit = (
+ max(ax_finite_right_limit, max_finite_cond_m)
+ if ax_finite_right_limit is not None
+ else max_finite_cond_m
+ )
+ if max_finite_cond_s is not None:
+ ax_finite_right_limit = (
+ max(ax_finite_right_limit, max_finite_cond_s)
+ if ax_finite_right_limit is not None
+ else max_finite_cond_s
+ )
+
+ ax_left_limit = min(ax_left_limit, min(min_cond_m, min_cond_s))
+ # check if t=inf is contained in any data to be plotted on the
+ # subplot
+ if not splitaxes:
+ splitaxes = contains_inf_m or contains_inf_s
+
+ if splitaxes:
+ # if t=inf is the only time point in measurements and simulations
+ # ax_finite_right_limit will be None and ax_left_limit will be
+ # equal to np.inf
+ if ax_finite_right_limit is None and ax_left_limit == np.inf:
+ ax_finite_right_limit = 10
+ ax_left_limit = 0
+ t_inf = (
+ ax_finite_right_limit
+ + (ax_finite_right_limit - ax_left_limit) * 0.1
+ )
+ # create axes for t=inf
+ divider = make_axes_locatable(ax)
+ ax_inf = divider.new_horizontal(size="10%", pad=0.3)
+ fig.add_axes(ax_inf)
+
+ return {
+ "ax_inf": ax_inf,
+ "t_inf": t_inf,
+ "ax_finite_right_limit": ax_finite_right_limit,
+ "ax_left_limit": ax_left_limit,
+ }
+
+
+class SeabornPlotter(Plotter):
+ """
+ Seaborn wrapper (placeholder: ``generate_figure`` has an empty body).
+ """
+
+ def __init__(self, figure: Figure, data_provider: DataProvider):
+ super().__init__(figure, data_provider)
+
+ def generate_figure(
+ self, subplot_dir: str | None = None
+ ) -> dict[str, plt.Subplot] | None:
+ pass  # not implemented: nothing is plotted, None is returned
diff --git a/petab/v1/visualize/plotting.py b/petab/v1/visualize/plotting.py
new file mode 100644
index 00000000..e690df2c
--- /dev/null
+++ b/petab/v1/visualize/plotting.py
@@ -0,0 +1,1102 @@
+"""PEtab visualization data selection and visualization settings classes"""
+import warnings
+from numbers import Number, Real
+from pathlib import Path
+from typing import Literal
+
+import numpy as np
+import pandas as pd
+
+from .. import conditions, core, measurements
+from ..C import *
+from ..problem import Problem
+from .helper_functions import (
+ create_dataset_id_list_new,
+ generate_dataset_id_col,
+)
+
+__all__ = [
+ "DataSeries",
+ "DataPlot",
+ "Subplot",
+ "Figure",
+ "DataProvider",
+ "VisSpecParser",
+]
+
+# type aliases used in type hints
+IdsList = list[str]
+NumList = list[int]
+
+# The default figure size (default of the ``size`` argument of Figure)
+DEFAULT_FIGSIZE = [20, 15]
+
+# also for type hints
+# TODO: split into dataplot and subplot level dicts?
+# TODO: can be added now: module already relies on python>=3.10 syntax
+# class VisDict(TypedDict):
+# PLOT_NAME: str
+# PLOT_TYPE_SIMULATION: str
+# PLOT_TYPE_DATA: str
+# X_VALUES: str
+# X_OFFSET: List[Number]
+# X_LABEL: str
+# X_SCALE: str
+# Y_VALUES: List[str]
+# Y_OFFSET: List[Number]
+# Y_LABEL: str
+# Y_SCALE: str
+# LEGEND_ENTRY: List[Number]
+# DATASET_ID: List[str]
+
+
+class DataSeries:
+ """
+ Data (conditions and values to plot) for one individual line
+ """
+
+ def __init__(
+ self,
+ conditions_: np.ndarray | pd.Series | None,
+ data_to_plot: pd.DataFrame | None = None,
+ ):
+ self.data_to_plot = data_to_plot
+ if self.data_to_plot is not None:  # None is the declared default
+ self.data_to_plot.sort_index(inplace=True)
+
+ self.conditions = conditions_
+ self.inf_point = (
+ np.inf in self.conditions if self.conditions is not None else False
+ )
+ # sort for the case that indices of conditions and measurements
+ # differ: ndarray (indep_var='time') in place, Series by its index
+ if isinstance(self.conditions, np.ndarray):
+ self.conditions.sort()
+ elif isinstance(self.conditions, pd.Series):
+ self.conditions.sort_index(inplace=True)
+
+ def add_x_offset(self, offset) -> None:
+ """
+ Offset for the independent variable (no-op without conditions).
+
+ Parameters
+ ----------
+ offset:
+ Offset value.
+ """
+ if self.conditions is not None:
+ self.conditions += offset
+
+ def add_y_offset(self, offset):
+ if self.data_to_plot is not None:  # mirrors add_x_offset
+ self.data_to_plot["mean"] += offset
+ self.data_to_plot["repl"] += offset
+
+ def add_offsets(self, x_offset=0, y_offset=0) -> None:
+ """
+ Data offsets.
+
+ Parameters
+ ----------
+ x_offset:
+ Offset for the independent variable.
+ y_offset:
+ Offsets for the observable.
+ """
+ self.add_x_offset(x_offset)
+ self.add_y_offset(y_offset)
+
+
+class DataPlot:
+ """
+ Visualization settings for plotting a single data series, such as
+ one line on a subplot.
+ """
+
+ def __init__(self, plot_settings: dict):
+ """
+ Store all plot settings as attributes and fill in missing defaults.
+
+ Parameters
+ ----------
+ plot_settings: A plot spec for one dataplot
+ (only VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS)
+ """
+ for name, value in plot_settings.items():
+ setattr(self, name, value)
+
+ if DATASET_ID not in vars(self):
+ raise ValueError(f"{DATASET_ID} must be specified")
+ fallbacks = {
+ X_VALUES: TIME,  # TODO: singular?
+ X_OFFSET: 0,
+ Y_VALUES: "",
+ Y_OFFSET: 0.0,
+ LEGEND_ENTRY: getattr(self, DATASET_ID),
+ }
+ for name, fallback in fallbacks.items():
+ if name not in vars(self):
+ setattr(self, name, fallback)
+
+ @classmethod
+ def from_df(cls, plot_spec: pd.DataFrame):
+ """Create an instance from ``plot_spec`` converted via ``to_dict``."""
+ # every entry of the resulting dict becomes an attribute
+ return cls(plot_spec.to_dict())
+
+ def __repr__(self):
+ return f"{type(self).__name__}({vars(self)})"
+
+
+class Subplot:
+ """
+ Visualization specification of a subplot.
+ """
+
+ def __init__(
+ self,
+ plot_id: str,
+ plot_settings: dict,
+ dataplots: list[DataPlot] | None = None,
+ ):
+ """
+ Store the subplot settings as attributes and fill in defaults.
+
+ Parameters
+ ----------
+ plot_id:
+ Plot ID.
+ plot_settings:
+ Plot spec for a subplot (only VISUALIZATION_DF_SUBPLOT_LEVEL_COLS).
+ dataplots:
+ A list of data plots that should be plotted on one subplot.
+ """
+ # the plot ID first, then every entry of plot_settings as attribute
+ setattr(self, PLOT_ID, plot_id)
+ for name, value in plot_settings.items():
+ setattr(self, name, value)
+
+ # fallbacks for settings that plot_settings did not provide
+ fallbacks = {
+ PLOT_NAME: "",
+ PLOT_TYPE_SIMULATION: LINE_PLOT,
+ PLOT_TYPE_DATA: MEAN_AND_SD,
+ X_LABEL: TIME,  # TODO: getattr(self, X_VALUES)
+ X_SCALE: LIN,
+ Y_LABEL: "values",
+ Y_SCALE: LIN,
+ }
+ for name, fallback in fallbacks.items():
+ if name not in vars(self):
+ setattr(self, name, fallback)
+
+ self.data_plots = [] if dataplots is None else dataplots
+
+ # axes limits stay unset until set_axes_limits() is called
+ self.xlim = None
+ self.ylim = None
+
+ @classmethod
+ def from_df(
+ cls,
+ plot_id: str,
+ vis_spec: pd.DataFrame,
+ dataplots: list[DataPlot] | None = None,
+ ):
+ """Build a Subplot from the subplot-level columns of ``vis_spec``."""
+ vis_spec_dict = {}
+ for col in vis_spec:
+ if col in VISUALIZATION_DF_SUBPLOT_LEVEL_COLS:
+ entry = np.unique(vis_spec.loc[:, col])
+ if entry.size > 1:
+ warnings.warn(
+ f"For {PLOT_ID} {plot_id} in column "
+ f"{col} contradictory settings ({entry})"
+ f". Proceeding with first entry "
+ f"({entry[0]}).",
+ stacklevel=2,
+ )
+ entry = entry[0]  # first of the sorted unique values
+
+ # check if values are allowed
+ if (
+ col in [Y_SCALE, X_SCALE]
+ and entry not in OBSERVABLE_TRANSFORMATIONS
+ ):
+ raise ValueError(
+ f"{X_SCALE} and {Y_SCALE} have to be "
+ f"one of the following: "
+ + ", ".join(OBSERVABLE_TRANSFORMATIONS)
+ )
+ elif col == PLOT_TYPE_DATA and entry not in PLOT_TYPES_DATA:
+ raise ValueError(
+ f"{PLOT_TYPE_DATA} has to be one of the "
+ f"following: " + ", ".join(PLOT_TYPES_DATA)
+ )
+ elif (
+ col == PLOT_TYPE_SIMULATION
+ and entry not in PLOT_TYPES_SIMULATION
+ ):
+ raise ValueError(
+ f"{PLOT_TYPE_SIMULATION} has to be one of"
+ f" the following: " + ", ".join(PLOT_TYPES_SIMULATION)
+ )
+
+ # append new entry to dict
+ vis_spec_dict[col] = entry
+ else:
+ warnings.warn(
+ f"Column {col} cannot be used to specify subplot"
+ f", only settings from the following columns can"
+ f" be used: "
+ + ", ".join(VISUALIZATION_DF_SUBPLOT_LEVEL_COLS),
+ stacklevel=2,
+ )
+ return cls(plot_id, vis_spec_dict, dataplots)
+
+ def add_dataplot(self, dataplot: DataPlot) -> None:
+ """
+ Append a data plot to the ``data_plots`` list of this subplot.
+
+ Parameters
+ ----------
+ dataplot:
+ Data plot visualization settings.
+
+ """
+ self.data_plots.append(dataplot)
+
+ def set_axes_limits(
+ self,
+ xlim: tuple[Real | None, Real | None] | None = None,
+ ylim: tuple[Real | None, Real | None] | None = None,
+ ) -> None:
+ """
+ Set axes limits for this subplot. If xlim or ylim or any of the tuple
+ items is None, corresponding limit is left unchanged.
+
+ Parameters
+ ----------
+ xlim:
+ X axis limits.
+ ylim:
+ Y axis limits.
+ """
+ self.xlim = xlim  # only stored here, not applied to any axes
+ self.ylim = ylim
+
+
+class Figure:
+ """
+ Visualization specification of a figure.
+
+ Contains information regarding how data should be visualized.
+ """
+
+ def __init__(
+ self,
+ subplots: list[Subplot] | None = None,
+ size: tuple = DEFAULT_FIGSIZE,
+ title: tuple | None = None,
+ ):
+ """
+ Store the subplot specifications, the figure size and the title.
+
+ Parameters
+ ----------
+ subplots: A list of visualization specifications for each subplot
+ size: Figure size
+ title: Figure title
+ """
+ # TODO: Isensee measurements table in doc/examples doesn't correspond
+ # to documentation: observableTransformation and
+ # noiseDistribution columns replicateId problem
+ # TODO: Should we put in the documentation which combination of fields
+ # must be unique in the measurement table and add such check?
+ # obs_id + sim_cond_id + preeq_cod_id (if exists) + time +
+ # replicate_id (if exists)?
+ self.size = size  # NOTE: default is the shared module-level list
+ self.title = title
+ self.subplots = subplots if subplots is not None else []
+
+ @property
+ def num_subplots(self) -> int:
+ return len(self.subplots)  # number of subplot specifications
+
+ def add_subplot(self, subplot: Subplot) -> None:
+ """
+ Append a subplot to the ``subplots`` list of this figure.
+
+ Parameters
+ ----------
+ subplot:
+ Subplot visualization settings.
+
+ """
+ self.subplots.append(subplot)
+
+ def set_axes_limits(
+ self,
+ xlim: tuple[Real | None, Real | None] | None = None,
+ ylim: tuple[Real | None, Real | None] | None = None,
+ ) -> None:
+ """
+ Set axes limits for all subplots. If xlim or ylim or any of the tuple
+ items is None, corresponding limit is left unchanged.
+
+ Parameters
+ ----------
+ xlim:
+ X axis limits.
+ ylim:
+ Y axis limits.
+ """
+ for subplot in self.subplots:  # same limits for every subplot
+ subplot.set_axes_limits(xlim, ylim)
+
+ def save_to_tsv(self, output_file_path: str = "visuSpec.tsv") -> None:
+ """
+ Save full Visualization specification table.
+
+ Note that datasetId column in the resulting table might have been
+ generated even though datasetId column in Measurement table is missing
+ or is different. Please, correct it manually.
+
+ Parameters
+ ----------
+ output_file_path:
+ File path to which the generated visualization specification is
+ saved.
+ """
+ # TODO: what if datasetIds were generated?
+
+ warnings.warn(
+ f"Note: please check that {DATASET_ID} column "
+ f"corresponds to {DATASET_ID} column in Measurement "
+ f"(Simulation) table.",
+ stacklevel=2,
+ )
+
+ rows = []
+ for subplot in self.subplots:
+ subplot_level = {
+ key: value
+ for key, value in vars(subplot).items()
+ if key in VISUALIZATION_DF_SUBPLOT_LEVEL_COLS
+ }
+
+ for dataplot in subplot.data_plots:
+ dataset_level = {
+ key: value
+ for key, value in vars(dataplot).items()
+ if key in VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS
+ }
+ # one table row per data plot; data-plot-level keys take
+ # precedence over subplot-level keys of the same name
+ rows.append({**subplot_level, **dataset_level})
+
+ # a list of row dicts keeps the columns aligned even if some rows
+ # lack a key (missing entries become NaN)
+ visu_df = pd.DataFrame(rows)
+ visu_df.to_csv(output_file_path, sep="\t", index=False)
+
+
+class DataProvider:
+ """
+ Handles data selection.
+ """
+
+ def __init__(
+ self,
+ exp_conditions: pd.DataFrame,
+ measurements_data: pd.DataFrame | None = None,
+ simulations_data: pd.DataFrame | None = None,
+ ):
+ self.conditions_data = exp_conditions
+
+ if measurements_data is None and simulations_data is None:
+ raise TypeError(
+ "Not enough arguments. Either measurements_data "
+ "or simulations_data should be provided."
+ )
+ self.measurements_data = measurements_data  # either may be None,
+ self.simulations_data = simulations_data  # but not both
+
+ @staticmethod
+ def _matches_plot_spec(
+ df: pd.DataFrame, plot_spec: "DataPlot", dataset_id
+ ) -> pd.Series:
+ """
+ Construct an index for subsetting of the dataframe according to what
+ is specified in plot_spec.
+
+ Parameters
+ ----------
+ df:
+ Data frame to subset, from the measurement or simulation file.
+ plot_spec:
+ A visualization spec from the visualization file.
+ dataset_id:
+ Dataset ID that the selected rows must have.
+
+ Returns
+ -------
+ Boolean series that can be used for subsetting ``df``.
+ """
+ subset = df[DATASET_ID] == dataset_id
+ if getattr(plot_spec, Y_VALUES) == "":
+ if len(df.loc[subset, OBSERVABLE_ID].unique()) > 1:
+ raise ValueError(
+ f"{Y_VALUES} must be specified in visualization table if "
+ f"multiple different observables are available."
+ )
+ else:
+ subset &= df[OBSERVABLE_ID] == getattr(plot_spec, Y_VALUES)
+ return subset
+
+ def _get_independent_var_values(
+ self, data_df: pd.DataFrame, dataplot: DataPlot
+ ) -> tuple[np.ndarray, str, pd.Series]:
+ """
+ Get independent variable values.
+
+ Parameters
+ ----------
+ data_df:
+ A pandas data frame to subset, can be from measurement file or
+ simulation file.
+ dataplot:
+ Data plot visualization settings.
+
+ Returns
+ -------
+ col_name_unique:
+ A name of the column from Measurement (Simulation) table, which
+ specifies independent variable values (depends on the xValues entry
+ of visualization specification).
+ Possible values:
+
+ * TIME (independent variable values will be taken from the TIME
+ column of Measurement (Simulation) table)
+
+ * SIMULATION_CONDITION_ID (independent variable values will be
+ taken from one of the columns of Condition table)
+
+ uni_condition_id:
+ Time points
+ or
+ contains all unique condition IDs which should be
+ plotted together as one dataplot. Independent variable values will
+ be collected for these conditions
+ conditions_:
+ An independent variable values or None for the BarPlot case
+ possible values: time points, None, vales of independent variable
+ (Parameter or Species, specified in the xValues entry of
+ visualization specification) for each condition_id in
+ uni_condition_id
+
+ """
+ indep_var = getattr(dataplot, X_VALUES)
+
+ dataset_id = getattr(dataplot, DATASET_ID)
+
+ single_m_data = data_df[
+ self._matches_plot_spec(data_df, dataplot, dataset_id)
+ ]
+
+ # gather simulationConditionIds belonging to datasetId
+ uni_condition_id, uind = np.unique(
+ single_m_data[SIMULATION_CONDITION_ID], return_index=True
+ )
+ # keep the ordering which was given by user from top to bottom
+ # (avoid ordering by names '1','10','11','2',...)'
+ uni_condition_id = uni_condition_id[np.argsort(uind)]
+ col_name_unique = SIMULATION_CONDITION_ID
+
+ if indep_var == TIME:
+ # obtain unique observation times
+ uni_condition_id = single_m_data[TIME].unique()
+ col_name_unique = TIME
+ conditions_ = uni_condition_id
+ elif indep_var == "condition":
+ conditions_ = None
+ else:
+ # indep_var = parameterOrStateId case ?
+ # extract conditions (plot input) from condition file
+ ind_cond = self.conditions_data.index.isin(uni_condition_id)
+ conditions_ = self.conditions_data[ind_cond][indep_var]
+
+ return uni_condition_id, col_name_unique, conditions_
+
+ def get_data_series(
+ self,
+ data_df: pd.DataFrame,
+ data_col: Literal["measurement", "simulation"],
+ dataplot: DataPlot,
+ provided_noise: bool,
+ ) -> DataSeries:
+ """
+ Get data to plot from measurement or simulation DataFrame.
+
+ Parameters
+ ----------
+ data_df: measurement or simulation DataFrame
+ data_col: data column, i.e. 'measurement' or 'simulation'
+ dataplot: visualization specification
+ provided_noise:
+ True if numeric values for the noise level are provided in the
+ data table
+
+ Returns
+ -------
+ Data to plot
+ """
+ (
+ uni_condition_id,
+ col_name_unique,
+ conditions_,
+ ) = self._get_independent_var_values(data_df, dataplot)
+
+ dataset_id = getattr(dataplot, DATASET_ID)
+
+ # get data subset selected based on provided dataset_id
+ # and observable_ids
+ single_m_data = data_df[
+ self._matches_plot_spec(data_df, dataplot, dataset_id)
+ ]
+
+ # create empty dataframe for means and SDs
+ measurements_to_plot = pd.DataFrame(
+ columns=["mean", "noise_model", "sd", "sem", "repl"],
+ index=uni_condition_id,
+ )
+
+ for var_cond_id in uni_condition_id:
+ subset = single_m_data[col_name_unique] == var_cond_id
+
+ # what has to be plotted is selected
+ data_measurements = single_m_data.loc[subset, data_col]
+
+ # TODO: all this rather inside DataSeries?
+ # process the data
+ measurements_to_plot.at[var_cond_id, "mean"] = np.mean(
+ data_measurements
+ )
+ measurements_to_plot.at[var_cond_id, "sd"] = np.std(
+ data_measurements
+ )
+
+ if provided_noise and np.any(subset):
+ if (
+ len(single_m_data.loc[subset, NOISE_PARAMETERS].unique())
+ > 1
+ ):
+ raise NotImplementedError(
+ f"Datapoints with inconsistent {NOISE_PARAMETERS} "
+ f"is currently not implemented. Stopping."
+ )
+ tmp_noise = single_m_data.loc[subset, NOISE_PARAMETERS].values[
+ 0
+ ]
+ if isinstance(tmp_noise, str):
+ raise NotImplementedError(
+ "No numerical noise values provided in the "
+ "measurement table. Stopping."
+ )
+ if (
+ isinstance(tmp_noise, Number)
+ or tmp_noise.dtype == "float64"
+ ):
+ measurements_to_plot.at[
+ var_cond_id, "noise_model"
+ ] = tmp_noise
+
+ # standard error of mean
+ measurements_to_plot.at[var_cond_id, "sem"] = np.std(
+ data_measurements
+ ) / np.sqrt(len(data_measurements))
+
+ # single replicates
+ measurements_to_plot.at[
+ var_cond_id, "repl"
+ ] = data_measurements.values
+
+ data_series = DataSeries(conditions_, measurements_to_plot)
+ data_series.add_offsets(dataplot.xOffset, dataplot.yOffset)
+ return data_series
+
+ def get_data_to_plot(
+ self, dataplot: DataPlot, provided_noise: bool
+ ) -> tuple[DataSeries, DataSeries]:
+ """
+ Get data to plot.
+
+ Parameters
+ ----------
+ dataplot: visualization specification
+ provided_noise:
+ True if numeric values for the noise level are provided in the
+ measurement table
+
+ Returns
+ -----------
+ measurements_to_plot,
+ simulations_to_plot
+ """
+ measurements_to_plot = None
+ simulations_to_plot = None
+
+ if self.measurements_data is not None:
+ measurements_to_plot = self.get_data_series(
+ self.measurements_data, MEASUREMENT, dataplot, provided_noise
+ )
+
+ if self.simulations_data is not None:
+ simulations_to_plot = self.get_data_series(
+ self.simulations_data, SIMULATION, dataplot, provided_noise
+ )
+ return measurements_to_plot, simulations_to_plot
+
+
+class VisSpecParser:
+    """
+    Parser of visualization specification provided by user either in the form
+    of Visualization table or as a list of lists with datasets ids or
+    observable ids or condition ids. Figure instance is created containing
+    information regarding how data should be visualized. In addition to the
+    Figure instance, a DataProvider instance is created that will be
+    responsible for the data selection and manipulation.
+    """
+
+    def __init__(
+        self,
+        conditions_data: str | Path | pd.DataFrame,
+        exp_data: str | Path | pd.DataFrame | None = None,
+        sim_data: str | Path | pd.DataFrame | None = None,
+    ):
+        """
+        Parameters
+        ----------
+        conditions_data:
+            Condition table or path to a condition file.
+        exp_data:
+            Measurement table or path to a measurement file, optional.
+        sim_data:
+            Simulation table or path to a simulation file, optional.
+
+        Raises
+        ------
+        TypeError
+            If neither ``exp_data`` nor ``sim_data`` is provided.
+        """
+        if isinstance(conditions_data, str | Path):
+            conditions_data = conditions.get_condition_df(conditions_data)
+
+        # import from file in case experimental data is provided in file
+        if isinstance(exp_data, str | Path):
+            exp_data = measurements.get_measurement_df(exp_data)
+
+        if isinstance(sim_data, str | Path):
+            sim_data = core.get_simulation_df(sim_data)
+
+        if exp_data is None and sim_data is None:
+            raise TypeError(
+                "Not enough arguments. Either measurements_data "
+                "or simulations_data should be provided."
+            )
+
+        self.conditions_data = conditions_data
+        self.measurements_data = exp_data
+        self.simulations_data = sim_data
+
+    @classmethod
+    def from_problem(cls, petab_problem: Problem, sim_data):
+        """
+        Create a parser from the condition and measurement tables of a
+        PEtab problem plus simulation data.
+
+        Parameters
+        ----------
+        petab_problem:
+            Problem providing ``condition_df`` and ``measurement_df``.
+        sim_data:
+            Simulation table, path to a simulation file, or None.
+        """
+        return cls(
+            petab_problem.condition_df, petab_problem.measurement_df, sim_data
+        )
+
+    @property
+    def _data_df(self):
+        """The measurement table if available, else the simulation table."""
+        return (
+            self.measurements_data
+            if self.measurements_data is not None
+            else self.simulations_data
+        )
+
+    @staticmethod
+    def create_subplot(
+        plot_id: str, subplot_vis_spec: pd.DataFrame
+    ) -> Subplot:
+        """
+        Create subplot.
+
+        Parameters
+        ----------
+        plot_id:
+            Plot id.
+        subplot_vis_spec:
+            A visualization specification DataFrame that contains specification
+            for the subplot and corresponding dataplots.
+
+        Returns
+        -------
+        Subplot
+        """
+        subplot_columns = [
+            col
+            for col in subplot_vis_spec.columns
+            if col in VISUALIZATION_DF_SUBPLOT_LEVEL_COLS
+        ]
+        subplot = Subplot.from_df(
+            plot_id, subplot_vis_spec.loc[:, subplot_columns]
+        )
+
+        dataplot_cols = [
+            col
+            for col in subplot_vis_spec.columns
+            if col in VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS
+        ]
+        dataplot_spec = subplot_vis_spec.loc[:, dataplot_cols]
+
+        # one data plot per row of the specification
+        for _, row in dataplot_spec.iterrows():
+            subplot.add_dataplot(DataPlot.from_df(row))
+
+        return subplot
+
+    def parse_from_vis_spec(
+        self,
+        vis_spec: str | Path | pd.DataFrame | None,
+    ) -> tuple[Figure, DataProvider]:
+        """
+        Get visualization settings from a visualization specification.
+
+        Parameters
+        ----------
+        vis_spec:
+            Visualization specification DataFrame in the PEtab format
+            or a path to a visualization file. ``None`` is handled like an
+            empty specification table.
+
+        Returns
+        -------
+        A figure template with visualization settings and a data provider
+
+        Raises
+        ------
+        ValueError
+            If the specification contains a datasetId column, but a provided
+            measurement or simulation table does not.
+        """
+        # import visualization specification, if file was specified
+        if isinstance(vis_spec, str | Path):
+            vis_spec = core.get_visualization_df(vis_spec)
+
+        if vis_spec is None:
+            # no specification at all: take the same route as an empty
+            # table, which is expanded to one subplot per observable
+            vis_spec = pd.DataFrame()
+
+        if DATASET_ID not in vis_spec.columns:
+            self._add_dataset_id_col()
+            vis_spec = self._expand_vis_spec_settings(vis_spec)
+        else:
+            if (
+                self.measurements_data is not None
+                and DATASET_ID not in self.measurements_data
+            ):
+                raise ValueError(
+                    f"grouping by datasetId was requested, but "
+                    f"{DATASET_ID} column is missing from "
+                    f"measurement table"
+                )
+            if (
+                self.simulations_data is not None
+                and DATASET_ID not in self.simulations_data
+            ):
+                raise ValueError(
+                    f"grouping by datasetId was requested, but "
+                    f"{DATASET_ID} column is missing from "
+                    f"simulation table"
+                )
+
+        figure = Figure()
+
+        # get unique plotIDs preserving the order from the original vis spec
+        _, idx = np.unique(vis_spec[PLOT_ID], return_index=True)
+        plot_ids = vis_spec[PLOT_ID].iloc[np.sort(idx)]
+
+        # loop over unique plotIds
+        for plot_id in plot_ids:
+            # get indices for specific plotId
+            ind_plot = vis_spec[PLOT_ID] == plot_id
+
+            subplot = self.create_subplot(plot_id, vis_spec[ind_plot])
+            figure.add_subplot(subplot)
+
+        return figure, DataProvider(
+            self.conditions_data, self.measurements_data, self.simulations_data
+        )
+
+    def parse_from_id_list(
+        self,
+        ids_per_plot: list[IdsList] | None = None,
+        group_by: str = "observable",
+        plotted_noise: str | None = MEAN_AND_SD,
+    ) -> tuple[Figure, DataProvider]:
+        """
+        Get visualization settings from a list of ids and a grouping parameter.
+
+        Parameters
+        ----------
+        ids_per_plot:
+            A list of lists. Each sublist corresponds to a plot, each sublist
+            contains the Ids of datasets or observables or simulation
+            conditions for this plot.
+            e.g.
+
+            ::
+
+                dataset_ids_per_plot = [['dataset_1', 'dataset_2'],
+                                        ['dataset_1', 'dataset_4',
+                                         'dataset_5']]
+
+            or
+
+            ::
+
+                cond_id_list = [['model1_data1'],
+                                ['model1_data2', 'model1_data3'],
+                                ['model1_data4', 'model1_data5'],
+                                ['model1_data6']].
+
+            If None, one plot per observable is created.
+        group_by:
+            Grouping type. Possible values: 'dataset', 'observable',
+            'simulation'.
+        plotted_noise:
+            String indicating how noise should be visualized:
+            ['MeanAndSD' (default), 'MeanAndSEM', 'replicate', 'provided'].
+
+        Returns
+        -------
+        A figure template with visualization settings and a data provider
+
+        Raises
+        ------
+        ValueError
+            If grouping by dataset is requested, but the data table has no
+            datasetId column.
+        """
+        if ids_per_plot is None:
+            # this is the default case. If no grouping is specified,
+            # all observables are plotted. One observable per plot.
+            unique_obs_list = self._data_df[OBSERVABLE_ID].unique()
+            ids_per_plot = [[obs_id] for obs_id in unique_obs_list]
+
+        if group_by == "dataset" and DATASET_ID not in self._data_df:
+            raise ValueError(
+                f"grouping by datasetId was requested, but "
+                f"{DATASET_ID} column is missing from data table"
+            )
+
+        if group_by != "dataset":
+            # datasetId_list will be created (possibly overwriting previous
+            # list - only in the local variable, not in the tsv-file)
+            self._add_dataset_id_col()
+
+        columns_dict = self._get_vis_spec_dependent_columns_dict(
+            group_by, ids_per_plot
+        )
+
+        columns_dict[PLOT_TYPE_DATA] = [plotted_noise] * len(
+            columns_dict[DATASET_ID]
+        )
+
+        vis_spec_df = pd.DataFrame(columns_dict)
+
+        return self.parse_from_vis_spec(vis_spec_df)
+
+    @staticmethod
+    def _with_generated_dataset_id(data_df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Return a copy of ``data_df`` whose last column is a freshly
+        generated datasetId column (an existing one is discarded).
+        """
+        # ``drop`` always returns a new frame, so the frame passed in by
+        # the caller is never modified by the ``insert`` below
+        data_df = data_df.drop(columns=DATASET_ID, errors="ignore")
+        data_df.insert(
+            loc=data_df.columns.size,
+            column=DATASET_ID,
+            value=generate_dataset_id_col(data_df),
+        )
+        return data_df
+
+    def _add_dataset_id_col(self) -> None:
+        """
+        Add dataset_id column to the measurement table and simulations table
+        (possibly overwrite).
+
+        Only the tables stored on this parser are replaced; the DataFrames
+        originally passed in are left untouched.
+        """
+        if self.measurements_data is not None:
+            self.measurements_data = self._with_generated_dataset_id(
+                self.measurements_data
+            )
+
+        if self.simulations_data is not None:
+            self.simulations_data = self._with_generated_dataset_id(
+                self.simulations_data
+            )
+
+    def _get_vis_spec_dependent_columns_dict(
+        self, group_by: str, id_list: list[IdsList] | None = None
+    ) -> dict:
+        """
+        Helper method for creating values for columns PLOT_ID, DATASET_ID,
+        LEGEND_ENTRY, Y_VALUES for visualization specification file.
+
+        Parameters
+        ----------
+        group_by:
+            Grouping type.
+            Possible values: 'dataset', 'observable', 'simulation'.
+        id_list:
+            Grouping list. Each sublist corresponds to a subplot and
+            contains the Ids of datasets or observables or simulation
+            conditions for this subplot.
+
+        Returns
+        -------
+        A dictionary with values for columns PLOT_ID, DATASET_ID, \
+        LEGEND_ENTRY, Y_VALUES for visualization specification.
+        """
+        if group_by != "dataset":
+            dataset_id_list = create_dataset_id_list_new(
+                self._data_df, group_by, id_list
+            )
+        else:
+            dataset_id_list = id_list
+
+        dataset_id_column = [
+            i_dataset for sublist in dataset_id_list for i_dataset in sublist
+        ]
+
+        dataset_label_column = [
+            self._create_legend(i_dataset)
+            for sublist in dataset_id_list
+            for i_dataset in sublist
+        ]
+
+        # such dataset ids were generated that each dataset_id always
+        # corresponds to one observable
+        yvalues_column = [
+            self._data_df.loc[
+                self._data_df[DATASET_ID] == dataset_id, OBSERVABLE_ID
+            ].iloc[0]
+            for sublist in dataset_id_list
+            for dataset_id in sublist
+        ]
+
+        # get number of plots and create plotId-lists
+        # (plot ids are 1-based: plot1, plot2, ...)
+        plot_id_column = [
+            f"plot{ind + 1}"
+            for ind, inner_list in enumerate(dataset_id_list)
+            for _ in inner_list
+        ]
+
+        return {
+            PLOT_ID: plot_id_column,
+            DATASET_ID: dataset_id_column,
+            LEGEND_ENTRY: dataset_label_column,
+            Y_VALUES: yvalues_column,
+        }
+
+    def _create_legend(self, dataset_id: str) -> str:
+        """
+        Create a legend for the dataset ids.
+
+        The legend has the form "<condition name> - <observable id>"; the
+        condition ID is used if the condition table has no (or an empty)
+        condition name.
+
+        Parameters
+        ----------
+        dataset_id:
+            Dataset id.
+
+        Returns
+        -------
+        A legend.
+        """
+        # relies on the fact that dataset ids were created based on cond_ids
+        # and obs_ids. Therefore, in the following query all pairs will be
+        # the same
+        cond_id, obs_id = self._data_df[
+            self._data_df[DATASET_ID] == dataset_id
+        ][[SIMULATION_CONDITION_ID, OBSERVABLE_ID]].iloc[0, :]
+        tmp = self.conditions_data.loc[cond_id]
+        if CONDITION_NAME not in tmp.index or pd.isna(tmp[CONDITION_NAME]):
+            cond_name = cond_id
+        else:
+            cond_name = tmp[CONDITION_NAME]
+        return f"{cond_name} - {obs_id}"
+
+    def _expand_vis_spec_settings(
+        self, vis_spec: pd.DataFrame
+    ) -> pd.DataFrame:
+        """
+        Expand visualization specification for the case when DATASET_ID is not
+        in vis_spec.columns.
+
+        Parameters
+        ----------
+        vis_spec:
+            Visualization specification DataFrame in the PEtab format.
+
+        Returns
+        -------
+        A visualization specification DataFrame.
+
+        Raises
+        ------
+        ValueError
+            If ``vis_spec`` already contains a datasetId column.
+        """
+        if DATASET_ID in vis_spec.columns:
+            raise ValueError(
+                f"visualization specification expansion is "
+                f"unnecessary if column {DATASET_ID} is present"
+            )
+
+        if vis_spec.empty:
+            # in case of empty spec all measurements corresponding to each
+            # observable will be plotted on a separate subplot
+            observable_ids = self._data_df[OBSERVABLE_ID].unique()
+
+            vis_spec_exp_rows = [
+                self._vis_spec_rows_for_obs(obs_id, {PLOT_ID: f"plot{idx}"})
+                for idx, obs_id in enumerate(observable_ids)
+            ]
+            return pd.concat(vis_spec_exp_rows, ignore_index=True)
+
+        vis_spec_exp_rows = []
+        for _, row in vis_spec.iterrows():
+            if Y_VALUES in row:
+                vis_spec_exp_rows.append(
+                    self._vis_spec_rows_for_obs(row[Y_VALUES], row.to_dict())
+                )
+            else:
+                # no observable given: apply the row to every observable
+                observable_ids = self._data_df[OBSERVABLE_ID].unique()
+
+                for obs_id in observable_ids:
+                    vis_spec_exp_rows.append(
+                        self._vis_spec_rows_for_obs(obs_id, row.to_dict())
+                    )
+        return pd.concat(vis_spec_exp_rows, ignore_index=True)
+
+    def _vis_spec_rows_for_obs(
+        self, obs_id: str, settings: dict
+    ) -> pd.DataFrame:
+        """
+        Create vis_spec for one observable.
+
+        For each dataset_id corresponding to the observable with the specified
+        id create a vis_spec entry with provided settings.
+
+        Parameters
+        ----------
+        obs_id:
+            Observable ID.
+        settings:
+            Additional visualization settings. For each key that is a
+            valid visualization specification column name, the setting
+            will be added to the resulting visualization specification.
+
+        Returns
+        -------
+        A visualization specification DataFrame.
+        """
+        columns_to_expand = [
+            PLOT_ID,
+            PLOT_NAME,
+            PLOT_TYPE_SIMULATION,
+            PLOT_TYPE_DATA,
+            X_VALUES,
+            X_OFFSET,
+            X_LABEL,
+            X_SCALE,
+            Y_OFFSET,
+            Y_LABEL,
+            Y_SCALE,
+            LEGEND_ENTRY,
+        ]
+
+        dataset_ids = self._data_df[self._data_df[OBSERVABLE_ID] == obs_id][
+            DATASET_ID
+        ].unique()
+        n_rows = len(dataset_ids)
+        columns_dict = {DATASET_ID: dataset_ids, Y_VALUES: [obs_id] * n_rows}
+
+        # repeat each recognized setting for every dataset of the observable
+        for column in settings:
+            if column in columns_to_expand:
+                columns_dict[column] = [settings[column]] * n_rows
+
+        if LEGEND_ENTRY not in columns_dict:
+            columns_dict[LEGEND_ENTRY] = [
+                self._create_legend(dataset_id)
+                for dataset_id in columns_dict[DATASET_ID]
+            ]
+        return pd.DataFrame(columns_dict)
diff --git a/petab/visualize/templates/mystyle.css b/petab/v1/visualize/templates/mystyle.css
similarity index 100%
rename from petab/visualize/templates/mystyle.css
rename to petab/v1/visualize/templates/mystyle.css
diff --git a/petab/visualize/templates/report.html b/petab/v1/visualize/templates/report.html
similarity index 100%
rename from petab/visualize/templates/report.html
rename to petab/v1/visualize/templates/report.html
diff --git a/petab/v1/yaml.py b/petab/v1/yaml.py
new file mode 100644
index 00000000..ecffc48e
--- /dev/null
+++ b/petab/v1/yaml.py
@@ -0,0 +1,358 @@
+"""Code regarding the PEtab YAML config files"""
+from __future__ import annotations
+
+import os
+from pathlib import Path, PurePosixPath
+from typing import Any
+from urllib.parse import unquote, urlparse, urlunparse
+
+import jsonschema
+import numpy as np
+import pandas as pd
+import yaml
+from pandas.io.common import get_handle
+
+from .C import * # noqa: F403
+
+# directory with PEtab yaml schema files
+SCHEMA_DIR = Path(__file__).parent.parent / "schemas"
+# map of version number to validation schema
+# NOTE: keep the newest version last -- validate_yaml_syntax() falls back
+# to the last entry when a YAML file does not state its format version.
+SCHEMAS = {
+    "1": SCHEMA_DIR / "petab_schema.v1.0.0.yaml",
+    "1.0.0": SCHEMA_DIR / "petab_schema.v1.0.0.yaml",
+    "2.0.0": SCHEMA_DIR / "petab_schema.v2.0.0.yaml",
+}
+
+# names exported by ``from <this module> import *``
+__all__ = [
+    "validate",
+    "validate_yaml_syntax",
+    "validate_yaml_semantics",
+    "load_yaml",
+    "is_composite_problem",
+    "assert_single_condition_and_sbml_file",
+    "write_yaml",
+    "create_problem_yaml",
+    "get_path_prefix",
+]
+
+
+def validate(
+    yaml_config: dict | str | Path,
+    path_prefix: None | str | Path = None,
+):
+    """Check a PEtab YAML config, first syntactically, then semantically
+
+    Arguments:
+        yaml_config:
+            PEtab YAML config as filename or dict.
+        path_prefix:
+            Base location for relative paths. Defaults to location of YAML
+            file if a filename was provided for ``yaml_config`` or the current
+            working directory.
+    """
+    # schema validation comes first: the semantic check assumes valid syntax
+    validate_yaml_syntax(yaml_config=yaml_config)
+    validate_yaml_semantics(yaml_config, path_prefix)
+
+
+def validate_yaml_syntax(
+    yaml_config: dict | str | Path, schema: None | dict | str = None
+):
+    """Validate PEtab YAML file syntax
+
+    Arguments:
+        yaml_config:
+            PEtab YAML file to validate, as file name or dictionary
+        schema:
+            Custom schema for validation, as dictionary or file name.
+            If not given, the schema is selected based on the format
+            version stated in ``yaml_config``.
+
+    Raises:
+        ValueError:
+            If no schema is known for the format version given in
+            ``yaml_config``.
+
+        see also :func:`jsonschema.validate`
+    """
+    yaml_config = load_yaml(yaml_config)
+
+    if schema is None:
+        # try get PEtab version from yaml file
+        # if this is not available, the file is not valid anyways,
+        # but let's still use the latest PEtab schema for full validation.
+        # The fallback has to be a *key* of SCHEMAS (a version string), not
+        # one of its values (a schema path), because it is looked up below.
+        version = yaml_config.get(FORMAT_VERSION, None) or list(SCHEMAS)[-1]
+        try:
+            schema = SCHEMAS[str(version)]
+        except KeyError as e:
+            raise ValueError(
+                "Unknown PEtab version given in problem "
+                f"specification: {version}"
+            ) from e
+    schema = load_yaml(schema)
+    jsonschema.validate(instance=yaml_config, schema=schema)
+
+
+def validate_yaml_semantics(
+    yaml_config: dict | str | Path,
+    path_prefix: None | str | Path = None,
+):
+    """Validate PEtab YAML file semantics
+
+    Check for existence of files. Assumes valid syntax.
+
+    Version number and contents of referenced files are not yet checked.
+
+    Arguments:
+        yaml_config:
+            PEtab YAML config as filename or dict.
+        path_prefix:
+            Base location for relative paths. Defaults to location of YAML
+            file if a filename was provided for ``yaml_config`` or the current
+            working directory.
+
+    Raises:
+        AssertionError: in case of problems
+    """
+    if not path_prefix:
+        if isinstance(yaml_config, str | Path):
+            path_prefix = get_path_prefix(yaml_config)
+        else:
+            path_prefix = ""
+
+    yaml_config = load_yaml(yaml_config)
+
+    def _with_prefix(_filename):
+        # prepend the base location, if there is one
+        return f"{path_prefix}/{_filename}" if path_prefix else _filename
+
+    def _check_file(_filename: str, _field: str):
+        # this could be a regular path or some local or remote URL
+        # the simplest check is just trying to load the respective table or
+        # sbml model
+        if _field == SBML_FILES:
+            from .models.sbml_model import SbmlModel
+
+            try:
+                SbmlModel.from_file(_filename)
+            except Exception as e:
+                raise AssertionError(
+                    f"Failed to read '{_filename}' provided as '{_field}'."
+                ) from e
+            return
+
+        try:
+            pd.read_csv(_filename, sep="\t")
+        except pd.errors.EmptyDataError:
+            # at this stage, we don't care about the content
+            pass
+        except Exception as e:
+            raise AssertionError(
+                f"Failed to read '{_filename}' provided as '{_field}'."
+            ) from e
+
+    # Handles both a single parameter file, and a parameter file that has been
+    # split into multiple subset files.
+    for parameter_subset_file in list(
+        np.array(yaml_config[PARAMETER_FILE]).flat
+    ):
+        # report the YAML field (not the file name again) in error messages
+        _check_file(_with_prefix(parameter_subset_file), PARAMETER_FILE)
+
+    for problem_config in yaml_config[PROBLEMS]:
+        for field in [
+            SBML_FILES,
+            CONDITION_FILES,
+            MEASUREMENT_FILES,
+            VISUALIZATION_FILES,
+            OBSERVABLE_FILES,
+        ]:
+            if field in problem_config:
+                for filename in problem_config[field]:
+                    _check_file(_with_prefix(filename), field)
+
+
+def load_yaml(yaml_config: dict | Path | str) -> dict:
+    """Return the YAML content as dictionary
+
+    Accepts an already parsed dictionary as well as anything that points
+    to a YAML document (file name or URL).
+
+    Arguments:
+        yaml_config:
+            PEtab YAML config as filename or dict or URL.
+
+    Returns:
+        ``yaml_config`` itself, unmodified, if it is a dictionary.
+        Otherwise the parsed content of the YAML file.
+    """
+    if not isinstance(yaml_config, dict):
+        # a location: open it and parse its content
+        with get_handle(yaml_config, mode="r") as io_handle:
+            return yaml.safe_load(io_handle.handle)
+
+    # already parsed; all PEtab problem yaml files are dictionaries
+    return yaml_config
+
+
+def is_composite_problem(yaml_config: dict | str | Path) -> bool:
+    """Tell whether the YAML config lists more than one problem
+
+    Arguments:
+        yaml_config: PEtab configuration as dictionary or YAML file name
+    """
+    problems = load_yaml(yaml_config)[PROBLEMS]
+    return len(problems) > 1
+
+
+def assert_single_condition_and_sbml_file(problem_config: dict) -> None:
+    """Ensure that the problem refers to at most one SBML file and at most
+    one condition file.
+
+    Arguments:
+        problem_config:
+            Dictionary as defined in the YAML schema inside the `problems`
+            list.
+    Raises:
+        NotImplementedError:
+            If multiple condition or SBML files specified.
+    """
+    for files_key in (SBML_FILES, CONDITION_FILES):
+        if len(problem_config[files_key]) > 1:
+            # TODO https://github.com/ICB-DCM/PEtab/issues/188
+            # TODO https://github.com/ICB-DCM/PEtab/issues/189
+            raise NotImplementedError(
+                "Support for multiple models or condition files is not yet "
+                "implemented."
+            )
+
+
+def write_yaml(yaml_config: dict[str, Any], filename: str | Path) -> None:
+    """Dump a PEtab YAML config to file
+
+    Keys are written in block style and in their original order.
+
+    Arguments:
+        yaml_config: Data to write
+        filename: File to create
+    """
+    dump_options = {"default_flow_style": False, "sort_keys": False}
+    with open(filename, "w") as outfile:
+        yaml.dump(yaml_config, outfile, **dump_options)
+
+
+def create_problem_yaml(
+    sbml_files: str | Path | list[str | Path],
+    condition_files: str | Path | list[str | Path],
+    measurement_files: str | Path | list[str | Path],
+    parameter_file: str | Path,
+    observable_files: str | Path | list[str | Path],
+    yaml_file: str | Path,
+    visualization_files: str | Path | list[str | Path] | None = None,
+    relative_paths: bool = True,
+    mapping_files: str | Path | list[str | Path] | None = None,
+) -> None:
+    """Create and write default YAML file for a single PEtab problem
+
+    Arguments:
+        sbml_files: Path of SBML model file or list of such
+        condition_files: Path of condition file or list of such
+        measurement_files: Path of measurement file or list of such
+        parameter_file: Path of parameter file
+        observable_files: Path of observable file or list of such
+        yaml_file: Path to which YAML file should be written
+        visualization_files:
+            Optional Path to visualization file or list of such
+        relative_paths:
+            whether all paths in the YAML file should be relative to the
+            location of the YAML file. If ``False``, then paths are left
+            unchanged.
+        mapping_files: Optional Path of mapping file or list of such
+    """
+
+    def as_list(files):
+        # wrap a single file into a list; lists and None pass through
+        return [files] if isinstance(files, Path | str) else files
+
+    sbml_files = as_list(sbml_files)
+    condition_files = as_list(condition_files)
+    measurement_files = as_list(measurement_files)
+    observable_files = as_list(observable_files)
+    visualization_files = as_list(visualization_files)
+    # a single mapping file must be wrapped as well; otherwise a string
+    # would be processed character by character below
+    mapping_files = as_list(mapping_files)
+
+    if relative_paths:
+        yaml_file_dir = Path(yaml_file).parent
+
+        def get_rel_to_yaml(paths: list[str] | None):
+            if paths is None:
+                return paths
+            return [
+                os.path.relpath(path, start=yaml_file_dir) for path in paths
+            ]
+
+        sbml_files = get_rel_to_yaml(sbml_files)
+        condition_files = get_rel_to_yaml(condition_files)
+        measurement_files = get_rel_to_yaml(measurement_files)
+        observable_files = get_rel_to_yaml(observable_files)
+        visualization_files = get_rel_to_yaml(visualization_files)
+        parameter_file = get_rel_to_yaml([parameter_file])[0]
+        mapping_files = get_rel_to_yaml(mapping_files)
+
+    problem_dic = {
+        CONDITION_FILES: condition_files,
+        MEASUREMENT_FILES: measurement_files,
+        SBML_FILES: sbml_files,
+        OBSERVABLE_FILES: observable_files,
+    }
+    # optional entries are only written if provided
+    if mapping_files:
+        problem_dic[MAPPING_FILES] = mapping_files
+
+    if visualization_files is not None:
+        problem_dic[VISUALIZATION_FILES] = visualization_files
+    yaml_dic = {
+        PARAMETER_FILE: parameter_file,
+        FORMAT_VERSION: 1,
+        PROBLEMS: [problem_dic],
+    }
+    write_yaml(yaml_dic, yaml_file)
+
+
+def get_path_prefix(yaml_path: Path | str) -> str:
+    """Get the path prefix from a PEtab problem yaml file.
+
+    The prefix is the location (parent directory or parent URL) relative to
+    which relative paths referenced in the given PEtab problem yaml file
+    are to be resolved.
+
+    Arguments:
+        yaml_path: PEtab problem YAML file path (local or URL).
+
+    Returns:
+        The path prefix for retrieving any referenced files with relative
+        paths.
+    """
+    yaml_path = str(yaml_path)
+
+    # yaml_path may be path or URL. It is treated as URL if it has a scheme
+    # and either that scheme is "file" or a network location is given.
+    parsed = urlparse(yaml_path)
+    is_url = bool(parsed.scheme) and (
+        parsed.scheme == "file" or bool(parsed.netloc)
+    )
+    if not is_url:
+        # a regular file path string
+        return str(Path(yaml_path).parent)
+
+    # a URL: swap the path component for its parent, keep everything else
+    parent_path = str(PurePosixPath(unquote(parsed.path)).parent)
+    return urlunparse(parsed._replace(path=parent_path))
diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py
new file mode 100644
index 00000000..98084fa5
--- /dev/null
+++ b/petab/v2/__init__.py
@@ -0,0 +1,16 @@
+"""The PEtab 2.0 subpackage.
+
+Contains all functionality related to handling PEtab 2.0 problems.
+"""
+from warnings import warn
+
+from ..v1 import * # noqa: F403, F401, E402
+
+# import after v1
+from .problem import Problem # noqa: F401
+
+# Module-level statement: runs when this package is imported and flags the
+# whole petab.v2 subpackage as experimental.
+warn(
+    "Support for PEtab2.0 and all of petab.v2 is experimental "
+    "and subject to changes!",
+    stacklevel=1,
+)
diff --git a/petab/v2/lint.py b/petab/v2/lint.py
new file mode 100644
index 00000000..87554e64
--- /dev/null
+++ b/petab/v2/lint.py
@@ -0,0 +1,567 @@
+"""Validation of PEtab problems"""
+from __future__ import annotations
+
+import logging
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from enum import IntEnum
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+from petab.v1 import (
+ assert_model_parameters_in_condition_or_parameter_table,
+)
+from petab.v1.C import (
+ ESTIMATE,
+ MODEL_ENTITY_ID,
+ NOISE_PARAMETERS,
+ NOMINAL_VALUE,
+ OBSERVABLE_PARAMETERS,
+ PARAMETER_DF_REQUIRED_COLS,
+ PARAMETER_ID,
+)
+from petab.v1.conditions import get_parametric_overrides
+from petab.v1.lint import (
+ _check_df,
+ assert_no_leading_trailing_whitespace,
+ assert_parameter_bounds_are_numeric,
+ assert_parameter_estimate_is_boolean,
+ assert_parameter_id_is_string,
+ assert_parameter_prior_parameters_are_valid,
+ assert_parameter_prior_type_is_valid,
+ assert_parameter_scale_is_valid,
+ assert_unique_parameter_ids,
+ check_ids,
+ check_parameter_bounds,
+)
+from petab.v1.measurements import split_parameter_replacement_list
+from petab.v1.observables import get_output_parameters, get_placeholders
+from petab.v1.parameters import (
+ get_valid_parameters_for_parameter_table,
+)
+from petab.v1.visualize.lint import validate_visualization_df
+
+from ..v1 import (
+ assert_measurement_conditions_present_in_condition_table,
+ check_condition_df,
+ check_measurement_df,
+ check_observable_df,
+)
+from .problem import Problem
+
+logger = logging.getLogger(__name__)
+
+__all__ = [
+ "ValidationIssueSeverity",
+ "ValidationIssue",
+ "ValidationResultList",
+ "ValidationError",
+ "ValidationTask",
+ "CheckModel",
+ "CheckTableExists",
+ "CheckMeasurementTable",
+ "CheckConditionTable",
+ "CheckObservableTable",
+ "CheckParameterTable",
+ "CheckAllParametersPresentInParameterTable",
+ "CheckValidParameterInConditionOrParameterTable",
+ "CheckVisualizationTable",
+ "lint_problem",
+ "default_validation_tasks",
+]
+
+
+class ValidationIssueSeverity(IntEnum):
+ """The severity of a validation issue."""
+
+ # INFO: Informational message, no action required
+ INFO = 10
+ # WARNING: Warning message, potential issues
+ WARNING = 20
+ # ERROR: Error message, action required
+ ERROR = 30
+ # CRITICAL: Critical error message, stops further validation
+ CRITICAL = 40
+
+
+@dataclass
+class ValidationIssue:
+ """The result of a validation task.
+
+ Attributes:
+ level: The level of the validation event.
+ message: The message of the validation event.
+ """
+
+ level: ValidationIssueSeverity
+ message: str
+
+ def __post_init__(self):
+ if not isinstance(self.level, ValidationIssueSeverity):
+ raise TypeError(
+ "`level` must be an instance of ValidationIssueSeverity."
+ )
+
+ def __str__(self):
+ return f"{self.level.name}: {self.message}"
+
+
+@dataclass
+class ValidationError(ValidationIssue):
+ """A validation result with level ERROR."""
+
+ level: ValidationIssueSeverity = field(
+ default=ValidationIssueSeverity.ERROR, init=False
+ )
+
+
+class ValidationResultList(list[ValidationIssue]):
+ """A list of validation results.
+
+ Contains all issues found during the validation of a PEtab problem.
+ """
+
+ def log(
+ self,
+ *,
+ logger: logging.Logger = logger,
+ min_level: ValidationIssueSeverity = ValidationIssueSeverity.INFO,
+ ):
+ """Log the validation results."""
+ for result in self:
+ if result.level < min_level:
+ continue
+ if result.level == ValidationIssueSeverity.INFO:
+ logger.info(result.message)
+ elif result.level == ValidationIssueSeverity.WARNING:
+ logger.warning(result.message)
+ elif result.level >= ValidationIssueSeverity.ERROR:
+ logger.error(result.message)
+
+ if not self:
+ logger.info("PEtab format check completed successfully.")
+
+ def has_errors(self) -> bool:
+ """Check if there are any errors in the validation results."""
+ return any(
+ result.level >= ValidationIssueSeverity.ERROR for result in self
+ )
+
+
+def lint_problem(problem: Problem | str | Path) -> ValidationResultList:
+ """Validate a PEtab problem.
+
+ Arguments:
+ problem:
+ PEtab problem to check. Instance of :class:`Problem` or path
+ to a PEtab problem yaml file.
+ Returns:
+ A list of validation results. Empty if no issues were found.
+ """
+
+ problem = Problem.get_problem(problem)
+
+ return problem.validate()
+
+
+class ValidationTask(ABC):
+ """A task to validate a PEtab problem."""
+
+ @abstractmethod
+ def run(self, problem: Problem) -> ValidationIssue | None:
+ """Run the validation task.
+
+ Arguments:
+ problem: PEtab problem to check.
+ Returns:
+ Validation results or ``None``
+ """
+ ...
+
+ def __call__(self, *args, **kwargs):
+ return self.run(*args, **kwargs)
+
+
+class CheckModel(ValidationTask):
+ """A task to validate the model of a PEtab problem."""
+
+ def run(self, problem: Problem) -> ValidationIssue | None:
+ if problem.model is None:
+ return ValidationError("Model is missing.")
+
+ if not problem.model.is_valid():
+ # TODO get actual model validation messages
+ return ValidationError("Model is invalid.")
+
+
+class CheckTableExists(ValidationTask):
+ """A task to check if a table exists in the PEtab problem."""
+
+ def __init__(self, table_name: str):
+ if table_name not in ["measurement", "observable", "parameter"]:
+ # all others are optional
+ raise ValueError(
+ f"Table name {table_name} is not supported. "
+ "Supported table names are 'measurement', 'observable', "
+ "'parameter'."
+ )
+ self.table_name = table_name
+
+ def run(self, problem: Problem) -> ValidationIssue | None:
+ if getattr(problem, f"{self.table_name}_df") is None:
+ return ValidationError(f"{self.table_name} table is missing.")
+
+
+class CheckMeasurementTable(ValidationTask):
+ """A task to validate the measurement table of a PEtab problem."""
+
+ def run(self, problem: Problem) -> ValidationIssue | None:
+ if problem.measurement_df is None:
+ return
+
+ try:
+ check_measurement_df(problem.measurement_df, problem.observable_df)
+
+ if problem.condition_df is not None:
+ # TODO: handle missing condition_df
+ assert_measurement_conditions_present_in_condition_table(
+ problem.measurement_df, problem.condition_df
+ )
+ except AssertionError as e:
+ return ValidationError(str(e))
+
+
+class CheckConditionTable(ValidationTask):
+ """A task to validate the condition table of a PEtab problem."""
+
+ def run(self, problem: Problem) -> ValidationIssue | None:
+ if problem.condition_df is None:
+ return
+
+ try:
+ check_condition_df(
+ problem.condition_df,
+ model=problem.model,
+ observable_df=problem.observable_df,
+ mapping_df=problem.mapping_df,
+ )
+ except AssertionError as e:
+ return ValidationError(str(e))
+
+
+class CheckObservableTable(ValidationTask):
+ """A task to validate the observable table of a PEtab problem."""
+
+ def run(self, problem: Problem):
+ if problem.observable_df is None:
+ return
+
+ try:
+ check_observable_df(
+ problem.observable_df,
+ )
+ except AssertionError as e:
+ return ValidationIssue(
+ level=ValidationIssueSeverity.ERROR, message=str(e)
+ )
+
+
+class CheckObservablesDoNotShadowModelEntities(ValidationTask):
+ """A task to check that observable IDs do not shadow model entities."""
+
+ def run(self, problem: Problem) -> ValidationIssue | None:
+ if problem.observable_df is None or problem.model is None:
+ return
+
+ shadowed_entities = [
+ obs_id
+ for obs_id in problem.observable_df.index
+ if problem.model.has_entity_with_id(obs_id)
+ ]
+ if shadowed_entities:
+ return ValidationError(
+ f"Observable IDs {shadowed_entities} shadow model entities."
+ )
+
+
+class CheckParameterTable(ValidationTask):
+ """A task to validate the parameter table of a PEtab problem."""
+
+ def run(self, problem: Problem) -> ValidationIssue | None:
+ if problem.parameter_df is None:
+ return
+
+ try:
+ df = problem.parameter_df
+ _check_df(df, PARAMETER_DF_REQUIRED_COLS[1:], "parameter")
+
+ if df.index.name != PARAMETER_ID:
+ return ValidationError(
+ f"Parameter table has wrong index {df.index.name}."
+ f" Expected {PARAMETER_ID}.",
+ )
+
+ check_ids(df.index.values, kind="parameter")
+
+ for column_name in PARAMETER_DF_REQUIRED_COLS[
+ 1:
+ ]: # 0 is PARAMETER_ID
+ if not np.issubdtype(df[column_name].dtype, np.number):
+ assert_no_leading_trailing_whitespace(
+ df[column_name].values, column_name
+ )
+
+ # nominal value is required for non-estimated parameters
+ non_estimated_par_ids = list(
+ df.index[
+ (df[ESTIMATE] != 1)
+ | (
+ pd.api.types.is_string_dtype(df[ESTIMATE])
+ and df[ESTIMATE] != "1"
+ )
+ ]
+ )
+ # TODO implement as validators
+ # `assert_has_fixed_parameter_nominal_values`
+ # and `assert_correct_table_dtypes`
+ if non_estimated_par_ids:
+ if NOMINAL_VALUE not in df:
+ return ValidationError(
+ "Parameter table contains parameters "
+ f"{non_estimated_par_ids} that are not "
+ "specified to be estimated, "
+ f"but column {NOMINAL_VALUE} is missing."
+ )
+ try:
+ df.loc[non_estimated_par_ids, NOMINAL_VALUE].apply(float)
+ except ValueError:
+ return ValidationError(
+ f"Expected numeric values for `{NOMINAL_VALUE}` "
+ "in parameter table "
+ "for all non-estimated parameters."
+ )
+
+ assert_parameter_id_is_string(df)
+ assert_parameter_scale_is_valid(df)
+ assert_parameter_bounds_are_numeric(df)
+ assert_parameter_estimate_is_boolean(df)
+ assert_unique_parameter_ids(df)
+ check_parameter_bounds(df)
+ assert_parameter_prior_type_is_valid(df)
+ assert_parameter_prior_parameters_are_valid(df)
+
+ except AssertionError as e:
+ return ValidationError(str(e))
+
+
+class CheckAllParametersPresentInParameterTable(ValidationTask):
+ """Ensure all required parameters are contained in the parameter table
+ with no additional ones."""
+
+ def run(self, problem: Problem) -> ValidationIssue | None:
+ if (
+ problem.model is None
+ or problem.parameter_df is None
+ or problem.observable_df is None
+ or problem.measurement_df is None
+ ):
+ return
+
+ required = get_required_parameters_for_parameter_table(problem)
+
+ allowed = get_valid_parameters_for_parameter_table(
+ model=problem.model,
+ condition_df=problem.condition_df,
+ observable_df=problem.observable_df,
+ measurement_df=problem.measurement_df,
+ mapping_df=problem.mapping_df,
+ )
+
+ actual = set(problem.parameter_df.index)
+ missing = required - actual
+ extraneous = actual - allowed
+
+ # missing parameters might be present under a different name based on
+ # the mapping table
+ if missing and problem.mapping_df is not None:
+ model_to_petab_mapping = {}
+ for map_from, map_to in zip(
+ problem.mapping_df.index.values,
+ problem.mapping_df[MODEL_ENTITY_ID],
+ strict=True,
+ ):
+ if map_to in model_to_petab_mapping:
+ model_to_petab_mapping[map_to].append(map_from)
+ else:
+ model_to_petab_mapping[map_to] = [map_from]
+ missing = {
+ missing_id
+ for missing_id in missing
+ if missing_id not in model_to_petab_mapping
+ or all(
+ mapping_parameter not in actual
+ for mapping_parameter in model_to_petab_mapping[missing_id]
+ )
+ }
+
+ if missing:
+ return ValidationError(
+ "Missing parameter(s) in the model or the "
+ "parameters table: " + str(missing)
+ )
+
+ if extraneous:
+ return ValidationError(
+ "Extraneous parameter(s) in parameter table: "
+ + str(extraneous)
+ )
+
+
+class CheckValidParameterInConditionOrParameterTable(ValidationTask):
+ """A task to check that all required and only allowed model parameters are
+ present in the condition or parameter table."""
+
+ def run(self, problem: Problem) -> ValidationIssue | None:
+ if (
+ problem.model is None
+ or problem.condition_df is None
+ or problem.parameter_df is None
+ ):
+ return
+
+ try:
+ assert_model_parameters_in_condition_or_parameter_table(
+ problem.model,
+ problem.condition_df,
+ problem.parameter_df,
+ problem.mapping_df,
+ )
+ except AssertionError as e:
+ return ValidationIssue(
+ level=ValidationIssueSeverity.ERROR, message=str(e)
+ )
+
+
+class CheckVisualizationTable(ValidationTask):
+ """A task to validate the visualization table of a PEtab problem."""
+
+ def run(self, problem: Problem) -> ValidationIssue | None:
+ if problem.visualization_df is None:
+ return
+
+ if validate_visualization_df(problem):
+ return ValidationIssue(
+ level=ValidationIssueSeverity.ERROR,
+ message="Visualization table is invalid.",
+ )
+
+
+def get_required_parameters_for_parameter_table(
+ problem: Problem,
+) -> set[str]:
+ """
+ Get set of parameters which need to go into the parameter table
+
+ Arguments:
+ problem: The PEtab problem
+ Returns:
+ Set of parameter IDs which PEtab requires to be present in the
+ parameter table. That is all {observable,noise}Parameters from the
+ measurement table as well as all parametric condition table overrides
+ that are not defined in the model.
+ """
+ parameter_ids = set()
+
+ # Add parameters from measurement table, unless they are fixed parameters
+ def append_overrides(overrides):
+ parameter_ids.update(
+ p
+ for p in overrides
+ if isinstance(p, str) and p not in problem.condition_df.columns
+ )
+
+ for _, row in problem.measurement_df.iterrows():
+ # we trust that the number of overrides matches
+ append_overrides(
+ split_parameter_replacement_list(
+ row.get(OBSERVABLE_PARAMETERS, None)
+ )
+ )
+ append_overrides(
+ split_parameter_replacement_list(row.get(NOISE_PARAMETERS, None))
+ )
+
+ # remove `observable_ids` when
+ # `get_output_parameters` is updated for PEtab v2/v1.1, where
+ # observable IDs are allowed in observable formulae
+ observable_ids = set(problem.observable_df.index)
+
+ # Add output parameters except for placeholders
+ for formula_type, placeholder_sources in (
+ (
+ # Observable formulae
+ {"observables": True, "noise": False},
+ # can only contain observable placeholders
+ {"noise": False, "observables": True},
+ ),
+ (
+ # Noise formulae
+ {"observables": False, "noise": True},
+ # can contain noise and observable placeholders
+ {"noise": True, "observables": True},
+ ),
+ ):
+ output_parameters = get_output_parameters(
+ problem.observable_df,
+ problem.model,
+ mapping_df=problem.mapping_df,
+ **formula_type,
+ )
+ placeholders = get_placeholders(
+ problem.observable_df,
+ **placeholder_sources,
+ )
+ parameter_ids.update(
+ p
+ for p in output_parameters
+ if p not in placeholders and p not in observable_ids
+ )
+
+ # Add condition table parametric overrides unless already defined in the
+ # model
+ parameter_ids.update(
+ p
+ for p in get_parametric_overrides(problem.condition_df)
+ if not problem.model.has_entity_with_id(p)
+ )
+
+ # remove parameters that occur in the condition table and are overridden
+ # for ALL conditions
+ for p in problem.condition_df.columns[
+ ~problem.condition_df.isnull().any()
+ ]:
+ try:
+ parameter_ids.remove(p)
+ except KeyError:
+ pass
+
+ return parameter_ids
+
+
+#: Validation tasks that should be run on any PEtab problem
+default_validation_tasks = [
+ CheckTableExists("measurement"),
+ CheckTableExists("observable"),
+ CheckTableExists("parameter"),
+ CheckModel(),
+ CheckMeasurementTable(),
+ CheckConditionTable(),
+ CheckObservableTable(),
+ CheckObservablesDoNotShadowModelEntities(),
+ CheckParameterTable(),
+ CheckAllParametersPresentInParameterTable(),
+ CheckVisualizationTable(),
+ CheckValidParameterInConditionOrParameterTable(),
+]
diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py
new file mode 100644
index 00000000..86cbe49c
--- /dev/null
+++ b/petab/v2/petab1to2.py
@@ -0,0 +1,145 @@
+"""Convert PEtab version 1 problems to version 2."""
+import shutil
+from itertools import chain
+from pathlib import Path
+
+from pandas.io.common import get_handle, is_url
+
+import petab.v1.C as C
+from petab.models import MODEL_TYPE_SBML
+from petab.v1 import Problem as ProblemV1
+from petab.v2.lint import lint_problem as lint_v2_problem
+from petab.yaml import get_path_prefix
+
+from ..v1 import lint_problem as lint_v1_problem
+from ..v1.yaml import load_yaml, validate, write_yaml
+from ..versions import get_major_version
+
+__all__ = ["petab1to2"]
+
+
+def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
+ """Convert from PEtab 1.0 to PEtab 2.0 format.
+
+ Convert a PEtab problem from PEtab 1.0 to PEtab 2.0 format.
+
+ Parameters
+ ----------
+ yaml_config: dict | Path | str
+ The PEtab problem as dictionary or YAML file name.
+ output_dir: Path | str
+ The output directory to save the converted PEtab problem. Passing
+ ``None`` (to return a :class:`petab.v2.Problem`) is not implemented yet.
+
+ Raises
+ ------
+ ValueError
+ If the input is invalid or does not pass linting or if the generated
+ files do not pass linting.
+ """
+ if output_dir is None:
+ # TODO requires petab.v2.Problem
+ raise NotImplementedError("Not implemented yet.")
+ elif isinstance(yaml_config, dict):
+ raise ValueError("If output_dir is given, yaml_config must be a file.")
+
+ if isinstance(yaml_config, Path | str):
+ yaml_file = str(yaml_config)
+ path_prefix = get_path_prefix(yaml_file)
+ yaml_config = load_yaml(yaml_config)
+ get_src_path = lambda filename: f"{path_prefix}/{filename}" # noqa: E731
+ else:
+ yaml_file = None
+ path_prefix = None
+ get_src_path = lambda filename: filename # noqa: E731
+
+ get_dest_path = lambda filename: f"{output_dir}/{filename}" # noqa: E731
+
+ # Validate original PEtab problem
+ validate(yaml_config, path_prefix=path_prefix)
+ if get_major_version(yaml_config) != 1:
+ raise ValueError("PEtab problem is not version 1.")
+ petab_problem = ProblemV1.from_yaml(yaml_file or yaml_config)
+ if lint_v1_problem(petab_problem):
+ raise ValueError("PEtab problem does not pass linting.")
+
+ # Update YAML file
+ new_yaml_config = _update_yaml(yaml_config)
+
+ # Write new YAML file
+ output_dir = Path(output_dir)
+ output_dir.mkdir(parents=True, exist_ok=True)
+ new_yaml_file = output_dir / Path(yaml_file).name
+ write_yaml(new_yaml_config, new_yaml_file)
+
+ # Update tables
+ # condition tables, observable tables, SBML files, parameter table:
+ # no changes - just copy
+ file = yaml_config[C.PARAMETER_FILE]
+ _copy_file(get_src_path(file), get_dest_path(file))
+
+ for problem_config in yaml_config[C.PROBLEMS]:
+ for file in chain(
+ problem_config.get(C.CONDITION_FILES, []),
+ problem_config.get(C.OBSERVABLE_FILES, []),
+ (
+ model[C.MODEL_LOCATION]
+ for model in problem_config.get(C.MODEL_FILES, {}).values()
+ ),
+ problem_config.get(C.MEASUREMENT_FILES, []),
+ problem_config.get(C.VISUALIZATION_FILES, []),
+ ):
+ _copy_file(get_src_path(file), get_dest_path(file))
+
+ # TODO: Measurements: preequilibration to experiments/timecourses once
+ # finalized
+ ...
+
+ # validate updated Problem
+ validation_issues = lint_v2_problem(new_yaml_file)
+
+ if validation_issues:
+ raise ValueError(
+ "Generated PEtab v2 problem did not pass linting: "
+ f"{validation_issues}"
+ )
+
+
+def _update_yaml(yaml_config: dict) -> dict:
+ """Update PEtab 1.0 YAML to PEtab 2.0 format."""
+ yaml_config = yaml_config.copy()
+
+ # Update format_version
+ yaml_config[C.FORMAT_VERSION] = "2.0.0"
+
+ # Add extensions
+ yaml_config[C.EXTENSIONS] = []
+
+ # Move models and set IDs (filename for now)
+ for problem in yaml_config[C.PROBLEMS]:
+ problem[C.MODEL_FILES] = {}
+ models = problem[C.MODEL_FILES]
+ for sbml_file in problem[C.SBML_FILES]:
+ model_id = sbml_file.split("/")[-1].split(".")[0]
+ models[model_id] = {
+ C.MODEL_LANGUAGE: MODEL_TYPE_SBML,
+ C.MODEL_LOCATION: sbml_file,
+ }
+ problem[C.MODEL_FILES] = problem.get(C.MODEL_FILES, {})
+ del problem[C.SBML_FILES]
+
+ return yaml_config
+
+
+def _copy_file(src: Path | str, dest: Path | str):
+ """Copy file."""
+ src = str(src)
+ dest = str(dest)
+
+ if is_url(src):
+ with get_handle(src, mode="r") as src_handle:
+ with open(dest, "w") as dest_handle:
+ dest_handle.write(src_handle.handle.read())
+ return
+
+ shutil.copy(str(src), str(dest))
diff --git a/petab/v2/problem.py b/petab/v2/problem.py
new file mode 100644
index 00000000..612f2571
--- /dev/null
+++ b/petab/v2/problem.py
@@ -0,0 +1,719 @@
+"""PEtab v2 problems."""
+from __future__ import annotations
+
+import logging
+import os
+import tempfile
+from math import nan
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import pandas as pd
+
+from ..v1 import (
+ conditions,
+ core,
+ mapping,
+ measurements,
+ observables,
+ parameter_mapping,
+ parameters,
+ sampling,
+ yaml,
+)
+from ..v1.C import * # noqa: F403
+from ..v1.models.model import Model, model_factory
+from ..v1.yaml import get_path_prefix
+
+if TYPE_CHECKING:
+ from ..v2.lint import ValidationIssue, ValidationResultList, ValidationTask
+
+
+__all__ = ["Problem"]
+
+
+class Problem:
+ """
+ PEtab parameter estimation problem as defined by
+
+ - model
+ - condition table
+ - measurement table
+ - parameter table
+ - observables table
+ - mapping table
+
+ Optionally it may contain visualization tables.
+
+ Parameters:
+ condition_df: PEtab condition table
+ measurement_df: PEtab measurement table
+ parameter_df: PEtab parameter table
+ observable_df: PEtab observable table
+ visualization_df: PEtab visualization table
+ mapping_df: PEtab mapping table
+ model: The underlying model
+ extensions_config: Information on the extensions used
+ """
+
+ def __init__(
+ self,
+ model: Model = None,
+ condition_df: pd.DataFrame = None,
+ measurement_df: pd.DataFrame = None,
+ parameter_df: pd.DataFrame = None,
+ visualization_df: pd.DataFrame = None,
+ observable_df: pd.DataFrame = None,
+ mapping_df: pd.DataFrame = None,
+ extensions_config: dict = None,
+ ):
+ from ..v2.lint import default_validation_tasks
+
+ self.condition_df: pd.DataFrame | None = condition_df
+ self.measurement_df: pd.DataFrame | None = measurement_df
+ self.parameter_df: pd.DataFrame | None = parameter_df
+ self.visualization_df: pd.DataFrame | None = visualization_df
+ self.observable_df: pd.DataFrame | None = observable_df
+ self.mapping_df: pd.DataFrame | None = mapping_df
+ self.model: Model | None = model
+ self.extensions_config = extensions_config or {}
+ self.validation_tasks: list[
+ ValidationTask
+ ] = default_validation_tasks.copy()
+
+ def __str__(self):
+ model = f"with model ({self.model})" if self.model else "without model"
+ conditions = (
+ f"{self.condition_df.shape[0]} conditions"
+ if self.condition_df is not None
+ else "without conditions table"
+ )
+
+ observables = (
+ f"{self.observable_df.shape[0]} observables"
+ if self.observable_df is not None
+ else "without observables table"
+ )
+
+ measurements = (
+ f"{self.measurement_df.shape[0]} measurements"
+ if self.measurement_df is not None
+ else "without measurements table"
+ )
+
+ if self.parameter_df is not None:
+ num_estimated_parameters = (
+ sum(self.parameter_df[ESTIMATE] == 1)
+ if ESTIMATE in self.parameter_df
+ else self.parameter_df.shape[0]
+ )
+ parameters = f"{num_estimated_parameters} estimated parameters"
+ else:
+ parameters = "without parameter_df table"
+
+ return (
+ f"PEtab Problem {model}, {conditions}, {observables}, "
+ f"{measurements}, {parameters}"
+ )
+
+ @staticmethod
+ def from_yaml(yaml_config: dict | Path | str) -> Problem:
+ """
+ Factory method to load model and tables as specified by YAML file.
+
+ Arguments:
+ yaml_config: PEtab configuration as dictionary or YAML file name
+ """
+ if isinstance(yaml_config, Path):
+ yaml_config = str(yaml_config)
+
+ if isinstance(yaml_config, str):
+ yaml_file = yaml_config
+ path_prefix = get_path_prefix(yaml_file)
+ yaml_config = yaml.load_yaml(yaml_config)
+ get_path = lambda filename: f"{path_prefix}/{filename}" # noqa: E731
+ else:
+ yaml_file = None
+ get_path = lambda filename: filename # noqa: E731
+
+ if yaml_config[FORMAT_VERSION] not in {"2.0.0"}:
+ # If we got a path to a v1 yaml file, try to auto-upgrade
+ from tempfile import TemporaryDirectory
+
+ from ..versions import get_major_version
+ from .petab1to2 import petab1to2
+
+ if get_major_version(yaml_config) == 1 and yaml_file:
+ logging.debug(
+ "Auto-upgrading problem from PEtab 1.0 to PEtab 2.0"
+ )
+ with TemporaryDirectory() as tmpdirname:
+ try:
+ petab1to2(yaml_file, output_dir=tmpdirname)
+ except Exception as e:
+ raise ValueError(
+ "Failed to auto-upgrade PEtab 1.0 problem to "
+ "PEtab 2.0"
+ ) from e
+ return Problem.from_yaml(
+ Path(tmpdirname) / Path(yaml_file).name
+ )
+ raise ValueError(
+ "Provided PEtab files are of unsupported version "
+ f"{yaml_config[FORMAT_VERSION]}. Expected 2.0.0."
+ )
+
+ if yaml.is_composite_problem(yaml_config):
+ raise ValueError(
+ "petab.Problem.from_yaml() can only be used for "
+ "yaml files comprising a single model. "
+ "Consider using "
+ "petab.CompositeProblem.from_yaml() instead."
+ )
+
+ problem0 = yaml_config["problems"][0]
+
+ if isinstance(yaml_config[PARAMETER_FILE], list):
+ parameter_df = parameters.get_parameter_df(
+ [get_path(f) for f in yaml_config[PARAMETER_FILE]]
+ )
+ else:
+ parameter_df = (
+ parameters.get_parameter_df(
+ get_path(yaml_config[PARAMETER_FILE])
+ )
+ if yaml_config[PARAMETER_FILE]
+ else None
+ )
+
+ if len(problem0[MODEL_FILES]) > 1:
+ # TODO https://github.com/PEtab-dev/libpetab-python/issues/6
+ raise NotImplementedError(
+ "Support for multiple models is not yet implemented."
+ )
+ if not problem0[MODEL_FILES]:
+ model = None
+ else:
+ model_id, model_info = next(iter(problem0[MODEL_FILES].items()))
+ model = model_factory(
+ get_path(model_info[MODEL_LOCATION]),
+ model_info[MODEL_LANGUAGE],
+ model_id=model_id,
+ )
+
+ measurement_files = [
+ get_path(f) for f in problem0.get(MEASUREMENT_FILES, [])
+ ]
+ # If there are multiple tables, we will merge them
+ measurement_df = (
+ core.concat_tables(
+ measurement_files, measurements.get_measurement_df
+ )
+ if measurement_files
+ else None
+ )
+
+ condition_files = [
+ get_path(f) for f in problem0.get(CONDITION_FILES, [])
+ ]
+ # If there are multiple tables, we will merge them
+ condition_df = (
+ core.concat_tables(condition_files, conditions.get_condition_df)
+ if condition_files
+ else None
+ )
+
+ visualization_files = [
+ get_path(f) for f in problem0.get(VISUALIZATION_FILES, [])
+ ]
+ # If there are multiple tables, we will merge them
+ visualization_df = (
+ core.concat_tables(visualization_files, core.get_visualization_df)
+ if visualization_files
+ else None
+ )
+
+ observable_files = [
+ get_path(f) for f in problem0.get(OBSERVABLE_FILES, [])
+ ]
+ # If there are multiple tables, we will merge them
+ observable_df = (
+ core.concat_tables(observable_files, observables.get_observable_df)
+ if observable_files
+ else None
+ )
+
+ mapping_files = [get_path(f) for f in problem0.get(MAPPING_FILES, [])]
+ # If there are multiple tables, we will merge them
+ mapping_df = (
+ core.concat_tables(mapping_files, mapping.get_mapping_df)
+ if mapping_files
+ else None
+ )
+
+ return Problem(
+ condition_df=condition_df,
+ measurement_df=measurement_df,
+ parameter_df=parameter_df,
+ observable_df=observable_df,
+ model=model,
+ visualization_df=visualization_df,
+ mapping_df=mapping_df,
+ extensions_config=yaml_config.get(EXTENSIONS, {}),
+ )
+
+ @staticmethod
+ def from_combine(filename: Path | str) -> Problem:
+ """Read PEtab COMBINE archive (http://co.mbine.org/documents/archive).
+
+ See also :py:func:`petab.create_combine_archive`.
+
+ Arguments:
+ filename: Path to the PEtab-COMBINE archive
+
+ Returns:
+ A :py:class:`petab.Problem` instance.
+ """
+ # function-level import, because module-level import interfered with
+ # other SWIG interfaces
+ try:
+ import libcombine
+ except ImportError as e:
+ raise ImportError(
+ "To use PEtab's COMBINE functionality, libcombine "
+ "(python-libcombine) must be installed."
+ ) from e
+
+ archive = libcombine.CombineArchive()
+ if archive.initializeFromArchive(str(filename)) is None:
+ raise ValueError(f"Invalid Combine Archive: {filename}")
+
+ with tempfile.TemporaryDirectory() as tmpdirname:
+ archive.extractTo(tmpdirname)
+ problem = Problem.from_yaml(
+ os.path.join(tmpdirname, archive.getMasterFile().getLocation())
+ )
+ archive.cleanUp()
+
+ return problem
+
+ @staticmethod
+ def get_problem(problem: str | Path | Problem) -> Problem:
+ """Get a PEtab problem from a file or a problem object.
+
+ Arguments:
+ problem: Path to a PEtab problem file or a PEtab problem object.
+
+ Returns:
+ A PEtab problem object.
+ """
+ if isinstance(problem, Problem):
+ return problem
+
+ if isinstance(problem, str | Path):
+ return Problem.from_yaml(problem)
+
+ raise TypeError(
+ "The argument `problem` must be a path to a PEtab problem file "
+ "or a PEtab problem object."
+ )
+
+ def get_optimization_parameters(self) -> list[str]:
+ """
+ Return list of optimization parameter IDs.
+
+ See :py:func:`petab.parameters.get_optimization_parameters`.
+ """
+ return parameters.get_optimization_parameters(self.parameter_df)
+
+ def get_optimization_parameter_scales(self) -> dict[str, str]:
+ """
+ Return dict of optimization parameter scaling strings.
+
+ See :py:func:`petab.parameters.get_optimization_parameter_scaling`.
+ """
+ return parameters.get_optimization_parameter_scaling(self.parameter_df)
+
+ def get_observable_ids(self) -> list[str]:
+ """
+ Returns list of observable ids.
+ """
+ return list(self.observable_df.index)
+
+ def _apply_mask(self, v: list, free: bool = True, fixed: bool = True):
+ """Apply mask of only free or only fixed values.
+
+ Parameters
+ ----------
+ v:
+ The full vector the mask is to be applied to.
+ free:
+ Whether to return free parameters, i.e. parameters to estimate.
+ fixed:
+ Whether to return fixed parameters, i.e. parameters not to
+ estimate.
+
+ Returns
+ -------
+ The reduced vector with applied mask.
+ """
+ if not free and not fixed:
+ return []
+ if not free:
+ return [v[ix] for ix in self.x_fixed_indices]
+ if not fixed:
+ return [v[ix] for ix in self.x_free_indices]
+ return v
+
+ def get_x_ids(self, free: bool = True, fixed: bool = True):
+ """Generic function to get parameter ids.
+
+ Parameters
+ ----------
+ free:
+ Whether to return free parameters, i.e. parameters to estimate.
+ fixed:
+ Whether to return fixed parameters, i.e. parameters not to
+ estimate.
+
+ Returns
+ -------
+ The parameter IDs.
+ """
+ v = list(self.parameter_df.index.values)
+ return self._apply_mask(v, free=free, fixed=fixed)
+
+ @property
+ def x_ids(self) -> list[str]:
+ """Parameter table parameter IDs"""
+ return self.get_x_ids()
+
+ @property
+ def x_free_ids(self) -> list[str]:
+ """Parameter table parameter IDs, for free parameters."""
+ return self.get_x_ids(fixed=False)
+
+ @property
+ def x_fixed_ids(self) -> list[str]:
+ """Parameter table parameter IDs, for fixed parameters."""
+ return self.get_x_ids(free=False)
+
+ def get_x_nominal(
+ self, free: bool = True, fixed: bool = True, scaled: bool = False
+ ):
+ """Generic function to get parameter nominal values.
+
+ Parameters
+ ----------
+ free:
+ Whether to return free parameters, i.e. parameters to estimate.
+ fixed:
+ Whether to return fixed parameters, i.e. parameters not to
+ estimate.
+ scaled:
+ Whether to scale the values according to the parameter scale,
+ or return them on linear scale.
+
+ Returns
+ -------
+ The parameter nominal values.
+ """
+ if NOMINAL_VALUE in self.parameter_df:
+ v = list(self.parameter_df[NOMINAL_VALUE])
+ else:
+ v = [nan] * len(self.parameter_df)
+
+ if scaled:
+ v = list(
+ parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE])
+ )
+ return self._apply_mask(v, free=free, fixed=fixed)
+
+ @property
+ def x_nominal(self) -> list:
+ """Parameter table nominal values"""
+ return self.get_x_nominal()
+
+ @property
+ def x_nominal_free(self) -> list:
+ """Parameter table nominal values, for free parameters."""
+ return self.get_x_nominal(fixed=False)
+
+ @property
+ def x_nominal_fixed(self) -> list:
+ """Parameter table nominal values, for fixed parameters."""
+ return self.get_x_nominal(free=False)
+
+ @property
+ def x_nominal_scaled(self) -> list:
+ """Parameter table nominal values with applied parameter scaling"""
+ return self.get_x_nominal(scaled=True)
+
+ @property
+ def x_nominal_free_scaled(self) -> list:
+ """Parameter table nominal values with applied parameter scaling,
+ for free parameters.
+ """
+ return self.get_x_nominal(fixed=False, scaled=True)
+
+ @property
+ def x_nominal_fixed_scaled(self) -> list:
+ """Parameter table nominal values with applied parameter scaling,
+ for fixed parameters.
+ """
+ return self.get_x_nominal(free=False, scaled=True)
+
+ def get_lb(
+ self, free: bool = True, fixed: bool = True, scaled: bool = False
+ ):
+ """Generic function to get lower parameter bounds.
+
+ Parameters
+ ----------
+ free:
+ Whether to return free parameters, i.e. parameters to estimate.
+ fixed:
+ Whether to return fixed parameters, i.e. parameters not to
+ estimate.
+ scaled:
+ Whether to scale the values according to the parameter scale,
+ or return them on linear scale.
+
+ Returns
+ -------
+ The lower parameter bounds.
+ """
+ v = list(self.parameter_df[LOWER_BOUND])
+ if scaled:
+ v = list(
+ parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE])
+ )
+ return self._apply_mask(v, free=free, fixed=fixed)
+
+ @property
+ def lb(self) -> list:
+ """Parameter table lower bounds."""
+ return self.get_lb()
+
+ @property
+ def lb_scaled(self) -> list:
+ """Parameter table lower bounds with applied parameter scaling"""
+ return self.get_lb(scaled=True)
+
+ def get_ub(
+ self, free: bool = True, fixed: bool = True, scaled: bool = False
+ ):
+ """Generic function to get upper parameter bounds.
+
+ Parameters
+ ----------
+ free:
+ Whether to return free parameters, i.e. parameters to estimate.
+ fixed:
+ Whether to return fixed parameters, i.e. parameters not to
+ estimate.
+ scaled:
+ Whether to scale the values according to the parameter scale,
+ or return them on linear scale.
+
+ Returns
+ -------
+ The upper parameter bounds.
+ """
+ v = list(self.parameter_df[UPPER_BOUND])
+ if scaled:
+ v = list(
+ parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE])
+ )
+ return self._apply_mask(v, free=free, fixed=fixed)
+
+ @property
+ def ub(self) -> list:
+ """Parameter table upper bounds"""
+ return self.get_ub()
+
+ @property
+ def ub_scaled(self) -> list:
+ """Parameter table upper bounds with applied parameter scaling"""
+ return self.get_ub(scaled=True)
+
+ @property
+ def x_free_indices(self) -> list[int]:
+ """Parameter table estimated parameter indices."""
+ estimated = list(self.parameter_df[ESTIMATE])
+ return [j for j, val in enumerate(estimated) if val != 0]
+
+ @property
+ def x_fixed_indices(self) -> list[int]:
+ """Parameter table non-estimated parameter indices."""
+ estimated = list(self.parameter_df[ESTIMATE])
+ return [j for j, val in enumerate(estimated) if val == 0]
+
+ def get_simulation_conditions_from_measurement_df(self) -> pd.DataFrame:
+ """See :func:`petab.get_simulation_conditions`."""
+ return measurements.get_simulation_conditions(self.measurement_df)
+
+ def get_optimization_to_simulation_parameter_mapping(self, **kwargs):
+ """
+ See
+ :py:func:`petab.parameter_mapping.get_optimization_to_simulation_parameter_mapping`,
+ to which all keyword arguments are forwarded.
+ """
+ return (
+ parameter_mapping.get_optimization_to_simulation_parameter_mapping(
+ condition_df=self.condition_df,
+ measurement_df=self.measurement_df,
+ parameter_df=self.parameter_df,
+ observable_df=self.observable_df,
+ model=self.model,
+ **kwargs,
+ )
+ )
+
+ def create_parameter_df(self, **kwargs) -> pd.DataFrame:
+ """Create a new PEtab parameter table
+
+ See :py:func:`create_parameter_df`.
+ """
+ return parameters.create_parameter_df(
+ model=self.model,
+ condition_df=self.condition_df,
+ observable_df=self.observable_df,
+ measurement_df=self.measurement_df,
+ mapping_df=self.mapping_df,
+ **kwargs,
+ )
+
+ def sample_parameter_startpoints(self, n_starts: int = 100, **kwargs):
+ """Create 2D array with starting points for optimization
+
+ See :py:func:`petab.sample_parameter_startpoints`.
+ """
+ return sampling.sample_parameter_startpoints(
+ self.parameter_df, n_starts=n_starts, **kwargs
+ )
+
+    def sample_parameter_startpoints_dict(
+        self, n_starts: int = 100
+    ) -> list[dict[str, float]]:
+        """Create dictionaries with starting points for optimization
+
+        See also :py:func:`petab.sample_parameter_startpoints`.
+
+        Returns:
+            A list of dictionaries mapping free parameter IDs to sampled
+            parameter values.
+        """
+        return [
+            dict(zip(self.x_free_ids, parameter_values, strict=True))
+            for parameter_values in self.sample_parameter_startpoints(
+                n_starts=n_starts
+            )
+        ]
+
+ def unscale_parameters(
+ self,
+ x_dict: dict[str, float],
+ ) -> dict[str, float]:
+ """Unscale parameter values.
+
+ Parameters
+ ----------
+ x_dict:
+ Keys are parameter IDs in the PEtab problem, values are scaled
+ parameter values.
+
+ Returns
+ -------
+ The unscaled parameter values.
+ """
+ return {
+ parameter_id: parameters.unscale(
+ parameter_value,
+ self.parameter_df[PARAMETER_SCALE][parameter_id],
+ )
+ for parameter_id, parameter_value in x_dict.items()
+ }
+
+ def scale_parameters(
+ self,
+ x_dict: dict[str, float],
+ ) -> dict[str, float]:
+ """Scale parameter values.
+
+ Parameters
+ ----------
+ x_dict:
+ Keys are parameter IDs in the PEtab problem, values are unscaled
+ parameter values.
+
+ Returns
+ -------
+ The scaled parameter values.
+ """
+ return {
+ parameter_id: parameters.scale(
+ parameter_value,
+ self.parameter_df[PARAMETER_SCALE][parameter_id],
+ )
+ for parameter_id, parameter_value in x_dict.items()
+ }
+
+ @property
+ def n_estimated(self) -> int:
+ """The number of estimated parameters."""
+ return len(self.x_free_indices)
+
+    @property
+    def n_measurements(self) -> int:
+        """Number of non-NaN measurements, as a built-in ``int``."""
+        return int(self.measurement_df[MEASUREMENT].notna().sum())
+
+    @property
+    def n_priors(self) -> int:
+        """Number of priors (non-NaN objective prior parameters)."""
+        if OBJECTIVE_PRIOR_PARAMETERS not in self.parameter_df:
+            return 0
+
+        return int(self.parameter_df[OBJECTIVE_PRIOR_PARAMETERS].notna().sum())
+
+    def validate(
+        self, validation_tasks: list[ValidationTask] = None
+    ) -> ValidationResultList:
+        """Validate the PEtab problem.
+
+        Arguments:
+            validation_tasks: List of validation tasks to run. If ``None``
+             or empty, :attr:`Problem.validation_tasks` are used.
+        Returns:
+            The validation results, ending at the first critical issue.
+        """
+        from ..v2.lint import ValidationIssueSeverity, ValidationResultList
+
+        validation_results = ValidationResultList()
+        if self.extensions_config:
+            validation_results.append(
+                ValidationIssue(
+                    ValidationIssueSeverity.WARNING,
+                    "Validation of PEtab extensions is not yet implemented, "
+                    "but the given problem uses the following extensions: "
+                    f"{', '.join(self.extensions_config.keys())}",
+                )
+            )
+
+        for task in validation_tasks or self.validation_tasks:
+            try:
+                cur_result = task.run(self)
+            except Exception as e:  # a crashing task is reported, not raised
+                cur_result = ValidationIssue(
+                    ValidationIssueSeverity.CRITICAL,
+                    f"Validation task {task} failed with exception: {e}",
+                )
+
+            if cur_result:
+                validation_results.append(cur_result)
+
+                if cur_result.level == ValidationIssueSeverity.CRITICAL:
+                    break
+
+        return validation_results
diff --git a/petab/version.py b/petab/version.py
index 54e96123..1d88e505 100644
--- a/petab/version.py
+++ b/petab/version.py
@@ -1,2 +1,2 @@
"""PEtab library version"""
-__version__ = "0.3.0"
+__version__ = "0.4.0"
diff --git a/petab/versions.py b/petab/versions.py
new file mode 100644
index 00000000..2e2eb2f4
--- /dev/null
+++ b/petab/versions.py
@@ -0,0 +1,35 @@
+"""Handling of PEtab version numbers."""
+from __future__ import annotations
+
+from pathlib import Path
+
+from petab.C import FORMAT_VERSION
+from petab.v1 import Problem as V1Problem
+from petab.v1.yaml import load_yaml
+from petab.v2 import Problem as V2Problem
+
+__all__ = [
+ "get_major_version",
+]
+
+
+def get_major_version(
+ problem: str | dict | Path | V1Problem | V2Problem,
+) -> int:
+ """Get the major version number of the given problem."""
+ if isinstance(problem, V1Problem):
+ return 1
+
+ if isinstance(problem, V2Problem):
+ return 2
+
+ if isinstance(problem, str | Path):
+ yaml_config = load_yaml(problem)
+ version = yaml_config.get(FORMAT_VERSION)
+ elif isinstance(problem, dict):
+ version = problem.get(FORMAT_VERSION)
+ else:
+ raise ValueError(f"Unsupported argument type: {type(problem)}")
+
+ version = str(version)
+ return int(version.split(".")[0])
diff --git a/petab/visualize/__init__.py b/petab/visualize/__init__.py
index 924be86a..2151c3f8 100644
--- a/petab/visualize/__init__.py
+++ b/petab/visualize/__init__.py
@@ -1,37 +1,10 @@
-"""
-Visualize
-=========
+"""Deprecated module for visualization of PEtab problems.
-PEtab comes with visualization functionality. Those need to be imported via
-``import petab.visualize``.
+Use petab.v1.visualize instead."""
-"""
-# ruff: noqa: F401
-import importlib.util
+from petab import _deprecated_import_v1
+from petab.v1.visualize import * # noqa: F403, F401, E402
-from .plotting import DataProvider, Figure
+from .plotting import DataProvider, Figure # noqa: F403, F401, E402
-__all__ = ["DataProvider", "Figure"]
-
-if importlib.util.find_spec("matplotlib") is not None:
- from .plot_data_and_simulation import (
- plot_problem,
- plot_with_vis_spec,
- plot_without_vis_spec,
- )
- from .plot_residuals import (
- plot_goodness_of_fit,
- plot_residuals_vs_simulation,
- )
- from .plotter import MPLPlotter
-
- __all__.extend(
- [
- "plot_without_vis_spec",
- "plot_with_vis_spec",
- "plot_problem",
- "plot_goodness_of_fit",
- "plot_residuals_vs_simulation",
- "MPLPlotter",
- ]
- )
+_deprecated_import_v1(__name__)
diff --git a/petab/visualize/data_overview.py b/petab/visualize/data_overview.py
index a327d655..356953da 100644
--- a/petab/visualize/data_overview.py
+++ b/petab/visualize/data_overview.py
@@ -1,87 +1,5 @@
-"""
-Functions for creating an overview report of a PEtab problem
-"""
+"""Deprecated module. Use petab.v1.visualize.data_overview instead."""
+from petab import _deprecated_import_v1
+from petab.v1.visualize.data_overview import * # noqa: F403, F401, E402
-from pathlib import Path
-from shutil import copyfile
-from typing import Union
-
-import pandas as pd
-
-import petab
-from petab.C import *
-
-__all__ = ["create_report"]
-
-
-def create_report(
- problem: petab.Problem, model_name: str, output_path: Union[str, Path] = ""
-) -> None:
- """Create an HTML overview data / model overview report
-
- Arguments:
- problem: PEtab problem
- model_name: Name of the model, used for file name for report
- output_path: Output directory
- """
- template_dir = Path(__file__).absolute().parent / "templates"
- output_path = Path(output_path)
- template_file = "report.html"
-
- data_per_observable = get_data_per_observable(problem.measurement_df)
- num_conditions = len(problem.condition_df.index)
-
- # Setup template engine
- import jinja2
-
- template_loader = jinja2.FileSystemLoader(searchpath=template_dir)
- template_env = jinja2.Environment(loader=template_loader, autoescape=True)
- template = template_env.get_template(template_file)
-
- # Render and save
- output_text = template.render(
- problem=problem,
- model_name=model_name,
- data_per_observable=data_per_observable,
- num_conditions=num_conditions,
- )
- with open(output_path / f"{model_name}.html", "w") as html_file:
- html_file.write(output_text)
- copyfile(template_dir / "mystyle.css", output_path / "mystyle.css")
-
-
-def get_data_per_observable(measurement_df: pd.DataFrame) -> pd.DataFrame:
- """Get table with number of data points per observable and condition
-
- Arguments:
- measurement_df: PEtab measurement data frame
- Returns:
- Pivot table with number of data points per observable and condition
- """
- my_measurements = measurement_df.copy()
-
- index = [SIMULATION_CONDITION_ID]
- if PREEQUILIBRATION_CONDITION_ID in my_measurements:
- my_measurements[PREEQUILIBRATION_CONDITION_ID] = (
- my_measurements[PREEQUILIBRATION_CONDITION_ID]
- .astype("object")
- .fillna("", inplace=True)
- )
- index.append(PREEQUILIBRATION_CONDITION_ID)
-
- data_per_observable = pd.pivot_table(
- my_measurements,
- values=MEASUREMENT,
- aggfunc="count",
- index=index,
- columns=[OBSERVABLE_ID],
- fill_value=0,
- )
-
- # Add row and column sums
- data_per_observable.loc["SUM", :] = data_per_observable.sum(axis=0).values
- data_per_observable["SUM"] = data_per_observable.sum(axis=1).values
-
- data_per_observable = data_per_observable.astype(int)
-
- return data_per_observable
+_deprecated_import_v1(__name__)
diff --git a/petab/visualize/lint.py b/petab/visualize/lint.py
index 0e973928..e1e6c536 100644
--- a/petab/visualize/lint.py
+++ b/petab/visualize/lint.py
@@ -1,175 +1,7 @@
-"""Validation of PEtab visualization files"""
-import logging
+"""Deprecated module for linting PEtab visualization files.
-import pandas as pd
+Use petab.v1.visualize.lint instead."""
+from petab import _deprecated_import_v1
+from petab.v1.visualize.lint import * # noqa: F403, F401, E402
-from .. import C, Problem
-from ..C import VISUALIZATION_DF_REQUIRED_COLS
-
-logger = logging.getLogger(__name__)
-
-
-def validate_visualization_df(problem: Problem) -> bool:
- """Validate visualization table
-
- Arguments:
- problem: The PEtab problem containing a visualization table
-
- Returns:
- ``True`` if errors occurred, ``False`` otherwise
- """
- vis_df = problem.visualization_df
- if vis_df is None or vis_df.empty:
- return False
-
- errors = False
-
- if missing_req_cols := (
- set(VISUALIZATION_DF_REQUIRED_COLS) - set(vis_df.columns)
- ):
- logger.error(
- f"Missing required columns {missing_req_cols} "
- "in visualization table."
- )
- errors = True
-
- # Set all unspecified optional values to their defaults to simplify
- # validation
- vis_df = vis_df.copy()
- _apply_defaults(vis_df)
-
- if unknown_types := (
- set(vis_df[C.PLOT_TYPE_SIMULATION].unique())
- - set(C.PLOT_TYPES_SIMULATION)
- ):
- logger.error(
- f"Unknown {C.PLOT_TYPE_SIMULATION}: {unknown_types}. "
- f"Must be one of {C.PLOT_TYPES_SIMULATION}"
- )
- errors = True
-
- if unknown_types := (
- set(vis_df[C.PLOT_TYPE_DATA].unique()) - set(C.PLOT_TYPES_DATA)
- ):
- logger.error(
- f"Unknown {C.PLOT_TYPE_DATA}: {unknown_types}. "
- f"Must be one of {C.PLOT_TYPES_DATA}"
- )
- errors = True
-
- if unknown_scale := (set(vis_df[C.X_SCALE].unique()) - set(C.X_SCALES)):
- logger.error(
- f"Unknown {C.X_SCALE}: {unknown_scale}. "
- f"Must be one of {C.X_SCALES}"
- )
- errors = True
-
- if any(
- (vis_df[C.X_SCALE] == "order")
- & (vis_df[C.PLOT_TYPE_SIMULATION] != C.LINE_PLOT)
- ):
- logger.error(
- f"{C.X_SCALE}=order is only allowed with "
- f"{C.PLOT_TYPE_SIMULATION}={C.LINE_PLOT}."
- )
- errors = True
-
- if unknown_scale := (set(vis_df[C.Y_SCALE].unique()) - set(C.Y_SCALES)):
- logger.error(
- f"Unknown {C.Y_SCALE}: {unknown_scale}. "
- f"Must be one of {C.Y_SCALES}"
- )
- errors = True
-
- if problem.condition_df is not None:
- # check for ambiguous values
- reserved_names = {C.TIME, "condition"}
- for reserved_name in reserved_names:
- if (
- reserved_name in problem.condition_df
- and reserved_name in vis_df[C.X_VALUES]
- ):
- logger.error(
- f"Ambiguous value for `{C.X_VALUES}`: "
- f"`{reserved_name}` has a special meaning as "
- f"`{C.X_VALUES}`, but there exists also a model "
- "entity with that name."
- )
- errors = True
-
- # check xValues exist in condition table
- for xvalue in set(vis_df[C.X_VALUES].unique()) - reserved_names:
- if xvalue not in problem.condition_df:
- logger.error(
- f"{C.X_VALUES} was set to `{xvalue}`, but no "
- "such column exists in the conditions table."
- )
- errors = True
-
- if problem.observable_df is not None:
- # yValues must be an observable
- for yvalue in vis_df[C.Y_VALUES].unique():
- if pd.isna(yvalue):
- # if there is only one observable, we default to that
- if len(problem.observable_df.index.unique()) == 1:
- continue
-
- logger.error(
- f"{C.Y_VALUES} must be specified if there is more "
- "than one observable."
- )
- errors = True
-
- if yvalue not in problem.observable_df.index:
- logger.error(
- f"{C.Y_VALUES} was set to `{yvalue}`, but no such "
- "observable exists in the observables table."
- )
- errors = True
-
- if problem.measurement_df is not None:
- referenced_datasets = set(filter(bool, vis_df[C.DATASET_ID].unique()))
- if referenced_datasets:
- existing_datasets = set(
- filter(bool, problem.measurement_df[C.DATASET_ID].unique())
- )
- if not referenced_datasets.issubset(existing_datasets):
- logger.error(
- f"Visualization table references {C.DATASET_ID}(s) "
- f"{referenced_datasets - existing_datasets}, but no such "
- "dataset(s) exist in the measurement table."
- )
- errors = True
-
- return errors
-
-
-def _apply_defaults(vis_df: pd.DataFrame):
- """
- Set default values.
-
- Adds default values to the given visualization table where no value was
- specified.
- """
-
- def set_default(column: str, value):
- if column not in vis_df:
- vis_df[column] = value
- elif value is not None:
- if isinstance(value, str):
- vis_df[column] = vis_df[column].astype("object")
- vis_df.fillna({column: value}, inplace=True)
-
- set_default(C.PLOT_NAME, "")
- set_default(C.PLOT_TYPE_SIMULATION, C.LINE_PLOT)
- set_default(C.PLOT_TYPE_DATA, C.MEAN_AND_SD)
- set_default(C.DATASET_ID, None)
- set_default(C.X_VALUES, C.TIME)
- set_default(C.X_OFFSET, 0)
- set_default(C.X_LABEL, vis_df[C.X_VALUES])
- set_default(C.X_SCALE, C.LIN)
- set_default(C.Y_VALUES, None)
- set_default(C.Y_OFFSET, 0)
- set_default(C.Y_LABEL, vis_df[C.Y_VALUES])
- set_default(C.Y_SCALE, C.LIN)
- set_default(C.LEGEND_ENTRY, vis_df[C.DATASET_ID])
+_deprecated_import_v1(__name__)
diff --git a/petab/visualize/plot_data_and_simulation.py b/petab/visualize/plot_data_and_simulation.py
index 0353e71a..0151665f 100644
--- a/petab/visualize/plot_data_and_simulation.py
+++ b/petab/visualize/plot_data_and_simulation.py
@@ -1,223 +1,7 @@
-"""Functions for plotting PEtab measurement files and simulation results in
-the same format.
-"""
+"""Deprecated module.
-from typing import Dict, List, Optional, Union
+Use petab.v1.visualize.plot_data_and_simulation instead."""
+from petab import _deprecated_import_v1
+from petab.v1.visualize.plot_data_and_simulation import * # noqa: F403, F401, E402
-import matplotlib.pyplot as plt
-import pandas as pd
-
-from .. import problem
-from ..C import *
-from .plotter import MPLPlotter
-from .plotting import VisSpecParser
-
-# for typehints
-IdsList = List[str]
-NumList = List[int]
-
-__all__ = ["plot_with_vis_spec", "plot_without_vis_spec", "plot_problem"]
-
-
-def plot_with_vis_spec(
- vis_spec_df: Union[str, pd.DataFrame],
- conditions_df: Union[str, pd.DataFrame],
- measurements_df: Optional[Union[str, pd.DataFrame]] = None,
- simulations_df: Optional[Union[str, pd.DataFrame]] = None,
- subplot_dir: Optional[str] = None,
- plotter_type: str = "mpl",
- format_: str = "png",
-) -> Optional[Dict[str, plt.Subplot]]:
- """
- Plot measurements and/or simulations. Specification of the visualization
- routines is provided in visualization table.
-
- Parameters
- ----------
- vis_spec_df:
- A visualization table.
- conditions_df:
- A condition DataFrame in the PEtab format or path to the condition
- file.
- measurements_df:
- A measurement DataFrame in the PEtab format or path to the data file.
- simulations_df:
- A simulation DataFrame in the PEtab format or path to the simulation
- output data file.
- subplot_dir:
- A path to the folder where single subplots should be saved.
- PlotIDs will be taken as file names.
- plotter_type:
- Specifies which library should be used for plot generation. Currently,
- only matplotlib is supported.
- format_:
- File format for the generated figure.
- (See :py:func:`matplotlib.pyplot.savefig` for supported options).
-
- Returns
- -------
- ax: Axis object of the created plot.
- None: In case subplots are saved to a file.
- """
- if measurements_df is None and simulations_df is None:
- raise TypeError(
- "Not enough arguments. Either measurements_data "
- "or simulations_data should be provided."
- )
-
- vis_spec_parser = VisSpecParser(
- conditions_df, measurements_df, simulations_df
- )
- figure, dataprovider = vis_spec_parser.parse_from_vis_spec(vis_spec_df)
-
- if plotter_type == "mpl":
- plotter = MPLPlotter(figure, dataprovider)
- else:
- raise NotImplementedError(
- "Currently, only visualization with " "matplotlib is possible."
- )
-
- return plotter.generate_figure(subplot_dir, format_=format_)
-
-
-def plot_without_vis_spec(
- conditions_df: Union[str, pd.DataFrame],
- grouping_list: Optional[List[IdsList]] = None,
- group_by: str = "observable",
- measurements_df: Optional[Union[str, pd.DataFrame]] = None,
- simulations_df: Optional[Union[str, pd.DataFrame]] = None,
- plotted_noise: str = MEAN_AND_SD,
- subplot_dir: Optional[str] = None,
- plotter_type: str = "mpl",
- format_: str = "png",
-) -> Optional[Dict[str, plt.Subplot]]:
- """
- Plot measurements and/or simulations. What exactly should be plotted is
- specified in a grouping_list.
- If grouping list is not provided, measurements (simulations) will be
- grouped by observable, i.e. all measurements for each observable will be
- visualized on one plot.
-
- Parameters
- ----------
- grouping_list:
- A list of lists. Each sublist corresponds to a plot, each subplot
- contains the Ids of datasets or observables or simulation conditions
- for this plot.
- group_by:
- Grouping type.
- Possible values: 'dataset', 'observable', 'simulation'.
- conditions_df:
- A condition DataFrame in the PEtab format or path to the condition
- file.
- measurements_df:
- A measurement DataFrame in the PEtab format or path to the data file.
- simulations_df:
- A simulation DataFrame in the PEtab format or path to the simulation
- output data file.
- plotted_noise:
- A string indicating how noise should be visualized:
- ['MeanAndSD' (default), 'MeanAndSEM', 'replicate', 'provided'].
- subplot_dir:
- A path to the folder where single subplots should be saved.
- PlotIDs will be taken as file names.
- plotter_type:
- Specifies which library should be used for plot generation. Currently,
- only matplotlib is supported.
- format_:
- File format for the generated figure.
- (See :py:func:`matplotlib.pyplot.savefig` for supported options).
-
- Returns
- -------
- ax: Axis object of the created plot.
- None: In case subplots are saved to a file.
- """
- if measurements_df is None and simulations_df is None:
- raise TypeError(
- "Not enough arguments. Either measurements_data "
- "or simulations_data should be provided."
- )
-
- vis_spec_parser = VisSpecParser(
- conditions_df, measurements_df, simulations_df
- )
-
- figure, dataprovider = vis_spec_parser.parse_from_id_list(
- grouping_list, group_by, plotted_noise
- )
-
- if plotter_type == "mpl":
- plotter = MPLPlotter(figure, dataprovider)
- else:
- raise NotImplementedError(
- "Currently, only visualization with " "matplotlib is possible."
- )
-
- return plotter.generate_figure(subplot_dir, format_=format_)
-
-
-def plot_problem(
- petab_problem: problem.Problem,
- simulations_df: Optional[Union[str, pd.DataFrame]] = None,
- grouping_list: Optional[List[IdsList]] = None,
- group_by: str = "observable",
- plotted_noise: str = MEAN_AND_SD,
- subplot_dir: Optional[str] = None,
- plotter_type: str = "mpl",
-) -> Optional[Dict[str, plt.Subplot]]:
- """
- Visualization using petab problem.
- If Visualization table is part of the petab_problem, it will be used for
- visualization. Otherwise, grouping_list will be used.
- If neither Visualization table nor grouping_list are available,
- measurements (simulations) will be grouped by observable, i.e. all
- measurements for each observable will be visualized on one plot.
-
- Parameters
- ----------
- petab_problem:
- A PEtab problem.
- simulations_df:
- A simulation DataFrame in the PEtab format or path to the simulation
- output data file.
- grouping_list:
- A list of lists. Each sublist corresponds to a plot, each subplot
- contains the Ids of datasets or observables or simulation conditions
- for this plot.
- group_by:
- Possible values: 'dataset', 'observable', 'simulation'.
- plotted_noise:
- A string indicating how noise should be visualized:
- ['MeanAndSD' (default), 'MeanAndSEM', 'replicate', 'provided'].
- subplot_dir:
- A string which is taken as path to the folder where single subplots
- should be saved. PlotIDs will be taken as file names.
- plotter_type:
- Specifies which library should be used for plot generation. Currently,
- only matplotlib is supported.
-
- Returns
- -------
- ax: Axis object of the created plot.
- None: In case subplots are saved to a file.
- """
- if petab_problem.visualization_df is not None:
- return plot_with_vis_spec(
- petab_problem.visualization_df,
- petab_problem.condition_df,
- petab_problem.measurement_df,
- simulations_df,
- subplot_dir,
- plotter_type,
- )
- return plot_without_vis_spec(
- petab_problem.condition_df,
- grouping_list,
- group_by,
- petab_problem.measurement_df,
- simulations_df,
- plotted_noise,
- subplot_dir,
- plotter_type,
- )
+_deprecated_import_v1(__name__)
diff --git a/petab/visualize/plot_residuals.py b/petab/visualize/plot_residuals.py
index 45a1e5a1..91136199 100644
--- a/petab/visualize/plot_residuals.py
+++ b/petab/visualize/plot_residuals.py
@@ -1,212 +1,5 @@
-"""
-Functions for plotting residuals.
-"""
-from pathlib import Path
-from typing import Optional, Tuple, Union
+"""Deprecated module. Use petab.v1.visualize.plot_residuals instead."""
+from petab import _deprecated_import_v1
+from petab.v1.visualize.plot_residuals import * # noqa: F403, F401, E402
-import matplotlib
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-from scipy import stats
-
-from ..C import *
-from ..calculate import calculate_residuals
-from ..core import get_simulation_df
-from ..problem import Problem
-
-__all__ = ["plot_goodness_of_fit", "plot_residuals_vs_simulation"]
-
-
-def plot_residuals_vs_simulation(
- petab_problem: Problem,
- simulations_df: Union[str, Path, pd.DataFrame],
- size: Optional[Tuple] = (10, 7),
- axes: Optional[Tuple[plt.Axes, plt.Axes]] = None,
-) -> matplotlib.axes.Axes:
- """
- Plot residuals versus simulation values for measurements with normal noise
- assumption.
-
- Parameters
- ----------
- petab_problem:
- A PEtab problem.
- simulations_df:
- A simulation DataFrame in the PEtab format or path to the simulation
- output data file.
- size:
- Figure size.
- axes:
- Axis object.
-
- Returns
- -------
- ax: Axis object of the created plot.
- """
- if isinstance(simulations_df, (str, Path)):
- simulations_df = get_simulation_df(simulations_df)
-
- if NOISE_DISTRIBUTION in petab_problem.observable_df:
- if OBSERVABLE_TRANSFORMATION in petab_problem.observable_df:
- observable_ids = petab_problem.observable_df[
- (petab_problem.observable_df[NOISE_DISTRIBUTION] == NORMAL)
- & (
- petab_problem.observable_df[OBSERVABLE_TRANSFORMATION]
- == LIN
- )
- ].index
-
- else:
- observable_ids = petab_problem.observable_df[
- petab_problem.observable_df[NOISE_DISTRIBUTION] == NORMAL
- ].index
- else:
- observable_ids = petab_problem.observable_df.index
-
- if observable_ids.empty:
- raise ValueError(
- "Residuals plot is only applicable for normal "
- "additive noise assumption"
- )
-
- if axes is None:
- fig, axes = plt.subplots(
- 1, 2, sharey=True, figsize=size, width_ratios=[2, 1]
- )
- fig.set_layout_engine("tight")
- fig.suptitle("Residuals")
-
- residual_df = calculate_residuals(
- measurement_dfs=petab_problem.measurement_df,
- simulation_dfs=simulations_df,
- observable_dfs=petab_problem.observable_df,
- parameter_dfs=petab_problem.parameter_df,
- )[0]
-
- normal_residuals = residual_df[
- residual_df[OBSERVABLE_ID].isin(observable_ids)
- ]
- simulations_normal = simulations_df[
- simulations_df[OBSERVABLE_ID].isin(observable_ids)
- ]
-
- # compare to standard normal distribution
- ks_result = stats.kstest(normal_residuals[RESIDUAL], stats.norm.cdf)
-
- # plot the residuals plot
- axes[0].hlines(
- y=0,
- xmin=min(simulations_normal[SIMULATION]),
- xmax=max(simulations_normal[SIMULATION]),
- ls="--",
- color="gray",
- )
- axes[0].scatter(simulations_normal[SIMULATION], normal_residuals[RESIDUAL])
- axes[0].text(
- 0.15,
- 0.85,
- f"Kolmogorov-Smirnov test results:\n"
- f"statistic: {ks_result[0]:.2f}\n"
- f"pvalue: {ks_result[1]:.2e} ",
- transform=axes[0].transAxes,
- )
- axes[0].set_xlabel("simulated values")
- axes[0].set_ylabel("residuals")
-
- # plot histogram
- axes[1].hist(
- normal_residuals[RESIDUAL], density=True, orientation="horizontal"
- )
- axes[1].set_xlabel("distribution")
-
- ymin, ymax = axes[0].get_ylim()
- ylim = max(abs(ymin), abs(ymax))
- axes[0].set_ylim(-ylim, ylim)
- axes[1].tick_params(
- left=False, labelleft=False, right=True, labelright=True
- )
-
- return axes
-
-
-def plot_goodness_of_fit(
- petab_problem: Problem,
- simulations_df: Union[str, Path, pd.DataFrame],
- size: Tuple = (10, 7),
- ax: Optional[plt.Axes] = None,
-) -> matplotlib.axes.Axes:
- """
- Plot goodness of fit.
-
- Parameters
- ----------
- petab_problem:
- A PEtab problem.
- simulations_df:
- A simulation DataFrame in the PEtab format or path to the simulation
- output data file.
- size:
- Figure size.
- ax:
- Axis object.
-
- Returns
- -------
- ax: Axis object of the created plot.
- """
- if isinstance(simulations_df, (str, Path)):
- simulations_df = get_simulation_df(simulations_df)
-
- if simulations_df is None or petab_problem.measurement_df is None:
- raise NotImplementedError(
- "Both measurements and simulation data "
- "are needed for goodness_of_fit"
- )
-
- residual_df = calculate_residuals(
- measurement_dfs=petab_problem.measurement_df,
- simulation_dfs=simulations_df,
- observable_dfs=petab_problem.observable_df,
- parameter_dfs=petab_problem.parameter_df,
- )[0]
- slope, intercept, r_value, p_value, std_err = stats.linregress(
- petab_problem.measurement_df["measurement"],
- simulations_df["simulation"],
- ) # x, y
-
- if ax is None:
- fig, ax = plt.subplots(figsize=size)
- fig.set_layout_engine("tight")
-
- ax.scatter(
- petab_problem.measurement_df["measurement"],
- simulations_df["simulation"],
- )
-
- ax.axis("square")
- xlim = ax.get_xlim()
- ylim = ax.get_ylim()
- lim = [min([xlim[0], ylim[0]]), max([xlim[1], ylim[1]])]
- ax.set_xlim(lim)
- ax.set_ylim(lim)
- x = np.linspace(lim, 100)
- ax.plot(x, x, linestyle="--", color="gray")
- ax.plot(x, intercept + slope * x, "r", label="fitted line")
-
- mse = np.mean(np.abs(residual_df["residual"]))
- ax.text(
- 0.1,
- 0.70,
- f"$R^2$: {r_value**2:.2f}\n"
- f"slope: {slope:.2f}\n"
- f"intercept: {intercept:.2f}\n"
- f"pvalue: {std_err:.2e}\n"
- f"mean squared error: {mse:.2e}\n",
- transform=ax.transAxes,
- )
-
- ax.set_title("Goodness of fit")
- ax.set_xlabel("simulated values")
- ax.set_ylabel("measurements")
- return ax
+_deprecated_import_v1(__name__)
diff --git a/petab/visualize/plotter.py b/petab/visualize/plotter.py
index c2ebe6e4..8b8eeba1 100644
--- a/petab/visualize/plotter.py
+++ b/petab/visualize/plotter.py
@@ -1,865 +1,9 @@
-"""PEtab visualization plotter classes"""
-import os
-from abc import ABC, abstractmethod
-from typing import Dict, List, Optional, Tuple, Union
-
-import matplotlib.axes
-import matplotlib.ticker as mtick
-import numpy as np
-import pandas as pd
-from matplotlib import pyplot as plt
-from mpl_toolkits.axes_grid1 import make_axes_locatable
-
-from ..C import *
-from .plotting import DataPlot, DataProvider, DataSeries, Figure, Subplot
-
-__all__ = ["Plotter", "MPLPlotter", "SeabornPlotter"]
-
-
-#: Line style (:class:`matplotlib.lines.Line2D` options) for the measurement
-# data in line plots
-measurement_line_kwargs = {
- "linestyle": "-.",
- "marker": "x",
- "markersize": 10,
-}
-#: Line style (:class:`matplotlib.lines.Line2D` options) for the simulation
-# data in line plots
-simulation_line_kwargs = {
- "linestyle": "-",
- "marker": "o",
- "markersize": 10,
-}
-
-
-class Plotter(ABC):
- """
- Plotter abstract base class.
-
- Attributes
- ----------
- figure:
- Figure instance that serves as a markup for the figure that
- should be generated
- data_provider:
- Data provider
- """
-
- def __init__(self, figure: Figure, data_provider: DataProvider):
- self.figure = figure
- self.data_provider = data_provider
-
- @abstractmethod
- def generate_figure(
- self, subplot_dir: Optional[str] = None
- ) -> Optional[Dict[str, plt.Subplot]]:
- pass
-
-
-class MPLPlotter(Plotter):
- """
- Matplotlib wrapper
- """
-
- def __init__(self, figure: Figure, data_provider: DataProvider):
- super().__init__(figure, data_provider)
-
- @staticmethod
- def _error_column_for_plot_type_data(plot_type_data: str) -> Optional[str]:
- """Translate PEtab plotTypeData value to column name of internal
- data representation
-
- Parameters
- ----------
- plot_type_data: PEtab plotTypeData value (the way replicates should be
- handled)
-
- Returns
- -------
- Name of corresponding column
- """
- if plot_type_data == MEAN_AND_SD:
- return "sd"
- if plot_type_data == MEAN_AND_SEM:
- return "sem"
- if plot_type_data == PROVIDED:
- return "noise_model"
- return None
-
- def generate_lineplot(
- self,
- ax: matplotlib.axes.Axes,
- dataplot: DataPlot,
- plotTypeData: str,
- splitaxes_params: dict,
- ) -> Tuple[matplotlib.axes.Axes, matplotlib.axes.Axes]:
- """
- Generate line plot.
-
- It is possible to plot only data or only simulation or both.
-
- Parameters
- ----------
- ax:
- Axis object.
- dataplot:
- Visualization settings for the plot.
- plotTypeData:
- Specifies how replicates should be handled.
- splitaxes_params:
-
- """
- simu_color = None
- (
- measurements_to_plot,
- simulations_to_plot,
- ) = self.data_provider.get_data_to_plot(
- dataplot, plotTypeData == PROVIDED
- )
- noise_col = self._error_column_for_plot_type_data(plotTypeData)
-
- label_base = dataplot.legendEntry
-
- # check if t_inf is there
- # todo: if only t_inf, adjust appearance for that case
- plot_at_t_inf = (
- measurements_to_plot is not None and measurements_to_plot.inf_point
- ) or (
- simulations_to_plot is not None and simulations_to_plot.inf_point
- )
-
- if (
- measurements_to_plot is not None
- and not measurements_to_plot.data_to_plot.empty
- ):
- # plotting all measurement data
-
- p = None
- if plotTypeData == REPLICATE:
- replicates = np.stack(
- measurements_to_plot.data_to_plot.repl.values
- )
- # sorts according to ascending order of conditions
- cond, replicates = zip(
- *sorted(zip(measurements_to_plot.conditions, replicates))
- )
- replicates = np.stack(replicates)
-
- if replicates.ndim == 1:
- replicates = np.expand_dims(replicates, axis=1)
-
- # plot first replicate
- p = ax.plot(
- cond,
- replicates[:, 0],
- label=label_base,
- **measurement_line_kwargs,
- )
-
- # plot other replicates with the same color
- ax.plot(
- cond,
- replicates[:, 1:],
- **measurement_line_kwargs,
- color=p[0].get_color(),
- )
-
- # construct errorbar-plots: noise specified above
- else:
- # sorts according to ascending order of conditions
- scond, smean, snoise = zip(
- *sorted(
- zip(
- measurements_to_plot.conditions,
- measurements_to_plot.data_to_plot["mean"],
- measurements_to_plot.data_to_plot[noise_col],
- )
- )
- )
-
- if np.inf in scond:
- # remove inf point
- scond = scond[:-1]
- smean = smean[:-1]
- snoise = snoise[:-1]
-
- if len(scond) > 0 and len(smean) > 0 and len(snoise) > 0:
- # if only t=inf there will be nothing to plot
- p = ax.errorbar(
- scond,
- smean,
- snoise,
- label=label_base,
- **measurement_line_kwargs,
- )
-
- # simulations should have the same colors if both measurements
- # and simulations are plotted
- simu_color = p[0].get_color() if p else None
-
- # construct simulation plot
- if (
- simulations_to_plot is not None
- and not simulations_to_plot.data_to_plot.empty
- ):
- # markers will be displayed only for points that have measurement
- # counterpart
- if measurements_to_plot is not None:
- meas_conditions = (
- measurements_to_plot.conditions.to_numpy()
- if isinstance(measurements_to_plot.conditions, pd.Series)
- else measurements_to_plot.conditions
- )
- every = [
- condition in meas_conditions
- for condition in simulations_to_plot.conditions
- ]
- else:
- every = None
-
- # sorts according to ascending order of conditions
- xs, ys = map(
- list,
- zip(
- *sorted(
- zip(
- simulations_to_plot.conditions,
- simulations_to_plot.data_to_plot["mean"],
- )
- )
- ),
- )
-
- if np.inf in xs:
- # remove inf point
- xs = xs[:-1]
- ys = ys[:-1]
- every = every[:-1] if every else None
-
- if len(xs) > 0 and len(ys) > 0:
- p = ax.plot(
- xs,
- ys,
- markevery=every,
- label=label_base + " simulation",
- color=simu_color,
- **simulation_line_kwargs,
- )
- # lines at t=inf should have the same colors also in case
- # only simulations are plotted
- simu_color = p[0].get_color()
-
- # plot inf points
- if plot_at_t_inf:
- ax, splitaxes_params["ax_inf"] = self._line_plot_at_t_inf(
- ax,
- plotTypeData,
- measurements_to_plot,
- simulations_to_plot,
- noise_col,
- label_base,
- splitaxes_params,
- color=simu_color,
- )
-
- return ax, splitaxes_params["ax_inf"]
-
- def generate_barplot(
- self,
- ax: "matplotlib.pyplot.Axes",
- dataplot: DataPlot,
- plotTypeData: str,
- ) -> None:
- """
- Generate barplot.
-
- Parameters
- ----------
- ax:
- Axis object.
- dataplot:
- Visualization settings for the plot.
- plotTypeData:
- Specifies how replicates should be handled.
- """
- # TODO: plotTypeData == REPLICATE?
- noise_col = self._error_column_for_plot_type_data(plotTypeData)
-
- (
- measurements_to_plot,
- simulations_to_plot,
- ) = self.data_provider.get_data_to_plot(
- dataplot, plotTypeData == PROVIDED
- )
-
- x_name = dataplot.legendEntry
-
- if simulations_to_plot:
- bar_kwargs = {
- "align": "edge",
- "width": -1 / 3,
- }
- else:
- bar_kwargs = {
- "align": "center",
- "width": 2 / 3,
- }
-
- color = plt.rcParams["axes.prop_cycle"].by_key()["color"][0]
-
- if measurements_to_plot is not None:
- ax.bar(
- x_name,
- measurements_to_plot.data_to_plot["mean"],
- yerr=measurements_to_plot.data_to_plot[noise_col],
- color=color,
- **bar_kwargs,
- label="measurement",
- )
-
- if simulations_to_plot is not None:
- bar_kwargs["width"] = -bar_kwargs["width"]
- ax.bar(
- x_name,
- simulations_to_plot.data_to_plot["mean"],
- color="white",
- edgecolor=color,
- **bar_kwargs,
- label="simulation",
- )
-
- def generate_scatterplot(
- self,
- ax: "matplotlib.pyplot.Axes",
- dataplot: DataPlot,
- plotTypeData: str,
- ) -> None:
- """
- Generate scatterplot.
-
- Parameters
- ----------
- ax:
- Axis object.
- dataplot:
- Visualization settings for the plot.
- plotTypeData:
- Specifies how replicates should be handled.
- """
- (
- measurements_to_plot,
- simulations_to_plot,
- ) = self.data_provider.get_data_to_plot(
- dataplot, plotTypeData == PROVIDED
- )
-
- if simulations_to_plot is None or measurements_to_plot is None:
- raise NotImplementedError(
- "Both measurements and simulation data "
- "are needed for scatter plots"
- )
- ax.scatter(
- measurements_to_plot.data_to_plot["mean"],
- simulations_to_plot.data_to_plot["mean"],
- label=getattr(dataplot, LEGEND_ENTRY),
- )
- self._square_plot_equal_ranges(ax)
-
- def generate_subplot(
- self,
- fig: matplotlib.figure.Figure,
- ax: matplotlib.axes.Axes,
- subplot: Subplot,
- ) -> None:
- """
- Generate subplot based on markup provided by subplot.
-
- Parameters
- ----------
- fig:
- Figure object.
- ax:
- Axis object.
- subplot:
- Subplot visualization settings.
- """
- # set yScale
- if subplot.yScale == LIN:
- ax.set_yscale("linear")
- elif subplot.yScale == LOG10:
- ax.set_yscale("log")
- elif subplot.yScale == LOG:
- ax.set_yscale("log", base=np.e)
-
- if subplot.plotTypeSimulation == BAR_PLOT:
- for data_plot in subplot.data_plots:
- self.generate_barplot(ax, data_plot, subplot.plotTypeData)
-
- # get rid of duplicate legends
- handles, labels = ax.get_legend_handles_labels()
- by_label = dict(zip(labels, handles))
- ax.legend(by_label.values(), by_label.keys())
-
- x_names = [x.legendEntry for x in subplot.data_plots]
- ax.set_xticks(range(len(x_names)))
- ax.set_xticklabels(x_names)
-
- for label in ax.get_xmajorticklabels():
- label.set_rotation(30)
- label.set_horizontalalignment("right")
- elif subplot.plotTypeSimulation == SCATTER_PLOT:
- for data_plot in subplot.data_plots:
- self.generate_scatterplot(ax, data_plot, subplot.plotTypeData)
- else:
- # set xScale
- if subplot.xScale == LIN:
- ax.set_xscale("linear")
- elif subplot.xScale == LOG10:
- ax.set_xscale("log")
- elif subplot.xScale == LOG:
- ax.set_xscale("log", base=np.e)
- # equidistant
- elif subplot.xScale == "order":
- ax.set_xscale("linear")
- # check if conditions are monotone decreasing or increasing
- if np.all(np.diff(subplot.conditions) < 0):
- # monot. decreasing -> reverse
- xlabel = subplot.conditions[::-1]
- conditions = range(len(subplot.conditions))[::-1]
- ax.set_xticks(range(len(conditions)), xlabel)
- elif np.all(np.diff(subplot.conditions) > 0):
- xlabel = subplot.conditions
- conditions = range(len(subplot.conditions))
- ax.set_xticks(range(len(conditions)), xlabel)
- else:
- raise ValueError(
- "Error: x-conditions do not coincide, "
- "some are mon. increasing, some "
- "monotonically decreasing"
- )
-
- splitaxes_params = self._preprocess_splitaxes(fig, ax, subplot)
- for data_plot in subplot.data_plots:
- ax, splitaxes_params["ax_inf"] = self.generate_lineplot(
- ax,
- data_plot,
- subplot.plotTypeData,
- splitaxes_params=splitaxes_params,
- )
- if splitaxes_params["ax_inf"] is not None:
- self._postprocess_splitaxes(
- ax, splitaxes_params["ax_inf"], splitaxes_params["t_inf"]
- )
-
- # show 'e' as basis not 2.7... in natural log scale cases
- def ticks(y, _):
- return rf"$e^{{{np.log(y):.0f}}}$"
-
- if subplot.xScale == LOG:
- ax.xaxis.set_major_formatter(mtick.FuncFormatter(ticks))
- if subplot.yScale == LOG:
- ax.yaxis.set_major_formatter(mtick.FuncFormatter(ticks))
-
- if subplot.plotTypeSimulation != BAR_PLOT:
- ax.legend()
- ax.set_title(subplot.plotName)
- if subplot.xlim:
- ax.set_xlim(subplot.xlim)
- if subplot.ylim:
- ax.set_ylim(subplot.ylim)
- ax.autoscale_view()
-
- # Beautify plots
- ax.set_xlabel(subplot.xLabel)
- ax.set_ylabel(subplot.yLabel)
-
- def generate_figure(
- self,
- subplot_dir: Optional[str] = None,
- format_: str = "png",
- ) -> Optional[Dict[str, plt.Subplot]]:
- """
- Generate the full figure based on the markup in the figure attribute.
-
- Parameters
- ----------
- subplot_dir:
- A path to the folder where single subplots should be saved.
- PlotIDs will be taken as file names.
- format_:
- File format for the generated figure.
- (See :py:func:`matplotlib.pyplot.savefig` for supported options).
-
- Returns
- -------
- ax:
- Axis object of the created plot.
- None:
- In case subplots are saved to file.
- """
- if subplot_dir is None:
- # compute, how many rows and columns we need for the subplots
- num_row = int(np.round(np.sqrt(self.figure.num_subplots)))
- num_col = int(np.ceil(self.figure.num_subplots / num_row))
-
- fig, axes = plt.subplots(
- num_row, num_col, squeeze=False, figsize=self.figure.size
- )
- fig.set_layout_engine("tight")
-
- for ax in axes.flat[self.figure.num_subplots :]:
- ax.remove()
-
- axes = dict(
- zip([plot.plotId for plot in self.figure.subplots], axes.flat)
- )
-
- for subplot in self.figure.subplots:
- if subplot_dir is not None:
- fig, ax = plt.subplots(figsize=self.figure.size)
- fig.set_layout_engine("tight")
- else:
- ax = axes[subplot.plotId]
-
- try:
- self.generate_subplot(fig, ax, subplot)
- except Exception as e:
- raise RuntimeError(
- f"Error plotting {getattr(subplot, PLOT_ID)}."
- ) from e
-
- if subplot_dir is not None:
- # TODO: why this doesn't work?
- plt.tight_layout()
- plt.savefig(
- os.path.join(subplot_dir, f"{subplot.plotId}.{format_}")
- )
- plt.close()
-
- if subplot_dir is None:
- # TODO: why this doesn't work?
- plt.tight_layout()
- return axes
-
- @staticmethod
- def _square_plot_equal_ranges(
- ax: "matplotlib.pyplot.Axes", lim: Optional[Union[List, Tuple]] = None
- ) -> "matplotlib.pyplot.Axes":
- """
- Square plot with equal range for scatter plots.
-
- Returns
- -------
- Updated axis object.
- """
- ax.axis("square")
-
- if lim is None:
- xlim = ax.get_xlim()
- ylim = ax.get_ylim()
- lim = [np.min([xlim[0], ylim[0]]), np.max([xlim[1], ylim[1]])]
-
- ax.set_xlim(lim)
- ax.set_ylim(lim)
-
- # Same tick mark on x and y
- ax.yaxis.set_major_locator(ax.xaxis.get_major_locator())
-
- return ax
-
- @staticmethod
- def _line_plot_at_t_inf(
- ax: matplotlib.axes.Axes,
- plotTypeData: str,
- measurements_to_plot: DataSeries,
- simulations_to_plot: DataSeries,
- noise_col: str,
- label_base: str,
- split_axes_params: dict,
- color=None,
- ) -> Tuple[matplotlib.axes.Axes, matplotlib.axes.Axes]:
- """
- Plot data at t=inf.
-
- Parameters
- ----------
- ax:
- Axis object for the data corresponding to the finite timepoints.
- plotTypeData:
- The way replicates should be handled.
- measurements_to_plot:
- Measurements to plot.
- simulations_to_plot:
- Simulations to plot.
- noise_col:
- The name of the error column for plot_type_data.
- label_base:
- Label base.
- split_axes_params:
- A dictionary of split axes parameters with
- - Axis object for the data corresponding to t=inf
- - Time value that represents t=inf
- - left and right limits for the axis where the data corresponding
- to the finite timepoints is plotted
- color:
- Line color.
-
- Returns
- -------
- Two axis objects: for the data corresponding to the finite timepoints
- and for the data corresponding to t=inf
- """
- ax_inf = split_axes_params["ax_inf"]
- t_inf = split_axes_params["t_inf"]
- ax_finite_right_limit = split_axes_params["ax_finite_right_limit"]
- ax_left_limit = split_axes_params["ax_left_limit"]
-
- timepoints_inf = [
- ax_finite_right_limit,
- t_inf,
- ax_finite_right_limit
- + (ax_finite_right_limit - ax_left_limit) * 0.2,
- ]
-
- # plot measurements
- if measurements_to_plot is not None and measurements_to_plot.inf_point:
- measurements_data_to_plot_inf = (
- measurements_to_plot.data_to_plot.loc[np.inf]
- )
-
- if plotTypeData == REPLICATE:
- p = None
- if plotTypeData == REPLICATE:
- replicates = measurements_data_to_plot_inf.repl
- if replicates.ndim == 0:
- replicates = np.expand_dims(replicates, axis=0)
-
- # plot first replicate
- p = ax_inf.plot(
- timepoints_inf,
- [replicates[0]] * 3,
- markevery=[1],
- label=label_base + " simulation",
- color=color,
- **measurement_line_kwargs,
- )
-
- # plot other replicates with the same color
- ax_inf.plot(
- timepoints_inf,
- [replicates[1:]] * 3,
- markevery=[1],
- color=p[0].get_color(),
- **measurement_line_kwargs,
- )
- else:
- p = ax_inf.plot(
- [timepoints_inf[0], timepoints_inf[2]],
- [
- measurements_data_to_plot_inf["mean"],
- measurements_data_to_plot_inf["mean"],
- ],
- color=color,
- **measurement_line_kwargs,
- )
- ax_inf.errorbar(
- t_inf,
- measurements_data_to_plot_inf["mean"],
- measurements_data_to_plot_inf[noise_col],
- label=label_base + " simulation",
- color=p[0].get_color(),
- **measurement_line_kwargs,
- )
-
- if color is None:
- # in case no color was provided from finite time points
- # plot and measurements are available corresponding
- # simulation should have the same color
- color = p[0].get_color()
-
- # plot simulations
- if simulations_to_plot is not None and simulations_to_plot.inf_point:
- simulations_data_to_plot_inf = (
- simulations_to_plot.data_to_plot.loc[np.inf]
- )
-
- if plotTypeData == REPLICATE:
- replicates = simulations_data_to_plot_inf.repl
- if replicates.ndim == 0:
- replicates = np.expand_dims(replicates, axis=0)
-
- # plot first replicate
- p = ax_inf.plot(
- timepoints_inf,
- [replicates[0]] * 3,
- markevery=[1],
- label=label_base,
- color=color,
- **simulation_line_kwargs,
- )
-
- # plot other replicates with the same color
- ax_inf.plot(
- timepoints_inf,
- [replicates[1:]] * 3,
- markevery=[1],
- color=p[0].get_color(),
- **simulation_line_kwargs,
- )
- else:
- ax_inf.plot(
- timepoints_inf,
- [simulations_data_to_plot_inf["mean"]] * 3,
- markevery=[1],
- color=color,
- **simulation_line_kwargs,
- )
-
- ax.set_xlim(right=ax_finite_right_limit)
- return ax, ax_inf
-
- @staticmethod
- def _postprocess_splitaxes(
- ax: matplotlib.axes.Axes, ax_inf: matplotlib.axes.Axes, t_inf: float
- ) -> None:
- """
- Postprocess the splitaxes: set axes limits, turn off unnecessary
- ticks and plot dashed lines highlighting the gap in the x axis.
-
- Parameters
- ----------
- ax:
- Axis object for the data corresponding to the finite timepoints.
- ax_inf:
- Axis object for the data corresponding to t=inf.
- t_inf:
- Time value that represents t=inf
- """
- ax_inf.tick_params(left=False, labelleft=False)
- ax_inf.spines["left"].set_visible(False)
- ax_inf.set_xticks([t_inf])
- ax_inf.set_xticklabels([r"$t_{\infty}$"])
-
- bottom, top = ax.get_ylim()
- left, right = ax.get_xlim()
- ax.spines["right"].set_visible(False)
- ax_inf.set_xlim(right, right + (right - left) * 0.2)
- d = (top - bottom) * 0.02
- ax_inf.vlines(
- x=right, ymin=bottom + d, ymax=top - d, ls="--", color="gray"
- ) # right
- ax.vlines(
- x=right, ymin=bottom + d, ymax=top - d, ls="--", color="gray"
- ) # left
- ax_inf.set_ylim(bottom, top)
- ax.set_ylim(bottom, top)
-
- def _preprocess_splitaxes(
- self,
- fig: matplotlib.figure.Figure,
- ax: matplotlib.axes.Axes,
- subplot: Subplot,
- ) -> Dict:
- """
- Prepare splitaxes if data at t=inf should be plotted: compute left and
- right limits for the axis where the data corresponding to the finite
- timepoints will be plotted, compute time point that will represent
- t=inf on the plot, create additional axes for plotting data at t=inf.
- """
-
- def check_data_to_plot(
- data_to_plot: DataSeries,
- ) -> Tuple[bool, Optional[float], float]:
- """
- Check if there is data available at t=inf and compute maximum and
- minimum finite time points that need to be plotted corresponding
- to a dataplot.
- """
- contains_inf = False
- max_finite_cond, min_cond = None, np.inf
- if data_to_plot is not None and len(data_to_plot.conditions):
- contains_inf = np.inf in data_to_plot.conditions
- finite_conditions = data_to_plot.conditions[
- data_to_plot.conditions != np.inf
- ]
- max_finite_cond = (
- np.max(finite_conditions)
- if finite_conditions.size
- else None
- )
- min_cond = min(data_to_plot.conditions)
- return contains_inf, max_finite_cond, min_cond
-
- splitaxes = False
- ax_inf = None
- t_inf, ax_finite_right_limit, ax_left_limit = None, None, np.inf
- for dataplot in subplot.data_plots:
- (
- measurements_to_plot,
- simulations_to_plot,
- ) = self.data_provider.get_data_to_plot(
- dataplot, subplot.plotTypeData == PROVIDED
- )
-
- contains_inf_m, max_finite_cond_m, min_cond_m = check_data_to_plot(
- measurements_to_plot
- )
- contains_inf_s, max_finite_cond_s, min_cond_s = check_data_to_plot(
- simulations_to_plot
- )
-
- if max_finite_cond_m is not None:
- ax_finite_right_limit = (
- max(ax_finite_right_limit, max_finite_cond_m)
- if ax_finite_right_limit is not None
- else max_finite_cond_m
- )
- if max_finite_cond_s is not None:
- ax_finite_right_limit = (
- max(ax_finite_right_limit, max_finite_cond_s)
- if ax_finite_right_limit is not None
- else max_finite_cond_s
- )
-
- ax_left_limit = min(ax_left_limit, min(min_cond_m, min_cond_s))
- # check if t=inf is contained in any data to be plotted on the
- # subplot
- if not splitaxes:
- splitaxes = contains_inf_m or contains_inf_s
-
- if splitaxes:
- # if t=inf is the only time point in measurements and simulations
- # ax_finite_right_limit will be None and ax_left_limit will be
- # equal to np.inf
- if ax_finite_right_limit is None and ax_left_limit == np.inf:
- ax_finite_right_limit = 10
- ax_left_limit = 0
- t_inf = (
- ax_finite_right_limit
- + (ax_finite_right_limit - ax_left_limit) * 0.1
- )
- # create axes for t=inf
- divider = make_axes_locatable(ax)
- ax_inf = divider.new_horizontal(size="10%", pad=0.3)
- fig.add_axes(ax_inf)
-
- return {
- "ax_inf": ax_inf,
- "t_inf": t_inf,
- "ax_finite_right_limit": ax_finite_right_limit,
- "ax_left_limit": ax_left_limit,
- }
-
-
-class SeabornPlotter(Plotter):
- """
- Seaborn wrapper.
- """
-
- def __init__(self, figure: Figure, data_provider: DataProvider):
- super().__init__(figure, data_provider)
-
- def generate_figure(
- self, subplot_dir: Optional[str] = None
- ) -> Optional[Dict[str, plt.Subplot]]:
- pass
+"""Deprecated module. Use petab.v1.visualize.plotter instead."""
+from petab import _deprecated_import_v1
+from petab.v1.visualize.plotter import * # noqa: F403, F401, E402
+from petab.v1.visualize.plotter import ( # noqa: F401
+ measurement_line_kwargs,
+ simulation_line_kwargs,
+)
+
+_deprecated_import_v1(__name__)
diff --git a/petab/visualize/plotting.py b/petab/visualize/plotting.py
index e1f874ce..a675cf51 100644
--- a/petab/visualize/plotting.py
+++ b/petab/visualize/plotting.py
@@ -1,1102 +1,6 @@
-"""PEtab visualization data selection and visualization settings classes"""
-import warnings
-from numbers import Number, Real
-from pathlib import Path
-from typing import Dict, List, Literal, Optional, Tuple, Union
+"""Deprecated module. Use petab.v1.visualize.plotting instead."""
+from petab import _deprecated_import_v1
+from petab.v1.visualize.plotting import * # noqa: F403, F401, E402
+from petab.v1.visualize.plotting import DEFAULT_FIGSIZE # noqa: F401
-import numpy as np
-import pandas as pd
-
-from .. import conditions, core, measurements
-from ..C import *
-from ..problem import Problem
-from .helper_functions import (
- create_dataset_id_list_new,
- generate_dataset_id_col,
-)
-
-__all__ = [
- "DataSeries",
- "DataPlot",
- "Subplot",
- "Figure",
- "DataProvider",
- "VisSpecParser",
-]
-
-# for typehints
-IdsList = List[str]
-NumList = List[int]
-
-# The default figure size
-DEFAULT_FIGSIZE = [20, 15]
-
-# also for type hints
-# TODO: split into dataplot and subplot level dicts?
-# TODO: add when only python>=3.8 is supported
-# class VisDict(TypedDict):
-# PLOT_NAME: str
-# PLOT_TYPE_SIMULATION: str
-# PLOT_TYPE_DATA: str
-# X_VALUES: str
-# X_OFFSET: List[Number]
-# X_LABEL: str
-# X_SCALE: str
-# Y_VALUES: List[str]
-# Y_OFFSET: List[Number]
-# Y_LABEL: str
-# Y_SCALE: str
-# LEGEND_ENTRY: List[Number]
-# DATASET_ID: List[str]
-
-
-class DataSeries:
- """
- Data for one individual line
- """
-
- def __init__(
- self,
- conditions_: Optional[Union[np.ndarray, pd.Series]],
- data_to_plot: Optional[pd.DataFrame] = None,
- ):
- self.data_to_plot = data_to_plot
- self.data_to_plot.sort_index(inplace=True)
-
- self.conditions = conditions_
- self.inf_point = (
- np.inf in self.conditions if self.conditions is not None else False
- )
- # sort index for the case that indices of conditions and
- # measurements differ. if indep_var='time', conditions is a
- # numpy array, if indep_var=observable it's a Series
- if isinstance(self.conditions, np.ndarray):
- self.conditions.sort()
- elif isinstance(self.conditions, pd.Series):
- self.conditions.sort_index(inplace=True)
-
- def add_x_offset(self, offset) -> None:
- """
- Offset for the independent variable.
-
- Parameters
- ----------
- offset:
- Offset value.
-
- """
- if self.conditions is not None:
- self.conditions += offset
-
- def add_y_offset(self, offset):
- self.data_to_plot["mean"] += offset
- self.data_to_plot["repl"] += offset
-
- def add_offsets(self, x_offset=0, y_offset=0) -> None:
- """
- Data offsets.
-
- Parameters
- ----------
- x_offset:
- Offset for the independent variable.
- y_offset:
- Offsets for the observable.
- """
- self.add_x_offset(x_offset)
- self.add_y_offset(y_offset)
-
-
-class DataPlot:
- """
- Visualization specification of a plot of one data series, e.g. for
- an individual line on a subplot.
- """
-
- def __init__(self, plot_settings: dict):
- """
- Constructor.
-
- Parameters
- ----------
- plot_settings: A plot spec for one dataplot
- (only VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS)
- """
- for key, val in plot_settings.items():
- setattr(self, key, val)
-
- if DATASET_ID not in vars(self):
- raise ValueError(f"{DATASET_ID} must be specified")
- if X_VALUES not in vars(self): # TODO: singular?
- setattr(self, X_VALUES, TIME)
- if X_OFFSET not in vars(self):
- setattr(self, X_OFFSET, 0)
- if Y_VALUES not in vars(self):
- setattr(self, Y_VALUES, "")
- if Y_OFFSET not in vars(self):
- setattr(self, Y_OFFSET, 0.0)
- if LEGEND_ENTRY not in vars(self):
- setattr(self, LEGEND_ENTRY, getattr(self, DATASET_ID))
-
- @classmethod
- def from_df(cls, plot_spec: pd.DataFrame):
- vis_spec_dict = plot_spec.to_dict()
-
- return cls(vis_spec_dict)
-
- def __repr__(self):
- return f"{self.__class__.__name__}({self.__dict__})"
-
-
-class Subplot:
- """
- Visualization specification of a subplot.
- """
-
- def __init__(
- self,
- plot_id: str,
- plot_settings: dict,
- dataplots: Optional[List[DataPlot]] = None,
- ):
- """
- Constructor.
-
- Parameters
- ----------
- plot_id:
- Plot ID.
- plot_settings:
- Plot spec for a subplot (only VISUALIZATION_DF_SUBPLOT_LEVEL_COLS).
- dataplots:
- A list of data plots that should be plotted on one subplot.
- """
- # parameters of a specific subplot
-
- setattr(self, PLOT_ID, plot_id)
- for key, val in plot_settings.items():
- setattr(self, key, val)
-
- if PLOT_NAME not in vars(self):
- setattr(self, PLOT_NAME, "")
- if PLOT_TYPE_SIMULATION not in vars(self):
- setattr(self, PLOT_TYPE_SIMULATION, LINE_PLOT)
- if PLOT_TYPE_DATA not in vars(self):
- setattr(self, PLOT_TYPE_DATA, MEAN_AND_SD)
- if X_LABEL not in vars(self):
- setattr(self, X_LABEL, TIME) # TODO: getattr(self, X_VALUES)
- if X_SCALE not in vars(self):
- setattr(self, X_SCALE, LIN)
- if Y_LABEL not in vars(self):
- setattr(self, Y_LABEL, "values")
- if Y_SCALE not in vars(self):
- setattr(self, Y_SCALE, LIN)
-
- self.data_plots = dataplots if dataplots is not None else []
- self.xlim = None
- self.ylim = None
-
- @classmethod
- def from_df(
- cls,
- plot_id: str,
- vis_spec: pd.DataFrame,
- dataplots: Optional[List[DataPlot]] = None,
- ):
- vis_spec_dict = {}
- for col in vis_spec:
- if col in VISUALIZATION_DF_SUBPLOT_LEVEL_COLS:
- entry = vis_spec.loc[:, col]
- entry = np.unique(entry)
- if entry.size > 1:
- warnings.warn(
- f"For {PLOT_ID} {plot_id} in column "
- f"{col} contradictory settings ({entry})"
- f". Proceeding with first entry "
- f"({entry[0]}).",
- stacklevel=2,
- )
- entry = entry[0]
-
- # check if values are allowed
- if (
- col in [Y_SCALE, X_SCALE]
- and entry not in OBSERVABLE_TRANSFORMATIONS
- ):
- raise ValueError(
- f"{X_SCALE} and {Y_SCALE} have to be "
- f"one of the following: "
- + ", ".join(OBSERVABLE_TRANSFORMATIONS)
- )
- elif col == PLOT_TYPE_DATA and entry not in PLOT_TYPES_DATA:
- raise ValueError(
- f"{PLOT_TYPE_DATA} has to be one of the "
- f"following: " + ", ".join(PLOT_TYPES_DATA)
- )
- elif (
- col == PLOT_TYPE_SIMULATION
- and entry not in PLOT_TYPES_SIMULATION
- ):
- raise ValueError(
- f"{PLOT_TYPE_SIMULATION} has to be one of"
- f" the following: " + ", ".join(PLOT_TYPES_SIMULATION)
- )
-
- # append new entry to dict
- vis_spec_dict[col] = entry
- else:
- warnings.warn(
- f"Column {col} cannot be used to specify subplot"
- f", only settings from the following columns can"
- f" be used:"
- + ", ".join(VISUALIZATION_DF_SUBPLOT_LEVEL_COLS),
- stacklevel=2,
- )
- return cls(plot_id, vis_spec_dict, dataplots)
-
- def add_dataplot(self, dataplot: DataPlot) -> None:
- """
- Add data plot.
-
- Parameters
- ----------
- dataplot:
- Data plot visualization settings.
-
- """
- self.data_plots.append(dataplot)
-
- def set_axes_limits(
- self,
- xlim: Optional[Tuple[Optional[Real], Optional[Real]]] = None,
- ylim: Optional[Tuple[Optional[Real], Optional[Real]]] = None,
- ):
- """
- Set axes limits for all subplots. If xlim or ylim or any of the tuple
- items is None, corresponding limit is left unchanged.
-
- Parameters
- ----------
- xlim:
- X axis limits.
- ylim:
- Y axis limits.
- """
- self.xlim = xlim
- self.ylim = ylim
-
-
-class Figure:
- """
- Visualization specification of a figure.
-
- Contains information regarding how data should be visualized.
- """
-
- def __init__(
- self,
- subplots: Optional[List[Subplot]] = None,
- size: Tuple = DEFAULT_FIGSIZE,
- title: Optional[Tuple] = None,
- ):
- """
- Constructor.
-
- Parameters
- ----------
- subplots: A list of visualization specifications for each subplot
- size: Figure size
- title: Figure title
- """
- # TODO: Isensee measurements table in doc/examples doesn't correspond
- # to documentation: observableTransformation and
- # noiseDistribution columns replicateId problem
- # TODO: Should we put in the documentation which combination of fields
- # must be unique in the measurement table and add such check?
- # obs_id + sim_cond_id + preeq_cod_id (if exists) + time +
- # replicate_id (if exists)?
- self.size = size
- self.title = title
- self.subplots = subplots if subplots is not None else []
-
- @property
- def num_subplots(self) -> int:
- return len(self.subplots)
-
- def add_subplot(self, subplot: Subplot) -> None:
- """
- Add subplot.
-
- Parameters
- ----------
- subplot:
- Subplot visualization settings.
-
- """
- self.subplots.append(subplot)
-
- def set_axes_limits(
- self,
- xlim: Optional[Tuple[Optional[Real], Optional[Real]]] = None,
- ylim: Optional[Tuple[Optional[Real], Optional[Real]]] = None,
- ) -> None:
- """
- Set axes limits for all subplots. If xlim or ylim or any of the tuple
- items is None, corresponding limit is left unchanged.
-
- Parameters
- ----------
- xlim:
- X axis limits.
- ylim:
- Y axis limits.
- """
- for subplot in self.subplots:
- subplot.set_axes_limits(xlim, ylim)
-
- def save_to_tsv(self, output_file_path: str = "visuSpec.tsv") -> None:
- """
- Save full Visualization specification table.
-
- Note that datasetId column in the resulting table might have been
- generated even though datasetId column in Measurement table is missing
- or is different. Please, correct it manually.
-
- Parameters
- ----------
- output_file_path:
- File path to which the generated visualization specification is
- saved.
- """
- # TODO: what if datasetIds were generated?
-
- warnings.warn(
- f"Note: please check that {DATASET_ID} column "
- f"corresponds to {DATASET_ID} column in Measurement "
- f"(Simulation) table.",
- stacklevel=2,
- )
-
- visu_dict = {}
- for subplot in self.subplots:
- subplot_level = {
- key: subplot.__dict__[key]
- for key in subplot.__dict__
- if key in VISUALIZATION_DF_SUBPLOT_LEVEL_COLS
- }
-
- for dataplot in subplot.data_plots:
- dataset_level = {
- key: dataplot.__dict__[key]
- for key in dataplot.__dict__
- if key in VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS
- }
- row = {**subplot_level, **dataset_level}
- for key, value in row.items():
- if key in visu_dict:
- visu_dict[key].append(value)
- else:
- visu_dict[key] = [row[key]]
- visu_df = pd.DataFrame.from_dict(visu_dict)
- visu_df.to_csv(output_file_path, sep="\t", index=False)
-
-
-class DataProvider:
- """
- Handles data selection.
- """
-
- def __init__(
- self,
- exp_conditions: pd.DataFrame,
- measurements_data: Optional[pd.DataFrame] = None,
- simulations_data: Optional[pd.DataFrame] = None,
- ):
- self.conditions_data = exp_conditions
-
- if measurements_data is None and simulations_data is None:
- raise TypeError(
- "Not enough arguments. Either measurements_data "
- "or simulations_data should be provided."
- )
- self.measurements_data = measurements_data
- self.simulations_data = simulations_data
-
- @staticmethod
- def _matches_plot_spec(
- df: pd.DataFrame, plot_spec: "DataPlot", dataset_id
- ) -> pd.Series:
- """
- Construct an index for subsetting of the dataframe according to what
- is specified in plot_spec.
-
- Parameters
- ----------
- df:
- A pandas data frame to subset, can be from measurement file or
- simulation file.
- plot_spec:
- A visualization spec from the visualization file.
-
- Returns
- -------
- Boolean series that can be used for subsetting of the passed
- dataframe
- """
- subset = df[DATASET_ID] == dataset_id
- if getattr(plot_spec, Y_VALUES) == "":
- if len(df.loc[subset, OBSERVABLE_ID].unique()) > 1:
- raise ValueError(
- f"{Y_VALUES} must be specified in visualization table if "
- f"multiple different observables are available."
- )
- else:
- subset &= df[OBSERVABLE_ID] == getattr(plot_spec, Y_VALUES)
- return subset
-
- def _get_independent_var_values(
- self, data_df: pd.DataFrame, dataplot: DataPlot
- ) -> Tuple[np.ndarray, str, pd.Series]:
- """
- Get independent variable values.
-
- Parameters
- ----------
- data_df:
- A pandas data frame to subset, can be from measurement file or
- simulation file.
- dataplot:
- Data plot visualization settings.
-
- Returns
- -------
- col_name_unique:
- A name of the column from Measurement (Simulation) table, which
- specifies independent variable values (depends on the xValues entry
- of visualization specification).
- Possible values:
-
- * TIME (independent variable values will be taken from the TIME
- column of Measurement (Simulation) table)
-
- * SIMULATION_CONDITION_ID (independent variable values will be
- taken from one of the columns of Condition table)
-
- uni_condition_id:
- Time points
- or
- contains all unique condition IDs which should be
- plotted together as one dataplot. Independent variable values will
- be collected for these conditions
- conditions_:
- An independent variable values or None for the BarPlot case
- possible values: time points, None, vales of independent variable
- (Parameter or Species, specified in the xValues entry of
- visualization specification) for each condition_id in
- uni_condition_id
-
- """
- indep_var = getattr(dataplot, X_VALUES)
-
- dataset_id = getattr(dataplot, DATASET_ID)
-
- single_m_data = data_df[
- self._matches_plot_spec(data_df, dataplot, dataset_id)
- ]
-
- # gather simulationConditionIds belonging to datasetId
- uni_condition_id, uind = np.unique(
- single_m_data[SIMULATION_CONDITION_ID], return_index=True
- )
- # keep the ordering which was given by user from top to bottom
- # (avoid ordering by names '1','10','11','2',...)'
- uni_condition_id = uni_condition_id[np.argsort(uind)]
- col_name_unique = SIMULATION_CONDITION_ID
-
- if indep_var == TIME:
- # obtain unique observation times
- uni_condition_id = single_m_data[TIME].unique()
- col_name_unique = TIME
- conditions_ = uni_condition_id
- elif indep_var == "condition":
- conditions_ = None
- else:
- # indep_var = parameterOrStateId case ?
- # extract conditions (plot input) from condition file
- ind_cond = self.conditions_data.index.isin(uni_condition_id)
- conditions_ = self.conditions_data[ind_cond][indep_var]
-
- return uni_condition_id, col_name_unique, conditions_
-
- def get_data_series(
- self,
- data_df: pd.DataFrame,
- data_col: Literal["measurement", "simulation"],
- dataplot: DataPlot,
- provided_noise: bool,
- ) -> DataSeries:
- """
- Get data to plot from measurement or simulation DataFrame.
-
- Parameters
- ----------
- data_df: measurement or simulation DataFrame
- data_col: data column, i.e. 'measurement' or 'simulation'
- dataplot: visualization specification
- provided_noise:
- True if numeric values for the noise level are provided in the
- data table
-
- Returns
- -------
- Data to plot
- """
- (
- uni_condition_id,
- col_name_unique,
- conditions_,
- ) = self._get_independent_var_values(data_df, dataplot)
-
- dataset_id = getattr(dataplot, DATASET_ID)
-
- # get data subset selected based on provided dataset_id
- # and observable_ids
- single_m_data = data_df[
- self._matches_plot_spec(data_df, dataplot, dataset_id)
- ]
-
- # create empty dataframe for means and SDs
- measurements_to_plot = pd.DataFrame(
- columns=["mean", "noise_model", "sd", "sem", "repl"],
- index=uni_condition_id,
- )
-
- for var_cond_id in uni_condition_id:
- subset = single_m_data[col_name_unique] == var_cond_id
-
- # what has to be plotted is selected
- data_measurements = single_m_data.loc[subset, data_col]
-
- # TODO: all this rather inside DataSeries?
- # process the data
- measurements_to_plot.at[var_cond_id, "mean"] = np.mean(
- data_measurements
- )
- measurements_to_plot.at[var_cond_id, "sd"] = np.std(
- data_measurements
- )
-
- if provided_noise and np.any(subset):
- if (
- len(single_m_data.loc[subset, NOISE_PARAMETERS].unique())
- > 1
- ):
- raise NotImplementedError(
- f"Datapoints with inconsistent {NOISE_PARAMETERS} "
- f"is currently not implemented. Stopping."
- )
- tmp_noise = single_m_data.loc[subset, NOISE_PARAMETERS].values[
- 0
- ]
- if isinstance(tmp_noise, str):
- raise NotImplementedError(
- "No numerical noise values provided in the "
- "measurement table. Stopping."
- )
- if (
- isinstance(tmp_noise, Number)
- or tmp_noise.dtype == "float64"
- ):
- measurements_to_plot.at[
- var_cond_id, "noise_model"
- ] = tmp_noise
-
- # standard error of mean
- measurements_to_plot.at[var_cond_id, "sem"] = np.std(
- data_measurements
- ) / np.sqrt(len(data_measurements))
-
- # single replicates
- measurements_to_plot.at[
- var_cond_id, "repl"
- ] = data_measurements.values
-
- data_series = DataSeries(conditions_, measurements_to_plot)
- data_series.add_offsets(dataplot.xOffset, dataplot.yOffset)
- return data_series
-
- def get_data_to_plot(
- self, dataplot: DataPlot, provided_noise: bool
- ) -> Tuple[DataSeries, DataSeries]:
- """
- Get data to plot.
-
- Parameters
- ----------
- dataplot: visualization specification
- provided_noise:
- True if numeric values for the noise level are provided in the
- measurement table
-
- Returns
- -----------
- measurements_to_plot,
- simulations_to_plot
- """
- measurements_to_plot = None
- simulations_to_plot = None
-
- if self.measurements_data is not None:
- measurements_to_plot = self.get_data_series(
- self.measurements_data, MEASUREMENT, dataplot, provided_noise
- )
-
- if self.simulations_data is not None:
- simulations_to_plot = self.get_data_series(
- self.simulations_data, SIMULATION, dataplot, provided_noise
- )
- return measurements_to_plot, simulations_to_plot
-
-
-class VisSpecParser:
- """
- Parser of visualization specification provided by user either in the form
- of Visualization table or as a list of lists with datasets ids or
- observable ids or condition ids. Figure instance is created containing
- information regarding how data should be visualized. In addition to the
- Figure instance, a DataProvider instance is created that will be
- responsible for the data selection and manipulation.
- """
-
- def __init__(
- self,
- conditions_data: Union[str, Path, pd.DataFrame],
- exp_data: Optional[Union[str, Path, pd.DataFrame]] = None,
- sim_data: Optional[Union[str, Path, pd.DataFrame]] = None,
- ):
- if isinstance(conditions_data, (str, Path)):
- conditions_data = conditions.get_condition_df(conditions_data)
-
- # import from file in case experimental data is provided in file
- if isinstance(exp_data, (str, Path)):
- exp_data = measurements.get_measurement_df(exp_data)
-
- if isinstance(sim_data, (str, Path)):
- sim_data = core.get_simulation_df(sim_data)
-
- if exp_data is None and sim_data is None:
- raise TypeError(
- "Not enough arguments. Either measurements_data "
- "or simulations_data should be provided."
- )
-
- self.conditions_data = conditions_data
- self.measurements_data = exp_data
- self.simulations_data = sim_data
-
- @classmethod
- def from_problem(cls, petab_problem: Problem, sim_data):
- return cls(
- petab_problem.condition_df, petab_problem.measurement_df, sim_data
- )
-
- @property
- def _data_df(self):
- return (
- self.measurements_data
- if self.measurements_data is not None
- else self.simulations_data
- )
-
- @staticmethod
- def create_subplot(
- plot_id: str, subplot_vis_spec: pd.DataFrame
- ) -> Subplot:
- """
- Create subplot.
-
- Parameters
- ----------
- plot_id:
- Plot id.
- subplot_vis_spec:
- A visualization specification DataFrame that contains specification
- for the subplot and corresponding dataplots.
-
- Returns
- -------
- Subplot
- """
- subplot_columns = [
- col
- for col in subplot_vis_spec.columns
- if col in VISUALIZATION_DF_SUBPLOT_LEVEL_COLS
- ]
- subplot = Subplot.from_df(
- plot_id, subplot_vis_spec.loc[:, subplot_columns]
- )
-
- dataplot_cols = [
- col
- for col in subplot_vis_spec.columns
- if col in VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS
- ]
- dataplot_spec = subplot_vis_spec.loc[:, dataplot_cols]
-
- for _, row in dataplot_spec.iterrows():
- data_plot = DataPlot.from_df(row)
- subplot.add_dataplot(data_plot)
-
- return subplot
-
- def parse_from_vis_spec(
- self,
- vis_spec: Optional[Union[str, Path, pd.DataFrame]],
- ) -> Tuple[Figure, DataProvider]:
- """
- Get visualization settings from a visualization specification.
-
- Parameters
- ----------
- vis_spec:
- Visualization specification DataFrame in the PEtab format
- or a path to a visualization file.
-
- Returns
- -------
- A figure template with visualization settings and a data provider
- """
- # import visualization specification, if file was specified
- if isinstance(vis_spec, (str, Path)):
- vis_spec = core.get_visualization_df(vis_spec)
-
- if DATASET_ID not in vis_spec.columns:
- self._add_dataset_id_col()
- vis_spec = self._expand_vis_spec_settings(vis_spec)
- else:
- if (
- self.measurements_data is not None
- and DATASET_ID not in self.measurements_data
- ):
- raise ValueError(
- f"grouping by datasetId was requested, but "
- f"{DATASET_ID} column is missing from "
- f"measurement table"
- )
- if (
- self.simulations_data is not None
- and DATASET_ID not in self.simulations_data
- ):
- raise ValueError(
- f"grouping by datasetId was requested, but "
- f"{DATASET_ID} column is missing from "
- f"simulation table"
- )
-
- figure = Figure()
-
- # get unique plotIDs preserving the order from the original vis spec
- _, idx = np.unique(vis_spec[PLOT_ID], return_index=True)
- plot_ids = vis_spec[PLOT_ID].iloc[np.sort(idx)]
-
- # loop over unique plotIds
- for plot_id in plot_ids:
- # get indices for specific plotId
- ind_plot = vis_spec[PLOT_ID] == plot_id
-
- subplot = self.create_subplot(plot_id, vis_spec[ind_plot])
- figure.add_subplot(subplot)
-
- return figure, DataProvider(
- self.conditions_data, self.measurements_data, self.simulations_data
- )
-
- def parse_from_id_list(
- self,
- ids_per_plot: Optional[List[IdsList]] = None,
- group_by: str = "observable",
- plotted_noise: Optional[str] = MEAN_AND_SD,
- ) -> Tuple[Figure, DataProvider]:
- """
- Get visualization settings from a list of ids and a grouping parameter.
-
- Parameters
- ----------
- ids_per_plot:
- A list of lists. Each sublist corresponds to a plot, each subplot
- contains the Ids of datasets or observables or simulation
- conditions for this plot.
- e.g.
-
- ::
-
- dataset_ids_per_plot = [['dataset_1', 'dataset_2'],
- ['dataset_1', 'dataset_4',
- 'dataset_5']]
-
- or
-
- ::
-
- cond_id_list = [['model1_data1'],
- ['model1_data2', 'model1_data3'],
- ['model1_data4', 'model1_data5'],
- ['model1_data6']].
-
- group_by:
- Grouping type. Possible values: 'dataset', 'observable',
- 'simulation'.
- plotted_noise:
- String indicating how noise should be visualized:
- ['MeanAndSD' (default), 'MeanAndSEM', 'replicate', 'provided'].
-
- Returns
- -------
- A figure template with visualization settings and a data provider
-
- """
- if ids_per_plot is None:
- # this is the default case. If no grouping is specified,
- # all observables are plotted. One observable per plot.
- unique_obs_list = self._data_df[OBSERVABLE_ID].unique()
- ids_per_plot = [[obs_id] for obs_id in unique_obs_list]
-
- if group_by == "dataset" and DATASET_ID not in self._data_df:
- raise ValueError(
- f"grouping by datasetId was requested, but "
- f"{DATASET_ID} column is missing from data table"
- )
-
- if group_by != "dataset":
- # datasetId_list will be created (possibly overwriting previous
- # list - only in the local variable, not in the tsv-file)
- self._add_dataset_id_col()
-
- columns_dict = self._get_vis_spec_dependent_columns_dict(
- group_by, ids_per_plot
- )
-
- columns_dict[PLOT_TYPE_DATA] = [plotted_noise] * len(
- columns_dict[DATASET_ID]
- )
-
- vis_spec_df = pd.DataFrame(columns_dict)
-
- return self.parse_from_vis_spec(vis_spec_df)
-
- def _add_dataset_id_col(self) -> None:
- """
- Add dataset_id column to the measurement table and simulations table
- (possibly overwrite).
- """
- if self.measurements_data is not None:
- if DATASET_ID in self.measurements_data.columns:
- self.measurements_data = self.measurements_data.drop(
- DATASET_ID, axis=1
- )
- self.measurements_data.insert(
- loc=self.measurements_data.columns.size,
- column=DATASET_ID,
- value=generate_dataset_id_col(self.measurements_data),
- )
-
- if self.simulations_data is not None:
- if DATASET_ID in self.simulations_data.columns:
- self.simulations_data = self.simulations_data.drop(
- DATASET_ID, axis=1
- )
- self.simulations_data.insert(
- loc=self.simulations_data.columns.size,
- column=DATASET_ID,
- value=generate_dataset_id_col(self.simulations_data),
- )
-
- def _get_vis_spec_dependent_columns_dict(
- self, group_by: str, id_list: Optional[List[IdsList]] = None
- ) -> Dict:
- """
- Helper method for creating values for columns PLOT_ID, DATASET_ID,
- LEGEND_ENTRY, Y_VALUES for visualization specification file.
-
- Parameters
- ----------
- group_by:
- Grouping type.
- Possible values: 'dataset', 'observable', 'simulation'.
- id_list:
- Grouping list. Each sublist corresponds to a subplot and
- contains the Ids of datasets or observables or simulation
- conditions for this subplot.
-
- Returns
- -------
- A dictionary with values for columns PLOT_ID, DATASET_ID, \
- LEGEND_ENTRY, Y_VALUES for visualization specification.
- """
- if group_by != "dataset":
- dataset_id_list = create_dataset_id_list_new(
- self._data_df, group_by, id_list
- )
- else:
- dataset_id_list = id_list
-
- dataset_id_column = [
- i_dataset for sublist in dataset_id_list for i_dataset in sublist
- ]
-
- dataset_label_column = [
- self._create_legend(i_dataset)
- for sublist in dataset_id_list
- for i_dataset in sublist
- ]
-
- # such dataset ids were generated that each dataset_id always
- # corresponds to one observable
- yvalues_column = [
- self._data_df.loc[
- self._data_df[DATASET_ID] == dataset_id, OBSERVABLE_ID
- ].iloc[0]
- for sublist in dataset_id_list
- for dataset_id in sublist
- ]
-
- # get number of plots and create plotId-lists
- plot_id_column = [
- "plot%s" % str(ind + 1)
- for ind, inner_list in enumerate(dataset_id_list)
- for _ in inner_list
- ]
-
- return {
- PLOT_ID: plot_id_column,
- DATASET_ID: dataset_id_column,
- LEGEND_ENTRY: dataset_label_column,
- Y_VALUES: yvalues_column,
- }
-
- def _create_legend(self, dataset_id: str) -> str:
- """
- Create a legend for the dataset ids.
-
- Parameters
- ----------
- dataset_id:
- Dataset id.
-
- Returns
- -------
- A legend.
- """
- # relies on the fact that dataset ids were created based on cond_ids
- # and obs_ids. Therefore, in the following query all pairs will be
- # the same
- cond_id, obs_id = self._data_df[
- self._data_df[DATASET_ID] == dataset_id
- ][[SIMULATION_CONDITION_ID, OBSERVABLE_ID]].iloc[0, :]
- tmp = self.conditions_data.loc[cond_id]
- if CONDITION_NAME not in tmp.index or pd.isna(tmp[CONDITION_NAME]):
- cond_name = cond_id
- else:
- cond_name = tmp[CONDITION_NAME]
- return f"{cond_name} - {obs_id}"
-
- def _expand_vis_spec_settings(self, vis_spec: pd.DataFrame):
- """
- Expand visualization specification for the case when DATASET_ID is not
- in vis_spec.columns.
-
- Parameters
- -------
- vis_spec:
- Visualization specification DataFrame in the PEtab format
- or a path to a visualization file.
-
- Returns
- -------
- A visualization specification DataFrame.
- """
- if DATASET_ID in vis_spec.columns:
- raise ValueError(
- f"visualization specification expansion is "
- f"unnecessary if column {DATASET_ID} is present"
- )
-
- if vis_spec.empty:
- # in case of empty spec all measurements corresponding to each
- # observable will be plotted on a separate subplot
- observable_ids = self._data_df[OBSERVABLE_ID].unique()
-
- vis_spec_exp_rows = [
- self._vis_spec_rows_for_obs(obs_id, {PLOT_ID: f"plot{idx}"})
- for idx, obs_id in enumerate(observable_ids)
- ]
- return pd.concat(vis_spec_exp_rows, ignore_index=True)
-
- vis_spec_exp_rows = []
- for _, row in vis_spec.iterrows():
- if Y_VALUES in row:
- vis_spec_exp_rows.append(
- self._vis_spec_rows_for_obs(row[Y_VALUES], row.to_dict())
- )
- else:
- observable_ids = self._data_df[OBSERVABLE_ID].unique()
-
- for obs_id in observable_ids:
- vis_spec_exp_rows.append(
- self._vis_spec_rows_for_obs(obs_id, row.to_dict())
- )
- return pd.concat(vis_spec_exp_rows, ignore_index=True)
-
- def _vis_spec_rows_for_obs(
- self, obs_id: str, settings: dict
- ) -> pd.DataFrame:
- """
- Create vis_spec for one observable.
-
- For each dataset_id corresponding to the observable with the specified
- id create a vis_spec entry with provided settings.
-
- Parameters
- ----------
- obs_id:
- Observable ID.
- settings:
- Additional visualization settings. For each key that is a
- valid visualization specification column name, the setting
- will be added to the resulting visualization specification.
-
- Returns
- -------
- A visualization specification DataFrame.
- """
- columns_to_expand = [
- PLOT_ID,
- PLOT_NAME,
- PLOT_TYPE_SIMULATION,
- PLOT_TYPE_DATA,
- X_VALUES,
- X_OFFSET,
- X_LABEL,
- X_SCALE,
- Y_OFFSET,
- Y_LABEL,
- Y_SCALE,
- LEGEND_ENTRY,
- ]
-
- dataset_ids = self._data_df[self._data_df[OBSERVABLE_ID] == obs_id][
- DATASET_ID
- ].unique()
- n_rows = len(dataset_ids)
- columns_dict = {DATASET_ID: dataset_ids, Y_VALUES: [obs_id] * n_rows}
-
- for column in settings:
- if column in columns_to_expand:
- columns_dict[column] = [settings[column]] * n_rows
-
- if LEGEND_ENTRY not in columns_dict:
- columns_dict[LEGEND_ENTRY] = [
- self._create_legend(dataset_id)
- for dataset_id in columns_dict[DATASET_ID]
- ]
- return pd.DataFrame(columns_dict)
+_deprecated_import_v1(__name__)
diff --git a/petab/yaml.py b/petab/yaml.py
index 364413f5..8a84221a 100644
--- a/petab/yaml.py
+++ b/petab/yaml.py
@@ -1,292 +1,7 @@
-"""Code regarding the PEtab YAML config files"""
+"""Deprecated module for reading and writing PEtab YAML files.
-import os
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+Use petab.v1.yaml instead."""
+from petab import _deprecated_import_v1
+from petab.v1.yaml import * # noqa: F403, F401, E402
-import jsonschema
-import numpy as np
-import yaml
-from pandas.io.common import get_handle
-
-from .C import * # noqa: F403
-
-# directory with PEtab yaml schema files
-SCHEMA_DIR = Path(__file__).parent / "schemas"
-# map of version number to validation schema
-SCHEMAS = {
- "1": SCHEMA_DIR / "petab_schema.v1.0.0.yaml",
- "1.0.0": SCHEMA_DIR / "petab_schema.v1.0.0.yaml",
- "2.0.0": SCHEMA_DIR / "petab_schema.v2.0.0.yaml",
-}
-
-__all__ = [
- "validate",
- "validate_yaml_syntax",
- "validate_yaml_semantics",
- "load_yaml",
- "is_composite_problem",
- "assert_single_condition_and_sbml_file",
- "write_yaml",
- "create_problem_yaml",
-]
-
-
-def validate(
- yaml_config: Union[Dict, str, Path],
- path_prefix: Union[None, str, Path] = None,
-):
- """Validate syntax and semantics of PEtab config YAML
-
- Arguments:
- yaml_config:
- PEtab YAML config as filename or dict.
- path_prefix:
- Base location for relative paths. Defaults to location of YAML
- file if a filename was provided for ``yaml_config`` or the current
- working directory.
- """
- validate_yaml_syntax(yaml_config)
- validate_yaml_semantics(yaml_config=yaml_config, path_prefix=path_prefix)
-
-
-def validate_yaml_syntax(
- yaml_config: Union[Dict, str, Path], schema: Union[None, Dict, str] = None
-):
- """Validate PEtab YAML file syntax
-
- Arguments:
- yaml_config:
- PEtab YAML file to validate, as file name or dictionary
- schema:
- Custom schema for validation
-
- Raises:
- see :func:`jsonschema.validate`
- """
- yaml_config = load_yaml(yaml_config)
-
- if schema is None:
- # try get PEtab version from yaml file
- # if this is not the available, the file is not valid anyways,
- # but let's still use the latest PEtab schema for full validation
- version = (
- yaml_config.get(FORMAT_VERSION, None) or list(SCHEMAS.values())[-1]
- )
- try:
- schema = SCHEMAS[str(version)]
- except KeyError as e:
- raise ValueError(
- "Unknown PEtab version given in problem "
- f"specification: {version}"
- ) from e
- schema = load_yaml(schema)
- jsonschema.validate(instance=yaml_config, schema=schema)
-
-
-def validate_yaml_semantics(
- yaml_config: Union[Dict, str, Path],
- path_prefix: Union[None, str, Path] = None,
-):
- """Validate PEtab YAML file semantics
-
- Check for existence of files. Assumes valid syntax.
-
- Version number and contents of referenced files are not yet checked.
-
- Arguments:
- yaml_config:
- PEtab YAML config as filename or dict.
- path_prefix:
- Base location for relative paths. Defaults to location of YAML
- file if a filename was provided for ``yaml_config`` or the current
- working directory.
-
- Raises:
- AssertionError: in case of problems
- """
- if not path_prefix:
- if isinstance(yaml_config, (str, Path)):
- path_prefix = os.path.dirname(str(yaml_config))
- else:
- path_prefix = ""
-
- yaml_config = load_yaml(yaml_config)
-
- def _check_file(_filename: str, _field: str):
- if not os.path.isfile(_filename):
- raise AssertionError(
- f"File '{_filename}' provided as '{_field}' " "does not exist."
- )
-
- # Handles both a single parameter file, and a parameter file that has been
- # split into multiple subset files.
- for parameter_subset_file in list(
- np.array(yaml_config[PARAMETER_FILE]).flat
- ):
- _check_file(
- os.path.join(path_prefix, parameter_subset_file),
- parameter_subset_file,
- )
-
- for problem_config in yaml_config[PROBLEMS]:
- for field in [
- SBML_FILES,
- CONDITION_FILES,
- MEASUREMENT_FILES,
- VISUALIZATION_FILES,
- OBSERVABLE_FILES,
- ]:
- if field in problem_config:
- for filename in problem_config[field]:
- _check_file(os.path.join(path_prefix, filename), field)
-
-
-def load_yaml(yaml_config: Union[Dict, Path, str]) -> Dict:
- """Load YAML
-
- Convenience function to allow for providing YAML inputs as filename, URL
- or as dictionary.
-
- Arguments:
- yaml_config:
- PEtab YAML config as filename or dict or URL.
-
- Returns:
- The unmodified dictionary if ``yaml_config`` was dictionary.
- Otherwise the parsed the YAML file.
- """
- # already parsed? all PEtab problem yaml files are dictionaries
- if isinstance(yaml_config, dict):
- return yaml_config
-
- with get_handle(yaml_config, mode="r") as io_handle:
- data = yaml.safe_load(io_handle.handle)
- return data
-
-
-def is_composite_problem(yaml_config: Union[Dict, str, Path]) -> bool:
- """Does this YAML file comprise multiple models?
-
- Arguments:
- yaml_config: PEtab configuration as dictionary or YAML file name
- """
- yaml_config = load_yaml(yaml_config)
- return len(yaml_config[PROBLEMS]) > 1
-
-
-def assert_single_condition_and_sbml_file(problem_config: Dict) -> None:
- """Check that there is only a single condition file and a single SBML
- file specified.
-
- Arguments:
- problem_config:
- Dictionary as defined in the YAML schema inside the `problems`
- list.
- Raises:
- NotImplementedError:
- If multiple condition or SBML files specified.
- """
- if (
- len(problem_config[SBML_FILES]) > 1
- or len(problem_config[CONDITION_FILES]) > 1
- ):
- # TODO https://github.com/ICB-DCM/PEtab/issues/188
- # TODO https://github.com/ICB-DCM/PEtab/issues/189
- raise NotImplementedError(
- "Support for multiple models or condition files is not yet "
- "implemented."
- )
-
-
-def write_yaml(
- yaml_config: Dict[str, Any], filename: Union[str, Path]
-) -> None:
- """Write PEtab YAML file
-
- Arguments:
- yaml_config: Data to write
- filename: File to create
- """
- with open(filename, "w") as outfile:
- yaml.dump(
- yaml_config, outfile, default_flow_style=False, sort_keys=False
- )
-
-
-def create_problem_yaml(
- sbml_files: Union[str, Path, List[Union[str, Path]]],
- condition_files: Union[str, Path, List[Union[str, Path]]],
- measurement_files: Union[str, Path, List[Union[str, Path]]],
- parameter_file: Union[str, Path],
- observable_files: Union[str, Path, List[Union[str, Path]]],
- yaml_file: Union[str, Path],
- visualization_files: Optional[
- Union[str, Path, List[Union[str, Path]]]
- ] = None,
- relative_paths: bool = True,
- mapping_files: Union[str, Path, List[Union[str, Path]]] = None,
-) -> None:
- """Create and write default YAML file for a single PEtab problem
-
- Arguments:
- sbml_files: Path of SBML model file or list of such
- condition_files: Path of condition file or list of such
- measurement_files: Path of measurement file or list of such
- parameter_file: Path of parameter file
- observable_files: Path of observable file or list of such
- yaml_file: Path to which YAML file should be written
- visualization_files:
- Optional Path to visualization file or list of such
- relative_paths:
- whether all paths in the YAML file should be relative to the
- location of the YAML file. If ``False``, then paths are left
- unchanged.
- mapping_files: Path of mapping file
- """
- if isinstance(sbml_files, (Path, str)):
- sbml_files = [sbml_files]
- if isinstance(condition_files, (Path, str)):
- condition_files = [condition_files]
- if isinstance(measurement_files, (Path, str)):
- measurement_files = [measurement_files]
- if isinstance(observable_files, (Path, str)):
- observable_files = [observable_files]
- if isinstance(visualization_files, (Path, str)):
- visualization_files = [visualization_files]
-
- if relative_paths:
- yaml_file_dir = Path(yaml_file).parent
-
- def get_rel_to_yaml(paths: Union[List[str], None]):
- if paths is None:
- return paths
- return [
- os.path.relpath(path, start=yaml_file_dir) for path in paths
- ]
-
- sbml_files = get_rel_to_yaml(sbml_files)
- condition_files = get_rel_to_yaml(condition_files)
- measurement_files = get_rel_to_yaml(measurement_files)
- observable_files = get_rel_to_yaml(observable_files)
- visualization_files = get_rel_to_yaml(visualization_files)
- parameter_file = get_rel_to_yaml([parameter_file])[0]
- mapping_files = get_rel_to_yaml(mapping_files)
-
- problem_dic = {
- CONDITION_FILES: condition_files,
- MEASUREMENT_FILES: measurement_files,
- SBML_FILES: sbml_files,
- OBSERVABLE_FILES: observable_files,
- }
- if mapping_files:
- problem_dic[MAPPING_FILES] = mapping_files
-
- if visualization_files is not None:
- problem_dic[VISUALIZATION_FILES] = visualization_files
- yaml_dic = {
- PARAMETER_FILE: parameter_file,
- FORMAT_VERSION: 1,
- PROBLEMS: [problem_dic],
- }
- write_yaml(yaml_dic, yaml_file)
+_deprecated_import_v1(__name__)
diff --git a/pyproject.toml b/pyproject.toml
index 5fcdd9e7..1758476a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,6 +5,84 @@ requires = [
]
build-backend = "setuptools.build_meta"
+[project]
+name = "petab"
+dynamic = ["version", "readme"]
+description = "Parameter estimation tabular data"
+requires-python = ">=3.10"
+dependencies = [
+ "numpy>=1.15.1",
+ "pandas>=1.2.0",
+ # remove when pandas >= 3, see also
+ # https://github.com/pandas-dev/pandas/issues/54466
+ "pyarrow",
+ "python-libsbml>=5.17.0",
+ "sympy",
+ "colorama",
+ "pyyaml",
+ "jsonschema",
+ "antlr4-python3-runtime==4.13.1",
+]
+license = {text = "MIT License"}
+authors = [
+ {name = "The PEtab developers"},
+]
+maintainers = [
+ {name = "Daniel Weindl", email = "sci@danielweindl.de"},
+ {name = "Dilan Pathirana", email = "dilan.pathirana@uni-bonn.de"},
+ {name = "Maren Philipps", email = "maren.philipps@uni-bonn.de"},
+]
+
+[project.optional-dependencies]
+tests = [
+ "pytest",
+ "pytest-cov",
+ "simplesbml",
+ "scipy",
+ "pysb",
+]
+quality = [
+ "pre-commit",
+]
+reports = [
+ # https://github.com/spatialaudio/nbsphinx/issues/641
+ "Jinja2==3.0.3",
+]
+combine = [
+ "python-libcombine>=0.2.6",
+]
+doc = [
+ "sphinx>=3.5.3, !=5.1.0",
+ "sphinxcontrib-napoleon>=0.7",
+ "sphinx-markdown-tables>=0.0.15",
+ "sphinx-rtd-theme>=0.5.1",
+ "m2r2",
+ "myst-nb>=0.14.0",
+ # https://github.com/spatialaudio/nbsphinx/issues/687#issuecomment-1339271312
+ "ipython>=7.21.0, !=8.7.0",
+ "pysb",
+]
+vis = [
+ "matplotlib>=3.6.0",
+ "seaborn",
+ "scipy"
+]
+
+[project.scripts]
+petablint = "petab.petablint:main"
+petab_visualize = "petab.v1.visualize.cli:_petab_visualize_main"
+
+[project.urls]
+Repository = "https://github.com/PEtab-dev/libpetab-python"
+Documentation = "https://petab.readthedocs.io/projects/libpetab-python/"
+
+[tool.setuptools.packages.find]
+include = ["petab", "petab.*"]
+namespaces = false
+
+[tool.setuptools.package-data]
+petab = ["petab/schemas/*", "petab/visualize/templates/*"]
+
[tool.ruff]
line-length = 79
lint.extend-select = [
@@ -20,9 +98,17 @@ lint.extend-select = [
# TODO: "ANN001", "D", # pydocstyle (PEP 257)
]
lint.extend-ignore = ["F403", "F405", "S101"]
+lint.exclude = [
+ "petab/v1/math/_generated/*", # auto-generated
+]
[tool.ruff.lint.pydocstyle]
convention = "pep257"
[tool.ruff.lint.per-file-ignores]
"tests/*" = ["T201"]
+
+[tool.ruff.format]
+exclude = [
+ "petab/math/_generated/*", # auto-generated
+]
diff --git a/pytest.ini b/pytest.ini
index 8d2e5992..11b8918a 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,5 +1,9 @@
[pytest]
filterwarnings =
error
+ # TODO: until tests are reorganized for petab.v1
+ ignore::DeprecationWarning
+ ignore:Support for PEtab2.0 and all of petab.v2 is experimental:UserWarning
+ ignore:Support for PEtab2.0 is experimental:UserWarning
ignore:.*inspect.getargspec\(\) is deprecated.*:DeprecationWarning
ignore:.*Passing unrecognized arguments to super\(PyDevIPCompleter6\).*:DeprecationWarning
diff --git a/setup.py b/setup.py
index 2ff424eb..3ba41deb 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
import os
import re
-from setuptools import find_namespace_packages, setup
+from setuptools import setup
def read(fname):
@@ -35,68 +35,10 @@ def absolute_links(txt):
# sets __version__
exec(read(version_file)) # pylint: disable=W0122 # nosec # noqa: S102
-ENTRY_POINTS = {
- "console_scripts": [
- "petablint = petab.petablint:main",
- "petab_visualize = petab.visualize.cli:_petab_visualize_main",
- ]
-}
-
# project metadata
# noinspection PyUnresolvedReferences
setup(
- name="petab",
- version=__version__,
- description="Parameter estimation tabular data",
long_description=absolute_links(read("README.md")),
long_description_content_type="text/markdown",
- author="The PEtab developers",
- author_email="daniel.weindl@helmholtz-muenchen.de",
- url="https://github.com/PEtab-dev/libpetab-python",
- packages=find_namespace_packages(exclude=["doc*", "test*"]),
- install_requires=[
- "numpy>=1.15.1",
- "pandas>=1.2.0",
- # remove when pandas >= 3, see also
- # https://github.com/pandas-dev/pandas/issues/54466
- "pyarrow",
- "python-libsbml>=5.17.0",
- "sympy",
- "colorama",
- "pyyaml",
- "jsonschema",
- ],
- include_package_data=True,
- python_requires=">=3.10.0",
- entry_points=ENTRY_POINTS,
- extras_require={
- "tests": [
- "pytest",
- "pytest-cov",
- "simplesbml",
- "scipy",
- "pysb",
- ],
- "quality": [
- "pre-commit",
- ],
- "reports": [
- # https://github.com/spatialaudio/nbsphinx/issues/641
- "Jinja2==3.0.3",
- ],
- "combine": [
- "python-libcombine>=0.2.6",
- ],
- "doc": [
- "sphinx>=3.5.3, !=5.1.0",
- "sphinxcontrib-napoleon>=0.7",
- "sphinx-markdown-tables>=0.0.15",
- "sphinx-rtd-theme>=0.5.1",
- "m2r2",
- "myst-nb>=0.14.0",
- # https://github.com/spatialaudio/nbsphinx/issues/687#issuecomment-1339271312
- "ipython>=7.21.0, !=8.7.0",
- ],
- "vis": ["matplotlib>=3.6.0", "seaborn", "scipy"],
- },
+ version=__version__,
)
diff --git a/tests/v1/__init__.py b/tests/v1/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/v1/math/__init__.py b/tests/v1/math/__init__.py
new file mode 100644
index 00000000..45457ba7
--- /dev/null
+++ b/tests/v1/math/__init__.py
@@ -0,0 +1 @@
+"""PEtab math handling functionality."""
diff --git a/tests/v1/math/test_math.py b/tests/v1/math/test_math.py
new file mode 100644
index 00000000..4b350d4e
--- /dev/null
+++ b/tests/v1/math/test_math.py
@@ -0,0 +1,94 @@
+import importlib.resources
+from pathlib import Path
+
+import numpy as np
+import pytest
+import sympy as sp
+import yaml
+from sympy.abc import _clash
+from sympy.logic.boolalg import Boolean
+
+from petab.math import sympify_petab
+
+
+def test_sympify_numpy():
+ assert sympify_petab(np.float64(1.0)) == sp.Float(1.0)
+
+
+def test_parse_simple():
+ """Test simple numeric expressions."""
+ assert float(sympify_petab("1 + 2")) == 3
+ assert float(sympify_petab("1 + 2 * 3")) == 7
+ assert float(sympify_petab("(1 + 2) * 3")) == 9
+ assert float(sympify_petab("1 + 2 * (3 + 4)")) == 15
+ assert float(sympify_petab("1 + 2 * (3 + 4) / 2")) == 8
+
+
+def read_cases():
+    """Return ``(expression, expected)`` pairs from the test-suite YAML.
+
+    ``expected`` is a bool, a float, or a sympy expression (real symbols).
+    """
+    yaml_file = importlib.resources.files("petabtests.cases").joinpath(
+        str(Path("v2.0.0", "math", "math_tests.yaml"))
+    )
+    with yaml_file.open(encoding="utf-8") as stream:
+        data = yaml.safe_load(stream)
+
+    cases = []
+    for item in data["cases"]:
+        expected = item["expected"]
+        if not isinstance(expected, bool):
+            try:
+                expected = float(expected)
+            except ValueError:
+                expected = sp.sympify(expected, locals=_clash)
+                real_symbols = {
+                    s: sp.Symbol(s.name, real=True)
+                    for s in expected.free_symbols
+                }
+                expected = expected.subs(real_symbols)
+        cases.append((item["expression"], expected))
+    return cases
+
+
+@pytest.mark.parametrize("expr_str, expected", read_cases())
+def test_parse_cases(expr_str, expected):
+    """Test PEtab math expressions from the PEtab test suite."""
+    result = sympify_petab(expr_str)
+    if isinstance(result, Boolean):  # boolean results are compared directly
+        assert result == expected
+    else:
+        try:
+            result = float(result.evalf())
+            assert np.isclose(
+                result, expected
+            ), f"{expr_str}: Expected {expected}, got {result}"
+        except TypeError:  # presumably a symbolic result; compare exactly
+            assert (
+                result == expected
+            ), f"{expr_str}: Expected {expected}, got {result}"
+
+
+def test_ids():
+ """Test symbols in expressions."""
+ assert sympify_petab("bla * 2") == 2.0 * sp.Symbol("bla", real=True)
+
+
+def test_syntax_error():
+ """Test exceptions upon syntax errors."""
+ # parser error
+ with pytest.raises(ValueError, match="Syntax error"):
+ sympify_petab("1 + ")
+
+ # lexer error
+ with pytest.raises(ValueError, match="Syntax error"):
+ sympify_petab("0.")
+
+
+def test_complex():
+ """Test expressions producing (unsupported) complex numbers."""
+ with pytest.raises(ValueError, match="not real-valued"):
+ sympify_petab("sqrt(-1)")
+ with pytest.raises(ValueError, match="not real-valued"):
+ sympify_petab("arctanh(inf)")
diff --git a/tests/test_calculate.py b/tests/v1/test_calculate.py
similarity index 99%
rename from tests/test_calculate.py
rename to tests/v1/test_calculate.py
index d98896c8..ca93c33a 100644
--- a/tests/test_calculate.py
+++ b/tests/v1/test_calculate.py
@@ -4,6 +4,7 @@
import pandas as pd
import pytest
+import petab
from petab import (
calculate_chi2,
calculate_llh,
@@ -55,8 +56,8 @@ def model_simple():
return (
measurement_df,
- observable_df,
- parameter_df,
+ petab.get_observable_df(observable_df),
+ petab.get_parameter_df(parameter_df),
simulation_df,
expected_residuals,
expected_residuals_nonorm,
diff --git a/tests/test_combine.py b/tests/v1/test_combine.py
similarity index 99%
rename from tests/test_combine.py
rename to tests/v1/test_combine.py
index 398b2737..08ad5b77 100644
--- a/tests/test_combine.py
+++ b/tests/v1/test_combine.py
@@ -9,7 +9,7 @@
# import fixtures
pytest_plugins = [
- "tests.test_petab",
+ "tests.v1.test_petab",
]
diff --git a/tests/test_conditions.py b/tests/v1/test_conditions.py
similarity index 100%
rename from tests/test_conditions.py
rename to tests/v1/test_conditions.py
diff --git a/tests/test_deprecated.py b/tests/v1/test_deprecated.py
similarity index 100%
rename from tests/test_deprecated.py
rename to tests/v1/test_deprecated.py
diff --git a/tests/test_lint.py b/tests/v1/test_lint.py
similarity index 97%
rename from tests/test_lint.py
rename to tests/v1/test_lint.py
index cc99f71d..b178a425 100644
--- a/tests/test_lint.py
+++ b/tests/v1/test_lint.py
@@ -12,7 +12,7 @@
# import fixtures
pytest_plugins = [
- "tests.test_petab",
+ "tests.v1.test_petab",
]
@@ -38,7 +38,7 @@ def test_assert_measured_observables_present():
def test_condition_table_is_parameter_free():
with patch(
- "petab.get_parametric_overrides"
+ "petab.v1.get_parametric_overrides"
) as mock_get_parametric_overrides:
mock_get_parametric_overrides.return_value = []
assert lint.condition_table_is_parameter_free(pd.DataFrame()) is True
@@ -427,8 +427,8 @@ def test_petablint_succeeds():
"""Run petablint and ensure we exit successfully for a file that should
contain no errors
"""
- dir_isensee = "../doc/example/example_Isensee/"
- dir_fujita = "../doc/example/example_Fujita/"
+ dir_isensee = "../../doc/example/example_Isensee/"
+ dir_fujita = "../../doc/example/example_Fujita/"
# run with measurement file
script_path = os.path.abspath(os.path.dirname(__file__))
@@ -642,3 +642,18 @@ def test_parameter_ids_are_unique():
parameter_df.index = ["par0", "par1"]
parameter_df.index.name = "parameterId"
lint.check_parameter_df(parameter_df)
+
+
+def test_check_positive_bounds_for_scaled_parameters():
+ parameter_df = pd.DataFrame(
+ {
+ PARAMETER_ID: ["par"],
+ PARAMETER_SCALE: [LOG10],
+ ESTIMATE: [1],
+ LOWER_BOUND: [0.0],
+ UPPER_BOUND: [1],
+ }
+ ).set_index(PARAMETER_ID)
+
+ with pytest.raises(AssertionError, match="positive"):
+ lint.check_parameter_df(parameter_df)
diff --git a/tests/test_mapping.py b/tests/v1/test_mapping.py
similarity index 100%
rename from tests/test_mapping.py
rename to tests/v1/test_mapping.py
diff --git a/tests/test_measurements.py b/tests/v1/test_measurements.py
similarity index 100%
rename from tests/test_measurements.py
rename to tests/v1/test_measurements.py
diff --git a/tests/test_model_pysb.py b/tests/v1/test_model_pysb.py
similarity index 100%
rename from tests/test_model_pysb.py
rename to tests/v1/test_model_pysb.py
diff --git a/tests/test_observables.py b/tests/v1/test_observables.py
similarity index 99%
rename from tests/test_observables.py
rename to tests/v1/test_observables.py
index 2897f86f..f9547fec 100644
--- a/tests/test_observables.py
+++ b/tests/v1/test_observables.py
@@ -10,7 +10,7 @@
# import fixtures
pytest_plugins = [
- "tests.test_petab",
+ "tests.v1.test_petab",
]
diff --git a/tests/test_parameter_mapping.py b/tests/v1/test_parameter_mapping.py
similarity index 99%
rename from tests/test_parameter_mapping.py
rename to tests/v1/test_parameter_mapping.py
index f1db8c02..e499bd5c 100644
--- a/tests/test_parameter_mapping.py
+++ b/tests/v1/test_parameter_mapping.py
@@ -7,11 +7,11 @@
import petab
from petab.C import *
from petab.models.sbml_model import SbmlModel
-from petab.parameter_mapping import _apply_parameter_table
+from petab.v1.parameter_mapping import _apply_parameter_table
# import fixtures
pytest_plugins = [
- "tests.test_petab",
+ "tests.v1.test_petab",
]
diff --git a/tests/test_parameters.py b/tests/v1/test_parameters.py
similarity index 99%
rename from tests/test_parameters.py
rename to tests/v1/test_parameters.py
index a2fa5e66..c28528fe 100644
--- a/tests/test_parameters.py
+++ b/tests/v1/test_parameters.py
@@ -226,7 +226,7 @@ def test_normalize_parameter_df():
# check if prior parameters match
for col in [INITIALIZATION_PRIOR_PARAMETERS, OBJECTIVE_PRIOR_PARAMETERS]:
for (_, actual_row), (_, expected_row) in zip(
- actual.iterrows(), expected.iterrows()
+ actual.iterrows(), expected.iterrows(), strict=True
):
actual_pars = tuple(
[float(val) for val in actual_row[col].split(";")]
diff --git a/tests/test_petab.py b/tests/v1/test_petab.py
similarity index 98%
rename from tests/test_petab.py
rename to tests/v1/test_petab.py
index ed4ac63a..65700af5 100644
--- a/tests/test_petab.py
+++ b/tests/v1/test_petab.py
@@ -13,6 +13,7 @@
from yaml import safe_load
import petab
+import petab.v1
from petab.C import *
from petab.models.sbml_model import SbmlModel
@@ -110,7 +111,7 @@ def petab_problem():
@pytest.fixture
def fujita_model_scaling():
- path = Path(__file__).parent.parent / "doc" / "example" / "example_Fujita"
+ path = Path(__file__).parents[2] / "doc" / "example" / "example_Fujita"
sbml_file = path / "Fujita_model.xml"
condition_file = path / "Fujita_experimentalCondition.tsv"
@@ -225,7 +226,13 @@ def test_get_priors_from_df():
bounds = [entry[3] for entry in prior_list]
assert (
bounds
- == list(zip(parameter_df[LOWER_BOUND], parameter_df[UPPER_BOUND]))[:4]
+ == list(
+ zip(
+ parameter_df[LOWER_BOUND],
+ parameter_df[UPPER_BOUND],
+ strict=True,
+ )
+ )[:4]
)
# give correct value for empty
@@ -304,7 +311,7 @@ def test_create_parameter_df(
# Test old API with passing libsbml.Model directly
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
- parameter_df = petab.create_parameter_df(
+ parameter_df = petab.v1.create_parameter_df(
ss_model.model,
condition_df_2_conditions,
observable_df,
diff --git a/tests/test_sbml.py b/tests/v1/test_sbml.py
similarity index 100%
rename from tests/test_sbml.py
rename to tests/v1/test_sbml.py
diff --git a/tests/test_simplify.py b/tests/v1/test_simplify.py
similarity index 100%
rename from tests/test_simplify.py
rename to tests/v1/test_simplify.py
diff --git a/tests/test_simulate.py b/tests/v1/test_simulate.py
similarity index 99%
rename from tests/test_simulate.py
rename to tests/v1/test_simulate.py
index ee85ea08..e23b63cb 100644
--- a/tests/test_simulate.py
+++ b/tests/v1/test_simulate.py
@@ -1,7 +1,7 @@
"""Tests for petab/simulate.py."""
import functools
+from collections.abc import Callable
from pathlib import Path
-from typing import Callable
import numpy as np
import pandas as pd
@@ -26,7 +26,7 @@ def simulate_without_noise(self) -> pd.DataFrame:
def petab_problem() -> petab.Problem:
"""Create a PEtab problem for use in tests."""
petab_yaml_path = (
- Path(__file__).parent.parent.absolute()
+ Path(__file__).parents[2].absolute()
/ "doc"
/ "example"
/ "example_Fujita"
diff --git a/tests/test_visualization.py b/tests/v1/test_visualization.py
similarity index 99%
rename from tests/test_visualization.py
rename to tests/v1/test_visualization.py
index 1e67afff..0edd4b78 100644
--- a/tests/test_visualization.py
+++ b/tests/v1/test_visualization.py
@@ -20,7 +20,7 @@
# Avoid errors when plotting without X server
plt.switch_backend("agg")
-EXAMPLE_DIR = Path(__file__).parents[1] / "doc" / "example"
+EXAMPLE_DIR = Path(__file__).parents[2] / "doc" / "example"
@pytest.fixture(scope="function")
diff --git a/tests/test_visualization_data_overview.py b/tests/v1/test_visualization_data_overview.py
similarity index 92%
rename from tests/test_visualization_data_overview.py
rename to tests/v1/test_visualization_data_overview.py
index 76669f43..1b42fdbb 100644
--- a/tests/test_visualization_data_overview.py
+++ b/tests/v1/test_visualization_data_overview.py
@@ -10,7 +10,7 @@ def test_data_overview():
repository"""
with TemporaryDirectory() as temp_dir:
outfile = Path(temp_dir) / "Fujita.html"
- repo_root = Path(__file__).parent.parent
+ repo_root = Path(__file__).parents[2]
yaml_filename = (
repo_root / "doc" / "example" / "example_Fujita" / "Fujita.yaml"
)
diff --git a/tests/test_yaml.py b/tests/v1/test_yaml.py
similarity index 73%
rename from tests/test_yaml.py
rename to tests/v1/test_yaml.py
index f739c50b..82ab242c 100644
--- a/tests/test_yaml.py
+++ b/tests/v1/test_yaml.py
@@ -5,7 +5,7 @@
import pytest
from jsonschema.exceptions import ValidationError
-from petab.yaml import create_problem_yaml, validate
+from petab.yaml import create_problem_yaml, get_path_prefix, validate
def test_validate():
@@ -17,7 +17,7 @@ def test_validate():
# should be well-formed
file_ = (
- Path(__file__).parents[1]
+ Path(__file__).parents[2]
/ "doc"
/ "example"
/ "example_Fujita"
@@ -37,8 +37,10 @@ def test_create_problem_yaml():
observable_file = Path(outdir, "observables.tsv")
yaml_file = Path(outdir, "problem.yaml")
visualization_file = Path(outdir, "visualization.tsv")
+
+ _create_dummy_sbml_model(sbml_file)
+
for file in (
- sbml_file,
condition_file,
measurement_file,
parameter_file,
@@ -65,13 +67,14 @@ def test_create_problem_yaml():
observable_file2 = Path(outdir, "observables2.tsv")
yaml_file2 = Path(outdir, "problem2.yaml")
for file in (
- sbml_file2,
condition_file2,
measurement_file2,
observable_file2,
):
file.touch()
+ _create_dummy_sbml_model(sbml_file2)
+
sbml_files = [sbml_file, sbml_file2]
condition_files = [condition_file, condition_file2]
measurement_files = [measurement_file, measurement_file2]
@@ -85,3 +88,29 @@ def test_create_problem_yaml():
yaml_file2,
)
validate(yaml_file2)
+
+
+def test_get_path_prefix():
+ assert get_path_prefix("/some/dir/file.yaml") == str(Path("/some/dir"))
+ assert get_path_prefix("some/dir/file.yaml") == str(Path("some/dir"))
+ assert (
+ get_path_prefix("https://petab.rocks/dir/file.yaml")
+ == "https://petab.rocks/dir"
+ )
+
+
+def test_validate_remote():
+ yaml_url = (
+ "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite"
+ "/main/petabtests/cases/v1.0.0/sbml/0001/_0001.yaml"
+ )
+
+ validate(yaml_url)
+
+
+def _create_dummy_sbml_model(sbml_file: Path | str):
+ import libsbml
+
+ sbml_doc = libsbml.SBMLDocument()
+ sbml_doc.createModel()
+ libsbml.writeSBMLToFile(sbml_doc, str(sbml_file))
diff --git a/tests/v2/__init__.py b/tests/v2/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/v2/test_conversion.py b/tests/v2/test_conversion.py
new file mode 100644
index 00000000..c309a00e
--- /dev/null
+++ b/tests/v2/test_conversion.py
@@ -0,0 +1,34 @@
+import logging
+import tempfile
+
+from petab.v2.petab1to2 import petab1to2
+
+
+def test_petab1to2_remote():
+ yaml_url = (
+ "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite"
+ "/main/petabtests/cases/v1.0.0/sbml/0001/_0001.yaml"
+ )
+
+ with tempfile.TemporaryDirectory(prefix="test_petab1to2") as tmpdirname:
+ # TODO verify that the v2 files match "ground truth"
+ # in `petabtests/cases/v2.0.0/sbml/0001/_0001.yaml`
+ petab1to2(yaml_url, tmpdirname)
+
+
+def test_benchmark_collection():
+ """Test that we can upgrade all benchmark collection models."""
+ import benchmark_models_petab
+
+ logging.basicConfig(level=logging.DEBUG)
+
+ for problem_id in benchmark_models_petab.MODELS:
+ if problem_id == "Lang_PLOSComputBiol2024":
+ # Does not pass initial linting
+ continue
+
+ yaml_path = benchmark_models_petab.get_problem_yaml_path(problem_id)
+ with tempfile.TemporaryDirectory(
+ prefix=f"test_petab1to2_{problem_id}"
+ ) as tmpdirname:
+ petab1to2(yaml_path, tmpdirname)
diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py
new file mode 100644
index 00000000..334dc86a
--- /dev/null
+++ b/tests/v2/test_problem.py
@@ -0,0 +1,27 @@
+from petab.v2 import Problem
+
+
+def test_load_remote():
+    """Test loading and validating a PEtab v2 problem from a remote YAML."""
+    yaml_url = (
+        "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite"
+        "/main/petabtests/cases/v2.0.0/sbml/0001/_0001.yaml"
+    )
+    petab_problem = Problem.from_yaml(yaml_url)
+
+    assert (
+        petab_problem.measurement_df is not None
+        and not petab_problem.measurement_df.empty
+    )
+
+    assert petab_problem.validate() == []
+
+
+def test_auto_upgrade():
+ yaml_url = (
+ "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite"
+ "/main/petabtests/cases/v1.0.0/sbml/0001/_0001.yaml"
+ )
+ problem = Problem.from_yaml(yaml_url)
+ # TODO check something specifically different in a v2 problem
+ assert isinstance(problem, Problem)
diff --git a/tox.ini b/tox.ini
index b7c94b39..d57aa91d 100644
--- a/tox.ini
+++ b/tox.ini
@@ -13,6 +13,10 @@ description =
[testenv:unit]
extras = tests,reports,combine,vis
+deps=
+ git+https://github.com/PEtab-dev/petab_test_suite@main
+ git+https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab.git@master\#subdirectory=src/python
+
commands =
python -m pip install sympy>=1.12.1
python -m pytest --cov=petab --cov-report=xml --cov-append \