diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 255099f6..90c2fc8f 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -5,7 +5,7 @@ version: 2 sphinx: configuration: doc/conf.py - fail_on_warning: true + fail_on_warning: false build: os: "ubuntu-22.04" diff --git a/CHANGELOG.md b/CHANGELOG.md index b944ab53..56ec211a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,47 @@ # PEtab changelog +## 0.4 series + +### 0.4.0 + +**Prepare for PEtab v2** + +To enable ongoing support for PEtab v1, while "forking" the v1 code for PEtab v2, the old code base is now available at `petab.v1`, and the new code base will be at `petab.v2`. For now, old `import petab.*` statements still work, but are marked as deprecated, and `import petab.v1.*` should be used instead. `petablint` will be designed for use with only full PEtab problems in future too, rather than individual tables -- partial problems will be supported to validate individual tables. + +* Add PEtab math parser and sympy converter by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/260 +* Deprecate petablint with individual tables by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/274 +* Introduce petab.v1 package by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/282 +* Separate v1 and v2 tests by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/284 +* Add petab.v2.Problem by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/285 +* PEtab v1 to v2 converter by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/281 +* Fix imports related to v1 subpackage by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/293 + +**Validation** + +Validation will become increasingly atomic and OOP, to support extension-specific validation in PEtab v2. 
+ +* Validator: check for positive bounds for log-scaled parameter by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/278 +* Validator: check prior parameters by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/279 +* Fix validation for remote files by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/287 +* New validation API by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/288 + +**Documentation** + +* Fixed formatting / missing type annotations by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/292 +* Added versioning and deprecation policy by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/277 + +**Other changes** + +* Simplify yaml schema by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/264 +* Handle numpy types in sympify_petab by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/294 +* New `get_path_prefix` method to get the base path for relative paths in PEtab problem YAML by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/280 + +**New Contributors** + +* @dependabot made their first contribution in https://github.com/PEtab-dev/libpetab-python/pull/267 + +**Full Changelog**: https://github.com/PEtab-dev/libpetab-python/compare/v0.3.0...v0.4.0 + ## 0.3 series ### 0.3.0 diff --git a/MANIFEST.in b/MANIFEST.in index 72717189..dbdebd53 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,3 @@ recursive-include petab/schemas *.yaml -recursive-include petab/visualize/templates * +recursive-include petab/v1/visualize/templates * +recursive-exclude tests * diff --git a/README.md b/README.md index 167b336e..7873928b 100644 --- a/README.md +++ b/README.md @@ -39,11 +39,11 @@ be: entrypoints to be available as a shell command from anywhere, called `petablint` - - [`petab.create_parameter_df`](https://petab.readthedocs.io/projects/libpetab-python/en/latest/build/_autosummary/petab.parameters.html#petab.parameters.create_parameter_df) + - 
[petab.create_parameter_df](https://petab.readthedocs.io/projects/libpetab-python/en/latest/build/_autosummary/petab.parameters.html#petab.parameters.create_parameter_df) to create the parameter table, once you have set up the model, condition table, observable table and measurement table - - [`petab.create_combine_archive`](https://petab.readthedocs.io/projects/libpetab-python/en/latest/build/_autosummary/petab.core.html#petab.core.create_combine_archive) + - [petab.create_combine_archive](https://petab.readthedocs.io/projects/libpetab-python/en/latest/build/_autosummary/petab.core.html#petab.core.create_combine_archive) to create a [COMBINE Archive](https://combinearchive.org/index/) from PEtab files diff --git a/doc/development.rst b/doc/development.rst new file mode 100644 index 00000000..df4edf55 --- /dev/null +++ b/doc/development.rst @@ -0,0 +1,26 @@ +Development +=========== + +Versioning +---------- + +We use `Semantic Versioning <https://semver.org/>`_ with the modifications +described under :ref:`deprecation_policy`. + +.. _deprecation_policy: + +Deprecation policy +------------------ + +petab aims to provide a stable API for users. However, not all features can be +maintained indefinitely. We will deprecate features in minor releases and, +where possible, issue a warning when they are used. We will keep deprecated +features for at least six months after the release that includes the +respective deprecation warning and then remove them at the earliest in the next minor +or major release. If a deprecated feature is the source of a major bug, we may +remove it earlier. + +Python compatibility +-------------------- + +We follow `numpy's Python support policy <https://numpy.org/neps/nep-0029-deprecation_policy.html>`_. diff --git a/doc/index.rst b/doc/index.rst index f4abafc7..be67867d 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -14,6 +14,7 @@ Changelog how_to_cite license + development .. 
toctree:: :maxdepth: 2 diff --git a/doc/modules.rst b/doc/modules.rst index 1454a36a..a227fafa 100644 --- a/doc/modules.rst +++ b/doc/modules.rst @@ -1,28 +1,32 @@ API Reference ============= -.. automodule:: petab - .. rubric:: Modules .. autosummary:: :toctree: build/_autosummary :recursive: - petab.C - petab.calculate - petab.composite_problem - petab.conditions - petab.core - petab.lint - petab.measurements - petab.observables - petab.parameter_mapping - petab.parameters - petab.problem - petab.sampling - petab.sbml - petab.simulate - petab.simplify - petab.visualize - petab.yaml + petab + petab.v1 + petab.v1.C + petab.v1.calculate + petab.v1.composite_problem + petab.v1.conditions + petab.v1.core + petab.v1.lint + petab.v1.measurements + petab.v1.models + petab.v1.observables + petab.v1.parameter_mapping + petab.v1.parameters + petab.v1.problem + petab.v1.sampling + petab.v1.sbml + petab.v1.simulate + petab.v1.simplify + petab.v1.visualize + petab.v1.yaml + petab.v2 + petab.v2.lint + petab.v2.problem diff --git a/petab/C.py b/petab/C.py index 2e3616ee..77f11abb 100644 --- a/petab/C.py +++ b/petab/C.py @@ -1,365 +1,5 @@ -# pylint: disable:invalid-name -""" -This file contains constant definitions. -""" +"""Deprecated module. 
Use petab.v1.C instead.""" +from petab import _deprecated_import_v1 +from petab.v1.C import * # noqa: F403, F401, E402 -import math as _math - -# MEASUREMENTS - -#: -OBSERVABLE_ID = "observableId" - -#: -PREEQUILIBRATION_CONDITION_ID = "preequilibrationConditionId" - -#: -SIMULATION_CONDITION_ID = "simulationConditionId" - -#: -MEASUREMENT = "measurement" - -#: -TIME = "time" - -#: Time value that indicates steady-state measurements -TIME_STEADY_STATE = _math.inf - -#: -OBSERVABLE_PARAMETERS = "observableParameters" - -#: -NOISE_PARAMETERS = "noiseParameters" - -#: -DATASET_ID = "datasetId" - -#: -REPLICATE_ID = "replicateId" - -#: Mandatory columns of measurement table -MEASUREMENT_DF_REQUIRED_COLS = [ - OBSERVABLE_ID, - SIMULATION_CONDITION_ID, - MEASUREMENT, - TIME, -] - -#: Optional columns of measurement table -MEASUREMENT_DF_OPTIONAL_COLS = [ - PREEQUILIBRATION_CONDITION_ID, - OBSERVABLE_PARAMETERS, - NOISE_PARAMETERS, - DATASET_ID, - REPLICATE_ID, -] - -#: Measurement table columns -MEASUREMENT_DF_COLS = [ - MEASUREMENT_DF_REQUIRED_COLS[0], - MEASUREMENT_DF_OPTIONAL_COLS[0], - *MEASUREMENT_DF_REQUIRED_COLS[1:], - *MEASUREMENT_DF_OPTIONAL_COLS[1:], -] - - -# PARAMETERS - -#: -PARAMETER_ID = "parameterId" -#: -PARAMETER_NAME = "parameterName" -#: -PARAMETER_SCALE = "parameterScale" -#: -LOWER_BOUND = "lowerBound" -#: -UPPER_BOUND = "upperBound" -#: -NOMINAL_VALUE = "nominalValue" -#: -ESTIMATE = "estimate" -#: -INITIALIZATION_PRIOR_TYPE = "initializationPriorType" -#: -INITIALIZATION_PRIOR_PARAMETERS = "initializationPriorParameters" -#: -OBJECTIVE_PRIOR_TYPE = "objectivePriorType" -#: -OBJECTIVE_PRIOR_PARAMETERS = "objectivePriorParameters" - -#: Mandatory columns of parameter table -PARAMETER_DF_REQUIRED_COLS = [ - PARAMETER_ID, - PARAMETER_SCALE, - LOWER_BOUND, - UPPER_BOUND, - ESTIMATE, -] - -#: Optional columns of parameter table -PARAMETER_DF_OPTIONAL_COLS = [ - PARAMETER_NAME, - NOMINAL_VALUE, - INITIALIZATION_PRIOR_TYPE, - 
INITIALIZATION_PRIOR_PARAMETERS, - OBJECTIVE_PRIOR_TYPE, - OBJECTIVE_PRIOR_PARAMETERS, -] - -#: Parameter table columns -PARAMETER_DF_COLS = [ - PARAMETER_DF_REQUIRED_COLS[0], - PARAMETER_DF_OPTIONAL_COLS[0], - *PARAMETER_DF_REQUIRED_COLS[1:], - *PARAMETER_DF_OPTIONAL_COLS[1:], -] - -#: -INITIALIZATION = "initialization" -#: -OBJECTIVE = "objective" - - -# CONDITIONS - -#: -CONDITION_ID = "conditionId" -#: -CONDITION_NAME = "conditionName" - - -# OBSERVABLES - -#: -OBSERVABLE_NAME = "observableName" -#: -OBSERVABLE_FORMULA = "observableFormula" -#: -NOISE_FORMULA = "noiseFormula" -#: -OBSERVABLE_TRANSFORMATION = "observableTransformation" -#: -NOISE_DISTRIBUTION = "noiseDistribution" - -#: Mandatory columns of observables table -OBSERVABLE_DF_REQUIRED_COLS = [ - OBSERVABLE_ID, - OBSERVABLE_FORMULA, - NOISE_FORMULA, -] - -#: Optional columns of observables table -OBSERVABLE_DF_OPTIONAL_COLS = [ - OBSERVABLE_NAME, - OBSERVABLE_TRANSFORMATION, - NOISE_DISTRIBUTION, -] - -#: Observables table columns -OBSERVABLE_DF_COLS = [ - *OBSERVABLE_DF_REQUIRED_COLS, - *OBSERVABLE_DF_OPTIONAL_COLS, -] - - -# TRANSFORMATIONS - -#: -LIN = "lin" -#: -LOG = "log" -#: -LOG10 = "log10" -#: Supported observable transformations -OBSERVABLE_TRANSFORMATIONS = [LIN, LOG, LOG10] - - -# NOISE MODELS - -#: -UNIFORM = "uniform" -#: -PARAMETER_SCALE_UNIFORM = "parameterScaleUniform" -#: -NORMAL = "normal" -#: -PARAMETER_SCALE_NORMAL = "parameterScaleNormal" -#: -LAPLACE = "laplace" -#: -PARAMETER_SCALE_LAPLACE = "parameterScaleLaplace" -#: -LOG_NORMAL = "logNormal" -#: -LOG_LAPLACE = "logLaplace" - -#: Supported prior types -PRIOR_TYPES = [ - UNIFORM, - NORMAL, - LAPLACE, - LOG_NORMAL, - LOG_LAPLACE, - PARAMETER_SCALE_UNIFORM, - PARAMETER_SCALE_NORMAL, - PARAMETER_SCALE_LAPLACE, -] - -#: Supported noise distributions -NOISE_MODELS = [NORMAL, LAPLACE] - - -# VISUALIZATION - -#: -PLOT_ID = "plotId" -#: -PLOT_NAME = "plotName" -#: -PLOT_TYPE_SIMULATION = "plotTypeSimulation" -#: -PLOT_TYPE_DATA = 
"plotTypeData" -#: -X_VALUES = "xValues" -#: -X_OFFSET = "xOffset" -#: -X_LABEL = "xLabel" -#: -X_SCALE = "xScale" -#: -Y_VALUES = "yValues" -#: -Y_OFFSET = "yOffset" -#: -Y_LABEL = "yLabel" -#: -Y_SCALE = "yScale" -#: -LEGEND_ENTRY = "legendEntry" - -#: Mandatory columns of visualization table -VISUALIZATION_DF_REQUIRED_COLS = [PLOT_ID] - -#: Optional columns of visualization table -VISUALIZATION_DF_OPTIONAL_COLS = [ - PLOT_NAME, - PLOT_TYPE_SIMULATION, - PLOT_TYPE_DATA, - X_VALUES, - X_OFFSET, - X_LABEL, - X_SCALE, - Y_VALUES, - Y_OFFSET, - Y_LABEL, - Y_SCALE, - LEGEND_ENTRY, - DATASET_ID, -] - -#: Visualization table columns -VISUALIZATION_DF_COLS = [ - *VISUALIZATION_DF_REQUIRED_COLS, - *VISUALIZATION_DF_OPTIONAL_COLS, -] - -#: Visualization table columns that contain subplot specifications -VISUALIZATION_DF_SUBPLOT_LEVEL_COLS = [ - PLOT_ID, - PLOT_NAME, - PLOT_TYPE_SIMULATION, - PLOT_TYPE_DATA, - X_LABEL, - X_SCALE, - Y_LABEL, - Y_SCALE, -] - -#: Visualization table columns that contain single plot specifications -VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS = [ - X_VALUES, - X_OFFSET, - Y_VALUES, - Y_OFFSET, - LEGEND_ENTRY, - DATASET_ID, -] - -#: -LINE_PLOT = "LinePlot" -#: -BAR_PLOT = "BarPlot" -#: -SCATTER_PLOT = "ScatterPlot" -#: Supported plot types -PLOT_TYPES_SIMULATION = [LINE_PLOT, BAR_PLOT, SCATTER_PLOT] - -#: Supported xScales -X_SCALES = [LIN, LOG, LOG10] - -#: Supported yScales -Y_SCALES = [LIN, LOG, LOG10] - - -#: -MEAN_AND_SD = "MeanAndSD" -#: -MEAN_AND_SEM = "MeanAndSEM" -#: -REPLICATE = "replicate" -#: -PROVIDED = "provided" -#: Supported settings for handling replicates -PLOT_TYPES_DATA = [MEAN_AND_SD, MEAN_AND_SEM, REPLICATE, PROVIDED] - - -# YAML -#: -FORMAT_VERSION = "format_version" -#: -PARAMETER_FILE = "parameter_file" -#: -PROBLEMS = "problems" -#: -SBML_FILES = "sbml_files" -#: -MODEL_FILES = "model_files" -#: -MODEL_LOCATION = "location" -#: -MODEL_LANGUAGE = "language" -#: -CONDITION_FILES = "condition_files" -#: -MEASUREMENT_FILES = 
"measurement_files" -#: -OBSERVABLE_FILES = "observable_files" -#: -VISUALIZATION_FILES = "visualization_files" -#: -MAPPING_FILES = "mapping_files" -#: -EXTENSIONS = "extensions" - - -# MAPPING -#: -PETAB_ENTITY_ID = "petabEntityId" -#: -MODEL_ENTITY_ID = "modelEntityId" -#: -MAPPING_DF_REQUIRED_COLS = [PETAB_ENTITY_ID, MODEL_ENTITY_ID] - -# MORE - -#: -SIMULATION = "simulation" -#: -RESIDUAL = "residual" -#: -NOISE_VALUE = "noiseValue" - -# separator for multiple parameter values (bounds, observableParameters, ...) -PARAMETER_SEPARATOR = ";" +_deprecated_import_v1(__name__) diff --git a/petab/__init__.py b/petab/__init__.py index 16cff24b..3dd30598 100644 --- a/petab/__init__.py +++ b/petab/__init__.py @@ -2,30 +2,74 @@ PEtab global ============ +.. warning:: + + All functions in here are deprecated. Use the respective functions from + :mod:`petab.v1` instead. + Attributes: ENV_NUM_THREADS: Name of environment variable to set number of threads or processes PEtab should use for operations that can be performed in parallel. By default, all operations are performed sequentially. 
""" +import functools +import inspect +import sys +import warnings +from warnings import warn + +# deprecated imports +from petab.v1 import * # noqa: F403, F401, E402 + +from .v1.format_version import __format_version__ # noqa: F401, E402 + +# __all__ = [ +# 'ENV_NUM_THREADS', +# ] ENV_NUM_THREADS = "PETAB_NUM_THREADS" -from .C import * # noqa: F403, F401, E402 -from .calculate import * # noqa: F403, F401, E402 -from .composite_problem import * # noqa: F403, F401, E402 -from .conditions import * # noqa: F403, F401, E402 -from .core import * # noqa: F403, F401, E402 -from .format_version import __format_version__ # noqa: F401, E402 -from .lint import * # noqa: F403, F401, E402 -from .mapping import * # noqa: F403, F401, E402 -from .measurements import * # noqa: F403, F401, E402 -from .observables import * # noqa: F403, F401, E402 -from .parameter_mapping import * # noqa: F403, F401, E402 -from .parameters import * # noqa: F403, F401, E402 -from .problem import * # noqa: F403, F401, E402 -from .sampling import * # noqa: F403, F401, E402 -from .sbml import * # noqa: F403, F401, E402 -from .simulate import * # noqa: F403, F401, E402 -from .version import __version__ # noqa: F401, E402 -from .yaml import * # noqa: F403, F401, E402 + +def _deprecated_v1(func): + """Decorator for deprecation warnings for functions.""" + + @functools.wraps(func) + def new_func(*args, **kwargs): + warnings.warn( + f"petab.{func.__name__} is deprecated, " + f"please use petab.v1.{func.__name__} instead.", + category=DeprecationWarning, + stacklevel=2, + ) + return func(*args, **kwargs) + + return new_func + + +def _deprecated_import_v1(module_name: str): + """Decorator for deprecation warnings for modules.""" + warn( + f"The '{module_name}' module is deprecated and will be removed " + f"in the next major release. 
Please use " + f"'petab.v1.{module_name.removeprefix('petab.')}' " + "instead.", + DeprecationWarning, + stacklevel=2, + ) + + +__all__ = [ + x + for x in dir(sys.modules[__name__]) + if not x.startswith("_") + and x not in {"sys", "warnings", "functools", "warn", "inspect"} +] + + +# apply decorator to all functions in the module +for name in __all__: + obj = globals().get(name) + if callable(obj) and inspect.isfunction(obj): + globals()[name] = _deprecated_v1(obj) +del name, obj diff --git a/petab/calculate.py b/petab/calculate.py index f5258fc6..ca4c224f 100644 --- a/petab/calculate.py +++ b/petab/calculate.py @@ -1,421 +1,7 @@ -"""Functions performing various calculations.""" +"""Deprecated module for calculating residuals and log-likelihoods. -import numbers -from functools import reduce -from typing import Dict, List, Union +Use petab.v1.calculate instead.""" +from petab import _deprecated_import_v1 +from petab.v1.calculate import * # noqa: F403, F401, E402 -import numpy as np -import pandas as pd -import sympy - -import petab - -from .C import * -from .math import sympify_petab - -__all__ = [ - "calculate_residuals", - "calculate_residuals_for_table", - "get_symbolic_noise_formulas", - "evaluate_noise_formula", - "calculate_chi2", - "calculate_chi2_for_table_from_residuals", - "calculate_llh", - "calculate_llh_for_table", - "calculate_single_llh", -] - - -def calculate_residuals( - measurement_dfs: Union[List[pd.DataFrame], pd.DataFrame], - simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame], - observable_dfs: Union[List[pd.DataFrame], pd.DataFrame], - parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame], - normalize: bool = True, - scale: bool = True, -) -> List[pd.DataFrame]: - """Calculate residuals. - - Arguments: - measurement_dfs: - The problem measurement tables. - simulation_dfs: - Simulation tables corresponding to the measurement tables. - observable_dfs: - The problem observable tables. - parameter_dfs: - The problem parameter tables. 
- normalize: - Whether to normalize residuals by the noise standard deviation - terms. - scale: - Whether to calculate residuals of scaled values. - - Returns: - List of DataFrames in the same structure as `measurement_dfs` - with a field `residual` instead of measurement. - """ - # convenience - if isinstance(measurement_dfs, pd.DataFrame): - measurement_dfs = [measurement_dfs] - if isinstance(simulation_dfs, pd.DataFrame): - simulation_dfs = [simulation_dfs] - if isinstance(observable_dfs, pd.DataFrame): - observable_dfs = [observable_dfs] - if isinstance(parameter_dfs, pd.DataFrame): - parameter_dfs = [parameter_dfs] - - # iterate over data frames - residual_dfs = [] - for measurement_df, simulation_df, observable_df, parameter_df in zip( - measurement_dfs, simulation_dfs, observable_dfs, parameter_dfs - ): - residual_df = calculate_residuals_for_table( - measurement_df, - simulation_df, - observable_df, - parameter_df, - normalize, - scale, - ) - residual_dfs.append(residual_df) - return residual_dfs - - -def calculate_residuals_for_table( - measurement_df: pd.DataFrame, - simulation_df: pd.DataFrame, - observable_df: pd.DataFrame, - parameter_df: pd.DataFrame, - normalize: bool = True, - scale: bool = True, -) -> pd.DataFrame: - """ - Calculate residuals for a single measurement table. - For the arguments, see `calculate_residuals`. 
- """ - # create residual df as copy of measurement df, change column - residual_df = measurement_df.copy(deep=True).rename( - columns={MEASUREMENT: RESIDUAL} - ) - residual_df[RESIDUAL] = residual_df[RESIDUAL].astype("float64") - # matching columns - compared_cols = set(MEASUREMENT_DF_COLS) - compared_cols -= {MEASUREMENT} - compared_cols &= set(measurement_df.columns) - compared_cols &= set(simulation_df.columns) - - # compute noise formulas for observables - noise_formulas = get_symbolic_noise_formulas(observable_df) - - # iterate over measurements, find corresponding simulations - for irow, row in measurement_df.iterrows(): - measurement = row[MEASUREMENT] - # look up in simulation df - masks = [ - (simulation_df[col] == row[col]) | petab.is_empty(row[col]) - for col in compared_cols - ] - mask = reduce(lambda x, y: x & y, masks) - simulation = simulation_df.loc[mask][SIMULATION].iloc[0] - if scale: - # apply scaling - observable = observable_df.loc[row[OBSERVABLE_ID]] - trafo = observable.get(OBSERVABLE_TRANSFORMATION, LIN) - simulation = petab.scale(simulation, trafo) - measurement = petab.scale(measurement, trafo) - - # non-normalized residual is just the difference - residual = simulation - measurement - - noise_value = 1 - if normalize: - # look up noise standard deviation - noise_value = evaluate_noise_formula( - row, noise_formulas, parameter_df, simulation - ) - residual /= noise_value - - # fill in value - residual_df.loc[irow, RESIDUAL] = residual - return residual_df - - -def get_symbolic_noise_formulas(observable_df) -> Dict[str, sympy.Expr]: - """Sympify noise formulas. - - Arguments: - observable_df: The observable table. - - Returns: - Dictionary of {observable_id}: {noise_formula}. 
- """ - noise_formulas = {} - # iterate over observables - for row in observable_df.itertuples(): - observable_id = row.Index - if NOISE_FORMULA not in observable_df.columns: - noise_formula = None - else: - noise_formula = sympify_petab(row.noiseFormula) - noise_formulas[observable_id] = noise_formula - return noise_formulas - - -def evaluate_noise_formula( - measurement: pd.Series, - noise_formulas: Dict[str, sympy.Expr], - parameter_df: pd.DataFrame, - simulation: numbers.Number, -) -> float: - """Fill in parameters for `measurement` and evaluate noise_formula. - - Arguments: - measurement: A measurement table row. - noise_formulas: The noise formulas as computed by - `get_symbolic_noise_formulas`. - parameter_df: The parameter table. - simulation: The simulation corresponding to the measurement, scaled. - - Returns: - The noise value. - """ - # the observable id - observable_id = measurement[OBSERVABLE_ID] - - # extract measurement specific overrides - observable_parameter_overrides = petab.split_parameter_replacement_list( - measurement.get(NOISE_PARAMETERS, None) - ) - # fill in measurement specific parameters - overrides = { - f"noiseParameter{i_obs_par + 1}_{observable_id}": obs_par - for i_obs_par, obs_par in enumerate(observable_parameter_overrides) - } - - # fill in observables - overrides[observable_id] = simulation - - # fill in general parameters - for row in parameter_df.itertuples(): - overrides[row.Index] = row.nominalValue - - # replace parametric measurement specific parameters - for key, value in overrides.items(): - if not isinstance(value, numbers.Number): - # is parameter - overrides[key] = parameter_df.loc[value, NOMINAL_VALUE] - - # replace parameters by values in formula - noise_formula = noise_formulas[observable_id] - noise_value = noise_formula.subs(overrides) - - # conversion is possible if all parameters are replaced - try: - noise_value = float(noise_value) - except TypeError as e: - raise ValueError( - f"Cannot replace all 
parameters in noise formula {noise_value} " - f"for observable {observable_id}. " - f"Missing {noise_formula.free_symbols}. Note that model states " - "are currently not supported." - ) from e - return noise_value - - -def calculate_chi2( - measurement_dfs: Union[List[pd.DataFrame], pd.DataFrame], - simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame], - observable_dfs: Union[List[pd.DataFrame], pd.DataFrame], - parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame], - normalize: bool = True, - scale: bool = True, -) -> float: - """Calculate the chi2 value. - - Arguments: - measurement_dfs: - The problem measurement tables. - simulation_dfs: - Simulation tables corresponding to the measurement tables. - observable_dfs: - The problem observable tables. - parameter_dfs: - The problem parameter tables. - normalize: - Whether to normalize residuals by the noise standard deviation - terms. - scale: - Whether to calculate residuals of scaled values. - - Returns: - The aggregated chi2 value. - """ - residual_dfs = calculate_residuals( - measurement_dfs, - simulation_dfs, - observable_dfs, - parameter_dfs, - normalize, - scale, - ) - chi2s = [ - calculate_chi2_for_table_from_residuals(df) for df in residual_dfs - ] - return sum(chi2s) - - -def calculate_chi2_for_table_from_residuals( - residual_df: pd.DataFrame, -) -> float: - """Compute chi2 value for a single residual table.""" - return (np.array(residual_df[RESIDUAL]) ** 2).sum() - - -def calculate_llh( - measurement_dfs: Union[List[pd.DataFrame], pd.DataFrame], - simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame], - observable_dfs: Union[List[pd.DataFrame], pd.DataFrame], - parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame], -) -> float: - """Calculate total log likelihood. - - Arguments: - measurement_dfs: - The problem measurement tables. - simulation_dfs: - Simulation tables corresponding to the measurement tables. - observable_dfs: - The problem observable tables. 
- parameter_dfs: - The problem parameter tables. - - Returns: - The log-likelihood. - """ - # convenience - if isinstance(measurement_dfs, pd.DataFrame): - measurement_dfs = [measurement_dfs] - if isinstance(simulation_dfs, pd.DataFrame): - simulation_dfs = [simulation_dfs] - if isinstance(observable_dfs, pd.DataFrame): - observable_dfs = [observable_dfs] - if isinstance(parameter_dfs, pd.DataFrame): - parameter_dfs = [parameter_dfs] - - # iterate over data frames - llhs = [] - for measurement_df, simulation_df, observable_df, parameter_df in zip( - measurement_dfs, simulation_dfs, observable_dfs, parameter_dfs - ): - _llh = calculate_llh_for_table( - measurement_df, simulation_df, observable_df, parameter_df - ) - llhs.append(_llh) - return sum(llhs) - - -def calculate_llh_for_table( - measurement_df: pd.DataFrame, - simulation_df: pd.DataFrame, - observable_df: pd.DataFrame, - parameter_df: pd.DataFrame, -) -> float: - """Calculate log-likelihood for one set of tables. For the arguments, see - `calculate_llh`. 
- """ - llhs = [] - - # matching columns - compared_cols = set(MEASUREMENT_DF_COLS) - compared_cols -= {MEASUREMENT} - compared_cols &= set(measurement_df.columns) - compared_cols &= set(simulation_df.columns) - - # compute noise formulas for observables - noise_formulas = get_symbolic_noise_formulas(observable_df) - - # iterate over measurements, find corresponding simulations - for _, row in measurement_df.iterrows(): - measurement = row[MEASUREMENT] - - # look up in simulation df - masks = [ - (simulation_df[col] == row[col]) | petab.is_empty(row[col]) - for col in compared_cols - ] - mask = reduce(lambda x, y: x & y, masks) - - simulation = simulation_df.loc[mask][SIMULATION].iloc[0] - - observable = observable_df.loc[row[OBSERVABLE_ID]] - - # get scale - scale = observable.get(OBSERVABLE_TRANSFORMATION, LIN) - - # get noise standard deviation - noise_value = evaluate_noise_formula( - row, noise_formulas, parameter_df, petab.scale(simulation, scale) - ) - - # get noise distribution - noise_distribution = observable.get(NOISE_DISTRIBUTION, NORMAL) - - llh = calculate_single_llh( - measurement, simulation, scale, noise_distribution, noise_value - ) - llhs.append(llh) - return sum(llhs) - - -def calculate_single_llh( - measurement: float, - simulation: float, - scale: str, - noise_distribution: str, - noise_value: float, -) -> float: - """Calculate a single log likelihood. - - Arguments: - measurement: The measurement value. - simulation: The simulated value. - scale: The scale on which the noise model is to be applied. - noise_distribution: The noise distribution. - noise_value: The considered noise models possess a single noise - parameter, e.g. the normal standard deviation. - - Returns: - The computed likelihood for the given values. 
- """ - # short-hand - m, s, sigma = measurement, simulation, noise_value - pi, log, log10 = np.pi, np.log, np.log10 - - # go over the possible cases - if noise_distribution == NORMAL and scale == LIN: - nllh = 0.5 * log(2 * pi * sigma**2) + 0.5 * ((s - m) / sigma) ** 2 - elif noise_distribution == NORMAL and scale == LOG: - nllh = ( - 0.5 * log(2 * pi * sigma**2 * m**2) - + 0.5 * ((log(s) - log(m)) / sigma) ** 2 - ) - elif noise_distribution == NORMAL and scale == LOG10: - nllh = ( - 0.5 * log(2 * pi * sigma**2 * m**2 * log(10) ** 2) - + 0.5 * ((log10(s) - log10(m)) / sigma) ** 2 - ) - elif noise_distribution == LAPLACE and scale == LIN: - nllh = log(2 * sigma) + abs((s - m) / sigma) - elif noise_distribution == LAPLACE and scale == LOG: - nllh = log(2 * sigma * m) + abs((log(s) - log(m)) / sigma) - elif noise_distribution == LAPLACE and scale == LOG10: - nllh = log(2 * sigma * m * log(10)) + abs( - (log10(s) - log10(m)) / sigma - ) - else: - raise NotImplementedError( - "Unsupported combination of noise_distribution and scale " - f"specified: {noise_distribution}, {scale}." - ) - return -nllh +_deprecated_import_v1(__name__) diff --git a/petab/composite_problem.py b/petab/composite_problem.py index b8569cb5..51d30a20 100644 --- a/petab/composite_problem.py +++ b/petab/composite_problem.py @@ -1,86 +1,7 @@ -"""PEtab problems consisting of multiple models""" -import os -from typing import Dict, List, Union +"""Deprecated module for composite problems. -import pandas as pd +Use petab.v1.composite_problem instead.""" +from petab import _deprecated_import_v1 +from petab.v1.composite_problem import * # noqa: F403, F401, E402 -from . 
import parameters, problem, yaml -from .C import * # noqa: F403 - -__all__ = ["CompositeProblem"] - - -class CompositeProblem: - """Representation of a PEtab problem consisting of multiple models - - Attributes: - problems: - List of :py:class:`petab.Problem` s - parameter_df: - PEtab parameter DataFrame - """ - - def __init__( - self, - parameter_df: pd.DataFrame = None, - problems: List[problem.Problem] = None, - ): - """Constructor - - Arguments: - parameter_df: - see CompositeProblem.parameter_df - problems: - see CompositeProblem.problems - """ - self.problems: List[problem.Problem] = problems - self.parameter_df: pd.DataFrame = parameter_df - - @staticmethod - def from_yaml(yaml_config: Union[Dict, str]) -> "CompositeProblem": - """Create from YAML file - - Factory method to create a CompositeProblem instance from a PEtab - YAML config file - - Arguments: - yaml_config: PEtab configuration as dictionary or YAML file name - """ - if isinstance(yaml_config, str): - path_prefix = os.path.dirname(yaml_config) - yaml_config = yaml.load_yaml(yaml_config) - else: - path_prefix = "" - - parameter_df = parameters.get_parameter_df( - os.path.join(path_prefix, yaml_config[PARAMETER_FILE]) - ) - - problems = [] - for problem_config in yaml_config[PROBLEMS]: - yaml.assert_single_condition_and_sbml_file(problem_config) - - # don't set parameter file if we have multiple models - cur_problem = problem.Problem.from_files( - sbml_file=os.path.join( - path_prefix, problem_config[SBML_FILES][0] - ), - measurement_file=[ - os.path.join(path_prefix, f) - for f in problem_config[MEASUREMENT_FILES] - ], - condition_file=os.path.join( - path_prefix, problem_config[CONDITION_FILES][0] - ), - visualization_files=[ - os.path.join(path_prefix, f) - for f in problem_config[VISUALIZATION_FILES] - ], - observable_files=[ - os.path.join(path_prefix, f) - for f in problem_config[OBSERVABLE_FILES] - ], - ) - problems.append(cur_problem) - - return CompositeProblem(parameter_df=parameter_df, 
problems=problems) +_deprecated_import_v1(__name__) diff --git a/petab/conditions.py b/petab/conditions.py index deef08f7..cd00e466 100644 --- a/petab/conditions.py +++ b/petab/conditions.py @@ -1,118 +1,8 @@ -"""Functions operating on the PEtab condition table""" +"""Deprecated module for condition tables. -from pathlib import Path -from typing import Iterable, List, Optional, Union +Use petab.v1.conditions instead. +""" +from petab import _deprecated_import_v1 +from petab.v1.conditions import * # noqa: F403, F401, E402 -import numpy as np -import pandas as pd - -from . import core, lint -from .C import * - -__all__ = [ - "get_condition_df", - "write_condition_df", - "create_condition_df", - "get_parametric_overrides", -] - - -def get_condition_df( - condition_file: Union[str, pd.DataFrame, Path, None], -) -> pd.DataFrame: - """Read the provided condition file into a ``pandas.Dataframe`` - - Conditions are rows, parameters are columns, conditionId is index. - - Arguments: - condition_file: File name of PEtab condition file or pandas.Dataframe - """ - if condition_file is None: - return condition_file - - if isinstance(condition_file, (str, Path)): - condition_file = pd.read_csv( - condition_file, sep="\t", float_precision="round_trip" - ) - - lint.assert_no_leading_trailing_whitespace( - condition_file.columns.values, "condition" - ) - - if not isinstance(condition_file.index, pd.RangeIndex): - condition_file.reset_index( - drop=condition_file.index.name != CONDITION_ID, - inplace=True, - ) - - try: - condition_file.set_index([CONDITION_ID], inplace=True) - except KeyError: - raise KeyError( - f"Condition table missing mandatory field {CONDITION_ID}." 
- ) from None - - return condition_file - - -def write_condition_df(df: pd.DataFrame, filename: Union[str, Path]) -> None: - """Write PEtab condition table - - Arguments: - df: PEtab condition table - filename: Destination file name - """ - df = get_condition_df(df) - df.to_csv(filename, sep="\t", index=True) - - -def create_condition_df( - parameter_ids: Iterable[str], condition_ids: Optional[Iterable[str]] = None -) -> pd.DataFrame: - """Create empty condition DataFrame - - Arguments: - parameter_ids: the columns - condition_ids: the rows - Returns: - A :py:class:`pandas.DataFrame` with empty given rows and columns and - all nan values - """ - condition_ids = [] if condition_ids is None else list(condition_ids) - - data = {CONDITION_ID: condition_ids} - df = pd.DataFrame(data) - - for p in parameter_ids: - if not lint.is_valid_identifier(p): - raise ValueError("Invalid parameter ID: " + p) - df[p] = np.nan - - df.set_index(CONDITION_ID, inplace=True) - - return df - - -def get_parametric_overrides(condition_df: pd.DataFrame) -> List[str]: - """Get parametric overrides from condition table - - Arguments: - condition_df: PEtab condition table - - Returns: - List of parameter IDs that are mapped in a condition-specific way - """ - constant_parameters = set(condition_df.columns.values.tolist()) - { - CONDITION_ID, - CONDITION_NAME, - } - result = [] - - for column in constant_parameters: - if np.issubdtype(condition_df[column].dtype, np.number): - continue - - floatified = condition_df.loc[:, column].apply(core.to_float_if_float) - - result.extend(x for x in floatified if not isinstance(x, float)) - return result +_deprecated_import_v1(__name__) diff --git a/petab/core.py b/petab/core.py index 97b002d2..2668111c 100644 --- a/petab/core.py +++ b/petab/core.py @@ -1,545 +1,7 @@ -"""PEtab core functions (or functions that don't fit anywhere else)""" -import logging -import os -import re -from pathlib import Path -from typing import ( - Any, - Callable, - Dict, - 
Iterable, - List, - Optional, - Sequence, - Union, -) -from warnings import warn +"""Deprecated module for PEtab core classes and functions. -import numpy as np -import pandas as pd -from pandas.api.types import is_string_dtype +Use petab.v1.core instead.""" +from petab import _deprecated_import_v1 +from petab.v1.core import * # noqa: F403, F401, E402 -from . import yaml -from .C import * # noqa: F403 - -logger = logging.getLogger(__name__) -__all__ = [ - "get_simulation_df", - "write_simulation_df", - "get_visualization_df", - "write_visualization_df", - "get_notnull_columns", - "flatten_timepoint_specific_output_overrides", - "concat_tables", - "to_float_if_float", - "is_empty", - "create_combine_archive", - "unique_preserve_order", - "unflatten_simulation_df", -] - -POSSIBLE_GROUPVARS_FLATTENED_PROBLEM = [ - OBSERVABLE_ID, - OBSERVABLE_PARAMETERS, - NOISE_PARAMETERS, - SIMULATION_CONDITION_ID, - PREEQUILIBRATION_CONDITION_ID, -] - - -def get_simulation_df(simulation_file: Union[str, Path]) -> pd.DataFrame: - """Read PEtab simulation table - - Arguments: - simulation_file: URL or filename of PEtab simulation table - - Returns: - Simulation DataFrame - """ - return pd.read_csv( - simulation_file, sep="\t", index_col=None, float_precision="round_trip" - ) - - -def write_simulation_df(df: pd.DataFrame, filename: Union[str, Path]) -> None: - """Write PEtab simulation table - - Arguments: - df: PEtab simulation table - filename: Destination file name - """ - df.to_csv(filename, sep="\t", index=False) - - -def get_visualization_df( - visualization_file: Union[str, Path, pd.DataFrame, None], -) -> Union[pd.DataFrame, None]: - """Read PEtab visualization table - - Arguments: - visualization_file: - URL or filename of PEtab visualization table to read from, - or a DataFrame or None that will be returned as is. 
- - Returns: - Visualization DataFrame - """ - if visualization_file is None: - return None - - if isinstance(visualization_file, pd.DataFrame): - return visualization_file - - try: - types = {PLOT_NAME: str} - vis_spec = pd.read_csv( - visualization_file, - sep="\t", - index_col=None, - converters=types, - float_precision="round_trip", - ) - except pd.errors.EmptyDataError: - warn( - "Visualization table is empty. Defaults will be used. " - "Refer to the documentation for details.", - stacklevel=2, - ) - vis_spec = pd.DataFrame() - return vis_spec - - -def write_visualization_df( - df: pd.DataFrame, filename: Union[str, Path] -) -> None: - """Write PEtab visualization table - - Arguments: - df: PEtab visualization table - filename: Destination file name - """ - df.to_csv(filename, sep="\t", index=False) - - -def get_notnull_columns(df: pd.DataFrame, candidates: Iterable): - """ - Return list of ``df``-columns in ``candidates`` which are not all null/nan. - - The output can e.g. be used as input for ``pandas.DataFrame.groupby``. - - Arguments: - df: - Dataframe - candidates: - Columns of ``df`` to consider - """ - return [ - col for col in candidates if col in df and not np.all(df[col].isnull()) - ] - - -def get_observable_replacement_id(groupvars, groupvar) -> str: - """Get the replacement ID for an observable. - - Arguments: - groupvars: - The columns of a PEtab measurement table that should be unique - between observables in a flattened PEtab problem. - groupvar: - A specific grouping of `groupvars`. - - Returns: - The observable replacement ID. 
- """ - replacement_id = "" - for field in POSSIBLE_GROUPVARS_FLATTENED_PROBLEM: - if field in groupvars: - val = ( - str(groupvar[groupvars.index(field)]) - .replace(PARAMETER_SEPARATOR, "_") - .replace(".", "_") - ) - if replacement_id == "": - replacement_id = val - elif val != "": - replacement_id += f"__{val}" - return replacement_id - - -def get_hyperparameter_replacement_id( - hyperparameter_type, - observable_replacement_id, -): - """Get the full ID for a replaced hyperparameter. - - Arguments: - hyperparameter_type: - The type of hyperparameter, e.g. `noiseParameter`. - observable_replacement_id: - The observable replacement ID, e.g. the output of - `get_observable_replacement_id`. - - Returns: - The hyperparameter replacement ID, with a field that will be replaced - by the first matched substring in a regex substitution. - """ - return f"{hyperparameter_type}\\1_{observable_replacement_id}" - - -def get_flattened_id_mappings( - petab_problem: "petab.problem.Problem", -) -> Dict[str, Dict[str, str]]: - """Get mapping from unflattened to flattened observable IDs. - - Arguments: - petab_problem: - The unflattened PEtab problem. - - Returns: - A dictionary of dictionaries. Each inner dictionary is a mapping - from original ID to flattened ID. Each outer dictionary is the mapping - for either: observable IDs; noise parameter IDs; or, observable - parameter IDs. 
- """ - groupvars = get_notnull_columns( - petab_problem.measurement_df, POSSIBLE_GROUPVARS_FLATTENED_PROBLEM - ) - mappings = { - OBSERVABLE_ID: {}, - NOISE_PARAMETERS: {}, - OBSERVABLE_PARAMETERS: {}, - } - for groupvar, measurements in petab_problem.measurement_df.groupby( - groupvars, dropna=False - ): - observable_id = groupvar[groupvars.index(OBSERVABLE_ID)] - observable_replacement_id = get_observable_replacement_id( - groupvars, groupvar - ) - - logger.debug(f"Creating synthetic observable {observable_id}") - if observable_replacement_id in petab_problem.observable_df.index: - raise RuntimeError( - "could not create synthetic observables " - f"since {observable_replacement_id} was " - "already present in observable table" - ) - - mappings[OBSERVABLE_ID][observable_replacement_id] = observable_id - - for field, hyperparameter_type in [ - (NOISE_PARAMETERS, "noiseParameter"), - (OBSERVABLE_PARAMETERS, "observableParameter"), - ]: - if field in measurements: - mappings[field][ - get_hyperparameter_replacement_id( - hyperparameter_type=hyperparameter_type, - observable_replacement_id=observable_replacement_id, - ) - ] = rf"{hyperparameter_type}([0-9]+)_{observable_id}" - return mappings - - -def flatten_timepoint_specific_output_overrides( - petab_problem: "petab.problem.Problem", -) -> None: - """Flatten timepoint-specific output parameter overrides. - - If the PEtab problem definition has timepoint-specific - `observableParameters` or `noiseParameters` for the same observable, - replace those by replicating the respective observable. - - This is a helper function for some tools which may not support such - timepoint-specific mappings. The observable table and measurement table - are modified in place. - - Arguments: - petab_problem: - PEtab problem to work on. Modified in place. 
- """ - new_measurement_dfs = [] - new_observable_dfs = [] - groupvars = get_notnull_columns( - petab_problem.measurement_df, POSSIBLE_GROUPVARS_FLATTENED_PROBLEM - ) - - mappings = get_flattened_id_mappings(petab_problem) - - for groupvar, measurements in petab_problem.measurement_df.groupby( - groupvars, dropna=False - ): - obs_id = groupvar[groupvars.index(OBSERVABLE_ID)] - observable_replacement_id = get_observable_replacement_id( - groupvars, groupvar - ) - - observable = petab_problem.observable_df.loc[obs_id].copy() - observable.name = observable_replacement_id - for field, hyperparameter_type, target in [ - (NOISE_PARAMETERS, "noiseParameter", NOISE_FORMULA), - (OBSERVABLE_PARAMETERS, "observableParameter", OBSERVABLE_FORMULA), - (OBSERVABLE_PARAMETERS, "observableParameter", NOISE_FORMULA), - ]: - if field not in measurements: - continue - - if not is_string_dtype(type(observable[target])): - # if not a string, we don't have to substitute anything - continue - - hyperparameter_replacement_id = get_hyperparameter_replacement_id( - hyperparameter_type=hyperparameter_type, - observable_replacement_id=observable_replacement_id, - ) - hyperparameter_id = mappings[field][hyperparameter_replacement_id] - observable[target] = re.sub( - hyperparameter_id, - hyperparameter_replacement_id, - observable[target], - ) - - measurements[OBSERVABLE_ID] = observable_replacement_id - new_measurement_dfs.append(measurements) - new_observable_dfs.append(observable) - - petab_problem.observable_df = pd.concat(new_observable_dfs, axis=1).T - petab_problem.observable_df.index.name = OBSERVABLE_ID - petab_problem.measurement_df = pd.concat(new_measurement_dfs) - - -def unflatten_simulation_df( - simulation_df: pd.DataFrame, - petab_problem: "petab.problem.Problem", -) -> pd.DataFrame: - """Unflatten simulations from a flattened PEtab problem. - - A flattened PEtab problem is the output of applying - :func:`flatten_timepoint_specific_output_overrides` to a PEtab problem. 
- - Arguments: - simulation_df: - The simulation dataframe. A dataframe in the same format as a PEtab - measurements table, but with the ``measurement`` column switched - with a ``simulation`` column. - petab_problem: - The unflattened PEtab problem. - - Returns: - The simulation dataframe for the unflattened PEtab problem. - """ - mappings = get_flattened_id_mappings(petab_problem) - original_observable_ids = simulation_df[OBSERVABLE_ID].replace( - mappings[OBSERVABLE_ID] - ) - unflattened_simulation_df = simulation_df.assign( - **{ - OBSERVABLE_ID: original_observable_ids, - } - ) - return unflattened_simulation_df - - -def concat_tables( - tables: Union[ - str, Path, pd.DataFrame, Iterable[Union[pd.DataFrame, str, Path]] - ], - file_parser: Optional[Callable] = None, -) -> pd.DataFrame: - """Concatenate DataFrames provided as DataFrames or filenames, and a parser - - Arguments: - tables: - Iterable of tables to join, as DataFrame or filename. - file_parser: - Function used to read the table in case filenames are provided, - accepting a filename as only argument. - - Returns: - The concatenated DataFrames - """ - if isinstance(tables, pd.DataFrame): - return tables - - if isinstance(tables, (str, Path)): - return file_parser(tables) - - df = pd.DataFrame() - - for tmp_df in tables: - # load from file, if necessary - if isinstance(tmp_df, (str, Path)): - tmp_df = file_parser(tmp_df) - - df = pd.concat( - [df, tmp_df], - sort=False, - ignore_index=isinstance(tmp_df.index, pd.RangeIndex), - ) - - return df - - -def to_float_if_float(x: Any) -> Any: - """Return input as float if possible, otherwise return as is - - Arguments: - x: Anything - - Returns: - ``x`` as float if possible, otherwise ``x`` - """ - try: - return float(x) - except (ValueError, TypeError): - return x - - -def is_empty(val) -> bool: - """Check if the value `val`, e.g. a table entry, is empty. - - Arguments: - val: The value to check. - - Returns: - Whether the field is to be considered empty. 
- """ - return val == "" or pd.isnull(val) - - -def create_combine_archive( - yaml_file: Union[str, Path], - filename: Union[str, Path], - family_name: Optional[str] = None, - given_name: Optional[str] = None, - email: Optional[str] = None, - organization: Optional[str] = None, -) -> None: - """Create COMBINE archive (https://co.mbine.org/documents/archive) based - on PEtab YAML file. - - Arguments: - yaml_file: Path to PEtab YAML file - filename: Destination file name - family_name: Family name of archive creator - given_name: Given name of archive creator - email: E-mail address of archive creator - organization: Organization of archive creator - """ - path_prefix = os.path.dirname(str(yaml_file)) - yaml_config = yaml.load_yaml(yaml_file) - - # function-level import, because module-level import interfered with - # other SWIG interfaces - try: - import libcombine - except ImportError as err: - raise ImportError( - "To use PEtab's COMBINE functionality, libcombine " - "(python-libcombine) must be installed." - ) from err - - def _add_file_metadata(location: str, description: str = ""): - """Add metadata to the added file""" - omex_description = libcombine.OmexDescription() - omex_description.setAbout(location) - omex_description.setDescription(description) - omex_description.setCreated( - libcombine.OmexDescription.getCurrentDateAndTime() - ) - archive.addMetadata(location, omex_description) - - archive = libcombine.CombineArchive() - - # Add PEtab files and metadata - archive.addFile( - str(yaml_file), - os.path.basename(yaml_file), - "http://identifiers.org/combine.specifications/petab.version-1", - True, - ) - _add_file_metadata( - location=os.path.basename(yaml_file), description="PEtab YAML file" - ) - - # Add parameter file(s) that describe a single parameter table. - # Works for a single file name, or a list of file names. 
- for parameter_subset_file in list( - np.array(yaml_config[PARAMETER_FILE]).flat - ): - archive.addFile( - os.path.join(path_prefix, parameter_subset_file), - parameter_subset_file, - libcombine.KnownFormats.lookupFormat("tsv"), - False, - ) - _add_file_metadata( - location=parameter_subset_file, description="PEtab parameter file" - ) - - for problem in yaml_config[PROBLEMS]: - for sbml_file in problem[SBML_FILES]: - archive.addFile( - os.path.join(path_prefix, sbml_file), - sbml_file, - libcombine.KnownFormats.lookupFormat("sbml"), - False, - ) - _add_file_metadata(location=sbml_file, description="SBML model") - - for field in [ - MEASUREMENT_FILES, - OBSERVABLE_FILES, - VISUALIZATION_FILES, - CONDITION_FILES, - ]: - if field not in problem: - continue - - for file in problem[field]: - archive.addFile( - os.path.join(path_prefix, file), - file, - libcombine.KnownFormats.lookupFormat("tsv"), - False, - ) - desc = field.split("_")[0] - _add_file_metadata( - location=file, description=f"PEtab {desc} file" - ) - - # Add archive metadata - description = libcombine.OmexDescription() - description.setAbout(".") - description.setDescription("PEtab archive") - description.setCreated(libcombine.OmexDescription.getCurrentDateAndTime()) - - # Add creator info - creator = libcombine.VCard() - if family_name: - creator.setFamilyName(family_name) - if given_name: - creator.setGivenName(given_name) - if email: - creator.setEmail(email) - if organization: - creator.setOrganization(organization) - description.addCreator(creator) - - archive.addMetadata(".", description) - archive.writeToFile(str(filename)) - - -def unique_preserve_order(seq: Sequence) -> List: - """Return a list of unique elements in Sequence, keeping only the first - occurrence of each element - - Parameters: - seq: Sequence to prune - - Returns: - List of unique elements in ``seq`` - """ - seen = set() - seen_add = seen.add - return [x for x in seq if not (x in seen or seen_add(x))] 
+_deprecated_import_v1(__name__) diff --git a/petab/lint.py b/petab/lint.py index 07c1990b..a7461ca3 100644 --- a/petab/lint.py +++ b/petab/lint.py @@ -1,1191 +1,9 @@ -"""Integrity checks and tests for specific features used""" +"""Deprecated module for linting PEtab files. -import copy -import logging -import numbers -import re -from collections import Counter -from typing import Any, Iterable, Optional +Use petab.v1.lint instead. +""" -import numpy as np -import pandas as pd -import sympy as sp +from petab import _deprecated_import_v1 +from petab.v1.lint import * # noqa: F403, F401, E402 -import petab - -from . import core, measurements, parameters -from .C import * # noqa: F403 -from .math import sympify_petab -from .models import Model - -logger = logging.getLogger(__name__) -__all__ = [ - "assert_all_parameters_present_in_parameter_df", - "assert_measured_observables_defined", - "assert_measurement_conditions_present_in_condition_table", - "assert_measurements_not_null", - "assert_measurements_numeric", - "assert_model_parameters_in_condition_or_parameter_table", - "assert_no_leading_trailing_whitespace", - "assert_noise_distributions_valid", - "assert_parameter_bounds_are_numeric", - "assert_parameter_estimate_is_boolean", - "assert_parameter_id_is_string", - "assert_parameter_prior_parameters_are_valid", - "assert_parameter_prior_type_is_valid", - "assert_parameter_scale_is_valid", - "assert_unique_observable_ids", - "assert_unique_parameter_ids", - "check_condition_df", - "check_ids", - "check_measurement_df", - "check_observable_df", - "check_parameter_bounds", - "check_parameter_df", - "condition_table_is_parameter_free", - "get_non_unique", - "is_scalar_float", - "is_valid_identifier", - "lint_problem", - "measurement_table_has_observable_parameter_numeric_overrides", - "measurement_table_has_timepoint_specific_mappings", - "observable_table_has_nontrivial_noise_formula", -] - - -def _check_df(df: pd.DataFrame, req_cols: Iterable, name: str) -> None: - 
"""Check if given columns are present in DataFrame - - Arguments: - df: Dataframe to check - req_cols: Column names which have to be present - name: Name of the DataFrame to be included in error message - - Raises: - AssertionError: if a column is missing - """ - if missing_cols := set(req_cols) - set(df.columns.values): - raise AssertionError( - f"DataFrame {name} requires the columns {missing_cols}." - ) - - -def assert_no_leading_trailing_whitespace( - names_list: Iterable[str], name: str -) -> None: - """Check that there is no trailing whitespace in elements of Iterable - - Arguments: - names_list: strings to check for whitespace - name: name of `names_list` for error messages - - Raises: - AssertionError: if there is trailing whitespace - """ - r = re.compile(r"(?:^\s)|(?:\s$)") - for i, x in enumerate(names_list): - if isinstance(x, str) and r.search(x): - raise AssertionError(f"Whitespace around {name}[{i}] = '{x}'.") - - -def check_condition_df( - df: pd.DataFrame, - model: Optional[Model] = None, - observable_df: Optional[pd.DataFrame] = None, - mapping_df: Optional[pd.DataFrame] = None, -) -> None: - """Run sanity checks on PEtab condition table - - Arguments: - df: PEtab condition DataFrame - model: Model for additional checking of parameter IDs - observable_df: PEtab observables DataFrame - mapping_df: PEtab mapping DataFrame - - Raises: - AssertionError: in case of problems - """ - # Check required columns are present - req_cols = [] - _check_df(df, req_cols, "condition") - - # Check for correct index - if df.index.name != CONDITION_ID: - raise AssertionError( - f"Condition table has wrong index {df.index.name}." - f"expected {CONDITION_ID}." 
- ) - - check_ids(df.index.values, kind="condition") - - if not df.index.is_unique: - raise AssertionError( - "Non-unique condition IDs: " - f"{df.index.values[df.index.duplicated()]}" - ) - - for column_name in req_cols: - if not np.issubdtype(df[column_name].dtype, np.number): - assert_no_leading_trailing_whitespace( - df[column_name].values, column_name - ) - - if model is not None: - allowed_cols = set(model.get_valid_ids_for_condition_table()) - if observable_df is not None: - allowed_cols |= set( - petab.get_output_parameters( - model=model, - observable_df=observable_df, - mapping_df=mapping_df, - ) - ) - if mapping_df is not None: - allowed_cols |= set(mapping_df.index.values) - for column_name in df.columns: - if ( - column_name != CONDITION_NAME - and column_name not in allowed_cols - ): - raise AssertionError( - "Condition table contains column for unknown entity '" - f"{column_name}'." - ) - - -def check_measurement_df( - df: pd.DataFrame, observable_df: Optional[pd.DataFrame] = None -) -> None: - """Run sanity checks on PEtab measurement table - - Arguments: - df: PEtab measurement DataFrame - observable_df: PEtab observable DataFrame for checking if measurements - are compatible with observable transformations. 
- - Raises: - AssertionError, ValueError: in case of problems - """ - _check_df(df, MEASUREMENT_DF_REQUIRED_COLS, "measurement") - - for column_name in MEASUREMENT_DF_REQUIRED_COLS: - if not np.issubdtype(df[column_name].dtype, np.number): - assert_no_leading_trailing_whitespace( - df[column_name].values, column_name - ) - - for column_name in MEASUREMENT_DF_OPTIONAL_COLS: - if column_name in df and not np.issubdtype( - df[column_name].dtype, np.number - ): - assert_no_leading_trailing_whitespace( - df[column_name].values, column_name - ) - - if observable_df is not None: - assert_measured_observables_defined(df, observable_df) - measurements.assert_overrides_match_parameter_count(df, observable_df) - - if OBSERVABLE_TRANSFORMATION in observable_df: - # Check for positivity of measurements in case of - # log-transformation - assert_unique_observable_ids(observable_df) - # If the above is not checked, in the following loop - # trafo may become a pandas Series - for measurement, obs_id in zip(df[MEASUREMENT], df[OBSERVABLE_ID]): - trafo = observable_df.loc[obs_id, OBSERVABLE_TRANSFORMATION] - if measurement <= 0.0 and trafo in [LOG, LOG10]: - raise ValueError( - "Measurements with observable " - f"transformation {trafo} must be " - f"positive, but {measurement} <= 0." 
- ) - - assert_measurements_not_null(df) - assert_measurements_numeric(df) - - -def check_parameter_df( - df: pd.DataFrame, - model: Optional[Model] = None, - observable_df: Optional[pd.DataFrame] = None, - measurement_df: Optional[pd.DataFrame] = None, - condition_df: Optional[pd.DataFrame] = None, - mapping_df: Optional[pd.DataFrame] = None, -) -> None: - """Run sanity checks on PEtab parameter table - - Arguments: - df: PEtab parameter DataFrame - model: Model for additional checking of parameter IDs - observable_df: PEtab observable table for additional checks - measurement_df: PEtab measurement table for additional checks - condition_df: PEtab condition table for additional checks - mapping_df: PEtab mapping table for additional checks - - Raises: - AssertionError: in case of problems - """ - _check_df(df, PARAMETER_DF_REQUIRED_COLS[1:], "parameter") - - if df.index.name != PARAMETER_ID: - raise AssertionError( - f"Parameter table has wrong index {df.index.name}." - f"expected {PARAMETER_ID}." - ) - - check_ids(df.index.values, kind="parameter") - - for column_name in PARAMETER_DF_REQUIRED_COLS[1:]: # 0 is PARAMETER_ID - if not np.issubdtype(df[column_name].dtype, np.number): - assert_no_leading_trailing_whitespace( - df[column_name].values, column_name - ) - - # nominal value is generally optional, but required if any for any - # parameter estimate != 1 - non_estimated_par_ids = list( - df.index[ - (df[ESTIMATE] != 1) - | ( - pd.api.types.is_string_dtype(df[ESTIMATE]) - and df[ESTIMATE] != "1" - ) - ] - ) - if non_estimated_par_ids: - if NOMINAL_VALUE not in df: - raise AssertionError( - "Parameter table contains parameters " - f"{non_estimated_par_ids} that are not " - "specified to be estimated, " - f"but column {NOMINAL_VALUE} is missing." 
- ) - try: - df.loc[non_estimated_par_ids, NOMINAL_VALUE].apply(float) - except ValueError as e: - raise AssertionError( - f"Expected numeric values for `{NOMINAL_VALUE}` in parameter " - "table for all non-estimated parameters." - ) from e - - assert_parameter_id_is_string(df) - assert_parameter_scale_is_valid(df) - assert_parameter_bounds_are_numeric(df) - assert_parameter_estimate_is_boolean(df) - assert_unique_parameter_ids(df) - check_parameter_bounds(df) - assert_parameter_prior_type_is_valid(df) - - if model and measurement_df is not None and condition_df is not None: - assert_all_parameters_present_in_parameter_df( - df, model, observable_df, measurement_df, condition_df, mapping_df - ) - - -def check_observable_df(observable_df: pd.DataFrame) -> None: - """Check validity of observable table - - Arguments: - observable_df: PEtab observable DataFrame - - Raises: - AssertionError: in case of problems - """ - _check_df(observable_df, OBSERVABLE_DF_REQUIRED_COLS[1:], "observable") - - check_ids(observable_df.index.values, kind="observable") - - for column_name in OBSERVABLE_DF_REQUIRED_COLS[1:]: - if not np.issubdtype(observable_df[column_name].dtype, np.number): - assert_no_leading_trailing_whitespace( - observable_df[column_name].values, column_name - ) - - for column_name in OBSERVABLE_DF_OPTIONAL_COLS: - if column_name in observable_df and not np.issubdtype( - observable_df[column_name].dtype, np.number - ): - assert_no_leading_trailing_whitespace( - observable_df[column_name].values, column_name - ) - - assert_noise_distributions_valid(observable_df) - assert_unique_observable_ids(observable_df) - - # Check that formulas are parsable - for row in observable_df.itertuples(): - obs = getattr(row, OBSERVABLE_FORMULA) - try: - sympify_petab(obs) - except sp.SympifyError as e: - raise AssertionError( - f"Cannot parse expression '{obs}' " - f"for observable {row.Index}: {e}" - ) from e - - noise = getattr(row, NOISE_FORMULA) - try: - sympified_noise = 
sympify_petab(noise) - if sympified_noise is None or ( - sympified_noise.is_Number and not sympified_noise.is_finite - ): - raise AssertionError( - f"No or non-finite {NOISE_FORMULA} " - f"given for observable {row.Index}." - ) - except sp.SympifyError as e: - raise AssertionError( - f"Cannot parse expression '{noise}' " - f"for noise model for observable " - f"{row.Index}: {e}" - ) from e - - -def assert_all_parameters_present_in_parameter_df( - parameter_df: pd.DataFrame, - model: Model, - observable_df: pd.DataFrame, - measurement_df: pd.DataFrame, - condition_df: pd.DataFrame, - mapping_df: pd.DataFrame = None, -) -> None: - """Ensure all required parameters are contained in the parameter table - with no additional ones - - Arguments: - parameter_df: PEtab parameter DataFrame - model: model - observable_df: PEtab observable table - measurement_df: PEtab measurement table - condition_df: PEtab condition table - mapping_df: PEtab mapping table for additional checks - - Raises: - AssertionError: in case of problems - """ - required = parameters.get_required_parameters_for_parameter_table( - model=model, - condition_df=condition_df, - observable_df=observable_df, - measurement_df=measurement_df, - mapping_df=mapping_df, - ) - - allowed = parameters.get_valid_parameters_for_parameter_table( - model=model, - condition_df=condition_df, - observable_df=observable_df, - measurement_df=measurement_df, - mapping_df=mapping_df, - ) - - actual = set(parameter_df.index) - missing = required - actual - extraneous = actual - allowed - - # missing parameters might be present under a different name based on - # the mapping table - if missing and mapping_df is not None: - model_to_petab_mapping = {} - for map_from, map_to in zip( - mapping_df.index.values, mapping_df[MODEL_ENTITY_ID] - ): - if map_to in model_to_petab_mapping: - model_to_petab_mapping[map_to].append(map_from) - else: - model_to_petab_mapping[map_to] = [map_from] - missing = { - missing_id - for missing_id in 
missing - if missing_id not in model_to_petab_mapping - or all( - mapping_parameter not in actual - for mapping_parameter in model_to_petab_mapping[missing_id] - ) - } - - if missing: - raise AssertionError( - "Missing parameter(s) in the model or the " - "parameters table: " + str(missing) - ) - - if extraneous: - raise AssertionError( - "Extraneous parameter(s) in parameter table: " + str(extraneous) - ) - - -def assert_measured_observables_defined( - measurement_df: pd.DataFrame, observable_df: pd.DataFrame -) -> None: - """Check if all observables in the measurement table have been defined in - the observable table - - Arguments: - measurement_df: PEtab measurement table - observable_df: PEtab observable table - - Raises: - AssertionError: in case of problems - """ - used_observables = set(measurement_df[OBSERVABLE_ID].values) - defined_observables = set(observable_df.index.values) - if undefined_observables := (used_observables - defined_observables): - raise AssertionError( - f"Observables {undefined_observables} used in " - "measurement table but not defined in observables table." - ) - - -def condition_table_is_parameter_free(condition_df: pd.DataFrame) -> bool: - """Check if all entries in the condition table are numeric - (no parameter IDs) - - Arguments: - condition_df: PEtab condition table - - Returns: - ``True`` if there are no parameter overrides in the condition table, - ``False`` otherwise. - """ - return len(petab.get_parametric_overrides(condition_df)) == 0 - - -def assert_parameter_id_is_string(parameter_df: pd.DataFrame) -> None: - """ - Check if all entries in the parameterId column of the parameter table - are string and not empty. - - Arguments: - parameter_df: PEtab parameter DataFrame - - Raises: - AssertionError: in case of problems - """ - for parameter_id in parameter_df: - if isinstance(parameter_id, str): - if parameter_id[0].isdigit(): - raise AssertionError( - f"{PARAMETER_ID} {parameter_id} starts with integer." 
- ) - else: - raise AssertionError(f"Empty {PARAMETER_ID} found.") - - -def assert_unique_parameter_ids(parameter_df: pd.DataFrame) -> None: - """ - Check if the parameterId column of the parameter table is unique. - - Arguments: - parameter_df: PEtab parameter DataFrame - - Raises: - AssertionError: in case of problems - """ - non_unique_ids = get_non_unique(parameter_df.index) - if len(non_unique_ids) > 0: - raise AssertionError( - f"Non-unique values found in the {PARAMETER_ID} column" - " of the parameter table: " + str(non_unique_ids) - ) - - -def assert_parameter_scale_is_valid(parameter_df: pd.DataFrame) -> None: - """ - Check if all entries in the parameterScale column of the parameter table - are 'lin' for linear, 'log' for natural logarithm or 'log10' for base 10 - logarithm. - - Arguments: - parameter_df: PEtab parameter DataFrame - - Raises: - AssertionError: in case of problems - """ - for parameter_scale in parameter_df[PARAMETER_SCALE]: - if parameter_scale not in [LIN, LOG, LOG10]: - raise AssertionError( - f"Expected {LIN}, {LOG}, or {LOG10}, but " - f"got {parameter_scale}." - ) - - -def assert_parameter_bounds_are_numeric(parameter_df: pd.DataFrame) -> None: - """ - Check if all entries in the lowerBound and upperBound columns of the - parameter table are numeric. - - Arguments: - parameter_df: PEtab parameter DataFrame - - Raises: - AssertionError: in case of problems - """ - parameter_df[LOWER_BOUND].apply(float).all() - parameter_df[UPPER_BOUND].apply(float).all() - - -def check_parameter_bounds(parameter_df: pd.DataFrame) -> None: - """ - Check if all entries in the lowerBound are smaller than upperBound column - in the parameter table and that bounds are positive for parameterScale - log|log10. 
- - Arguments: - parameter_df: PEtab parameter DataFrame - - Raises: - AssertionError: in case of problems - - """ - for _, row in parameter_df.iterrows(): - if int(row[ESTIMATE]): - if not row[LOWER_BOUND] <= row[UPPER_BOUND]: - raise AssertionError( - f"{LOWER_BOUND} greater than {UPPER_BOUND} for " - f"{PARAMETER_ID} {row.name}." - ) - if (row[LOWER_BOUND] < 0.0 or row[UPPER_BOUND] < 0.0) and row[ - PARAMETER_SCALE - ] in [LOG, LOG10]: - raise AssertionError( - f"Bounds for {row[PARAMETER_SCALE]} scaled parameter " - f"{ row.name} must be positive." - ) - - -def assert_parameter_prior_type_is_valid(parameter_df: pd.DataFrame) -> None: - """Check that valid prior types have been selected - - Arguments: - parameter_df: PEtab parameter table - - Raises: - AssertionError: in case of invalid prior - """ - for col in [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE]: - if col not in parameter_df.columns: - continue - for _, row in parameter_df.iterrows(): - if row[col] not in PRIOR_TYPES and not core.is_empty(row[col]): - raise AssertionError( - f"{col} must be one of {PRIOR_TYPES} but is " - f"'{row[col]}'." - ) - - -def assert_parameter_prior_parameters_are_valid( - parameter_df: pd.DataFrame, -) -> None: - """Check that the prior parameters are valid. 
- - Arguments: - parameter_df: PEtab parameter table - - Raises: - AssertionError: in case of invalid prior parameters - """ - prior_type_cols = [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE] - prior_par_cols = [ - INITIALIZATION_PRIOR_PARAMETERS, - OBJECTIVE_PRIOR_PARAMETERS, - ] - - # perform test for both priors - for type_col, par_col in zip(prior_type_cols, prior_par_cols): - # iterate over rows - for _, row in parameter_df.iterrows(): - # get type - if type_col not in row or core.is_empty(row[type_col]): - type_ = PARAMETER_SCALE_UNIFORM - else: - type_ = row[type_col] - # get parameters - pars_str = row.get(par_col, "") - with_default_parameters = [PARAMETER_SCALE_UNIFORM] - # check if parameters are empty - if core.is_empty(pars_str): - if type_ not in with_default_parameters: - raise AssertionError( - f"An empty {par_col} is only permitted with " - f"{type_col} in {with_default_parameters}." - ) - # empty parameters fine - continue - # parse parameters - try: - pars = tuple( - float(val) for val in pars_str.split(PARAMETER_SEPARATOR) - ) - except ValueError as e: - raise AssertionError( - f"Could not parse prior parameters '{pars_str}'." - ) from e - - # all distributions take 2 parameters - if len(pars) != 2: - raise AssertionError( - f"The prior parameters '{pars}' do not contain the " - "expected number of entries (currently 'par1" - f"{PARAMETER_SEPARATOR}par2' for all prior types)." - ) - - -def assert_parameter_estimate_is_boolean(parameter_df: pd.DataFrame) -> None: - """ - Check if all entries in the estimate column of the parameter table are - 0 or 1. - - Arguments: - parameter_df: PEtab parameter DataFrame - - Raises: - AssertionError: in case of problems - """ - for estimate in parameter_df[ESTIMATE]: - if int(estimate) not in [True, False]: - raise AssertionError( - f"Expected 0 or 1 but got {estimate} in {ESTIMATE} column." 
- ) - - -def is_scalar_float(x: Any): - """ - Checks whether input is a number or can be transformed into a number - via float - - :param x: - input - :return: - ``True`` if is or can be converted to number, ``False`` otherwise. - """ - if isinstance(x, numbers.Number): - return True - try: - float(x) - return True - except (ValueError, TypeError): - return False - - -def measurement_table_has_timepoint_specific_mappings( - measurement_df: Optional[pd.DataFrame], - allow_scalar_numeric_noise_parameters: bool = False, - allow_scalar_numeric_observable_parameters: bool = False, -) -> bool: - """ - Are there time-point or replicate specific parameter assignments in the - measurement table. - - Arguments: - measurement_df: - PEtab measurement table - - allow_scalar_numeric_noise_parameters: - ignore scalar numeric assignments to noiseParameter placeholders - - allow_scalar_numeric_observable_parameters: - ignore scalar numeric assignments to observableParameter - placeholders - - Returns: - True if there are time-point or replicate specific (non-numeric) - parameter assignments in the measurement table, False otherwise. 
- """ - if measurement_df is None: - return False - - # since we edit it, copy it first - measurement_df = copy.deepcopy(measurement_df) - - # mask numeric values - for col, allow_scalar_numeric in [ - (OBSERVABLE_PARAMETERS, allow_scalar_numeric_observable_parameters), - (NOISE_PARAMETERS, allow_scalar_numeric_noise_parameters), - ]: - if col not in measurement_df: - continue - - measurement_df[col] = measurement_df[col].apply(str) - - if allow_scalar_numeric: - measurement_df.loc[ - measurement_df[col].apply(is_scalar_float), col - ] = np.nan - - grouping_cols = core.get_notnull_columns( - measurement_df, - [ - OBSERVABLE_ID, - SIMULATION_CONDITION_ID, - PREEQUILIBRATION_CONDITION_ID, - OBSERVABLE_PARAMETERS, - NOISE_PARAMETERS, - ], - ) - grouped_df = measurement_df.groupby(grouping_cols, dropna=False) - - grouping_cols = core.get_notnull_columns( - measurement_df, - [ - OBSERVABLE_ID, - SIMULATION_CONDITION_ID, - PREEQUILIBRATION_CONDITION_ID, - ], - ) - grouped_df2 = measurement_df.groupby(grouping_cols) - # data frame has timepoint specific overrides if grouping by noise - # parameters and observable parameters in addition to observable, - # condition and preeq id yields more groups - return len(grouped_df) != len(grouped_df2) - - -def observable_table_has_nontrivial_noise_formula( - observable_df: Optional[pd.DataFrame], -) -> bool: - """ - Does any observable have a noise formula that is not just a single - parameter? - - Arguments: - observable_df: PEtab observable table - - Returns: - ``True`` if any noise formula does not consist of a single identifier, - ``False`` otherwise. - """ - if observable_df is None: - return False - - return ( - not observable_df[NOISE_FORMULA] - .apply( - lambda x: is_scalar_float(x) - or re.match(r"^[\w]+$", str(x)) is not None - ) - .all() - ) - - -def measurement_table_has_observable_parameter_numeric_overrides( - measurement_df: pd.DataFrame, -) -> bool: - """Are there any numbers to override observable parameters? 
- - Arguments: - measurement_df: PEtab measurement table - - Returns: - ``True`` if there are any numbers to override observable/noise - parameters, ``False`` otherwise. - """ - if OBSERVABLE_PARAMETERS not in measurement_df: - return False - - for _, row in measurement_df.iterrows(): - for override in measurements.split_parameter_replacement_list( - row.get(OBSERVABLE_PARAMETERS, None) - ): - if isinstance(override, numbers.Number): - return True - - return False - - -def assert_noise_distributions_valid(observable_df: pd.DataFrame) -> None: - """ - Ensure that noise distributions and transformations for observables are - valid. - - Arguments: - observable_df: PEtab observable table - - Raises: - AssertionError: in case of problems - """ - if OBSERVABLE_TRANSFORMATION in observable_df: - # check for valid values - for trafo in observable_df[OBSERVABLE_TRANSFORMATION]: - if trafo not in ["", *OBSERVABLE_TRANSFORMATIONS] and not ( - isinstance(trafo, numbers.Number) and np.isnan(trafo) - ): - raise ValueError( - f"Unrecognized observable transformation in observable " - f"table: {trafo}." - ) - - if NOISE_DISTRIBUTION in observable_df: - for distr in observable_df[NOISE_DISTRIBUTION]: - if distr not in ["", *NOISE_MODELS] and not ( - isinstance(distr, numbers.Number) and np.isnan(distr) - ): - raise ValueError( - f"Unrecognized noise distribution in observable " - f"table: {distr}." - ) - - -def assert_unique_observable_ids(observable_df: pd.DataFrame) -> None: - """ - Check if the observableId column of the observable table is unique. 
- - Arguments: - observable_df: PEtab observable DataFrame - - Raises: - AssertionError: in case of problems - """ - non_unique_ids = get_non_unique(observable_df.index) - if len(non_unique_ids) > 0: - raise AssertionError( - f"Non-unique values found in the {OBSERVABLE_ID} column" - " of the observable table: " + str(non_unique_ids) - ) - - -def get_non_unique(values): - counter = Counter(values) - return [value for (value, count) in counter.items() if count > 1] - - -def lint_problem(problem: "petab.Problem") -> bool: - """Run PEtab validation on problem - - Arguments: - problem: PEtab problem to check - - Returns: - ``True`` if errors occurred, ``False`` otherwise - """ - # pylint: disable=too-many-statements - errors_occurred = False - - if problem.extensions_config: - logger.warning( - "Validation of PEtab extensions is not yet implemented, " - "but the given problem uses the following extensions: " - f"{'', ''.join(problem.extensions_config.keys())}" - ) - - # Run checks on individual files - if problem.model is not None: - logger.info("Checking model...") - errors_occurred |= not problem.model.is_valid() - else: - logger.warning("Model not available. Skipping.") - - if problem.measurement_df is not None: - logger.info("Checking measurement table...") - try: - check_measurement_df(problem.measurement_df, problem.observable_df) - - if problem.condition_df is not None: - assert_measurement_conditions_present_in_condition_table( - problem.measurement_df, problem.condition_df - ) - except AssertionError as e: - logger.error(e) - errors_occurred = True - else: - logger.warning("Measurement table not available. 
Skipping.") - - if problem.condition_df is not None: - logger.info("Checking condition table...") - try: - check_condition_df( - problem.condition_df, - model=problem.model, - observable_df=problem.observable_df, - mapping_df=problem.mapping_df, - ) - except AssertionError as e: - logger.error(e) - errors_occurred = True - else: - logger.warning("Condition table not available. Skipping.") - - if problem.observable_df is not None: - logger.info("Checking observable table...") - try: - check_observable_df(problem.observable_df) - except AssertionError as e: - logger.error(e) - errors_occurred = True - if problem.model is not None: - for obs_id in problem.observable_df.index: - if problem.model.has_entity_with_id(obs_id): - logger.error( - f"Observable ID {obs_id} shadows model " "entity." - ) - errors_occurred = True - else: - logger.warning("Observable table not available. Skipping.") - - if problem.parameter_df is not None: - logger.info("Checking parameter table...") - try: - check_parameter_df( - problem.parameter_df, - problem.model, - problem.observable_df, - problem.measurement_df, - problem.condition_df, - problem.mapping_df, - ) - except AssertionError as e: - logger.error(e) - errors_occurred = True - else: - logger.warning("Parameter table not available. Skipping.") - - if ( - problem.model is not None - and problem.condition_df is not None - and problem.parameter_df is not None - ): - try: - assert_model_parameters_in_condition_or_parameter_table( - problem.model, - problem.condition_df, - problem.parameter_df, - problem.mapping_df, - ) - except AssertionError as e: - logger.error(e) - errors_occurred = True - - if problem.visualization_df is not None: - logger.info("Checking visualization table...") - from petab.visualize.lint import validate_visualization_df - - errors_occurred |= validate_visualization_df(problem) - else: - logger.warning("Visualization table not available. 
Skipping.") - - if errors_occurred: - logger.error("Not OK") - elif ( - problem.measurement_df is None - or problem.condition_df is None - or problem.model is None - or problem.parameter_df is None - or problem.observable_df is None - ): - logger.warning( - "Not all files of the PEtab problem definition could " - "be checked." - ) - else: - logger.info("PEtab format check completed successfully.") - - return errors_occurred - - -def assert_model_parameters_in_condition_or_parameter_table( - model: Model, - condition_df: pd.DataFrame, - parameter_df: pd.DataFrame, - mapping_df: pd.DataFrame = None, - observable_df: pd.DataFrame = None, - measurement_df: pd.DataFrame = None, -) -> None: - """Model parameters that are rule targets must not be present in the - parameter table. Other parameters must only be present in either in - parameter table or condition table columns. Check that. - - Arguments: - parameter_df: PEtab parameter DataFrame - model: PEtab model - condition_df: PEtab condition table - mapping_df: PEtab mapping table - observable_df: PEtab observable table - measurement_df: PEtab measurement table - - Raises: - AssertionError: in case of problems - """ - allowed_in_condition_cols = set(model.get_valid_ids_for_condition_table()) - if mapping_df is not None: - allowed_in_condition_cols |= { - from_id - for from_id, to_id in zip( - mapping_df.index.values, mapping_df[MODEL_ENTITY_ID] - ) - # mapping table entities mapping to already allowed parameters - if to_id in allowed_in_condition_cols - # mapping table entities mapping to species - or model.is_state_variable(to_id) - } - - allowed_in_parameter_table = ( - parameters.get_valid_parameters_for_parameter_table( - model=model, - condition_df=condition_df, - observable_df=observable_df, - measurement_df=measurement_df, - mapping_df=mapping_df, - ) - ) - - entities_in_condition_table = set(condition_df.columns) - {CONDITION_NAME} - entities_in_parameter_table = set(parameter_df.index.values) - - 
disallowed_in_condition = { - x - for x in (entities_in_condition_table - allowed_in_condition_cols) - # we only check model entities here, not output parameters - if model.has_entity_with_id(x) - } - if disallowed_in_condition: - is_or_are = "is" if len(disallowed_in_condition) == 1 else "are" - raise AssertionError( - f"{disallowed_in_condition} {is_or_are} not " - "allowed to occur in condition table " - "columns." - ) - - disallowed_in_parameters = { - x - for x in (entities_in_parameter_table - allowed_in_parameter_table) - # we only check model entities here, not output parameters - if model.has_entity_with_id(x) - } - - if disallowed_in_parameters: - is_or_are = "is" if len(disallowed_in_parameters) == 1 else "are" - raise AssertionError( - f"{disallowed_in_parameters} {is_or_are} not " - "allowed to occur in the parameters table." - ) - - in_both = entities_in_condition_table & entities_in_parameter_table - if in_both: - is_or_are = "is" if len(in_both) == 1 else "are" - raise AssertionError( - f"{in_both} {is_or_are} present in both " - "the condition table and the parameter table." - ) - - -def assert_measurement_conditions_present_in_condition_table( - measurement_df: pd.DataFrame, condition_df: pd.DataFrame -) -> None: - """Ensure that all entries from measurement_df.simulationConditionId and - measurement_df.preequilibrationConditionId are present in - condition_df.index. 
- - Arguments: - measurement_df: PEtab measurement table - condition_df: PEtab condition table - - Raises: - AssertionError: in case of problems - """ - used_conditions = set(measurement_df[SIMULATION_CONDITION_ID].values) - if PREEQUILIBRATION_CONDITION_ID in measurement_df: - used_conditions |= set( - measurement_df[PREEQUILIBRATION_CONDITION_ID].dropna().values - ) - available_conditions = set(condition_df.index.values) - if missing_conditions := (used_conditions - available_conditions): - raise AssertionError( - "Measurement table references conditions that " - "are not specified in the condition table: " - + str(missing_conditions) - ) - - -def assert_measurements_not_null( - measurement_df: pd.DataFrame, -) -> None: - """Check whether all measurements are not null. - - Arguments: - measurement_df: - PEtab measurement table. - - Raises: - AssertionError: - Some measurement value(s) are null (missing). - """ - if measurement_df[MEASUREMENT].isnull().any(): - raise AssertionError("Some measurement(s) are null (missing).") - - -def assert_measurements_numeric( - measurement_df: pd.DataFrame, -) -> None: - """Check whether all measurements are numeric. - - Note that null (missing) measurements are ignored. - - Arguments: - measurement_df: - PEtab measurement table. - - Raises: - AssertionError: - Some measurement value(s) are not numeric. - """ - not_null_measurement_values = measurement_df[MEASUREMENT].dropna() - all_measurements_are_numeric = ( - pd.to_numeric(not_null_measurement_values, errors="coerce") - .notnull() - .all() - ) - if not all_measurements_are_numeric: - raise AssertionError( - "Some values in the `petab.C.MEASUREMENT` column of the PEtab " - "measurements table are not numeric." - ) - - -def is_valid_identifier(x: str) -> bool: - """Check whether `x` is a valid identifier - - Check whether `x` is a valid identifier for conditions, parameters, - observables... . 
Identifiers may contain upper and lower case letters, - digits and underscores, but must not start with a digit. - - Arguments: - x: string to check - - Returns: - ``True`` if valid, ``False`` otherwise - """ - if pd.isna(x): - return False - - return re.match(r"^[a-zA-Z_]\w*$", x) is not None - - -def check_ids(ids: Iterable[str], kind: str = "") -> None: - """Check IDs are valid - - Arguments: - ids: Iterable of IDs to check - kind: Kind of IDs, for more informative error message - - Raises: - ValueError: in case of invalid IDs - """ - invalids = [ - (index, _id) - for index, _id in enumerate(ids) - if not is_valid_identifier(_id) - ] - - if invalids: - # The first row is the header row, and Python lists are zero-indexed, - # hence need to add 2 for the correct line number. - offset = 2 - error_output = "\n".join( - [ - f"Line {index+offset}: " - + ("Missing ID" if pd.isna(_id) else _id) - for index, _id in invalids - ] - ) - raise ValueError(f"Invalid {kind} ID(s):\n{error_output}") +_deprecated_import_v1(__name__) diff --git a/petab/mapping.py b/petab/mapping.py index a345ca88..ca6cdd3f 100644 --- a/petab/mapping.py +++ b/petab/mapping.py @@ -1,118 +1,7 @@ -"""Functionality related to the PEtab entity mapping table""" -from pathlib import Path -from typing import Optional, Union +"""Deprecated module for mapping tables. -import pandas as pd +Use petab.v1.mapping instead.""" +from petab import _deprecated_import_v1 +from petab.v1.mapping import * # noqa: F403, F401, E402 -from . import lint -from .C import * # noqa: F403 -from .models import Model - -__all__ = [ - "get_mapping_df", - "write_mapping_df", - "check_mapping_df", -] - - -def get_mapping_df( - mapping_file: Union[None, str, Path, pd.DataFrame], -) -> pd.DataFrame: - """ - Read the provided mapping file into a ``pandas.Dataframe``. 
- - Arguments: - mapping_file: Name of file to read from or pandas.Dataframe - - Returns: - Mapping DataFrame - """ - if mapping_file is None: - return mapping_file - - if isinstance(mapping_file, (str, Path)): - mapping_file = pd.read_csv( - mapping_file, sep="\t", float_precision="round_trip" - ) - - if not isinstance(mapping_file.index, pd.RangeIndex): - mapping_file.reset_index( - drop=mapping_file.index.name != PETAB_ENTITY_ID, - inplace=True, - ) - - for col in MAPPING_DF_REQUIRED_COLS: - if col not in mapping_file.columns: - raise KeyError( - f"Mapping table missing mandatory field {PETAB_ENTITY_ID}." - ) - - lint.assert_no_leading_trailing_whitespace( - mapping_file.reset_index()[col].values, col - ) - - mapping_file.set_index([PETAB_ENTITY_ID], inplace=True) - - return mapping_file - - -def write_mapping_df(df: pd.DataFrame, filename: Union[str, Path]) -> None: - """Write PEtab mapping table - - Arguments: - df: PEtab mapping table - filename: Destination file name - """ - df = get_mapping_df(df) - df.to_csv(filename, sep="\t", index=True) - - -def check_mapping_df( - df: pd.DataFrame, - model: Optional[Model] = None, -) -> None: - """Run sanity checks on PEtab mapping table - - Arguments: - df: PEtab mapping DataFrame - model: Model for additional checking of parameter IDs - - Raises: - AssertionError: in case of problems - """ - lint._check_df(df, MAPPING_DF_REQUIRED_COLS[1:], "mapping") - - if df.index.name != PETAB_ENTITY_ID: - raise AssertionError( - f"Mapping table has wrong index {df.index.name}. " - f"Expected {PETAB_ENTITY_ID}." - ) - - lint.check_ids(df.index.values, kind=PETAB_ENTITY_ID) - - if model: - for model_entity_id in df[MODEL_ENTITY_ID]: - if not model.has_entity_with_id(model_entity_id): - raise AssertionError( - "Mapping table maps to unknown " - f"model entity ID {model_entity_id}." - ) - - -def resolve_mapping(mapping_df: Optional[pd.DataFrame], element: str) -> str: - """Resolve mapping for a given element. 
- - :param element: - Element to resolve. - - :param mapping_df: - Mapping table. - - :return: - Resolved element. - """ - if mapping_df is None: - return element - if element in mapping_df.index: - return mapping_df.loc[element, MODEL_ENTITY_ID] - return element +_deprecated_import_v1(__name__) diff --git a/petab/math/__init__.py b/petab/math/__init__.py index 27ebacd2..bc857377 100644 --- a/petab/math/__init__.py +++ b/petab/math/__init__.py @@ -1,2 +1,9 @@ -"""Functions for parsing and evaluating mathematical expressions.""" +"""Deprecated module for math handling. + +Use petab.v1.math instead.""" +from petab import _deprecated_import_v1 +from petab.v1.math import * # noqa: F403, F401, E402 + from .sympify import sympify_petab # noqa: F401 + +_deprecated_import_v1(__name__) diff --git a/petab/math/sympify.py b/petab/math/sympify.py index 9227c51d..d85b8e1b 100644 --- a/petab/math/sympify.py +++ b/petab/math/sympify.py @@ -1,20 +1,5 @@ -"""PEtab math to sympy conversion.""" +"""Deprecated module. Use petab.v1.math.sympify instead.""" +from petab import _deprecated_import_v1 +from petab.v1.math.sympify import * # noqa: F403, F401, E402 -import sympy as sp -from sympy.abc import _clash - - -def sympify_petab(expr: str) -> sp.Expr: - """ - Convert a PEtab math expression to a sympy expression. - - Parameters - ---------- - expr: - The PEtab math expression. - - Returns - ------- - The sympy expression corresponding to ``expr``. - """ - return sp.sympify(expr, locals=_clash) +_deprecated_import_v1(__name__) diff --git a/petab/measurements.py b/petab/measurements.py index caa32047..fcc0ac8e 100644 --- a/petab/measurements.py +++ b/petab/measurements.py @@ -1,348 +1,7 @@ -"""Functions operating on the PEtab measurement table""" -# noqa: F405 +"""Deprecated module for measurement tables.
-import itertools -import math -import numbers -from pathlib import Path -from typing import Dict, List, Union +Use petab.v1.measurements instead.""" +from petab import _deprecated_import_v1 +from petab.v1.measurements import * # noqa: F403, F401, E402 -import numpy as np -import pandas as pd - -from . import core, lint, observables -from .C import * # noqa: F403 - -__all__ = [ - "assert_overrides_match_parameter_count", - "create_measurement_df", - "get_measurement_df", - "get_measurement_parameter_ids", - "get_rows_for_condition", - "get_simulation_conditions", - "measurements_have_replicates", - "measurement_is_at_steady_state", - "split_parameter_replacement_list", - "write_measurement_df", -] - - -def get_measurement_df( - measurement_file: Union[None, str, Path, pd.DataFrame], -) -> pd.DataFrame: - """ - Read the provided measurement file into a ``pandas.Dataframe``. - - Arguments: - measurement_file: Name of file to read from or pandas.Dataframe - - Returns: - Measurement DataFrame - """ - if measurement_file is None: - return measurement_file - - if isinstance(measurement_file, (str, Path)): - measurement_file = pd.read_csv( - measurement_file, sep="\t", float_precision="round_trip" - ) - - lint.assert_no_leading_trailing_whitespace( - measurement_file.columns.values, MEASUREMENT - ) - - return measurement_file - - -def write_measurement_df(df: pd.DataFrame, filename: Union[str, Path]) -> None: - """Write PEtab measurement table - - Arguments: - df: PEtab measurement table - filename: Destination file name - """ - df = get_measurement_df(df) - df.to_csv(filename, sep="\t", index=False) - - -def get_simulation_conditions(measurement_df: pd.DataFrame) -> pd.DataFrame: - """ - Create a table of separate simulation conditions. A simulation condition - is a specific combination of simulationConditionId and - preequilibrationConditionId. 
- - Arguments: - measurement_df: PEtab measurement table - - Returns: - Dataframe with columns 'simulationConditionId' and - 'preequilibrationConditionId'. All-null columns will be omitted. - Missing 'preequilibrationConditionId's will be set to '' (empty - string). - """ - if measurement_df.empty: - return pd.DataFrame(data={SIMULATION_CONDITION_ID: []}) - # find columns to group by (i.e. if not all nans). - # can be improved by checking for identical condition vectors - grouping_cols = core.get_notnull_columns( - measurement_df, - [SIMULATION_CONDITION_ID, PREEQUILIBRATION_CONDITION_ID], - ) - - # group by cols and return dataframe containing each combination - # of those rows only once (and an additional counting row) - # We require NaN-containing rows, but they are ignored by `groupby`, - # therefore replace them before - simulation_conditions = ( - measurement_df.fillna("") - .groupby(grouping_cols) - .size() - .reset_index()[grouping_cols] - ) - # sort to be really sure that we always get the same order - return simulation_conditions.sort_values(grouping_cols, ignore_index=True) - - -def get_rows_for_condition( - measurement_df: pd.DataFrame, - condition: Union[pd.Series, pd.DataFrame, Dict], -) -> pd.DataFrame: - """ - Extract rows in `measurement_df` for `condition` according - to 'preequilibrationConditionId' and 'simulationConditionId' in - `condition`. - - Arguments: - measurement_df: - PEtab measurement DataFrame - condition: - DataFrame with single row (or Series) and columns - 'preequilibrationConditionId' and 'simulationConditionId'. - Or dictionary with those keys. - - Returns: - The subselection of rows in ``measurement_df`` for the condition - ``condition``. 
- """ - # filter rows for condition - row_filter = 1 - # check for equality in all grouping cols - if PREEQUILIBRATION_CONDITION_ID in condition: - row_filter = ( - measurement_df[PREEQUILIBRATION_CONDITION_ID].fillna("") - == condition[PREEQUILIBRATION_CONDITION_ID] - ) & row_filter - if SIMULATION_CONDITION_ID in condition: - row_filter = ( - measurement_df[SIMULATION_CONDITION_ID] - == condition[SIMULATION_CONDITION_ID] - ) & row_filter - # apply filter - cur_measurement_df = measurement_df.loc[row_filter, :] - - return cur_measurement_df - - -def get_measurement_parameter_ids(measurement_df: pd.DataFrame) -> List[str]: - """ - Return list of ID of parameters which occur in measurement table as - observable or noise parameter overrides. - - Arguments: - measurement_df: - PEtab measurement DataFrame - - Returns: - List of parameter IDs - """ - - def get_unique_parameters(series): - return core.unique_preserve_order( - itertools.chain.from_iterable( - series.apply(split_parameter_replacement_list) - ) - ) - - return core.unique_preserve_order( - get_unique_parameters(measurement_df[OBSERVABLE_PARAMETERS]) - + get_unique_parameters(measurement_df[NOISE_PARAMETERS]) - ) - - -def split_parameter_replacement_list( - list_string: Union[str, numbers.Number], delim: str = PARAMETER_SEPARATOR -) -> List[Union[str, numbers.Number]]: - """ - Split values in observableParameters and noiseParameters in measurement - table. - - Arguments: - list_string: delim-separated stringified list - delim: delimiter - - Returns: - List of split values. Numeric values may be converted to `float`, - and parameter IDs are kept as strings. - """ - if list_string is None or list_string == "": - return [] - - if isinstance(list_string, numbers.Number): - # Empty cells in pandas might be turned into nan - # We might want to allow nan as replacement... 
- if np.isnan(list_string): - return [] - return [list_string] - - result = [x.strip() for x in list_string.split(delim)] - - def convert_and_check(x): - x = core.to_float_if_float(x) - if isinstance(x, float): - return x - if lint.is_valid_identifier(x): - return x - - raise ValueError( - f"The value '{x}' in the parameter replacement list " - f"'{list_string}' is neither a number, nor a valid parameter ID." - ) - - return list(map(convert_and_check, result)) - - -def create_measurement_df() -> pd.DataFrame: - """Create empty measurement dataframe - - Returns: - Created DataFrame - """ - return pd.DataFrame( - data={ - OBSERVABLE_ID: [], - PREEQUILIBRATION_CONDITION_ID: [], - SIMULATION_CONDITION_ID: [], - MEASUREMENT: [], - TIME: [], - OBSERVABLE_PARAMETERS: [], - NOISE_PARAMETERS: [], - DATASET_ID: [], - REPLICATE_ID: [], - } - ) - - -def measurements_have_replicates(measurement_df: pd.DataFrame) -> bool: - """Tests whether the measurements come with replicates - - Arguments: - measurement_df: Measurement table - - Returns: - ``True`` if there are replicates, ``False`` otherwise - """ - grouping_cols = core.get_notnull_columns( - measurement_df, - [ - OBSERVABLE_ID, - SIMULATION_CONDITION_ID, - PREEQUILIBRATION_CONDITION_ID, - TIME, - ], - ) - return np.any( - measurement_df.fillna("").groupby(grouping_cols).size().values - 1 - ) - - -def assert_overrides_match_parameter_count( - measurement_df: pd.DataFrame, observable_df: pd.DataFrame -) -> None: - """Ensure that number of parameters in the observable definition matches - the number of overrides in ``measurement_df`` - - Arguments: - measurement_df: PEtab measurement table - observable_df: PEtab observable table - """ - # sympify only once and save number of parameters - observable_parameters_count = { - obs_id: len( - observables.get_formula_placeholders(formula, obs_id, "observable") - ) - for obs_id, formula in zip( - observable_df.index.values, observable_df[OBSERVABLE_FORMULA] - ) - } - 
noise_parameters_count = { - obs_id: len( - observables.get_formula_placeholders(formula, obs_id, "noise") - ) - for obs_id, formula in zip( - observable_df.index.values, observable_df[NOISE_FORMULA] - ) - } - - for _, row in measurement_df.iterrows(): - # check observable parameters - try: - expected = observable_parameters_count[row[OBSERVABLE_ID]] - except KeyError as e: - raise ValueError( - f"Observable {row[OBSERVABLE_ID]} used in measurement table " - f"is not defined." - ) from e - - actual = len( - split_parameter_replacement_list( - row.get(OBSERVABLE_PARAMETERS, None) - ) - ) - # No overrides are also allowed - if actual != expected: - formula = observable_df.loc[row[OBSERVABLE_ID], OBSERVABLE_FORMULA] - raise AssertionError( - f"Mismatch of observable parameter overrides for " - f"{row[OBSERVABLE_ID]} ({formula})" - f"in:\n{row}\n" - f"Expected {expected} but got {actual}" - ) - - # check noise parameters - replacements = split_parameter_replacement_list( - row.get(NOISE_PARAMETERS, None) - ) - try: - expected = noise_parameters_count[row[OBSERVABLE_ID]] - - # No overrides are also allowed - if len(replacements) != expected: - raise AssertionError( - f"Mismatch of noise parameter overrides in:\n{row}\n" - f"Expected {expected} but got {len(replacements)}" - ) - except KeyError as err: - # no overrides defined, but a numerical sigma can be provided - # anyways - if len(replacements) != 1 or not isinstance( - replacements[0], numbers.Number - ): - raise AssertionError( - f"No placeholders have been specified in the noise model " - f"for observable {row[OBSERVABLE_ID]}, but parameter ID " - "or multiple overrides were specified in the " - "noiseParameters column." - ) from err - - -def measurement_is_at_steady_state(time: float) -> bool: - """Check whether a measurement is at steady state. - - Arguments: - time: - The time. - - Returns: - Whether the measurement is at steady state. 
- """ - return math.isinf(time) +_deprecated_import_v1(__name__) diff --git a/petab/models/__init__.py b/petab/models/__init__.py index a5628aaa..4b8c87d3 100644 --- a/petab/models/__init__.py +++ b/petab/models/__init__.py @@ -1,9 +1,7 @@ -MODEL_TYPE_SBML = "sbml" -MODEL_TYPE_PYSB = "pysb" +"""Deprecated module for PEtab models. -known_model_types = { - MODEL_TYPE_SBML, - MODEL_TYPE_PYSB, -} +Use petab.v1.models instead""" +from petab import _deprecated_import_v1 +from petab.v1.models import * # noqa: F403, F401, E402 -from .model import Model # noqa F401 +_deprecated_import_v1(__name__) diff --git a/petab/models/model.py b/petab/models/model.py index 5d2f63ad..72387313 100644 --- a/petab/models/model.py +++ b/petab/models/model.py @@ -1,154 +1,7 @@ -"""PEtab model abstraction""" -from __future__ import annotations +"""Deprecated module for PEtab models. -import abc -from pathlib import Path -from typing import Any, Iterable +Use petab.v1.models instead.""" +from petab import _deprecated_import_v1 +from petab.v1.models.model import * # noqa: F403, F401, E402 - -class Model(abc.ABC): - """Base class for wrappers for any PEtab-supported model type""" - - @abc.abstractmethod - def __init__(self): - ... - - @staticmethod - @abc.abstractmethod - def from_file(filepath_or_buffer: Any, model_id: str) -> Model: - """Load the model from the given path/URL - - :param filepath_or_buffer: URL or path of the model - :param model_id: Model ID - :returns: A ``Model`` instance holding the given model - """ - ... - - @abc.abstractmethod - def to_file(self, filename: [str, Path]): - """Save the model to the given file - - :param filename: Destination filename - """ - ... - - @classmethod - @property - @abc.abstractmethod - def type_id(cls): - ... - - @property - @abc.abstractmethod - def model_id(self): - ... 
- - @abc.abstractmethod - def get_parameter_value(self, id_: str) -> float: - """Get a parameter value - - :param id_: ID of the parameter whose value is to be returned - :raises ValueError: If no parameter with the given ID exists - :returns: The value of the given parameter as specified in the model - """ - ... - - @abc.abstractmethod - def get_free_parameter_ids_with_values( - self, - ) -> Iterable[tuple[str, float]]: - """Get free model parameters along with their values - - Returns: - Iterator over tuples of (parameter_id, parameter_value) - """ - ... - - @abc.abstractmethod - def get_parameter_ids(self) -> Iterable[str]: - """Get all parameter IDs from this model - - :returns: Iterator over model parameter IDs - """ - ... - - @abc.abstractmethod - def has_entity_with_id(self, entity_id) -> bool: - """Check if there is a model entity with the given ID - - :param entity_id: ID to check for - :returns: ``True``, if there is an entity with the given ID, - ``False`` otherwise - """ - ... - - @abc.abstractmethod - def get_valid_parameters_for_parameter_table(self) -> Iterable[str]: - """Get IDs of all parameters that are allowed to occur in the PEtab - parameters table - - :returns: Iterator over parameter IDs - """ - ... - - @abc.abstractmethod - def get_valid_ids_for_condition_table(self) -> Iterable[str]: - """Get IDs of all model entities that are allowed to occur as columns - in the PEtab conditions table. - - :returns: Iterator over model entity IDs - """ - ... - - @abc.abstractmethod - def symbol_allowed_in_observable_formula(self, id_: str) -> bool: - """Check if the given ID is allowed to be used in observable and noise - formulas - - :returns: ``True``, if allowed, ``False`` otherwise - """ - ... - - @abc.abstractmethod - def is_valid(self) -> bool: - """Validate this model - - :returns: `True` if the model is valid, `False` if there are errors in - this model - """ - ... 
- - @abc.abstractmethod - def is_state_variable(self, id_: str) -> bool: - """Check whether the given ID corresponds to a model state variable""" - ... - - -def model_factory( - filepath_or_buffer: Any, model_language: str, model_id: str = None -) -> Model: - """Create a PEtab model instance from the given model - - :param filepath_or_buffer: Path/URL of the model - :param model_language: PEtab model language ID for the given model - :param model_id: PEtab model ID for the given model - :returns: A :py:class:`Model` instance representing the given model - """ - from . import MODEL_TYPE_PYSB, MODEL_TYPE_SBML, known_model_types - - if model_language == MODEL_TYPE_SBML: - from .sbml_model import SbmlModel - - return SbmlModel.from_file(filepath_or_buffer, model_id=model_id) - - if model_language == MODEL_TYPE_PYSB: - from .pysb_model import PySBModel - - return PySBModel.from_file(filepath_or_buffer, model_id=model_id) - - if model_language in known_model_types: - raise NotImplementedError( - f"Unsupported model format: {model_language}" - ) - - raise ValueError(f"Unknown model format: {model_language}") +_deprecated_import_v1(__name__) diff --git a/petab/models/pysb_model.py b/petab/models/pysb_model.py index 9dfd9512..f60945f4 100644 --- a/petab/models/pysb_model.py +++ b/petab/models/pysb_model.py @@ -1,227 +1,7 @@ -"""Functions for handling PySB models""" +"""Deprecated module for PySB models. -import itertools -import re -import sys -from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union +Use petab.v1.models.pysb_model instead.""" +from petab import _deprecated_import_v1 +from petab.v1.models.pysb_model import * # noqa: F403, F401, E402 -import pysb - -from . 
import MODEL_TYPE_PYSB -from .model import Model - - -def _pysb_model_from_path(pysb_model_file: Union[str, Path]) -> pysb.Model: - """Load a pysb model module and return the :class:`pysb.Model` instance - - :param pysb_model_file: Full or relative path to the PySB model module - :return: The pysb Model instance - """ - pysb_model_file = Path(pysb_model_file) - pysb_model_module_name = pysb_model_file.with_suffix("").name - - import importlib.util - - spec = importlib.util.spec_from_file_location( - pysb_model_module_name, pysb_model_file - ) - module = importlib.util.module_from_spec(spec) - sys.modules[pysb_model_module_name] = module - spec.loader.exec_module(module) - - # find a pysb.Model instance in the module - # 1) check if module.model exists and is a pysb.Model - model = getattr(module, "model", None) - if model: - return model - - # 2) check if there is any other pysb.Model instance - for x in dir(module): - attr = getattr(module, x) - if isinstance(attr, pysb.Model): - return attr - - raise ValueError(f"Could not find any pysb.Model in {pysb_model_file}.") - - -class PySBModel(Model): - """PEtab wrapper for PySB models""" - - type_id = MODEL_TYPE_PYSB - - def __init__(self, model: pysb.Model, model_id: str): - super().__init__() - - self.model = model - self._model_id = model_id - - @staticmethod - def from_file(filepath_or_buffer, model_id: str): - return PySBModel( - model=_pysb_model_from_path(filepath_or_buffer), model_id=model_id - ) - - def to_file(self, filename: [str, Path]): - from pysb.export import export - - model_source = export(self.model, "pysb_flat") - with open(filename, "w") as f: - f.write(model_source) - - @property - def model_id(self): - return self._model_id - - @model_id.setter - def model_id(self, model_id): - self._model_id = model_id - - def get_parameter_ids(self) -> Iterable[str]: - return (p.name for p in self.model.parameters) - - def get_parameter_value(self, id_: str) -> float: - try: - return 
self.model.parameters[id_].value - except KeyError as e: - raise ValueError(f"Parameter {id_} does not exist.") from e - - def get_free_parameter_ids_with_values( - self, - ) -> Iterable[Tuple[str, float]]: - return ((p.name, p.value) for p in self.model.parameters) - - def has_entity_with_id(self, entity_id) -> bool: - try: - _ = self.model.components[entity_id] - return True - except KeyError: - return False - - def get_valid_parameters_for_parameter_table(self) -> Iterable[str]: - # all parameters are allowed in the parameter table - return self.get_parameter_ids() - - def get_valid_ids_for_condition_table(self) -> Iterable[str]: - return itertools.chain( - self.get_parameter_ids(), self.get_compartment_ids() - ) - - def symbol_allowed_in_observable_formula(self, id_: str) -> bool: - return id_ in ( - x.name - for x in itertools.chain( - self.model.parameters, - self.model.observables, - self.model.expressions, - ) - ) - - def is_valid(self) -> bool: - # PySB models are always valid - return True - - def is_state_variable(self, id_: str) -> bool: - # If there is a component with that name, it's not a state variable - # (there are no dynamically-sized compartments) - if self.model.components.get(id_, None): - return False - - # Try parsing the ID - try: - result = parse_species_name(id_) - except ValueError: - return False - else: - # check if the ID is plausible - for monomer, compartment, site_config in result: - pysb_monomer: pysb.Monomer = self.model.monomers.get(monomer) - if pysb_monomer is None: - return False - if compartment: - pysb_compartment = self.model.compartments.get(compartment) - if pysb_compartment is None: - return False - for site, state in site_config.items(): - if site not in pysb_monomer.sites: - return False - if state not in pysb_monomer.site_states[site]: - return False - if set(pysb_monomer.sites) - set(site_config.keys()): - # There are undefined sites - return False - return True - - def get_compartment_ids(self) -> Iterable[str]: - 
return (compartment.name for compartment in self.model.compartments) - - -def parse_species_name( - name: str, -) -> List[Tuple[str, Optional[str], Dict[str, Any]]]: - """Parse a PySB species name - - :param name: Species name to parse - :returns: List of species, representing complex constituents, each as - a tuple of the monomer name, the compartment name, and a dict of sites - mapping to site states. - :raises ValueError: In case this is not a valid ID - """ - if "=MultiState(" in name: - raise NotImplementedError("MultiState is not yet supported.") - - complex_constituent_pattern = re.compile( - r"^(?P\w+)\((?P.*)\)" - r"( \*\* (?P.*))?$" - ) - result = [] - complex_constituents = name.split(" % ") - - for complex_constituent in complex_constituents: - match = complex_constituent_pattern.match(complex_constituent) - if not match: - raise ValueError( - f"Invalid species name: '{name}' " f"('{complex_constituent}')" - ) - monomer = match.groupdict()["monomer"] - site_config_str = match.groupdict()["site_config"] - compartment = match.groupdict()["compartment"] - - site_config = {} - for site_str in site_config_str.split(", "): - if not site_str: - continue - site, config = site_str.split("=") - if config == "None": - config = None - elif config.startswith("'"): - if not config.endswith("'"): - raise ValueError( - f"Invalid species name: '{name}' " f"('{config}')" - ) - # strip quotes - config = config[1:-1] - else: - config = int(config) - site_config[site] = config - result.append( - (monomer, compartment, site_config), - ) - - return result - - -def pattern_from_string(string: str, model: pysb.Model) -> pysb.ComplexPattern: - """Convert a pattern string to a Pattern instance""" - parts = parse_species_name(string) - patterns = [] - for part in parts: - patterns.append( - pysb.MonomerPattern( - monomer=model.monomers.get(part[0]), - compartment=model.compartments.get(part[1], None), - site_conditions=part[2], - ) - ) - - return pysb.ComplexPattern(patterns, 
compartment=None) +_deprecated_import_v1(__name__) diff --git a/petab/models/sbml_model.py b/petab/models/sbml_model.py index d68884fd..e754e903 100644 --- a/petab/models/sbml_model.py +++ b/petab/models/sbml_model.py @@ -1,222 +1,5 @@ -"""Functions for handling SBML models""" +"""Deprecated module. Use petab.v1.models.sbml_model instead.""" +from petab import _deprecated_import_v1 +from petab.v1.models.sbml_model import * # noqa: F403, F401, E402 -import itertools -from pathlib import Path -from typing import Iterable, Optional, Tuple - -import libsbml -import sympy as sp -from sympy.abc import _clash - -from ..sbml import ( - get_sbml_model, - is_sbml_consistent, - load_sbml_from_string, - write_sbml, -) -from . import MODEL_TYPE_SBML -from .model import Model - - -class SbmlModel(Model): - """PEtab wrapper for SBML models""" - - type_id = MODEL_TYPE_SBML - - def __init__( - self, - sbml_model: libsbml.Model = None, - sbml_reader: libsbml.SBMLReader = None, - sbml_document: libsbml.SBMLDocument = None, - model_id: str = None, - ): - super().__init__() - - self.sbml_reader: Optional[libsbml.SBMLReader] = sbml_reader - self.sbml_document: Optional[libsbml.SBMLDocument] = sbml_document - self.sbml_model: Optional[libsbml.Model] = sbml_model - - self._model_id = model_id or sbml_model.getIdAttribute() - - def __getstate__(self): - """Return state for pickling""" - state = self.__dict__.copy() - - # libsbml stuff cannot be serialized directly - if self.sbml_model: - sbml_document = self.sbml_model.getSBMLDocument() - sbml_writer = libsbml.SBMLWriter() - state["sbml_string"] = sbml_writer.writeSBMLToString(sbml_document) - - exclude = ["sbml_reader", "sbml_document", "sbml_model"] - for key in exclude: - state.pop(key) - - return state - - def __setstate__(self, state): - """Set state after unpickling""" - # load SBML model from pickled string - sbml_string = state.pop("sbml_string", None) - if sbml_string: - ( - self.sbml_reader, - self.sbml_document, - 
self.sbml_model, - ) = load_sbml_from_string(sbml_string) - - self.__dict__.update(state) - - @staticmethod - def from_file(filepath_or_buffer, model_id: str = None): - sbml_reader, sbml_document, sbml_model = get_sbml_model( - filepath_or_buffer - ) - return SbmlModel( - sbml_model=sbml_model, - sbml_reader=sbml_reader, - sbml_document=sbml_document, - model_id=model_id, - ) - - @property - def model_id(self): - return self._model_id - - @model_id.setter - def model_id(self, model_id): - self._model_id = model_id - - def to_file(self, filename: [str, Path]): - write_sbml( - self.sbml_document or self.sbml_model.getSBMLDocument(), filename - ) - - def get_parameter_value(self, id_: str) -> float: - parameter = self.sbml_model.getParameter(id_) - if not parameter: - raise ValueError(f"Parameter {id_} does not exist.") - return parameter.getValue() - - def get_free_parameter_ids_with_values( - self, - ) -> Iterable[Tuple[str, float]]: - rule_targets = { - ar.getVariable() for ar in self.sbml_model.getListOfRules() - } - - def get_initial(p): - # return the initial assignment value if there is one, and it is a - # number; `None`, if there is a non-numeric initial assignment; - # otherwise, the parameter value - if ia := self.sbml_model.getInitialAssignmentBySymbol(p.getId()): - sym_expr = sympify_sbml(ia.getMath()) - return ( - float(sym_expr.evalf()) - if sym_expr.evalf().is_Number - else None - ) - return p.getValue() - - return ( - (p.getId(), initial) - for p in self.sbml_model.getListOfParameters() - if p.getId() not in rule_targets - and (initial := get_initial(p)) is not None - ) - - def get_parameter_ids(self) -> Iterable[str]: - rule_targets = { - ar.getVariable() for ar in self.sbml_model.getListOfRules() - } - - return ( - p.getId() - for p in self.sbml_model.getListOfParameters() - if p.getId() not in rule_targets - ) - - def get_parameter_ids_with_values(self) -> Iterable[Tuple[str, float]]: - rule_targets = { - ar.getVariable() for ar in 
self.sbml_model.getListOfRules() - } - - return ( - (p.getId(), p.getValue()) - for p in self.sbml_model.getListOfParameters() - if p.getId() not in rule_targets - ) - - def has_entity_with_id(self, entity_id) -> bool: - return self.sbml_model.getElementBySId(entity_id) is not None - - def get_valid_parameters_for_parameter_table(self) -> Iterable[str]: - # All parameters except rule-targets - disallowed_set = { - ar.getVariable() for ar in self.sbml_model.getListOfRules() - } - - return ( - p.getId() - for p in self.sbml_model.getListOfParameters() - if p.getId() not in disallowed_set - ) - - def get_valid_ids_for_condition_table(self) -> Iterable[str]: - return ( - x.getId() - for x in itertools.chain( - self.sbml_model.getListOfParameters(), - self.sbml_model.getListOfSpecies(), - self.sbml_model.getListOfCompartments(), - ) - ) - - def symbol_allowed_in_observable_formula(self, id_: str) -> bool: - return self.sbml_model.getElementBySId(id_) or id_ == "time" - - def is_valid(self) -> bool: - return is_sbml_consistent(self.sbml_model.getSBMLDocument()) - - def is_state_variable(self, id_: str) -> bool: - return ( - self.sbml_model.getSpecies(id_) is not None - or self.sbml_model.getCompartment(id_) is not None - or self.sbml_model.getRuleByVariable(id_) is not None - ) - - -def sympify_sbml(sbml_obj: libsbml.ASTNode | libsbml.SBase) -> sp.Expr: - """Convert SBML math expression to sympy expression. - - Parameters - ---------- - sbml_obj: - SBML math element or an SBML object with a math element. - - Returns - ------- - The sympy expression corresponding to ``sbml_obj``. 
- """ - ast_node = ( - sbml_obj - if isinstance(sbml_obj, libsbml.ASTNode) - else sbml_obj.getMath() - ) - - parser_settings = libsbml.L3ParserSettings( - ast_node.getParentSBMLObject().getModel(), - libsbml.L3P_PARSE_LOG_AS_LOG10, - libsbml.L3P_EXPAND_UNARY_MINUS, - libsbml.L3P_NO_UNITS, - libsbml.L3P_AVOGADRO_IS_CSYMBOL, - libsbml.L3P_COMPARE_BUILTINS_CASE_INSENSITIVE, - None, - libsbml.L3P_MODULO_IS_PIECEWISE, - ) - - formula_str = libsbml.formulaToL3StringWithSettings( - ast_node, parser_settings - ) - - return sp.sympify(formula_str, locals=_clash) +_deprecated_import_v1(__name__) diff --git a/petab/observables.py b/petab/observables.py index 35cbd705..0d94736b 100644 --- a/petab/observables.py +++ b/petab/observables.py @@ -1,228 +1,8 @@ -"""Functions for working with the PEtab observables table""" +"""Deprecated module for observable tables. -import re -from collections import OrderedDict -from pathlib import Path -from typing import List, Literal, Union +Use petab.v1.observables instead. +""" +from petab import _deprecated_import_v1 +from petab.v1.observables import * # noqa: F403, F401, E402 -import pandas as pd - -from . import core, lint -from .C import * # noqa: F403 -from .math import sympify_petab -from .models import Model - -__all__ = [ - "create_observable_df", - "get_formula_placeholders", - "get_observable_df", - "get_output_parameters", - "get_placeholders", - "write_observable_df", -] - - -def get_observable_df( - observable_file: Union[str, pd.DataFrame, Path, None], -) -> Union[pd.DataFrame, None]: - """ - Read the provided observable file into a ``pandas.Dataframe``. - - Arguments: - observable_file: Name of the file to read from or pandas.Dataframe. 
- - Returns: - Observable DataFrame - """ - if observable_file is None: - return observable_file - - if isinstance(observable_file, (str, Path)): - observable_file = pd.read_csv( - observable_file, sep="\t", float_precision="round_trip" - ) - - lint.assert_no_leading_trailing_whitespace( - observable_file.columns.values, "observable" - ) - - if not isinstance(observable_file.index, pd.RangeIndex): - observable_file.reset_index( - drop=observable_file.index.name != OBSERVABLE_ID, - inplace=True, - ) - - try: - observable_file.set_index([OBSERVABLE_ID], inplace=True) - except KeyError: - raise KeyError( - f"Observable table missing mandatory field {OBSERVABLE_ID}." - ) from None - - return observable_file - - -def write_observable_df(df: pd.DataFrame, filename: Union[str, Path]) -> None: - """Write PEtab observable table - - Arguments: - df: PEtab observable table - filename: Destination file name - """ - df = get_observable_df(df) - df.to_csv(filename, sep="\t", index=True) - - -def get_output_parameters( - observable_df: pd.DataFrame, - model: Model, - observables: bool = True, - noise: bool = True, - mapping_df: pd.DataFrame = None, -) -> List[str]: - """Get output parameters - - Returns IDs of parameters used in observable and noise formulas that are - not defined in the model. 
- - Arguments: - observable_df: PEtab observable table - model: The underlying model - observables: Include parameters from observableFormulas - noise: Include parameters from noiseFormulas - mapping_df: PEtab mapping table - - Returns: - List of output parameter IDs - """ - formulas = [] - if observables: - formulas.extend(observable_df[OBSERVABLE_FORMULA]) - if noise and NOISE_FORMULA in observable_df: - formulas.extend(observable_df[NOISE_FORMULA]) - output_parameters = OrderedDict() - - for formula in formulas: - free_syms = sorted( - sympify_petab(formula).free_symbols, - key=lambda symbol: symbol.name, - ) - for free_sym in free_syms: - sym = str(free_sym) - if model.symbol_allowed_in_observable_formula(sym): - continue - - # does it map to a model entity? - if ( - mapping_df is not None - and sym in mapping_df.index - and model.symbol_allowed_in_observable_formula( - mapping_df.loc[sym, MODEL_ENTITY_ID] - ) - ): - continue - - output_parameters[sym] = None - - return list(output_parameters.keys()) - - -def get_formula_placeholders( - formula_string: str, - observable_id: str, - override_type: Literal["observable", "noise"], -) -> List[str]: - """ - Get placeholder variables in noise or observable definition for the - given observable ID. - - Arguments: - formula_string: observable formula - observable_id: ID of current observable - override_type: ``'observable'`` or ``'noise'``, depending on whether - ``formula`` is for observable or for noise model - - Returns: - List of placeholder parameter IDs in the order expected in the - observableParameter column of the measurement table. 
- """ - if not formula_string: - return [] - - if not isinstance(formula_string, str): - return [] - - pattern = re.compile( - r"(?:^|\W)(" - + re.escape(override_type) - + r"Parameter\d+_" - + re.escape(observable_id) - + r")(?=\W|$)" - ) - placeholder_set = set(pattern.findall(formula_string)) - - # need to sort and check that there are no gaps in numbering - placeholders = [ - f"{override_type}Parameter{i}_{observable_id}" - for i in range(1, len(placeholder_set) + 1) - ] - - if placeholder_set != set(placeholders): - raise AssertionError( - "Non-consecutive numbering of placeholder " - f"parameter for {placeholder_set}" - ) - - return placeholders - - -def get_placeholders( - observable_df: pd.DataFrame, - observables: bool = True, - noise: bool = True, -) -> List[str]: - """Get all placeholder parameters from observable table observableFormulas - and noiseFormulas - - Arguments: - observable_df: PEtab observable table - observables: Include parameters from observableFormulas - noise: Include parameters from noiseFormulas - - Returns: - List of placeholder parameters from observable table observableFormulas - and noiseFormulas. 
- """ - # collect placeholder parameters overwritten by - # {observable,noise}Parameters - placeholder_types = [] - formula_columns = [] - if observables: - placeholder_types.append("observable") - formula_columns.append(OBSERVABLE_FORMULA) - if noise: - placeholder_types.append("noise") - formula_columns.append(NOISE_FORMULA) - - placeholders = [] - for _, row in observable_df.iterrows(): - for placeholder_type, formula_column in zip( - placeholder_types, formula_columns - ): - if formula_column not in row: - continue - - cur_placeholders = get_formula_placeholders( - row[formula_column], row.name, placeholder_type - ) - placeholders.extend(cur_placeholders) - return core.unique_preserve_order(placeholders) - - -def create_observable_df() -> pd.DataFrame: - """Create empty observable dataframe - - Returns: - Created DataFrame - """ - return pd.DataFrame(data={col: [] for col in OBSERVABLE_DF_COLS}) +_deprecated_import_v1(__name__) diff --git a/petab/parameter_mapping.py b/petab/parameter_mapping.py index 4fa3115d..79598380 100644 --- a/petab/parameter_mapping.py +++ b/petab/parameter_mapping.py @@ -1,801 +1,7 @@ -"""Functions related to mapping parameter from model to parameter estimation -problem -""" +"""Deprecated module for parameter mapping. -import logging -import numbers -import os -import re -import warnings -from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, Union +Use petab.v1.parameter_mapping instead.""" +from petab import _deprecated_import_v1 +from petab.v1.parameter_mapping import * # noqa: F403, F401, E402 -import libsbml -import numpy as np -import pandas as pd - -from . 
import ( - ENV_NUM_THREADS, - core, - lint, - measurements, - observables, - parameters, -) -from .C import * # noqa: F403 -from .mapping import resolve_mapping -from .models import Model - -logger = logging.getLogger(__name__) -__all__ = [ - "get_optimization_to_simulation_parameter_mapping", - "get_parameter_mapping_for_condition", - "handle_missing_overrides", - "merge_preeq_and_sim_pars", - "merge_preeq_and_sim_pars_condition", - "ParMappingDict", - "ParMappingDictTuple", - "ScaleMappingDict", - "ScaleMappingDictTuple", - "ParMappingDictQuadruple", -] - - -# Parameter mapping for condition -ParMappingDict = Dict[str, Union[str, numbers.Number]] -# Parameter mapping for combination of preequilibration and simulation -# condition -ParMappingDictTuple = Tuple[ParMappingDict, ParMappingDict] -# Same for scale mapping -ScaleMappingDict = Dict[str, str] -ScaleMappingDictTuple = Tuple[ScaleMappingDict, ScaleMappingDict] -# Parameter mapping for combination of preequilibration and simulation -# conditions, for parameter and scale mapping -ParMappingDictQuadruple = Tuple[ - ParMappingDict, ParMappingDict, ScaleMappingDict, ScaleMappingDict -] - - -def get_optimization_to_simulation_parameter_mapping( - condition_df: pd.DataFrame, - measurement_df: pd.DataFrame, - parameter_df: Optional[pd.DataFrame] = None, - observable_df: Optional[pd.DataFrame] = None, - mapping_df: Optional[pd.DataFrame] = None, - sbml_model: libsbml.Model = None, - simulation_conditions: Optional[pd.DataFrame] = None, - warn_unmapped: Optional[bool] = True, - scaled_parameters: bool = False, - fill_fixed_parameters: bool = True, - allow_timepoint_specific_numeric_noise_parameters: bool = False, - model: Model = None, -) -> List[ParMappingDictQuadruple]: - """ - Create list of mapping dicts from PEtab-problem to model parameters. - - Mapping can be performed in parallel. The number of threads is controlled - by the environment variable with the name of - :py:data:`petab.ENV_NUM_THREADS`. 
- - Parameters: - condition_df, measurement_df, parameter_df, observable_df: - The dataframes in the PEtab format. - sbml_model: - The SBML model (deprecated) - model: - The model. - simulation_conditions: - Table of simulation conditions as created by - ``petab.get_simulation_conditions``. - warn_unmapped: - If ``True``, log warning regarding unmapped parameters - scaled_parameters: - Whether parameter values should be scaled. - fill_fixed_parameters: - Whether to fill in nominal values for fixed parameters - (estimate=0 in parameters table). - allow_timepoint_specific_numeric_noise_parameters: - Mapping of timepoint-specific parameters overrides is generally - not supported. If this option is set to True, this function will - not fail in case of timepoint-specific fixed noise parameters, - if the noise formula consists only of one single parameter. - It is expected that the respective mapping is performed elsewhere. - The value mapped to the respective parameter here is undefined. - - Returns: - Parameter value and parameter scale mapping for all conditions. - - The length of the returned array is the number of unique combinations - of ``simulationConditionId`` s and ``preequilibrationConditionId`` s - from the measurement table. Each entry is a tuple of four dicts of - length equal to the number of model parameters. - The first two dicts map simulation parameter IDs to optimization - parameter IDs or values (where values are fixed) for preequilibration - and simulation condition, respectively. - The last two dicts map simulation parameter IDs to the parameter scale - of the respective parameter, again for preequilibration and simulation - condition. - If no preequilibration condition is defined, the respective dicts will - be empty. ``NaN`` is used where no mapping exists. - """ - if sbml_model: - warnings.warn( - "Passing a model via the `sbml_model` argument is " - "deprecated, use `model=petab.models.sbml_model." 
- "SbmlModel(...)` instead.", - DeprecationWarning, - stacklevel=2, - ) - from petab.models.sbml_model import SbmlModel - - if model: - raise ValueError( - "Arguments `model` and `sbml_model` are " "mutually exclusive." - ) - model = SbmlModel(sbml_model=sbml_model) - - # Ensure inputs are okay - _perform_mapping_checks( - measurement_df, - allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501 - ) - - if simulation_conditions is None: - simulation_conditions = measurements.get_simulation_conditions( - measurement_df - ) - - simulation_parameters = dict(model.get_free_parameter_ids_with_values()) - # Add output parameters that are not already defined in the model - if observable_df is not None: - output_parameters = observables.get_output_parameters( - observable_df=observable_df, model=model, mapping_df=mapping_df - ) - for par_id in output_parameters: - simulation_parameters[par_id] = np.nan - - num_threads = int(os.environ.get(ENV_NUM_THREADS, 1)) - - # If sequential execution is requested, let's not create any - # thread-allocation overhead - if num_threads == 1: - mapping = map( - _map_condition, - _map_condition_arg_packer( - simulation_conditions, - measurement_df, - condition_df, - parameter_df, - mapping_df, - model, - simulation_parameters, - warn_unmapped, - scaled_parameters, - fill_fixed_parameters, - allow_timepoint_specific_numeric_noise_parameters, - ), - ) - return list(mapping) - - # Run multi-threaded - from concurrent.futures import ThreadPoolExecutor - - with ThreadPoolExecutor(max_workers=num_threads) as executor: - mapping = executor.map( - _map_condition, - _map_condition_arg_packer( - simulation_conditions, - measurement_df, - condition_df, - parameter_df, - mapping_df, - model, - simulation_parameters, - warn_unmapped, - scaled_parameters, - fill_fixed_parameters, - allow_timepoint_specific_numeric_noise_parameters, - ), - ) - return list(mapping) - - -def 
_map_condition_arg_packer( - simulation_conditions, - measurement_df, - condition_df, - parameter_df, - mapping_df, - model, - simulation_parameters, - warn_unmapped, - scaled_parameters, - fill_fixed_parameters, - allow_timepoint_specific_numeric_noise_parameters, -): - """Helper function to pack extra arguments for _map_condition""" - for _, condition in simulation_conditions.iterrows(): - yield ( - condition, - measurement_df, - condition_df, - parameter_df, - mapping_df, - model, - simulation_parameters, - warn_unmapped, - scaled_parameters, - fill_fixed_parameters, - allow_timepoint_specific_numeric_noise_parameters, - ) - - -def _map_condition(packed_args): - """Helper function for parallel condition mapping. - - For arguments see - :py:func:`get_optimization_to_simulation_parameter_mapping`. - """ - ( - condition, - measurement_df, - condition_df, - parameter_df, - mapping_df, - model, - simulation_parameters, - warn_unmapped, - scaled_parameters, - fill_fixed_parameters, - allow_timepoint_specific_numeric_noise_parameters, - ) = packed_args - - cur_measurement_df = None - # Get the condition specific measurements for the current condition, but - # only if relevant for parameter mapping - if ( - OBSERVABLE_PARAMETERS in measurement_df - and measurement_df[OBSERVABLE_PARAMETERS].notna().any() - ) or ( - NOISE_PARAMETERS in measurement_df - and measurement_df[NOISE_PARAMETERS].notna().any() - ): - cur_measurement_df = measurements.get_rows_for_condition( - measurement_df, condition - ) - - if ( - PREEQUILIBRATION_CONDITION_ID not in condition - or not isinstance(condition[PREEQUILIBRATION_CONDITION_ID], str) - or not condition[PREEQUILIBRATION_CONDITION_ID] - ): - par_map_preeq = {} - scale_map_preeq = {} - else: - par_map_preeq, scale_map_preeq = get_parameter_mapping_for_condition( - condition_id=condition[PREEQUILIBRATION_CONDITION_ID], - is_preeq=True, - cur_measurement_df=cur_measurement_df, - model=model, - condition_df=condition_df, - 
parameter_df=parameter_df, - mapping_df=mapping_df, - simulation_parameters=simulation_parameters, - warn_unmapped=warn_unmapped, - scaled_parameters=scaled_parameters, - fill_fixed_parameters=fill_fixed_parameters, - allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501 - ) - - par_map_sim, scale_map_sim = get_parameter_mapping_for_condition( - condition_id=condition[SIMULATION_CONDITION_ID], - is_preeq=False, - cur_measurement_df=cur_measurement_df, - model=model, - condition_df=condition_df, - parameter_df=parameter_df, - mapping_df=mapping_df, - simulation_parameters=simulation_parameters, - warn_unmapped=warn_unmapped, - scaled_parameters=scaled_parameters, - fill_fixed_parameters=fill_fixed_parameters, - allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501 - ) - - return par_map_preeq, par_map_sim, scale_map_preeq, scale_map_sim - - -def get_parameter_mapping_for_condition( - condition_id: str, - is_preeq: bool, - cur_measurement_df: Optional[pd.DataFrame] = None, - sbml_model: libsbml.Model = None, - condition_df: pd.DataFrame = None, - parameter_df: pd.DataFrame = None, - mapping_df: Optional[pd.DataFrame] = None, - simulation_parameters: Optional[Dict[str, str]] = None, - warn_unmapped: bool = True, - scaled_parameters: bool = False, - fill_fixed_parameters: bool = True, - allow_timepoint_specific_numeric_noise_parameters: bool = False, - model: Model = None, -) -> Tuple[ParMappingDict, ScaleMappingDict]: - """ - Create dictionary of parameter value and parameter scale mappings from - PEtab-problem to SBML parameters for the given condition. 
- - Parameters: - condition_id: - Condition ID for which to perform mapping - is_preeq: - If ``True``, output parameters will not be mapped - cur_measurement_df: - Measurement sub-table for current condition, can be ``None`` if - not relevant for parameter mapping - condition_df: - PEtab condition DataFrame - parameter_df: - PEtab parameter DataFrame - mapping_df: - PEtab mapping DataFrame - sbml_model: - The SBML model (deprecated) - model: - The model. - simulation_parameters: - Model simulation parameter IDs mapped to parameter values (output - of ``petab.sbml.get_model_parameters(.., with_values=True)``). - Optional, saves time if precomputed. - warn_unmapped: - If ``True``, log warning regarding unmapped parameters - scaled_parameters: - Whether parameter values should be scaled. - fill_fixed_parameters: - Whether to fill in nominal values for fixed parameters - (estimate=0 in parameters table). - allow_timepoint_specific_numeric_noise_parameters: - Mapping of timepoint-specific parameters overrides is generally - not supported. If this option is set to True, this function will - not fail in case of timepoint-specific fixed noise parameters, - if the noise formula consists only of one single parameter. - It is expected that the respective mapping is performed elsewhere. - The value mapped to the respective parameter here is undefined. - - Returns: - Tuple of two dictionaries. First dictionary mapping model parameter IDs - to mapped parameters IDs to be estimated or to filled-in values in case - of non-estimated parameters. - Second dictionary mapping model parameter IDs to their scale. - ``NaN`` is used where no mapping exists. - """ - if sbml_model: - warnings.warn( - "Passing a model via the `sbml_model` argument is " - "deprecated, use `model=petab.models.sbml_model." 
- "SbmlModel(...)` instead.", - DeprecationWarning, - stacklevel=2, - ) - from petab.models.sbml_model import SbmlModel - - if model: - raise ValueError( - "Arguments `model` and `sbml_model` are " "mutually exclusive." - ) - model = SbmlModel(sbml_model=sbml_model) - - if cur_measurement_df is not None: - _perform_mapping_checks( - cur_measurement_df, - allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501 - ) - - if simulation_parameters is None: - simulation_parameters = dict( - model.get_free_parameter_ids_with_values() - ) - - # NOTE: order matters here - the former is overwritten by the latter: - # model < condition table < measurement < table parameter table - - # initialize mapping dicts - # for the case of matching simulation and optimization parameter vector - par_mapping = simulation_parameters.copy() - scale_mapping = {par_id: LIN for par_id in par_mapping.keys()} - _output_parameters_to_nan(par_mapping) - - # not strictly necessary for preequilibration, be we do it to have - # same length of parameter vectors - if cur_measurement_df is not None: - _apply_output_parameter_overrides(par_mapping, cur_measurement_df) - - if not is_preeq: - handle_missing_overrides(par_mapping, warn=warn_unmapped) - - _apply_condition_parameters( - par_mapping, - scale_mapping, - condition_id, - condition_df, - model, - mapping_df, - ) - _apply_parameter_table( - par_mapping, - scale_mapping, - parameter_df, - scaled_parameters, - fill_fixed_parameters, - ) - - return par_mapping, scale_mapping - - -def _output_parameters_to_nan(mapping: ParMappingDict) -> None: - """Set output parameters in mapping dictionary to nan""" - rex = re.compile("^(noise|observable)Parameter[0-9]+_") - for key in mapping.keys(): - try: - matches = rex.match(key) - except TypeError: - continue - - if matches: - mapping[key] = np.nan - - -def _apply_output_parameter_overrides( - mapping: ParMappingDict, cur_measurement_df: pd.DataFrame 
-) -> None: - """ - Apply output parameter overrides to the parameter mapping dict for a given - condition as defined in the measurement table (``observableParameter``, - ``noiseParameters``). - - Arguments: - mapping: parameter mapping dict as obtained from - :py:func:`get_parameter_mapping_for_condition`. - cur_measurement_df: - Subset of the measurement table for the current condition - """ - for _, row in cur_measurement_df.iterrows(): - # we trust that the number of overrides matches (see above) - overrides = measurements.split_parameter_replacement_list( - row.get(OBSERVABLE_PARAMETERS, None) - ) - _apply_overrides_for_observable( - mapping, row[OBSERVABLE_ID], "observable", overrides - ) - - overrides = measurements.split_parameter_replacement_list( - row.get(NOISE_PARAMETERS, None) - ) - _apply_overrides_for_observable( - mapping, row[OBSERVABLE_ID], "noise", overrides - ) - - -def _apply_overrides_for_observable( - mapping: ParMappingDict, - observable_id: str, - override_type: Literal["observable", "noise"], - overrides: List[str], -) -> None: - """ - Apply parameter-overrides for observables and noises to mapping - matrix. - - Arguments: - mapping: mapping dict to which to apply overrides - observable_id: observable ID - override_type: ``'observable'`` or ``'noise'`` - overrides: list of overrides for noise or observable parameters - """ - for i, override in enumerate(overrides): - overridee_id = f"{override_type}Parameter{i+1}_{observable_id}" - mapping[overridee_id] = override - - -def _apply_condition_parameters( - par_mapping: ParMappingDict, - scale_mapping: ScaleMappingDict, - condition_id: str, - condition_df: pd.DataFrame, - model: Model, - mapping_df: Optional[pd.DataFrame] = None, -) -> None: - """Replace parameter IDs in parameter mapping dictionary by condition - table parameter values (in-place). 
- - Arguments: - par_mapping: see :py:func:`get_parameter_mapping_for_condition` - condition_id: ID of condition to work on - condition_df: PEtab condition table - """ - for overridee_id in condition_df.columns: - if overridee_id == CONDITION_NAME: - continue - - overridee_id = resolve_mapping(mapping_df, overridee_id) - - # Species, compartments, and rule targets are handled elsewhere - if model.is_state_variable(overridee_id): - continue - - par_mapping[overridee_id] = core.to_float_if_float( - condition_df.loc[condition_id, overridee_id] - ) - - if isinstance(par_mapping[overridee_id], numbers.Number) and np.isnan( - par_mapping[overridee_id] - ): - # NaN in the condition table for an entity without time derivative - # indicates that the model value should be used - try: - par_mapping[overridee_id] = model.get_parameter_value( - overridee_id - ) - except ValueError as e: - raise NotImplementedError( - "Not sure how to handle NaN in condition table for " - f"{overridee_id}." - ) from e - - scale_mapping[overridee_id] = LIN - - -def _apply_parameter_table( - par_mapping: ParMappingDict, - scale_mapping: ScaleMappingDict, - parameter_df: Optional[pd.DataFrame] = None, - scaled_parameters: bool = False, - fill_fixed_parameters: bool = True, -) -> None: - """Replace parameters from parameter table in mapping list for a given - condition and set the corresponding scale. - - Replace non-estimated parameters by ``nominalValues`` - (un-scaled / lin-scaled), replace estimated parameters by the respective - ID. 
- - Arguments: - par_mapping: - mapping dict obtained from - :py:func:`get_parameter_mapping_for_condition` - parameter_df: - PEtab parameter table - """ - if parameter_df is None: - return - - for row in parameter_df.itertuples(): - if row.Index not in par_mapping: - # The current parameter is not required for this condition - continue - - scale = getattr(row, PARAMETER_SCALE, LIN) - scale_mapping[row.Index] = scale - if fill_fixed_parameters and getattr(row, ESTIMATE) == 0: - val = getattr(row, NOMINAL_VALUE) - if scaled_parameters: - val = parameters.scale(val, scale) - else: - scale_mapping[row.Index] = LIN - par_mapping[row.Index] = val - else: - par_mapping[row.Index] = row.Index - - # Replace any leftover mapped parameter coming from condition table - for problem_par, sim_par in par_mapping.items(): - # string indicates unmapped - if not isinstance(sim_par, str): - continue - - try: - # the overridee is a model parameter - par_mapping[problem_par] = par_mapping[sim_par] - scale_mapping[problem_par] = scale_mapping[sim_par] - except KeyError: - if parameter_df is None: - raise - - # or the overridee is only defined in the parameter table - scale = ( - parameter_df.loc[sim_par, PARAMETER_SCALE] - if PARAMETER_SCALE in parameter_df - else LIN - ) - - if ( - fill_fixed_parameters - and ESTIMATE in parameter_df - and parameter_df.loc[sim_par, ESTIMATE] == 0 - ): - val = parameter_df.loc[sim_par, NOMINAL_VALUE] - if scaled_parameters: - val = parameters.scale(val, scale) - else: - scale = LIN - par_mapping[problem_par] = val - - scale_mapping[problem_par] = scale - - -def _perform_mapping_checks( - measurement_df: pd.DataFrame, - allow_timepoint_specific_numeric_noise_parameters: bool = False, -) -> None: - """Check for PEtab features which we can't account for during parameter - mapping. 
- """ - if lint.measurement_table_has_timepoint_specific_mappings( - measurement_df, - allow_scalar_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501 - ): - # we could allow that for floats, since they don't matter in this - # function and would be simply ignored - raise ValueError( - "Timepoint-specific parameter overrides currently unsupported." - ) - - -def handle_missing_overrides( - mapping_par_opt_to_par_sim: ParMappingDict, - warn: bool = True, - condition_id: str = None, -) -> None: - """ - Find all observable parameters and noise parameters that were not mapped - and set their mapping to np.nan. - - Assumes that parameters matching the regular expression - ``(noise|observable)Parameter[0-9]+_`` were all supposed to be overwritten. - - Parameters: - mapping_par_opt_to_par_sim: - Output of :py:func:`get_parameter_mapping_for_condition` - warn: - If True, log warning regarding unmapped parameters - condition_id: - Optional condition ID for more informative output - """ - _missed_vals = [] - rex = re.compile("^(noise|observable)Parameter[0-9]+_") - for key, val in mapping_par_opt_to_par_sim.items(): - try: - matches = rex.match(val) - except TypeError: - continue - - if matches: - mapping_par_opt_to_par_sim[key] = np.nan - _missed_vals.append(key) - - if _missed_vals and warn: - logger.warning( - f"Could not map the following overrides for condition " - f"{condition_id}: " - f"{_missed_vals}. Usually, this is just due to missing " - f"data points." - ) - - -def merge_preeq_and_sim_pars_condition( - condition_map_preeq: ParMappingDict, - condition_map_sim: ParMappingDict, - condition_scale_map_preeq: ScaleMappingDict, - condition_scale_map_sim: ScaleMappingDict, - condition: Any, -) -> None: - """Merge preequilibration and simulation parameters and scales for a single - condition while checking for compatibility. 
- - This function is meant for the case where we cannot have different - parameters (and scales) for preequilibration and simulation. Therefore, - merge both and ensure matching scales and parameters. - ``condition_map_sim`` and ``condition_scale_map_sim`` will be modified in - place. - - Arguments: - condition_map_preeq, condition_map_sim: - Parameter mapping as obtained from - :py:func:`get_parameter_mapping_for_condition` - condition_scale_map_preeq, condition_scale_map_sim: - Parameter scale mapping as obtained from - :py:func:`get_parameter_mapping_for_condition` - condition: Condition identifier for more informative error messages - """ - if not condition_map_preeq: - # nothing to do - return - - all_par_ids = set(condition_map_sim.keys()) | set( - condition_map_preeq.keys() - ) - - for par_id in all_par_ids: - if par_id not in condition_map_preeq: - # nothing to do - continue - - if par_id not in condition_map_sim: - # unmapped for simulation -> just use preeq values - condition_map_sim[par_id] = condition_map_preeq[par_id] - condition_scale_map_sim[par_id] = condition_scale_map_preeq[par_id] - continue - - # present in both - par_preeq = condition_map_preeq[par_id] - par_sim = condition_map_sim[par_id] - if par_preeq != par_sim and not ( - core.is_empty(par_sim) and core.is_empty(par_preeq) - ): - # both identical or both nan is okay - if core.is_empty(par_sim): - # unmapped for simulation - condition_map_sim[par_id] = par_preeq - elif core.is_empty(par_preeq): - # unmapped for preeq is okay - pass - else: - raise ValueError( - "Cannot handle different values for dynamic " - f"parameters: for condition {condition} " - f"parameter {par_id} is {par_preeq} for preeq " - f"and {par_sim} for simulation." 
- ) - - scale_preeq = condition_scale_map_preeq[par_id] - scale_sim = condition_scale_map_sim[par_id] - - if scale_preeq != scale_sim: - # both identical is okay - if core.is_empty(par_sim): - # unmapped for simulation - condition_scale_map_sim[par_id] = scale_preeq - elif core.is_empty(par_preeq): - # unmapped for preeq is okay - pass - else: - raise ValueError( - "Cannot handle different parameter scales " - f"parameters: for condition {condition} " - f"scale for parameter {par_id} is {scale_preeq} for preeq " - f"and {scale_sim} for simulation." - ) - - -def merge_preeq_and_sim_pars( - parameter_mappings: Iterable[ParMappingDictTuple], - scale_mappings: Iterable[ScaleMappingDictTuple], -) -> Tuple[List[ParMappingDictTuple], List[ScaleMappingDictTuple]]: - """Merge preequilibration and simulation parameters and scales for a list - of conditions while checking for compatibility. - - Parameters: - parameter_mappings: - As returned by - :py:func:`petab.get_optimization_to_simulation_parameter_mapping`. - scale_mappings: - As returned by - :py:func:`petab.get_optimization_to_simulation_parameter_mapping`. - - Returns: - The parameter and scale simulation mappings, modified and checked. 
- """ - parameter_mapping = [] - scale_mapping = [] - for ic, ( - (map_preeq, map_sim), - (scale_map_preeq, scale_map_sim), - ) in enumerate(zip(parameter_mappings, scale_mappings)): - merge_preeq_and_sim_pars_condition( - condition_map_preeq=map_preeq, - condition_map_sim=map_sim, - condition_scale_map_preeq=scale_map_preeq, - condition_scale_map_sim=scale_map_sim, - condition=ic, - ) - parameter_mapping.append(map_sim) - scale_mapping.append(scale_map_sim) - - return parameter_mapping, scale_mapping +_deprecated_import_v1(__name__) diff --git a/petab/parameters.py b/petab/parameters.py index 3339ef63..39e66fe3 100644 --- a/petab/parameters.py +++ b/petab/parameters.py @@ -1,639 +1,7 @@ -"""Functions operating on the PEtab parameter table""" +"""Deprecated module for parameter table handling. -import numbers -import warnings -from collections import OrderedDict -from pathlib import Path -from typing import ( - Dict, - Iterable, - List, - Literal, - Optional, - Sequence, - Set, - Tuple, - Union, -) +Use petab.v1.parameters instead.""" +from petab import _deprecated_import_v1 +from petab.v1.parameters import * # noqa: F403, F401, E402 -import libsbml -import numpy as np -import pandas as pd - -from . import conditions, core, lint, measurements, observables -from .C import * # noqa: F403 -from .models import Model - -__all__ = [ - "create_parameter_df", - "get_optimization_parameter_scaling", - "get_optimization_parameters", - "get_parameter_df", - "get_priors_from_df", - "get_valid_parameters_for_parameter_table", - "map_scale", - "map_unscale", - "normalize_parameter_df", - "scale", - "unscale", - "write_parameter_df", -] - -PARAMETER_SCALE_ARGS = Literal["", "lin", "log", "log10"] - - -def get_parameter_df( - parameter_file: Union[ - str, Path, pd.DataFrame, Iterable[Union[str, Path, pd.DataFrame]], None - ], -) -> Union[pd.DataFrame, None]: - """ - Read the provided parameter file into a ``pandas.Dataframe``. 
- - Arguments: - parameter_file: Name of the file to read from or pandas.Dataframe, - or an Iterable. - - Returns: - Parameter ``DataFrame``, or ``None`` if ``None`` was passed. - """ - if parameter_file is None: - return None - if isinstance(parameter_file, pd.DataFrame): - parameter_df = parameter_file - elif isinstance(parameter_file, (str, Path)): - parameter_df = pd.read_csv( - parameter_file, sep="\t", float_precision="round_trip" - ) - elif isinstance(parameter_file, Iterable): - dfs = [get_parameter_df(x) for x in parameter_file if x] - - if not dfs: - return None - - parameter_df = pd.concat(dfs) - # Check for contradicting parameter definitions - _check_for_contradicting_parameter_definitions(parameter_df) - - return parameter_df - - lint.assert_no_leading_trailing_whitespace( - parameter_df.columns.values, "parameter" - ) - - if not isinstance(parameter_df.index, pd.RangeIndex): - parameter_df.reset_index( - drop=parameter_file.index.name != PARAMETER_ID, - inplace=True, - ) - - try: - parameter_df.set_index([PARAMETER_ID], inplace=True) - except KeyError as e: - raise KeyError( - f"Parameter table missing mandatory field {PARAMETER_ID}." - ) from e - _check_for_contradicting_parameter_definitions(parameter_df) - - return parameter_df - - -def _check_for_contradicting_parameter_definitions(parameter_df: pd.DataFrame): - """ - Raises a ValueError for non-unique parameter IDs - """ - parameter_duplicates = set( - parameter_df.index.values[parameter_df.index.duplicated()] - ) - if parameter_duplicates: - raise ValueError( - f"The values of `{PARAMETER_ID}` must be unique. 
The " - f"following duplicates were found:\n{parameter_duplicates}" - ) - - -def write_parameter_df(df: pd.DataFrame, filename: Union[str, Path]) -> None: - """Write PEtab parameter table - - Arguments: - df: PEtab parameter table - filename: Destination file name - """ - df = get_parameter_df(df) - df.to_csv(filename, sep="\t", index=True) - - -def get_optimization_parameters(parameter_df: pd.DataFrame) -> List[str]: - """ - Get list of optimization parameter IDs from parameter table. - - Arguments: - parameter_df: PEtab parameter DataFrame - - Returns: - List of IDs of parameters selected for optimization. - """ - return list(parameter_df.index[parameter_df[ESTIMATE] == 1]) - - -def get_optimization_parameter_scaling( - parameter_df: pd.DataFrame, -) -> Dict[str, str]: - """ - Get Dictionary with optimization parameter IDs mapped to parameter scaling - strings. - - Arguments: - parameter_df: PEtab parameter DataFrame - - Returns: - Dictionary with optimization parameter IDs mapped to parameter scaling - strings. 
- """ - estimated_df = parameter_df.loc[parameter_df[ESTIMATE] == 1] - return dict(zip(estimated_df.index, estimated_df[PARAMETER_SCALE])) - - -def create_parameter_df( - sbml_model: Optional[libsbml.Model] = None, - condition_df: Optional[pd.DataFrame] = None, - observable_df: Optional[pd.DataFrame] = None, - measurement_df: Optional[pd.DataFrame] = None, - model: Optional[Model] = None, - include_optional: bool = False, - parameter_scale: str = LOG10, - lower_bound: Iterable = None, - upper_bound: Iterable = None, - mapping_df: Optional[pd.DataFrame] = None, -) -> pd.DataFrame: - """Create a new PEtab parameter table - - All table entries can be provided as string or list-like with length - matching the number of parameters - - Arguments: - sbml_model: SBML Model (deprecated, mutually exclusive with ``model``) - model: PEtab model (mutually exclusive with ``sbml_model``) - condition_df: PEtab condition DataFrame - observable_df: PEtab observable DataFrame - measurement_df: PEtab measurement DataFrame - include_optional: By default this only returns parameters that are - required to be present in the parameter table. If set to ``True``, - this returns all parameters that are allowed to be present in the - parameter table (i.e. also including parameters specified in the - model). - parameter_scale: parameter scaling - lower_bound: lower bound for parameter value - upper_bound: upper bound for parameter value - mapping_df: PEtab mapping DataFrame - - Returns: - The created parameter DataFrame - """ - if sbml_model: - warnings.warn( - "Passing a model via the `sbml_model` argument is " - "deprecated, use `model=petab.models.sbml_model." - "SbmlModel(...)` instead.", - DeprecationWarning, - stacklevel=2, - ) - from petab.models.sbml_model import SbmlModel - - if model: - raise ValueError( - "Arguments `model` and `sbml_model` are " "mutually exclusive." 
- ) - model = SbmlModel(sbml_model=sbml_model) - if include_optional: - parameter_ids = list( - get_valid_parameters_for_parameter_table( - model=model, - condition_df=condition_df, - observable_df=observable_df, - measurement_df=measurement_df, - ) - ) - else: - parameter_ids = list( - get_required_parameters_for_parameter_table( - model=model, - condition_df=condition_df, - observable_df=observable_df, - measurement_df=measurement_df, - mapping_df=mapping_df, - ) - ) - - df = pd.DataFrame( - data={ - PARAMETER_ID: parameter_ids, - PARAMETER_NAME: parameter_ids, - PARAMETER_SCALE: parameter_scale, - LOWER_BOUND: lower_bound, - UPPER_BOUND: upper_bound, - NOMINAL_VALUE: np.nan, - ESTIMATE: 1, - INITIALIZATION_PRIOR_TYPE: "", - INITIALIZATION_PRIOR_PARAMETERS: "", - OBJECTIVE_PRIOR_TYPE: "", - OBJECTIVE_PRIOR_PARAMETERS: "", - } - ) - df.set_index([PARAMETER_ID], inplace=True) - - # For model parameters, set nominal values as defined in the model - for parameter_id in df.index: - try: - df.loc[parameter_id, NOMINAL_VALUE] = model.get_parameter_value( - parameter_id - ) - except ValueError: - # parameter was introduced as condition-specific override and - # is potentially not present in the model - pass - return df - - -def get_required_parameters_for_parameter_table( - model: Model, - condition_df: pd.DataFrame, - observable_df: pd.DataFrame, - measurement_df: pd.DataFrame, - mapping_df: pd.DataFrame = None, -) -> Set[str]: - """ - Get set of parameters which need to go into the parameter table - - Arguments: - model: PEtab model - condition_df: PEtab condition table - observable_df: PEtab observable table - measurement_df: PEtab measurement table - mapping_df: PEtab mapping table - - Returns: - Set of parameter IDs which PEtab requires to be present in the - parameter table. That is all {observable,noise}Parameters from the - measurement table as well as all parametric condition table overrides - that are not defined in the model. 
- """ - # use ordered dict as proxy for ordered set - parameter_ids = OrderedDict() - - # Add parameters from measurement table, unless they are fixed parameters - def append_overrides(overrides): - for p in overrides: - if isinstance(p, str) and p not in condition_df.columns: - parameter_ids[p] = None - - for _, row in measurement_df.iterrows(): - # we trust that the number of overrides matches - append_overrides( - measurements.split_parameter_replacement_list( - row.get(OBSERVABLE_PARAMETERS, None) - ) - ) - append_overrides( - measurements.split_parameter_replacement_list( - row.get(NOISE_PARAMETERS, None) - ) - ) - - # Add output parameters except for placeholders - for formula_type, placeholder_sources in ( - ( - # Observable formulae - {"observables": True, "noise": False}, - # can only contain observable placeholders - {"noise": False, "observables": True}, - ), - ( - # Noise formulae - {"observables": False, "noise": True}, - # can contain noise and observable placeholders - {"noise": True, "observables": True}, - ), - ): - output_parameters = observables.get_output_parameters( - observable_df, - model, - mapping_df=mapping_df, - **formula_type, - ) - placeholders = observables.get_placeholders( - observable_df, - **placeholder_sources, - ) - for p in output_parameters: - if p not in placeholders: - parameter_ids[p] = None - - # Add condition table parametric overrides unless already defined in the - # model - for p in conditions.get_parametric_overrides(condition_df): - if not model.has_entity_with_id(p): - parameter_ids[p] = None - - # remove parameters that occur in the condition table and are overridden - # for ALL conditions - for p in condition_df.columns[~condition_df.isnull().any()]: - try: - del parameter_ids[p] - except KeyError: - pass - return parameter_ids.keys() - - -def get_valid_parameters_for_parameter_table( - model: Model, - condition_df: pd.DataFrame, - observable_df: pd.DataFrame, - measurement_df: pd.DataFrame, - mapping_df: 
pd.DataFrame = None, -) -> Set[str]: - """ - Get set of parameters which may be present inside the parameter table - - Arguments: - model: PEtab model - condition_df: PEtab condition table - observable_df: PEtab observable table - measurement_df: PEtab measurement table - mapping_df: PEtab mapping table for additional checks - - Returns: - Set of parameter IDs which PEtab allows to be present in the - parameter table. - """ - # - grab all allowed model parameters - # - grab corresponding names from mapping table - # - grab all output parameters defined in {observable,noise}Formula - # - grab all parameters from measurement table - # - grab all parametric overrides from condition table - # - remove parameters for which condition table columns exist - # - remove placeholder parameters - # (only partial overrides are not supported) - - # must not go into parameter table - blackset = set() - - if observable_df is not None: - placeholders = set(observables.get_placeholders(observable_df)) - - # collect assignment targets - blackset |= placeholders - - if condition_df is not None: - blackset |= set(condition_df.columns.values) - {CONDITION_NAME} - - # don't use sets here, to have deterministic ordering, - # e.g. 
for creating parameter tables - parameter_ids = OrderedDict.fromkeys( - p - for p in model.get_valid_parameters_for_parameter_table() - if p not in blackset - ) - - if mapping_df is not None: - for from_id, to_id in zip( - mapping_df.index.values, mapping_df[MODEL_ENTITY_ID] - ): - if to_id in parameter_ids.keys(): - parameter_ids[from_id] = None - - if observable_df is not None: - # add output parameters from observables table - output_parameters = observables.get_output_parameters( - observable_df=observable_df, model=model - ) - for p in output_parameters: - if p not in blackset: - parameter_ids[p] = None - - # Append parameters from measurement table, unless they occur as condition - # table columns - def append_overrides(overrides): - for p in overrides: - if isinstance(p, str) and p not in blackset: - parameter_ids[p] = None - - if measurement_df is not None: - for _, row in measurement_df.iterrows(): - # we trust that the number of overrides matches - append_overrides( - measurements.split_parameter_replacement_list( - row.get(OBSERVABLE_PARAMETERS, None) - ) - ) - append_overrides( - measurements.split_parameter_replacement_list( - row.get(NOISE_PARAMETERS, None) - ) - ) - - # Append parameter overrides from condition table - if condition_df is not None: - for p in conditions.get_parametric_overrides(condition_df): - parameter_ids[p] = None - - return parameter_ids.keys() - - -def get_priors_from_df( - parameter_df: pd.DataFrame, - mode: Literal["initialization", "objective"], - parameter_ids: Sequence[str] = None, -) -> List[Tuple]: - """Create list with information about the parameter priors - - Arguments: - parameter_df: PEtab parameter table - mode: ``'initialization'`` or ``'objective'`` - parameter_ids: A sequence of parameter IDs for which to sample starting - points. - For subsetting or reordering the parameters. - Defaults to all estimated parameters. - - Returns: - List with prior information. 
- """ - # get types and parameters of priors from dataframe - par_to_estimate = parameter_df.loc[parameter_df[ESTIMATE] == 1] - - if parameter_ids: - try: - par_to_estimate = par_to_estimate.loc[parameter_ids, :] - except KeyError as e: - missing_ids = set(parameter_ids) - set(par_to_estimate.index) - raise KeyError( - "Parameter table does not contain estimated parameter(s) " - f"{missing_ids}." - ) from e - - prior_list = [] - for _, row in par_to_estimate.iterrows(): - # retrieve info about type - prior_type = str(row.get(f"{mode}PriorType", "")) - if core.is_empty(prior_type): - prior_type = PARAMETER_SCALE_UNIFORM - - # retrieve info about parameters of priors, make it a tuple of floats - pars_str = str(row.get(f"{mode}PriorParameters", "")) - if core.is_empty(pars_str): - lb, ub = map_scale( - [row[LOWER_BOUND], row[UPPER_BOUND]], - [row[PARAMETER_SCALE]] * 2, - ) - pars_str = f"{lb}{PARAMETER_SEPARATOR}{ub}" - prior_pars = tuple( - float(entry) for entry in pars_str.split(PARAMETER_SEPARATOR) - ) - - # add parameter scale and bounds, as this may be needed - par_scale = row[PARAMETER_SCALE] - par_bounds = (row[LOWER_BOUND], row[UPPER_BOUND]) - - # if no prior is specified, we assume a non-informative (uniform) one - if prior_type == "nan": - prior_type = PARAMETER_SCALE_UNIFORM - prior_pars = ( - scale(row[LOWER_BOUND], par_scale), - scale(row[UPPER_BOUND], par_scale), - ) - - prior_list.append((prior_type, prior_pars, par_scale, par_bounds)) - - return prior_list - - -def scale( - parameter: numbers.Number, - scale_str: PARAMETER_SCALE_ARGS, -) -> numbers.Number: - """Scale parameter according to ``scale_str``. - - Arguments: - parameter: - Parameter to be scaled. - scale_str: - One of ``'lin'`` (synonymous with ``''``), ``'log'``, ``'log10'``. - - Returns: - The scaled parameter. 
- """ - if scale_str == LIN or not scale_str: - return parameter - if scale_str == LOG: - return np.log(parameter) - if scale_str == LOG10: - return np.log10(parameter) - raise ValueError(f"Invalid parameter scaling: {scale_str}") - - -def unscale( - parameter: numbers.Number, - scale_str: PARAMETER_SCALE_ARGS, -) -> numbers.Number: - """Unscale parameter according to ``scale_str``. - - Arguments: - parameter: - Parameter to be unscaled. - scale_str: - One of ``'lin'`` (synonymous with ``''``), ``'log'``, ``'log10'``. - - Returns: - The unscaled parameter. - """ - if scale_str == LIN or not scale_str: - return parameter - if scale_str == LOG: - return np.exp(parameter) - if scale_str == LOG10: - return 10**parameter - raise ValueError(f"Invalid parameter scaling: {scale_str}") - - -def map_scale( - parameters: Sequence[numbers.Number], - scale_strs: Union[Iterable[PARAMETER_SCALE_ARGS], PARAMETER_SCALE_ARGS], -) -> Iterable[numbers.Number]: - """Scale the parameters, i.e. as :func:`scale`, but for Sequences. - - Arguments: - parameters: - Parameters to be scaled. - scale_strs: - Scales to apply. Broadcast if a single string. - - Returns: - The scaled parameters. - """ - if isinstance(scale_strs, str): - scale_strs = [scale_strs] * len(parameters) - return ( - scale(par_val, scale_str) - for par_val, scale_str in zip(parameters, scale_strs) - ) - - -def map_unscale( - parameters: Sequence[numbers.Number], - scale_strs: Union[Iterable[PARAMETER_SCALE_ARGS], PARAMETER_SCALE_ARGS], -) -> Iterable[numbers.Number]: - """Unscale the parameters, i.e. as :func:`unscale`, but for Sequences. - - Arguments: - parameters: - Parameters to be unscaled. - scale_strs: - Scales that the parameters are currently on. - Broadcast if a single string. - - Returns: - The unscaled parameters. 
- """ - if isinstance(scale_strs, str): - scale_strs = [scale_strs] * len(parameters) - return ( - unscale(par_val, scale_str) - for par_val, scale_str in zip(parameters, scale_strs) - ) - - -def normalize_parameter_df(parameter_df: pd.DataFrame) -> pd.DataFrame: - """Add missing columns and fill in default values.""" - df = parameter_df.copy(deep=True) - - if PARAMETER_NAME not in df: - df[PARAMETER_NAME] = df.reset_index()[PARAMETER_ID] - - prior_type_cols = [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE] - prior_par_cols = [ - INITIALIZATION_PRIOR_PARAMETERS, - OBJECTIVE_PRIOR_PARAMETERS, - ] - # iterate over initialization and objective priors - for prior_type_col, prior_par_col in zip(prior_type_cols, prior_par_cols): - # fill in default values for prior type - if prior_type_col not in df: - df[prior_type_col] = PARAMETER_SCALE_UNIFORM - else: - for irow, row in df.iterrows(): - if core.is_empty(row[prior_type_col]): - df.loc[irow, prior_type_col] = PARAMETER_SCALE_UNIFORM - if prior_par_col not in df: - df[prior_par_col] = None - for irow, row in df.iterrows(): - if ( - core.is_empty(row[prior_par_col]) - and row[prior_type_col] == PARAMETER_SCALE_UNIFORM - ): - lb, ub = map_scale( - [row[LOWER_BOUND], row[UPPER_BOUND]], - [row[PARAMETER_SCALE]] * 2, - ) - df.loc[irow, prior_par_col] = f"{lb}{PARAMETER_SEPARATOR}{ub}" - - return df +_deprecated_import_v1(__name__) diff --git a/petab/petablint.py b/petab/petablint.py index 45995602..f8228d42 100755 --- a/petab/petablint.py +++ b/petab/petablint.py @@ -8,8 +8,13 @@ from colorama import Fore from colorama import init as init_colorama +from jsonschema.exceptions import ValidationError as SchemaValidationError -import petab +import petab.v1 as petab +from petab.v1.C import FORMAT_VERSION +from petab.v2.lint import lint_problem +from petab.versions import get_major_version +from petab.yaml import validate logger = logging.getLogger(__name__) @@ -49,34 +54,39 @@ def parse_cli_args(): ) # Call with set of files - 
parser.add_argument( + group = parser.add_argument_group( + "Check individual files *DEPRECATED*. Please contact us via " + "https://github.com/PEtab-dev/libpetab-python/issues, " + "if you need this." + ) + group.add_argument( "-s", "--sbml", dest="sbml_file_name", help="SBML model filename" ) - parser.add_argument( + group.add_argument( "-o", "--observables", dest="observable_file_name", help="Observable table", ) - parser.add_argument( + group.add_argument( "-m", "--measurements", dest="measurement_file_name", help="Measurement table", ) - parser.add_argument( + group.add_argument( "-c", "--conditions", dest="condition_file_name", help="Conditions table", ) - parser.add_argument( + group.add_argument( "-p", "--parameters", dest="parameter_file_name", help="Parameter table", ) - parser.add_argument( + group.add_argument( "--vis", "--visualizations", dest="visualization_file_name", @@ -87,13 +97,18 @@ def parse_cli_args(): group.add_argument( "-y", "--yaml", + dest="yaml_file_name_deprecated", + help="PEtab YAML problem filename. " + "*DEPRECATED* pass the file name as positional argument instead.", + ) + group.add_argument( dest="yaml_file_name", help="PEtab YAML problem filename", + nargs="?", ) args = parser.parse_args() - - if args.yaml_file_name and any( + if any( ( args.sbml_file_name, args.condition_file_name, @@ -101,34 +116,51 @@ def parse_cli_args(): args.parameter_file_name, ) ): - parser.error( - "When providing a yaml file, no other files may " "be specified." + logger.warning( + "Passing individual tables to petablint is deprecated, please " + "provide a PEtab YAML file instead. " + "Please contact us via " + "https://github.com/PEtab-dev/libpetab-python/issues, " + "if you need this." ) + if args.yaml_file_name or args.yaml_file_name_deprecated: + parser.error( + "When providing a yaml file, no other files may be specified." + ) + + if args.yaml_file_name_deprecated: + logger.warning( + "The -y/--yaml option is deprecated. 
" + "Please provide the YAML file as a positional argument." + ) + if args.yaml_file_name: + parser.error( + "Please provide only one of --yaml or positional argument." + ) + + args.yaml_file_name = args.yaml_file_name or args.yaml_file_name_deprecated return args def main(): """Run PEtab validator""" - args = parse_cli_args() init_colorama(autoreset=True) - ch = logging.StreamHandler() + ch.setFormatter(LintFormatter()) + logging.basicConfig(level=logging.DEBUG, handlers=[ch]) + + args = parse_cli_args() + if args.verbose: ch.setLevel(logging.DEBUG) else: ch.setLevel(logging.WARN) - ch.setFormatter(LintFormatter()) - logging.basicConfig(level=logging.DEBUG, handlers=[ch]) if args.yaml_file_name: - from jsonschema.exceptions import ValidationError - - from petab.yaml import validate - try: validate(args.yaml_file_name) - except ValidationError as e: + except SchemaValidationError as e: logger.error( "Provided YAML file does not adhere to PEtab " f"schema: {e}" ) @@ -140,37 +172,54 @@ def main(): # problem = petab.CompositeProblem.from_yaml(args.yaml_file_name) return - problem = petab.Problem.from_yaml(args.yaml_file_name) - - else: - logger.debug("Looking for...") - if args.sbml_file_name: - logger.debug(f"\tSBML model: {args.sbml_file_name}") - if args.condition_file_name: - logger.debug(f"\tCondition table: {args.condition_file_name}") - if args.observable_file_name: - logger.debug(f"\tObservable table: {args.observable_file_name}") - if args.measurement_file_name: - logger.debug(f"\tMeasurement table: {args.measurement_file_name}") - if args.parameter_file_name: - logger.debug(f"\tParameter table: {args.parameter_file_name}") - if args.visualization_file_name: - logger.debug( - "\tVisualization table: " f"{args.visualization_file_name}" - ) + match get_major_version(args.yaml_file_name): + case 1: + problem = petab.Problem.from_yaml(args.yaml_file_name) + ret = petab.lint.lint_problem(problem) + sys.exit(ret) + case 2: + validation_issues = 
lint_problem(args.yaml_file_name) + if validation_issues: + validation_issues.log(logger=logger) + sys.exit(1) + logger.info("PEtab format check completed successfully.") + sys.exit(0) + case _: + logger.error( + "The provided PEtab files are of unsupported version " + f"or the `{FORMAT_VERSION}` field is missing in the yaml " + "file." + ) + + # DEPRECATED - only supported for v1 + logger.debug("Looking for...") + if args.sbml_file_name: + logger.debug(f"\tSBML model: {args.sbml_file_name}") + if args.condition_file_name: + logger.debug(f"\tCondition table: {args.condition_file_name}") + if args.observable_file_name: + logger.debug(f"\tObservable table: {args.observable_file_name}") + if args.measurement_file_name: + logger.debug(f"\tMeasurement table: {args.measurement_file_name}") + if args.parameter_file_name: + logger.debug(f"\tParameter table: {args.parameter_file_name}") + if args.visualization_file_name: + logger.debug( + "\tVisualization table: " f"{args.visualization_file_name}" + ) - try: - problem = petab.Problem.from_files( - sbml_file=args.sbml_file_name, - condition_file=args.condition_file_name, - measurement_file=args.measurement_file_name, - parameter_file=args.parameter_file_name, - observable_files=args.observable_file_name, - visualization_files=args.visualization_file_name, - ) - except FileNotFoundError as e: - logger.error(e) - sys.exit(1) + try: + problem = petab.Problem.from_files( + sbml_file=args.sbml_file_name, + condition_file=args.condition_file_name, + measurement_file=args.measurement_file_name, + parameter_file=args.parameter_file_name, + observable_files=args.observable_file_name, + visualization_files=args.visualization_file_name, + ) + except FileNotFoundError as e: + logger.error(e) + sys.exit(1) ret = petab.lint.lint_problem(problem) sys.exit(ret) diff --git a/petab/schemas/petab_schema.v1.0.0.yaml b/petab/schemas/petab_schema.v1.0.0.yaml index bf012e57..255fbb04 100644 --- a/petab/schemas/petab_schema.v1.0.0.yaml +++ 
b/petab/schemas/petab_schema.v1.0.0.yaml @@ -1,12 +1,26 @@ -# For syntax see: https://json-schema.org/understanding-json-schema/index.html +# For syntax see: https://json-schema.org/understanding-json-schema #$schema: "https://json-schema.org/draft/2019-09/meta/core" $schema: "http://json-schema.org/draft-06/schema" description: PEtab parameter estimation problem config file schema +definitions: + list_of_files: + type: array + description: List of files. + items: + type: string + description: File name or URL. + version_number: + type: string + pattern: ^([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?$ + description: Version number (corresponding to PEP 440). + properties: format_version: - type: integer + anyof: + - $ref: "#/definitions/version_number" + - type: integer description: Version of the PEtab format (e.g. 1). parameter_file: @@ -35,45 +49,25 @@ properties: properties: sbml_files: - type: array - description: List of PEtab SBML files. - - items: - type: string - description: PEtab SBML file name or URL. + $ref: "#/definitions/list_of_files" + description: List of SBML model files. measurement_files: - type: array + $ref: "#/definitions/list_of_files" description: List of PEtab measurement files. - items: - type: string - description: PEtab measurement file name or URL. - condition_files: - type: array + $ref: "#/definitions/list_of_files" description: List of PEtab condition files. - items: - type: string - description: PEtab condition file name or URL. - observable_files: - type: array + $ref: "#/definitions/list_of_files" description: List of PEtab observable files. - items: - type: string - description: PEtab observable file name or URL. - visualization_files: - type: array + $ref: "#/definitions/list_of_files" description: List of PEtab visualization files. - items: - type: string - description: PEtab visualization file name or URL. 
- required: - sbml_files - observable_files diff --git a/petab/schemas/petab_schema.v2.0.0.yaml b/petab/schemas/petab_schema.v2.0.0.yaml index 416f0c4a..ddeb428a 100644 --- a/petab/schemas/petab_schema.v2.0.0.yaml +++ b/petab/schemas/petab_schema.v2.0.0.yaml @@ -1,17 +1,26 @@ -# For syntax see: https://json-schema.org/understanding-json-schema/index.html +# For syntax see: https://json-schema.org/understanding-json-schema #$schema: "https://json-schema.org/draft/2019-09/meta/core" $schema: "http://json-schema.org/draft-06/schema" description: PEtab parameter estimation problem config file schema +definitions: + list_of_files: + type: array + description: List of files. + items: + type: string + description: File name or URL. + version_number: + type: string + pattern: ^([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?$ + description: Version number (corresponding to PEP 440). + properties: format_version: anyof: - - type: string - # (corresponding to PEP 440). - pattern: ^([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?$ + - $ref: "#/definitions/version_number" - type: integer - description: Version of the PEtab format parameter_file: @@ -60,36 +69,20 @@ properties: additionalProperties: false measurement_files: - type: array description: List of PEtab measurement files. - - items: - type: string - description: PEtab measurement file name or URL. + $ref: "#/definitions/list_of_files" condition_files: - type: array description: List of PEtab condition files. - - items: - type: string - description: PEtab condition file name or URL. + $ref: "#/definitions/list_of_files" observable_files: - type: array description: List of PEtab observable files. - - items: - type: string - description: PEtab observable file name or URL. 
+ $ref: "#/definitions/list_of_files" visualization_files: - type: array description: List of PEtab visualization files. - - items: - type: string - description: PEtab visualization file name or URL. + $ref: "#/definitions/list_of_files" mapping_file: type: string @@ -113,8 +106,7 @@ properties: Information on a specific extension properties: version: - type: string - pattern: ^([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?$ + $ref: "#/definitions/version_number" required: - version diff --git a/petab/simplify.py b/petab/simplify.py index 5946be05..cd7ba25e 100644 --- a/petab/simplify.py +++ b/petab/simplify.py @@ -1,115 +1,7 @@ -"""Functionality for simplifying PEtab problems""" -from math import nan +"""Deprecated module for simplifying PEtab problems. -import pandas as pd +Use petab.v1.simplify instead.""" +from petab import _deprecated_import_v1 +from petab.v1.simplify import * # noqa: F403, F401, E402 -import petab - -from . 
import Problem -from .C import * # noqa: F403 -from .lint import lint_problem - -__all__ = [ - "remove_nan_measurements", - "remove_unused_observables", - "remove_unused_conditions", - "simplify_problem", - "condition_parameters_to_parameter_table", -] - - -def remove_nan_measurements(problem: Problem): - """Drop any measurements that are NaN""" - problem.measurement_df = problem.measurement_df[ - ~problem.measurement_df[MEASUREMENT].isna() - ] - problem.measurement_df.reset_index(inplace=True, drop=True) - - -def remove_unused_observables(problem: Problem): - """Remove observables that have no measurements""" - measured_observables = set(problem.measurement_df[OBSERVABLE_ID].unique()) - problem.observable_df = problem.observable_df[ - problem.observable_df.index.isin(measured_observables) - ] - - -def remove_unused_conditions(problem: Problem): - """Remove conditions that have no measurements""" - measured_conditions = set( - problem.measurement_df[SIMULATION_CONDITION_ID].unique() - ) - if PREEQUILIBRATION_CONDITION_ID in problem.measurement_df: - measured_conditions |= set( - problem.measurement_df[PREEQUILIBRATION_CONDITION_ID].unique() - ) - - problem.condition_df = problem.condition_df[ - problem.condition_df.index.isin(measured_conditions) - ] - - -def simplify_problem(problem: Problem): - if lint_problem(problem): - raise ValueError("Invalid PEtab problem supplied.") - - remove_unused_observables(problem) - remove_unused_conditions(problem) - condition_parameters_to_parameter_table(problem) - - if lint_problem(problem): - raise AssertionError("Invalid PEtab problem generated.") - - -def condition_parameters_to_parameter_table(problem: Problem): - """Move parameters from the condition table to the parameters table, if - the same parameter value is used for all conditions. 
- """ - if ( - problem.condition_df is None - or problem.condition_df.empty - or problem.model is None - ): - return - - replacements = {} - for parameter_id in problem.condition_df: - if parameter_id == CONDITION_NAME: - continue - - if problem.model.is_state_variable(parameter_id): - # initial states can't go the parameters table - continue - - series = problem.condition_df[parameter_id] - value = petab.to_float_if_float(series.iloc[0]) - - # same value for all conditions and no parametric overrides (str)? - if isinstance(value, float) and len(series.unique()) == 1: - replacements[parameter_id] = series.iloc[0] - - if not replacements: - return - - rows = [ - { - PARAMETER_ID: parameter_id, - PARAMETER_SCALE: LIN, - LOWER_BOUND: nan, - UPPER_BOUND: nan, - NOMINAL_VALUE: value, - ESTIMATE: 0, - } - for parameter_id, value in replacements.items() - ] - rows = pd.DataFrame(rows) - rows.set_index(PARAMETER_ID, inplace=True) - - if problem.parameter_df is None: - problem.parameter_df = rows - else: - problem.parameter_df = pd.concat([problem.parameter_df, rows]) - - problem.condition_df = problem.condition_df.drop( - columns=replacements.keys() - ) +_deprecated_import_v1(__name__) diff --git a/petab/simulate.py b/petab/simulate.py index 59aa46cf..afa866a6 100644 --- a/petab/simulate.py +++ b/petab/simulate.py @@ -1,261 +1,8 @@ -"""PEtab simulator base class and related functions.""" +"""Deprecated module for simulating PEtab models. -import abc -import pathlib -import shutil -import tempfile -from typing import Dict, Optional, Union -from warnings import warn +Use petab.v1.simulate instead.""" -import numpy as np -import pandas as pd -import sympy as sp +from petab import _deprecated_import_v1 +from petab.v1.simulate import * # noqa: F403, F401, E402 -import petab - -__all__ = ["Simulator", "sample_noise"] - - -class Simulator(abc.ABC): - """Base class that specific simulators should inherit. 
- - Specific simulators should minimally implement the - :meth:`petab.simulate.Simulator.simulate_without_noise` method. - Example (AMICI): https://bit.ly/33SUSG4 - - Attributes: - noise_formulas: - The formulae that will be used to calculate the scale of noise - distributions. - petab_problem: - A PEtab problem, which will be simulated. - rng: - A NumPy random generator, used to sample from noise distributions. - temporary_working_dir: - Whether ``working_dir`` is a temporary directory, which can be - deleted without significant consequence. - working_dir: - All simulator-specific output files will be saved here. This - directory and its contents may be modified and deleted, and - should be considered ephemeral. - """ - - def __init__( - self, - petab_problem: petab.Problem, - working_dir: Optional[Union[pathlib.Path, str]] = None, - ): - """Initialize the simulator. - - Initialize the simulator with sufficient information to perform a - simulation. If no working directory is specified, a temporary one is - created. - - Arguments: - petab_problem: - A PEtab problem. - working_dir: - All simulator-specific output files will be saved here. This - directory and its contents may be modified and deleted, and - should be considered ephemeral. - """ - self.petab_problem = petab_problem - - self.temporary_working_dir = False - if working_dir is None: - working_dir = tempfile.mkdtemp() - self.temporary_working_dir = True - if not isinstance(working_dir, pathlib.Path): - working_dir = pathlib.Path(working_dir) - self.working_dir = working_dir - self.working_dir.mkdir(parents=True, exist_ok=True) - - self.noise_formulas = petab.calculate.get_symbolic_noise_formulas( - self.petab_problem.observable_df - ) - self.rng = np.random.default_rng() - - def remove_working_dir(self, force: bool = False, **kwargs) -> None: - """Remove the simulator working directory, and all files within. - - See the :meth:`petab.simulate.Simulator.__init__` method arguments. 
- - Arguments: - force: - If ``True``, the working directory is removed regardless of - whether it is a temporary directory. - **kwargs: - Additional keyword arguments are passed to - :func:`shutil.rmtree`. - """ - if force or self.temporary_working_dir: - shutil.rmtree(self.working_dir, **kwargs) - if self.working_dir.is_dir(): - warn( - "Failed to remove the working directory: " - + str(self.working_dir), - stacklevel=2, - ) - else: - warn( - "By default, specified working directories are not removed. " - "Please call this method with `force=True`, or manually " - f"delete the working directory: {self.working_dir}", - stacklevel=2, - ) - - @abc.abstractmethod - def simulate_without_noise(self) -> pd.DataFrame: - """Simulate the PEtab problem. - - This is an abstract method that should be implemented with a simulation - package. Examples of this are referenced in the class docstring. - - Returns: - Simulated data, as a PEtab measurements table, which should be - equivalent to replacing all values in the - :const:`petab.C.MEASUREMENT` column of the measurements table (of - the PEtab problem supplied to the - :meth:`petab.simulate.Simulator.__init__` method), with - simulated values. - """ - raise NotImplementedError() - - def simulate( - self, - noise: bool = False, - noise_scaling_factor: float = 1, - as_measurement: bool = False, - **kwargs, - ) -> pd.DataFrame: - """Simulate a PEtab problem, optionally with noise. - - Arguments: - noise: If True, noise is added to simulated data. - noise_scaling_factor: - A multiplier of the scale of the noise distribution. - as_measurement: - Whether the data column is named :const:`petab.C.MEASUREMENT` - (`True`) or :const:`petab.C.SIMULATION` (`False`). - **kwargs: - Additional keyword arguments are passed to - :meth:`petab.simulate.Simulator.simulate_without_noise`. - - Returns: - Simulated data, as a PEtab measurements table. 
- """ - simulation_df = self.simulate_without_noise(**kwargs) - if noise: - simulation_df = self.add_noise(simulation_df, noise_scaling_factor) - - columns = {petab.C.MEASUREMENT: petab.C.SIMULATION} - if as_measurement: - columns = {petab.C.SIMULATION: petab.C.MEASUREMENT} - simulation_df = simulation_df.rename(columns=columns) - - return simulation_df - - def add_noise( - self, - simulation_df: pd.DataFrame, - noise_scaling_factor: float = 1, - **kwargs, - ) -> pd.DataFrame: - """Add noise to simulated data. - - Arguments: - simulation_df: - A PEtab measurements table that contains simulated data. - noise_scaling_factor: - A multiplier of the scale of the noise distribution. - **kwargs: - Additional keyword arguments are passed to - :func:`sample_noise`. - - Returns: - Simulated data with noise, as a PEtab measurements table. - """ - simulation_df_with_noise = simulation_df.copy() - simulation_df_with_noise[petab.C.MEASUREMENT] = [ - sample_noise( - self.petab_problem, - row, - row[petab.C.MEASUREMENT], - self.noise_formulas, - self.rng, - noise_scaling_factor, - **kwargs, - ) - for _, row in simulation_df_with_noise.iterrows() - ] - return simulation_df_with_noise - - -def sample_noise( - petab_problem: petab.Problem, - measurement_row: pd.Series, - simulated_value: float, - noise_formulas: Optional[Dict[str, sp.Expr]] = None, - rng: Optional[np.random.Generator] = None, - noise_scaling_factor: float = 1, - zero_bounded: bool = False, -) -> float: - """Generate a sample from a PEtab noise distribution. - - Arguments: - petab_problem: - The PEtab problem used to generate the simulated value. - Instance of :class:`petab.Problem`. - measurement_row: - The row in the PEtab problem measurement table that corresponds - to the simulated value. - simulated_value: - A simulated value without noise. - noise_formulas: - Processed noise formulas from the PEtab observables table, in the - form output by :func:`petab.calculate.get_symbolic_noise_formulas`. 
- rng: - A NumPy random generator. - noise_scaling_factor: - A multiplier of the scale of the noise distribution. - zero_bounded: - Return zero if the sign of the return value and ``simulated_value`` - differ. Can be used to ensure non-negative and non-positive values, - if the sign of ``simulated_value`` should not change. - - Returns: - The sample from the PEtab noise distribution. - """ - if noise_formulas is None: - noise_formulas = petab.calculate.get_symbolic_noise_formulas( - petab_problem.observable_df - ) - if rng is None: - rng = np.random.default_rng() - - noise_value = petab.calculate.evaluate_noise_formula( - measurement_row, - noise_formulas, - petab_problem.parameter_df, - simulated_value, - ) - - # default noise distribution is petab.C.NORMAL - noise_distribution = petab_problem.observable_df.loc[ - measurement_row[petab.C.OBSERVABLE_ID] - ].get(petab.C.NOISE_DISTRIBUTION, petab.C.NORMAL) - # an empty noise distribution column in an observables table can result in - # `noise_distribution == float('nan')` - if pd.isna(noise_distribution): - noise_distribution = petab.C.NORMAL - - # below is e.g.: `np.random.normal(loc=simulation, scale=noise_value)` - simulated_value_with_noise = getattr(rng, noise_distribution)( - loc=simulated_value, scale=noise_value * noise_scaling_factor - ) - - if zero_bounded and np.sign(simulated_value) != np.sign( - simulated_value_with_noise - ): - return 0.0 - return simulated_value_with_noise +_deprecated_import_v1(__name__) diff --git a/petab/v1/C.py b/petab/v1/C.py new file mode 100644 index 00000000..70ce22c3 --- /dev/null +++ b/petab/v1/C.py @@ -0,0 +1,372 @@ +# pylint: disable:invalid-name +""" +This file contains constant definitions. 
+""" +import math as _math +import sys + +# MEASUREMENTS + +#: +OBSERVABLE_ID = "observableId" + +#: +PREEQUILIBRATION_CONDITION_ID = "preequilibrationConditionId" + +#: +SIMULATION_CONDITION_ID = "simulationConditionId" + +#: +MEASUREMENT = "measurement" + +#: +TIME = "time" + +#: Time value that indicates steady-state measurements +TIME_STEADY_STATE = _math.inf + +#: +OBSERVABLE_PARAMETERS = "observableParameters" + +#: +NOISE_PARAMETERS = "noiseParameters" + +#: +DATASET_ID = "datasetId" + +#: +REPLICATE_ID = "replicateId" + +#: Mandatory columns of measurement table +MEASUREMENT_DF_REQUIRED_COLS = [ + OBSERVABLE_ID, + SIMULATION_CONDITION_ID, + MEASUREMENT, + TIME, +] + +#: Optional columns of measurement table +MEASUREMENT_DF_OPTIONAL_COLS = [ + PREEQUILIBRATION_CONDITION_ID, + OBSERVABLE_PARAMETERS, + NOISE_PARAMETERS, + DATASET_ID, + REPLICATE_ID, +] + +#: Measurement table columns +MEASUREMENT_DF_COLS = [ + MEASUREMENT_DF_REQUIRED_COLS[0], + MEASUREMENT_DF_OPTIONAL_COLS[0], + *MEASUREMENT_DF_REQUIRED_COLS[1:], + *MEASUREMENT_DF_OPTIONAL_COLS[1:], +] + + +# PARAMETERS + +#: +PARAMETER_ID = "parameterId" +#: +PARAMETER_NAME = "parameterName" +#: +PARAMETER_SCALE = "parameterScale" +#: +LOWER_BOUND = "lowerBound" +#: +UPPER_BOUND = "upperBound" +#: +NOMINAL_VALUE = "nominalValue" +#: +ESTIMATE = "estimate" +#: +INITIALIZATION_PRIOR_TYPE = "initializationPriorType" +#: +INITIALIZATION_PRIOR_PARAMETERS = "initializationPriorParameters" +#: +OBJECTIVE_PRIOR_TYPE = "objectivePriorType" +#: +OBJECTIVE_PRIOR_PARAMETERS = "objectivePriorParameters" + +#: Mandatory columns of parameter table +PARAMETER_DF_REQUIRED_COLS = [ + PARAMETER_ID, + PARAMETER_SCALE, + LOWER_BOUND, + UPPER_BOUND, + ESTIMATE, +] + +#: Optional columns of parameter table +PARAMETER_DF_OPTIONAL_COLS = [ + PARAMETER_NAME, + NOMINAL_VALUE, + INITIALIZATION_PRIOR_TYPE, + INITIALIZATION_PRIOR_PARAMETERS, + OBJECTIVE_PRIOR_TYPE, + OBJECTIVE_PRIOR_PARAMETERS, +] + +#: Parameter table columns 
+PARAMETER_DF_COLS = [ + PARAMETER_DF_REQUIRED_COLS[0], + PARAMETER_DF_OPTIONAL_COLS[0], + *PARAMETER_DF_REQUIRED_COLS[1:], + *PARAMETER_DF_OPTIONAL_COLS[1:], +] + +#: +INITIALIZATION = "initialization" +#: +OBJECTIVE = "objective" + + +# CONDITIONS + +#: +CONDITION_ID = "conditionId" +#: +CONDITION_NAME = "conditionName" + + +# OBSERVABLES + +#: +OBSERVABLE_NAME = "observableName" +#: +OBSERVABLE_FORMULA = "observableFormula" +#: +NOISE_FORMULA = "noiseFormula" +#: +OBSERVABLE_TRANSFORMATION = "observableTransformation" +#: +NOISE_DISTRIBUTION = "noiseDistribution" + +#: Mandatory columns of observables table +OBSERVABLE_DF_REQUIRED_COLS = [ + OBSERVABLE_ID, + OBSERVABLE_FORMULA, + NOISE_FORMULA, +] + +#: Optional columns of observables table +OBSERVABLE_DF_OPTIONAL_COLS = [ + OBSERVABLE_NAME, + OBSERVABLE_TRANSFORMATION, + NOISE_DISTRIBUTION, +] + +#: Observables table columns +OBSERVABLE_DF_COLS = [ + *OBSERVABLE_DF_REQUIRED_COLS, + *OBSERVABLE_DF_OPTIONAL_COLS, +] + + +# TRANSFORMATIONS + +#: +LIN = "lin" +#: +LOG = "log" +#: +LOG10 = "log10" +#: Supported observable transformations +OBSERVABLE_TRANSFORMATIONS = [LIN, LOG, LOG10] + + +# NOISE MODELS + +#: +UNIFORM = "uniform" +#: +PARAMETER_SCALE_UNIFORM = "parameterScaleUniform" +#: +NORMAL = "normal" +#: +PARAMETER_SCALE_NORMAL = "parameterScaleNormal" +#: +LAPLACE = "laplace" +#: +PARAMETER_SCALE_LAPLACE = "parameterScaleLaplace" +#: +LOG_NORMAL = "logNormal" +#: +LOG_LAPLACE = "logLaplace" + +#: Supported prior types +PRIOR_TYPES = [ + UNIFORM, + NORMAL, + LAPLACE, + LOG_NORMAL, + LOG_LAPLACE, + PARAMETER_SCALE_UNIFORM, + PARAMETER_SCALE_NORMAL, + PARAMETER_SCALE_LAPLACE, +] + +#: Supported noise distributions +NOISE_MODELS = [NORMAL, LAPLACE] + + +# VISUALIZATION + +#: +PLOT_ID = "plotId" +#: +PLOT_NAME = "plotName" +#: +PLOT_TYPE_SIMULATION = "plotTypeSimulation" +#: +PLOT_TYPE_DATA = "plotTypeData" +#: +X_VALUES = "xValues" +#: +X_OFFSET = "xOffset" +#: +X_LABEL = "xLabel" +#: +X_SCALE = "xScale" +#: 
+Y_VALUES = "yValues" +#: +Y_OFFSET = "yOffset" +#: +Y_LABEL = "yLabel" +#: +Y_SCALE = "yScale" +#: +LEGEND_ENTRY = "legendEntry" + +#: Mandatory columns of visualization table +VISUALIZATION_DF_REQUIRED_COLS = [PLOT_ID] + +#: Optional columns of visualization table +VISUALIZATION_DF_OPTIONAL_COLS = [ + PLOT_NAME, + PLOT_TYPE_SIMULATION, + PLOT_TYPE_DATA, + X_VALUES, + X_OFFSET, + X_LABEL, + X_SCALE, + Y_VALUES, + Y_OFFSET, + Y_LABEL, + Y_SCALE, + LEGEND_ENTRY, + DATASET_ID, +] + +#: Visualization table columns +VISUALIZATION_DF_COLS = [ + *VISUALIZATION_DF_REQUIRED_COLS, + *VISUALIZATION_DF_OPTIONAL_COLS, +] + +#: Visualization table columns that contain subplot specifications +VISUALIZATION_DF_SUBPLOT_LEVEL_COLS = [ + PLOT_ID, + PLOT_NAME, + PLOT_TYPE_SIMULATION, + PLOT_TYPE_DATA, + X_LABEL, + X_SCALE, + Y_LABEL, + Y_SCALE, +] + +#: Visualization table columns that contain single plot specifications +VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS = [ + X_VALUES, + X_OFFSET, + Y_VALUES, + Y_OFFSET, + LEGEND_ENTRY, + DATASET_ID, +] + +#: +LINE_PLOT = "LinePlot" +#: +BAR_PLOT = "BarPlot" +#: +SCATTER_PLOT = "ScatterPlot" +#: Supported plot types +PLOT_TYPES_SIMULATION = [LINE_PLOT, BAR_PLOT, SCATTER_PLOT] + +#: Supported xScales +X_SCALES = [LIN, LOG, LOG10] + +#: Supported yScales +Y_SCALES = [LIN, LOG, LOG10] + + +#: +MEAN_AND_SD = "MeanAndSD" +#: +MEAN_AND_SEM = "MeanAndSEM" +#: +REPLICATE = "replicate" +#: +PROVIDED = "provided" +#: Supported settings for handling replicates +PLOT_TYPES_DATA = [MEAN_AND_SD, MEAN_AND_SEM, REPLICATE, PROVIDED] + + +# YAML +#: +FORMAT_VERSION = "format_version" +#: +PARAMETER_FILE = "parameter_file" +#: +PROBLEMS = "problems" +#: +SBML_FILES = "sbml_files" +#: +MODEL_FILES = "model_files" +#: +MODEL_LOCATION = "location" +#: +MODEL_LANGUAGE = "language" +#: +CONDITION_FILES = "condition_files" +#: +MEASUREMENT_FILES = "measurement_files" +#: +OBSERVABLE_FILES = "observable_files" +#: +VISUALIZATION_FILES = "visualization_files" +#: 
+MAPPING_FILES = "mapping_files" +#: +EXTENSIONS = "extensions" + + +# MAPPING +#: +PETAB_ENTITY_ID = "petabEntityId" +#: +MODEL_ENTITY_ID = "modelEntityId" +#: +MAPPING_DF_REQUIRED_COLS = [PETAB_ENTITY_ID, MODEL_ENTITY_ID] + +# MORE + +#: +SIMULATION = "simulation" +#: +RESIDUAL = "residual" +#: +NOISE_VALUE = "noiseValue" + +# separator for multiple parameter values (bounds, observableParameters, ...) +PARAMETER_SEPARATOR = ";" + + +__all__ = [ + x + for x in dir(sys.modules[__name__]) + if not x.startswith("_") and x not in {"sys", "math"} +] diff --git a/petab/v1/__init__.py b/petab/v1/__init__.py new file mode 100644 index 00000000..a8609621 --- /dev/null +++ b/petab/v1/__init__.py @@ -0,0 +1,23 @@ +"""The PEtab 1.0 subpackage. + +Contains all functionality related to handling PEtab 1.0 problems. +""" + +from ..version import __version__ # noqa: F401, E402 +from .C import * # noqa: F403, F401, E402 +from .calculate import * # noqa: F403, F401, E402 +from .composite_problem import * # noqa: F403, F401, E402 +from .conditions import * # noqa: F403, F401, E402 +from .core import * # noqa: F403, F401, E402 +from .format_version import __format_version__ # noqa: F401, E402 +from .lint import * # noqa: F403, F401, E402 +from .mapping import * # noqa: F403, F401, E402 +from .measurements import * # noqa: F403, F401, E402 +from .observables import * # noqa: F403, F401, E402 +from .parameter_mapping import * # noqa: F403, F401, E402 +from .parameters import * # noqa: F403, F401, E402 +from .problem import * # noqa: F403, F401, E402 +from .sampling import * # noqa: F403, F401, E402 +from .sbml import * # noqa: F403, F401, E402 +from .simulate import * # noqa: F403, F401, E402 +from .yaml import * # noqa: F403, F401, E402 diff --git a/petab/v1/calculate.py b/petab/v1/calculate.py new file mode 100644 index 00000000..3cc86f73 --- /dev/null +++ b/petab/v1/calculate.py @@ -0,0 +1,430 @@ +"""Functions performing various calculations.""" + +import numbers +from functools 
import reduce + +import numpy as np +import pandas as pd +import sympy as sp + +import petab.v1 as petab + +from .C import * +from .math import sympify_petab + +__all__ = [ + "calculate_residuals", + "calculate_residuals_for_table", + "get_symbolic_noise_formulas", + "evaluate_noise_formula", + "calculate_chi2", + "calculate_chi2_for_table_from_residuals", + "calculate_llh", + "calculate_llh_for_table", + "calculate_single_llh", +] + + +def calculate_residuals( + measurement_dfs: list[pd.DataFrame] | pd.DataFrame, + simulation_dfs: list[pd.DataFrame] | pd.DataFrame, + observable_dfs: list[pd.DataFrame] | pd.DataFrame, + parameter_dfs: list[pd.DataFrame] | pd.DataFrame, + normalize: bool = True, + scale: bool = True, +) -> list[pd.DataFrame]: + """Calculate residuals. + + Arguments: + measurement_dfs: + The problem measurement tables. + simulation_dfs: + Simulation tables corresponding to the measurement tables. + observable_dfs: + The problem observable tables. + parameter_dfs: + The problem parameter tables. + normalize: + Whether to normalize residuals by the noise standard deviation + terms. + scale: + Whether to calculate residuals of scaled values. + + Returns: + List of DataFrames in the same structure as `measurement_dfs` + with a field `residual` instead of measurement. 
def calculate_residuals_for_table(
    measurement_df: pd.DataFrame,
    simulation_df: pd.DataFrame,
    observable_df: pd.DataFrame,
    parameter_df: pd.DataFrame,
    normalize: bool = True,
    scale: bool = True,
) -> pd.DataFrame:
    """Calculate residuals for a single measurement table.

    Arguments:
        measurement_df:
            The measurement table.
        simulation_df:
            The simulation table corresponding to ``measurement_df``.
        observable_df:
            The observable table, indexed by observable ID.
        parameter_df:
            The parameter table, indexed by parameter ID.
        normalize:
            Whether to divide each residual by the evaluated noise formula
            of its observable.
        scale:
            Whether to apply the observable transformation to simulation and
            measurement before computing the difference.

    Returns:
        A deep copy of ``measurement_df`` in which the measurement column is
        renamed to the residual column and holds ``simulation - measurement``
        (optionally scaled and normalized) as ``float64``.
    """
    # create residual df as copy of measurement df, change column
    residual_df = measurement_df.copy(deep=True).rename(
        columns={MEASUREMENT: RESIDUAL}
    )
    residual_df[RESIDUAL] = residual_df[RESIDUAL].astype("float64")

    # columns used to match a measurement row to its simulation row:
    # measurement-table columns (except the measurement itself) that are
    # present in both tables
    compared_cols = set(MEASUREMENT_DF_COLS)
    compared_cols -= {MEASUREMENT}
    compared_cols &= set(measurement_df.columns)
    compared_cols &= set(simulation_df.columns)

    # noise formulas are only needed (and only parsed) when normalizing
    noise_formulas = (
        get_symbolic_noise_formulas(observable_df) if normalize else None
    )

    # iterate over measurements, find corresponding simulations
    for irow, row in measurement_df.iterrows():
        measurement = row[MEASUREMENT]

        # a simulation row matches if every compared column is equal, or the
        # measurement leaves that column empty
        masks = [
            (simulation_df[col] == row[col]) | petab.is_empty(row[col])
            for col in compared_cols
        ]
        mask = reduce(lambda x, y: x & y, masks)
        # the first matching simulation row is used
        simulation = simulation_df.loc[mask][SIMULATION].iloc[0]

        if scale:
            observable = observable_df.loc[row[OBSERVABLE_ID]]
            trafo = observable.get(OBSERVABLE_TRANSFORMATION, LIN)
            # The optional column may exist with an empty (NaN) cell for this
            # observable; fall back to the default in that case, too.
            if petab.is_empty(trafo):
                trafo = LIN
            simulation = petab.scale(simulation, trafo)
            measurement = petab.scale(measurement, trafo)

        # non-normalized residual is just the difference
        residual = simulation - measurement

        if normalize:
            # divide by the noise value evaluated for this measurement
            residual /= evaluate_noise_formula(
                row, noise_formulas, parameter_df, simulation
            )

        # fill in value
        residual_df.loc[irow, RESIDUAL] = residual
    return residual_df
def evaluate_noise_formula(
    measurement: pd.Series,
    noise_formulas: dict[str, sp.Expr],
    parameter_df: pd.DataFrame,
    simulation: numbers.Number,
) -> float:
    """Fill in parameters for `measurement` and evaluate noise_formula.

    Arguments:
        measurement: A measurement table row.
        noise_formulas: The noise formulas as computed by
            `get_symbolic_noise_formulas`.
        parameter_df: The parameter table.
        simulation: The simulation corresponding to the measurement, scaled.

    Returns:
        The noise value.

    Raises:
        ValueError:
            If no noise formula is available for the observable, or if the
            formula cannot be reduced to a number after all substitutions.
    """
    # the observable id
    observable_id = measurement[OBSERVABLE_ID]

    noise_formula = noise_formulas[observable_id]
    if noise_formula is None:
        # `get_symbolic_noise_formulas` stores None if the observable table
        # has no noise formula column
        raise ValueError(
            f"No noise formula available for observable {observable_id}."
        )

    # extract measurement specific overrides
    noise_parameter_overrides = petab.split_parameter_replacement_list(
        measurement.get(NOISE_PARAMETERS, None)
    )
    # fill in measurement specific parameters
    overrides = {
        sp.Symbol(
            f"noiseParameter{i_par + 1}_{observable_id}", real=True
        ): override
        for i_par, override in enumerate(noise_parameter_overrides)
    }

    # fill in observables
    overrides[sp.Symbol(observable_id, real=True)] = simulation

    # fill in general parameters
    for row in parameter_df.itertuples():
        overrides[sp.Symbol(row.Index, real=True)] = row.nominalValue

    # non-numeric overrides are parameter IDs; replace them by the nominal
    # value from the parameter table
    overrides = {
        symbol: value
        if isinstance(value, numbers.Number)
        else parameter_df.loc[value, NOMINAL_VALUE]
        for symbol, value in overrides.items()
    }

    # replace parameters by values in formula
    noise_value = noise_formula.subs(overrides)

    # conversion is possible if all parameters are replaced
    try:
        return float(noise_value)
    except TypeError as e:
        # report the original formula and only the symbols that are still
        # unresolved after substitution
        raise ValueError(
            f"Cannot replace all parameters in noise formula {noise_formula} "
            f"for observable {observable_id}. "
            f"Missing {noise_value.free_symbols}. Note that model states "
            "are currently not supported."
        ) from e
def calculate_llh_for_table(
    measurement_df: pd.DataFrame,
    simulation_df: pd.DataFrame,
    observable_df: pd.DataFrame,
    parameter_df: pd.DataFrame,
) -> float:
    """Calculate log-likelihood for one set of tables.

    Arguments:
        measurement_df:
            The measurement table.
        simulation_df:
            The simulation table corresponding to ``measurement_df``.
        observable_df:
            The observable table, indexed by observable ID.
        parameter_df:
            The parameter table, indexed by parameter ID.

    Returns:
        The sum of the log-likelihood contributions of all measurements.
    """
    llhs = []

    # columns used to match a measurement row to its simulation row
    compared_cols = set(MEASUREMENT_DF_COLS)
    compared_cols -= {MEASUREMENT}
    compared_cols &= set(measurement_df.columns)
    compared_cols &= set(simulation_df.columns)

    # compute noise formulas for observables
    noise_formulas = get_symbolic_noise_formulas(observable_df)

    # iterate over measurements, find corresponding simulations
    for _, row in measurement_df.iterrows():
        measurement = row[MEASUREMENT]

        # a simulation row matches if every compared column is equal, or the
        # measurement leaves that column empty
        masks = [
            (simulation_df[col] == row[col]) | petab.is_empty(row[col])
            for col in compared_cols
        ]
        mask = reduce(lambda x, y: x & y, masks)

        # the first matching simulation row is used
        simulation = simulation_df.loc[mask][SIMULATION].iloc[0]

        observable = observable_df.loc[row[OBSERVABLE_ID]]

        # get scale; the optional column may exist with an empty (NaN) cell
        # for this observable, which also means "use the default"
        scale = observable.get(OBSERVABLE_TRANSFORMATION, LIN)
        if petab.is_empty(scale):
            scale = LIN

        # get noise standard deviation
        noise_value = evaluate_noise_formula(
            row, noise_formulas, parameter_df, petab.scale(simulation, scale)
        )

        # get noise distribution, again treating an empty cell as default
        noise_distribution = observable.get(NOISE_DISTRIBUTION, NORMAL)
        if petab.is_empty(noise_distribution):
            noise_distribution = NORMAL

        llh = calculate_single_llh(
            measurement, simulation, scale, noise_distribution, noise_value
        )
        llhs.append(llh)
    return sum(llhs)
+ """ + # short-hand + m, s, sigma = measurement, simulation, noise_value + pi, log, log10 = np.pi, np.log, np.log10 + + # go over the possible cases + if noise_distribution == NORMAL and scale == LIN: + nllh = 0.5 * log(2 * pi * sigma**2) + 0.5 * ((s - m) / sigma) ** 2 + elif noise_distribution == NORMAL and scale == LOG: + nllh = ( + 0.5 * log(2 * pi * sigma**2 * m**2) + + 0.5 * ((log(s) - log(m)) / sigma) ** 2 + ) + elif noise_distribution == NORMAL and scale == LOG10: + nllh = ( + 0.5 * log(2 * pi * sigma**2 * m**2 * log(10) ** 2) + + 0.5 * ((log10(s) - log10(m)) / sigma) ** 2 + ) + elif noise_distribution == LAPLACE and scale == LIN: + nllh = log(2 * sigma) + abs((s - m) / sigma) + elif noise_distribution == LAPLACE and scale == LOG: + nllh = log(2 * sigma * m) + abs((log(s) - log(m)) / sigma) + elif noise_distribution == LAPLACE and scale == LOG10: + nllh = log(2 * sigma * m * log(10)) + abs( + (log10(s) - log10(m)) / sigma + ) + else: + raise NotImplementedError( + "Unsupported combination of noise_distribution and scale " + f"specified: {noise_distribution}, {scale}." + ) + return -nllh diff --git a/petab/v1/composite_problem.py b/petab/v1/composite_problem.py new file mode 100644 index 00000000..5f07d523 --- /dev/null +++ b/petab/v1/composite_problem.py @@ -0,0 +1,85 @@ +"""PEtab problems consisting of multiple models""" +import os + +import pandas as pd + +from . 
class CompositeProblem:
    """Representation of a PEtab problem consisting of multiple models

    Attributes:
        problems:
            List of :py:class:`petab.Problem` s
        parameter_df:
            PEtab parameter DataFrame
    """

    def __init__(
        self,
        parameter_df: pd.DataFrame | None = None,
        problems: list[problem.Problem] | None = None,
    ):
        """Constructor

        Arguments:
            parameter_df:
                see CompositeProblem.parameter_df
            problems:
                see CompositeProblem.problems
        """
        self.problems: list[problem.Problem] | None = problems
        self.parameter_df: pd.DataFrame | None = parameter_df

    @staticmethod
    def from_yaml(
        yaml_config: dict | str | os.PathLike,
    ) -> "CompositeProblem":
        """Create from YAML file

        Factory method to create a CompositeProblem instance from a PEtab
        YAML config file

        Arguments:
            yaml_config:
                PEtab configuration as dictionary, or the path of the YAML
                file. For a path, file names inside the configuration are
                resolved relative to the directory of the YAML file; for a
                dictionary they are used as given.

        Returns:
            The composite problem with one :py:class:`petab.Problem` per
            entry of the ``problems`` section.
        """
        # path-like objects are handled like plain strings
        if isinstance(yaml_config, str | os.PathLike):
            path_prefix = os.path.dirname(yaml_config)
            yaml_config = yaml.load_yaml(yaml_config)
        else:
            path_prefix = ""

        def _with_prefix(file_name: str) -> str:
            """Prepend the directory of the YAML file to ``file_name``."""
            return os.path.join(path_prefix, file_name)

        parameter_df = parameters.get_parameter_df(
            _with_prefix(yaml_config[PARAMETER_FILE])
        )

        problems = []
        for problem_config in yaml_config[PROBLEMS]:
            yaml.assert_single_condition_and_sbml_file(problem_config)

            # don't set parameter file if we have multiple models
            cur_problem = problem.Problem.from_files(
                sbml_file=_with_prefix(problem_config[SBML_FILES][0]),
                measurement_file=[
                    _with_prefix(f) for f in problem_config[MEASUREMENT_FILES]
                ],
                condition_file=_with_prefix(
                    problem_config[CONDITION_FILES][0]
                ),
                # a problem without visualization files must not fail with a
                # KeyError
                # NOTE(review): assumes `Problem.from_files` accepts an empty
                # list here -- confirm
                visualization_files=[
                    _with_prefix(f)
                    for f in problem_config.get(VISUALIZATION_FILES, [])
                ],
                observable_files=[
                    _with_prefix(f) for f in problem_config[OBSERVABLE_FILES]
                ],
            )
            problems.append(cur_problem)

        return CompositeProblem(parameter_df=parameter_df, problems=problems)
def get_parametric_overrides(condition_df: pd.DataFrame) -> list[str]:
    """Get parametric overrides from condition table

    Arguments:
        condition_df: PEtab condition table

    Returns:
        List of parameter IDs that are mapped in a condition-specific way,
        i.e. all entries of non-numeric condition table columns that cannot
        be converted to float. Entries are listed column by column in table
        order, so the result is reproducible between runs.
    """
    non_parameter_columns = {CONDITION_ID, CONDITION_NAME}
    result = []

    # `dict.fromkeys` visits each column name once, in table order; iterating
    # a set of names would make the order of the result vary between runs
    for column in dict.fromkeys(condition_df.columns.values.tolist()):
        if column in non_parameter_columns:
            continue

        # purely numeric columns cannot contain parameter IDs
        if np.issubdtype(condition_df[column].dtype, np.number):
            continue

        floatified = condition_df.loc[:, column].apply(core.to_float_if_float)

        # whatever could not be converted to float is kept
        result.extend(x for x in floatified if not isinstance(x, float))
    return result
Any, +) +from warnings import warn + +import numpy as np +import pandas as pd +from pandas.api.types import is_string_dtype + +from . import yaml +from .C import * # noqa: F403 + +logger = logging.getLogger(__name__) +__all__ = [ + "get_simulation_df", + "write_simulation_df", + "get_visualization_df", + "write_visualization_df", + "get_notnull_columns", + "flatten_timepoint_specific_output_overrides", + "concat_tables", + "to_float_if_float", + "is_empty", + "create_combine_archive", + "unique_preserve_order", + "unflatten_simulation_df", +] + +POSSIBLE_GROUPVARS_FLATTENED_PROBLEM = [ + OBSERVABLE_ID, + OBSERVABLE_PARAMETERS, + NOISE_PARAMETERS, + SIMULATION_CONDITION_ID, + PREEQUILIBRATION_CONDITION_ID, +] + + +def get_simulation_df(simulation_file: str | Path) -> pd.DataFrame: + """Read PEtab simulation table + + Arguments: + simulation_file: URL or filename of PEtab simulation table + + Returns: + Simulation DataFrame + """ + return pd.read_csv( + simulation_file, sep="\t", index_col=None, float_precision="round_trip" + ) + + +def write_simulation_df(df: pd.DataFrame, filename: str | Path) -> None: + """Write PEtab simulation table + + Arguments: + df: PEtab simulation table + filename: Destination file name + """ + df.to_csv(filename, sep="\t", index=False) + + +def get_visualization_df( + visualization_file: str | Path | pd.DataFrame | None, +) -> pd.DataFrame | None: + """Read PEtab visualization table + + Arguments: + visualization_file: + URL or filename of PEtab visualization table to read from, + or a DataFrame or None that will be returned as is. 
def get_notnull_columns(df: pd.DataFrame, candidates: Iterable):
    """
    Select the candidate columns of ``df`` that hold at least one value.

    A candidate is dropped if it is not a column of ``df`` or if all of its
    entries are null/nan. The order of ``candidates`` is preserved. The
    output can e.g. be used as input for ``pandas.DataFrame.groupby``.

    Arguments:
        df:
            Dataframe
        candidates:
            Columns of ``df`` to consider
    """
    notnull_columns = []
    for candidate in candidates:
        # unknown columns are skipped silently
        if candidate not in df:
            continue
        # columns without a single non-null entry are skipped as well
        if np.all(df[candidate].isnull()):
            continue
        notnull_columns.append(candidate)
    return notnull_columns
def get_hyperparameter_replacement_id(
    hyperparameter_type,
    observable_replacement_id,
):
    """Build the replacement pattern for a flattened hyperparameter ID.

    Arguments:
        hyperparameter_type:
            The type of hyperparameter, e.g. `noiseParameter`.
        observable_replacement_id:
            The observable replacement ID, e.g. the output of
            `get_observable_replacement_id`.

    Returns:
        The hyperparameter replacement ID. It contains the backreference
        ``\\1``, which a regex substitution replaces by the first matched
        group.
    """
    replacement_id = f"{hyperparameter_type}\\1_{observable_replacement_id}"
    return replacement_id
def flatten_timepoint_specific_output_overrides(
    petab_problem: "petab.problem.Problem",
) -> None:
    """Flatten timepoint-specific output parameter overrides.

    If the PEtab problem definition has timepoint-specific
    `observableParameters` or `noiseParameters` for the same observable,
    replace those by replicating the respective observable.

    This is a helper function for some tools which may not support such
    timepoint-specific mappings. The ``observable_df`` and ``measurement_df``
    attributes of the problem are replaced by the flattened tables.

    Arguments:
        petab_problem:
            PEtab problem to work on. Modified in place.
    """
    new_measurement_dfs = []
    new_observable_dfs = []
    # measurement columns (out of the candidate list) that contain at least
    # one value; each distinct combination of their values becomes its own
    # observable
    groupvars = get_notnull_columns(
        petab_problem.measurement_df, POSSIBLE_GROUPVARS_FLATTENED_PROBLEM
    )

    # per field: replacement pattern -> regex matching the original
    # hyperparameter IDs
    mappings = get_flattened_id_mappings(petab_problem)

    for groupvar, measurements in petab_problem.measurement_df.groupby(
        groupvars, dropna=False
    ):
        obs_id = groupvar[groupvars.index(OBSERVABLE_ID)]
        observable_replacement_id = get_observable_replacement_id(
            groupvars, groupvar
        )

        # copy of the original observable row, renamed to the new ID
        observable = petab_problem.observable_df.loc[obs_id].copy()
        observable.name = observable_replacement_id
        # (measurement column, placeholder prefix, observable column to
        # rewrite); observable parameters are substituted in both formulas
        for field, hyperparameter_type, target in [
            (NOISE_PARAMETERS, "noiseParameter", NOISE_FORMULA),
            (OBSERVABLE_PARAMETERS, "observableParameter", OBSERVABLE_FORMULA),
            (OBSERVABLE_PARAMETERS, "observableParameter", NOISE_FORMULA),
        ]:
            if field not in measurements:
                continue

            if not is_string_dtype(type(observable[target])):
                # if not a string, we don't have to substitute anything
                continue

            hyperparameter_replacement_id = get_hyperparameter_replacement_id(
                hyperparameter_type=hyperparameter_type,
                observable_replacement_id=observable_replacement_id,
            )
            # regex for the original placeholder IDs of this observable
            hyperparameter_id = mappings[field][hyperparameter_replacement_id]
            # rename the placeholders; the replacement pattern keeps the
            # matched placeholder number
            observable[target] = re.sub(
                hyperparameter_id,
                hyperparameter_replacement_id,
                observable[target],
            )

        # point the measurements of this group to the new observable
        measurements[OBSERVABLE_ID] = observable_replacement_id
        new_measurement_dfs.append(measurements)
        new_observable_dfs.append(observable)

    # the collected observable rows are Series; concatenate them as columns
    # and transpose to get one row per new observable
    petab_problem.observable_df = pd.concat(new_observable_dfs, axis=1).T
    petab_problem.observable_df.index.name = OBSERVABLE_ID
    petab_problem.measurement_df = pd.concat(new_measurement_dfs)
+ + Arguments: + simulation_df: + The simulation dataframe. A dataframe in the same format as a PEtab + measurements table, but with the ``measurement`` column switched + with a ``simulation`` column. + petab_problem: + The unflattened PEtab problem. + + Returns: + The simulation dataframe for the unflattened PEtab problem. + """ + mappings = get_flattened_id_mappings(petab_problem) + original_observable_ids = simulation_df[OBSERVABLE_ID].replace( + mappings[OBSERVABLE_ID] + ) + unflattened_simulation_df = simulation_df.assign( + **{ + OBSERVABLE_ID: original_observable_ids, + } + ) + return unflattened_simulation_df + + +def concat_tables( + tables: str | Path | pd.DataFrame | Iterable[pd.DataFrame | str | Path], + file_parser: Callable | None = None, +) -> pd.DataFrame: + """Concatenate DataFrames provided as DataFrames or filenames, and a parser + + Arguments: + tables: + Iterable of tables to join, as DataFrame or filename. + file_parser: + Function used to read the table in case filenames are provided, + accepting a filename as only argument. + + Returns: + The concatenated DataFrames + """ + if isinstance(tables, pd.DataFrame): + return tables + + if isinstance(tables, str | Path): + return file_parser(tables) + + df = pd.DataFrame() + + for tmp_df in tables: + # load from file, if necessary + if isinstance(tmp_df, str | Path): + tmp_df = file_parser(tmp_df) + + df = pd.concat( + [df, tmp_df], + sort=False, + ignore_index=isinstance(tmp_df.index, pd.RangeIndex), + ) + + return df + + +def to_float_if_float(x: Any) -> Any: + """Return input as float if possible, otherwise return as is + + Arguments: + x: Anything + + Returns: + ``x`` as float if possible, otherwise ``x`` + """ + try: + return float(x) + except (ValueError, TypeError): + return x + + +def is_empty(val) -> bool: + """Check if the value `val`, e.g. a table entry, is empty. + + Arguments: + val: The value to check. + + Returns: + Whether the field is to be considered empty. 
+ """ + return val == "" or pd.isnull(val) + + +def create_combine_archive( + yaml_file: str | Path, + filename: str | Path, + family_name: str | None = None, + given_name: str | None = None, + email: str | None = None, + organization: str | None = None, +) -> None: + """Create COMBINE archive (https://co.mbine.org/documents/archive) based + on PEtab YAML file. + + Arguments: + yaml_file: Path to PEtab YAML file + filename: Destination file name + family_name: Family name of archive creator + given_name: Given name of archive creator + email: E-mail address of archive creator + organization: Organization of archive creator + """ + path_prefix = os.path.dirname(str(yaml_file)) + yaml_config = yaml.load_yaml(yaml_file) + + # function-level import, because module-level import interfered with + # other SWIG interfaces + try: + import libcombine + except ImportError as err: + raise ImportError( + "To use PEtab's COMBINE functionality, libcombine " + "(python-libcombine) must be installed." + ) from err + + def _add_file_metadata(location: str, description: str = ""): + """Add metadata to the added file""" + omex_description = libcombine.OmexDescription() + omex_description.setAbout(location) + omex_description.setDescription(description) + omex_description.setCreated( + libcombine.OmexDescription.getCurrentDateAndTime() + ) + archive.addMetadata(location, omex_description) + + archive = libcombine.CombineArchive() + + # Add PEtab files and metadata + archive.addFile( + str(yaml_file), + os.path.basename(yaml_file), + "http://identifiers.org/combine.specifications/petab.version-1", + True, + ) + _add_file_metadata( + location=os.path.basename(yaml_file), description="PEtab YAML file" + ) + + # Add parameter file(s) that describe a single parameter table. + # Works for a single file name, or a list of file names. 
def unique_preserve_order(seq: Sequence) -> list:
    """Return a list of unique elements in Sequence, keeping only the first
    occurrence of each element

    Parameters:
        seq: Sequence to prune. Its elements must be hashable.

    Returns:
        List of unique elements in ``seq``
    """
    # dicts preserve insertion order and keep the first occurrence of a key
    return list(dict.fromkeys(seq))
a/petab/format_version.py b/petab/v1/format_version.py similarity index 100% rename from petab/format_version.py rename to petab/v1/format_version.py diff --git a/petab/v1/lint.py b/petab/v1/lint.py new file mode 100644 index 00000000..6f70520b --- /dev/null +++ b/petab/v1/lint.py @@ -0,0 +1,1221 @@ +"""Integrity checks and tests for specific features used""" + +import copy +import logging +import numbers +import re +from collections import Counter +from collections.abc import Iterable +from typing import Any + +import numpy as np +import pandas as pd +import sympy as sp + +import petab.v1 as petab + +from . import core, measurements, parameters +from .C import * # noqa: F403 +from .math import sympify_petab +from .models import Model + +logger = logging.getLogger(__name__) +__all__ = [ + "assert_all_parameters_present_in_parameter_df", + "assert_measured_observables_defined", + "assert_measurement_conditions_present_in_condition_table", + "assert_measurements_not_null", + "assert_measurements_numeric", + "assert_model_parameters_in_condition_or_parameter_table", + "assert_no_leading_trailing_whitespace", + "assert_noise_distributions_valid", + "assert_parameter_bounds_are_numeric", + "assert_parameter_estimate_is_boolean", + "assert_parameter_id_is_string", + "assert_parameter_prior_parameters_are_valid", + "assert_parameter_prior_type_is_valid", + "assert_parameter_scale_is_valid", + "assert_unique_observable_ids", + "assert_unique_parameter_ids", + "check_condition_df", + "check_ids", + "check_measurement_df", + "check_observable_df", + "check_parameter_bounds", + "check_parameter_df", + "condition_table_is_parameter_free", + "get_non_unique", + "is_scalar_float", + "is_valid_identifier", + "lint_problem", + "measurement_table_has_observable_parameter_numeric_overrides", + "measurement_table_has_timepoint_specific_mappings", + "observable_table_has_nontrivial_noise_formula", +] + + +def _check_df(df: pd.DataFrame, req_cols: Iterable, name: str) -> None: + 
"""Check if given columns are present in DataFrame + + Arguments: + df: Dataframe to check + req_cols: Column names which have to be present + name: Name of the DataFrame to be included in error message + + Raises: + AssertionError: if a column is missing + """ + if missing_cols := set(req_cols) - set(df.columns.values): + raise AssertionError( + f"DataFrame {name} requires the columns {missing_cols}." + ) + + +def assert_no_leading_trailing_whitespace( + names_list: Iterable[str], name: str +) -> None: + """Check that there is no trailing whitespace in elements of Iterable + + Arguments: + names_list: strings to check for whitespace + name: name of `names_list` for error messages + + Raises: + AssertionError: if there is trailing whitespace + """ + r = re.compile(r"(?:^\s)|(?:\s$)") + for i, x in enumerate(names_list): + if isinstance(x, str) and r.search(x): + raise AssertionError(f"Whitespace around {name}[{i}] = '{x}'.") + + +def check_condition_df( + df: pd.DataFrame, + model: Model | None = None, + observable_df: pd.DataFrame | None = None, + mapping_df: pd.DataFrame | None = None, +) -> None: + """Run sanity checks on PEtab condition table + + Arguments: + df: PEtab condition DataFrame + model: Model for additional checking of parameter IDs + observable_df: PEtab observables DataFrame + mapping_df: PEtab mapping DataFrame + + Raises: + AssertionError: in case of problems + """ + # Check required columns are present + req_cols = [] + _check_df(df, req_cols, "condition") + + # Check for correct index + if df.index.name != CONDITION_ID: + raise AssertionError( + f"Condition table has wrong index {df.index.name}." + f"expected {CONDITION_ID}." 
+ ) + + check_ids(df.index.values, kind="condition") + + if not df.index.is_unique: + raise AssertionError( + "Non-unique condition IDs: " + f"{df.index.values[df.index.duplicated()]}" + ) + + for column_name in req_cols: + if not np.issubdtype(df[column_name].dtype, np.number): + assert_no_leading_trailing_whitespace( + df[column_name].values, column_name + ) + + if model is not None: + allowed_cols = set(model.get_valid_ids_for_condition_table()) + if observable_df is not None: + allowed_cols |= set( + petab.get_output_parameters( + model=model, + observable_df=observable_df, + mapping_df=mapping_df, + ) + ) + if mapping_df is not None: + allowed_cols |= set(mapping_df.index.values) + for column_name in df.columns: + if ( + column_name != CONDITION_NAME + and column_name not in allowed_cols + ): + raise AssertionError( + "Condition table contains column for unknown entity '" + f"{column_name}'." + ) + + +def check_measurement_df( + df: pd.DataFrame, observable_df: pd.DataFrame | None = None +) -> None: + """Run sanity checks on PEtab measurement table + + Arguments: + df: PEtab measurement DataFrame + observable_df: PEtab observable DataFrame for checking if measurements + are compatible with observable transformations. 
+ + Raises: + AssertionError, ValueError: in case of problems + """ + _check_df(df, MEASUREMENT_DF_REQUIRED_COLS, "measurement") + + for column_name in MEASUREMENT_DF_REQUIRED_COLS: + if not np.issubdtype(df[column_name].dtype, np.number): + assert_no_leading_trailing_whitespace( + df[column_name].values, column_name + ) + + for column_name in MEASUREMENT_DF_OPTIONAL_COLS: + if column_name in df and not np.issubdtype( + df[column_name].dtype, np.number + ): + assert_no_leading_trailing_whitespace( + df[column_name].values, column_name + ) + + if observable_df is not None: + assert_measured_observables_defined(df, observable_df) + measurements.assert_overrides_match_parameter_count(df, observable_df) + + if OBSERVABLE_TRANSFORMATION in observable_df: + # Check for positivity of measurements in case of + # log-transformation + assert_unique_observable_ids(observable_df) + # If the above is not checked, in the following loop + # trafo may become a pandas Series + for measurement, obs_id in zip( + df[MEASUREMENT], df[OBSERVABLE_ID], strict=True + ): + trafo = observable_df.loc[obs_id, OBSERVABLE_TRANSFORMATION] + if measurement <= 0.0 and trafo in [LOG, LOG10]: + raise ValueError( + "Measurements with observable " + f"transformation {trafo} must be " + f"positive, but {measurement} <= 0." 
def check_parameter_df(
    df: pd.DataFrame,
    model: Model | None = None,
    observable_df: pd.DataFrame | None = None,
    measurement_df: pd.DataFrame | None = None,
    condition_df: pd.DataFrame | None = None,
    mapping_df: pd.DataFrame | None = None,
) -> None:
    """Run sanity checks on PEtab parameter table

    Arguments:
        df: PEtab parameter DataFrame
        model: Model for additional checking of parameter IDs
        observable_df: PEtab observable table for additional checks
        measurement_df: PEtab measurement table for additional checks
        condition_df: PEtab condition table for additional checks
        mapping_df: PEtab mapping table for additional checks

    Raises:
        AssertionError: in case of problems
    """
    _check_df(df, PARAMETER_DF_REQUIRED_COLS[1:], "parameter")

    if df.index.name != PARAMETER_ID:
        raise AssertionError(
            f"Parameter table has wrong index {df.index.name}. "
            f"Expected {PARAMETER_ID}."
        )

    check_ids(df.index.values, kind="parameter")

    for column_name in PARAMETER_DF_REQUIRED_COLS[1:]:  # 0 is PARAMETER_ID
        if not np.issubdtype(df[column_name].dtype, np.number):
            assert_no_leading_trailing_whitespace(
                df[column_name].values, column_name
            )

    # nominal value is generally optional, but required if any for any
    # parameter estimate != 1
    # A parameter counts as estimated if the flag is the number 1 or the
    # string "1" (the column may have been read as strings); both
    # comparisons have to fail for it to be non-estimated.
    estimate = df[ESTIMATE]
    non_estimated_par_ids = list(
        df.index[(estimate != 1) & (estimate != "1")]
    )
    if non_estimated_par_ids:
        if NOMINAL_VALUE not in df:
            raise AssertionError(
                "Parameter table contains parameters "
                f"{non_estimated_par_ids} that are not "
                "specified to be estimated, "
                f"but column {NOMINAL_VALUE} is missing."
            )
        try:
            df.loc[non_estimated_par_ids, NOMINAL_VALUE].apply(float)
        except (ValueError, TypeError) as e:
            # TypeError: e.g. `None` entries in an object column
            raise AssertionError(
                f"Expected numeric values for `{NOMINAL_VALUE}` in parameter "
                "table for all non-estimated parameters."
            ) from e

    assert_parameter_id_is_string(df)
    assert_parameter_scale_is_valid(df)
    assert_parameter_bounds_are_numeric(df)
    assert_parameter_estimate_is_boolean(df)
    assert_unique_parameter_ids(df)
    check_parameter_bounds(df)
    assert_parameter_prior_type_is_valid(df)
    assert_parameter_prior_parameters_are_valid(df)

    # cross-table check requires the full problem
    if model and measurement_df is not None and condition_df is not None:
        assert_all_parameters_present_in_parameter_df(
            df, model, observable_df, measurement_df, condition_df, mapping_df
        )
f"given for observable {row.Index}." + ) + except sp.SympifyError as e: + raise AssertionError( + f"Cannot parse expression '{noise}' " + f"for noise model for observable " + f"{row.Index}: {e}" + ) from e + + +def assert_all_parameters_present_in_parameter_df( + parameter_df: pd.DataFrame, + model: Model, + observable_df: pd.DataFrame, + measurement_df: pd.DataFrame, + condition_df: pd.DataFrame, + mapping_df: pd.DataFrame = None, +) -> None: + """Ensure all required parameters are contained in the parameter table + with no additional ones + + Arguments: + parameter_df: PEtab parameter DataFrame + model: model + observable_df: PEtab observable table + measurement_df: PEtab measurement table + condition_df: PEtab condition table + mapping_df: PEtab mapping table for additional checks + + Raises: + AssertionError: in case of problems + """ + required = parameters.get_required_parameters_for_parameter_table( + model=model, + condition_df=condition_df, + observable_df=observable_df, + measurement_df=measurement_df, + mapping_df=mapping_df, + ) + + allowed = parameters.get_valid_parameters_for_parameter_table( + model=model, + condition_df=condition_df, + observable_df=observable_df, + measurement_df=measurement_df, + mapping_df=mapping_df, + ) + + actual = set(parameter_df.index) + missing = required - actual + extraneous = actual - allowed + + # missing parameters might be present under a different name based on + # the mapping table + if missing and mapping_df is not None: + model_to_petab_mapping = {} + for map_from, map_to in zip( + mapping_df.index.values, mapping_df[MODEL_ENTITY_ID], strict=True + ): + if map_to in model_to_petab_mapping: + model_to_petab_mapping[map_to].append(map_from) + else: + model_to_petab_mapping[map_to] = [map_from] + missing = { + missing_id + for missing_id in missing + if missing_id not in model_to_petab_mapping + or all( + mapping_parameter not in actual + for mapping_parameter in model_to_petab_mapping[missing_id] + ) + } + + if 
def assert_parameter_id_is_string(parameter_df: pd.DataFrame) -> None:
    """
    Check if all entries in the parameterId column of the parameter table
    are string and not empty.

    Arguments:
        parameter_df: PEtab parameter DataFrame

    Raises:
        AssertionError: in case of problems
    """
    # Iterating over a DataFrame yields its *column names*, so look at the
    # IDs explicitly: a regular column if present, otherwise the index.
    if PARAMETER_ID in parameter_df.columns:
        parameter_ids = parameter_df[PARAMETER_ID]
    else:
        parameter_ids = parameter_df.index

    for parameter_id in parameter_ids:
        # non-strings (e.g. NaN from an empty cell) and "" both count as empty
        if not isinstance(parameter_id, str) or not parameter_id:
            raise AssertionError(f"Empty {PARAMETER_ID} found.")
        if parameter_id[0].isdigit():
            raise AssertionError(
                f"{PARAMETER_ID} {parameter_id} starts with integer."
            )
+ + Arguments: + parameter_df: PEtab parameter DataFrame + + Raises: + AssertionError: in case of problems + + """ + for _, row in parameter_df.iterrows(): + if int(row[ESTIMATE]): + if not row[LOWER_BOUND] <= row[UPPER_BOUND]: + raise AssertionError( + f"{LOWER_BOUND} greater than {UPPER_BOUND} for " + f"{PARAMETER_ID} {row.name}." + ) + if (row[LOWER_BOUND] < 0.0 or row[UPPER_BOUND] < 0.0) and row[ + PARAMETER_SCALE + ] in [LOG, LOG10]: + raise AssertionError( + f"Bounds for {row[PARAMETER_SCALE]} scaled parameter " + f"{ row.name} must be positive." + ) + if ( + row.get(PARAMETER_SCALE, LIN) in [LOG, LOG10] + and (row[LOWER_BOUND] == 0.0 or row[UPPER_BOUND] == 0.0) + and not row.get(INITIALIZATION_PRIOR_TYPE) + ): + raise AssertionError( + f"Bounds for {row[PARAMETER_SCALE]} scaled parameter " + f"{row.name} must be positive if no " + f"{INITIALIZATION_PRIOR_TYPE} is provided. " + "Cannot sample from unbounded interval." + ) + + +def assert_parameter_prior_type_is_valid(parameter_df: pd.DataFrame) -> None: + """Check that valid prior types have been selected + + Arguments: + parameter_df: PEtab parameter table + + Raises: + AssertionError: in case of invalid prior + """ + for col in [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE]: + if col not in parameter_df.columns: + continue + for _, row in parameter_df.iterrows(): + if row[col] not in PRIOR_TYPES and not core.is_empty(row[col]): + raise AssertionError( + f"{col} must be one of {PRIOR_TYPES} but is " + f"'{row[col]}'." + ) + + +def assert_parameter_prior_parameters_are_valid( + parameter_df: pd.DataFrame, +) -> None: + """Check that the prior parameters are valid. 
+ + Arguments: + parameter_df: PEtab parameter table + + Raises: + AssertionError: in case of invalid prior parameters + """ + prior_type_cols = [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE] + prior_par_cols = [ + INITIALIZATION_PRIOR_PARAMETERS, + OBJECTIVE_PRIOR_PARAMETERS, + ] + + # perform test for both priors + for type_col, par_col in zip(prior_type_cols, prior_par_cols, strict=True): + # iterate over rows + for _, row in parameter_df.iterrows(): + # get type + if type_col not in row or core.is_empty(row[type_col]): + type_ = PARAMETER_SCALE_UNIFORM + else: + type_ = row[type_col] + # get parameters + pars_str = row.get(par_col, "") + with_default_parameters = [PARAMETER_SCALE_UNIFORM] + # check if parameters are empty + if core.is_empty(pars_str): + if type_ not in with_default_parameters: + raise AssertionError( + f"An empty {par_col} is only permitted with " + f"{type_col} in {with_default_parameters}." + ) + # empty parameters fine + continue + # parse parameters + try: + pars = tuple( + float(val) for val in pars_str.split(PARAMETER_SEPARATOR) + ) + except ValueError as e: + raise AssertionError( + f"Could not parse prior parameters '{pars_str}'." + ) from e + + # all distributions take 2 parameters + if len(pars) != 2: + raise AssertionError( + f"The prior parameters '{pars}' do not contain the " + "expected number of entries (currently 'par1" + f"{PARAMETER_SEPARATOR}par2' for all prior types)." + ) + + # we can't sample uniformly from [log(0)=-inf, ...] + if ( + type_col == INITIALIZATION_PRIOR_TYPE + and row.get(type_col, "") == PARAMETER_SCALE_UNIFORM + and row.get(PARAMETER_SCALE, LIN) in [LOG, LOG10] + and (pars[0] == 0.0 or pars[1] == 0.0) + ): + raise AssertionError( + f"{prior_par_cols} for {row[PARAMETER_SCALE]} scaled " + f"parameter {row.name} must be positive if " + f"{type_col}={PARAMETER_SCALE_UNIFORM}." 
def measurement_table_has_timepoint_specific_mappings(
    measurement_df: pd.DataFrame | None,
    allow_scalar_numeric_noise_parameters: bool = False,
    allow_scalar_numeric_observable_parameters: bool = False,
) -> bool:
    """
    Are there time-point or replicate specific parameter assignments in the
    measurement table.

    Arguments:
        measurement_df:
            PEtab measurement table

        allow_scalar_numeric_noise_parameters:
            ignore scalar numeric assignments to noiseParameter placeholders

        allow_scalar_numeric_observable_parameters:
            ignore scalar numeric assignments to observableParameter
            placeholders

    Returns:
        True if there are time-point or replicate specific (non-numeric)
        parameter assignments in the measurement table, False otherwise.
    """
    if measurement_df is None:
        return False

    # since we edit it, copy it first
    measurement_df = measurement_df.copy()

    # mask numeric values
    for col, allow_scalar_numeric in [
        (OBSERVABLE_PARAMETERS, allow_scalar_numeric_observable_parameters),
        (NOISE_PARAMETERS, allow_scalar_numeric_noise_parameters),
    ]:
        if col not in measurement_df:
            continue

        measurement_df[col] = measurement_df[col].apply(str)

        if allow_scalar_numeric:
            measurement_df.loc[
                measurement_df[col].apply(is_scalar_float), col
            ] = np.nan

    base_cols = [
        OBSERVABLE_ID,
        SIMULATION_CONDITION_ID,
        PREEQUILIBRATION_CONDITION_ID,
    ]
    cols_with_overrides = core.get_notnull_columns(
        measurement_df,
        [*base_cols, OBSERVABLE_PARAMETERS, NOISE_PARAMETERS],
    )
    cols_without_overrides = core.get_notnull_columns(
        measurement_df, base_cols
    )

    # Both groupings must treat missing values the same way. Otherwise rows
    # with NaN in a shared grouping column are dropped from only one of
    # them and the group counts differ without any override being present.
    n_groups_with_overrides = len(
        measurement_df.groupby(cols_with_overrides, dropna=False)
    )
    n_groups_without_overrides = len(
        measurement_df.groupby(cols_without_overrides, dropna=False)
    )

    # data frame has timepoint specific overrides if grouping by noise
    # parameters and observable parameters in addition to observable,
    # condition and preeq id yields more groups
    return n_groups_with_overrides != n_groups_without_overrides
def assert_noise_distributions_valid(observable_df: pd.DataFrame) -> None:
    """
    Ensure that noise distributions and transformations for observables are
    valid.

    Empty strings and NaN entries are accepted in both columns; columns that
    are absent from the table are skipped.

    Arguments:
        observable_df: PEtab observable table

    Raises:
        ValueError: if an unrecognized observable transformation or noise
            distribution is encountered
    """
    # (column to check, recognized values, description for the error message)
    checks = [
        (
            OBSERVABLE_TRANSFORMATION,
            OBSERVABLE_TRANSFORMATIONS,
            "observable transformation",
        ),
        (NOISE_DISTRIBUTION, NOISE_MODELS, "noise distribution"),
    ]

    for column, recognized_values, description in checks:
        if column not in observable_df:
            continue

        allowed = ["", *recognized_values]
        for value in observable_df[column]:
            is_nan = isinstance(value, numbers.Number) and np.isnan(value)
            if value not in allowed and not is_nan:
                raise ValueError(
                    f"Unrecognized {description} in observable "
                    f"table: {value}."
                )
def lint_problem(problem: "petab.Problem") -> bool:
    """Run PEtab validation on problem

    Every table that is available is checked; problems are reported via
    the module logger instead of being raised, so that all tables get
    checked even if an earlier one is invalid.

    Arguments:
        problem: PEtab problem to check

    Returns:
        ``True`` if errors occurred, ``False`` otherwise
    """
    # pylint: disable=too-many-statements
    errors_occurred = False
    # The table checks report problems as AssertionError, and in some cases
    # (e.g. non-positive log-transformed measurements, unrecognized noise
    # distributions) as ValueError. Both must count as validation errors,
    # otherwise a ValueError would abort linting altogether.
    validation_errors = (AssertionError, ValueError)

    if problem.extensions_config:
        logger.warning(
            "Validation of PEtab extensions is not yet implemented, "
            "but the given problem uses the following extensions: "
            f"{', '.join(problem.extensions_config.keys())}"
        )

    # Run checks on individual files
    if problem.model is not None:
        logger.info("Checking model...")
        errors_occurred |= not problem.model.is_valid()
    else:
        logger.warning("Model not available. Skipping.")

    if problem.measurement_df is not None:
        logger.info("Checking measurement table...")
        try:
            check_measurement_df(problem.measurement_df, problem.observable_df)

            if problem.condition_df is not None:
                assert_measurement_conditions_present_in_condition_table(
                    problem.measurement_df, problem.condition_df
                )
        except validation_errors as e:
            logger.error(e)
            errors_occurred = True
    else:
        logger.warning("Measurement table not available. Skipping.")

    if problem.condition_df is not None:
        logger.info("Checking condition table...")
        try:
            check_condition_df(
                problem.condition_df,
                model=problem.model,
                observable_df=problem.observable_df,
                mapping_df=problem.mapping_df,
            )
        except validation_errors as e:
            logger.error(e)
            errors_occurred = True
    else:
        logger.warning("Condition table not available. Skipping.")

    if problem.observable_df is not None:
        logger.info("Checking observable table...")
        try:
            check_observable_df(problem.observable_df)
        except validation_errors as e:
            logger.error(e)
            errors_occurred = True
        if problem.model is not None:
            for obs_id in problem.observable_df.index:
                if problem.model.has_entity_with_id(obs_id):
                    logger.error(
                        f"Observable ID {obs_id} shadows model " "entity."
                    )
                    errors_occurred = True
    else:
        logger.warning("Observable table not available. Skipping.")

    if problem.parameter_df is not None:
        logger.info("Checking parameter table...")
        try:
            check_parameter_df(
                problem.parameter_df,
                problem.model,
                problem.observable_df,
                problem.measurement_df,
                problem.condition_df,
                problem.mapping_df,
            )
        except validation_errors as e:
            logger.error(e)
            errors_occurred = True
    else:
        logger.warning("Parameter table not available. Skipping.")

    # cross-table check, only possible if all three parts are present
    if (
        problem.model is not None
        and problem.condition_df is not None
        and problem.parameter_df is not None
    ):
        try:
            assert_model_parameters_in_condition_or_parameter_table(
                problem.model,
                problem.condition_df,
                problem.parameter_df,
                problem.mapping_df,
            )
        except validation_errors as e:
            logger.error(e)
            errors_occurred = True

    if problem.visualization_df is not None:
        logger.info("Checking visualization table...")
        # function-level import, only needed if there is a visualization table
        from petab.v1.visualize.lint import validate_visualization_df

        errors_occurred |= validate_visualization_df(problem)
    else:
        logger.warning("Visualization table not available. Skipping.")

    if errors_occurred:
        logger.error("Not OK")
    elif (
        problem.measurement_df is None
        or problem.condition_df is None
        or problem.model is None
        or problem.parameter_df is None
        or problem.observable_df is None
    ):
        logger.warning(
            "Not all files of the PEtab problem definition could "
            "be checked."
        )
    else:
        logger.info("PEtab format check completed successfully.")

    return errors_occurred
def assert_measurement_conditions_present_in_condition_table(
    measurement_df: pd.DataFrame, condition_df: pd.DataFrame
) -> None:
    """Verify that every condition used by the measurements is defined.

    Collects the simulation condition IDs and, if that column exists, the
    non-missing preequilibration condition IDs from the measurement table
    and compares them with the index of the condition table.

    Arguments:
        measurement_df: PEtab measurement table
        condition_df: PEtab condition table

    Raises:
        AssertionError: if the measurement table references condition IDs
            that are not in the condition table index
    """
    referenced = set(measurement_df[SIMULATION_CONDITION_ID].values)
    if PREEQUILIBRATION_CONDITION_ID in measurement_df:
        # The preequilibration column is optional, and so are its entries.
        preeq_ids = measurement_df[PREEQUILIBRATION_CONDITION_ID].dropna()
        referenced |= set(preeq_ids.values)

    defined = set(condition_df.index.values)
    undefined = referenced - defined
    if not undefined:
        return

    raise AssertionError(
        "Measurement table references conditions that "
        "are not specified in the condition table: "
        f"{undefined}"
    )
Identifiers may contain upper and lower case letters, + digits and underscores, but must not start with a digit. + + Arguments: + x: string to check + + Returns: + ``True`` if valid, ``False`` otherwise + """ + if pd.isna(x): + return False + + return re.match(r"^[a-zA-Z_]\w*$", x) is not None + + +def check_ids(ids: Iterable[str], kind: str = "") -> None: + """Check IDs are valid + + Arguments: + ids: Iterable of IDs to check + kind: Kind of IDs, for more informative error message + + Raises: + ValueError: in case of invalid IDs + """ + invalids = [ + (index, _id) + for index, _id in enumerate(ids) + if not is_valid_identifier(_id) + ] + + if invalids: + # The first row is the header row, and Python lists are zero-indexed, + # hence need to add 2 for the correct line number. + offset = 2 + error_output = "\n".join( + [ + f"Line {index+offset}: " + + ("Missing ID" if pd.isna(_id) else _id) + for index, _id in invalids + ] + ) + raise ValueError(f"Invalid {kind} ID(s):\n{error_output}") diff --git a/petab/v1/mapping.py b/petab/v1/mapping.py new file mode 100644 index 00000000..80c71c68 --- /dev/null +++ b/petab/v1/mapping.py @@ -0,0 +1,118 @@ +"""Functionality related to the PEtab entity mapping table""" +from pathlib import Path + +import pandas as pd + +from . import lint +from .C import * # noqa: F403 +from .models import Model + +__all__ = [ + "get_mapping_df", + "write_mapping_df", + "check_mapping_df", + "resolve_mapping", +] + + +def get_mapping_df( + mapping_file: None | str | Path | pd.DataFrame, +) -> pd.DataFrame: + """ + Read the provided mapping file into a ``pandas.Dataframe``. 
+ + Arguments: + mapping_file: Name of file to read from or pandas.Dataframe + + Returns: + Mapping DataFrame + """ + if mapping_file is None: + return mapping_file + + if isinstance(mapping_file, str | Path): + mapping_file = pd.read_csv( + mapping_file, sep="\t", float_precision="round_trip" + ) + + if not isinstance(mapping_file.index, pd.RangeIndex): + mapping_file.reset_index( + drop=mapping_file.index.name != PETAB_ENTITY_ID, + inplace=True, + ) + + for col in MAPPING_DF_REQUIRED_COLS: + if col not in mapping_file.columns: + raise KeyError( + f"Mapping table missing mandatory field {PETAB_ENTITY_ID}." + ) + + lint.assert_no_leading_trailing_whitespace( + mapping_file.reset_index()[col].values, col + ) + + mapping_file.set_index([PETAB_ENTITY_ID], inplace=True) + + return mapping_file + + +def write_mapping_df(df: pd.DataFrame, filename: str | Path) -> None: + """Write PEtab mapping table + + Arguments: + df: PEtab mapping table + filename: Destination file name + """ + df = get_mapping_df(df) + df.to_csv(filename, sep="\t", index=True) + + +def check_mapping_df( + df: pd.DataFrame, + model: Model | None = None, +) -> None: + """Run sanity checks on PEtab mapping table + + Arguments: + df: PEtab mapping DataFrame + model: Model for additional checking of parameter IDs + + Raises: + AssertionError: in case of problems + """ + lint._check_df(df, MAPPING_DF_REQUIRED_COLS[1:], "mapping") + + if df.index.name != PETAB_ENTITY_ID: + raise AssertionError( + f"Mapping table has wrong index {df.index.name}. " + f"Expected {PETAB_ENTITY_ID}." + ) + + lint.check_ids(df.index.values, kind=PETAB_ENTITY_ID) + + if model: + for model_entity_id in df[MODEL_ENTITY_ID]: + if not model.has_entity_with_id(model_entity_id): + raise AssertionError( + "Mapping table maps to unknown " + f"model entity ID {model_entity_id}." + ) + + +def resolve_mapping(mapping_df: pd.DataFrame | None, element: str) -> str: + """Resolve mapping for a given element. 
// Lexer grammar for PEtab math expressions
// run `regenerate.sh` to regenerate the lexer
lexer grammar PetabMathExprLexer;


// Numeric literals.
// NUMBER is declared first and covers everything the more specific numeric
// rules and INF match. ANTLR prefers the longest match and, on a tie, the
// rule declared first, so such input should be emitted as NUMBER
// (NOTE(review): confirm against the generated lexer).
NUMBER : EXPONENT_FLOAT | INTEGER | POINT_FLOAT | INF;
INTEGER : DIGITS ;
// Mantissa (integer or decimal) followed by a mandatory exponent, e.g. 1e-3
EXPONENT_FLOAT : (INTEGER | POINT_FLOAT) EXPONENT ;
// Digits are required on both sides of the decimal point
POINT_FLOAT : DIGITS '.' DIGITS ;
// 'e' or 'E', an optional sign, then digits
fragment EXPONENT: ('e' | 'E') ('+' | '-')? DIGITS ;
FLOAT_NUMBER: POINT_FLOAT | EXPONENT_FLOAT;
fragment DIGITS : [0-9]+ ;

// Whitespace is dropped and never reaches the parser
WS : [ \t\r\n]+ -> skip ;
// Keywords are declared before NAME so that they win the tie against it
TRUE : 'true' ;
FALSE : 'false' ;
INF : 'inf' ;
// Identifiers: a letter or underscore, then letters, digits or underscores
NAME : [a-zA-Z_][a-zA-Z0-9_]* ;
OPEN_PAREN : '(' ;
CLOSE_PAREN : ')' ;
// Boolean operators
BOOLEAN_OR : '||' ;
BOOLEAN_AND : '&&' ;
// Comparison operators; two-character forms such as '>=' are the longer
// match and therefore take precedence over '>'
GT : '>' ;
LT : '<' ;
GTE : '>=' ;
LTE : '<=' ;
EQ : '==' ;
NEQ : '!=' ;
// Arithmetic operators
PLUS : '+' ;
MINUS : '-' ;
ASTERISK : '*' ;
SLASH : '/' ;
CARET: '^';
// Boolean NOT
EXCLAMATION_MARK: '!';
// Separator for function arguments
COMMA: ',';
expr # BooleanNotExpr + | expr ('*'|'/') expr # MultExpr + | expr ('+'|'-') expr # AddExpr + | '(' expr ')' # ParenExpr + | expr comp_op expr # ComparisonExpr + | expr (BOOLEAN_AND | BOOLEAN_OR) expr # BooleanAndOrExpr + | number # Number_ + | booleanLiteral # BooleanLiteral_ + | functionCall # functionCall_ + | var # VarExpr_ + ; + +comp_op: + GT + | LT + | GTE + | LTE + | EQ + | NEQ + ; + +argumentList: expr (',' expr)* ; +functionCall: NAME OPEN_PAREN argumentList CLOSE_PAREN ; + +booleanLiteral: + TRUE + | FALSE + ; +number: NUMBER ; +var: NAME ; diff --git a/petab/v1/math/SympyVisitor.py b/petab/v1/math/SympyVisitor.py new file mode 100644 index 00000000..016e872c --- /dev/null +++ b/petab/v1/math/SympyVisitor.py @@ -0,0 +1,303 @@ +"""PEtab-math to sympy conversion.""" +import sympy as sp +from sympy.logic.boolalg import Boolean, BooleanFalse, BooleanTrue + +from ._generated.PetabMathExprParser import PetabMathExprParser +from ._generated.PetabMathExprParserVisitor import PetabMathExprParserVisitor + +__all__ = ["MathVisitorSympy"] + +# Mappings of PEtab math functions to sympy functions + +# trigonometric functions +_trig_funcs = { + "sin": sp.sin, + "cos": sp.cos, + "tan": sp.tan, + "sec": sp.sec, + "csc": sp.csc, + "cot": sp.cot, + "sinh": sp.sinh, + "cosh": sp.cosh, + "tanh": sp.tanh, + "sech": sp.sech, + "csch": sp.csch, + "coth": sp.coth, + "arccos": sp.acos, + "arcsin": sp.asin, + "arctan": sp.atan, + "arcsec": sp.asec, + "arccsc": sp.acsc, + "arccot": sp.acot, + "arcsinh": sp.asinh, + "arccosh": sp.acosh, + "arctanh": sp.atanh, + "arcsech": sp.asech, + "arccsch": sp.acsch, + "arccoth": sp.acoth, +} +_unary_funcs = { + "exp": sp.exp, + "log10": lambda x: -sp.oo if x.is_zero is True else sp.log(x, 10), + "log2": lambda x: -sp.oo if x.is_zero is True else sp.log(x, 2), + "ln": sp.log, + "sqrt": sp.sqrt, + "abs": sp.Abs, + "sign": sp.sign, +} +_binary_funcs = { + "pow": sp.Pow, + "min": sp.Min, + "max": sp.Max, +} + +# reserved names that cannot be used as 
variable names +_reserved_names = { + "inf", + "nan", + "true", + "false", +} + + +class MathVisitorSympy(PetabMathExprParserVisitor): + """ + ANTLR4 visitor for PEtab-math-to-sympy conversion. + + Visitor for PEtab math expression AST generated using ANTLR4. + Converts PEtab math expressions to sympy expressions. + + Most users will not need to interact with this class directly, but rather + use :func:`petab.math.sympify_petab`. + + Evaluation of any sub-expressions currently relies on sympy's defaults. + + For a general introduction to ANTLR4 visitors, see: + https://github.com/antlr/antlr4/blob/7d4cea92bc3f7d709f09c3f1ac77c5bbc71a6749/doc/python-target.md + """ + + def visitPetabExpression( + self, ctx: PetabMathExprParser.PetabExpressionContext + ) -> sp.Expr | sp.Basic: + """Visit the root of the expression tree.""" + return self.visit(ctx.getChild(0)) + + def visitNumber(self, ctx: PetabMathExprParser.NumberContext) -> sp.Float: + """Convert number to sympy Float.""" + return sp.Float(ctx.getText()) + + def visitVar(self, ctx: PetabMathExprParser.VarContext) -> sp.Symbol: + """Convert identifier to sympy Symbol.""" + if ctx.getText().lower() in _reserved_names: + raise ValueError(f"Use of reserved name {ctx.getText()!r}") + return sp.Symbol(ctx.getText(), real=True) + + def visitMultExpr( + self, ctx: PetabMathExprParser.MultExprContext + ) -> sp.Expr: + """Convert multiplication and division expressions to sympy.""" + if ctx.getChildCount() == 3: + operand1 = bool2num(self.visit(ctx.getChild(0))) + operand2 = bool2num(self.visit(ctx.getChild(2))) + if ctx.ASTERISK(): + return operand1 * operand2 + if ctx.SLASH(): + return operand1 / operand2 + + raise AssertionError(f"Unexpected expression: {ctx.getText()}") + + def visitAddExpr(self, ctx: PetabMathExprParser.AddExprContext) -> sp.Expr: + """Convert addition and subtraction expressions to sympy.""" + op1 = bool2num(self.visit(ctx.getChild(0))) + op2 = bool2num(self.visit(ctx.getChild(2))) + if ctx.PLUS(): + 
return op1 + op2 + if ctx.MINUS(): + return op1 - op2 + + raise AssertionError( + f"Unexpected operator: {ctx.getChild(1).getText()} " + f"in {ctx.getText()}" + ) + + def visitArgumentList( + self, ctx: PetabMathExprParser.ArgumentListContext + ) -> list[sp.Basic | sp.Expr]: + """Convert function argument lists to a list of sympy expressions.""" + return [self.visit(c) for c in ctx.children[::2]] + + def visitFunctionCall( + self, ctx: PetabMathExprParser.FunctionCallContext + ) -> sp.Expr: + """Convert function call to sympy expression.""" + if ctx.getChildCount() < 4: + raise AssertionError(f"Unexpected expression: {ctx.getText()}") + func_name = ctx.getChild(0).getText() + args = self.visit(ctx.getChild(2)) + + if func_name != "piecewise": + # all functions except piecewise expect numerical arguments + args = list(map(bool2num, args)) + + if func_name in _trig_funcs: + if len(args) != 1: + raise AssertionError( + f"Unexpected number of arguments: {len(args)} " + f"in {ctx.getText()}" + ) + return _trig_funcs[func_name](*args) + if func_name in _unary_funcs: + if len(args) != 1: + raise AssertionError( + f"Unexpected number of arguments: {len(args)} " + f"in {ctx.getText()}" + ) + return _unary_funcs[func_name](*args) + if func_name in _binary_funcs: + if len(args) != 2: + raise AssertionError( + f"Unexpected number of arguments: {len(args)} " + f"in {ctx.getText()}" + ) + return _binary_funcs[func_name](*args) + if func_name == "log": + if len(args) not in [1, 2]: + raise AssertionError( + f"Unexpected number of arguments: {len(args)} " + f"in {ctx.getText()}" + ) + return -sp.oo if args[0].is_zero is True else sp.log(*args) + + if func_name == "piecewise": + if (len(args) - 1) % 2 != 0: + raise AssertionError( + f"Unexpected number of arguments: {len(args)} " + f"in {ctx.getText()}" + ) + # sympy's Piecewise requires an explicit condition for the final + # `else` case + args.append(sp.true) + sp_args = ( + (true_expr, num2bool(condition)) + for true_expr, 
condition in zip( + args[::2], args[1::2], strict=True + ) + ) + return sp.Piecewise(*sp_args) + + raise ValueError(f"Unknown function: {ctx.getText()}") + + def visitParenExpr(self, ctx: PetabMathExprParser.ParenExprContext): + """Convert parenthesized expression to sympy.""" + return self.visit(ctx.getChild(1)) + + def visitPowerExpr( + self, ctx: PetabMathExprParser.PowerExprContext + ) -> sp.Pow: + """Convert power expression to sympy.""" + if ctx.getChildCount() != 3: + raise AssertionError( + f"Unexpected number of children: {ctx.getChildCount()} " + f"in {ctx.getText()}" + ) + operand1 = bool2num(self.visit(ctx.getChild(0))) + operand2 = bool2num(self.visit(ctx.getChild(2))) + return sp.Pow(operand1, operand2) + + def visitUnaryExpr( + self, ctx: PetabMathExprParser.UnaryExprContext + ) -> sp.Basic | sp.Expr: + """Convert unary expressions to sympy.""" + if ctx.getChildCount() == 2: + operand = bool2num(self.visit(ctx.getChild(1))) + match ctx.getChild(0).getText(): + case "-": + return -operand + case "+": + return operand + + raise AssertionError(f"Unexpected expression: {ctx.getText()}") + + def visitComparisonExpr( + self, ctx: PetabMathExprParser.ComparisonExprContext + ) -> sp.Basic | sp.Expr: + """Convert comparison expressions to sympy.""" + if ctx.getChildCount() != 3: + raise AssertionError(f"Unexpected expression: {ctx.getText()}") + lhs = self.visit(ctx.getChild(0)) + op = ctx.getChild(1).getText() + rhs = self.visit(ctx.getChild(2)) + + ops = { + "==": sp.Equality, + "!=": sp.Unequality, + "<": sp.StrictLessThan, + ">": sp.StrictGreaterThan, + "<=": sp.LessThan, + ">=": sp.GreaterThan, + } + if op in ops: + lhs = bool2num(lhs) + rhs = bool2num(rhs) + return ops[op](lhs, rhs) + + raise AssertionError(f"Unexpected operator: {op}") + + def visitBooleanNotExpr( + self, ctx: PetabMathExprParser.BooleanNotExprContext + ) -> sp.Basic | sp.Expr: + """Convert boolean NOT expressions to sympy.""" + if ctx.getChildCount() == 2: + return 
def num2bool(x: sp.Basic | sp.Expr) -> sp.Basic | sp.Expr:
    """Convert sympy Floats to booleans.

    Boolean constants are returned unchanged. Otherwise the result depends
    on ``x.is_zero``: ``sp.false`` if it is ``True``, ``sp.true`` if it is
    ``False``, and a ``Piecewise`` expression if it is neither.

    Arguments:
        x: sympy expression to convert

    Returns:
        ``x`` itself, ``sp.true``, ``sp.false``, or a ``sp.Piecewise``
    """
    # Already a boolean constant, nothing to convert
    if isinstance(x, BooleanTrue | BooleanFalse):
        return x
    # Note: sp.Float(0) == 0 is False in sympy>=1.13
    # hence `is_zero` is used here instead of comparing with `==`
    if x.is_zero is True:
        return sp.false
    if x.is_zero is False:
        return sp.true
    # `is_zero` is neither True nor False here.
    # NOTE(review): `x != 0.0` is evaluated by Python before Piecewise sees
    # it; on sympy objects `!=` is presumably a structural comparison that
    # yields a plain bool instead of a symbolic relation -- confirm whether
    # `sp.Ne(x, 0)` was intended.
    return sp.Piecewise((True, x != 0.0), (False, True))
b/petab/v1/math/_generated/PetabMathExprLexer.interp @@ -0,0 +1,100 @@ +token literal names: +null +null +null +null +null +null +null +'true' +'false' +'inf' +null +'(' +')' +'||' +'&&' +'>' +'<' +'>=' +'<=' +'==' +'!=' +'+' +'-' +'*' +'/' +'^' +'!' +',' + +token symbolic names: +null +NUMBER +INTEGER +EXPONENT_FLOAT +POINT_FLOAT +FLOAT_NUMBER +WS +TRUE +FALSE +INF +NAME +OPEN_PAREN +CLOSE_PAREN +BOOLEAN_OR +BOOLEAN_AND +GT +LT +GTE +LTE +EQ +NEQ +PLUS +MINUS +ASTERISK +SLASH +CARET +EXCLAMATION_MARK +COMMA + +rule names: +NUMBER +INTEGER +EXPONENT_FLOAT +POINT_FLOAT +EXPONENT +FLOAT_NUMBER +DIGITS +WS +TRUE +FALSE +INF +NAME +OPEN_PAREN +CLOSE_PAREN +BOOLEAN_OR +BOOLEAN_AND +GT +LT +GTE +LTE +EQ +NEQ +PLUS +MINUS +ASTERISK +SLASH +CARET +EXCLAMATION_MARK +COMMA + +channel names: +DEFAULT_TOKEN_CHANNEL +HIDDEN + +mode names: +DEFAULT_MODE + +atn: +[4, 0, 27, 161, 6, -1, 2, 0, 7, 0, 2, 1, 7, 1, 2, 2, 7, 2, 2, 3, 7, 3, 2, 4, 7, 4, 2, 5, 7, 5, 2, 6, 7, 6, 2, 7, 7, 7, 2, 8, 7, 8, 2, 9, 7, 9, 2, 10, 7, 10, 2, 11, 7, 11, 2, 12, 7, 12, 2, 13, 7, 13, 2, 14, 7, 14, 2, 15, 7, 15, 2, 16, 7, 16, 2, 17, 7, 17, 2, 18, 7, 18, 2, 19, 7, 19, 2, 20, 7, 20, 2, 21, 7, 21, 2, 22, 7, 22, 2, 23, 7, 23, 2, 24, 7, 24, 2, 25, 7, 25, 2, 26, 7, 26, 2, 27, 7, 27, 2, 28, 7, 28, 1, 0, 1, 0, 1, 0, 1, 0, 3, 0, 64, 8, 0, 1, 1, 1, 1, 1, 2, 1, 2, 3, 2, 70, 8, 2, 1, 2, 1, 2, 1, 3, 1, 3, 1, 3, 1, 3, 1, 4, 1, 4, 3, 4, 80, 8, 4, 1, 4, 1, 4, 1, 5, 1, 5, 3, 5, 86, 8, 5, 1, 6, 4, 6, 89, 8, 6, 11, 6, 12, 6, 90, 1, 7, 4, 7, 94, 8, 7, 11, 7, 12, 7, 95, 1, 7, 1, 7, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 9, 1, 9, 1, 9, 1, 9, 1, 9, 1, 9, 1, 10, 1, 10, 1, 10, 1, 10, 1, 11, 1, 11, 5, 11, 117, 8, 11, 10, 11, 12, 11, 120, 9, 11, 1, 12, 1, 12, 1, 13, 1, 13, 1, 14, 1, 14, 1, 14, 1, 15, 1, 15, 1, 15, 1, 16, 1, 16, 1, 17, 1, 17, 1, 18, 1, 18, 1, 18, 1, 19, 1, 19, 1, 19, 1, 20, 1, 20, 1, 20, 1, 21, 1, 21, 1, 21, 1, 22, 1, 22, 1, 23, 1, 23, 1, 24, 1, 24, 1, 25, 1, 25, 1, 26, 1, 26, 1, 27, 1, 27, 1, 28, 1, 28, 0, 0, 29, 1, 1, 3, 
2, 5, 3, 7, 4, 9, 0, 11, 5, 13, 0, 15, 6, 17, 7, 19, 8, 21, 9, 23, 10, 25, 11, 27, 12, 29, 13, 31, 14, 33, 15, 35, 16, 37, 17, 39, 18, 41, 19, 43, 20, 45, 21, 47, 22, 49, 23, 51, 24, 53, 25, 55, 26, 57, 27, 1, 0, 6, 2, 0, 69, 69, 101, 101, 2, 0, 43, 43, 45, 45, 1, 0, 48, 57, 3, 0, 9, 10, 13, 13, 32, 32, 3, 0, 65, 90, 95, 95, 97, 122, 4, 0, 48, 57, 65, 90, 95, 95, 97, 122, 167, 0, 1, 1, 0, 0, 0, 0, 3, 1, 0, 0, 0, 0, 5, 1, 0, 0, 0, 0, 7, 1, 0, 0, 0, 0, 11, 1, 0, 0, 0, 0, 15, 1, 0, 0, 0, 0, 17, 1, 0, 0, 0, 0, 19, 1, 0, 0, 0, 0, 21, 1, 0, 0, 0, 0, 23, 1, 0, 0, 0, 0, 25, 1, 0, 0, 0, 0, 27, 1, 0, 0, 0, 0, 29, 1, 0, 0, 0, 0, 31, 1, 0, 0, 0, 0, 33, 1, 0, 0, 0, 0, 35, 1, 0, 0, 0, 0, 37, 1, 0, 0, 0, 0, 39, 1, 0, 0, 0, 0, 41, 1, 0, 0, 0, 0, 43, 1, 0, 0, 0, 0, 45, 1, 0, 0, 0, 0, 47, 1, 0, 0, 0, 0, 49, 1, 0, 0, 0, 0, 51, 1, 0, 0, 0, 0, 53, 1, 0, 0, 0, 0, 55, 1, 0, 0, 0, 0, 57, 1, 0, 0, 0, 1, 63, 1, 0, 0, 0, 3, 65, 1, 0, 0, 0, 5, 69, 1, 0, 0, 0, 7, 73, 1, 0, 0, 0, 9, 77, 1, 0, 0, 0, 11, 85, 1, 0, 0, 0, 13, 88, 1, 0, 0, 0, 15, 93, 1, 0, 0, 0, 17, 99, 1, 0, 0, 0, 19, 104, 1, 0, 0, 0, 21, 110, 1, 0, 0, 0, 23, 114, 1, 0, 0, 0, 25, 121, 1, 0, 0, 0, 27, 123, 1, 0, 0, 0, 29, 125, 1, 0, 0, 0, 31, 128, 1, 0, 0, 0, 33, 131, 1, 0, 0, 0, 35, 133, 1, 0, 0, 0, 37, 135, 1, 0, 0, 0, 39, 138, 1, 0, 0, 0, 41, 141, 1, 0, 0, 0, 43, 144, 1, 0, 0, 0, 45, 147, 1, 0, 0, 0, 47, 149, 1, 0, 0, 0, 49, 151, 1, 0, 0, 0, 51, 153, 1, 0, 0, 0, 53, 155, 1, 0, 0, 0, 55, 157, 1, 0, 0, 0, 57, 159, 1, 0, 0, 0, 59, 64, 3, 5, 2, 0, 60, 64, 3, 3, 1, 0, 61, 64, 3, 7, 3, 0, 62, 64, 3, 21, 10, 0, 63, 59, 1, 0, 0, 0, 63, 60, 1, 0, 0, 0, 63, 61, 1, 0, 0, 0, 63, 62, 1, 0, 0, 0, 64, 2, 1, 0, 0, 0, 65, 66, 3, 13, 6, 0, 66, 4, 1, 0, 0, 0, 67, 70, 3, 3, 1, 0, 68, 70, 3, 7, 3, 0, 69, 67, 1, 0, 0, 0, 69, 68, 1, 0, 0, 0, 70, 71, 1, 0, 0, 0, 71, 72, 3, 9, 4, 0, 72, 6, 1, 0, 0, 0, 73, 74, 3, 13, 6, 0, 74, 75, 5, 46, 0, 0, 75, 76, 3, 13, 6, 0, 76, 8, 1, 0, 0, 0, 77, 79, 7, 0, 0, 0, 78, 80, 7, 1, 0, 0, 79, 78, 1, 0, 0, 0, 79, 80, 1, 0, 
0, 0, 80, 81, 1, 0, 0, 0, 81, 82, 3, 13, 6, 0, 82, 10, 1, 0, 0, 0, 83, 86, 3, 7, 3, 0, 84, 86, 3, 5, 2, 0, 85, 83, 1, 0, 0, 0, 85, 84, 1, 0, 0, 0, 86, 12, 1, 0, 0, 0, 87, 89, 7, 2, 0, 0, 88, 87, 1, 0, 0, 0, 89, 90, 1, 0, 0, 0, 90, 88, 1, 0, 0, 0, 90, 91, 1, 0, 0, 0, 91, 14, 1, 0, 0, 0, 92, 94, 7, 3, 0, 0, 93, 92, 1, 0, 0, 0, 94, 95, 1, 0, 0, 0, 95, 93, 1, 0, 0, 0, 95, 96, 1, 0, 0, 0, 96, 97, 1, 0, 0, 0, 97, 98, 6, 7, 0, 0, 98, 16, 1, 0, 0, 0, 99, 100, 5, 116, 0, 0, 100, 101, 5, 114, 0, 0, 101, 102, 5, 117, 0, 0, 102, 103, 5, 101, 0, 0, 103, 18, 1, 0, 0, 0, 104, 105, 5, 102, 0, 0, 105, 106, 5, 97, 0, 0, 106, 107, 5, 108, 0, 0, 107, 108, 5, 115, 0, 0, 108, 109, 5, 101, 0, 0, 109, 20, 1, 0, 0, 0, 110, 111, 5, 105, 0, 0, 111, 112, 5, 110, 0, 0, 112, 113, 5, 102, 0, 0, 113, 22, 1, 0, 0, 0, 114, 118, 7, 4, 0, 0, 115, 117, 7, 5, 0, 0, 116, 115, 1, 0, 0, 0, 117, 120, 1, 0, 0, 0, 118, 116, 1, 0, 0, 0, 118, 119, 1, 0, 0, 0, 119, 24, 1, 0, 0, 0, 120, 118, 1, 0, 0, 0, 121, 122, 5, 40, 0, 0, 122, 26, 1, 0, 0, 0, 123, 124, 5, 41, 0, 0, 124, 28, 1, 0, 0, 0, 125, 126, 5, 124, 0, 0, 126, 127, 5, 124, 0, 0, 127, 30, 1, 0, 0, 0, 128, 129, 5, 38, 0, 0, 129, 130, 5, 38, 0, 0, 130, 32, 1, 0, 0, 0, 131, 132, 5, 62, 0, 0, 132, 34, 1, 0, 0, 0, 133, 134, 5, 60, 0, 0, 134, 36, 1, 0, 0, 0, 135, 136, 5, 62, 0, 0, 136, 137, 5, 61, 0, 0, 137, 38, 1, 0, 0, 0, 138, 139, 5, 60, 0, 0, 139, 140, 5, 61, 0, 0, 140, 40, 1, 0, 0, 0, 141, 142, 5, 61, 0, 0, 142, 143, 5, 61, 0, 0, 143, 42, 1, 0, 0, 0, 144, 145, 5, 33, 0, 0, 145, 146, 5, 61, 0, 0, 146, 44, 1, 0, 0, 0, 147, 148, 5, 43, 0, 0, 148, 46, 1, 0, 0, 0, 149, 150, 5, 45, 0, 0, 150, 48, 1, 0, 0, 0, 151, 152, 5, 42, 0, 0, 152, 50, 1, 0, 0, 0, 153, 154, 5, 47, 0, 0, 154, 52, 1, 0, 0, 0, 155, 156, 5, 94, 0, 0, 156, 54, 1, 0, 0, 0, 157, 158, 5, 33, 0, 0, 158, 56, 1, 0, 0, 0, 159, 160, 5, 44, 0, 0, 160, 58, 1, 0, 0, 0, 8, 0, 63, 69, 79, 85, 90, 95, 118, 1, 6, 0, 0] diff --git a/petab/v1/math/_generated/PetabMathExprLexer.py 
b/petab/v1/math/_generated/PetabMathExprLexer.py new file mode 100644 index 00000000..4b16f1e8 --- /dev/null +++ b/petab/v1/math/_generated/PetabMathExprLexer.py @@ -0,0 +1,1608 @@ +# Generated from PetabMathExprLexer.g4 by ANTLR 4.13.1 +import sys + +from antlr4 import * + +if sys.version_info[1] > 5: + from typing import TextIO +else: + from typing.io import TextIO + + +def serializedATN(): + return [ + 4, + 0, + 27, + 161, + 6, + -1, + 2, + 0, + 7, + 0, + 2, + 1, + 7, + 1, + 2, + 2, + 7, + 2, + 2, + 3, + 7, + 3, + 2, + 4, + 7, + 4, + 2, + 5, + 7, + 5, + 2, + 6, + 7, + 6, + 2, + 7, + 7, + 7, + 2, + 8, + 7, + 8, + 2, + 9, + 7, + 9, + 2, + 10, + 7, + 10, + 2, + 11, + 7, + 11, + 2, + 12, + 7, + 12, + 2, + 13, + 7, + 13, + 2, + 14, + 7, + 14, + 2, + 15, + 7, + 15, + 2, + 16, + 7, + 16, + 2, + 17, + 7, + 17, + 2, + 18, + 7, + 18, + 2, + 19, + 7, + 19, + 2, + 20, + 7, + 20, + 2, + 21, + 7, + 21, + 2, + 22, + 7, + 22, + 2, + 23, + 7, + 23, + 2, + 24, + 7, + 24, + 2, + 25, + 7, + 25, + 2, + 26, + 7, + 26, + 2, + 27, + 7, + 27, + 2, + 28, + 7, + 28, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 3, + 0, + 64, + 8, + 0, + 1, + 1, + 1, + 1, + 1, + 2, + 1, + 2, + 3, + 2, + 70, + 8, + 2, + 1, + 2, + 1, + 2, + 1, + 3, + 1, + 3, + 1, + 3, + 1, + 3, + 1, + 4, + 1, + 4, + 3, + 4, + 80, + 8, + 4, + 1, + 4, + 1, + 4, + 1, + 5, + 1, + 5, + 3, + 5, + 86, + 8, + 5, + 1, + 6, + 4, + 6, + 89, + 8, + 6, + 11, + 6, + 12, + 6, + 90, + 1, + 7, + 4, + 7, + 94, + 8, + 7, + 11, + 7, + 12, + 7, + 95, + 1, + 7, + 1, + 7, + 1, + 8, + 1, + 8, + 1, + 8, + 1, + 8, + 1, + 8, + 1, + 9, + 1, + 9, + 1, + 9, + 1, + 9, + 1, + 9, + 1, + 9, + 1, + 10, + 1, + 10, + 1, + 10, + 1, + 10, + 1, + 11, + 1, + 11, + 5, + 11, + 117, + 8, + 11, + 10, + 11, + 12, + 11, + 120, + 9, + 11, + 1, + 12, + 1, + 12, + 1, + 13, + 1, + 13, + 1, + 14, + 1, + 14, + 1, + 14, + 1, + 15, + 1, + 15, + 1, + 15, + 1, + 16, + 1, + 16, + 1, + 17, + 1, + 17, + 1, + 18, + 1, + 18, + 1, + 18, + 1, + 19, + 1, + 19, + 1, + 19, + 1, + 20, + 1, + 20, 
+ 1, + 20, + 1, + 21, + 1, + 21, + 1, + 21, + 1, + 22, + 1, + 22, + 1, + 23, + 1, + 23, + 1, + 24, + 1, + 24, + 1, + 25, + 1, + 25, + 1, + 26, + 1, + 26, + 1, + 27, + 1, + 27, + 1, + 28, + 1, + 28, + 0, + 0, + 29, + 1, + 1, + 3, + 2, + 5, + 3, + 7, + 4, + 9, + 0, + 11, + 5, + 13, + 0, + 15, + 6, + 17, + 7, + 19, + 8, + 21, + 9, + 23, + 10, + 25, + 11, + 27, + 12, + 29, + 13, + 31, + 14, + 33, + 15, + 35, + 16, + 37, + 17, + 39, + 18, + 41, + 19, + 43, + 20, + 45, + 21, + 47, + 22, + 49, + 23, + 51, + 24, + 53, + 25, + 55, + 26, + 57, + 27, + 1, + 0, + 6, + 2, + 0, + 69, + 69, + 101, + 101, + 2, + 0, + 43, + 43, + 45, + 45, + 1, + 0, + 48, + 57, + 3, + 0, + 9, + 10, + 13, + 13, + 32, + 32, + 3, + 0, + 65, + 90, + 95, + 95, + 97, + 122, + 4, + 0, + 48, + 57, + 65, + 90, + 95, + 95, + 97, + 122, + 167, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + 3, + 1, + 0, + 0, + 0, + 0, + 5, + 1, + 0, + 0, + 0, + 0, + 7, + 1, + 0, + 0, + 0, + 0, + 11, + 1, + 0, + 0, + 0, + 0, + 15, + 1, + 0, + 0, + 0, + 0, + 17, + 1, + 0, + 0, + 0, + 0, + 19, + 1, + 0, + 0, + 0, + 0, + 21, + 1, + 0, + 0, + 0, + 0, + 23, + 1, + 0, + 0, + 0, + 0, + 25, + 1, + 0, + 0, + 0, + 0, + 27, + 1, + 0, + 0, + 0, + 0, + 29, + 1, + 0, + 0, + 0, + 0, + 31, + 1, + 0, + 0, + 0, + 0, + 33, + 1, + 0, + 0, + 0, + 0, + 35, + 1, + 0, + 0, + 0, + 0, + 37, + 1, + 0, + 0, + 0, + 0, + 39, + 1, + 0, + 0, + 0, + 0, + 41, + 1, + 0, + 0, + 0, + 0, + 43, + 1, + 0, + 0, + 0, + 0, + 45, + 1, + 0, + 0, + 0, + 0, + 47, + 1, + 0, + 0, + 0, + 0, + 49, + 1, + 0, + 0, + 0, + 0, + 51, + 1, + 0, + 0, + 0, + 0, + 53, + 1, + 0, + 0, + 0, + 0, + 55, + 1, + 0, + 0, + 0, + 0, + 57, + 1, + 0, + 0, + 0, + 1, + 63, + 1, + 0, + 0, + 0, + 3, + 65, + 1, + 0, + 0, + 0, + 5, + 69, + 1, + 0, + 0, + 0, + 7, + 73, + 1, + 0, + 0, + 0, + 9, + 77, + 1, + 0, + 0, + 0, + 11, + 85, + 1, + 0, + 0, + 0, + 13, + 88, + 1, + 0, + 0, + 0, + 15, + 93, + 1, + 0, + 0, + 0, + 17, + 99, + 1, + 0, + 0, + 0, + 19, + 104, + 1, + 0, + 0, + 0, + 21, + 110, + 1, + 0, + 0, + 0, + 23, 
+ 114, + 1, + 0, + 0, + 0, + 25, + 121, + 1, + 0, + 0, + 0, + 27, + 123, + 1, + 0, + 0, + 0, + 29, + 125, + 1, + 0, + 0, + 0, + 31, + 128, + 1, + 0, + 0, + 0, + 33, + 131, + 1, + 0, + 0, + 0, + 35, + 133, + 1, + 0, + 0, + 0, + 37, + 135, + 1, + 0, + 0, + 0, + 39, + 138, + 1, + 0, + 0, + 0, + 41, + 141, + 1, + 0, + 0, + 0, + 43, + 144, + 1, + 0, + 0, + 0, + 45, + 147, + 1, + 0, + 0, + 0, + 47, + 149, + 1, + 0, + 0, + 0, + 49, + 151, + 1, + 0, + 0, + 0, + 51, + 153, + 1, + 0, + 0, + 0, + 53, + 155, + 1, + 0, + 0, + 0, + 55, + 157, + 1, + 0, + 0, + 0, + 57, + 159, + 1, + 0, + 0, + 0, + 59, + 64, + 3, + 5, + 2, + 0, + 60, + 64, + 3, + 3, + 1, + 0, + 61, + 64, + 3, + 7, + 3, + 0, + 62, + 64, + 3, + 21, + 10, + 0, + 63, + 59, + 1, + 0, + 0, + 0, + 63, + 60, + 1, + 0, + 0, + 0, + 63, + 61, + 1, + 0, + 0, + 0, + 63, + 62, + 1, + 0, + 0, + 0, + 64, + 2, + 1, + 0, + 0, + 0, + 65, + 66, + 3, + 13, + 6, + 0, + 66, + 4, + 1, + 0, + 0, + 0, + 67, + 70, + 3, + 3, + 1, + 0, + 68, + 70, + 3, + 7, + 3, + 0, + 69, + 67, + 1, + 0, + 0, + 0, + 69, + 68, + 1, + 0, + 0, + 0, + 70, + 71, + 1, + 0, + 0, + 0, + 71, + 72, + 3, + 9, + 4, + 0, + 72, + 6, + 1, + 0, + 0, + 0, + 73, + 74, + 3, + 13, + 6, + 0, + 74, + 75, + 5, + 46, + 0, + 0, + 75, + 76, + 3, + 13, + 6, + 0, + 76, + 8, + 1, + 0, + 0, + 0, + 77, + 79, + 7, + 0, + 0, + 0, + 78, + 80, + 7, + 1, + 0, + 0, + 79, + 78, + 1, + 0, + 0, + 0, + 79, + 80, + 1, + 0, + 0, + 0, + 80, + 81, + 1, + 0, + 0, + 0, + 81, + 82, + 3, + 13, + 6, + 0, + 82, + 10, + 1, + 0, + 0, + 0, + 83, + 86, + 3, + 7, + 3, + 0, + 84, + 86, + 3, + 5, + 2, + 0, + 85, + 83, + 1, + 0, + 0, + 0, + 85, + 84, + 1, + 0, + 0, + 0, + 86, + 12, + 1, + 0, + 0, + 0, + 87, + 89, + 7, + 2, + 0, + 0, + 88, + 87, + 1, + 0, + 0, + 0, + 89, + 90, + 1, + 0, + 0, + 0, + 90, + 88, + 1, + 0, + 0, + 0, + 90, + 91, + 1, + 0, + 0, + 0, + 91, + 14, + 1, + 0, + 0, + 0, + 92, + 94, + 7, + 3, + 0, + 0, + 93, + 92, + 1, + 0, + 0, + 0, + 94, + 95, + 1, + 0, + 0, + 0, + 95, + 93, + 1, + 0, + 0, + 0, 
+ 95, + 96, + 1, + 0, + 0, + 0, + 96, + 97, + 1, + 0, + 0, + 0, + 97, + 98, + 6, + 7, + 0, + 0, + 98, + 16, + 1, + 0, + 0, + 0, + 99, + 100, + 5, + 116, + 0, + 0, + 100, + 101, + 5, + 114, + 0, + 0, + 101, + 102, + 5, + 117, + 0, + 0, + 102, + 103, + 5, + 101, + 0, + 0, + 103, + 18, + 1, + 0, + 0, + 0, + 104, + 105, + 5, + 102, + 0, + 0, + 105, + 106, + 5, + 97, + 0, + 0, + 106, + 107, + 5, + 108, + 0, + 0, + 107, + 108, + 5, + 115, + 0, + 0, + 108, + 109, + 5, + 101, + 0, + 0, + 109, + 20, + 1, + 0, + 0, + 0, + 110, + 111, + 5, + 105, + 0, + 0, + 111, + 112, + 5, + 110, + 0, + 0, + 112, + 113, + 5, + 102, + 0, + 0, + 113, + 22, + 1, + 0, + 0, + 0, + 114, + 118, + 7, + 4, + 0, + 0, + 115, + 117, + 7, + 5, + 0, + 0, + 116, + 115, + 1, + 0, + 0, + 0, + 117, + 120, + 1, + 0, + 0, + 0, + 118, + 116, + 1, + 0, + 0, + 0, + 118, + 119, + 1, + 0, + 0, + 0, + 119, + 24, + 1, + 0, + 0, + 0, + 120, + 118, + 1, + 0, + 0, + 0, + 121, + 122, + 5, + 40, + 0, + 0, + 122, + 26, + 1, + 0, + 0, + 0, + 123, + 124, + 5, + 41, + 0, + 0, + 124, + 28, + 1, + 0, + 0, + 0, + 125, + 126, + 5, + 124, + 0, + 0, + 126, + 127, + 5, + 124, + 0, + 0, + 127, + 30, + 1, + 0, + 0, + 0, + 128, + 129, + 5, + 38, + 0, + 0, + 129, + 130, + 5, + 38, + 0, + 0, + 130, + 32, + 1, + 0, + 0, + 0, + 131, + 132, + 5, + 62, + 0, + 0, + 132, + 34, + 1, + 0, + 0, + 0, + 133, + 134, + 5, + 60, + 0, + 0, + 134, + 36, + 1, + 0, + 0, + 0, + 135, + 136, + 5, + 62, + 0, + 0, + 136, + 137, + 5, + 61, + 0, + 0, + 137, + 38, + 1, + 0, + 0, + 0, + 138, + 139, + 5, + 60, + 0, + 0, + 139, + 140, + 5, + 61, + 0, + 0, + 140, + 40, + 1, + 0, + 0, + 0, + 141, + 142, + 5, + 61, + 0, + 0, + 142, + 143, + 5, + 61, + 0, + 0, + 143, + 42, + 1, + 0, + 0, + 0, + 144, + 145, + 5, + 33, + 0, + 0, + 145, + 146, + 5, + 61, + 0, + 0, + 146, + 44, + 1, + 0, + 0, + 0, + 147, + 148, + 5, + 43, + 0, + 0, + 148, + 46, + 1, + 0, + 0, + 0, + 149, + 150, + 5, + 45, + 0, + 0, + 150, + 48, + 1, + 0, + 0, + 0, + 151, + 152, + 5, + 42, + 0, + 0, + 152, + 
50, + 1, + 0, + 0, + 0, + 153, + 154, + 5, + 47, + 0, + 0, + 154, + 52, + 1, + 0, + 0, + 0, + 155, + 156, + 5, + 94, + 0, + 0, + 156, + 54, + 1, + 0, + 0, + 0, + 157, + 158, + 5, + 33, + 0, + 0, + 158, + 56, + 1, + 0, + 0, + 0, + 159, + 160, + 5, + 44, + 0, + 0, + 160, + 58, + 1, + 0, + 0, + 0, + 8, + 0, + 63, + 69, + 79, + 85, + 90, + 95, + 118, + 1, + 6, + 0, + 0, + ] + + +class PetabMathExprLexer(Lexer): + atn = ATNDeserializer().deserialize(serializedATN()) + + decisionsToDFA = [DFA(ds, i) for i, ds in enumerate(atn.decisionToState)] + + NUMBER = 1 + INTEGER = 2 + EXPONENT_FLOAT = 3 + POINT_FLOAT = 4 + FLOAT_NUMBER = 5 + WS = 6 + TRUE = 7 + FALSE = 8 + INF = 9 + NAME = 10 + OPEN_PAREN = 11 + CLOSE_PAREN = 12 + BOOLEAN_OR = 13 + BOOLEAN_AND = 14 + GT = 15 + LT = 16 + GTE = 17 + LTE = 18 + EQ = 19 + NEQ = 20 + PLUS = 21 + MINUS = 22 + ASTERISK = 23 + SLASH = 24 + CARET = 25 + EXCLAMATION_MARK = 26 + COMMA = 27 + + channelNames = ["DEFAULT_TOKEN_CHANNEL", "HIDDEN"] + + modeNames = ["DEFAULT_MODE"] + + literalNames = [ + "", + "'true'", + "'false'", + "'inf'", + "'('", + "')'", + "'||'", + "'&&'", + "'>'", + "'<'", + "'>='", + "'<='", + "'=='", + "'!='", + "'+'", + "'-'", + "'*'", + "'/'", + "'^'", + "'!'", + "','", + ] + + symbolicNames = [ + "", + "NUMBER", + "INTEGER", + "EXPONENT_FLOAT", + "POINT_FLOAT", + "FLOAT_NUMBER", + "WS", + "TRUE", + "FALSE", + "INF", + "NAME", + "OPEN_PAREN", + "CLOSE_PAREN", + "BOOLEAN_OR", + "BOOLEAN_AND", + "GT", + "LT", + "GTE", + "LTE", + "EQ", + "NEQ", + "PLUS", + "MINUS", + "ASTERISK", + "SLASH", + "CARET", + "EXCLAMATION_MARK", + "COMMA", + ] + + ruleNames = [ + "NUMBER", + "INTEGER", + "EXPONENT_FLOAT", + "POINT_FLOAT", + "EXPONENT", + "FLOAT_NUMBER", + "DIGITS", + "WS", + "TRUE", + "FALSE", + "INF", + "NAME", + "OPEN_PAREN", + "CLOSE_PAREN", + "BOOLEAN_OR", + "BOOLEAN_AND", + "GT", + "LT", + "GTE", + "LTE", + "EQ", + "NEQ", + "PLUS", + "MINUS", + "ASTERISK", + "SLASH", + "CARET", + "EXCLAMATION_MARK", + "COMMA", + ] + + 
grammarFileName = "PetabMathExprLexer.g4" + + def __init__(self, input=None, output: TextIO = sys.stdout): + super().__init__(input, output) + self.checkVersion("4.13.1") + self._interp = LexerATNSimulator( + self, self.atn, self.decisionsToDFA, PredictionContextCache() + ) + self._actions = None + self._predicates = None diff --git a/petab/v1/math/_generated/PetabMathExprLexer.tokens b/petab/v1/math/_generated/PetabMathExprLexer.tokens new file mode 100644 index 00000000..bfa04b53 --- /dev/null +++ b/petab/v1/math/_generated/PetabMathExprLexer.tokens @@ -0,0 +1,47 @@ +NUMBER=1 +INTEGER=2 +EXPONENT_FLOAT=3 +POINT_FLOAT=4 +FLOAT_NUMBER=5 +WS=6 +TRUE=7 +FALSE=8 +INF=9 +NAME=10 +OPEN_PAREN=11 +CLOSE_PAREN=12 +BOOLEAN_OR=13 +BOOLEAN_AND=14 +GT=15 +LT=16 +GTE=17 +LTE=18 +EQ=19 +NEQ=20 +PLUS=21 +MINUS=22 +ASTERISK=23 +SLASH=24 +CARET=25 +EXCLAMATION_MARK=26 +COMMA=27 +'true'=7 +'false'=8 +'inf'=9 +'('=11 +')'=12 +'||'=13 +'&&'=14 +'>'=15 +'<'=16 +'>='=17 +'<='=18 +'=='=19 +'!='=20 +'+'=21 +'-'=22 +'*'=23 +'/'=24 +'^'=25 +'!'=26 +','=27 diff --git a/petab/v1/math/_generated/PetabMathExprParser.interp b/petab/v1/math/_generated/PetabMathExprParser.interp new file mode 100644 index 00000000..0d3f8f5b --- /dev/null +++ b/petab/v1/math/_generated/PetabMathExprParser.interp @@ -0,0 +1,73 @@ +token literal names: +null +null +null +null +null +null +null +'true' +'false' +'inf' +null +'(' +')' +'||' +'&&' +'>' +'<' +'>=' +'<=' +'==' +'!=' +'+' +'-' +'*' +'/' +'^' +'!' 
+',' + +token symbolic names: +null +NUMBER +INTEGER +EXPONENT_FLOAT +POINT_FLOAT +FLOAT_NUMBER +WS +TRUE +FALSE +INF +NAME +OPEN_PAREN +CLOSE_PAREN +BOOLEAN_OR +BOOLEAN_AND +GT +LT +GTE +LTE +EQ +NEQ +PLUS +MINUS +ASTERISK +SLASH +CARET +EXCLAMATION_MARK +COMMA + +rule names: +petabExpression +expr +comp_op +argumentList +functionCall +booleanLiteral +number +var + + +atn: +[4, 1, 27, 77, 2, 0, 7, 0, 2, 1, 7, 1, 2, 2, 7, 2, 2, 3, 7, 3, 2, 4, 7, 4, 2, 5, 7, 5, 2, 6, 7, 6, 2, 7, 7, 7, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 33, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 51, 8, 1, 10, 1, 12, 1, 54, 9, 1, 1, 2, 1, 2, 1, 3, 1, 3, 1, 3, 5, 3, 61, 8, 3, 10, 3, 12, 3, 64, 9, 3, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 5, 1, 5, 1, 6, 1, 6, 1, 7, 1, 7, 1, 7, 0, 1, 2, 8, 0, 2, 4, 6, 8, 10, 12, 14, 0, 5, 1, 0, 21, 22, 1, 0, 23, 24, 1, 0, 13, 14, 1, 0, 15, 20, 1, 0, 7, 8, 80, 0, 16, 1, 0, 0, 0, 2, 32, 1, 0, 0, 0, 4, 55, 1, 0, 0, 0, 6, 57, 1, 0, 0, 0, 8, 65, 1, 0, 0, 0, 10, 70, 1, 0, 0, 0, 12, 72, 1, 0, 0, 0, 14, 74, 1, 0, 0, 0, 16, 17, 3, 2, 1, 0, 17, 18, 5, 0, 0, 1, 18, 1, 1, 0, 0, 0, 19, 20, 6, 1, -1, 0, 20, 21, 7, 0, 0, 0, 21, 33, 3, 2, 1, 11, 22, 23, 5, 26, 0, 0, 23, 33, 3, 2, 1, 10, 24, 25, 5, 11, 0, 0, 25, 26, 3, 2, 1, 0, 26, 27, 5, 12, 0, 0, 27, 33, 1, 0, 0, 0, 28, 33, 3, 12, 6, 0, 29, 33, 3, 10, 5, 0, 30, 33, 3, 8, 4, 0, 31, 33, 3, 14, 7, 0, 32, 19, 1, 0, 0, 0, 32, 22, 1, 0, 0, 0, 32, 24, 1, 0, 0, 0, 32, 28, 1, 0, 0, 0, 32, 29, 1, 0, 0, 0, 32, 30, 1, 0, 0, 0, 32, 31, 1, 0, 0, 0, 33, 52, 1, 0, 0, 0, 34, 35, 10, 12, 0, 0, 35, 36, 5, 25, 0, 0, 36, 51, 3, 2, 1, 12, 37, 38, 10, 9, 0, 0, 38, 39, 7, 1, 0, 0, 39, 51, 3, 2, 1, 10, 40, 41, 10, 8, 0, 0, 41, 42, 7, 0, 0, 0, 42, 51, 3, 2, 1, 9, 43, 44, 10, 6, 0, 0, 44, 45, 3, 4, 2, 0, 45, 46, 3, 2, 1, 7, 46, 51, 1, 0, 0, 0, 47, 48, 10, 5, 0, 0, 48, 49, 7, 2, 0, 0, 49, 51, 3, 2, 1, 6, 50, 34, 1, 0, 0, 0, 50, 37, 1, 
0, 0, 0, 50, 40, 1, 0, 0, 0, 50, 43, 1, 0, 0, 0, 50, 47, 1, 0, 0, 0, 51, 54, 1, 0, 0, 0, 52, 50, 1, 0, 0, 0, 52, 53, 1, 0, 0, 0, 53, 3, 1, 0, 0, 0, 54, 52, 1, 0, 0, 0, 55, 56, 7, 3, 0, 0, 56, 5, 1, 0, 0, 0, 57, 62, 3, 2, 1, 0, 58, 59, 5, 27, 0, 0, 59, 61, 3, 2, 1, 0, 60, 58, 1, 0, 0, 0, 61, 64, 1, 0, 0, 0, 62, 60, 1, 0, 0, 0, 62, 63, 1, 0, 0, 0, 63, 7, 1, 0, 0, 0, 64, 62, 1, 0, 0, 0, 65, 66, 5, 10, 0, 0, 66, 67, 5, 11, 0, 0, 67, 68, 3, 6, 3, 0, 68, 69, 5, 12, 0, 0, 69, 9, 1, 0, 0, 0, 70, 71, 7, 4, 0, 0, 71, 11, 1, 0, 0, 0, 72, 73, 5, 1, 0, 0, 73, 13, 1, 0, 0, 0, 74, 75, 5, 10, 0, 0, 75, 15, 1, 0, 0, 0, 4, 32, 50, 52, 62] diff --git a/petab/v1/math/_generated/PetabMathExprParser.py b/petab/v1/math/_generated/PetabMathExprParser.py new file mode 100644 index 00000000..6341a56b --- /dev/null +++ b/petab/v1/math/_generated/PetabMathExprParser.py @@ -0,0 +1,1764 @@ +# Generated from PetabMathExprParser.g4 by ANTLR 4.13.1 +import sys + +from antlr4 import * + +if sys.version_info[1] > 5: + from typing import TextIO +else: + from typing.io import TextIO + + +def serializedATN(): + return [ + 4, + 1, + 27, + 77, + 2, + 0, + 7, + 0, + 2, + 1, + 7, + 1, + 2, + 2, + 7, + 2, + 2, + 3, + 7, + 3, + 2, + 4, + 7, + 4, + 2, + 5, + 7, + 5, + 2, + 6, + 7, + 6, + 2, + 7, + 7, + 7, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 3, + 1, + 33, + 8, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 5, + 1, + 51, + 8, + 1, + 10, + 1, + 12, + 1, + 54, + 9, + 1, + 1, + 2, + 1, + 2, + 1, + 3, + 1, + 3, + 1, + 3, + 5, + 3, + 61, + 8, + 3, + 10, + 3, + 12, + 3, + 64, + 9, + 3, + 1, + 4, + 1, + 4, + 1, + 4, + 1, + 4, + 1, + 4, + 1, + 5, + 1, + 5, + 1, + 6, + 1, + 6, + 1, + 7, + 1, + 7, + 1, + 7, + 0, + 1, + 2, + 8, + 0, + 2, + 4, + 6, + 8, + 10, + 12, + 14, + 0, + 5, 
+ 1, + 0, + 21, + 22, + 1, + 0, + 23, + 24, + 1, + 0, + 13, + 14, + 1, + 0, + 15, + 20, + 1, + 0, + 7, + 8, + 80, + 0, + 16, + 1, + 0, + 0, + 0, + 2, + 32, + 1, + 0, + 0, + 0, + 4, + 55, + 1, + 0, + 0, + 0, + 6, + 57, + 1, + 0, + 0, + 0, + 8, + 65, + 1, + 0, + 0, + 0, + 10, + 70, + 1, + 0, + 0, + 0, + 12, + 72, + 1, + 0, + 0, + 0, + 14, + 74, + 1, + 0, + 0, + 0, + 16, + 17, + 3, + 2, + 1, + 0, + 17, + 18, + 5, + 0, + 0, + 1, + 18, + 1, + 1, + 0, + 0, + 0, + 19, + 20, + 6, + 1, + -1, + 0, + 20, + 21, + 7, + 0, + 0, + 0, + 21, + 33, + 3, + 2, + 1, + 11, + 22, + 23, + 5, + 26, + 0, + 0, + 23, + 33, + 3, + 2, + 1, + 10, + 24, + 25, + 5, + 11, + 0, + 0, + 25, + 26, + 3, + 2, + 1, + 0, + 26, + 27, + 5, + 12, + 0, + 0, + 27, + 33, + 1, + 0, + 0, + 0, + 28, + 33, + 3, + 12, + 6, + 0, + 29, + 33, + 3, + 10, + 5, + 0, + 30, + 33, + 3, + 8, + 4, + 0, + 31, + 33, + 3, + 14, + 7, + 0, + 32, + 19, + 1, + 0, + 0, + 0, + 32, + 22, + 1, + 0, + 0, + 0, + 32, + 24, + 1, + 0, + 0, + 0, + 32, + 28, + 1, + 0, + 0, + 0, + 32, + 29, + 1, + 0, + 0, + 0, + 32, + 30, + 1, + 0, + 0, + 0, + 32, + 31, + 1, + 0, + 0, + 0, + 33, + 52, + 1, + 0, + 0, + 0, + 34, + 35, + 10, + 12, + 0, + 0, + 35, + 36, + 5, + 25, + 0, + 0, + 36, + 51, + 3, + 2, + 1, + 12, + 37, + 38, + 10, + 9, + 0, + 0, + 38, + 39, + 7, + 1, + 0, + 0, + 39, + 51, + 3, + 2, + 1, + 10, + 40, + 41, + 10, + 8, + 0, + 0, + 41, + 42, + 7, + 0, + 0, + 0, + 42, + 51, + 3, + 2, + 1, + 9, + 43, + 44, + 10, + 6, + 0, + 0, + 44, + 45, + 3, + 4, + 2, + 0, + 45, + 46, + 3, + 2, + 1, + 7, + 46, + 51, + 1, + 0, + 0, + 0, + 47, + 48, + 10, + 5, + 0, + 0, + 48, + 49, + 7, + 2, + 0, + 0, + 49, + 51, + 3, + 2, + 1, + 6, + 50, + 34, + 1, + 0, + 0, + 0, + 50, + 37, + 1, + 0, + 0, + 0, + 50, + 40, + 1, + 0, + 0, + 0, + 50, + 43, + 1, + 0, + 0, + 0, + 50, + 47, + 1, + 0, + 0, + 0, + 51, + 54, + 1, + 0, + 0, + 0, + 52, + 50, + 1, + 0, + 0, + 0, + 52, + 53, + 1, + 0, + 0, + 0, + 53, + 3, + 1, + 0, + 0, + 0, + 54, + 52, + 1, + 0, + 0, + 0, + 55, + 56, + 7, + 
3, + 0, + 0, + 56, + 5, + 1, + 0, + 0, + 0, + 57, + 62, + 3, + 2, + 1, + 0, + 58, + 59, + 5, + 27, + 0, + 0, + 59, + 61, + 3, + 2, + 1, + 0, + 60, + 58, + 1, + 0, + 0, + 0, + 61, + 64, + 1, + 0, + 0, + 0, + 62, + 60, + 1, + 0, + 0, + 0, + 62, + 63, + 1, + 0, + 0, + 0, + 63, + 7, + 1, + 0, + 0, + 0, + 64, + 62, + 1, + 0, + 0, + 0, + 65, + 66, + 5, + 10, + 0, + 0, + 66, + 67, + 5, + 11, + 0, + 0, + 67, + 68, + 3, + 6, + 3, + 0, + 68, + 69, + 5, + 12, + 0, + 0, + 69, + 9, + 1, + 0, + 0, + 0, + 70, + 71, + 7, + 4, + 0, + 0, + 71, + 11, + 1, + 0, + 0, + 0, + 72, + 73, + 5, + 1, + 0, + 0, + 73, + 13, + 1, + 0, + 0, + 0, + 74, + 75, + 5, + 10, + 0, + 0, + 75, + 15, + 1, + 0, + 0, + 0, + 4, + 32, + 50, + 52, + 62, + ] + + +class PetabMathExprParser(Parser): + grammarFileName = "PetabMathExprParser.g4" + + atn = ATNDeserializer().deserialize(serializedATN()) + + decisionsToDFA = [DFA(ds, i) for i, ds in enumerate(atn.decisionToState)] + + sharedContextCache = PredictionContextCache() + + literalNames = [ + "", + "", + "", + "", + "", + "", + "", + "'true'", + "'false'", + "'inf'", + "", + "'('", + "')'", + "'||'", + "'&&'", + "'>'", + "'<'", + "'>='", + "'<='", + "'=='", + "'!='", + "'+'", + "'-'", + "'*'", + "'/'", + "'^'", + "'!'", + "','", + ] + + symbolicNames = [ + "", + "NUMBER", + "INTEGER", + "EXPONENT_FLOAT", + "POINT_FLOAT", + "FLOAT_NUMBER", + "WS", + "TRUE", + "FALSE", + "INF", + "NAME", + "OPEN_PAREN", + "CLOSE_PAREN", + "BOOLEAN_OR", + "BOOLEAN_AND", + "GT", + "LT", + "GTE", + "LTE", + "EQ", + "NEQ", + "PLUS", + "MINUS", + "ASTERISK", + "SLASH", + "CARET", + "EXCLAMATION_MARK", + "COMMA", + ] + + RULE_petabExpression = 0 + RULE_expr = 1 + RULE_comp_op = 2 + RULE_argumentList = 3 + RULE_functionCall = 4 + RULE_booleanLiteral = 5 + RULE_number = 6 + RULE_var = 7 + + ruleNames = [ + "petabExpression", + "expr", + "comp_op", + "argumentList", + "functionCall", + "booleanLiteral", + "number", + "var", + ] + + EOF = Token.EOF + NUMBER = 1 + INTEGER = 2 + 
EXPONENT_FLOAT = 3 + POINT_FLOAT = 4 + FLOAT_NUMBER = 5 + WS = 6 + TRUE = 7 + FALSE = 8 + INF = 9 + NAME = 10 + OPEN_PAREN = 11 + CLOSE_PAREN = 12 + BOOLEAN_OR = 13 + BOOLEAN_AND = 14 + GT = 15 + LT = 16 + GTE = 17 + LTE = 18 + EQ = 19 + NEQ = 20 + PLUS = 21 + MINUS = 22 + ASTERISK = 23 + SLASH = 24 + CARET = 25 + EXCLAMATION_MARK = 26 + COMMA = 27 + + def __init__(self, input: TokenStream, output: TextIO = sys.stdout): + super().__init__(input, output) + self.checkVersion("4.13.1") + self._interp = ParserATNSimulator( + self, self.atn, self.decisionsToDFA, self.sharedContextCache + ) + self._predicates = None + + class PetabExpressionContext(ParserRuleContext): + __slots__ = "parser" + + def __init__( + self, + parser, + parent: ParserRuleContext = None, + invokingState: int = -1, + ): + super().__init__(parent, invokingState) + self.parser = parser + + def expr(self): + return self.getTypedRuleContext(PetabMathExprParser.ExprContext, 0) + + def EOF(self): + return self.getToken(PetabMathExprParser.EOF, 0) + + def getRuleIndex(self): + return PetabMathExprParser.RULE_petabExpression + + def accept(self, visitor: ParseTreeVisitor): + if hasattr(visitor, "visitPetabExpression"): + return visitor.visitPetabExpression(self) + else: + return visitor.visitChildren(self) + + def petabExpression(self): + localctx = PetabMathExprParser.PetabExpressionContext( + self, self._ctx, self.state + ) + self.enterRule(localctx, 0, self.RULE_petabExpression) + try: + self.enterOuterAlt(localctx, 1) + self.state = 16 + self.expr(0) + self.state = 17 + self.match(PetabMathExprParser.EOF) + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + class ExprContext(ParserRuleContext): + __slots__ = "parser" + + def __init__( + self, + parser, + parent: ParserRuleContext = None, + invokingState: int = -1, + ): + super().__init__(parent, invokingState) + 
self.parser = parser + + def getRuleIndex(self): + return PetabMathExprParser.RULE_expr + + def copyFrom(self, ctx: ParserRuleContext): + super().copyFrom(ctx) + + class PowerExprContext(ExprContext): + def __init__( + self, parser, ctx: ParserRuleContext + ): # actually a PetabMathExprParser.ExprContext + super().__init__(parser) + self.copyFrom(ctx) + + def expr(self, i: int = None): + if i is None: + return self.getTypedRuleContexts( + PetabMathExprParser.ExprContext + ) + else: + return self.getTypedRuleContext( + PetabMathExprParser.ExprContext, i + ) + + def CARET(self): + return self.getToken(PetabMathExprParser.CARET, 0) + + def accept(self, visitor: ParseTreeVisitor): + if hasattr(visitor, "visitPowerExpr"): + return visitor.visitPowerExpr(self) + else: + return visitor.visitChildren(self) + + class BooleanAndOrExprContext(ExprContext): + def __init__( + self, parser, ctx: ParserRuleContext + ): # actually a PetabMathExprParser.ExprContext + super().__init__(parser) + self.copyFrom(ctx) + + def expr(self, i: int = None): + if i is None: + return self.getTypedRuleContexts( + PetabMathExprParser.ExprContext + ) + else: + return self.getTypedRuleContext( + PetabMathExprParser.ExprContext, i + ) + + def BOOLEAN_AND(self): + return self.getToken(PetabMathExprParser.BOOLEAN_AND, 0) + + def BOOLEAN_OR(self): + return self.getToken(PetabMathExprParser.BOOLEAN_OR, 0) + + def accept(self, visitor: ParseTreeVisitor): + if hasattr(visitor, "visitBooleanAndOrExpr"): + return visitor.visitBooleanAndOrExpr(self) + else: + return visitor.visitChildren(self) + + class ComparisonExprContext(ExprContext): + def __init__( + self, parser, ctx: ParserRuleContext + ): # actually a PetabMathExprParser.ExprContext + super().__init__(parser) + self.copyFrom(ctx) + + def expr(self, i: int = None): + if i is None: + return self.getTypedRuleContexts( + PetabMathExprParser.ExprContext + ) + else: + return self.getTypedRuleContext( + PetabMathExprParser.ExprContext, i + ) + + def 
comp_op(self): + return self.getTypedRuleContext( + PetabMathExprParser.Comp_opContext, 0 + ) + + def accept(self, visitor: ParseTreeVisitor): + if hasattr(visitor, "visitComparisonExpr"): + return visitor.visitComparisonExpr(self) + else: + return visitor.visitChildren(self) + + class MultExprContext(ExprContext): + def __init__( + self, parser, ctx: ParserRuleContext + ): # actually a PetabMathExprParser.ExprContext + super().__init__(parser) + self.copyFrom(ctx) + + def expr(self, i: int = None): + if i is None: + return self.getTypedRuleContexts( + PetabMathExprParser.ExprContext + ) + else: + return self.getTypedRuleContext( + PetabMathExprParser.ExprContext, i + ) + + def ASTERISK(self): + return self.getToken(PetabMathExprParser.ASTERISK, 0) + + def SLASH(self): + return self.getToken(PetabMathExprParser.SLASH, 0) + + def accept(self, visitor: ParseTreeVisitor): + if hasattr(visitor, "visitMultExpr"): + return visitor.visitMultExpr(self) + else: + return visitor.visitChildren(self) + + class BooleanLiteral_Context(ExprContext): + def __init__( + self, parser, ctx: ParserRuleContext + ): # actually a PetabMathExprParser.ExprContext + super().__init__(parser) + self.copyFrom(ctx) + + def booleanLiteral(self): + return self.getTypedRuleContext( + PetabMathExprParser.BooleanLiteralContext, 0 + ) + + def accept(self, visitor: ParseTreeVisitor): + if hasattr(visitor, "visitBooleanLiteral_"): + return visitor.visitBooleanLiteral_(self) + else: + return visitor.visitChildren(self) + + class AddExprContext(ExprContext): + def __init__( + self, parser, ctx: ParserRuleContext + ): # actually a PetabMathExprParser.ExprContext + super().__init__(parser) + self.copyFrom(ctx) + + def expr(self, i: int = None): + if i is None: + return self.getTypedRuleContexts( + PetabMathExprParser.ExprContext + ) + else: + return self.getTypedRuleContext( + PetabMathExprParser.ExprContext, i + ) + + def PLUS(self): + return self.getToken(PetabMathExprParser.PLUS, 0) + + def MINUS(self): 
+ return self.getToken(PetabMathExprParser.MINUS, 0) + + def accept(self, visitor: ParseTreeVisitor): + if hasattr(visitor, "visitAddExpr"): + return visitor.visitAddExpr(self) + else: + return visitor.visitChildren(self) + + class BooleanNotExprContext(ExprContext): + def __init__( + self, parser, ctx: ParserRuleContext + ): # actually a PetabMathExprParser.ExprContext + super().__init__(parser) + self.copyFrom(ctx) + + def EXCLAMATION_MARK(self): + return self.getToken(PetabMathExprParser.EXCLAMATION_MARK, 0) + + def expr(self): + return self.getTypedRuleContext(PetabMathExprParser.ExprContext, 0) + + def accept(self, visitor: ParseTreeVisitor): + if hasattr(visitor, "visitBooleanNotExpr"): + return visitor.visitBooleanNotExpr(self) + else: + return visitor.visitChildren(self) + + class ParenExprContext(ExprContext): + def __init__( + self, parser, ctx: ParserRuleContext + ): # actually a PetabMathExprParser.ExprContext + super().__init__(parser) + self.copyFrom(ctx) + + def OPEN_PAREN(self): + return self.getToken(PetabMathExprParser.OPEN_PAREN, 0) + + def expr(self): + return self.getTypedRuleContext(PetabMathExprParser.ExprContext, 0) + + def CLOSE_PAREN(self): + return self.getToken(PetabMathExprParser.CLOSE_PAREN, 0) + + def accept(self, visitor: ParseTreeVisitor): + if hasattr(visitor, "visitParenExpr"): + return visitor.visitParenExpr(self) + else: + return visitor.visitChildren(self) + + class FunctionCall_Context(ExprContext): + def __init__( + self, parser, ctx: ParserRuleContext + ): # actually a PetabMathExprParser.ExprContext + super().__init__(parser) + self.copyFrom(ctx) + + def functionCall(self): + return self.getTypedRuleContext( + PetabMathExprParser.FunctionCallContext, 0 + ) + + def accept(self, visitor: ParseTreeVisitor): + if hasattr(visitor, "visitFunctionCall_"): + return visitor.visitFunctionCall_(self) + else: + return visitor.visitChildren(self) + + class UnaryExprContext(ExprContext): + def __init__( + self, parser, ctx: 
ParserRuleContext + ): # actually a PetabMathExprParser.ExprContext + super().__init__(parser) + self.copyFrom(ctx) + + def expr(self): + return self.getTypedRuleContext(PetabMathExprParser.ExprContext, 0) + + def PLUS(self): + return self.getToken(PetabMathExprParser.PLUS, 0) + + def MINUS(self): + return self.getToken(PetabMathExprParser.MINUS, 0) + + def accept(self, visitor: ParseTreeVisitor): + if hasattr(visitor, "visitUnaryExpr"): + return visitor.visitUnaryExpr(self) + else: + return visitor.visitChildren(self) + + class Number_Context(ExprContext): + def __init__( + self, parser, ctx: ParserRuleContext + ): # actually a PetabMathExprParser.ExprContext + super().__init__(parser) + self.copyFrom(ctx) + + def number(self): + return self.getTypedRuleContext( + PetabMathExprParser.NumberContext, 0 + ) + + def accept(self, visitor: ParseTreeVisitor): + if hasattr(visitor, "visitNumber_"): + return visitor.visitNumber_(self) + else: + return visitor.visitChildren(self) + + class VarExpr_Context(ExprContext): + def __init__( + self, parser, ctx: ParserRuleContext + ): # actually a PetabMathExprParser.ExprContext + super().__init__(parser) + self.copyFrom(ctx) + + def var(self): + return self.getTypedRuleContext(PetabMathExprParser.VarContext, 0) + + def accept(self, visitor: ParseTreeVisitor): + if hasattr(visitor, "visitVarExpr_"): + return visitor.visitVarExpr_(self) + else: + return visitor.visitChildren(self) + + def expr(self, _p: int = 0): + _parentctx = self._ctx + _parentState = self.state + localctx = PetabMathExprParser.ExprContext( + self, self._ctx, _parentState + ) + _prevctx = localctx + _startState = 2 + self.enterRecursionRule(localctx, 2, self.RULE_expr, _p) + self._la = 0 # Token type + try: + self.enterOuterAlt(localctx, 1) + self.state = 32 + self._errHandler.sync(self) + la_ = self._interp.adaptivePredict(self._input, 0, self._ctx) + if la_ == 1: + localctx = PetabMathExprParser.UnaryExprContext(self, localctx) + self._ctx = localctx + 
_prevctx = localctx + + self.state = 20 + _la = self._input.LA(1) + if not (_la == 21 or _la == 22): + self._errHandler.recoverInline(self) + else: + self._errHandler.reportMatch(self) + self.consume() + self.state = 21 + self.expr(11) + pass + + elif la_ == 2: + localctx = PetabMathExprParser.BooleanNotExprContext( + self, localctx + ) + self._ctx = localctx + _prevctx = localctx + self.state = 22 + self.match(PetabMathExprParser.EXCLAMATION_MARK) + self.state = 23 + self.expr(10) + pass + + elif la_ == 3: + localctx = PetabMathExprParser.ParenExprContext(self, localctx) + self._ctx = localctx + _prevctx = localctx + self.state = 24 + self.match(PetabMathExprParser.OPEN_PAREN) + self.state = 25 + self.expr(0) + self.state = 26 + self.match(PetabMathExprParser.CLOSE_PAREN) + pass + + elif la_ == 4: + localctx = PetabMathExprParser.Number_Context(self, localctx) + self._ctx = localctx + _prevctx = localctx + self.state = 28 + self.number() + pass + + elif la_ == 5: + localctx = PetabMathExprParser.BooleanLiteral_Context( + self, localctx + ) + self._ctx = localctx + _prevctx = localctx + self.state = 29 + self.booleanLiteral() + pass + + elif la_ == 6: + localctx = PetabMathExprParser.FunctionCall_Context( + self, localctx + ) + self._ctx = localctx + _prevctx = localctx + self.state = 30 + self.functionCall() + pass + + elif la_ == 7: + localctx = PetabMathExprParser.VarExpr_Context(self, localctx) + self._ctx = localctx + _prevctx = localctx + self.state = 31 + self.var() + pass + + self._ctx.stop = self._input.LT(-1) + self.state = 52 + self._errHandler.sync(self) + _alt = self._interp.adaptivePredict(self._input, 2, self._ctx) + while _alt != 2 and _alt != ATN.INVALID_ALT_NUMBER: + if _alt == 1: + if self._parseListeners is not None: + self.triggerExitRuleEvent() + _prevctx = localctx + self.state = 50 + self._errHandler.sync(self) + la_ = self._interp.adaptivePredict( + self._input, 1, self._ctx + ) + if la_ == 1: + localctx = 
PetabMathExprParser.PowerExprContext( + self, + PetabMathExprParser.ExprContext( + self, _parentctx, _parentState + ), + ) + self.pushNewRecursionContext( + localctx, _startState, self.RULE_expr + ) + self.state = 34 + if not self.precpred(self._ctx, 12): + from antlr4.error.Errors import ( + FailedPredicateException, + ) + + raise FailedPredicateException( + self, "self.precpred(self._ctx, 12)" + ) + self.state = 35 + self.match(PetabMathExprParser.CARET) + self.state = 36 + self.expr(12) + pass + + elif la_ == 2: + localctx = PetabMathExprParser.MultExprContext( + self, + PetabMathExprParser.ExprContext( + self, _parentctx, _parentState + ), + ) + self.pushNewRecursionContext( + localctx, _startState, self.RULE_expr + ) + self.state = 37 + if not self.precpred(self._ctx, 9): + from antlr4.error.Errors import ( + FailedPredicateException, + ) + + raise FailedPredicateException( + self, "self.precpred(self._ctx, 9)" + ) + self.state = 38 + _la = self._input.LA(1) + if not (_la == 23 or _la == 24): + self._errHandler.recoverInline(self) + else: + self._errHandler.reportMatch(self) + self.consume() + self.state = 39 + self.expr(10) + pass + + elif la_ == 3: + localctx = PetabMathExprParser.AddExprContext( + self, + PetabMathExprParser.ExprContext( + self, _parentctx, _parentState + ), + ) + self.pushNewRecursionContext( + localctx, _startState, self.RULE_expr + ) + self.state = 40 + if not self.precpred(self._ctx, 8): + from antlr4.error.Errors import ( + FailedPredicateException, + ) + + raise FailedPredicateException( + self, "self.precpred(self._ctx, 8)" + ) + self.state = 41 + _la = self._input.LA(1) + if not (_la == 21 or _la == 22): + self._errHandler.recoverInline(self) + else: + self._errHandler.reportMatch(self) + self.consume() + self.state = 42 + self.expr(9) + pass + + elif la_ == 4: + localctx = PetabMathExprParser.ComparisonExprContext( + self, + PetabMathExprParser.ExprContext( + self, _parentctx, _parentState + ), + ) + self.pushNewRecursionContext( + 
localctx, _startState, self.RULE_expr + ) + self.state = 43 + if not self.precpred(self._ctx, 6): + from antlr4.error.Errors import ( + FailedPredicateException, + ) + + raise FailedPredicateException( + self, "self.precpred(self._ctx, 6)" + ) + self.state = 44 + self.comp_op() + self.state = 45 + self.expr(7) + pass + + elif la_ == 5: + localctx = PetabMathExprParser.BooleanAndOrExprContext( + self, + PetabMathExprParser.ExprContext( + self, _parentctx, _parentState + ), + ) + self.pushNewRecursionContext( + localctx, _startState, self.RULE_expr + ) + self.state = 47 + if not self.precpred(self._ctx, 5): + from antlr4.error.Errors import ( + FailedPredicateException, + ) + + raise FailedPredicateException( + self, "self.precpred(self._ctx, 5)" + ) + self.state = 48 + _la = self._input.LA(1) + if not (_la == 13 or _la == 14): + self._errHandler.recoverInline(self) + else: + self._errHandler.reportMatch(self) + self.consume() + self.state = 49 + self.expr(6) + pass + + self.state = 54 + self._errHandler.sync(self) + _alt = self._interp.adaptivePredict(self._input, 2, self._ctx) + + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.unrollRecursionContexts(_parentctx) + return localctx + + class Comp_opContext(ParserRuleContext): + __slots__ = "parser" + + def __init__( + self, + parser, + parent: ParserRuleContext = None, + invokingState: int = -1, + ): + super().__init__(parent, invokingState) + self.parser = parser + + def GT(self): + return self.getToken(PetabMathExprParser.GT, 0) + + def LT(self): + return self.getToken(PetabMathExprParser.LT, 0) + + def GTE(self): + return self.getToken(PetabMathExprParser.GTE, 0) + + def LTE(self): + return self.getToken(PetabMathExprParser.LTE, 0) + + def EQ(self): + return self.getToken(PetabMathExprParser.EQ, 0) + + def NEQ(self): + return self.getToken(PetabMathExprParser.NEQ, 0) + + def getRuleIndex(self): + return 
# NOTE: ANTLR-generated code (see regenerate.sh). Manual edits are lost on
# regeneration; the comments below only explain what the visible code does.
# These definitions are members of ``PetabMathExprParser``.


def comp_op(self):
    """Parse the ``comp_op`` rule: consume a single comparison token.

    Returns the populated ``Comp_opContext``.
    """
    localctx = PetabMathExprParser.Comp_opContext(
        self, self._ctx, self.state
    )
    self.enterRule(localctx, 4, self.RULE_comp_op)
    self._la = 0  # Token type
    try:
        self.enterOuterAlt(localctx, 1)
        self.state = 55
        _la = self._input.LA(1)
        # 2064384 has exactly bits 15..20 set, i.e. the lookahead token type
        # must be one of 15..20 (GT, LT, GTE, LTE, EQ, NEQ in the .tokens file).
        if not (((_la) & ~0x3F) == 0 and ((1 << _la) & 2064384) != 0):
            self._errHandler.recoverInline(self)
        else:
            self._errHandler.reportMatch(self)
            self.consume()
    except RecognitionException as re:
        # Record the error on the context, report it and let the error
        # handler recover.
        localctx.exception = re
        self._errHandler.reportError(self, re)
        self._errHandler.recover(self, re)
    finally:
        self.exitRule()
    return localctx


class ArgumentListContext(ParserRuleContext):
    """Parse-tree node for the ``argumentList`` rule."""

    __slots__ = "parser"

    def __init__(
        self,
        parser,
        parent: ParserRuleContext = None,
        invokingState: int = -1,
    ):
        super().__init__(parent, invokingState)
        self.parser = parser

    def expr(self, i: int = None):
        """Return all ``ExprContext`` children, or only the ``i``-th one."""
        if i is None:
            return self.getTypedRuleContexts(
                PetabMathExprParser.ExprContext
            )
        else:
            return self.getTypedRuleContext(
                PetabMathExprParser.ExprContext, i
            )

    def COMMA(self, i: int = None):
        """Return all ``COMMA`` tokens, or only the ``i``-th one."""
        if i is None:
            return self.getTokens(PetabMathExprParser.COMMA)
        else:
            return self.getToken(PetabMathExprParser.COMMA, i)

    def getRuleIndex(self):
        return PetabMathExprParser.RULE_argumentList

    def accept(self, visitor: ParseTreeVisitor):
        """Dispatch to ``visitArgumentList`` if the visitor defines it."""
        if hasattr(visitor, "visitArgumentList"):
            return visitor.visitArgumentList(self)
        else:
            return visitor.visitChildren(self)


def argumentList(self):
    """Parse the ``argumentList`` rule: ``expr`` followed by any number of
    ``COMMA expr`` pairs.

    Returns the populated ``ArgumentListContext``.
    """
    localctx = PetabMathExprParser.ArgumentListContext(
        self, self._ctx, self.state
    )
    self.enterRule(localctx, 6, self.RULE_argumentList)
    self._la = 0  # Token type
    try:
        self.enterOuterAlt(localctx, 1)
        self.state = 57
        self.expr(0)
        self.state = 62
        self._errHandler.sync(self)
        _la = self._input.LA(1)
        # 27 is the COMMA token type (see the .tokens file).
        while _la == 27:
            self.state = 58
            self.match(PetabMathExprParser.COMMA)
            self.state = 59
            self.expr(0)
            self.state = 64
            self._errHandler.sync(self)
            _la = self._input.LA(1)

    except RecognitionException as re:
        localctx.exception = re
        self._errHandler.reportError(self, re)
        self._errHandler.recover(self, re)
    finally:
        self.exitRule()
    return localctx


class FunctionCallContext(ParserRuleContext):
    """Parse-tree node for the ``functionCall`` rule."""

    __slots__ = "parser"

    def __init__(
        self,
        parser,
        parent: ParserRuleContext = None,
        invokingState: int = -1,
    ):
        super().__init__(parent, invokingState)
        self.parser = parser

    def NAME(self):
        return self.getToken(PetabMathExprParser.NAME, 0)

    def OPEN_PAREN(self):
        return self.getToken(PetabMathExprParser.OPEN_PAREN, 0)

    def argumentList(self):
        return self.getTypedRuleContext(
            PetabMathExprParser.ArgumentListContext, 0
        )

    def CLOSE_PAREN(self):
        return self.getToken(PetabMathExprParser.CLOSE_PAREN, 0)

    def getRuleIndex(self):
        return PetabMathExprParser.RULE_functionCall

    def accept(self, visitor: ParseTreeVisitor):
        """Dispatch to ``visitFunctionCall`` if the visitor defines it."""
        if hasattr(visitor, "visitFunctionCall"):
            return visitor.visitFunctionCall(self)
        else:
            return visitor.visitChildren(self)


def functionCall(self):
    """Parse the ``functionCall`` rule:
    ``NAME OPEN_PAREN argumentList CLOSE_PAREN``.

    Returns the populated ``FunctionCallContext``.
    """
    localctx = PetabMathExprParser.FunctionCallContext(
        self, self._ctx, self.state
    )
    self.enterRule(localctx, 8, self.RULE_functionCall)
    try:
        self.enterOuterAlt(localctx, 1)
        self.state = 65
        self.match(PetabMathExprParser.NAME)
        self.state = 66
        self.match(PetabMathExprParser.OPEN_PAREN)
        self.state = 67
        self.argumentList()
        self.state = 68
        self.match(PetabMathExprParser.CLOSE_PAREN)
    except RecognitionException as re:
        localctx.exception = re
        self._errHandler.reportError(self, re)
        self._errHandler.recover(self, re)
    finally:
        self.exitRule()
    return localctx


class BooleanLiteralContext(ParserRuleContext):
    """Parse-tree node for the ``booleanLiteral`` rule."""

    __slots__ = "parser"

    def __init__(
        self,
        parser,
        parent: ParserRuleContext = None,
        invokingState: int = -1,
    ):
        super().__init__(parent, invokingState)
        self.parser = parser

    def TRUE(self):
        return self.getToken(PetabMathExprParser.TRUE, 0)

    def FALSE(self):
        return self.getToken(PetabMathExprParser.FALSE, 0)

    def getRuleIndex(self):
        return PetabMathExprParser.RULE_booleanLiteral

    def accept(self, visitor: ParseTreeVisitor):
        """Dispatch to ``visitBooleanLiteral`` if the visitor defines it."""
        if hasattr(visitor, "visitBooleanLiteral"):
            return visitor.visitBooleanLiteral(self)
        else:
            return visitor.visitChildren(self)


def booleanLiteral(self):
    """Parse the ``booleanLiteral`` rule: consume a TRUE or FALSE token.

    Returns the populated ``BooleanLiteralContext``.
    """
    localctx = PetabMathExprParser.BooleanLiteralContext(
        self, self._ctx, self.state
    )
    self.enterRule(localctx, 10, self.RULE_booleanLiteral)
    self._la = 0  # Token type
    try:
        self.enterOuterAlt(localctx, 1)
        self.state = 70
        _la = self._input.LA(1)
        # 7 and 8 are the TRUE and FALSE token types (see the .tokens file).
        if not (_la == 7 or _la == 8):
            self._errHandler.recoverInline(self)
        else:
            self._errHandler.reportMatch(self)
            self.consume()
    except RecognitionException as re:
        localctx.exception = re
        self._errHandler.reportError(self, re)
        self._errHandler.recover(self, re)
    finally:
        self.exitRule()
    return localctx


class NumberContext(ParserRuleContext):
    """Parse-tree node for the ``number`` rule."""

    __slots__ = "parser"

    def __init__(
        self,
        parser,
        parent: ParserRuleContext = None,
        invokingState: int = -1,
    ):
        super().__init__(parent, invokingState)
        self.parser = parser

    def NUMBER(self):
        return self.getToken(PetabMathExprParser.NUMBER, 0)

    def getRuleIndex(self):
        return PetabMathExprParser.RULE_number

    def accept(self, visitor: ParseTreeVisitor):
        """Dispatch to ``visitNumber`` if the visitor defines it."""
        if hasattr(visitor, "visitNumber"):
            return visitor.visitNumber(self)
        else:
            return visitor.visitChildren(self)


def number(self):
    """Parse the ``number`` rule: match a single NUMBER token.

    Returns the populated ``NumberContext``.
    """
    localctx = PetabMathExprParser.NumberContext(
        self, self._ctx, self.state
    )
    self.enterRule(localctx, 12, self.RULE_number)
    try:
        self.enterOuterAlt(localctx, 1)
        self.state = 72
        self.match(PetabMathExprParser.NUMBER)
    except RecognitionException as re:
        localctx.exception = re
        self._errHandler.reportError(self, re)
        self._errHandler.recover(self, re)
    finally:
        self.exitRule()
    return localctx


class VarContext(ParserRuleContext):
    """Parse-tree node for the ``var`` rule."""

    __slots__ = "parser"

    def __init__(
        self,
        parser,
        parent: ParserRuleContext = None,
        invokingState: int = -1,
    ):
        super().__init__(parent, invokingState)
        self.parser = parser

    def NAME(self):
        return self.getToken(PetabMathExprParser.NAME, 0)

    def getRuleIndex(self):
        return PetabMathExprParser.RULE_var

    def accept(self, visitor: ParseTreeVisitor):
        """Dispatch to ``visitVar`` if the visitor defines it."""
        if hasattr(visitor, "visitVar"):
            return visitor.visitVar(self)
        else:
            return visitor.visitChildren(self)


def var(self):
    """Parse the ``var`` rule: match a single NAME token.

    Returns the populated ``VarContext``.
    """
    localctx = PetabMathExprParser.VarContext(self, self._ctx, self.state)
    self.enterRule(localctx, 14, self.RULE_var)
    try:
        self.enterOuterAlt(localctx, 1)
        self.state = 74
        self.match(PetabMathExprParser.NAME)
    except RecognitionException as re:
        localctx.exception = re
        self._errHandler.reportError(self, re)
        self._errHandler.recover(self, re)
    finally:
        self.exitRule()
    return localctx


def sempred(self, localctx: RuleContext, ruleIndex: int, predIndex: int):
    """Evaluate the semantic predicate ``predIndex`` of rule ``ruleIndex``.

    Only rule index 1 has a predicate function registered
    (``expr_sempred``); any other rule index raises ``Exception``.
    """
    # The predicate table is created lazily; the entry for rule 1 is
    # (re-)assigned on every call.
    if self._predicates == None:
        self._predicates = dict()
    self._predicates[1] = self.expr_sempred
    pred = self._predicates.get(ruleIndex, None)
    if pred is None:
        raise Exception("No predicate with index:" + str(ruleIndex))
    else:
        return pred(localctx, predIndex)


def expr_sempred(self, localctx: ExprContext, predIndex: int):
    """Precedence predicates for the ``expr`` rule.

    Maps predicate indices 0..4 to ``precpred`` checks with precedence
    12, 9, 8, 6 and 5. For any other index the function falls off the end
    and returns ``None``.
    """
    if predIndex == 0:
        return self.precpred(self._ctx, 12)

    if predIndex == 1:
        return self.precpred(self._ctx, 9)

    if predIndex == 2:
        return self.precpred(self._ctx, 8)

    if predIndex == 3:
        return self.precpred(self._ctx, 6)

    if predIndex == 4:
        return self.precpred(self._ctx, 5)
# This class defines a complete generic visitor for a parse tree produced by PetabMathExprParser.


class PetabMathExprParserVisitor(ParseTreeVisitor):
    """Generic visitor for parse trees produced by ``PetabMathExprParser``.

    ANTLR-generated (see regenerate.sh); manual edits are lost on
    regeneration. Every ``visit*`` method simply delegates to
    ``visitChildren``; subclasses override the methods for the node types
    they want to handle.
    """

    # Visit a parse tree produced by PetabMathExprParser#petabExpression.
    def visitPetabExpression(
        self, ctx: PetabMathExprParser.PetabExpressionContext
    ):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#PowerExpr.
    def visitPowerExpr(self, ctx: PetabMathExprParser.PowerExprContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#BooleanAndOrExpr.
    def visitBooleanAndOrExpr(
        self, ctx: PetabMathExprParser.BooleanAndOrExprContext
    ):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#ComparisonExpr.
    def visitComparisonExpr(
        self, ctx: PetabMathExprParser.ComparisonExprContext
    ):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#MultExpr.
    def visitMultExpr(self, ctx: PetabMathExprParser.MultExprContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#BooleanLiteral_.
    def visitBooleanLiteral_(
        self, ctx: PetabMathExprParser.BooleanLiteral_Context
    ):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#AddExpr.
    def visitAddExpr(self, ctx: PetabMathExprParser.AddExprContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#BooleanNotExpr.
    def visitBooleanNotExpr(
        self, ctx: PetabMathExprParser.BooleanNotExprContext
    ):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#ParenExpr.
    def visitParenExpr(self, ctx: PetabMathExprParser.ParenExprContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#functionCall_.
    def visitFunctionCall_(
        self, ctx: PetabMathExprParser.FunctionCall_Context
    ):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#UnaryExpr.
    def visitUnaryExpr(self, ctx: PetabMathExprParser.UnaryExprContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#Number_.
    def visitNumber_(self, ctx: PetabMathExprParser.Number_Context):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#VarExpr_.
    def visitVarExpr_(self, ctx: PetabMathExprParser.VarExpr_Context):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#comp_op.
    def visitComp_op(self, ctx: PetabMathExprParser.Comp_opContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#argumentList.
    def visitArgumentList(self, ctx: PetabMathExprParser.ArgumentListContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#functionCall.
    def visitFunctionCall(self, ctx: PetabMathExprParser.FunctionCallContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#booleanLiteral.
    def visitBooleanLiteral(
        self, ctx: PetabMathExprParser.BooleanLiteralContext
    ):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#number.
    def visitNumber(self, ctx: PetabMathExprParser.NumberContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by PetabMathExprParser#var.
    def visitVar(self, ctx: PetabMathExprParser.VarContext):
        return self.visitChildren(ctx)
def sympify_petab(expr: str | int | float) -> sp.Expr | sp.Basic:
    """Translate a PEtab math expression into a sympy expression.

    Args:
        expr: PEtab math expression. Python and numpy integer/float scalars
            are converted directly without invoking the parser.

    Raises:
        ValueError: On lexer/parser errors, or if the resulting expression
            is known not to be real-valued.

    Returns:
        The sympy expression corresponding to `expr`.
        Boolean values are converted to numeric values.
    """
    # Plain numbers need no parsing.
    if isinstance(expr, (int, np.integer)):
        return sp.Integer(expr)
    if isinstance(expr, (float, np.floating)):
        return sp.Float(expr)

    def _raise_on_syntax_error(recognizer):
        # Replace the default listeners by one that raises ValueError.
        recognizer.removeErrorListeners()
        recognizer.addErrorListener(MathErrorListener())
        return recognizer

    lexer = _raise_on_syntax_error(PetabMathExprLexer(InputStream(expr)))
    parser = _raise_on_syntax_error(
        PetabMathExprParser(CommonTokenStream(lexer))
    )

    try:
        parse_tree = parser.petabExpression()
    except ValueError as err:
        raise ValueError(f"Error parsing {expr!r}: {err.args[0]}") from None

    # Walk the parse tree to build the sympy expression and map booleans
    # to numbers.
    sym_expr = bool2num(MathVisitorSympy().visit(parse_tree))

    # Only reject a definite `False`; `True` and `None` (unknown) both pass.
    if sym_expr.is_extended_real is False:
        raise ValueError(f"Expression {sym_expr} is not real-valued.")

    return sym_expr


class MathErrorListener(ErrorListener):
    """Error listener that turns lexer/parser syntax errors into
    ``ValueError``."""

    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):  # noqa N803
        location = f"{line}:{column}"
        raise ValueError(f"Syntax error at {location}: {msg}")
def get_measurement_df(
    measurement_file: None | str | Path | pd.DataFrame,
) -> pd.DataFrame:
    """
    Read the provided measurement file into a ``pandas.DataFrame``.

    Arguments:
        measurement_file: Name of file to read from or ``pandas.DataFrame``.
            ``None`` is passed through unchanged.

    Returns:
        Measurement DataFrame (or ``None`` if ``None`` was passed)
    """
    if measurement_file is None:
        return measurement_file

    if isinstance(measurement_file, str | Path):
        measurement_file = pd.read_csv(
            measurement_file, sep="\t", float_precision="round_trip"
        )

    lint.assert_no_leading_trailing_whitespace(
        measurement_file.columns.values, MEASUREMENT
    )

    return measurement_file


def write_measurement_df(df: pd.DataFrame, filename: str | Path) -> None:
    """Write PEtab measurement table

    Arguments:
        df: PEtab measurement table
        filename: Destination file name
    """
    # Routed through get_measurement_df so the column names are checked
    # before writing.
    df = get_measurement_df(df)
    df.to_csv(filename, sep="\t", index=False)


def get_simulation_conditions(measurement_df: pd.DataFrame) -> pd.DataFrame:
    """
    Create a table of separate simulation conditions. A simulation condition
    is a specific combination of simulationConditionId and
    preequilibrationConditionId.

    Arguments:
        measurement_df: PEtab measurement table

    Returns:
        Dataframe with columns 'simulationConditionId' and
        'preequilibrationConditionId'. All-null columns will be omitted.
        Missing 'preequilibrationConditionId's will be set to '' (empty
        string).
    """
    if measurement_df.empty:
        return pd.DataFrame(data={SIMULATION_CONDITION_ID: []})
    # find columns to group by (i.e. if not all nans).
    # can be improved by checking for identical condition vectors
    grouping_cols = core.get_notnull_columns(
        measurement_df,
        [SIMULATION_CONDITION_ID, PREEQUILIBRATION_CONDITION_ID],
    )

    # group by cols and return dataframe containing each combination
    # of those rows only once (and an additional counting row)
    # We require NaN-containing rows, but they are ignored by `groupby`,
    # therefore replace them before
    simulation_conditions = (
        measurement_df.fillna("")
        .groupby(grouping_cols)
        .size()
        .reset_index()[grouping_cols]
    )
    # sort to be really sure that we always get the same order
    return simulation_conditions.sort_values(grouping_cols, ignore_index=True)


def get_rows_for_condition(
    measurement_df: pd.DataFrame,
    condition: pd.Series | pd.DataFrame | dict,
) -> pd.DataFrame:
    """
    Extract rows in `measurement_df` for `condition` according
    to 'preequilibrationConditionId' and 'simulationConditionId' in
    `condition`.

    Arguments:
        measurement_df:
            PEtab measurement DataFrame
        condition:
            DataFrame with single row (or Series) and columns
            'preequilibrationConditionId' and 'simulationConditionId'.
            Or dictionary with those keys.

    Returns:
        The subselection of rows in ``measurement_df`` for the condition
        ``condition``. If ``condition`` contains neither key, all rows are
        returned.
    """
    # Reduce a single-row DataFrame to that row, so that the comparisons
    # below are against scalars rather than a (label-misaligned) Series.
    if isinstance(condition, pd.DataFrame) and len(condition) == 1:
        condition = condition.iloc[0]

    # Start from an all-True boolean mask. Starting from the integer 1 would
    # make `.loc` perform a label lookup if no filter gets applied below.
    row_filter = pd.Series(True, index=measurement_df.index, dtype=bool)

    # check for equality in all grouping cols
    if PREEQUILIBRATION_CONDITION_ID in condition:
        # Missing preequilibration IDs are represented as '' in `condition`
        row_filter = (
            measurement_df[PREEQUILIBRATION_CONDITION_ID].fillna("")
            == condition[PREEQUILIBRATION_CONDITION_ID]
        ) & row_filter
    if SIMULATION_CONDITION_ID in condition:
        row_filter = (
            measurement_df[SIMULATION_CONDITION_ID]
            == condition[SIMULATION_CONDITION_ID]
        ) & row_filter
    # apply filter
    cur_measurement_df = measurement_df.loc[row_filter, :]

    return cur_measurement_df


def get_measurement_parameter_ids(measurement_df: pd.DataFrame) -> list[str]:
    """
    Return list of ID of parameters which occur in measurement table as
    observable or noise parameter overrides.

    The override columns are optional; a missing column contributes nothing.
    Entries are collected as produced by
    :func:`split_parameter_replacement_list`, i.e. numeric overrides are
    not filtered out.

    Arguments:
        measurement_df:
            PEtab measurement DataFrame

    Returns:
        List of parameter IDs, in order of first occurrence
    """

    def get_unique_parameters(series):
        return core.unique_preserve_order(
            itertools.chain.from_iterable(
                series.apply(split_parameter_replacement_list)
            )
        )

    overrides = []
    for column in (OBSERVABLE_PARAMETERS, NOISE_PARAMETERS):
        if column in measurement_df:
            overrides.extend(get_unique_parameters(measurement_df[column]))

    return core.unique_preserve_order(overrides)


def split_parameter_replacement_list(
    list_string: str | numbers.Number, delim: str = PARAMETER_SEPARATOR
) -> list[str | numbers.Number]:
    """
    Split values in observableParameters and noiseParameters in measurement
    table.

    Arguments:
        list_string: delim-separated stringified list
        delim: delimiter

    Raises:
        ValueError: If an entry is neither a number nor a valid identifier.

    Returns:
        List of split values. Numeric values may be converted to `float`,
        and parameter IDs are kept as strings.
    """
    if list_string is None or list_string == "":
        return []

    if isinstance(list_string, numbers.Number):
        # Empty cells in pandas might be turned into nan
        # We might want to allow nan as replacement...
        if np.isnan(list_string):
            return []
        return [list_string]

    result = [x.strip() for x in list_string.split(delim)]

    def convert_and_check(x):
        x = core.to_float_if_float(x)
        if isinstance(x, float):
            return x
        if lint.is_valid_identifier(x):
            return x

        raise ValueError(
            f"The value '{x}' in the parameter replacement list "
            f"'{list_string}' is neither a number, nor a valid parameter ID."
        )

    return list(map(convert_and_check, result))


def create_measurement_df() -> pd.DataFrame:
    """Create empty measurement dataframe

    Returns:
        Created DataFrame
    """
    return pd.DataFrame(
        data={
            OBSERVABLE_ID: [],
            PREEQUILIBRATION_CONDITION_ID: [],
            SIMULATION_CONDITION_ID: [],
            MEASUREMENT: [],
            TIME: [],
            OBSERVABLE_PARAMETERS: [],
            NOISE_PARAMETERS: [],
            DATASET_ID: [],
            REPLICATE_ID: [],
        }
    )


def measurements_have_replicates(measurement_df: pd.DataFrame) -> bool:
    """Tests whether the measurements come with replicates

    Arguments:
        measurement_df: Measurement table

    Returns:
        ``True`` if there are replicates, ``False`` otherwise
    """
    grouping_cols = core.get_notnull_columns(
        measurement_df,
        [
            OBSERVABLE_ID,
            SIMULATION_CONDITION_ID,
            PREEQUILIBRATION_CONDITION_ID,
            TIME,
        ],
    )
    # NaNs are replaced because `groupby` would otherwise drop those rows.
    group_sizes = measurement_df.fillna("").groupby(grouping_cols).size()
    # Replicates exist if any group holds more than one measurement.
    # Convert to a plain `bool` to match the annotated return type.
    return bool((group_sizes > 1).any())


def assert_overrides_match_parameter_count(
    measurement_df: pd.DataFrame, observable_df: pd.DataFrame
) -> None:
    """Ensure that number of parameters in the observable definition matches
    the number of overrides in ``measurement_df``

    Arguments:
        measurement_df: PEtab measurement table
        observable_df: PEtab observable table

    Raises:
        ValueError: If a measurement refers to an undefined observable.
        AssertionError: If the number of overrides does not match the number
            of placeholders.
    """
    # sympify only once and save number of parameters
    observable_parameters_count = {
        obs_id: len(
            observables.get_formula_placeholders(formula, obs_id, "observable")
        )
        for obs_id, formula in zip(
            observable_df.index.values,
            observable_df[OBSERVABLE_FORMULA],
            strict=True,
        )
    }
    noise_parameters_count = {
        obs_id: len(
            observables.get_formula_placeholders(formula, obs_id, "noise")
        )
        for obs_id, formula in zip(
            observable_df.index.values,
            observable_df[NOISE_FORMULA],
            strict=True,
        )
    }

    for _, row in measurement_df.iterrows():
        # check observable parameters
        try:
            expected = observable_parameters_count[row[OBSERVABLE_ID]]
        except KeyError as e:
            raise ValueError(
                f"Observable {row[OBSERVABLE_ID]} used in measurement table "
                f"is not defined."
            ) from e

        actual = len(
            split_parameter_replacement_list(
                row.get(OBSERVABLE_PARAMETERS, None)
            )
        )
        # The number of overrides must equal the number of placeholders
        # (so no overrides are fine if there are no placeholders)
        if actual != expected:
            formula = observable_df.loc[row[OBSERVABLE_ID], OBSERVABLE_FORMULA]
            raise AssertionError(
                f"Mismatch of observable parameter overrides for "
                f"{row[OBSERVABLE_ID]} ({formula}) "
                f"in:\n{row}\n"
                f"Expected {expected} but got {actual}"
            )

        # check noise parameters
        replacements = split_parameter_replacement_list(
            row.get(NOISE_PARAMETERS, None)
        )
        try:
            expected = noise_parameters_count[row[OBSERVABLE_ID]]

            # Same rule as for the observable parameters
            if len(replacements) != expected:
                raise AssertionError(
                    f"Mismatch of noise parameter overrides in:\n{row}\n"
                    f"Expected {expected} but got {len(replacements)}"
                )
        except KeyError as err:
            # NOTE(review): both count dicts are keyed by the observable
            # table index, so this branch looks unreachable once the lookup
            # above succeeded - confirm before removing.
            # no overrides defined, but a numerical sigma can be provided
            # anyways
            if len(replacements) != 1 or not isinstance(
                replacements[0], numbers.Number
            ):
                raise AssertionError(
                    f"No placeholders have been specified in the noise model "
                    f"for observable {row[OBSERVABLE_ID]}, but parameter ID "
                    "or multiple overrides were specified in the "
                    "noiseParameters column."
                ) from err


def measurement_is_at_steady_state(time: float) -> bool:
    """Check whether a measurement is at steady state.

    Arguments:
        time:
            The time.

    Returns:
        Whether the measurement is at steady state, i.e. whether ``time``
        is infinite.
    """
    return math.isinf(time)
+ """ + return math.isinf(time) diff --git a/petab/v1/models/__init__.py b/petab/v1/models/__init__.py new file mode 100644 index 00000000..938f55fb --- /dev/null +++ b/petab/v1/models/__init__.py @@ -0,0 +1,14 @@ +"""Handling of different model types supported by PEtab.""" +#: SBML model type as used in a PEtab v2 yaml file as `language`. +MODEL_TYPE_SBML = "sbml" +#: PySB model type as used in a PEtab v2 yaml file as `language`. +MODEL_TYPE_PYSB = "pysb" + +known_model_types = { + MODEL_TYPE_SBML, + MODEL_TYPE_PYSB, +} + +from .model import Model # noqa F401 + +__all__ = ["MODEL_TYPE_SBML", "MODEL_TYPE_PYSB", "known_model_types", "Model"] diff --git a/petab/v1/models/model.py b/petab/v1/models/model.py new file mode 100644 index 00000000..de1ebf3a --- /dev/null +++ b/petab/v1/models/model.py @@ -0,0 +1,159 @@ +"""PEtab model abstraction""" +from __future__ import annotations + +import abc +from collections.abc import Iterable +from pathlib import Path +from typing import Any + +__all__ = ["Model", "model_factory"] + + +class Model(abc.ABC): + """Base class for wrappers for any PEtab-supported model type""" + + @abc.abstractmethod + def __init__(self): + ... + + @staticmethod + @abc.abstractmethod + def from_file(filepath_or_buffer: Any, model_id: str) -> Model: + """Load the model from the given path/URL + + :param filepath_or_buffer: URL or path of the model + :param model_id: Model ID + :returns: A ``Model`` instance holding the given model + """ + ... + + @abc.abstractmethod + def to_file(self, filename: [str, Path]): + """Save the model to the given file + + :param filename: Destination filename + """ + ... + + @classmethod + @property + @abc.abstractmethod + def type_id(cls): + ... + + @property + @abc.abstractmethod + def model_id(self): + ... 
+ + @abc.abstractmethod + def get_parameter_value(self, id_: str) -> float: + """Get a parameter value + + :param id_: ID of the parameter whose value is to be returned + :raises ValueError: If no parameter with the given ID exists + :returns: The value of the given parameter as specified in the model + """ + ... + + @abc.abstractmethod + def get_free_parameter_ids_with_values( + self, + ) -> Iterable[tuple[str, float]]: + """Get free model parameters along with their values + + Returns: + Iterator over tuples of (parameter_id, parameter_value) + """ + ... + + @abc.abstractmethod + def get_parameter_ids(self) -> Iterable[str]: + """Get all parameter IDs from this model + + :returns: Iterator over model parameter IDs + """ + ... + + @abc.abstractmethod + def has_entity_with_id(self, entity_id) -> bool: + """Check if there is a model entity with the given ID + + :param entity_id: ID to check for + :returns: + ``True``, if there is an entity with the given ID, + ``False`` otherwise + """ + ... + + @abc.abstractmethod + def get_valid_parameters_for_parameter_table(self) -> Iterable[str]: + """Get IDs of all parameters that are allowed to occur in the PEtab + parameters table + + :returns: Iterator over parameter IDs + """ + ... + + @abc.abstractmethod + def get_valid_ids_for_condition_table(self) -> Iterable[str]: + """Get IDs of all model entities that are allowed to occur as columns + in the PEtab conditions table. + + :returns: Iterator over model entity IDs + """ + ... + + @abc.abstractmethod + def symbol_allowed_in_observable_formula(self, id_: str) -> bool: + """Check if the given ID is allowed to be used in observable and noise + formulas + + :returns: ``True``, if allowed, ``False`` otherwise + """ + ... + + @abc.abstractmethod + def is_valid(self) -> bool: + """Validate this model + + :returns: + `True` if the model is valid, `False` if there are errors in + this model + """ + ... 
+ + @abc.abstractmethod + def is_state_variable(self, id_: str) -> bool: + """Check whether the given ID corresponds to a model state variable""" + ... + + +def model_factory( + filepath_or_buffer: Any, model_language: str, model_id: str = None +) -> Model: + """Create a PEtab model instance from the given model + + :param filepath_or_buffer: Path/URL of the model + :param model_language: PEtab model language ID for the given model + :param model_id: PEtab model ID for the given model + :returns: A :py:class:`Model` instance representing the given model + """ + from . import MODEL_TYPE_PYSB, MODEL_TYPE_SBML, known_model_types + + if model_language == MODEL_TYPE_SBML: + from .sbml_model import SbmlModel + + return SbmlModel.from_file(filepath_or_buffer, model_id=model_id) + + if model_language == MODEL_TYPE_PYSB: + from .pysb_model import PySBModel + + return PySBModel.from_file(filepath_or_buffer, model_id=model_id) + + if model_language in known_model_types: + raise NotImplementedError( + f"Unsupported model format: {model_language}" + ) + + raise ValueError(f"Unknown model format: {model_language}") diff --git a/petab/v1/models/pysb_model.py b/petab/v1/models/pysb_model.py new file mode 100644 index 00000000..7355669e --- /dev/null +++ b/petab/v1/models/pysb_model.py @@ -0,0 +1,230 @@ +"""Functions for handling PySB models""" + +import itertools +import re +import sys +from collections.abc import Iterable +from pathlib import Path +from typing import Any + +import pysb + +from . 
import MODEL_TYPE_PYSB +from .model import Model + +__all__ = ["PySBModel", "parse_species_name", "pattern_from_string"] + + +def _pysb_model_from_path(pysb_model_file: str | Path) -> pysb.Model: + """Load a pysb model module and return the :class:`pysb.Model` instance + + :param pysb_model_file: Full or relative path to the PySB model module + :return: The pysb Model instance + """ + pysb_model_file = Path(pysb_model_file) + pysb_model_module_name = pysb_model_file.with_suffix("").name + + import importlib.util + + spec = importlib.util.spec_from_file_location( + pysb_model_module_name, pysb_model_file + ) + module = importlib.util.module_from_spec(spec) + sys.modules[pysb_model_module_name] = module + spec.loader.exec_module(module) + + # find a pysb.Model instance in the module + # 1) check if module.model exists and is a pysb.Model + model = getattr(module, "model", None) + if model: + return model + + # 2) check if there is any other pysb.Model instance + for x in dir(module): + attr = getattr(module, x) + if isinstance(attr, pysb.Model): + return attr + + raise ValueError(f"Could not find any pysb.Model in {pysb_model_file}.") + + +class PySBModel(Model): + """PEtab wrapper for PySB models""" + + type_id = MODEL_TYPE_PYSB + + def __init__(self, model: pysb.Model, model_id: str): + super().__init__() + + self.model = model + self._model_id = model_id + + @staticmethod + def from_file(filepath_or_buffer, model_id: str): + return PySBModel( + model=_pysb_model_from_path(filepath_or_buffer), model_id=model_id + ) + + def to_file(self, filename: [str, Path]): + from pysb.export import export + + model_source = export(self.model, "pysb_flat") + with open(filename, "w") as f: + f.write(model_source) + + @property + def model_id(self): + return self._model_id + + @model_id.setter + def model_id(self, model_id): + self._model_id = model_id + + def get_parameter_ids(self) -> Iterable[str]: + return (p.name for p in self.model.parameters) + + def 
get_parameter_value(self, id_: str) -> float: + try: + return self.model.parameters[id_].value + except KeyError as e: + raise ValueError(f"Parameter {id_} does not exist.") from e + + def get_free_parameter_ids_with_values( + self, + ) -> Iterable[tuple[str, float]]: + return ((p.name, p.value) for p in self.model.parameters) + + def has_entity_with_id(self, entity_id) -> bool: + try: + _ = self.model.components[entity_id] + return True + except KeyError: + return False + + def get_valid_parameters_for_parameter_table(self) -> Iterable[str]: + # all parameters are allowed in the parameter table + return self.get_parameter_ids() + + def get_valid_ids_for_condition_table(self) -> Iterable[str]: + return itertools.chain( + self.get_parameter_ids(), self.get_compartment_ids() + ) + + def symbol_allowed_in_observable_formula(self, id_: str) -> bool: + return id_ in ( + x.name + for x in itertools.chain( + self.model.parameters, + self.model.observables, + self.model.expressions, + ) + ) + + def is_valid(self) -> bool: + # PySB models are always valid + return True + + def is_state_variable(self, id_: str) -> bool: + # If there is a component with that name, it's not a state variable + # (there are no dynamically-sized compartments) + if self.model.components.get(id_, None): + return False + + # Try parsing the ID + try: + result = parse_species_name(id_) + except ValueError: + return False + else: + # check if the ID is plausible + for monomer, compartment, site_config in result: + pysb_monomer: pysb.Monomer = self.model.monomers.get(monomer) + if pysb_monomer is None: + return False + if compartment: + pysb_compartment = self.model.compartments.get(compartment) + if pysb_compartment is None: + return False + for site, state in site_config.items(): + if site not in pysb_monomer.sites: + return False + if state not in pysb_monomer.site_states[site]: + return False + if set(pysb_monomer.sites) - set(site_config.keys()): + # There are undefined sites + return False + 
return True + + def get_compartment_ids(self) -> Iterable[str]: + return (compartment.name for compartment in self.model.compartments) + + +def parse_species_name( + name: str, +) -> list[tuple[str, str | None, dict[str, Any]]]: + """Parse a PySB species name + + :param name: Species name to parse + :returns: List of species, representing complex constituents, each as + a tuple of the monomer name, the compartment name, and a dict of sites + mapping to site states. + :raises ValueError: In case this is not a valid ID + """ + if "=MultiState(" in name: + raise NotImplementedError("MultiState is not yet supported.") + + complex_constituent_pattern = re.compile( + r"^(?P\w+)\((?P.*)\)" + r"( \*\* (?P.*))?$" + ) + result = [] + complex_constituents = name.split(" % ") + + for complex_constituent in complex_constituents: + match = complex_constituent_pattern.match(complex_constituent) + if not match: + raise ValueError( + f"Invalid species name: '{name}' " f"('{complex_constituent}')" + ) + monomer = match.groupdict()["monomer"] + site_config_str = match.groupdict()["site_config"] + compartment = match.groupdict()["compartment"] + + site_config = {} + for site_str in site_config_str.split(", "): + if not site_str: + continue + site, config = site_str.split("=") + if config == "None": + config = None + elif config.startswith("'"): + if not config.endswith("'"): + raise ValueError( + f"Invalid species name: '{name}' " f"('{config}')" + ) + # strip quotes + config = config[1:-1] + else: + config = int(config) + site_config[site] = config + result.append( + (monomer, compartment, site_config), + ) + + return result + + +def pattern_from_string(string: str, model: pysb.Model) -> pysb.ComplexPattern: + """Convert a pattern string to a Pattern instance""" + parts = parse_species_name(string) + patterns = [] + for part in parts: + patterns.append( + pysb.MonomerPattern( + monomer=model.monomers.get(part[0]), + compartment=model.compartments.get(part[1], None), + 
site_conditions=part[2], + ) + ) + + return pysb.ComplexPattern(patterns, compartment=None) diff --git a/petab/v1/models/sbml_model.py b/petab/v1/models/sbml_model.py new file mode 100644 index 00000000..fd57f2dc --- /dev/null +++ b/petab/v1/models/sbml_model.py @@ -0,0 +1,224 @@ +"""Functions for handling SBML models""" + +import itertools +from collections.abc import Iterable +from pathlib import Path + +import libsbml +import sympy as sp +from sympy.abc import _clash + +from ..sbml import ( + get_sbml_model, + is_sbml_consistent, + load_sbml_from_string, + write_sbml, +) +from . import MODEL_TYPE_SBML +from .model import Model + +__all__ = ["SbmlModel"] + + +class SbmlModel(Model): + """PEtab wrapper for SBML models""" + + type_id = MODEL_TYPE_SBML + + def __init__( + self, + sbml_model: libsbml.Model = None, + sbml_reader: libsbml.SBMLReader = None, + sbml_document: libsbml.SBMLDocument = None, + model_id: str = None, + ): + super().__init__() + + self.sbml_reader: libsbml.SBMLReader | None = sbml_reader + self.sbml_document: libsbml.SBMLDocument | None = sbml_document + self.sbml_model: libsbml.Model | None = sbml_model + + self._model_id = model_id or sbml_model.getIdAttribute() + + def __getstate__(self): + """Return state for pickling""" + state = self.__dict__.copy() + + # libsbml stuff cannot be serialized directly + if self.sbml_model: + sbml_document = self.sbml_model.getSBMLDocument() + sbml_writer = libsbml.SBMLWriter() + state["sbml_string"] = sbml_writer.writeSBMLToString(sbml_document) + + exclude = ["sbml_reader", "sbml_document", "sbml_model"] + for key in exclude: + state.pop(key) + + return state + + def __setstate__(self, state): + """Set state after unpickling""" + # load SBML model from pickled string + sbml_string = state.pop("sbml_string", None) + if sbml_string: + ( + self.sbml_reader, + self.sbml_document, + self.sbml_model, + ) = load_sbml_from_string(sbml_string) + + self.__dict__.update(state) + + @staticmethod + def 
from_file(filepath_or_buffer, model_id: str = None): + sbml_reader, sbml_document, sbml_model = get_sbml_model( + filepath_or_buffer + ) + return SbmlModel( + sbml_model=sbml_model, + sbml_reader=sbml_reader, + sbml_document=sbml_document, + model_id=model_id, + ) + + @property + def model_id(self): + return self._model_id + + @model_id.setter + def model_id(self, model_id): + self._model_id = model_id + + def to_file(self, filename: [str, Path]): + write_sbml( + self.sbml_document or self.sbml_model.getSBMLDocument(), filename + ) + + def get_parameter_value(self, id_: str) -> float: + parameter = self.sbml_model.getParameter(id_) + if not parameter: + raise ValueError(f"Parameter {id_} does not exist.") + return parameter.getValue() + + def get_free_parameter_ids_with_values( + self, + ) -> Iterable[tuple[str, float]]: + rule_targets = { + ar.getVariable() for ar in self.sbml_model.getListOfRules() + } + + def get_initial(p): + # return the initial assignment value if there is one, and it is a + # number; `None`, if there is a non-numeric initial assignment; + # otherwise, the parameter value + if ia := self.sbml_model.getInitialAssignmentBySymbol(p.getId()): + sym_expr = sympify_sbml(ia.getMath()) + return ( + float(sym_expr.evalf()) + if sym_expr.evalf().is_Number + else None + ) + return p.getValue() + + return ( + (p.getId(), initial) + for p in self.sbml_model.getListOfParameters() + if p.getId() not in rule_targets + and (initial := get_initial(p)) is not None + ) + + def get_parameter_ids(self) -> Iterable[str]: + rule_targets = { + ar.getVariable() for ar in self.sbml_model.getListOfRules() + } + + return ( + p.getId() + for p in self.sbml_model.getListOfParameters() + if p.getId() not in rule_targets + ) + + def get_parameter_ids_with_values(self) -> Iterable[tuple[str, float]]: + rule_targets = { + ar.getVariable() for ar in self.sbml_model.getListOfRules() + } + + return ( + (p.getId(), p.getValue()) + for p in self.sbml_model.getListOfParameters() + 
if p.getId() not in rule_targets + ) + + def has_entity_with_id(self, entity_id) -> bool: + return self.sbml_model.getElementBySId(entity_id) is not None + + def get_valid_parameters_for_parameter_table(self) -> Iterable[str]: + # All parameters except rule-targets + disallowed_set = { + ar.getVariable() for ar in self.sbml_model.getListOfRules() + } + + return ( + p.getId() + for p in self.sbml_model.getListOfParameters() + if p.getId() not in disallowed_set + ) + + def get_valid_ids_for_condition_table(self) -> Iterable[str]: + return ( + x.getId() + for x in itertools.chain( + self.sbml_model.getListOfParameters(), + self.sbml_model.getListOfSpecies(), + self.sbml_model.getListOfCompartments(), + ) + ) + + def symbol_allowed_in_observable_formula(self, id_: str) -> bool: + return self.sbml_model.getElementBySId(id_) or id_ == "time" + + def is_valid(self) -> bool: + return is_sbml_consistent(self.sbml_model.getSBMLDocument()) + + def is_state_variable(self, id_: str) -> bool: + return ( + self.sbml_model.getSpecies(id_) is not None + or self.sbml_model.getCompartment(id_) is not None + or self.sbml_model.getRuleByVariable(id_) is not None + ) + + +def sympify_sbml(sbml_obj: libsbml.ASTNode | libsbml.SBase) -> sp.Expr: + """Convert SBML math expression to sympy expression. + + Parameters + ---------- + sbml_obj: + SBML math element or an SBML object with a math element. + + Returns + ------- + The sympy expression corresponding to ``sbml_obj``. 
+ """ + ast_node = ( + sbml_obj + if isinstance(sbml_obj, libsbml.ASTNode) + else sbml_obj.getMath() + ) + + parser_settings = libsbml.L3ParserSettings( + ast_node.getParentSBMLObject().getModel(), + libsbml.L3P_PARSE_LOG_AS_LOG10, + libsbml.L3P_EXPAND_UNARY_MINUS, + libsbml.L3P_NO_UNITS, + libsbml.L3P_AVOGADRO_IS_CSYMBOL, + libsbml.L3P_COMPARE_BUILTINS_CASE_INSENSITIVE, + None, + libsbml.L3P_MODULO_IS_PIECEWISE, + ) + + formula_str = libsbml.formulaToL3StringWithSettings( + ast_node, parser_settings + ) + + return sp.sympify(formula_str, locals=_clash) diff --git a/petab/v1/observables.py b/petab/v1/observables.py new file mode 100644 index 00000000..1485302d --- /dev/null +++ b/petab/v1/observables.py @@ -0,0 +1,228 @@ +"""Functions for working with the PEtab observables table""" + +import re +from collections import OrderedDict +from pathlib import Path +from typing import Literal + +import pandas as pd + +from . import core, lint +from .C import * # noqa: F403 +from .math import sympify_petab +from .models import Model + +__all__ = [ + "create_observable_df", + "get_formula_placeholders", + "get_observable_df", + "get_output_parameters", + "get_placeholders", + "write_observable_df", +] + + +def get_observable_df( + observable_file: str | pd.DataFrame | Path | None, +) -> pd.DataFrame | None: + """ + Read the provided observable file into a ``pandas.Dataframe``. + + Arguments: + observable_file: Name of the file to read from or pandas.Dataframe. 
+ + Returns: + Observable DataFrame + """ + if observable_file is None: + return observable_file + + if isinstance(observable_file, str | Path): + observable_file = pd.read_csv( + observable_file, sep="\t", float_precision="round_trip" + ) + + lint.assert_no_leading_trailing_whitespace( + observable_file.columns.values, "observable" + ) + + if not isinstance(observable_file.index, pd.RangeIndex): + observable_file.reset_index( + drop=observable_file.index.name != OBSERVABLE_ID, + inplace=True, + ) + + try: + observable_file.set_index([OBSERVABLE_ID], inplace=True) + except KeyError: + raise KeyError( + f"Observable table missing mandatory field {OBSERVABLE_ID}." + ) from None + + return observable_file + + +def write_observable_df(df: pd.DataFrame, filename: str | Path) -> None: + """Write PEtab observable table + + Arguments: + df: PEtab observable table + filename: Destination file name + """ + df = get_observable_df(df) + df.to_csv(filename, sep="\t", index=True) + + +def get_output_parameters( + observable_df: pd.DataFrame, + model: Model, + observables: bool = True, + noise: bool = True, + mapping_df: pd.DataFrame = None, +) -> list[str]: + """Get output parameters + + Returns IDs of parameters used in observable and noise formulas that are + not defined in the model. 
+ + Arguments: + observable_df: PEtab observable table + model: The underlying model + observables: Include parameters from observableFormulas + noise: Include parameters from noiseFormulas + mapping_df: PEtab mapping table + + Returns: + List of output parameter IDs + """ + formulas = [] + if observables: + formulas.extend(observable_df[OBSERVABLE_FORMULA]) + if noise and NOISE_FORMULA in observable_df: + formulas.extend(observable_df[NOISE_FORMULA]) + output_parameters = OrderedDict() + + for formula in formulas: + free_syms = sorted( + sympify_petab(formula).free_symbols, + key=lambda symbol: symbol.name, + ) + for free_sym in free_syms: + sym = str(free_sym) + if model.symbol_allowed_in_observable_formula(sym): + continue + + # does it map to a model entity? + if ( + mapping_df is not None + and sym in mapping_df.index + and model.symbol_allowed_in_observable_formula( + mapping_df.loc[sym, MODEL_ENTITY_ID] + ) + ): + continue + + output_parameters[sym] = None + + return list(output_parameters.keys()) + + +def get_formula_placeholders( + formula_string: str, + observable_id: str, + override_type: Literal["observable", "noise"], +) -> list[str]: + """ + Get placeholder variables in noise or observable definition for the + given observable ID. + + Arguments: + formula_string: observable formula + observable_id: ID of current observable + override_type: ``'observable'`` or ``'noise'``, depending on whether + ``formula`` is for observable or for noise model + + Returns: + List of placeholder parameter IDs in the order expected in the + observableParameter column of the measurement table. 
+ """ + if not formula_string: + return [] + + if not isinstance(formula_string, str): + return [] + + pattern = re.compile( + r"(?:^|\W)(" + + re.escape(override_type) + + r"Parameter\d+_" + + re.escape(observable_id) + + r")(?=\W|$)" + ) + placeholder_set = set(pattern.findall(formula_string)) + + # need to sort and check that there are no gaps in numbering + placeholders = [ + f"{override_type}Parameter{i}_{observable_id}" + for i in range(1, len(placeholder_set) + 1) + ] + + if placeholder_set != set(placeholders): + raise AssertionError( + "Non-consecutive numbering of placeholder " + f"parameter for {placeholder_set}" + ) + + return placeholders + + +def get_placeholders( + observable_df: pd.DataFrame, + observables: bool = True, + noise: bool = True, +) -> list[str]: + """Get all placeholder parameters from observable table observableFormulas + and noiseFormulas + + Arguments: + observable_df: PEtab observable table + observables: Include parameters from observableFormulas + noise: Include parameters from noiseFormulas + + Returns: + List of placeholder parameters from observable table observableFormulas + and noiseFormulas. 
+ """ + # collect placeholder parameters overwritten by + # {observable,noise}Parameters + placeholder_types = [] + formula_columns = [] + if observables: + placeholder_types.append("observable") + formula_columns.append(OBSERVABLE_FORMULA) + if noise: + placeholder_types.append("noise") + formula_columns.append(NOISE_FORMULA) + + placeholders = [] + for _, row in observable_df.iterrows(): + for placeholder_type, formula_column in zip( + placeholder_types, formula_columns, strict=True + ): + if formula_column not in row: + continue + + cur_placeholders = get_formula_placeholders( + row[formula_column], row.name, placeholder_type + ) + placeholders.extend(cur_placeholders) + return core.unique_preserve_order(placeholders) + + +def create_observable_df() -> pd.DataFrame: + """Create empty observable dataframe + + Returns: + Created DataFrame + """ + return pd.DataFrame(data={col: [] for col in OBSERVABLE_DF_COLS}) diff --git a/petab/v1/parameter_mapping.py b/petab/v1/parameter_mapping.py new file mode 100644 index 00000000..014b4a8e --- /dev/null +++ b/petab/v1/parameter_mapping.py @@ -0,0 +1,805 @@ +"""Functions related to mapping parameter from model to parameter estimation +problem +""" + +import logging +import numbers +import os +import re +import warnings +from collections.abc import Iterable +from typing import Any, Literal + +import libsbml +import numpy as np +import pandas as pd + +from . 
import ( + core, + lint, + measurements, + observables, + parameters, +) +from .C import * # noqa: F403 +from .mapping import resolve_mapping +from .models import Model + +# FIXME import from petab.ENV_NUM_THREADS +ENV_NUM_THREADS = "PETAB_NUM_THREADS" + + +logger = logging.getLogger(__name__) +__all__ = [ + "get_optimization_to_simulation_parameter_mapping", + "get_parameter_mapping_for_condition", + "handle_missing_overrides", + "merge_preeq_and_sim_pars", + "merge_preeq_and_sim_pars_condition", + "ParMappingDict", + "ParMappingDictTuple", + "ScaleMappingDict", + "ScaleMappingDictTuple", + "ParMappingDictQuadruple", +] + + +# Parameter mapping for condition +ParMappingDict = dict[str, str | numbers.Number] +# Parameter mapping for combination of preequilibration and simulation +# condition +ParMappingDictTuple = tuple[ParMappingDict, ParMappingDict] +# Same for scale mapping +ScaleMappingDict = dict[str, str] +ScaleMappingDictTuple = tuple[ScaleMappingDict, ScaleMappingDict] +# Parameter mapping for combination of preequilibration and simulation +# conditions, for parameter and scale mapping +ParMappingDictQuadruple = tuple[ + ParMappingDict, ParMappingDict, ScaleMappingDict, ScaleMappingDict +] + + +def get_optimization_to_simulation_parameter_mapping( + condition_df: pd.DataFrame, + measurement_df: pd.DataFrame, + parameter_df: pd.DataFrame | None = None, + observable_df: pd.DataFrame | None = None, + mapping_df: pd.DataFrame | None = None, + sbml_model: libsbml.Model = None, + simulation_conditions: pd.DataFrame | None = None, + warn_unmapped: bool | None = True, + scaled_parameters: bool = False, + fill_fixed_parameters: bool = True, + allow_timepoint_specific_numeric_noise_parameters: bool = False, + model: Model = None, +) -> list[ParMappingDictQuadruple]: + """ + Create list of mapping dicts from PEtab-problem to model parameters. + + Mapping can be performed in parallel. 
The number of threads is controlled + by the environment variable with the name of + :py:data:`petab.ENV_NUM_THREADS`. + + Parameters: + condition_df, measurement_df, parameter_df, observable_df: + The dataframes in the PEtab format. + sbml_model: + The SBML model (deprecated) + model: + The model. + simulation_conditions: + Table of simulation conditions as created by + ``petab.get_simulation_conditions``. + warn_unmapped: + If ``True``, log warning regarding unmapped parameters + scaled_parameters: + Whether parameter values should be scaled. + fill_fixed_parameters: + Whether to fill in nominal values for fixed parameters + (estimate=0 in parameters table). + allow_timepoint_specific_numeric_noise_parameters: + Mapping of timepoint-specific parameters overrides is generally + not supported. If this option is set to True, this function will + not fail in case of timepoint-specific fixed noise parameters, + if the noise formula consists only of one single parameter. + It is expected that the respective mapping is performed elsewhere. + The value mapped to the respective parameter here is undefined. + + Returns: + Parameter value and parameter scale mapping for all conditions. + + The length of the returned array is the number of unique combinations + of ``simulationConditionId`` s and ``preequilibrationConditionId`` s + from the measurement table. Each entry is a tuple of four dicts of + length equal to the number of model parameters. + The first two dicts map simulation parameter IDs to optimization + parameter IDs or values (where values are fixed) for preequilibration + and simulation condition, respectively. + The last two dicts map simulation parameter IDs to the parameter scale + of the respective parameter, again for preequilibration and simulation + condition. + If no preequilibration condition is defined, the respective dicts will + be empty. ``NaN`` is used where no mapping exists. 
+ """ + if sbml_model: + warnings.warn( + "Passing a model via the `sbml_model` argument is " + "deprecated, use `model=petab.models.sbml_model." + "SbmlModel(...)` instead.", + DeprecationWarning, + stacklevel=2, + ) + from .models.sbml_model import SbmlModel + + if model: + raise ValueError( + "Arguments `model` and `sbml_model` are " "mutually exclusive." + ) + model = SbmlModel(sbml_model=sbml_model) + + # Ensure inputs are okay + _perform_mapping_checks( + measurement_df, + allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501 + ) + + if simulation_conditions is None: + simulation_conditions = measurements.get_simulation_conditions( + measurement_df + ) + + simulation_parameters = dict(model.get_free_parameter_ids_with_values()) + # Add output parameters that are not already defined in the model + if observable_df is not None: + output_parameters = observables.get_output_parameters( + observable_df=observable_df, model=model, mapping_df=mapping_df + ) + for par_id in output_parameters: + simulation_parameters[par_id] = np.nan + + num_threads = int(os.environ.get(ENV_NUM_THREADS, 1)) + + # If sequential execution is requested, let's not create any + # thread-allocation overhead + if num_threads == 1: + mapping = map( + _map_condition, + _map_condition_arg_packer( + simulation_conditions, + measurement_df, + condition_df, + parameter_df, + mapping_df, + model, + simulation_parameters, + warn_unmapped, + scaled_parameters, + fill_fixed_parameters, + allow_timepoint_specific_numeric_noise_parameters, + ), + ) + return list(mapping) + + # Run multi-threaded + from concurrent.futures import ThreadPoolExecutor + + with ThreadPoolExecutor(max_workers=num_threads) as executor: + mapping = executor.map( + _map_condition, + _map_condition_arg_packer( + simulation_conditions, + measurement_df, + condition_df, + parameter_df, + mapping_df, + model, + simulation_parameters, + warn_unmapped, + 
scaled_parameters, + fill_fixed_parameters, + allow_timepoint_specific_numeric_noise_parameters, + ), + ) + return list(mapping) + + +def _map_condition_arg_packer( + simulation_conditions, + measurement_df, + condition_df, + parameter_df, + mapping_df, + model, + simulation_parameters, + warn_unmapped, + scaled_parameters, + fill_fixed_parameters, + allow_timepoint_specific_numeric_noise_parameters, +): + """Helper function to pack extra arguments for _map_condition""" + for _, condition in simulation_conditions.iterrows(): + yield ( + condition, + measurement_df, + condition_df, + parameter_df, + mapping_df, + model, + simulation_parameters, + warn_unmapped, + scaled_parameters, + fill_fixed_parameters, + allow_timepoint_specific_numeric_noise_parameters, + ) + + +def _map_condition(packed_args): + """Helper function for parallel condition mapping. + + For arguments see + :py:func:`get_optimization_to_simulation_parameter_mapping`. + """ + ( + condition, + measurement_df, + condition_df, + parameter_df, + mapping_df, + model, + simulation_parameters, + warn_unmapped, + scaled_parameters, + fill_fixed_parameters, + allow_timepoint_specific_numeric_noise_parameters, + ) = packed_args + + cur_measurement_df = None + # Get the condition specific measurements for the current condition, but + # only if relevant for parameter mapping + if ( + OBSERVABLE_PARAMETERS in measurement_df + and measurement_df[OBSERVABLE_PARAMETERS].notna().any() + ) or ( + NOISE_PARAMETERS in measurement_df + and measurement_df[NOISE_PARAMETERS].notna().any() + ): + cur_measurement_df = measurements.get_rows_for_condition( + measurement_df, condition + ) + + if ( + PREEQUILIBRATION_CONDITION_ID not in condition + or not isinstance(condition[PREEQUILIBRATION_CONDITION_ID], str) + or not condition[PREEQUILIBRATION_CONDITION_ID] + ): + par_map_preeq = {} + scale_map_preeq = {} + else: + par_map_preeq, scale_map_preeq = get_parameter_mapping_for_condition( + 
condition_id=condition[PREEQUILIBRATION_CONDITION_ID], + is_preeq=True, + cur_measurement_df=cur_measurement_df, + model=model, + condition_df=condition_df, + parameter_df=parameter_df, + mapping_df=mapping_df, + simulation_parameters=simulation_parameters, + warn_unmapped=warn_unmapped, + scaled_parameters=scaled_parameters, + fill_fixed_parameters=fill_fixed_parameters, + allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501 + ) + + par_map_sim, scale_map_sim = get_parameter_mapping_for_condition( + condition_id=condition[SIMULATION_CONDITION_ID], + is_preeq=False, + cur_measurement_df=cur_measurement_df, + model=model, + condition_df=condition_df, + parameter_df=parameter_df, + mapping_df=mapping_df, + simulation_parameters=simulation_parameters, + warn_unmapped=warn_unmapped, + scaled_parameters=scaled_parameters, + fill_fixed_parameters=fill_fixed_parameters, + allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501 + ) + + return par_map_preeq, par_map_sim, scale_map_preeq, scale_map_sim + + +def get_parameter_mapping_for_condition( + condition_id: str, + is_preeq: bool, + cur_measurement_df: pd.DataFrame | None = None, + sbml_model: libsbml.Model = None, + condition_df: pd.DataFrame = None, + parameter_df: pd.DataFrame = None, + mapping_df: pd.DataFrame | None = None, + simulation_parameters: dict[str, str] | None = None, + warn_unmapped: bool = True, + scaled_parameters: bool = False, + fill_fixed_parameters: bool = True, + allow_timepoint_specific_numeric_noise_parameters: bool = False, + model: Model = None, +) -> tuple[ParMappingDict, ScaleMappingDict]: + """ + Create dictionary of parameter value and parameter scale mappings from + PEtab-problem to SBML parameters for the given condition. 
+ + Parameters: + condition_id: + Condition ID for which to perform mapping + is_preeq: + If ``True``, output parameters will not be mapped + cur_measurement_df: + Measurement sub-table for current condition, can be ``None`` if + not relevant for parameter mapping + condition_df: + PEtab condition DataFrame + parameter_df: + PEtab parameter DataFrame + mapping_df: + PEtab mapping DataFrame + sbml_model: + The SBML model (deprecated) + model: + The model. + simulation_parameters: + Model simulation parameter IDs mapped to parameter values (output + of ``petab.sbml.get_model_parameters(.., with_values=True)``). + Optional, saves time if precomputed. + warn_unmapped: + If ``True``, log warning regarding unmapped parameters + scaled_parameters: + Whether parameter values should be scaled. + fill_fixed_parameters: + Whether to fill in nominal values for fixed parameters + (estimate=0 in parameters table). + allow_timepoint_specific_numeric_noise_parameters: + Mapping of timepoint-specific parameters overrides is generally + not supported. If this option is set to True, this function will + not fail in case of timepoint-specific fixed noise parameters, + if the noise formula consists only of one single parameter. + It is expected that the respective mapping is performed elsewhere. + The value mapped to the respective parameter here is undefined. + + Returns: + Tuple of two dictionaries. First dictionary mapping model parameter IDs + to mapped parameters IDs to be estimated or to filled-in values in case + of non-estimated parameters. + Second dictionary mapping model parameter IDs to their scale. + ``NaN`` is used where no mapping exists. + """ + if sbml_model: + warnings.warn( + "Passing a model via the `sbml_model` argument is " + "deprecated, use `model=petab.models.sbml_model." 
+ "SbmlModel(...)` instead.", + DeprecationWarning, + stacklevel=2, + ) + from .models.sbml_model import SbmlModel + + if model: + raise ValueError( + "Arguments `model` and `sbml_model` are " "mutually exclusive." + ) + model = SbmlModel(sbml_model=sbml_model) + + if cur_measurement_df is not None: + _perform_mapping_checks( + cur_measurement_df, + allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501 + ) + + if simulation_parameters is None: + simulation_parameters = dict( + model.get_free_parameter_ids_with_values() + ) + + # NOTE: order matters here - the former is overwritten by the latter: + # model < condition table < measurement < table parameter table + + # initialize mapping dicts + # for the case of matching simulation and optimization parameter vector + par_mapping = simulation_parameters.copy() + scale_mapping = {par_id: LIN for par_id in par_mapping.keys()} + _output_parameters_to_nan(par_mapping) + + # not strictly necessary for preequilibration, be we do it to have + # same length of parameter vectors + if cur_measurement_df is not None: + _apply_output_parameter_overrides(par_mapping, cur_measurement_df) + + if not is_preeq: + handle_missing_overrides(par_mapping, warn=warn_unmapped) + + _apply_condition_parameters( + par_mapping, + scale_mapping, + condition_id, + condition_df, + model, + mapping_df, + ) + _apply_parameter_table( + par_mapping, + scale_mapping, + parameter_df, + scaled_parameters, + fill_fixed_parameters, + ) + + return par_mapping, scale_mapping + + +def _output_parameters_to_nan(mapping: ParMappingDict) -> None: + """Set output parameters in mapping dictionary to nan""" + rex = re.compile("^(noise|observable)Parameter[0-9]+_") + for key in mapping.keys(): + try: + matches = rex.match(key) + except TypeError: + continue + + if matches: + mapping[key] = np.nan + + +def _apply_output_parameter_overrides( + mapping: ParMappingDict, cur_measurement_df: pd.DataFrame +) 
-> None: + """ + Apply output parameter overrides to the parameter mapping dict for a given + condition as defined in the measurement table (``observableParameter``, + ``noiseParameters``). + + Arguments: + mapping: parameter mapping dict as obtained from + :py:func:`get_parameter_mapping_for_condition`. + cur_measurement_df: + Subset of the measurement table for the current condition + """ + for _, row in cur_measurement_df.iterrows(): + # we trust that the number of overrides matches (see above) + overrides = measurements.split_parameter_replacement_list( + row.get(OBSERVABLE_PARAMETERS, None) + ) + _apply_overrides_for_observable( + mapping, row[OBSERVABLE_ID], "observable", overrides + ) + + overrides = measurements.split_parameter_replacement_list( + row.get(NOISE_PARAMETERS, None) + ) + _apply_overrides_for_observable( + mapping, row[OBSERVABLE_ID], "noise", overrides + ) + + +def _apply_overrides_for_observable( + mapping: ParMappingDict, + observable_id: str, + override_type: Literal["observable", "noise"], + overrides: list[str], +) -> None: + """ + Apply parameter-overrides for observables and noises to mapping + matrix. + + Arguments: + mapping: mapping dict to which to apply overrides + observable_id: observable ID + override_type: ``'observable'`` or ``'noise'`` + overrides: list of overrides for noise or observable parameters + """ + for i, override in enumerate(overrides): + overridee_id = f"{override_type}Parameter{i+1}_{observable_id}" + mapping[overridee_id] = override + + +def _apply_condition_parameters( + par_mapping: ParMappingDict, + scale_mapping: ScaleMappingDict, + condition_id: str, + condition_df: pd.DataFrame, + model: Model, + mapping_df: pd.DataFrame | None = None, +) -> None: + """Replace parameter IDs in parameter mapping dictionary by condition + table parameter values (in-place). 
+ + Arguments: + par_mapping: see :py:func:`get_parameter_mapping_for_condition` + condition_id: ID of condition to work on + condition_df: PEtab condition table + """ + for overridee_id in condition_df.columns: + if overridee_id == CONDITION_NAME: + continue + + overridee_id = resolve_mapping(mapping_df, overridee_id) + + # Species, compartments, and rule targets are handled elsewhere + if model.is_state_variable(overridee_id): + continue + + par_mapping[overridee_id] = core.to_float_if_float( + condition_df.loc[condition_id, overridee_id] + ) + + if isinstance(par_mapping[overridee_id], numbers.Number) and np.isnan( + par_mapping[overridee_id] + ): + # NaN in the condition table for an entity without time derivative + # indicates that the model value should be used + try: + par_mapping[overridee_id] = model.get_parameter_value( + overridee_id + ) + except ValueError as e: + raise NotImplementedError( + "Not sure how to handle NaN in condition table for " + f"{overridee_id}." + ) from e + + scale_mapping[overridee_id] = LIN + + +def _apply_parameter_table( + par_mapping: ParMappingDict, + scale_mapping: ScaleMappingDict, + parameter_df: pd.DataFrame | None = None, + scaled_parameters: bool = False, + fill_fixed_parameters: bool = True, +) -> None: + """Replace parameters from parameter table in mapping list for a given + condition and set the corresponding scale. + + Replace non-estimated parameters by ``nominalValues`` + (un-scaled / lin-scaled), replace estimated parameters by the respective + ID. 
+ + Arguments: + par_mapping: + mapping dict obtained from + :py:func:`get_parameter_mapping_for_condition` + parameter_df: + PEtab parameter table + """ + if parameter_df is None: + return + + for row in parameter_df.itertuples(): + if row.Index not in par_mapping: + # The current parameter is not required for this condition + continue + + scale = getattr(row, PARAMETER_SCALE, LIN) + scale_mapping[row.Index] = scale + if fill_fixed_parameters and getattr(row, ESTIMATE) == 0: + val = getattr(row, NOMINAL_VALUE) + if scaled_parameters: + val = parameters.scale(val, scale) + else: + scale_mapping[row.Index] = LIN + par_mapping[row.Index] = val + else: + par_mapping[row.Index] = row.Index + + # Replace any leftover mapped parameter coming from condition table + for problem_par, sim_par in par_mapping.items(): + # string indicates unmapped + if not isinstance(sim_par, str): + continue + + try: + # the overridee is a model parameter + par_mapping[problem_par] = par_mapping[sim_par] + scale_mapping[problem_par] = scale_mapping[sim_par] + except KeyError: + if parameter_df is None: + raise + + # or the overridee is only defined in the parameter table + scale = ( + parameter_df.loc[sim_par, PARAMETER_SCALE] + if PARAMETER_SCALE in parameter_df + else LIN + ) + + if ( + fill_fixed_parameters + and ESTIMATE in parameter_df + and parameter_df.loc[sim_par, ESTIMATE] == 0 + ): + val = parameter_df.loc[sim_par, NOMINAL_VALUE] + if scaled_parameters: + val = parameters.scale(val, scale) + else: + scale = LIN + par_mapping[problem_par] = val + + scale_mapping[problem_par] = scale + + +def _perform_mapping_checks( + measurement_df: pd.DataFrame, + allow_timepoint_specific_numeric_noise_parameters: bool = False, +) -> None: + """Check for PEtab features which we can't account for during parameter + mapping. 
+ """ + if lint.measurement_table_has_timepoint_specific_mappings( + measurement_df, + allow_scalar_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501 + ): + # we could allow that for floats, since they don't matter in this + # function and would be simply ignored + raise ValueError( + "Timepoint-specific parameter overrides currently unsupported." + ) + + +def handle_missing_overrides( + mapping_par_opt_to_par_sim: ParMappingDict, + warn: bool = True, + condition_id: str = None, +) -> None: + """ + Find all observable parameters and noise parameters that were not mapped + and set their mapping to np.nan. + + Assumes that parameters matching the regular expression + ``(noise|observable)Parameter[0-9]+_`` were all supposed to be overwritten. + + Parameters: + mapping_par_opt_to_par_sim: + Output of :py:func:`get_parameter_mapping_for_condition` + warn: + If True, log warning regarding unmapped parameters + condition_id: + Optional condition ID for more informative output + """ + _missed_vals = [] + rex = re.compile("^(noise|observable)Parameter[0-9]+_") + for key, val in mapping_par_opt_to_par_sim.items(): + try: + matches = rex.match(val) + except TypeError: + continue + + if matches: + mapping_par_opt_to_par_sim[key] = np.nan + _missed_vals.append(key) + + if _missed_vals and warn: + logger.warning( + f"Could not map the following overrides for condition " + f"{condition_id}: " + f"{_missed_vals}. Usually, this is just due to missing " + f"data points." + ) + + +def merge_preeq_and_sim_pars_condition( + condition_map_preeq: ParMappingDict, + condition_map_sim: ParMappingDict, + condition_scale_map_preeq: ScaleMappingDict, + condition_scale_map_sim: ScaleMappingDict, + condition: Any, +) -> None: + """Merge preequilibration and simulation parameters and scales for a single + condition while checking for compatibility. 
+ + This function is meant for the case where we cannot have different + parameters (and scales) for preequilibration and simulation. Therefore, + merge both and ensure matching scales and parameters. + ``condition_map_sim`` and ``condition_scale_map_sim`` will be modified in + place. + + Arguments: + condition_map_preeq, condition_map_sim: + Parameter mapping as obtained from + :py:func:`get_parameter_mapping_for_condition` + condition_scale_map_preeq, condition_scale_map_sim: + Parameter scale mapping as obtained from + :py:func:`get_parameter_mapping_for_condition` + condition: Condition identifier for more informative error messages + """ + if not condition_map_preeq: + # nothing to do + return + + all_par_ids = set(condition_map_sim.keys()) | set( + condition_map_preeq.keys() + ) + + for par_id in all_par_ids: + if par_id not in condition_map_preeq: + # nothing to do + continue + + if par_id not in condition_map_sim: + # unmapped for simulation -> just use preeq values + condition_map_sim[par_id] = condition_map_preeq[par_id] + condition_scale_map_sim[par_id] = condition_scale_map_preeq[par_id] + continue + + # present in both + par_preeq = condition_map_preeq[par_id] + par_sim = condition_map_sim[par_id] + if par_preeq != par_sim and not ( + core.is_empty(par_sim) and core.is_empty(par_preeq) + ): + # both identical or both nan is okay + if core.is_empty(par_sim): + # unmapped for simulation + condition_map_sim[par_id] = par_preeq + elif core.is_empty(par_preeq): + # unmapped for preeq is okay + pass + else: + raise ValueError( + "Cannot handle different values for dynamic " + f"parameters: for condition {condition} " + f"parameter {par_id} is {par_preeq} for preeq " + f"and {par_sim} for simulation." 
+ ) + + scale_preeq = condition_scale_map_preeq[par_id] + scale_sim = condition_scale_map_sim[par_id] + + if scale_preeq != scale_sim: + # both identical is okay + if core.is_empty(par_sim): + # unmapped for simulation + condition_scale_map_sim[par_id] = scale_preeq + elif core.is_empty(par_preeq): + # unmapped for preeq is okay + pass + else: + raise ValueError( + "Cannot handle different parameter scales " + f"parameters: for condition {condition} " + f"scale for parameter {par_id} is {scale_preeq} for preeq " + f"and {scale_sim} for simulation." + ) + + +def merge_preeq_and_sim_pars( + parameter_mappings: Iterable[ParMappingDictTuple], + scale_mappings: Iterable[ScaleMappingDictTuple], +) -> tuple[list[ParMappingDictTuple], list[ScaleMappingDictTuple]]: + """Merge preequilibration and simulation parameters and scales for a list + of conditions while checking for compatibility. + + Parameters: + parameter_mappings: + As returned by + :py:func:`petab.get_optimization_to_simulation_parameter_mapping`. + scale_mappings: + As returned by + :py:func:`petab.get_optimization_to_simulation_parameter_mapping`. + + Returns: + The parameter and scale simulation mappings, modified and checked. 
+ """ + parameter_mapping = [] + scale_mapping = [] + for ic, ( + (map_preeq, map_sim), + (scale_map_preeq, scale_map_sim), + ) in enumerate(zip(parameter_mappings, scale_mappings, strict=True)): + merge_preeq_and_sim_pars_condition( + condition_map_preeq=map_preeq, + condition_map_sim=map_sim, + condition_scale_map_preeq=scale_map_preeq, + condition_scale_map_sim=scale_map_sim, + condition=ic, + ) + parameter_mapping.append(map_sim) + scale_mapping.append(scale_map_sim) + + return parameter_mapping, scale_mapping diff --git a/petab/v1/parameters.py b/petab/v1/parameters.py new file mode 100644 index 00000000..382e6b57 --- /dev/null +++ b/petab/v1/parameters.py @@ -0,0 +1,638 @@ +"""Functions operating on the PEtab parameter table""" + +import numbers +import warnings +from collections import OrderedDict +from collections.abc import Iterable, Sequence +from pathlib import Path +from typing import ( + Literal, +) + +import libsbml +import numpy as np +import pandas as pd + +from . import conditions, core, lint, measurements, observables +from .C import * # noqa: F403 +from .models import Model + +__all__ = [ + "create_parameter_df", + "get_optimization_parameter_scaling", + "get_optimization_parameters", + "get_parameter_df", + "get_priors_from_df", + "get_valid_parameters_for_parameter_table", + "map_scale", + "map_unscale", + "normalize_parameter_df", + "scale", + "unscale", + "write_parameter_df", +] + +PARAMETER_SCALE_ARGS = Literal["", "lin", "log", "log10"] + + +def get_parameter_df( + parameter_file: str + | Path + | pd.DataFrame + | Iterable[str | Path | pd.DataFrame] + | None, +) -> pd.DataFrame | None: + """ + Read the provided parameter file into a ``pandas.Dataframe``. + + Arguments: + parameter_file: Name of the file to read from or pandas.Dataframe, + or an Iterable. + + Returns: + Parameter ``DataFrame``, or ``None`` if ``None`` was passed. 
+ """ + if parameter_file is None: + return None + if isinstance(parameter_file, pd.DataFrame): + parameter_df = parameter_file + elif isinstance(parameter_file, str | Path): + parameter_df = pd.read_csv( + parameter_file, sep="\t", float_precision="round_trip" + ) + elif isinstance(parameter_file, Iterable): + dfs = [get_parameter_df(x) for x in parameter_file if x] + + if not dfs: + return None + + parameter_df = pd.concat(dfs) + # Check for contradicting parameter definitions + _check_for_contradicting_parameter_definitions(parameter_df) + + return parameter_df + + lint.assert_no_leading_trailing_whitespace( + parameter_df.columns.values, "parameter" + ) + + if not isinstance(parameter_df.index, pd.RangeIndex): + parameter_df.reset_index( + drop=parameter_file.index.name != PARAMETER_ID, + inplace=True, + ) + + try: + parameter_df.set_index([PARAMETER_ID], inplace=True) + except KeyError as e: + raise KeyError( + f"Parameter table missing mandatory field {PARAMETER_ID}." + ) from e + _check_for_contradicting_parameter_definitions(parameter_df) + + return parameter_df + + +def _check_for_contradicting_parameter_definitions(parameter_df: pd.DataFrame): + """ + Raises a ValueError for non-unique parameter IDs + """ + parameter_duplicates = set( + parameter_df.index.values[parameter_df.index.duplicated()] + ) + if parameter_duplicates: + raise ValueError( + f"The values of `{PARAMETER_ID}` must be unique. The " + f"following duplicates were found:\n{parameter_duplicates}" + ) + + +def write_parameter_df(df: pd.DataFrame, filename: str | Path) -> None: + """Write PEtab parameter table + + Arguments: + df: PEtab parameter table + filename: Destination file name + """ + df = get_parameter_df(df) + df.to_csv(filename, sep="\t", index=True) + + +def get_optimization_parameters(parameter_df: pd.DataFrame) -> list[str]: + """ + Get list of optimization parameter IDs from parameter table. 
+ + Arguments: + parameter_df: PEtab parameter DataFrame + + Returns: + List of IDs of parameters selected for optimization. + """ + return list(parameter_df.index[parameter_df[ESTIMATE] == 1]) + + +def get_optimization_parameter_scaling( + parameter_df: pd.DataFrame, +) -> dict[str, str]: + """ + Get Dictionary with optimization parameter IDs mapped to parameter scaling + strings. + + Arguments: + parameter_df: PEtab parameter DataFrame + + Returns: + Dictionary with optimization parameter IDs mapped to parameter scaling + strings. + """ + estimated_df = parameter_df.loc[parameter_df[ESTIMATE] == 1] + return dict( + zip(estimated_df.index, estimated_df[PARAMETER_SCALE], strict=True) + ) + + +def create_parameter_df( + sbml_model: libsbml.Model | None = None, + condition_df: pd.DataFrame | None = None, + observable_df: pd.DataFrame | None = None, + measurement_df: pd.DataFrame | None = None, + model: Model | None = None, + include_optional: bool = False, + parameter_scale: str = LOG10, + lower_bound: Iterable = None, + upper_bound: Iterable = None, + mapping_df: pd.DataFrame | None = None, +) -> pd.DataFrame: + """Create a new PEtab parameter table + + All table entries can be provided as string or list-like with length + matching the number of parameters + + Arguments: + sbml_model: SBML Model (deprecated, mutually exclusive with ``model``) + model: PEtab model (mutually exclusive with ``sbml_model``) + condition_df: PEtab condition DataFrame + observable_df: PEtab observable DataFrame + measurement_df: PEtab measurement DataFrame + include_optional: By default this only returns parameters that are + required to be present in the parameter table. If set to ``True``, + this returns all parameters that are allowed to be present in the + parameter table (i.e. also including parameters specified in the + model). 
+ parameter_scale: parameter scaling + lower_bound: lower bound for parameter value + upper_bound: upper bound for parameter value + mapping_df: PEtab mapping DataFrame + + Returns: + The created parameter DataFrame + """ + if sbml_model: + warnings.warn( + "Passing a model via the `sbml_model` argument is " + "deprecated, use `model=petab.models.sbml_model." + "SbmlModel(...)` instead.", + DeprecationWarning, + stacklevel=2, + ) + from .models.sbml_model import SbmlModel + + if model: + raise ValueError( + "Arguments `model` and `sbml_model` are " "mutually exclusive." + ) + model = SbmlModel(sbml_model=sbml_model) + if include_optional: + parameter_ids = list( + get_valid_parameters_for_parameter_table( + model=model, + condition_df=condition_df, + observable_df=observable_df, + measurement_df=measurement_df, + ) + ) + else: + parameter_ids = list( + get_required_parameters_for_parameter_table( + model=model, + condition_df=condition_df, + observable_df=observable_df, + measurement_df=measurement_df, + mapping_df=mapping_df, + ) + ) + + df = pd.DataFrame( + data={ + PARAMETER_ID: parameter_ids, + PARAMETER_NAME: parameter_ids, + PARAMETER_SCALE: parameter_scale, + LOWER_BOUND: lower_bound, + UPPER_BOUND: upper_bound, + NOMINAL_VALUE: np.nan, + ESTIMATE: 1, + INITIALIZATION_PRIOR_TYPE: "", + INITIALIZATION_PRIOR_PARAMETERS: "", + OBJECTIVE_PRIOR_TYPE: "", + OBJECTIVE_PRIOR_PARAMETERS: "", + } + ) + df.set_index([PARAMETER_ID], inplace=True) + + # For model parameters, set nominal values as defined in the model + for parameter_id in df.index: + try: + df.loc[parameter_id, NOMINAL_VALUE] = model.get_parameter_value( + parameter_id + ) + except ValueError: + # parameter was introduced as condition-specific override and + # is potentially not present in the model + pass + return df + + +def get_required_parameters_for_parameter_table( + model: Model, + condition_df: pd.DataFrame, + observable_df: pd.DataFrame, + measurement_df: pd.DataFrame, + mapping_df: pd.DataFrame 
= None, +) -> set[str]: + """ + Get set of parameters which need to go into the parameter table + + Arguments: + model: PEtab model + condition_df: PEtab condition table + observable_df: PEtab observable table + measurement_df: PEtab measurement table + mapping_df: PEtab mapping table + + Returns: + Set of parameter IDs which PEtab requires to be present in the + parameter table. That is all {observable,noise}Parameters from the + measurement table as well as all parametric condition table overrides + that are not defined in the model. + """ + # use ordered dict as proxy for ordered set + parameter_ids = OrderedDict() + + # Add parameters from measurement table, unless they are fixed parameters + def append_overrides(overrides): + for p in overrides: + if isinstance(p, str) and p not in condition_df.columns: + parameter_ids[p] = None + + for _, row in measurement_df.iterrows(): + # we trust that the number of overrides matches + append_overrides( + measurements.split_parameter_replacement_list( + row.get(OBSERVABLE_PARAMETERS, None) + ) + ) + append_overrides( + measurements.split_parameter_replacement_list( + row.get(NOISE_PARAMETERS, None) + ) + ) + + # Add output parameters except for placeholders + for formula_type, placeholder_sources in ( + ( + # Observable formulae + {"observables": True, "noise": False}, + # can only contain observable placeholders + {"noise": False, "observables": True}, + ), + ( + # Noise formulae + {"observables": False, "noise": True}, + # can contain noise and observable placeholders + {"noise": True, "observables": True}, + ), + ): + output_parameters = observables.get_output_parameters( + observable_df, + model, + mapping_df=mapping_df, + **formula_type, + ) + placeholders = observables.get_placeholders( + observable_df, + **placeholder_sources, + ) + for p in output_parameters: + if p not in placeholders: + parameter_ids[p] = None + + # Add condition table parametric overrides unless already defined in the + # model + for p in 
conditions.get_parametric_overrides(condition_df): + if not model.has_entity_with_id(p): + parameter_ids[p] = None + + # remove parameters that occur in the condition table and are overridden + # for ALL conditions + for p in condition_df.columns[~condition_df.isnull().any()]: + try: + del parameter_ids[p] + except KeyError: + pass + return parameter_ids.keys() + + +def get_valid_parameters_for_parameter_table( + model: Model, + condition_df: pd.DataFrame, + observable_df: pd.DataFrame, + measurement_df: pd.DataFrame, + mapping_df: pd.DataFrame = None, +) -> set[str]: + """ + Get set of parameters which may be present inside the parameter table + + Arguments: + model: PEtab model + condition_df: PEtab condition table + observable_df: PEtab observable table + measurement_df: PEtab measurement table + mapping_df: PEtab mapping table for additional checks + + Returns: + Set of parameter IDs which PEtab allows to be present in the + parameter table. + """ + # - grab all allowed model parameters + # - grab corresponding names from mapping table + # - grab all output parameters defined in {observable,noise}Formula + # - grab all parameters from measurement table + # - grab all parametric overrides from condition table + # - remove parameters for which condition table columns exist + # - remove placeholder parameters + # (only partial overrides are not supported) + + # must not go into parameter table + blackset = set() + + if observable_df is not None: + placeholders = set(observables.get_placeholders(observable_df)) + + # collect assignment targets + blackset |= placeholders + + if condition_df is not None: + blackset |= set(condition_df.columns.values) - {CONDITION_NAME} + + # don't use sets here, to have deterministic ordering, + # e.g. 
for creating parameter tables + parameter_ids = OrderedDict.fromkeys( + p + for p in model.get_valid_parameters_for_parameter_table() + if p not in blackset + ) + + if mapping_df is not None: + for from_id, to_id in zip( + mapping_df.index.values, mapping_df[MODEL_ENTITY_ID], strict=True + ): + if to_id in parameter_ids.keys(): + parameter_ids[from_id] = None + + if observable_df is not None: + # add output parameters from observables table + output_parameters = observables.get_output_parameters( + observable_df=observable_df, model=model + ) + for p in output_parameters: + if p not in blackset: + parameter_ids[p] = None + + # Append parameters from measurement table, unless they occur as condition + # table columns + def append_overrides(overrides): + for p in overrides: + if isinstance(p, str) and p not in blackset: + parameter_ids[p] = None + + if measurement_df is not None: + for _, row in measurement_df.iterrows(): + # we trust that the number of overrides matches + append_overrides( + measurements.split_parameter_replacement_list( + row.get(OBSERVABLE_PARAMETERS, None) + ) + ) + append_overrides( + measurements.split_parameter_replacement_list( + row.get(NOISE_PARAMETERS, None) + ) + ) + + # Append parameter overrides from condition table + if condition_df is not None: + for p in conditions.get_parametric_overrides(condition_df): + parameter_ids[p] = None + + return parameter_ids.keys() + + +def get_priors_from_df( + parameter_df: pd.DataFrame, + mode: Literal["initialization", "objective"], + parameter_ids: Sequence[str] = None, +) -> list[tuple]: + """Create list with information about the parameter priors + + Arguments: + parameter_df: PEtab parameter table + mode: ``'initialization'`` or ``'objective'`` + parameter_ids: A sequence of parameter IDs for which to sample starting + points. + For subsetting or reordering the parameters. + Defaults to all estimated parameters. + + Returns: + List with prior information. 
+ """ + # get types and parameters of priors from dataframe + par_to_estimate = parameter_df.loc[parameter_df[ESTIMATE] == 1] + + if parameter_ids: + try: + par_to_estimate = par_to_estimate.loc[parameter_ids, :] + except KeyError as e: + missing_ids = set(parameter_ids) - set(par_to_estimate.index) + raise KeyError( + "Parameter table does not contain estimated parameter(s) " + f"{missing_ids}." + ) from e + + prior_list = [] + for _, row in par_to_estimate.iterrows(): + # retrieve info about type + prior_type = str(row.get(f"{mode}PriorType", "")) + if core.is_empty(prior_type): + prior_type = PARAMETER_SCALE_UNIFORM + + # retrieve info about parameters of priors, make it a tuple of floats + pars_str = str(row.get(f"{mode}PriorParameters", "")) + if core.is_empty(pars_str): + lb, ub = map_scale( + [row[LOWER_BOUND], row[UPPER_BOUND]], + [row[PARAMETER_SCALE]] * 2, + ) + pars_str = f"{lb}{PARAMETER_SEPARATOR}{ub}" + prior_pars = tuple( + float(entry) for entry in pars_str.split(PARAMETER_SEPARATOR) + ) + + # add parameter scale and bounds, as this may be needed + par_scale = row[PARAMETER_SCALE] + par_bounds = (row[LOWER_BOUND], row[UPPER_BOUND]) + + # if no prior is specified, we assume a non-informative (uniform) one + if prior_type == "nan": + prior_type = PARAMETER_SCALE_UNIFORM + prior_pars = ( + scale(row[LOWER_BOUND], par_scale), + scale(row[UPPER_BOUND], par_scale), + ) + + prior_list.append((prior_type, prior_pars, par_scale, par_bounds)) + + return prior_list + + +def scale( + parameter: numbers.Number, + scale_str: PARAMETER_SCALE_ARGS, +) -> numbers.Number: + """Scale parameter according to ``scale_str``. + + Arguments: + parameter: + Parameter to be scaled. + scale_str: + One of ``'lin'`` (synonymous with ``''``), ``'log'``, ``'log10'``. + + Returns: + The scaled parameter. 
+ """ + if scale_str == LIN or not scale_str: + return parameter + if scale_str == LOG: + return np.log(parameter) + if scale_str == LOG10: + return np.log10(parameter) + raise ValueError(f"Invalid parameter scaling: {scale_str}") + + +def unscale( + parameter: numbers.Number, + scale_str: PARAMETER_SCALE_ARGS, +) -> numbers.Number: + """Unscale parameter according to ``scale_str``. + + Arguments: + parameter: + Parameter to be unscaled. + scale_str: + One of ``'lin'`` (synonymous with ``''``), ``'log'``, ``'log10'``. + + Returns: + The unscaled parameter. + """ + if scale_str == LIN or not scale_str: + return parameter + if scale_str == LOG: + return np.exp(parameter) + if scale_str == LOG10: + return 10**parameter + raise ValueError(f"Invalid parameter scaling: {scale_str}") + + +def map_scale( + parameters: Sequence[numbers.Number], + scale_strs: Iterable[PARAMETER_SCALE_ARGS] | PARAMETER_SCALE_ARGS, +) -> Iterable[numbers.Number]: + """Scale the parameters, i.e. as :func:`scale`, but for Sequences. + + Arguments: + parameters: + Parameters to be scaled. + scale_strs: + Scales to apply. Broadcast if a single string. + + Returns: + The scaled parameters. + """ + if isinstance(scale_strs, str): + scale_strs = [scale_strs] * len(parameters) + return ( + scale(par_val, scale_str) + for par_val, scale_str in zip(parameters, scale_strs, strict=True) + ) + + +def map_unscale( + parameters: Sequence[numbers.Number], + scale_strs: Iterable[PARAMETER_SCALE_ARGS] | PARAMETER_SCALE_ARGS, +) -> Iterable[numbers.Number]: + """Unscale the parameters, i.e. as :func:`unscale`, but for Sequences. + + Arguments: + parameters: + Parameters to be unscaled. + scale_strs: + Scales that the parameters are currently on. + Broadcast if a single string. + + Returns: + The unscaled parameters. 
+ """ + if isinstance(scale_strs, str): + scale_strs = [scale_strs] * len(parameters) + return ( + unscale(par_val, scale_str) + for par_val, scale_str in zip(parameters, scale_strs, strict=True) + ) + + +def normalize_parameter_df(parameter_df: pd.DataFrame) -> pd.DataFrame: + """Add missing columns and fill in default values.""" + df = parameter_df.copy(deep=True) + + if PARAMETER_NAME not in df: + df[PARAMETER_NAME] = df.reset_index()[PARAMETER_ID] + + prior_type_cols = [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE] + prior_par_cols = [ + INITIALIZATION_PRIOR_PARAMETERS, + OBJECTIVE_PRIOR_PARAMETERS, + ] + # iterate over initialization and objective priors + for prior_type_col, prior_par_col in zip( + prior_type_cols, prior_par_cols, strict=True + ): + # fill in default values for prior type + if prior_type_col not in df: + df[prior_type_col] = PARAMETER_SCALE_UNIFORM + else: + for irow, row in df.iterrows(): + if core.is_empty(row[prior_type_col]): + df.loc[irow, prior_type_col] = PARAMETER_SCALE_UNIFORM + if prior_par_col not in df: + df[prior_par_col] = None + for irow, row in df.iterrows(): + if ( + core.is_empty(row[prior_par_col]) + and row[prior_type_col] == PARAMETER_SCALE_UNIFORM + ): + lb, ub = map_scale( + [row[LOWER_BOUND], row[UPPER_BOUND]], + [row[PARAMETER_SCALE]] * 2, + ) + df.loc[irow, prior_par_col] = f"{lb}{PARAMETER_SEPARATOR}{ub}" + + return df diff --git a/petab/problem.py b/petab/v1/problem.py similarity index 95% rename from petab/problem.py rename to petab/v1/problem.py index 6c5307b2..4a5577eb 100644 --- a/petab/problem.py +++ b/petab/v1/problem.py @@ -3,10 +3,10 @@ import os import tempfile +from collections.abc import Iterable from math import nan from pathlib import Path, PurePosixPath -from typing import TYPE_CHECKING, Iterable -from urllib.parse import unquote, urlparse, urlunparse +from typing import TYPE_CHECKING from warnings import warn import pandas as pd @@ -28,6 +28,7 @@ from .models import MODEL_TYPE_SBML from 
.models.model import Model, model_factory from .models.sbml_model import SbmlModel +from .yaml import get_path_prefix if TYPE_CHECKING: import libsbml @@ -49,7 +50,7 @@ class Problem: Optionally it may contain visualization tables. - Attributes: + Parameters: condition_df: PEtab condition table measurement_df: PEtab measurement table parameter_df: PEtab parameter table @@ -262,34 +263,9 @@ def from_yaml(yaml_config: dict | Path | str) -> Problem: get_path = lambda filename: filename # noqa: E731 if isinstance(yaml_config, str): - yaml_path = yaml_config + path_prefix = get_path_prefix(yaml_config) yaml_config = yaml.load_yaml(yaml_config) - - # yaml_config may be path or URL - path_url = urlparse(yaml_path) - if not path_url.scheme or ( - path_url.scheme != "file" and not path_url.netloc - ): - # a regular file path string - path_prefix = Path(yaml_path).parent - get_path = lambda filename: path_prefix / filename # noqa: E731 - else: - # a URL - # extract parent path from - url_path = unquote(urlparse(yaml_path).path) - parent_path = str(PurePosixPath(url_path).parent) - path_prefix = urlunparse( - ( - path_url.scheme, - path_url.netloc, - parent_path, - path_url.params, - path_url.query, - path_url.fragment, - ) - ) - # need "/" on windows, not "\" - get_path = lambda filename: f"{path_prefix}/{filename}" # noqa: E731 + get_path = lambda filename: f"{path_prefix}/{filename}" # noqa: E731 if yaml.is_composite_problem(yaml_config): raise ValueError( @@ -307,6 +283,12 @@ def from_yaml(yaml_config: dict | Path | str) -> Problem: ) if yaml_config[FORMAT_VERSION] == "2.0.0": warn("Support for PEtab2.0 is experimental!", stacklevel=2) + warn( + "Using petab.v1.Problem with PEtab2.0 is deprecated. 
" + "Use petab.v2.Problem instead.", + DeprecationWarning, + stacklevel=2, + ) problem0 = yaml_config["problems"][0] @@ -503,7 +485,7 @@ def to_files_generic( if prefix_path is None: return filenames["yaml_file"] - return str(prefix_path / filenames["yaml_file"]) + return str(PurePosixPath(prefix_path, filenames["yaml_file"])) def to_files( self, @@ -647,7 +629,7 @@ def error(name: str) -> ValueError: mapping_files=mapping_file, ) - def get_optimization_parameters(self): + def get_optimization_parameters(self) -> list[str]: """ Return list of optimization parameter IDs. @@ -655,7 +637,7 @@ def get_optimization_parameters(self): """ return parameters.get_optimization_parameters(self.parameter_df) - def get_optimization_parameter_scales(self): + def get_optimization_parameter_scales(self) -> dict[str, str]: """ Return list of optimization parameter scaling strings. @@ -663,7 +645,7 @@ def get_optimization_parameter_scales(self): """ return parameters.get_optimization_parameter_scaling(self.parameter_df) - def get_model_parameters(self): + def get_model_parameters(self) -> list[str] | dict[str, float]: """See :py:func:`petab.sbml.get_model_parameters`""" warn( "petab.Problem.get_model_parameters is deprecated and will be " @@ -674,7 +656,7 @@ def get_model_parameters(self): return sbml.get_model_parameters(self.sbml_model) - def get_observable_ids(self): + def get_observable_ids(self) -> list[str]: """ Returns dictionary of observable ids. """ @@ -945,7 +927,7 @@ def sample_parameter_startpoints_dict( parameter values. 
""" return [ - dict(zip(self.x_free_ids, parameter_values)) + dict(zip(self.x_free_ids, parameter_values, strict=True)) for parameter_values in self.sample_parameter_startpoints( n_starts=n_starts ) diff --git a/petab/sampling.py b/petab/v1/sampling.py similarity index 97% rename from petab/sampling.py rename to petab/v1/sampling.py index 466c5284..be154f1c 100644 --- a/petab/sampling.py +++ b/petab/v1/sampling.py @@ -1,6 +1,6 @@ """Functions related to parameter sampling""" -from typing import Sequence, Tuple +from collections.abc import Sequence import numpy as np import pandas as pd @@ -12,7 +12,7 @@ def sample_from_prior( - prior: Tuple[str, list, str, list], n_starts: int + prior: tuple[str, list, str, list], n_starts: int ) -> np.array: """Creates samples for one parameter based on prior diff --git a/petab/sbml.py b/petab/v1/sbml.py similarity index 94% rename from petab/sbml.py rename to petab/v1/sbml.py index b177478e..0a8fd20f 100644 --- a/petab/sbml.py +++ b/petab/v1/sbml.py @@ -4,13 +4,12 @@ import logging from numbers import Number from pathlib import Path -from typing import Dict, List, Optional, Tuple, Union from warnings import warn import libsbml from pandas.io.common import get_handle, is_file_like, is_url -import petab +import petab.v1 as petab logger = logging.getLogger(__name__) __all__ = [ @@ -132,7 +131,7 @@ def globalize_parameters( def get_model_parameters( sbml_model: libsbml.Model, with_values=False -) -> Union[List[str], Dict[str, float]]: +) -> list[str] | dict[str, float]: """Return SBML model parameters which are not Rule targets Arguments: @@ -157,9 +156,7 @@ def get_model_parameters( } -def write_sbml( - sbml_doc: libsbml.SBMLDocument, filename: Union[Path, str] -) -> None: +def write_sbml(sbml_doc: libsbml.SBMLDocument, filename: Path | str) -> None: """Write PEtab visualization table Arguments: @@ -177,7 +174,7 @@ def write_sbml( def get_sbml_model( filepath_or_buffer, -) -> Tuple[libsbml.SBMLReader, libsbml.SBMLDocument, 
libsbml.Model]: +) -> tuple[libsbml.SBMLReader, libsbml.SBMLDocument, libsbml.Model]: """Get an SBML model from file or URL or file handle :param filepath_or_buffer: @@ -195,7 +192,7 @@ def get_sbml_model( def load_sbml_from_string( sbml_string: str, -) -> Tuple[libsbml.SBMLReader, libsbml.SBMLDocument, libsbml.Model]: +) -> tuple[libsbml.SBMLReader, libsbml.SBMLDocument, libsbml.Model]: """Load SBML model from string :param sbml_string: Model as XML string @@ -210,24 +207,30 @@ def load_sbml_from_string( def load_sbml_from_file( sbml_file: str, -) -> Tuple[libsbml.SBMLReader, libsbml.SBMLDocument, libsbml.Model]: +) -> tuple[libsbml.SBMLReader, libsbml.SBMLDocument, libsbml.Model]: """Load SBML model from file :param sbml_file: Filename of the SBML file :return: The SBML reader, document, model """ + if not Path(sbml_file).is_file(): + raise FileNotFoundError(f"File not found: {sbml_file}") + sbml_reader = libsbml.SBMLReader() sbml_document = sbml_reader.readSBML(sbml_file) sbml_model = sbml_document.getModel() + if sbml_model is None: + raise ValueError(f"SBML model could not be loaded from {sbml_file}") + return sbml_reader, sbml_document, sbml_model def get_model_for_condition( petab_problem: "petab.Problem", sim_condition_id: str = None, - preeq_condition_id: Optional[str] = None, -) -> Tuple[libsbml.SBMLDocument, libsbml.Model]: + preeq_condition_id: str | None = None, +) -> tuple[libsbml.SBMLDocument, libsbml.Model]: """Create an SBML model for the given condition. Creates a copy of the model and updates parameters according to the PEtab diff --git a/petab/v1/simplify.py b/petab/v1/simplify.py new file mode 100644 index 00000000..c4cdeb91 --- /dev/null +++ b/petab/v1/simplify.py @@ -0,0 +1,115 @@ +"""Functionality for simplifying PEtab problems""" +from math import nan + +import pandas as pd + +import petab.v1 as petab + +from . 
def condition_parameters_to_parameter_table(problem: Problem):
    """Move parameters from the condition table to the parameters table, if
    the same parameter value is used for all conditions.

    Each condition-table column (other than the condition name and model
    state variables) that holds one and the same numeric value for every
    condition is dropped from the condition table and appended to the
    parameter table as a non-estimated, linearly scaled parameter with that
    value as nominal value. ``problem`` is modified in place.

    Nothing is changed if the problem has no condition table, an empty
    condition table, or no model.

    Arguments:
        problem: The PEtab problem to modify.
    """
    condition_df = problem.condition_df
    if condition_df is None or condition_df.empty or problem.model is None:
        return

    replacements = {}
    for parameter_id in condition_df:
        if parameter_id == CONDITION_NAME:
            continue

        if problem.model.is_state_variable(parameter_id):
            # initial states can't go the parameters table
            continue

        series = condition_df[parameter_id]
        value = petab.to_float_if_float(series.iloc[0])

        # same value for all conditions and no parametric overrides (str)?
        if isinstance(value, float) and len(series.unique()) == 1:
            # Store the converted float, not the raw cell: the raw cell may
            # be a numeric string (e.g. "1.0") that would end up as text in
            # the nominal-value column of the parameter table.
            replacements[parameter_id] = value

    if not replacements:
        return

    rows = pd.DataFrame(
        [
            {
                PARAMETER_ID: parameter_id,
                PARAMETER_SCALE: LIN,
                LOWER_BOUND: nan,
                UPPER_BOUND: nan,
                NOMINAL_VALUE: value,
                ESTIMATE: 0,
            }
            for parameter_id, value in replacements.items()
        ]
    )
    rows.set_index(PARAMETER_ID, inplace=True)

    if problem.parameter_df is None:
        problem.parameter_df = rows
    else:
        problem.parameter_df = pd.concat([problem.parameter_df, rows])

    problem.condition_df = problem.condition_df.drop(
        columns=list(replacements)
    )
+ Example (AMICI): https://bit.ly/33SUSG4 + + Attributes: + noise_formulas: + The formulae that will be used to calculate the scale of noise + distributions. + petab_problem: + A PEtab problem, which will be simulated. + rng: + A NumPy random generator, used to sample from noise distributions. + temporary_working_dir: + Whether ``working_dir`` is a temporary directory, which can be + deleted without significant consequence. + working_dir: + All simulator-specific output files will be saved here. This + directory and its contents may be modified and deleted, and + should be considered ephemeral. + """ + + def __init__( + self, + petab_problem: petab.Problem, + working_dir: pathlib.Path | str | None = None, + ): + """Initialize the simulator. + + Initialize the simulator with sufficient information to perform a + simulation. If no working directory is specified, a temporary one is + created. + + Arguments: + petab_problem: + A PEtab problem. + working_dir: + All simulator-specific output files will be saved here. This + directory and its contents may be modified and deleted, and + should be considered ephemeral. + """ + self.petab_problem = petab_problem + + self.temporary_working_dir = False + if working_dir is None: + working_dir = tempfile.mkdtemp() + self.temporary_working_dir = True + if not isinstance(working_dir, pathlib.Path): + working_dir = pathlib.Path(working_dir) + self.working_dir = working_dir + self.working_dir.mkdir(parents=True, exist_ok=True) + + self.noise_formulas = petab.calculate.get_symbolic_noise_formulas( + self.petab_problem.observable_df + ) + self.rng = np.random.default_rng() + + def remove_working_dir(self, force: bool = False, **kwargs) -> None: + """Remove the simulator working directory, and all files within. + + See the :meth:`petab.simulate.Simulator.__init__` method arguments. + + Arguments: + force: + If ``True``, the working directory is removed regardless of + whether it is a temporary directory. 
+ **kwargs: + Additional keyword arguments are passed to + :func:`shutil.rmtree`. + """ + if force or self.temporary_working_dir: + shutil.rmtree(self.working_dir, **kwargs) + if self.working_dir.is_dir(): + warn( + "Failed to remove the working directory: " + + str(self.working_dir), + stacklevel=2, + ) + else: + warn( + "By default, specified working directories are not removed. " + "Please call this method with `force=True`, or manually " + f"delete the working directory: {self.working_dir}", + stacklevel=2, + ) + + @abc.abstractmethod + def simulate_without_noise(self) -> pd.DataFrame: + """Simulate the PEtab problem. + + This is an abstract method that should be implemented with a simulation + package. Examples of this are referenced in the class docstring. + + Returns: + Simulated data, as a PEtab measurements table, which should be + equivalent to replacing all values in the + :const:`petab.C.MEASUREMENT` column of the measurements table (of + the PEtab problem supplied to the + :meth:`petab.simulate.Simulator.__init__` method), with + simulated values. + """ + raise NotImplementedError() + + def simulate( + self, + noise: bool = False, + noise_scaling_factor: float = 1, + as_measurement: bool = False, + **kwargs, + ) -> pd.DataFrame: + """Simulate a PEtab problem, optionally with noise. + + Arguments: + noise: If True, noise is added to simulated data. + noise_scaling_factor: + A multiplier of the scale of the noise distribution. + as_measurement: + Whether the data column is named :const:`petab.C.MEASUREMENT` + (`True`) or :const:`petab.C.SIMULATION` (`False`). + **kwargs: + Additional keyword arguments are passed to + :meth:`petab.simulate.Simulator.simulate_without_noise`. + + Returns: + Simulated data, as a PEtab measurements table. 
def sample_noise(
    petab_problem: petab.Problem,
    measurement_row: pd.Series,
    simulated_value: float,
    noise_formulas: dict[str, sp.Expr] | None = None,
    rng: np.random.Generator | None = None,
    noise_scaling_factor: float = 1,
    zero_bounded: bool = False,
) -> float:
    """Draw one noisy value for a simulated measurement.

    Arguments:
        petab_problem:
            The PEtab problem used to generate the simulated value.
            Instance of :class:`petab.Problem`.
        measurement_row:
            The row in the PEtab problem measurement table that corresponds
            to the simulated value.
        simulated_value:
            A simulated value without noise.
        noise_formulas:
            Processed noise formulas from the PEtab observables table, in the
            form output by :func:`petab.calculate.get_symbolic_noise_formulas`.
            Computed from the problem's observable table if not given.
        rng:
            A NumPy random generator. A fresh default generator is created
            if not given.
        noise_scaling_factor:
            A multiplier of the scale of the noise distribution.
        zero_bounded:
            Return zero if the sign of the return value and ``simulated_value``
            differ. Can be used to ensure non-negative and non-positive values,
            if the sign of ``simulated_value`` should not change.

    Returns:
        The sample from the PEtab noise distribution.
    """
    formulas = noise_formulas
    if formulas is None:
        formulas = petab.calculate.get_symbolic_noise_formulas(
            petab_problem.observable_df
        )
    generator = np.random.default_rng() if rng is None else rng

    scale = petab.calculate.evaluate_noise_formula(
        measurement_row,
        formulas,
        petab_problem.parameter_df,
        simulated_value,
    )

    # The distribution name is looked up on the observable of this
    # measurement; a missing column falls back to petab.C.NORMAL.
    observable = petab_problem.observable_df.loc[
        measurement_row[petab.C.OBSERVABLE_ID]
    ]
    distribution = observable.get(petab.C.NOISE_DISTRIBUTION, petab.C.NORMAL)
    # An empty noise distribution cell is read as NaN; treat it as default.
    if pd.isna(distribution):
        distribution = petab.C.NORMAL

    # The distribution name doubles as the name of the generator method,
    # e.g. `rng.normal(loc=simulation, scale=noise_value)`.
    draw = getattr(generator, distribution)
    noisy_value = draw(
        loc=simulated_value, scale=scale * noise_scaling_factor
    )

    if not zero_bounded:
        return noisy_value
    if np.sign(simulated_value) != np.sign(noisy_value):
        return 0.0
    return noisy_value
+ +""" +# ruff: noqa: F401 +import importlib.util + +from .plotting import DataProvider, Figure + +__all__ = ["DataProvider", "Figure"] + +if importlib.util.find_spec("matplotlib") is not None: + from .plot_data_and_simulation import ( + plot_problem, + plot_with_vis_spec, + plot_without_vis_spec, + ) + from .plot_residuals import ( + plot_goodness_of_fit, + plot_residuals_vs_simulation, + ) + from .plotter import MPLPlotter + + __all__.extend( + [ + "plot_without_vis_spec", + "plot_with_vis_spec", + "plot_problem", + "plot_goodness_of_fit", + "plot_residuals_vs_simulation", + "MPLPlotter", + ] + ) diff --git a/petab/visualize/cli.py b/petab/v1/visualize/cli.py similarity index 99% rename from petab/visualize/cli.py rename to petab/v1/visualize/cli.py index d25a6785..72074936 100644 --- a/petab/visualize/cli.py +++ b/petab/v1/visualize/cli.py @@ -7,6 +7,8 @@ from .. import Problem, get_simulation_df, get_visualization_df from .plot_data_and_simulation import plot_problem +__all__ = [] + def _parse_cli_args(): """Parse command-line arguments.""" diff --git a/petab/v1/visualize/data_overview.py b/petab/v1/visualize/data_overview.py new file mode 100644 index 00000000..349b503c --- /dev/null +++ b/petab/v1/visualize/data_overview.py @@ -0,0 +1,91 @@ +""" +Functions for creating an overview report of a PEtab problem +""" + +from pathlib import Path +from shutil import copyfile + +import pandas as pd + +import petab.v1 as petab +from petab.v1.C import ( + MEASUREMENT, + OBSERVABLE_ID, + PREEQUILIBRATION_CONDITION_ID, + SIMULATION_CONDITION_ID, +) + +__all__ = ["create_report"] + + +def create_report( + problem: petab.Problem, model_name: str, output_path: str | Path = "" +) -> None: + """Create an HTML overview data / model overview report + + Arguments: + problem: PEtab problem + model_name: Name of the model, used for file name for report + output_path: Output directory + """ + template_dir = Path(__file__).absolute().parent / "templates" + output_path = 
def get_data_per_observable(measurement_df: pd.DataFrame) -> pd.DataFrame:
    """Get table with number of data points per observable and condition

    The input table is not modified; all work is done on a copy.

    Arguments:
        measurement_df: PEtab measurement data frame
    Returns:
        Pivot table with number of data points per observable and condition.
        Rows are indexed by simulation condition (and preequilibration
        condition, if that column is present), columns are observables. An
        additional ``SUM`` row and ``SUM`` column hold the totals.
    """
    my_measurements = measurement_df.copy()

    index = [SIMULATION_CONDITION_ID]
    if PREEQUILIBRATION_CONDITION_ID in my_measurements:
        # Replace missing preequilibration IDs by "" so those rows are kept
        # as their own group by the pivot below. `fillna` must NOT be called
        # with `inplace=True` here: that variant returns None, which would
        # overwrite the whole column with None.
        my_measurements[PREEQUILIBRATION_CONDITION_ID] = (
            my_measurements[PREEQUILIBRATION_CONDITION_ID]
            .astype("object")
            .fillna("")
        )
        index.append(PREEQUILIBRATION_CONDITION_ID)

    data_per_observable = pd.pivot_table(
        my_measurements,
        values=MEASUREMENT,
        aggfunc="count",
        index=index,
        columns=[OBSERVABLE_ID],
        fill_value=0,
    )

    # Add row and column sums
    data_per_observable.loc["SUM", :] = data_per_observable.sum(axis=0).values
    data_per_observable["SUM"] = data_per_observable.sum(axis=1).values

    # Adding the SUM row may have changed the dtype; counts are integers.
    data_per_observable = data_per_observable.astype(int)

    return data_per_observable
to petab/v1/visualize/helper_functions.py index b48e1ad6..b1a6f1b1 100644 --- a/petab/visualize/helper_functions.py +++ b/petab/v1/visualize/helper_functions.py @@ -4,22 +4,21 @@ hence not be directly visible/usable when using `import petab.visualize`. """ -from typing import List import pandas as pd from ..C import * # for typehints -IdsList = List[str] -NumList = List[int] +IdsList = list[str] +NumList = list[int] __all__ = [ "create_dataset_id_list_new", "generate_dataset_id_col", ] -def generate_dataset_id_col(exp_data: pd.DataFrame) -> List[str]: +def generate_dataset_id_col(exp_data: pd.DataFrame) -> list[str]: """ Generate DATASET_ID column from condition_ids and observable_ids. @@ -49,8 +48,8 @@ def generate_dataset_id_col(exp_data: pd.DataFrame) -> List[str]: def create_dataset_id_list_new( - df: pd.DataFrame, group_by: str, id_list: List[IdsList] -) -> List[IdsList]: + df: pd.DataFrame, group_by: str, id_list: list[IdsList] +) -> list[IdsList]: """ Create dataset ID list from a list of simulation condition IDs or observable IDs. diff --git a/petab/v1/visualize/lint.py b/petab/v1/visualize/lint.py new file mode 100644 index 00000000..b5de74bc --- /dev/null +++ b/petab/v1/visualize/lint.py @@ -0,0 +1,179 @@ +"""Validation of PEtab visualization files""" +from __future__ import annotations + +import logging + +import pandas as pd + +from .. 
def validate_visualization_df(problem: Problem) -> bool:
    """Validate visualization table

    All problems found are reported through the module logger; validation
    continues after the first problem so that as many issues as possible are
    reported in one pass. A copy of the visualization table, with defaults
    filled in for unspecified optional values, is used for the checks; the
    table stored on ``problem`` is not modified.

    Arguments:
        problem: The PEtab problem containing a visualization table

    Returns:
        ``True`` if errors occurred, ``False`` otherwise
    """
    vis_df = problem.visualization_df
    if vis_df is None or vis_df.empty:
        return False

    errors = False

    if missing_req_cols := (
        set(VISUALIZATION_DF_REQUIRED_COLS) - set(vis_df.columns)
    ):
        logger.error(
            f"Missing required columns {missing_req_cols} "
            "in visualization table."
        )
        errors = True

    # Set all unspecified optional values to their defaults to simplify
    # validation
    vis_df = vis_df.copy()
    _apply_defaults(vis_df)

    if unknown_types := (
        set(vis_df[C.PLOT_TYPE_SIMULATION].unique())
        - set(C.PLOT_TYPES_SIMULATION)
    ):
        logger.error(
            f"Unknown {C.PLOT_TYPE_SIMULATION}: {unknown_types}. "
            f"Must be one of {C.PLOT_TYPES_SIMULATION}"
        )
        errors = True

    if unknown_types := (
        set(vis_df[C.PLOT_TYPE_DATA].unique()) - set(C.PLOT_TYPES_DATA)
    ):
        logger.error(
            f"Unknown {C.PLOT_TYPE_DATA}: {unknown_types}. "
            f"Must be one of {C.PLOT_TYPES_DATA}"
        )
        errors = True

    if unknown_scale := (set(vis_df[C.X_SCALE].unique()) - set(C.X_SCALES)):
        logger.error(
            f"Unknown {C.X_SCALE}: {unknown_scale}. "
            f"Must be one of {C.X_SCALES}"
        )
        errors = True

    if any(
        (vis_df[C.X_SCALE] == "order")
        & (vis_df[C.PLOT_TYPE_SIMULATION] != C.LINE_PLOT)
    ):
        logger.error(
            f"{C.X_SCALE}=order is only allowed with "
            f"{C.PLOT_TYPE_SIMULATION}={C.LINE_PLOT}."
        )
        errors = True

    if unknown_scale := (set(vis_df[C.Y_SCALE].unique()) - set(C.Y_SCALES)):
        logger.error(
            f"Unknown {C.Y_SCALE}: {unknown_scale}. "
            f"Must be one of {C.Y_SCALES}"
        )
        errors = True

    if problem.condition_df is not None:
        # Compare against the *values* of the xValues column. A plain
        # `name in series` would test the Series index labels instead.
        x_values = set(vis_df[C.X_VALUES].unique())

        # check for ambiguous values
        reserved_names = {C.TIME, "condition"}
        for reserved_name in reserved_names:
            if (
                reserved_name in problem.condition_df
                and reserved_name in x_values
            ):
                logger.error(
                    f"Ambiguous value for `{C.X_VALUES}`: "
                    f"`{reserved_name}` has a special meaning as "
                    f"`{C.X_VALUES}`, but there exists also a model "
                    "entity with that name."
                )
                errors = True

        # check xValues exist in condition table
        for xvalue in x_values - reserved_names:
            if xvalue not in problem.condition_df:
                logger.error(
                    f"{C.X_VALUES} was set to `{xvalue}`, but no "
                    "such column exists in the conditions table."
                )
                errors = True

    if problem.observable_df is not None:
        # yValues must be an observable
        for yvalue in vis_df[C.Y_VALUES].unique():
            if pd.isna(yvalue):
                # if there is only one observable, we default to that
                if len(problem.observable_df.index.unique()) != 1:
                    logger.error(
                        f"{C.Y_VALUES} must be specified if there is more "
                        "than one observable."
                    )
                    errors = True
                # An unspecified value is fully handled here; it must not
                # also be reported as an unknown observable below.
                continue

            if yvalue not in problem.observable_df.index:
                logger.error(
                    f"{C.Y_VALUES} was set to `{yvalue}`, but no such "
                    "observable exists in the observables table."
                )
                errors = True

    if problem.measurement_df is not None:
        # Unspecified dataset IDs may show up as None, NaN or "". NaN is
        # truthy, so it has to be excluded explicitly.
        referenced_datasets = {
            dataset_id
            for dataset_id in vis_df[C.DATASET_ID].unique()
            if pd.notna(dataset_id) and dataset_id
        }
        if referenced_datasets:
            # A measurement table without a dataset ID column simply
            # provides no datasets.
            if C.DATASET_ID in problem.measurement_df:
                existing_datasets = {
                    dataset_id
                    for dataset_id in problem.measurement_df[
                        C.DATASET_ID
                    ].unique()
                    if pd.notna(dataset_id) and dataset_id
                }
            else:
                existing_datasets = set()

            if not referenced_datasets.issubset(existing_datasets):
                logger.error(
                    f"Visualization table references {C.DATASET_ID}(s) "
                    f"{referenced_datasets - existing_datasets}, but no such "
                    "dataset(s) exist in the measurement table."
                )
                errors = True

    return errors
def plot_with_vis_spec(
    vis_spec_df: str | pd.DataFrame,
    conditions_df: str | pd.DataFrame,
    measurements_df: str | pd.DataFrame | None = None,
    simulations_df: str | pd.DataFrame | None = None,
    subplot_dir: str | None = None,
    plotter_type: str = "mpl",
    format_: str = "png",
) -> dict[str, plt.Subplot] | None:
    """
    Plot measurements and/or simulations as described by a visualization
    table.

    Parameters
    ----------
    vis_spec_df:
        A visualization table.
    conditions_df:
        A condition DataFrame in the PEtab format or path to the condition
        file.
    measurements_df:
        A measurement DataFrame in the PEtab format or path to the data file.
    simulations_df:
        A simulation DataFrame in the PEtab format or path to the simulation
        output data file.
    subplot_dir:
        A path to the folder where single subplots should be saved.
        PlotIDs will be taken as file names.
    plotter_type:
        Specifies which library should be used for plot generation. Currently,
        only matplotlib is supported.
    format_:
        File format for the generated figure.
        (See :py:func:`matplotlib.pyplot.savefig` for supported options).

    Returns
    -------
    ax: Axis object of the created plot.
    None: In case subplots are saved to a file.

    Raises
    ------
    TypeError: If neither measurements nor simulations are provided.
    NotImplementedError: If ``plotter_type`` is not ``"mpl"``.
    """
    nothing_to_plot = measurements_df is None and simulations_df is None
    if nothing_to_plot:
        raise TypeError(
            "Not enough arguments. Either measurements_data "
            "or simulations_data should be provided."
        )

    parser = VisSpecParser(conditions_df, measurements_df, simulations_df)
    figure, data_provider = parser.parse_from_vis_spec(vis_spec_df)

    # The visualization table is parsed before the plotter type is checked,
    # so parsing problems surface first.
    if plotter_type != "mpl":
        raise NotImplementedError(
            "Currently, only visualization with matplotlib is possible."
        )

    return MPLPlotter(figure, data_provider).generate_figure(
        subplot_dir, format_=format_
    )
+ If grouping list is not provided, measurements (simulations) will be + grouped by observable, i.e. all measurements for each observable will be + visualized on one plot. + + Parameters + ---------- + grouping_list: + A list of lists. Each sublist corresponds to a plot, each subplot + contains the Ids of datasets or observables or simulation conditions + for this plot. + group_by: + Grouping type. + Possible values: 'dataset', 'observable', 'simulation'. + conditions_df: + A condition DataFrame in the PEtab format or path to the condition + file. + measurements_df: + A measurement DataFrame in the PEtab format or path to the data file. + simulations_df: + A simulation DataFrame in the PEtab format or path to the simulation + output data file. + plotted_noise: + A string indicating how noise should be visualized: + ['MeanAndSD' (default), 'MeanAndSEM', 'replicate', 'provided']. + subplot_dir: + A path to the folder where single subplots should be saved. + PlotIDs will be taken as file names. + plotter_type: + Specifies which library should be used for plot generation. Currently, + only matplotlib is supported. + format_: + File format for the generated figure. + (See :py:func:`matplotlib.pyplot.savefig` for supported options). + + Returns + ------- + ax: Axis object of the created plot. + None: In case subplots are saved to a file. + """ + if measurements_df is None and simulations_df is None: + raise TypeError( + "Not enough arguments. Either measurements_data " + "or simulations_data should be provided." + ) + + vis_spec_parser = VisSpecParser( + conditions_df, measurements_df, simulations_df + ) + + figure, dataprovider = vis_spec_parser.parse_from_id_list( + grouping_list, group_by, plotted_noise + ) + + if plotter_type == "mpl": + plotter = MPLPlotter(figure, dataprovider) + else: + raise NotImplementedError( + "Currently, only visualization with " "matplotlib is possible." 
def plot_problem(
    petab_problem: problem.Problem,
    simulations_df: str | pd.DataFrame | None = None,
    grouping_list: list[IdsList] | None = None,
    group_by: str = "observable",
    plotted_noise: str = MEAN_AND_SD,
    subplot_dir: str | None = None,
    plotter_type: str = "mpl",
    format_: str = "png",
) -> dict[str, plt.Subplot] | None:
    """
    Visualization using petab problem.
    If Visualization table is part of the petab_problem, it will be used for
    visualization. Otherwise, grouping_list will be used.
    If neither Visualization table nor grouping_list are available,
    measurements (simulations) will be grouped by observable, i.e. all
    measurements for each observable will be visualized on one plot.

    Parameters
    ----------
    petab_problem:
        A PEtab problem.
    simulations_df:
        A simulation DataFrame in the PEtab format or path to the simulation
        output data file.
    grouping_list:
        A list of lists. Each sublist corresponds to a plot, each subplot
        contains the Ids of datasets or observables or simulation conditions
        for this plot. Only used if the problem has no visualization table.
    group_by:
        Possible values: 'dataset', 'observable', 'simulation'.
        Only used if the problem has no visualization table.
    plotted_noise:
        A string indicating how noise should be visualized:
        ['MeanAndSD' (default), 'MeanAndSEM', 'replicate', 'provided'].
        Only used if the problem has no visualization table.
    subplot_dir:
        A string which is taken as path to the folder where single subplots
        should be saved. PlotIDs will be taken as file names.
    plotter_type:
        Specifies which library should be used for plot generation. Currently,
        only matplotlib is supported.
    format_:
        File format for the generated figure.
        (See :py:func:`matplotlib.pyplot.savefig` for supported options).

    Returns
    -------
    ax: Axis object of the created plot.
    None: In case subplots are saved to a file.
    """
    # Arguments are forwarded by keyword so that this wrapper does not
    # depend on the positional parameter order of the delegates.
    if petab_problem.visualization_df is not None:
        return plot_with_vis_spec(
            vis_spec_df=petab_problem.visualization_df,
            conditions_df=petab_problem.condition_df,
            measurements_df=petab_problem.measurement_df,
            simulations_df=simulations_df,
            subplot_dir=subplot_dir,
            plotter_type=plotter_type,
            format_=format_,
        )
    return plot_without_vis_spec(
        conditions_df=petab_problem.condition_df,
        grouping_list=grouping_list,
        group_by=group_by,
        measurements_df=petab_problem.measurement_df,
        simulations_df=simulations_df,
        plotted_noise=plotted_noise,
        subplot_dir=subplot_dir,
        plotter_type=plotter_type,
        format_=format_,
    )
+ """ + if isinstance(simulations_df, str | Path): + simulations_df = get_simulation_df(simulations_df) + + if NOISE_DISTRIBUTION in petab_problem.observable_df: + if OBSERVABLE_TRANSFORMATION in petab_problem.observable_df: + observable_ids = petab_problem.observable_df[ + (petab_problem.observable_df[NOISE_DISTRIBUTION] == NORMAL) + & ( + petab_problem.observable_df[OBSERVABLE_TRANSFORMATION] + == LIN + ) + ].index + + else: + observable_ids = petab_problem.observable_df[ + petab_problem.observable_df[NOISE_DISTRIBUTION] == NORMAL + ].index + else: + observable_ids = petab_problem.observable_df.index + + if observable_ids.empty: + raise ValueError( + "Residuals plot is only applicable for normal " + "additive noise assumption" + ) + + if axes is None: + fig, axes = plt.subplots( + 1, 2, sharey=True, figsize=size, width_ratios=[2, 1] + ) + fig.set_layout_engine("tight") + fig.suptitle("Residuals") + + residual_df = calculate_residuals( + measurement_dfs=petab_problem.measurement_df, + simulation_dfs=simulations_df, + observable_dfs=petab_problem.observable_df, + parameter_dfs=petab_problem.parameter_df, + )[0] + + normal_residuals = residual_df[ + residual_df[OBSERVABLE_ID].isin(observable_ids) + ] + simulations_normal = simulations_df[ + simulations_df[OBSERVABLE_ID].isin(observable_ids) + ] + + # compare to standard normal distribution + ks_result = stats.kstest(normal_residuals[RESIDUAL], stats.norm.cdf) + + # plot the residuals plot + axes[0].hlines( + y=0, + xmin=min(simulations_normal[SIMULATION]), + xmax=max(simulations_normal[SIMULATION]), + ls="--", + color="gray", + ) + axes[0].scatter(simulations_normal[SIMULATION], normal_residuals[RESIDUAL]) + axes[0].text( + 0.15, + 0.85, + f"Kolmogorov-Smirnov test results:\n" + f"statistic: {ks_result[0]:.2f}\n" + f"pvalue: {ks_result[1]:.2e} ", + transform=axes[0].transAxes, + ) + axes[0].set_xlabel("simulated values") + axes[0].set_ylabel("residuals") + + # plot histogram + axes[1].hist( + 
def plot_goodness_of_fit(
    petab_problem: Problem,
    simulations_df: str | Path | pd.DataFrame,
    size: tuple = (10, 7),
    ax: plt.Axes | None = None,
) -> matplotlib.axes.Axes:
    """
    Plot goodness of fit.

    Scatter the simulated values against the measured values, draw the
    identity line and the least-squares regression line, and annotate the
    plot with the regression statistics and the mean squared residual.

    Parameters
    ----------
    petab_problem:
        A PEtab problem.
    simulations_df:
        A simulation DataFrame in the PEtab format or path to the simulation
        output data file.
    size:
        Figure size. Only used if ``ax`` is not provided.
    ax:
        Axis object. A new figure is created if ``None``.

    Returns
    -------
    ax: Axis object of the created plot.

    Raises
    ------
    NotImplementedError:
        If measurements or simulations are missing.
    """
    if isinstance(simulations_df, str | Path):
        simulations_df = get_simulation_df(simulations_df)

    if simulations_df is None or petab_problem.measurement_df is None:
        raise NotImplementedError(
            "Both measurements and simulation data "
            "are needed for goodness_of_fit"
        )

    # NOTE: measurements and simulations are paired positionally, i.e. both
    #  tables are assumed to list the data points in the same order.
    measured = petab_problem.measurement_df["measurement"]
    simulated = simulations_df["simulation"]

    residual_df = calculate_residuals(
        measurement_dfs=petab_problem.measurement_df,
        simulation_dfs=simulations_df,
        observable_dfs=petab_problem.observable_df,
        parameter_dfs=petab_problem.parameter_df,
    )[0]
    # x = measurements, y = simulations
    slope, intercept, r_value, p_value, _std_err = stats.linregress(
        measured, simulated
    )

    if ax is None:
        fig, ax = plt.subplots(figsize=size)
        fig.set_layout_engine("tight")

    ax.scatter(measured, simulated)

    # square plot with identical ranges on both axes
    ax.axis("square")
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    lim = [min([xlim[0], ylim[0]]), max([xlim[1], ylim[1]])]
    ax.set_xlim(lim)
    ax.set_ylim(lim)

    # 100 support points spanning the common axis range
    x = np.linspace(lim[0], lim[1], 100)
    ax.plot(x, x, linestyle="--", color="gray")
    ax.plot(x, intercept + slope * x, "r", label="fitted line")

    mse = np.mean(np.square(residual_df["residual"]))
    ax.text(
        0.1,
        0.70,
        f"$R^2$: {r_value**2:.2f}\n"
        f"slope: {slope:.2f}\n"
        f"intercept: {intercept:.2f}\n"
        f"pvalue: {p_value:.2e}\n"
        f"mean squared error: {mse:.2e}\n",
        transform=ax.transAxes,
    )

    ax.set_title("Goodness of fit")
    # labels follow the scatter call above: x = measured, y = simulated
    ax.set_xlabel("measurements")
    ax.set_ylabel("simulated values")
    return ax
class MPLPlotter(Plotter):
    """
    Matplotlib wrapper.

    Renders the subplots described by ``self.figure`` with matplotlib,
    fetching the data for every data plot from ``self.data_provider``.
    """

    def __init__(self, figure: Figure, data_provider: DataProvider):
        super().__init__(figure, data_provider)

    @staticmethod
    def _error_column_for_plot_type_data(plot_type_data: str) -> str | None:
        """Translate PEtab plotTypeData value to column name of internal
        data representation

        Parameters
        ----------
        plot_type_data: PEtab plotTypeData value (the way replicates should be
            handled)

        Returns
        -------
        Name of corresponding column, or ``None`` if the plot type has no
        associated error column.
        """
        if plot_type_data == MEAN_AND_SD:
            return "sd"
        if plot_type_data == MEAN_AND_SEM:
            return "sem"
        if plot_type_data == PROVIDED:
            return "noise_model"
        return None

    def generate_lineplot(
        self,
        ax: matplotlib.axes.Axes,
        dataplot: DataPlot,
        plotTypeData: str,
        splitaxes_params: dict,
    ) -> tuple[matplotlib.axes.Axes, matplotlib.axes.Axes]:
        """
        Generate line plot.

        It is possible to plot only data or only simulation or both.

        Parameters
        ----------
        ax:
            Axis object.
        dataplot:
            Visualization settings for the plot.
        plotTypeData:
            Specifies how replicates should be handled.
        splitaxes_params:
            Split axes parameters as returned by
            :meth:`_preprocess_splitaxes`. The ``"ax_inf"`` entry is
            updated if data at t=inf is plotted.

        Returns
        -------
        The axis for the finite time points and the axis for t=inf
        (the latter is taken from ``splitaxes_params["ax_inf"]``).
        """
        simu_color = None
        (
            measurements_to_plot,
            simulations_to_plot,
        ) = self.data_provider.get_data_to_plot(
            dataplot, plotTypeData == PROVIDED
        )
        noise_col = self._error_column_for_plot_type_data(plotTypeData)

        label_base = dataplot.legendEntry

        # check if t_inf is there
        # todo: if only t_inf, adjust appearance for that case
        plot_at_t_inf = (
            measurements_to_plot is not None and measurements_to_plot.inf_point
        ) or (
            simulations_to_plot is not None and simulations_to_plot.inf_point
        )

        if (
            measurements_to_plot is not None
            and not measurements_to_plot.data_to_plot.empty
        ):
            # plotting all measurement data

            p = None
            if plotTypeData == REPLICATE:
                replicates = np.stack(
                    measurements_to_plot.data_to_plot.repl.values
                )
                # sorts according to ascending order of conditions
                cond, replicates = zip(
                    *sorted(
                        zip(
                            measurements_to_plot.conditions,
                            replicates,
                            strict=True,
                        )
                    ),
                    strict=True,
                )
                replicates = np.stack(replicates)

                if replicates.ndim == 1:
                    replicates = np.expand_dims(replicates, axis=1)

                # plot first replicate
                p = ax.plot(
                    cond,
                    replicates[:, 0],
                    label=label_base,
                    **measurement_line_kwargs,
                )

                # plot other replicates with the same color
                ax.plot(
                    cond,
                    replicates[:, 1:],
                    **measurement_line_kwargs,
                    color=p[0].get_color(),
                )

            # construct errorbar-plots: noise specified above
            else:
                # sorts according to ascending order of conditions
                scond, smean, snoise = zip(
                    *sorted(
                        zip(
                            measurements_to_plot.conditions,
                            measurements_to_plot.data_to_plot["mean"],
                            measurements_to_plot.data_to_plot[noise_col],
                            strict=True,
                        )
                    ),
                    strict=True,
                )

                if np.inf in scond:
                    # remove inf point (last entry after sorting)
                    scond = scond[:-1]
                    smean = smean[:-1]
                    snoise = snoise[:-1]

                if len(scond) > 0 and len(smean) > 0 and len(snoise) > 0:
                    # if only t=inf there will be nothing to plot
                    p = ax.errorbar(
                        scond,
                        smean,
                        snoise,
                        label=label_base,
                        **measurement_line_kwargs,
                    )

            # simulations should have the same colors if both measurements
            # and simulations are plotted
            simu_color = p[0].get_color() if p else None

        # construct simulation plot
        if (
            simulations_to_plot is not None
            and not simulations_to_plot.data_to_plot.empty
        ):
            # markers will be displayed only for points that have measurement
            # counterpart
            if measurements_to_plot is not None:
                meas_conditions = (
                    measurements_to_plot.conditions.to_numpy()
                    if isinstance(measurements_to_plot.conditions, pd.Series)
                    else measurements_to_plot.conditions
                )
                every = [
                    condition in meas_conditions
                    for condition in simulations_to_plot.conditions
                ]
            else:
                every = None

            # sorts according to ascending order of conditions
            xs, ys = map(
                list,
                zip(
                    *sorted(
                        zip(
                            simulations_to_plot.conditions,
                            simulations_to_plot.data_to_plot["mean"],
                            strict=True,
                        )
                    ),
                    strict=True,
                ),
            )

            if np.inf in xs:
                # remove inf point (last entry after sorting)
                xs = xs[:-1]
                ys = ys[:-1]
                every = every[:-1] if every else None

            if len(xs) > 0 and len(ys) > 0:
                p = ax.plot(
                    xs,
                    ys,
                    markevery=every,
                    label=label_base + " simulation",
                    color=simu_color,
                    **simulation_line_kwargs,
                )
                # lines at t=inf should have the same colors also in case
                # only simulations are plotted
                simu_color = p[0].get_color()

        # plot inf points
        if plot_at_t_inf:
            ax, splitaxes_params["ax_inf"] = self._line_plot_at_t_inf(
                ax,
                plotTypeData,
                measurements_to_plot,
                simulations_to_plot,
                noise_col,
                label_base,
                splitaxes_params,
                color=simu_color,
            )

        return ax, splitaxes_params["ax_inf"]

    def generate_barplot(
        self,
        ax: "matplotlib.pyplot.Axes",
        dataplot: DataPlot,
        plotTypeData: str,
    ) -> None:
        """
        Generate barplot.

        Measurements are drawn as filled bars (with error bars if the plot
        type defines an error column), simulations as outlined bars next to
        them.

        Parameters
        ----------
        ax:
            Axis object.
        dataplot:
            Visualization settings for the plot.
        plotTypeData:
            Specifies how replicates should be handled.
        """
        # TODO: plotTypeData == REPLICATE?
        noise_col = self._error_column_for_plot_type_data(plotTypeData)

        (
            measurements_to_plot,
            simulations_to_plot,
        ) = self.data_provider.get_data_to_plot(
            dataplot, plotTypeData == PROVIDED
        )

        x_name = dataplot.legendEntry

        if simulations_to_plot is not None:
            # leave room for the simulation bar right of the measurement bar
            bar_kwargs = {
                "align": "edge",
                "width": -1 / 3,
            }
        else:
            bar_kwargs = {
                "align": "center",
                "width": 2 / 3,
            }

        color = plt.rcParams["axes.prop_cycle"].by_key()["color"][0]

        if measurements_to_plot is not None:
            # no error column for this plot type -> draw bars without
            # error bars instead of failing on a `None` column lookup
            yerr = (
                measurements_to_plot.data_to_plot[noise_col]
                if noise_col is not None
                else None
            )
            ax.bar(
                x_name,
                measurements_to_plot.data_to_plot["mean"],
                yerr=yerr,
                color=color,
                **bar_kwargs,
                label="measurement",
            )

        if simulations_to_plot is not None:
            # mirror the bar to the other side of the tick
            bar_kwargs["width"] = -bar_kwargs["width"]
            ax.bar(
                x_name,
                simulations_to_plot.data_to_plot["mean"],
                color="white",
                edgecolor=color,
                **bar_kwargs,
                label="simulation",
            )

    def generate_scatterplot(
        self,
        ax: "matplotlib.pyplot.Axes",
        dataplot: DataPlot,
        plotTypeData: str,
    ) -> None:
        """
        Generate scatterplot.

        Parameters
        ----------
        ax:
            Axis object.
        dataplot:
            Visualization settings for the plot.
        plotTypeData:
            Specifies how replicates should be handled.

        Raises
        ------
        NotImplementedError:
            If measurements or simulations are missing.
        """
        (
            measurements_to_plot,
            simulations_to_plot,
        ) = self.data_provider.get_data_to_plot(
            dataplot, plotTypeData == PROVIDED
        )

        if simulations_to_plot is None or measurements_to_plot is None:
            raise NotImplementedError(
                "Both measurements and simulation data "
                "are needed for scatter plots"
            )
        ax.scatter(
            measurements_to_plot.data_to_plot["mean"],
            simulations_to_plot.data_to_plot["mean"],
            label=getattr(dataplot, LEGEND_ENTRY),
        )
        self._square_plot_equal_ranges(ax)

    def generate_subplot(
        self,
        fig: matplotlib.figure.Figure,
        ax: matplotlib.axes.Axes,
        subplot: Subplot,
    ) -> None:
        """
        Generate subplot based on markup provided by subplot.

        Parameters
        ----------
        fig:
            Figure object.
        ax:
            Axis object.
        subplot:
            Subplot visualization settings.

        Raises
        ------
        ValueError:
            If ``xScale`` is ``"order"`` and the subplot conditions are
            neither strictly increasing nor strictly decreasing.
        """
        # set yScale
        if subplot.yScale == LIN:
            ax.set_yscale("linear")
        elif subplot.yScale == LOG10:
            ax.set_yscale("log")
        elif subplot.yScale == LOG:
            ax.set_yscale("log", base=np.e)

        if subplot.plotTypeSimulation == BAR_PLOT:
            for data_plot in subplot.data_plots:
                self.generate_barplot(ax, data_plot, subplot.plotTypeData)

            # get rid of duplicate legends
            handles, labels = ax.get_legend_handles_labels()
            by_label = dict(zip(labels, handles, strict=True))
            ax.legend(by_label.values(), by_label.keys())

            x_names = [x.legendEntry for x in subplot.data_plots]
            ax.set_xticks(range(len(x_names)))
            ax.set_xticklabels(x_names)

            for label in ax.get_xmajorticklabels():
                label.set_rotation(30)
                label.set_horizontalalignment("right")
        elif subplot.plotTypeSimulation == SCATTER_PLOT:
            for data_plot in subplot.data_plots:
                self.generate_scatterplot(ax, data_plot, subplot.plotTypeData)
        else:
            # set xScale
            if subplot.xScale == LIN:
                ax.set_xscale("linear")
            elif subplot.xScale == LOG10:
                ax.set_xscale("log")
            elif subplot.xScale == LOG:
                ax.set_xscale("log", base=np.e)
            # equidistant
            elif subplot.xScale == "order":
                ax.set_xscale("linear")
                # check if conditions are monotone decreasing or increasing
                if np.all(np.diff(subplot.conditions) < 0):
                    # monot. decreasing -> reverse
                    xlabel = subplot.conditions[::-1]
                    conditions = range(len(subplot.conditions))[::-1]
                    ax.set_xticks(range(len(conditions)), xlabel)
                elif np.all(np.diff(subplot.conditions) > 0):
                    xlabel = subplot.conditions
                    conditions = range(len(subplot.conditions))
                    ax.set_xticks(range(len(conditions)), xlabel)
                else:
                    raise ValueError(
                        "Error: x-conditions do not coincide, "
                        "some are mon. increasing, some "
                        "monotonically decreasing"
                    )

            splitaxes_params = self._preprocess_splitaxes(fig, ax, subplot)
            for data_plot in subplot.data_plots:
                ax, splitaxes_params["ax_inf"] = self.generate_lineplot(
                    ax,
                    data_plot,
                    subplot.plotTypeData,
                    splitaxes_params=splitaxes_params,
                )
            if splitaxes_params["ax_inf"] is not None:
                self._postprocess_splitaxes(
                    ax, splitaxes_params["ax_inf"], splitaxes_params["t_inf"]
                )

        # show 'e' as basis not 2.7... in natural log scale cases
        def ticks(y, _):
            return rf"$e^{{{np.log(y):.0f}}}$"

        if subplot.xScale == LOG:
            ax.xaxis.set_major_formatter(mtick.FuncFormatter(ticks))
        if subplot.yScale == LOG:
            ax.yaxis.set_major_formatter(mtick.FuncFormatter(ticks))

        # bar plots already received a de-duplicated legend above
        if subplot.plotTypeSimulation != BAR_PLOT:
            ax.legend()
        ax.set_title(subplot.plotName)
        if subplot.xlim:
            ax.set_xlim(subplot.xlim)
        if subplot.ylim:
            ax.set_ylim(subplot.ylim)
        ax.autoscale_view()

        # Beautify plots
        ax.set_xlabel(subplot.xLabel)
        ax.set_ylabel(subplot.yLabel)

    def generate_figure(
        self,
        subplot_dir: str | None = None,
        format_: str = "png",
    ) -> dict[str, plt.Subplot] | None:
        """
        Generate the full figure based on the markup in the figure attribute.

        Parameters
        ----------
        subplot_dir:
            A path to the folder where single subplots should be saved.
            PlotIDs will be taken as file names.
        format_:
            File format for the generated figure.
            (See :py:func:`matplotlib.pyplot.savefig` for supported options).

        Returns
        -------
        ax:
            Axis object of the created plot.
        None:
            In case subplots are saved to file.

        Raises
        ------
        RuntimeError:
            If plotting any subplot fails; the original exception is chained.
        """
        if subplot_dir is None:
            # compute, how many rows and columns we need for the subplots
            num_row = int(np.round(np.sqrt(self.figure.num_subplots)))
            num_col = int(np.ceil(self.figure.num_subplots / num_row))

            fig, axes = plt.subplots(
                num_row, num_col, squeeze=False, figsize=self.figure.size
            )
            fig.set_layout_engine("tight")

            # drop the unused cells of the grid
            for ax in axes.flat[self.figure.num_subplots :]:
                ax.remove()

            axes = dict(
                zip(
                    [plot.plotId for plot in self.figure.subplots],
                    axes.flat,
                    strict=False,
                )
            )

        for subplot in self.figure.subplots:
            if subplot_dir is not None:
                # one separate figure per subplot
                fig, ax = plt.subplots(figsize=self.figure.size)
                fig.set_layout_engine("tight")
            else:
                ax = axes[subplot.plotId]

            try:
                self.generate_subplot(fig, ax, subplot)
            except Exception as e:
                raise RuntimeError(
                    f"Error plotting {getattr(subplot, PLOT_ID)}."
                ) from e

            if subplot_dir is not None:
                # TODO: why this doesn't work?
                plt.tight_layout()
                plt.savefig(
                    os.path.join(subplot_dir, f"{subplot.plotId}.{format_}")
                )
                plt.close()

        if subplot_dir is None:
            # TODO: why this doesn't work?
            plt.tight_layout()
            return axes
        return None

    @staticmethod
    def _square_plot_equal_ranges(
        ax: "matplotlib.pyplot.Axes", lim: list | tuple | None = None
    ) -> "matplotlib.pyplot.Axes":
        """
        Square plot with equal range for scatter plots.

        Parameters
        ----------
        ax:
            Axis object.
        lim:
            Common limits for both axes. If ``None``, the union of the
            current x and y ranges is used.

        Returns
        -------
        Updated axis object.
        """
        ax.axis("square")

        if lim is None:
            xlim = ax.get_xlim()
            ylim = ax.get_ylim()
            lim = [np.min([xlim[0], ylim[0]]), np.max([xlim[1], ylim[1]])]

        ax.set_xlim(lim)
        ax.set_ylim(lim)

        # Same tick mark on x and y
        ax.yaxis.set_major_locator(ax.xaxis.get_major_locator())

        return ax

    @staticmethod
    def _line_plot_at_t_inf(
        ax: matplotlib.axes.Axes,
        plotTypeData: str,
        measurements_to_plot: DataSeries,
        simulations_to_plot: DataSeries,
        noise_col: str,
        label_base: str,
        split_axes_params: dict,
        color=None,
    ) -> tuple[matplotlib.axes.Axes, matplotlib.axes.Axes]:
        """
        Plot data at t=inf.

        Parameters
        ----------
        ax:
            Axis object for the data corresponding to the finite timepoints.
        plotTypeData:
            The way replicates should be handled.
        measurements_to_plot:
            Measurements to plot.
        simulations_to_plot:
            Simulations to plot.
        noise_col:
            The name of the error column for plot_type_data.
        label_base:
            Label base.
        split_axes_params:
            A dictionary of split axes parameters with
            - Axis object for the data corresponding to t=inf
            - Time value that represents t=inf
            - left and right limits for the axis where the data corresponding
            to the finite timepoints is plotted
        color:
            Line color.

        Returns
        -------
        Two axis objects: for the data corresponding to the finite timepoints
        and for the data corresponding to t=inf
        """
        ax_inf = split_axes_params["ax_inf"]
        t_inf = split_axes_params["t_inf"]
        ax_finite_right_limit = split_axes_params["ax_finite_right_limit"]
        ax_left_limit = split_axes_params["ax_left_limit"]

        # left edge, marker position and right edge of the t=inf axis
        timepoints_inf = [
            ax_finite_right_limit,
            t_inf,
            ax_finite_right_limit
            + (ax_finite_right_limit - ax_left_limit) * 0.2,
        ]

        # plot measurements
        if measurements_to_plot is not None and measurements_to_plot.inf_point:
            measurements_data_to_plot_inf = (
                measurements_to_plot.data_to_plot.loc[np.inf]
            )

            if plotTypeData == REPLICATE:
                replicates = measurements_data_to_plot_inf.repl
                if replicates.ndim == 0:
                    replicates = np.expand_dims(replicates, axis=0)

                # plot first replicate
                # (same label convention as in `generate_lineplot`:
                # measurements carry the bare label)
                p = ax_inf.plot(
                    timepoints_inf,
                    [replicates[0]] * 3,
                    markevery=[1],
                    label=label_base,
                    color=color,
                    **measurement_line_kwargs,
                )

                # plot other replicates with the same color
                ax_inf.plot(
                    timepoints_inf,
                    [replicates[1:]] * 3,
                    markevery=[1],
                    color=p[0].get_color(),
                    **measurement_line_kwargs,
                )
            else:
                p = ax_inf.plot(
                    [timepoints_inf[0], timepoints_inf[2]],
                    [
                        measurements_data_to_plot_inf["mean"],
                        measurements_data_to_plot_inf["mean"],
                    ],
                    color=color,
                    **measurement_line_kwargs,
                )
                ax_inf.errorbar(
                    t_inf,
                    measurements_data_to_plot_inf["mean"],
                    measurements_data_to_plot_inf[noise_col],
                    label=label_base,
                    color=p[0].get_color(),
                    **measurement_line_kwargs,
                )

            if color is None:
                # in case no color was provided from finite time points
                # plot and measurements are available corresponding
                # simulation should have the same color
                color = p[0].get_color()

        # plot simulations
        if simulations_to_plot is not None and simulations_to_plot.inf_point:
            simulations_data_to_plot_inf = (
                simulations_to_plot.data_to_plot.loc[np.inf]
            )

            if plotTypeData == REPLICATE:
                replicates = simulations_data_to_plot_inf.repl
                if replicates.ndim == 0:
                    replicates = np.expand_dims(replicates, axis=0)

                # plot first replicate
                p = ax_inf.plot(
                    timepoints_inf,
                    [replicates[0]] * 3,
                    markevery=[1],
                    label=label_base + " simulation",
                    color=color,
                    **simulation_line_kwargs,
                )

                # plot other replicates with the same color
                ax_inf.plot(
                    timepoints_inf,
                    [replicates[1:]] * 3,
                    markevery=[1],
                    color=p[0].get_color(),
                    **simulation_line_kwargs,
                )
            else:
                ax_inf.plot(
                    timepoints_inf,
                    [simulations_data_to_plot_inf["mean"]] * 3,
                    markevery=[1],
                    color=color,
                    **simulation_line_kwargs,
                )

        ax.set_xlim(right=ax_finite_right_limit)
        return ax, ax_inf

    @staticmethod
    def _postprocess_splitaxes(
        ax: matplotlib.axes.Axes, ax_inf: matplotlib.axes.Axes, t_inf: float
    ) -> None:
        """
        Postprocess the splitaxes: set axes limits, turn off unnecessary
        ticks and plot dashed lines highlighting the gap in the x axis.

        Parameters
        ----------
        ax:
            Axis object for the data corresponding to the finite timepoints.
        ax_inf:
            Axis object for the data corresponding to t=inf.
        t_inf:
            Time value that represents t=inf
        """
        ax_inf.tick_params(left=False, labelleft=False)
        ax_inf.spines["left"].set_visible(False)
        ax_inf.set_xticks([t_inf])
        ax_inf.set_xticklabels([r"$t_{\infty}$"])

        bottom, top = ax.get_ylim()
        left, right = ax.get_xlim()
        ax.spines["right"].set_visible(False)
        ax_inf.set_xlim(right, right + (right - left) * 0.2)
        d = (top - bottom) * 0.02
        ax_inf.vlines(
            x=right, ymin=bottom + d, ymax=top - d, ls="--", color="gray"
        )  # right
        ax.vlines(
            x=right, ymin=bottom + d, ymax=top - d, ls="--", color="gray"
        )  # left
        # both axes share the same y range
        ax_inf.set_ylim(bottom, top)
        ax.set_ylim(bottom, top)

    def _preprocess_splitaxes(
        self,
        fig: matplotlib.figure.Figure,
        ax: matplotlib.axes.Axes,
        subplot: Subplot,
    ) -> dict:
        """
        Prepare splitaxes if data at t=inf should be plotted: compute left and
        right limits for the axis where the data corresponding to the finite
        timepoints will be plotted, compute time point that will represent
        t=inf on the plot, create additional axes for plotting data at t=inf.

        Returns
        -------
        Dictionary with the keys ``"ax_inf"``, ``"t_inf"``,
        ``"ax_finite_right_limit"`` and ``"ax_left_limit"``. ``"ax_inf"`` and
        ``"t_inf"`` are ``None`` if no data at t=inf is present.
        """

        def check_data_to_plot(
            data_to_plot: DataSeries,
        ) -> tuple[bool, float | None, float]:
            """
            Check if there is data available at t=inf and compute maximum and
            minimum finite time points that need to be plotted corresponding
            to a dataplot.
            """
            contains_inf = False
            max_finite_cond, min_cond = None, np.inf
            if data_to_plot is not None and len(data_to_plot.conditions):
                contains_inf = np.inf in data_to_plot.conditions
                finite_conditions = data_to_plot.conditions[
                    data_to_plot.conditions != np.inf
                ]
                max_finite_cond = (
                    np.max(finite_conditions)
                    if finite_conditions.size
                    else None
                )
                min_cond = min(data_to_plot.conditions)
            return contains_inf, max_finite_cond, min_cond

        splitaxes = False
        ax_inf = None
        t_inf, ax_finite_right_limit, ax_left_limit = None, None, np.inf
        for dataplot in subplot.data_plots:
            (
                measurements_to_plot,
                simulations_to_plot,
            ) = self.data_provider.get_data_to_plot(
                dataplot, subplot.plotTypeData == PROVIDED
            )

            contains_inf_m, max_finite_cond_m, min_cond_m = check_data_to_plot(
                measurements_to_plot
            )
            contains_inf_s, max_finite_cond_s, min_cond_s = check_data_to_plot(
                simulations_to_plot
            )

            if max_finite_cond_m is not None:
                ax_finite_right_limit = (
                    max(ax_finite_right_limit, max_finite_cond_m)
                    if ax_finite_right_limit is not None
                    else max_finite_cond_m
                )
            if max_finite_cond_s is not None:
                ax_finite_right_limit = (
                    max(ax_finite_right_limit, max_finite_cond_s)
                    if ax_finite_right_limit is not None
                    else max_finite_cond_s
                )

            ax_left_limit = min(ax_left_limit, min(min_cond_m, min_cond_s))
            # check if t=inf is contained in any data to be plotted on the
            # subplot
            if not splitaxes:
                splitaxes = contains_inf_m or contains_inf_s

        if splitaxes:
            # if t=inf is the only time point in measurements and simulations
            # ax_finite_right_limit will be None and ax_left_limit will be
            # equal to np.inf
            if ax_finite_right_limit is None and ax_left_limit == np.inf:
                ax_finite_right_limit = 10
                ax_left_limit = 0
            t_inf = (
                ax_finite_right_limit
                + (ax_finite_right_limit - ax_left_limit) * 0.1
            )
            # create axes for t=inf
            divider = make_axes_locatable(ax)
            ax_inf = divider.new_horizontal(size="10%", pad=0.3)
            fig.add_axes(ax_inf)

        return {
            "ax_inf": ax_inf,
            "t_inf": t_inf,
            "ax_finite_right_limit": ax_finite_right_limit,
            "ax_left_limit": ax_left_limit,
        }
+# TODO: add when only python>=3.8 is supported +# class VisDict(TypedDict): +# PLOT_NAME: str +# PLOT_TYPE_SIMULATION: str +# PLOT_TYPE_DATA: str +# X_VALUES: str +# X_OFFSET: List[Number] +# X_LABEL: str +# X_SCALE: str +# Y_VALUES: List[str] +# Y_OFFSET: List[Number] +# Y_LABEL: str +# Y_SCALE: str +# LEGEND_ENTRY: List[Number] +# DATASET_ID: List[str] + + +class DataSeries: + """ + Data for one individual line + """ + + def __init__( + self, + conditions_: np.ndarray | pd.Series | None, + data_to_plot: pd.DataFrame | None = None, + ): + self.data_to_plot = data_to_plot + self.data_to_plot.sort_index(inplace=True) + + self.conditions = conditions_ + self.inf_point = ( + np.inf in self.conditions if self.conditions is not None else False + ) + # sort index for the case that indices of conditions and + # measurements differ. if indep_var='time', conditions is a + # numpy array, if indep_var=observable it's a Series + if isinstance(self.conditions, np.ndarray): + self.conditions.sort() + elif isinstance(self.conditions, pd.Series): + self.conditions.sort_index(inplace=True) + + def add_x_offset(self, offset) -> None: + """ + Offset for the independent variable. + + Parameters + ---------- + offset: + Offset value. + + """ + if self.conditions is not None: + self.conditions += offset + + def add_y_offset(self, offset): + self.data_to_plot["mean"] += offset + self.data_to_plot["repl"] += offset + + def add_offsets(self, x_offset=0, y_offset=0) -> None: + """ + Data offsets. + + Parameters + ---------- + x_offset: + Offset for the independent variable. + y_offset: + Offsets for the observable. + """ + self.add_x_offset(x_offset) + self.add_y_offset(y_offset) + + +class DataPlot: + """ + Visualization specification of a plot of one data series, e.g. for + an individual line on a subplot. + """ + + def __init__(self, plot_settings: dict): + """ + Constructor. 
+ + Parameters + ---------- + plot_settings: A plot spec for one dataplot + (only VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS) + """ + for key, val in plot_settings.items(): + setattr(self, key, val) + + if DATASET_ID not in vars(self): + raise ValueError(f"{DATASET_ID} must be specified") + if X_VALUES not in vars(self): # TODO: singular? + setattr(self, X_VALUES, TIME) + if X_OFFSET not in vars(self): + setattr(self, X_OFFSET, 0) + if Y_VALUES not in vars(self): + setattr(self, Y_VALUES, "") + if Y_OFFSET not in vars(self): + setattr(self, Y_OFFSET, 0.0) + if LEGEND_ENTRY not in vars(self): + setattr(self, LEGEND_ENTRY, getattr(self, DATASET_ID)) + + @classmethod + def from_df(cls, plot_spec: pd.DataFrame): + vis_spec_dict = plot_spec.to_dict() + + return cls(vis_spec_dict) + + def __repr__(self): + return f"{self.__class__.__name__}({self.__dict__})" + + +class Subplot: + """ + Visualization specification of a subplot. + """ + + def __init__( + self, + plot_id: str, + plot_settings: dict, + dataplots: list[DataPlot] | None = None, + ): + """ + Constructor. + + Parameters + ---------- + plot_id: + Plot ID. + plot_settings: + Plot spec for a subplot (only VISUALIZATION_DF_SUBPLOT_LEVEL_COLS). + dataplots: + A list of data plots that should be plotted on one subplot. 
+ """ + # parameters of a specific subplot + + setattr(self, PLOT_ID, plot_id) + for key, val in plot_settings.items(): + setattr(self, key, val) + + if PLOT_NAME not in vars(self): + setattr(self, PLOT_NAME, "") + if PLOT_TYPE_SIMULATION not in vars(self): + setattr(self, PLOT_TYPE_SIMULATION, LINE_PLOT) + if PLOT_TYPE_DATA not in vars(self): + setattr(self, PLOT_TYPE_DATA, MEAN_AND_SD) + if X_LABEL not in vars(self): + setattr(self, X_LABEL, TIME) # TODO: getattr(self, X_VALUES) + if X_SCALE not in vars(self): + setattr(self, X_SCALE, LIN) + if Y_LABEL not in vars(self): + setattr(self, Y_LABEL, "values") + if Y_SCALE not in vars(self): + setattr(self, Y_SCALE, LIN) + + self.data_plots = dataplots if dataplots is not None else [] + self.xlim = None + self.ylim = None + + @classmethod + def from_df( + cls, + plot_id: str, + vis_spec: pd.DataFrame, + dataplots: list[DataPlot] | None = None, + ): + vis_spec_dict = {} + for col in vis_spec: + if col in VISUALIZATION_DF_SUBPLOT_LEVEL_COLS: + entry = vis_spec.loc[:, col] + entry = np.unique(entry) + if entry.size > 1: + warnings.warn( + f"For {PLOT_ID} {plot_id} in column " + f"{col} contradictory settings ({entry})" + f". 
class Figure:
    """
    Visualization specification of a figure.

    Contains information regarding how data should be visualized.
    """

    def __init__(
        self,
        subplots: list[Subplot] | None = None,
        size: tuple = DEFAULT_FIGSIZE,
        title: tuple | None = None,
    ):
        """
        Constructor.

        Parameters
        ----------
        subplots: A list of visualization specifications for each subplot
        size: Figure size
        title: Figure title
        """
        # TODO: Isensee measurements table in doc/examples doesn't correspond
        #  to documentation: observableTransformation and
        #  noiseDistribution columns replicateId problem
        # TODO: Should we put in the documentation which combination of fields
        #  must be unique in the measurement table and add such check?
        #  obs_id + sim_cond_id + preeq_cod_id (if exists) + time +
        #  replicate_id (if exists)?

        # the default is a module-level list; copy it so that changing the
        # size of one figure cannot affect the default of all others
        self.size = list(size) if size is DEFAULT_FIGSIZE else size
        self.title = title
        self.subplots = subplots if subplots is not None else []

    @property
    def num_subplots(self) -> int:
        """Number of subplots of this figure."""
        return len(self.subplots)

    def add_subplot(self, subplot: Subplot) -> None:
        """
        Add subplot.

        Parameters
        ----------
        subplot:
            Subplot visualization settings.

        """
        self.subplots.append(subplot)

    def set_axes_limits(
        self,
        xlim: tuple[Real | None, Real | None] | None = None,
        ylim: tuple[Real | None, Real | None] | None = None,
    ) -> None:
        """
        Set axes limits for all subplots. If xlim or ylim or any of the tuple
        items is None, corresponding limit is left unchanged.

        Parameters
        ----------
        xlim:
            X axis limits.
        ylim:
            Y axis limits.
        """
        for subplot in self.subplots:
            subplot.set_axes_limits(xlim, ylim)

    def save_to_tsv(self, output_file_path: str = "visuSpec.tsv") -> None:
        """
        Save full Visualization specification table.

        Note that datasetId column in the resulting table might have been
        generated even though datasetId column in Measurement table is missing
        or is different. Please, correct it manually.

        Parameters
        ----------
        output_file_path:
            File path to which the generated visualization specification is
            saved.
        """
        # TODO: what if datasetIds were generated?

        warnings.warn(
            f"Note: please check that {DATASET_ID} column "
            f"corresponds to {DATASET_ID} column in Measurement "
            f"(Simulation) table.",
            stacklevel=2,
        )

        # one table row per data plot: the settings of its subplot merged
        # with its own settings
        rows = []
        for subplot in self.subplots:
            subplot_level = {
                key: value
                for key, value in vars(subplot).items()
                if key in VISUALIZATION_DF_SUBPLOT_LEVEL_COLS
            }

            for dataplot in subplot.data_plots:
                dataset_level = {
                    key: value
                    for key, value in vars(dataplot).items()
                    if key in VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS
                }
                rows.append({**subplot_level, **dataset_level})

        # building from row dicts keeps the columns aligned even if a
        # setting is missing in some rows (it is then left empty)
        visu_df = pd.DataFrame(rows)
        visu_df.to_csv(output_file_path, sep="\t", index=False)
+ + Returns + ------- + Boolean series that can be used for subsetting of the passed + dataframe + """ + subset = df[DATASET_ID] == dataset_id + if getattr(plot_spec, Y_VALUES) == "": + if len(df.loc[subset, OBSERVABLE_ID].unique()) > 1: + raise ValueError( + f"{Y_VALUES} must be specified in visualization table if " + f"multiple different observables are available." + ) + else: + subset &= df[OBSERVABLE_ID] == getattr(plot_spec, Y_VALUES) + return subset + + def _get_independent_var_values( + self, data_df: pd.DataFrame, dataplot: DataPlot + ) -> tuple[np.ndarray, str, pd.Series]: + """ + Get independent variable values. + + Parameters + ---------- + data_df: + A pandas data frame to subset, can be from measurement file or + simulation file. + dataplot: + Data plot visualization settings. + + Returns + ------- + col_name_unique: + A name of the column from Measurement (Simulation) table, which + specifies independent variable values (depends on the xValues entry + of visualization specification). + Possible values: + + * TIME (independent variable values will be taken from the TIME + column of Measurement (Simulation) table) + + * SIMULATION_CONDITION_ID (independent variable values will be + taken from one of the columns of Condition table) + + uni_condition_id: + Time points + or + contains all unique condition IDs which should be + plotted together as one dataplot. 
Independent variable values will + be collected for these conditions + conditions_: + An independent variable values or None for the BarPlot case + possible values: time points, None, vales of independent variable + (Parameter or Species, specified in the xValues entry of + visualization specification) for each condition_id in + uni_condition_id + + """ + indep_var = getattr(dataplot, X_VALUES) + + dataset_id = getattr(dataplot, DATASET_ID) + + single_m_data = data_df[ + self._matches_plot_spec(data_df, dataplot, dataset_id) + ] + + # gather simulationConditionIds belonging to datasetId + uni_condition_id, uind = np.unique( + single_m_data[SIMULATION_CONDITION_ID], return_index=True + ) + # keep the ordering which was given by user from top to bottom + # (avoid ordering by names '1','10','11','2',...)' + uni_condition_id = uni_condition_id[np.argsort(uind)] + col_name_unique = SIMULATION_CONDITION_ID + + if indep_var == TIME: + # obtain unique observation times + uni_condition_id = single_m_data[TIME].unique() + col_name_unique = TIME + conditions_ = uni_condition_id + elif indep_var == "condition": + conditions_ = None + else: + # indep_var = parameterOrStateId case ? + # extract conditions (plot input) from condition file + ind_cond = self.conditions_data.index.isin(uni_condition_id) + conditions_ = self.conditions_data[ind_cond][indep_var] + + return uni_condition_id, col_name_unique, conditions_ + + def get_data_series( + self, + data_df: pd.DataFrame, + data_col: Literal["measurement", "simulation"], + dataplot: DataPlot, + provided_noise: bool, + ) -> DataSeries: + """ + Get data to plot from measurement or simulation DataFrame. + + Parameters + ---------- + data_df: measurement or simulation DataFrame + data_col: data column, i.e. 
'measurement' or 'simulation' + dataplot: visualization specification + provided_noise: + True if numeric values for the noise level are provided in the + data table + + Returns + ------- + Data to plot + """ + ( + uni_condition_id, + col_name_unique, + conditions_, + ) = self._get_independent_var_values(data_df, dataplot) + + dataset_id = getattr(dataplot, DATASET_ID) + + # get data subset selected based on provided dataset_id + # and observable_ids + single_m_data = data_df[ + self._matches_plot_spec(data_df, dataplot, dataset_id) + ] + + # create empty dataframe for means and SDs + measurements_to_plot = pd.DataFrame( + columns=["mean", "noise_model", "sd", "sem", "repl"], + index=uni_condition_id, + ) + + for var_cond_id in uni_condition_id: + subset = single_m_data[col_name_unique] == var_cond_id + + # what has to be plotted is selected + data_measurements = single_m_data.loc[subset, data_col] + + # TODO: all this rather inside DataSeries? + # process the data + measurements_to_plot.at[var_cond_id, "mean"] = np.mean( + data_measurements + ) + measurements_to_plot.at[var_cond_id, "sd"] = np.std( + data_measurements + ) + + if provided_noise and np.any(subset): + if ( + len(single_m_data.loc[subset, NOISE_PARAMETERS].unique()) + > 1 + ): + raise NotImplementedError( + f"Datapoints with inconsistent {NOISE_PARAMETERS} " + f"is currently not implemented. Stopping." + ) + tmp_noise = single_m_data.loc[subset, NOISE_PARAMETERS].values[ + 0 + ] + if isinstance(tmp_noise, str): + raise NotImplementedError( + "No numerical noise values provided in the " + "measurement table. Stopping." 
+ ) + if ( + isinstance(tmp_noise, Number) + or tmp_noise.dtype == "float64" + ): + measurements_to_plot.at[ + var_cond_id, "noise_model" + ] = tmp_noise + + # standard error of mean + measurements_to_plot.at[var_cond_id, "sem"] = np.std( + data_measurements + ) / np.sqrt(len(data_measurements)) + + # single replicates + measurements_to_plot.at[ + var_cond_id, "repl" + ] = data_measurements.values + + data_series = DataSeries(conditions_, measurements_to_plot) + data_series.add_offsets(dataplot.xOffset, dataplot.yOffset) + return data_series + + def get_data_to_plot( + self, dataplot: DataPlot, provided_noise: bool + ) -> tuple[DataSeries, DataSeries]: + """ + Get data to plot. + + Parameters + ---------- + dataplot: visualization specification + provided_noise: + True if numeric values for the noise level are provided in the + measurement table + + Returns + ----------- + measurements_to_plot, + simulations_to_plot + """ + measurements_to_plot = None + simulations_to_plot = None + + if self.measurements_data is not None: + measurements_to_plot = self.get_data_series( + self.measurements_data, MEASUREMENT, dataplot, provided_noise + ) + + if self.simulations_data is not None: + simulations_to_plot = self.get_data_series( + self.simulations_data, SIMULATION, dataplot, provided_noise + ) + return measurements_to_plot, simulations_to_plot + + +class VisSpecParser: + """ + Parser of visualization specification provided by user either in the form + of Visualization table or as a list of lists with datasets ids or + observable ids or condition ids. Figure instance is created containing + information regarding how data should be visualized. In addition to the + Figure instance, a DataProvider instance is created that will be + responsible for the data selection and manipulation. 
+ """ + + def __init__( + self, + conditions_data: str | Path | pd.DataFrame, + exp_data: str | Path | pd.DataFrame | None = None, + sim_data: str | Path | pd.DataFrame | None = None, + ): + if isinstance(conditions_data, str | Path): + conditions_data = conditions.get_condition_df(conditions_data) + + # import from file in case experimental data is provided in file + if isinstance(exp_data, str | Path): + exp_data = measurements.get_measurement_df(exp_data) + + if isinstance(sim_data, str | Path): + sim_data = core.get_simulation_df(sim_data) + + if exp_data is None and sim_data is None: + raise TypeError( + "Not enough arguments. Either measurements_data " + "or simulations_data should be provided." + ) + + self.conditions_data = conditions_data + self.measurements_data = exp_data + self.simulations_data = sim_data + + @classmethod + def from_problem(cls, petab_problem: Problem, sim_data): + return cls( + petab_problem.condition_df, petab_problem.measurement_df, sim_data + ) + + @property + def _data_df(self): + return ( + self.measurements_data + if self.measurements_data is not None + else self.simulations_data + ) + + @staticmethod + def create_subplot( + plot_id: str, subplot_vis_spec: pd.DataFrame + ) -> Subplot: + """ + Create subplot. + + Parameters + ---------- + plot_id: + Plot id. + subplot_vis_spec: + A visualization specification DataFrame that contains specification + for the subplot and corresponding dataplots. 
+ + Returns + ------- + Subplot + """ + subplot_columns = [ + col + for col in subplot_vis_spec.columns + if col in VISUALIZATION_DF_SUBPLOT_LEVEL_COLS + ] + subplot = Subplot.from_df( + plot_id, subplot_vis_spec.loc[:, subplot_columns] + ) + + dataplot_cols = [ + col + for col in subplot_vis_spec.columns + if col in VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS + ] + dataplot_spec = subplot_vis_spec.loc[:, dataplot_cols] + + for _, row in dataplot_spec.iterrows(): + data_plot = DataPlot.from_df(row) + subplot.add_dataplot(data_plot) + + return subplot + + def parse_from_vis_spec( + self, + vis_spec: str | Path | pd.DataFrame | None, + ) -> tuple[Figure, DataProvider]: + """ + Get visualization settings from a visualization specification. + + Parameters + ---------- + vis_spec: + Visualization specification DataFrame in the PEtab format + or a path to a visualization file. + + Returns + ------- + A figure template with visualization settings and a data provider + """ + # import visualization specification, if file was specified + if isinstance(vis_spec, str | Path): + vis_spec = core.get_visualization_df(vis_spec) + + if DATASET_ID not in vis_spec.columns: + self._add_dataset_id_col() + vis_spec = self._expand_vis_spec_settings(vis_spec) + else: + if ( + self.measurements_data is not None + and DATASET_ID not in self.measurements_data + ): + raise ValueError( + f"grouping by datasetId was requested, but " + f"{DATASET_ID} column is missing from " + f"measurement table" + ) + if ( + self.simulations_data is not None + and DATASET_ID not in self.simulations_data + ): + raise ValueError( + f"grouping by datasetId was requested, but " + f"{DATASET_ID} column is missing from " + f"simulation table" + ) + + figure = Figure() + + # get unique plotIDs preserving the order from the original vis spec + _, idx = np.unique(vis_spec[PLOT_ID], return_index=True) + plot_ids = vis_spec[PLOT_ID].iloc[np.sort(idx)] + + # loop over unique plotIds + for plot_id in plot_ids: + # get indices 
for specific plotId + ind_plot = vis_spec[PLOT_ID] == plot_id + + subplot = self.create_subplot(plot_id, vis_spec[ind_plot]) + figure.add_subplot(subplot) + + return figure, DataProvider( + self.conditions_data, self.measurements_data, self.simulations_data + ) + + def parse_from_id_list( + self, + ids_per_plot: list[IdsList] | None = None, + group_by: str = "observable", + plotted_noise: str | None = MEAN_AND_SD, + ) -> tuple[Figure, DataProvider]: + """ + Get visualization settings from a list of ids and a grouping parameter. + + Parameters + ---------- + ids_per_plot: + A list of lists. Each sublist corresponds to a plot, each subplot + contains the Ids of datasets or observables or simulation + conditions for this plot. + e.g. + + :: + + dataset_ids_per_plot = [['dataset_1', 'dataset_2'], + ['dataset_1', 'dataset_4', + 'dataset_5']] + + or + + :: + + cond_id_list = [['model1_data1'], + ['model1_data2', 'model1_data3'], + ['model1_data4', 'model1_data5'], + ['model1_data6']]. + + group_by: + Grouping type. Possible values: 'dataset', 'observable', + 'simulation'. + plotted_noise: + String indicating how noise should be visualized: + ['MeanAndSD' (default), 'MeanAndSEM', 'replicate', 'provided']. + + Returns + ------- + A figure template with visualization settings and a data provider + + """ + if ids_per_plot is None: + # this is the default case. If no grouping is specified, + # all observables are plotted. One observable per plot. 
+ unique_obs_list = self._data_df[OBSERVABLE_ID].unique() + ids_per_plot = [[obs_id] for obs_id in unique_obs_list] + + if group_by == "dataset" and DATASET_ID not in self._data_df: + raise ValueError( + f"grouping by datasetId was requested, but " + f"{DATASET_ID} column is missing from data table" + ) + + if group_by != "dataset": + # datasetId_list will be created (possibly overwriting previous + # list - only in the local variable, not in the tsv-file) + self._add_dataset_id_col() + + columns_dict = self._get_vis_spec_dependent_columns_dict( + group_by, ids_per_plot + ) + + columns_dict[PLOT_TYPE_DATA] = [plotted_noise] * len( + columns_dict[DATASET_ID] + ) + + vis_spec_df = pd.DataFrame(columns_dict) + + return self.parse_from_vis_spec(vis_spec_df) + + def _add_dataset_id_col(self) -> None: + """ + Add dataset_id column to the measurement table and simulations table + (possibly overwrite). + """ + if self.measurements_data is not None: + if DATASET_ID in self.measurements_data.columns: + self.measurements_data = self.measurements_data.drop( + DATASET_ID, axis=1 + ) + self.measurements_data.insert( + loc=self.measurements_data.columns.size, + column=DATASET_ID, + value=generate_dataset_id_col(self.measurements_data), + ) + + if self.simulations_data is not None: + if DATASET_ID in self.simulations_data.columns: + self.simulations_data = self.simulations_data.drop( + DATASET_ID, axis=1 + ) + self.simulations_data.insert( + loc=self.simulations_data.columns.size, + column=DATASET_ID, + value=generate_dataset_id_col(self.simulations_data), + ) + + def _get_vis_spec_dependent_columns_dict( + self, group_by: str, id_list: list[IdsList] | None = None + ) -> dict: + """ + Helper method for creating values for columns PLOT_ID, DATASET_ID, + LEGEND_ENTRY, Y_VALUES for visualization specification file. + + Parameters + ---------- + group_by: + Grouping type. + Possible values: 'dataset', 'observable', 'simulation'. + id_list: + Grouping list. 
Each sublist corresponds to a subplot and + contains the Ids of datasets or observables or simulation + conditions for this subplot. + + Returns + ------- + A dictionary with values for columns PLOT_ID, DATASET_ID, \ + LEGEND_ENTRY, Y_VALUES for visualization specification. + """ + if group_by != "dataset": + dataset_id_list = create_dataset_id_list_new( + self._data_df, group_by, id_list + ) + else: + dataset_id_list = id_list + + dataset_id_column = [ + i_dataset for sublist in dataset_id_list for i_dataset in sublist + ] + + dataset_label_column = [ + self._create_legend(i_dataset) + for sublist in dataset_id_list + for i_dataset in sublist + ] + + # such dataset ids were generated that each dataset_id always + # corresponds to one observable + yvalues_column = [ + self._data_df.loc[ + self._data_df[DATASET_ID] == dataset_id, OBSERVABLE_ID + ].iloc[0] + for sublist in dataset_id_list + for dataset_id in sublist + ] + + # get number of plots and create plotId-lists + plot_id_column = [ + "plot%s" % str(ind + 1) + for ind, inner_list in enumerate(dataset_id_list) + for _ in inner_list + ] + + return { + PLOT_ID: plot_id_column, + DATASET_ID: dataset_id_column, + LEGEND_ENTRY: dataset_label_column, + Y_VALUES: yvalues_column, + } + + def _create_legend(self, dataset_id: str) -> str: + """ + Create a legend for the dataset ids. + + Parameters + ---------- + dataset_id: + Dataset id. + + Returns + ------- + A legend. + """ + # relies on the fact that dataset ids were created based on cond_ids + # and obs_ids. 
Therefore, in the following query all pairs will be + # the same + cond_id, obs_id = self._data_df[ + self._data_df[DATASET_ID] == dataset_id + ][[SIMULATION_CONDITION_ID, OBSERVABLE_ID]].iloc[0, :] + tmp = self.conditions_data.loc[cond_id] + if CONDITION_NAME not in tmp.index or pd.isna(tmp[CONDITION_NAME]): + cond_name = cond_id + else: + cond_name = tmp[CONDITION_NAME] + return f"{cond_name} - {obs_id}" + + def _expand_vis_spec_settings(self, vis_spec: pd.DataFrame): + """ + Expand visualization specification for the case when DATASET_ID is not + in vis_spec.columns. + + Parameters + ------- + vis_spec: + Visualization specification DataFrame in the PEtab format + or a path to a visualization file. + + Returns + ------- + A visualization specification DataFrame. + """ + if DATASET_ID in vis_spec.columns: + raise ValueError( + f"visualization specification expansion is " + f"unnecessary if column {DATASET_ID} is present" + ) + + if vis_spec.empty: + # in case of empty spec all measurements corresponding to each + # observable will be plotted on a separate subplot + observable_ids = self._data_df[OBSERVABLE_ID].unique() + + vis_spec_exp_rows = [ + self._vis_spec_rows_for_obs(obs_id, {PLOT_ID: f"plot{idx}"}) + for idx, obs_id in enumerate(observable_ids) + ] + return pd.concat(vis_spec_exp_rows, ignore_index=True) + + vis_spec_exp_rows = [] + for _, row in vis_spec.iterrows(): + if Y_VALUES in row: + vis_spec_exp_rows.append( + self._vis_spec_rows_for_obs(row[Y_VALUES], row.to_dict()) + ) + else: + observable_ids = self._data_df[OBSERVABLE_ID].unique() + + for obs_id in observable_ids: + vis_spec_exp_rows.append( + self._vis_spec_rows_for_obs(obs_id, row.to_dict()) + ) + return pd.concat(vis_spec_exp_rows, ignore_index=True) + + def _vis_spec_rows_for_obs( + self, obs_id: str, settings: dict + ) -> pd.DataFrame: + """ + Create vis_spec for one observable. 
+ + For each dataset_id corresponding to the observable with the specified + id create a vis_spec entry with provided settings. + + Parameters + ---------- + obs_id: + Observable ID. + settings: + Additional visualization settings. For each key that is a + valid visualization specification column name, the setting + will be added to the resulting visualization specification. + + Returns + ------- + A visualization specification DataFrame. + """ + columns_to_expand = [ + PLOT_ID, + PLOT_NAME, + PLOT_TYPE_SIMULATION, + PLOT_TYPE_DATA, + X_VALUES, + X_OFFSET, + X_LABEL, + X_SCALE, + Y_OFFSET, + Y_LABEL, + Y_SCALE, + LEGEND_ENTRY, + ] + + dataset_ids = self._data_df[self._data_df[OBSERVABLE_ID] == obs_id][ + DATASET_ID + ].unique() + n_rows = len(dataset_ids) + columns_dict = {DATASET_ID: dataset_ids, Y_VALUES: [obs_id] * n_rows} + + for column in settings: + if column in columns_to_expand: + columns_dict[column] = [settings[column]] * n_rows + + if LEGEND_ENTRY not in columns_dict: + columns_dict[LEGEND_ENTRY] = [ + self._create_legend(dataset_id) + for dataset_id in columns_dict[DATASET_ID] + ] + return pd.DataFrame(columns_dict) diff --git a/petab/visualize/templates/mystyle.css b/petab/v1/visualize/templates/mystyle.css similarity index 100% rename from petab/visualize/templates/mystyle.css rename to petab/v1/visualize/templates/mystyle.css diff --git a/petab/visualize/templates/report.html b/petab/v1/visualize/templates/report.html similarity index 100% rename from petab/visualize/templates/report.html rename to petab/v1/visualize/templates/report.html diff --git a/petab/v1/yaml.py b/petab/v1/yaml.py new file mode 100644 index 00000000..ecffc48e --- /dev/null +++ b/petab/v1/yaml.py @@ -0,0 +1,358 @@ +"""Code regarding the PEtab YAML config files""" +from __future__ import annotations + +import os +from pathlib import Path, PurePosixPath +from typing import Any +from urllib.parse import unquote, urlparse, urlunparse + +import jsonschema +import numpy as np +import 
pandas as pd +import yaml +from pandas.io.common import get_handle + +from .C import * # noqa: F403 + +# directory with PEtab yaml schema files +SCHEMA_DIR = Path(__file__).parent.parent / "schemas" +# map of version number to validation schema +SCHEMAS = { + "1": SCHEMA_DIR / "petab_schema.v1.0.0.yaml", + "1.0.0": SCHEMA_DIR / "petab_schema.v1.0.0.yaml", + "2.0.0": SCHEMA_DIR / "petab_schema.v2.0.0.yaml", +} + +__all__ = [ + "validate", + "validate_yaml_syntax", + "validate_yaml_semantics", + "load_yaml", + "is_composite_problem", + "assert_single_condition_and_sbml_file", + "write_yaml", + "create_problem_yaml", + "get_path_prefix", +] + + +def validate( + yaml_config: dict | str | Path, + path_prefix: None | str | Path = None, +): + """Validate syntax and semantics of PEtab config YAML + + Arguments: + yaml_config: + PEtab YAML config as filename or dict. + path_prefix: + Base location for relative paths. Defaults to location of YAML + file if a filename was provided for ``yaml_config`` or the current + working directory. 
def validate_yaml_syntax(
    yaml_config: dict | str | Path, schema: None | dict | str = None
):
    """Validate PEtab YAML file syntax

    Arguments:
        yaml_config:
            PEtab YAML file to validate, as file name or dictionary
        schema:
            Custom schema for validation, as dictionary or file name. If
            omitted, the schema is selected based on the format version
            given in ``yaml_config``.

    Raises:
        ValueError: if no schema is known for the given format version.
        see :func:`jsonschema.validate`
    """
    yaml_config = load_yaml(yaml_config)

    if schema is None:
        # Try to get the PEtab version from the yaml file.
        # If it is not available, the file is not valid anyway, but let's
        # still use the latest PEtab schema for full validation. The
        # fallback has to be a version *key* of SCHEMAS (the last one
        # defined), not one of its values, because it is looked up below.
        version = yaml_config.get(FORMAT_VERSION, None) or list(SCHEMAS)[-1]
        try:
            schema = SCHEMAS[str(version)]
        except KeyError as e:
            raise ValueError(
                "Unknown PEtab version given in problem "
                f"specification: {version}"
            ) from e
    schema = load_yaml(schema)
    jsonschema.validate(instance=yaml_config, schema=schema)
def load_yaml(yaml_config: dict | Path | str) -> dict:
    """Load YAML

    Convenience function that accepts a YAML input as filename, URL or as
    an already parsed dictionary.

    Arguments:
        yaml_config:
            PEtab YAML config as filename or dict or URL.

    Returns:
        ``yaml_config`` itself, unmodified, if it is a dictionary.
        Otherwise the parsed content of the YAML file.
    """
    if not isinstance(yaml_config, dict):
        # a path or URL: open it via pandas' I/O layer and parse it
        with get_handle(yaml_config, mode="r") as handles:
            return yaml.safe_load(handles.handle)

    # all PEtab problem yaml files are dictionaries, so a dictionary is
    # taken to be parsed already
    return yaml_config
def create_problem_yaml(
    sbml_files: str | Path | list[str | Path],
    condition_files: str | Path | list[str | Path],
    measurement_files: str | Path | list[str | Path],
    parameter_file: str | Path,
    observable_files: str | Path | list[str | Path],
    yaml_file: str | Path,
    visualization_files: str | Path | list[str | Path] | None = None,
    relative_paths: bool = True,
    mapping_files: str | Path | list[str | Path] | None = None,
) -> None:
    """Create and write default YAML file for a single PEtab problem

    Arguments:
        sbml_files: Path of SBML model file or list of such
        condition_files: Path of condition file or list of such
        measurement_files: Path of measurement file or list of such
        parameter_file: Path of parameter file
        observable_files: Path of observable file or list of such
        yaml_file: Path to which YAML file should be written
        visualization_files:
            Optional Path to visualization file or list of such
        relative_paths:
            whether all paths in the YAML file should be relative to the
            location of the YAML file. If ``False``, then paths are left
            unchanged.
        mapping_files: Optional Path of mapping file or list of such
    """

    def _as_list(files):
        """Wrap a single path into a list; pass lists and None through."""
        if isinstance(files, Path | str):
            return [files]
        return files

    if relative_paths:
        yaml_file_dir = Path(yaml_file).parent

        def _to_yaml_path(path) -> str:
            """Express ``path`` relative to the YAML file's directory."""
            return os.path.relpath(path, start=yaml_file_dir)

    else:

        def _to_yaml_path(path) -> str:
            """Keep ``path`` as is, but as plain string: ``Path`` objects
            would be dumped as Python-specific YAML tags."""
            return str(path)

    def _prepare(files):
        """Normalize a file argument to a list of path strings (or None)."""
        files = _as_list(files)
        if files is None:
            return None
        return [_to_yaml_path(path) for path in files]

    problem_dic = {
        CONDITION_FILES: _prepare(condition_files),
        MEASUREMENT_FILES: _prepare(measurement_files),
        SBML_FILES: _prepare(sbml_files),
        OBSERVABLE_FILES: _prepare(observable_files),
    }

    # mapping files are only listed if there are any
    mapping_files = _prepare(mapping_files)
    if mapping_files:
        problem_dic[MAPPING_FILES] = mapping_files

    visualization_files = _prepare(visualization_files)
    if visualization_files is not None:
        problem_dic[VISUALIZATION_FILES] = visualization_files

    yaml_dic = {
        PARAMETER_FILE: _to_yaml_path(parameter_file),
        FORMAT_VERSION: 1,
        PROBLEMS: [problem_dic],
    }
    write_yaml(yaml_dic, yaml_file)
+ """ + yaml_path = str(yaml_path) + + # yaml_config may be path or URL + path_url = urlparse(yaml_path) + if not path_url.scheme or ( + path_url.scheme != "file" and not path_url.netloc + ): + # a regular file path string + return str(Path(yaml_path).parent) + + # a URL + # extract parent path + url_path = unquote(urlparse(yaml_path).path) + parent_path = str(PurePosixPath(url_path).parent) + path_prefix = urlunparse( + ( + path_url.scheme, + path_url.netloc, + parent_path, + path_url.params, + path_url.query, + path_url.fragment, + ) + ) + return path_prefix diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py new file mode 100644 index 00000000..98084fa5 --- /dev/null +++ b/petab/v2/__init__.py @@ -0,0 +1,16 @@ +"""The PEtab 2.0 subpackage. + +Contains all functionality related to handling PEtab 2.0 problems. +""" +from warnings import warn + +from ..v1 import * # noqa: F403, F401, E402 + +# import after v1 +from .problem import Problem # noqa: F401 + +warn( + "Support for PEtab2.0 and all of petab.v2 is experimental " + "and subject to changes!", + stacklevel=1, +) diff --git a/petab/v2/lint.py b/petab/v2/lint.py new file mode 100644 index 00000000..87554e64 --- /dev/null +++ b/petab/v2/lint.py @@ -0,0 +1,567 @@ +"""Validation of PEtab problems""" +from __future__ import annotations + +import logging +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from enum import IntEnum +from pathlib import Path + +import numpy as np +import pandas as pd + +from petab.v1 import ( + assert_model_parameters_in_condition_or_parameter_table, +) +from petab.v1.C import ( + ESTIMATE, + MODEL_ENTITY_ID, + NOISE_PARAMETERS, + NOMINAL_VALUE, + OBSERVABLE_PARAMETERS, + PARAMETER_DF_REQUIRED_COLS, + PARAMETER_ID, +) +from petab.v1.conditions import get_parametric_overrides +from petab.v1.lint import ( + _check_df, + assert_no_leading_trailing_whitespace, + assert_parameter_bounds_are_numeric, + assert_parameter_estimate_is_boolean, + 
class ValidationIssueSeverity(IntEnum):
    """The severity of a validation issue.

    Members are ordered by increasing severity, so they can be compared
    with ``<`` / ``>=``:

    * ``INFO``: informational message, no action required
    * ``WARNING``: warning message, potential issues
    * ``ERROR``: error message, action required
    * ``CRITICAL``: critical error message, stops further validation
    """

    INFO = 10
    WARNING = 20
    ERROR = 30
    CRITICAL = 40
class ValidationResultList(list[ValidationIssue]):
    """A list of validation results.

    Holds every issue that was found while validating a PEtab problem.
    """

    def log(
        self,
        *,
        logger: logging.Logger = logger,
        min_level: ValidationIssueSeverity = ValidationIssueSeverity.INFO,
    ):
        """Log the validation results.

        Arguments:
            logger: The logger to write to.
            min_level: Issues below this severity are not logged.
        """
        for issue in self:
            severity = issue.level
            if severity >= min_level:
                # everything from ERROR upwards is reported as error
                if severity >= ValidationIssueSeverity.ERROR:
                    logger.error(issue.message)
                elif severity == ValidationIssueSeverity.WARNING:
                    logger.warning(issue.message)
                elif severity == ValidationIssueSeverity.INFO:
                    logger.info(issue.message)

        if len(self) == 0:
            logger.info("PEtab format check completed successfully.")

    def has_errors(self) -> bool:
        """Check if there are any errors in the validation results."""
        for issue in self:
            if issue.level >= ValidationIssueSeverity.ERROR:
                return True
        return False
class CheckTableExists(ValidationTask):
    """Validation task that reports a missing mandatory table.

    Only the measurement, observable and parameter tables can be checked;
    all other tables are optional.
    """

    def __init__(self, table_name: str):
        """Create the task.

        Arguments:
            table_name: One of ``"measurement"``, ``"observable"`` or
                ``"parameter"``.

        Raises:
            ValueError: If ``table_name`` is not one of the supported names.
        """
        mandatory_tables = ("measurement", "observable", "parameter")
        if table_name in mandatory_tables:
            self.table_name = table_name
            return

        # all others are optional
        raise ValueError(
            f"Table name {table_name} is not supported. "
            "Supported table names are 'measurement', 'observable', "
            "'parameter'."
        )

    def run(self, problem: Problem) -> ValidationIssue | None:
        """Return an error if ``problem.<table_name>_df`` is ``None``."""
        table = getattr(problem, f"{self.table_name}_df")
        if table is not None:
            return None
        return ValidationError(f"{self.table_name} table is missing.")
class CheckParameterTable(ValidationTask):
    """A task to validate the parameter table of a PEtab problem."""

    def run(self, problem: Problem) -> ValidationIssue | None:
        """Validate ``problem.parameter_df``.

        Arguments:
            problem: PEtab problem to check.

        Returns:
            ``None`` if the problem has no parameter table or no issue was
            found, otherwise a :class:`ValidationError` describing the first
            issue encountered.
        """
        if problem.parameter_df is None:
            return None

        try:
            df = problem.parameter_df
            _check_df(df, PARAMETER_DF_REQUIRED_COLS[1:], "parameter")

            if df.index.name != PARAMETER_ID:
                return ValidationError(
                    f"Parameter table has wrong index {df.index.name}."
                    f" Expected {PARAMETER_ID}.",
                )

            check_ids(df.index.values, kind="parameter")

            # element 0 is PARAMETER_ID, which is the index checked above
            for column_name in PARAMETER_DF_REQUIRED_COLS[1:]:
                if not np.issubdtype(df[column_name].dtype, np.number):
                    assert_no_leading_trailing_whitespace(
                        df[column_name].values, column_name
                    )

            # nominal value is required for non-estimated parameters.
            # A parameter counts as estimated if the flag is the number 1
            # or the string "1". The previous mask
            # `(x != 1) | (is_string and x != "1")` was all-True for
            # string-typed columns, so "1" was treated as non-estimated.
            estimate = df[ESTIMATE]
            is_estimated = (estimate == 1) | (estimate == "1")
            non_estimated_par_ids = list(df.index[~is_estimated])

            # TODO implement as validators
            #  `assert_has_fixed_parameter_nominal_values`
            #   and  `assert_correct_table_dtypes`
            if non_estimated_par_ids:
                if NOMINAL_VALUE not in df:
                    return ValidationError(
                        "Parameter table contains parameters "
                        f"{non_estimated_par_ids} that are not "
                        "specified to be estimated, "
                        f"but column {NOMINAL_VALUE} is missing."
                    )
                try:
                    df.loc[non_estimated_par_ids, NOMINAL_VALUE].apply(float)
                except (TypeError, ValueError):
                    # float() raises TypeError for non-string, non-numeric
                    # objects (e.g. None) and ValueError for bad strings
                    return ValidationError(
                        f"Expected numeric values for `{NOMINAL_VALUE}` "
                        "in parameter table "
                        "for all non-estimated parameters."
                    )

            assert_parameter_id_is_string(df)
            assert_parameter_scale_is_valid(df)
            assert_parameter_bounds_are_numeric(df)
            assert_parameter_estimate_is_boolean(df)
            assert_unique_parameter_ids(df)
            check_parameter_bounds(df)
            assert_parameter_prior_type_is_valid(df)
            assert_parameter_prior_parameters_are_valid(df)

        except AssertionError as e:
            # the v1 helpers signal problems via AssertionError
            return ValidationError(str(e))

        return None
def get_required_parameters_for_parameter_table(
    problem: Problem,
) -> set[str]:
    """
    Get set of parameters which need to go into the parameter table

    Arguments:
        problem: The PEtab problem. Its model, measurement table and
            observable table are accessed unconditionally; the condition
            table may be ``None``.
    Returns:
        Set of parameter IDs which PEtab requires to be present in the
        parameter table. That is all {observable,noise}Parameters from the
        measurement table as well as all parametric condition table overrides
        that are not defined in the model.
    """
    parameter_ids = set()

    # The condition table is optional; without it there are no condition
    # columns and no parametric overrides to consider.
    condition_df = problem.condition_df
    condition_columns = (
        set(condition_df.columns) if condition_df is not None else set()
    )

    # Add parameters from measurement table, unless they are fixed parameters
    def append_overrides(overrides):
        parameter_ids.update(
            p
            for p in overrides
            if isinstance(p, str) and p not in condition_columns
        )

    for _, row in problem.measurement_df.iterrows():
        # we trust that the number of overrides matches
        append_overrides(
            split_parameter_replacement_list(
                row.get(OBSERVABLE_PARAMETERS, None)
            )
        )
        append_overrides(
            split_parameter_replacement_list(row.get(NOISE_PARAMETERS, None))
        )

    # remove `observable_ids` when
    # `get_output_parameters` is updated for PEtab v2/v1.1, where
    # observable IDs are allowed in observable formulae
    observable_ids = set(problem.observable_df.index)

    # Add output parameters except for placeholders
    for formula_type, placeholder_sources in (
        (
            # Observable formulae
            {"observables": True, "noise": False},
            # can only contain observable placeholders
            {"noise": False, "observables": True},
        ),
        (
            # Noise formulae
            {"observables": False, "noise": True},
            # can contain noise and observable placeholders
            {"noise": True, "observables": True},
        ),
    ):
        output_parameters = get_output_parameters(
            problem.observable_df,
            problem.model,
            mapping_df=problem.mapping_df,
            **formula_type,
        )
        placeholders = get_placeholders(
            problem.observable_df,
            **placeholder_sources,
        )
        parameter_ids.update(
            p
            for p in output_parameters
            if p not in placeholders and p not in observable_ids
        )

    if condition_df is not None:
        # Add condition table parametric overrides unless already defined in
        # the model
        parameter_ids.update(
            p
            for p in get_parametric_overrides(condition_df)
            if not problem.model.has_entity_with_id(p)
        )

        # remove parameters that occur in the condition table and are
        # overridden for ALL conditions (columns without any missing value)
        fully_overridden = condition_df.columns[~condition_df.isnull().any()]
        parameter_ids.difference_update(fully_overridden)

    return parameter_ids
+ """ + if output_dir is None: + # TODO requires petab.v2.Problem + raise NotImplementedError("Not implemented yet.") + elif isinstance(yaml_config, dict): + raise ValueError("If output_dir is given, yaml_config must be a file.") + + if isinstance(yaml_config, Path | str): + yaml_file = str(yaml_config) + path_prefix = get_path_prefix(yaml_file) + yaml_config = load_yaml(yaml_config) + get_src_path = lambda filename: f"{path_prefix}/{filename}" # noqa: E731 + else: + yaml_file = None + path_prefix = None + get_src_path = lambda filename: filename # noqa: E731 + + get_dest_path = lambda filename: f"{output_dir}/{filename}" # noqa: E731 + + # Validate original PEtab problem + validate(yaml_config, path_prefix=path_prefix) + if get_major_version(yaml_config) != 1: + raise ValueError("PEtab problem is not version 1.") + petab_problem = ProblemV1.from_yaml(yaml_file or yaml_config) + if lint_v1_problem(petab_problem): + raise ValueError("PEtab problem does not pass linting.") + + # Update YAML file + new_yaml_config = _update_yaml(yaml_config) + + # Write new YAML file + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + new_yaml_file = output_dir / Path(yaml_file).name + write_yaml(new_yaml_config, new_yaml_file) + + # Update tables + # condition tables, observable tables, SBML files, parameter table: + # no changes - just copy + file = yaml_config[C.PARAMETER_FILE] + _copy_file(get_src_path(file), get_dest_path(file)) + + for problem_config in yaml_config[C.PROBLEMS]: + for file in chain( + problem_config.get(C.CONDITION_FILES, []), + problem_config.get(C.OBSERVABLE_FILES, []), + ( + model[C.MODEL_LOCATION] + for model in problem_config.get(C.MODEL_FILES, {}).values() + ), + problem_config.get(C.MEASUREMENT_FILES, []), + problem_config.get(C.VISUALIZATION_FILES, []), + ): + _copy_file(get_src_path(file), get_dest_path(file)) + + # TODO: Measurements: preequilibration to experiments/timecourses once + # finalized + ... 
+ + # validate updated Problem + validation_issues = lint_v2_problem(new_yaml_file) + + if validation_issues: + raise ValueError( + "Generated PEtab v2 problem did not pass linting: " + f"{validation_issues}" + ) + + +def _update_yaml(yaml_config: dict) -> dict: + """Update PEtab 1.0 YAML to PEtab 2.0 format.""" + yaml_config = yaml_config.copy() + + # Update format_version + yaml_config[C.FORMAT_VERSION] = "2.0.0" + + # Add extensions + yaml_config[C.EXTENSIONS] = [] + + # Move models and set IDs (filename for now) + for problem in yaml_config[C.PROBLEMS]: + problem[C.MODEL_FILES] = {} + models = problem[C.MODEL_FILES] + for sbml_file in problem[C.SBML_FILES]: + model_id = sbml_file.split("/")[-1].split(".")[0] + models[model_id] = { + C.MODEL_LANGUAGE: MODEL_TYPE_SBML, + C.MODEL_LOCATION: sbml_file, + } + problem[C.MODEL_FILES] = problem.get(C.MODEL_FILES, {}) + del problem[C.SBML_FILES] + + return yaml_config + + +def _copy_file(src: Path | str, dest: Path | str): + """Copy file.""" + src = str(src) + dest = str(dest) + + if is_url(src): + with get_handle(src, mode="r") as src_handle: + with open(dest, "w") as dest_handle: + dest_handle.write(src_handle.handle.read()) + return + + shutil.copy(str(src), str(dest)) diff --git a/petab/v2/problem.py b/petab/v2/problem.py new file mode 100644 index 00000000..612f2571 --- /dev/null +++ b/petab/v2/problem.py @@ -0,0 +1,719 @@ +"""PEtab v2 problems.""" +from __future__ import annotations + +import logging +import os +import tempfile +from math import nan +from pathlib import Path +from typing import TYPE_CHECKING + +import pandas as pd + +from ..v1 import ( + conditions, + core, + mapping, + measurements, + observables, + parameter_mapping, + parameters, + sampling, + yaml, +) +from ..v1.C import * # noqa: F403 +from ..v1.models.model import Model, model_factory +from ..v1.yaml import get_path_prefix + +if TYPE_CHECKING: + from ..v2.lint import ValidationIssue, ValidationResultList, ValidationTask + + +__all__ = 
def __init__(
    self,
    model: Model = None,
    condition_df: pd.DataFrame = None,
    measurement_df: pd.DataFrame = None,
    parameter_df: pd.DataFrame = None,
    visualization_df: pd.DataFrame = None,
    observable_df: pd.DataFrame = None,
    mapping_df: pd.DataFrame = None,
    extensions_config: dict = None,
):
    """Store the model, the tables and the extension configuration.

    Every argument is optional; anything not given is stored as ``None``
    (or as an empty dict for ``extensions_config``). Each instance gets its
    own copy of the default validation task list.
    """
    # imported here rather than at module level, because the lint module
    # itself imports this module
    from ..v2.lint import default_validation_tasks

    # the underlying model
    self.model: Model | None = model

    # PEtab tables
    self.condition_df: pd.DataFrame | None = condition_df
    self.measurement_df: pd.DataFrame | None = measurement_df
    self.parameter_df: pd.DataFrame | None = parameter_df
    self.observable_df: pd.DataFrame | None = observable_df
    self.mapping_df: pd.DataFrame | None = mapping_df
    self.visualization_df: pd.DataFrame | None = visualization_df

    # information on the extensions used; falsy input becomes a new dict
    self.extensions_config = extensions_config if extensions_config else {}

    # a per-instance list, so that changing it leaves the defaults untouched
    self.validation_tasks: list[ValidationTask] = list(
        default_validation_tasks
    )
@staticmethod
def from_yaml(yaml_config: dict | Path | str) -> Problem:
    """
    Factory method to load model and tables as specified by YAML file.

    If a path to a PEtab version 1 YAML file is given, the problem is
    converted to version 2 in a temporary directory and loaded from there.

    Arguments:
        yaml_config: PEtab configuration as dictionary or YAML file name

    Returns:
        The :class:`Problem` built from the files listed in the
        configuration. Tables for which no files are listed are ``None``.

    Raises:
        ValueError: If the format version is not supported, if the
            automatic upgrade from version 1 fails, or if the configuration
            describes a composite problem.
        NotImplementedError: If more than one model file is listed.
    """
    if isinstance(yaml_config, Path):
        yaml_config = str(yaml_config)

    if isinstance(yaml_config, str):
        # a file name was given: load it and resolve all listed files
        # relative to the prefix returned by `get_path_prefix`
        yaml_file = yaml_config
        path_prefix = get_path_prefix(yaml_file)
        yaml_config = yaml.load_yaml(yaml_config)
        get_path = lambda filename: f"{path_prefix}/{filename}"  # noqa: E731
    else:
        # a dictionary was given: file names are used exactly as listed
        yaml_file = None
        get_path = lambda filename: filename  # noqa: E731

    if yaml_config[FORMAT_VERSION] not in {"2.0.0"}:
        # If we got a path to a v1 yaml file, try to auto-upgrade
        from tempfile import TemporaryDirectory

        from ..versions import get_major_version
        from .petab1to2 import petab1to2

        # the upgrade is only attempted when a file name is known
        if get_major_version(yaml_config) == 1 and yaml_file:
            logging.debug(
                "Auto-upgrading problem from PEtab 1.0 to PEtab 2.0"
            )
            with TemporaryDirectory() as tmpdirname:
                try:
                    petab1to2(yaml_file, output_dir=tmpdirname)
                except Exception as e:
                    raise ValueError(
                        "Failed to auto-upgrade PEtab 1.0 problem to "
                        "PEtab 2.0"
                    ) from e
                # load the converted problem while the temporary directory
                # still exists
                return Problem.from_yaml(
                    Path(tmpdirname) / Path(yaml_file).name
                )
        raise ValueError(
            "Provided PEtab files are of unsupported version "
            f"{yaml_config[FORMAT_VERSION]}. Expected 2.0.0."
        )

    if yaml.is_composite_problem(yaml_config):
        raise ValueError(
            "petab.Problem.from_yaml() can only be used for "
            "yaml files comprising a single model. "
            "Consider using "
            "petab.CompositeProblem.from_yaml() instead."
        )

    # only the first entry of the problem list is used from here on
    problem0 = yaml_config["problems"][0]

    # the parameter file entry may be a list of files, a single file, or
    # empty (in which case no parameter table is loaded)
    if isinstance(yaml_config[PARAMETER_FILE], list):
        parameter_df = parameters.get_parameter_df(
            [get_path(f) for f in yaml_config[PARAMETER_FILE]]
        )
    else:
        parameter_df = (
            parameters.get_parameter_df(
                get_path(yaml_config[PARAMETER_FILE])
            )
            if yaml_config[PARAMETER_FILE]
            else None
        )

    if len(problem0[MODEL_FILES]) > 1:
        # TODO https://github.com/PEtab-dev/libpetab-python/issues/6
        raise NotImplementedError(
            "Support for multiple models is not yet implemented."
        )
    if not problem0[MODEL_FILES]:
        model = None
    else:
        # exactly one model entry: its key is used as the model ID
        model_id, model_info = next(iter(problem0[MODEL_FILES].items()))
        model = model_factory(
            get_path(model_info[MODEL_LOCATION]),
            model_info[MODEL_LANGUAGE],
            model_id=model_id,
        )

    measurement_files = [
        get_path(f) for f in problem0.get(MEASUREMENT_FILES, [])
    ]
    # If there are multiple tables, we will merge them
    measurement_df = (
        core.concat_tables(
            measurement_files, measurements.get_measurement_df
        )
        if measurement_files
        else None
    )

    condition_files = [
        get_path(f) for f in problem0.get(CONDITION_FILES, [])
    ]
    # If there are multiple tables, we will merge them
    condition_df = (
        core.concat_tables(condition_files, conditions.get_condition_df)
        if condition_files
        else None
    )

    visualization_files = [
        get_path(f) for f in problem0.get(VISUALIZATION_FILES, [])
    ]
    # If there are multiple tables, we will merge them
    visualization_df = (
        core.concat_tables(visualization_files, core.get_visualization_df)
        if visualization_files
        else None
    )

    observable_files = [
        get_path(f) for f in problem0.get(OBSERVABLE_FILES, [])
    ]
    # If there are multiple tables, we will merge them
    observable_df = (
        core.concat_tables(observable_files, observables.get_observable_df)
        if observable_files
        else None
    )

    mapping_files = [get_path(f) for f in problem0.get(MAPPING_FILES, [])]
    # If there are multiple tables, we will merge them
    mapping_df = (
        core.concat_tables(mapping_files, mapping.get_mapping_df)
        if mapping_files
        else None
    )

    return Problem(
        condition_df=condition_df,
        measurement_df=measurement_df,
        parameter_df=parameter_df,
        observable_df=observable_df,
        model=model,
        visualization_df=visualization_df,
        mapping_df=mapping_df,
        extensions_config=yaml_config.get(EXTENSIONS, {}),
    )
+ ) + + def get_optimization_parameters(self) -> list[str]: + """ + Return list of optimization parameter IDs. + + See :py:func:`petab.parameters.get_optimization_parameters`. + """ + return parameters.get_optimization_parameters(self.parameter_df) + + def get_optimization_parameter_scales(self) -> dict[str, str]: + """ + Return list of optimization parameter scaling strings. + + See :py:func:`petab.parameters.get_optimization_parameters`. + """ + return parameters.get_optimization_parameter_scaling(self.parameter_df) + + def get_observable_ids(self) -> list[str]: + """ + Returns dictionary of observable ids. + """ + return list(self.observable_df.index) + + def _apply_mask(self, v: list, free: bool = True, fixed: bool = True): + """Apply mask of only free or only fixed values. + + Parameters + ---------- + v: + The full vector the mask is to be applied to. + free: + Whether to return free parameters, i.e. parameters to estimate. + fixed: + Whether to return fixed parameters, i.e. parameters not to + estimate. + + Returns + ------- + The reduced vector with applied mask. + """ + if not free and not fixed: + return [] + if not free: + return [v[ix] for ix in self.x_fixed_indices] + if not fixed: + return [v[ix] for ix in self.x_free_indices] + return v + + def get_x_ids(self, free: bool = True, fixed: bool = True): + """Generic function to get parameter ids. + + Parameters + ---------- + free: + Whether to return free parameters, i.e. parameters to estimate. + fixed: + Whether to return fixed parameters, i.e. parameters not to + estimate. + + Returns + ------- + The parameter IDs. 
def get_x_nominal(
    self, free: bool = True, fixed: bool = True, scaled: bool = False
):
    """Collect nominal parameter values from the parameter table.

    Parameters
    ----------
    free:
        Whether to return free parameters, i.e. parameters to estimate.
    fixed:
        Whether to return fixed parameters, i.e. parameters not to
        estimate.
    scaled:
        Whether to scale the values according to the parameter scale,
        or return them on linear scale.

    Returns
    -------
    The parameter nominal values. If the table has no nominal value
    column, ``nan`` is used for every parameter.
    """
    table = self.parameter_df

    values = (
        list(table[NOMINAL_VALUE])
        if NOMINAL_VALUE in table
        else [nan] * len(table)
    )

    if scaled:
        rescaled = parameters.map_scale(values, table[PARAMETER_SCALE])
        values = list(rescaled)

    return self._apply_mask(values, free=free, fixed=fixed)
def get_ub(
    self, free: bool = True, fixed: bool = True, scaled: bool = False
):
    """Collect upper parameter bounds from the parameter table.

    Parameters
    ----------
    free:
        Whether to return free parameters, i.e. parameters to estimate.
    fixed:
        Whether to return fixed parameters, i.e. parameters not to
        estimate.
    scaled:
        Whether to scale the values according to the parameter scale,
        or return them on linear scale.

    Returns
    -------
    The upper parameter bounds.
    """
    table = self.parameter_df
    upper_bounds = list(table[UPPER_BOUND])

    if not scaled:
        return self._apply_mask(upper_bounds, free=free, fixed=fixed)

    rescaled = parameters.map_scale(upper_bounds, table[PARAMETER_SCALE])
    return self._apply_mask(list(rescaled), free=free, fixed=fixed)
+ + def get_optimization_to_simulation_parameter_mapping(self, **kwargs): + """ + See + :py:func:`petab.parameter_mapping.get_optimization_to_simulation_parameter_mapping`, + to which all keyword arguments are forwarded. + """ + return ( + parameter_mapping.get_optimization_to_simulation_parameter_mapping( + condition_df=self.condition_df, + measurement_df=self.measurement_df, + parameter_df=self.parameter_df, + observable_df=self.observable_df, + model=self.model, + **kwargs, + ) + ) + + def create_parameter_df(self, **kwargs) -> pd.DataFrame: + """Create a new PEtab parameter table + + See :py:func:`create_parameter_df`. + """ + return parameters.create_parameter_df( + model=self.model, + condition_df=self.condition_df, + observable_df=self.observable_df, + measurement_df=self.measurement_df, + mapping_df=self.mapping_df, + **kwargs, + ) + + def sample_parameter_startpoints(self, n_starts: int = 100, **kwargs): + """Create 2D array with starting points for optimization + + See :py:func:`petab.sample_parameter_startpoints`. + """ + return sampling.sample_parameter_startpoints( + self.parameter_df, n_starts=n_starts, **kwargs + ) + + def sample_parameter_startpoints_dict( + self, n_starts: int = 100 + ) -> list[dict[str, float]]: + """Create dictionaries with starting points for optimization + + See also :py:func:`petab.sample_parameter_startpoints`. + + Returns: + A list of dictionaries with parameter IDs mapping to samples + parameter values. + """ + return [ + dict(zip(self.x_free_ids, parameter_values, strict=True)) + for parameter_values in self.sample_parameter_startpoints( + n_starts=n_starts + ) + ] + + def unscale_parameters( + self, + x_dict: dict[str, float], + ) -> dict[str, float]: + """Unscale parameter values. + + Parameters + ---------- + x_dict: + Keys are parameter IDs in the PEtab problem, values are scaled + parameter values. + + Returns + ------- + The unscaled parameter values. 
+ """ + return { + parameter_id: parameters.unscale( + parameter_value, + self.parameter_df[PARAMETER_SCALE][parameter_id], + ) + for parameter_id, parameter_value in x_dict.items() + } + + def scale_parameters( + self, + x_dict: dict[str, float], + ) -> dict[str, float]: + """Scale parameter values. + + Parameters + ---------- + x_dict: + Keys are parameter IDs in the PEtab problem, values are unscaled + parameter values. + + Returns + ------- + The scaled parameter values. + """ + return { + parameter_id: parameters.scale( + parameter_value, + self.parameter_df[PARAMETER_SCALE][parameter_id], + ) + for parameter_id, parameter_value in x_dict.items() + } + + @property + def n_estimated(self) -> int: + """The number of estimated parameters.""" + return len(self.x_free_indices) + + @property + def n_measurements(self) -> int: + """Number of measurements.""" + return self.measurement_df[MEASUREMENT].notna().sum() + + @property + def n_priors(self) -> int: + """Number of priors.""" + if OBJECTIVE_PRIOR_PARAMETERS not in self.parameter_df: + return 0 + + return self.parameter_df[OBJECTIVE_PRIOR_PARAMETERS].notna().sum() + + def validate( + self, validation_tasks: list[ValidationTask] = None + ) -> ValidationResultList: + """Validate the PEtab problem. + + Arguments: + validation_tasks: List of validation tasks to run. If ``None`` + or empty, :attr:`Problem.validation_tasks` are used. + Returns: + A list of validation results. 
+ """ + from ..v2.lint import ValidationIssueSeverity, ValidationResultList + + validation_results = ValidationResultList() + if self.extensions_config: + validation_results.append( + ValidationIssue( + ValidationIssueSeverity.WARNING, + "Validation of PEtab extensions is not yet implemented, " + "but the given problem uses the following extensions: " + f"{'', ''.join(self.extensions_config.keys())}", + ) + ) + + for task in validation_tasks or self.validation_tasks: + try: + cur_result = task.run(self) + except Exception as e: + cur_result = ValidationIssue( + ValidationIssueSeverity.CRITICAL, + f"Validation task {task} failed with exception: {e}", + ) + + if cur_result: + validation_results.append(cur_result) + + if cur_result.level == ValidationIssueSeverity.CRITICAL: + break + + return validation_results diff --git a/petab/version.py b/petab/version.py index 54e96123..1d88e505 100644 --- a/petab/version.py +++ b/petab/version.py @@ -1,2 +1,2 @@ """PEtab library version""" -__version__ = "0.3.0" +__version__ = "0.4.0" diff --git a/petab/versions.py b/petab/versions.py new file mode 100644 index 00000000..2e2eb2f4 --- /dev/null +++ b/petab/versions.py @@ -0,0 +1,35 @@ +"""Handling of PEtab version numbers.""" +from __future__ import annotations + +from pathlib import Path + +from petab.C import FORMAT_VERSION +from petab.v1 import Problem as V1Problem +from petab.v1.yaml import load_yaml +from petab.v2 import Problem as V2Problem + +__all__ = [ + "get_major_version", +] + + +def get_major_version( + problem: str | dict | Path | V1Problem | V2Problem, +) -> int: + """Get the major version number of the given problem.""" + if isinstance(problem, V1Problem): + return 1 + + if isinstance(problem, V2Problem): + return 2 + + if isinstance(problem, str | Path): + yaml_config = load_yaml(problem) + version = yaml_config.get(FORMAT_VERSION) + elif isinstance(problem, dict): + version = problem.get(FORMAT_VERSION) + else: + raise ValueError(f"Unsupported argument type: 
{type(problem)}") + + version = str(version) + return int(version.split(".")[0]) diff --git a/petab/visualize/__init__.py b/petab/visualize/__init__.py index 924be86a..2151c3f8 100644 --- a/petab/visualize/__init__.py +++ b/petab/visualize/__init__.py @@ -1,37 +1,10 @@ -""" -Visualize -========= +"""Deprecated module for visualization of PEtab problems. -PEtab comes with visualization functionality. Those need to be imported via -``import petab.visualize``. +Use petab.v1.visualize instead.""" -""" -# ruff: noqa: F401 -import importlib.util +from petab import _deprecated_import_v1 +from petab.v1.visualize import * # noqa: F403, F401, E402 -from .plotting import DataProvider, Figure +from .plotting import DataProvider, Figure # noqa: F403, F401, E402 -__all__ = ["DataProvider", "Figure"] - -if importlib.util.find_spec("matplotlib") is not None: - from .plot_data_and_simulation import ( - plot_problem, - plot_with_vis_spec, - plot_without_vis_spec, - ) - from .plot_residuals import ( - plot_goodness_of_fit, - plot_residuals_vs_simulation, - ) - from .plotter import MPLPlotter - - __all__.extend( - [ - "plot_without_vis_spec", - "plot_with_vis_spec", - "plot_problem", - "plot_goodness_of_fit", - "plot_residuals_vs_simulation", - "MPLPlotter", - ] - ) +_deprecated_import_v1(__name__) diff --git a/petab/visualize/data_overview.py b/petab/visualize/data_overview.py index a327d655..356953da 100644 --- a/petab/visualize/data_overview.py +++ b/petab/visualize/data_overview.py @@ -1,87 +1,5 @@ -""" -Functions for creating an overview report of a PEtab problem -""" +"""Deprecated module. 
Use petab.v1.visualize.data_overview instead.""" +from petab import _deprecated_import_v1 +from petab.v1.visualize.data_overview import * # noqa: F403, F401, E402 -from pathlib import Path -from shutil import copyfile -from typing import Union - -import pandas as pd - -import petab -from petab.C import * - -__all__ = ["create_report"] - - -def create_report( - problem: petab.Problem, model_name: str, output_path: Union[str, Path] = "" -) -> None: - """Create an HTML overview data / model overview report - - Arguments: - problem: PEtab problem - model_name: Name of the model, used for file name for report - output_path: Output directory - """ - template_dir = Path(__file__).absolute().parent / "templates" - output_path = Path(output_path) - template_file = "report.html" - - data_per_observable = get_data_per_observable(problem.measurement_df) - num_conditions = len(problem.condition_df.index) - - # Setup template engine - import jinja2 - - template_loader = jinja2.FileSystemLoader(searchpath=template_dir) - template_env = jinja2.Environment(loader=template_loader, autoescape=True) - template = template_env.get_template(template_file) - - # Render and save - output_text = template.render( - problem=problem, - model_name=model_name, - data_per_observable=data_per_observable, - num_conditions=num_conditions, - ) - with open(output_path / f"{model_name}.html", "w") as html_file: - html_file.write(output_text) - copyfile(template_dir / "mystyle.css", output_path / "mystyle.css") - - -def get_data_per_observable(measurement_df: pd.DataFrame) -> pd.DataFrame: - """Get table with number of data points per observable and condition - - Arguments: - measurement_df: PEtab measurement data frame - Returns: - Pivot table with number of data points per observable and condition - """ - my_measurements = measurement_df.copy() - - index = [SIMULATION_CONDITION_ID] - if PREEQUILIBRATION_CONDITION_ID in my_measurements: - my_measurements[PREEQUILIBRATION_CONDITION_ID] = ( - 
my_measurements[PREEQUILIBRATION_CONDITION_ID] - .astype("object") - .fillna("", inplace=True) - ) - index.append(PREEQUILIBRATION_CONDITION_ID) - - data_per_observable = pd.pivot_table( - my_measurements, - values=MEASUREMENT, - aggfunc="count", - index=index, - columns=[OBSERVABLE_ID], - fill_value=0, - ) - - # Add row and column sums - data_per_observable.loc["SUM", :] = data_per_observable.sum(axis=0).values - data_per_observable["SUM"] = data_per_observable.sum(axis=1).values - - data_per_observable = data_per_observable.astype(int) - - return data_per_observable +_deprecated_import_v1(__name__) diff --git a/petab/visualize/lint.py b/petab/visualize/lint.py index 0e973928..e1e6c536 100644 --- a/petab/visualize/lint.py +++ b/petab/visualize/lint.py @@ -1,175 +1,7 @@ -"""Validation of PEtab visualization files""" -import logging +"""Deprecated module for linting PEtab visualization files. -import pandas as pd +Use petab.v1.visualize.lint instead.""" +from petab import _deprecated_import_v1 +from petab.v1.visualize.lint import * # noqa: F403, F401, E402 -from .. import C, Problem -from ..C import VISUALIZATION_DF_REQUIRED_COLS - -logger = logging.getLogger(__name__) - - -def validate_visualization_df(problem: Problem) -> bool: - """Validate visualization table - - Arguments: - problem: The PEtab problem containing a visualization table - - Returns: - ``True`` if errors occurred, ``False`` otherwise - """ - vis_df = problem.visualization_df - if vis_df is None or vis_df.empty: - return False - - errors = False - - if missing_req_cols := ( - set(VISUALIZATION_DF_REQUIRED_COLS) - set(vis_df.columns) - ): - logger.error( - f"Missing required columns {missing_req_cols} " - "in visualization table." 
- ) - errors = True - - # Set all unspecified optional values to their defaults to simplify - # validation - vis_df = vis_df.copy() - _apply_defaults(vis_df) - - if unknown_types := ( - set(vis_df[C.PLOT_TYPE_SIMULATION].unique()) - - set(C.PLOT_TYPES_SIMULATION) - ): - logger.error( - f"Unknown {C.PLOT_TYPE_SIMULATION}: {unknown_types}. " - f"Must be one of {C.PLOT_TYPES_SIMULATION}" - ) - errors = True - - if unknown_types := ( - set(vis_df[C.PLOT_TYPE_DATA].unique()) - set(C.PLOT_TYPES_DATA) - ): - logger.error( - f"Unknown {C.PLOT_TYPE_DATA}: {unknown_types}. " - f"Must be one of {C.PLOT_TYPES_DATA}" - ) - errors = True - - if unknown_scale := (set(vis_df[C.X_SCALE].unique()) - set(C.X_SCALES)): - logger.error( - f"Unknown {C.X_SCALE}: {unknown_scale}. " - f"Must be one of {C.X_SCALES}" - ) - errors = True - - if any( - (vis_df[C.X_SCALE] == "order") - & (vis_df[C.PLOT_TYPE_SIMULATION] != C.LINE_PLOT) - ): - logger.error( - f"{C.X_SCALE}=order is only allowed with " - f"{C.PLOT_TYPE_SIMULATION}={C.LINE_PLOT}." - ) - errors = True - - if unknown_scale := (set(vis_df[C.Y_SCALE].unique()) - set(C.Y_SCALES)): - logger.error( - f"Unknown {C.Y_SCALE}: {unknown_scale}. " - f"Must be one of {C.Y_SCALES}" - ) - errors = True - - if problem.condition_df is not None: - # check for ambiguous values - reserved_names = {C.TIME, "condition"} - for reserved_name in reserved_names: - if ( - reserved_name in problem.condition_df - and reserved_name in vis_df[C.X_VALUES] - ): - logger.error( - f"Ambiguous value for `{C.X_VALUES}`: " - f"`{reserved_name}` has a special meaning as " - f"`{C.X_VALUES}`, but there exists also a model " - "entity with that name." - ) - errors = True - - # check xValues exist in condition table - for xvalue in set(vis_df[C.X_VALUES].unique()) - reserved_names: - if xvalue not in problem.condition_df: - logger.error( - f"{C.X_VALUES} was set to `{xvalue}`, but no " - "such column exists in the conditions table." 
- ) - errors = True - - if problem.observable_df is not None: - # yValues must be an observable - for yvalue in vis_df[C.Y_VALUES].unique(): - if pd.isna(yvalue): - # if there is only one observable, we default to that - if len(problem.observable_df.index.unique()) == 1: - continue - - logger.error( - f"{C.Y_VALUES} must be specified if there is more " - "than one observable." - ) - errors = True - - if yvalue not in problem.observable_df.index: - logger.error( - f"{C.Y_VALUES} was set to `{yvalue}`, but no such " - "observable exists in the observables table." - ) - errors = True - - if problem.measurement_df is not None: - referenced_datasets = set(filter(bool, vis_df[C.DATASET_ID].unique())) - if referenced_datasets: - existing_datasets = set( - filter(bool, problem.measurement_df[C.DATASET_ID].unique()) - ) - if not referenced_datasets.issubset(existing_datasets): - logger.error( - f"Visualization table references {C.DATASET_ID}(s) " - f"{referenced_datasets - existing_datasets}, but no such " - "dataset(s) exist in the measurement table." - ) - errors = True - - return errors - - -def _apply_defaults(vis_df: pd.DataFrame): - """ - Set default values. - - Adds default values to the given visualization table where no value was - specified. 
- """ - - def set_default(column: str, value): - if column not in vis_df: - vis_df[column] = value - elif value is not None: - if isinstance(value, str): - vis_df[column] = vis_df[column].astype("object") - vis_df.fillna({column: value}, inplace=True) - - set_default(C.PLOT_NAME, "") - set_default(C.PLOT_TYPE_SIMULATION, C.LINE_PLOT) - set_default(C.PLOT_TYPE_DATA, C.MEAN_AND_SD) - set_default(C.DATASET_ID, None) - set_default(C.X_VALUES, C.TIME) - set_default(C.X_OFFSET, 0) - set_default(C.X_LABEL, vis_df[C.X_VALUES]) - set_default(C.X_SCALE, C.LIN) - set_default(C.Y_VALUES, None) - set_default(C.Y_OFFSET, 0) - set_default(C.Y_LABEL, vis_df[C.Y_VALUES]) - set_default(C.Y_SCALE, C.LIN) - set_default(C.LEGEND_ENTRY, vis_df[C.DATASET_ID]) +_deprecated_import_v1(__name__) diff --git a/petab/visualize/plot_data_and_simulation.py b/petab/visualize/plot_data_and_simulation.py index 0353e71a..0151665f 100644 --- a/petab/visualize/plot_data_and_simulation.py +++ b/petab/visualize/plot_data_and_simulation.py @@ -1,223 +1,7 @@ -"""Functions for plotting PEtab measurement files and simulation results in -the same format. -""" +"""Deprecated module. -from typing import Dict, List, Optional, Union +Use petab.v1.visualize.plot_data_and_simulation instead.""" +from petab import _deprecated_import_v1 +from petab.v1.visualize.plot_data_and_simulation import * # noqa: F403, F401, E402 -import matplotlib.pyplot as plt -import pandas as pd - -from .. 
import problem -from ..C import * -from .plotter import MPLPlotter -from .plotting import VisSpecParser - -# for typehints -IdsList = List[str] -NumList = List[int] - -__all__ = ["plot_with_vis_spec", "plot_without_vis_spec", "plot_problem"] - - -def plot_with_vis_spec( - vis_spec_df: Union[str, pd.DataFrame], - conditions_df: Union[str, pd.DataFrame], - measurements_df: Optional[Union[str, pd.DataFrame]] = None, - simulations_df: Optional[Union[str, pd.DataFrame]] = None, - subplot_dir: Optional[str] = None, - plotter_type: str = "mpl", - format_: str = "png", -) -> Optional[Dict[str, plt.Subplot]]: - """ - Plot measurements and/or simulations. Specification of the visualization - routines is provided in visualization table. - - Parameters - ---------- - vis_spec_df: - A visualization table. - conditions_df: - A condition DataFrame in the PEtab format or path to the condition - file. - measurements_df: - A measurement DataFrame in the PEtab format or path to the data file. - simulations_df: - A simulation DataFrame in the PEtab format or path to the simulation - output data file. - subplot_dir: - A path to the folder where single subplots should be saved. - PlotIDs will be taken as file names. - plotter_type: - Specifies which library should be used for plot generation. Currently, - only matplotlib is supported. - format_: - File format for the generated figure. - (See :py:func:`matplotlib.pyplot.savefig` for supported options). - - Returns - ------- - ax: Axis object of the created plot. - None: In case subplots are saved to a file. - """ - if measurements_df is None and simulations_df is None: - raise TypeError( - "Not enough arguments. Either measurements_data " - "or simulations_data should be provided." 
- ) - - vis_spec_parser = VisSpecParser( - conditions_df, measurements_df, simulations_df - ) - figure, dataprovider = vis_spec_parser.parse_from_vis_spec(vis_spec_df) - - if plotter_type == "mpl": - plotter = MPLPlotter(figure, dataprovider) - else: - raise NotImplementedError( - "Currently, only visualization with " "matplotlib is possible." - ) - - return plotter.generate_figure(subplot_dir, format_=format_) - - -def plot_without_vis_spec( - conditions_df: Union[str, pd.DataFrame], - grouping_list: Optional[List[IdsList]] = None, - group_by: str = "observable", - measurements_df: Optional[Union[str, pd.DataFrame]] = None, - simulations_df: Optional[Union[str, pd.DataFrame]] = None, - plotted_noise: str = MEAN_AND_SD, - subplot_dir: Optional[str] = None, - plotter_type: str = "mpl", - format_: str = "png", -) -> Optional[Dict[str, plt.Subplot]]: - """ - Plot measurements and/or simulations. What exactly should be plotted is - specified in a grouping_list. - If grouping list is not provided, measurements (simulations) will be - grouped by observable, i.e. all measurements for each observable will be - visualized on one plot. - - Parameters - ---------- - grouping_list: - A list of lists. Each sublist corresponds to a plot, each subplot - contains the Ids of datasets or observables or simulation conditions - for this plot. - group_by: - Grouping type. - Possible values: 'dataset', 'observable', 'simulation'. - conditions_df: - A condition DataFrame in the PEtab format or path to the condition - file. - measurements_df: - A measurement DataFrame in the PEtab format or path to the data file. - simulations_df: - A simulation DataFrame in the PEtab format or path to the simulation - output data file. - plotted_noise: - A string indicating how noise should be visualized: - ['MeanAndSD' (default), 'MeanAndSEM', 'replicate', 'provided']. - subplot_dir: - A path to the folder where single subplots should be saved. - PlotIDs will be taken as file names. 
- plotter_type: - Specifies which library should be used for plot generation. Currently, - only matplotlib is supported. - format_: - File format for the generated figure. - (See :py:func:`matplotlib.pyplot.savefig` for supported options). - - Returns - ------- - ax: Axis object of the created plot. - None: In case subplots are saved to a file. - """ - if measurements_df is None and simulations_df is None: - raise TypeError( - "Not enough arguments. Either measurements_data " - "or simulations_data should be provided." - ) - - vis_spec_parser = VisSpecParser( - conditions_df, measurements_df, simulations_df - ) - - figure, dataprovider = vis_spec_parser.parse_from_id_list( - grouping_list, group_by, plotted_noise - ) - - if plotter_type == "mpl": - plotter = MPLPlotter(figure, dataprovider) - else: - raise NotImplementedError( - "Currently, only visualization with " "matplotlib is possible." - ) - - return plotter.generate_figure(subplot_dir, format_=format_) - - -def plot_problem( - petab_problem: problem.Problem, - simulations_df: Optional[Union[str, pd.DataFrame]] = None, - grouping_list: Optional[List[IdsList]] = None, - group_by: str = "observable", - plotted_noise: str = MEAN_AND_SD, - subplot_dir: Optional[str] = None, - plotter_type: str = "mpl", -) -> Optional[Dict[str, plt.Subplot]]: - """ - Visualization using petab problem. - If Visualization table is part of the petab_problem, it will be used for - visualization. Otherwise, grouping_list will be used. - If neither Visualization table nor grouping_list are available, - measurements (simulations) will be grouped by observable, i.e. all - measurements for each observable will be visualized on one plot. - - Parameters - ---------- - petab_problem: - A PEtab problem. - simulations_df: - A simulation DataFrame in the PEtab format or path to the simulation - output data file. - grouping_list: - A list of lists. 
Each sublist corresponds to a plot, each subplot - contains the Ids of datasets or observables or simulation conditions - for this plot. - group_by: - Possible values: 'dataset', 'observable', 'simulation'. - plotted_noise: - A string indicating how noise should be visualized: - ['MeanAndSD' (default), 'MeanAndSEM', 'replicate', 'provided']. - subplot_dir: - A string which is taken as path to the folder where single subplots - should be saved. PlotIDs will be taken as file names. - plotter_type: - Specifies which library should be used for plot generation. Currently, - only matplotlib is supported. - - Returns - ------- - ax: Axis object of the created plot. - None: In case subplots are saved to a file. - """ - if petab_problem.visualization_df is not None: - return plot_with_vis_spec( - petab_problem.visualization_df, - petab_problem.condition_df, - petab_problem.measurement_df, - simulations_df, - subplot_dir, - plotter_type, - ) - return plot_without_vis_spec( - petab_problem.condition_df, - grouping_list, - group_by, - petab_problem.measurement_df, - simulations_df, - plotted_noise, - subplot_dir, - plotter_type, - ) +_deprecated_import_v1(__name__) diff --git a/petab/visualize/plot_residuals.py b/petab/visualize/plot_residuals.py index 45a1e5a1..91136199 100644 --- a/petab/visualize/plot_residuals.py +++ b/petab/visualize/plot_residuals.py @@ -1,212 +1,5 @@ -""" -Functions for plotting residuals. -""" -from pathlib import Path -from typing import Optional, Tuple, Union +"""Deprecated module. 
Use petab.v1.visualize.plot_residuals instead.""" +from petab import _deprecated_import_v1 +from petab.v1.visualize.plot_residuals import * # noqa: F403, F401, E402 -import matplotlib -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -from scipy import stats - -from ..C import * -from ..calculate import calculate_residuals -from ..core import get_simulation_df -from ..problem import Problem - -__all__ = ["plot_goodness_of_fit", "plot_residuals_vs_simulation"] - - -def plot_residuals_vs_simulation( - petab_problem: Problem, - simulations_df: Union[str, Path, pd.DataFrame], - size: Optional[Tuple] = (10, 7), - axes: Optional[Tuple[plt.Axes, plt.Axes]] = None, -) -> matplotlib.axes.Axes: - """ - Plot residuals versus simulation values for measurements with normal noise - assumption. - - Parameters - ---------- - petab_problem: - A PEtab problem. - simulations_df: - A simulation DataFrame in the PEtab format or path to the simulation - output data file. - size: - Figure size. - axes: - Axis object. - - Returns - ------- - ax: Axis object of the created plot. 
- """ - if isinstance(simulations_df, (str, Path)): - simulations_df = get_simulation_df(simulations_df) - - if NOISE_DISTRIBUTION in petab_problem.observable_df: - if OBSERVABLE_TRANSFORMATION in petab_problem.observable_df: - observable_ids = petab_problem.observable_df[ - (petab_problem.observable_df[NOISE_DISTRIBUTION] == NORMAL) - & ( - petab_problem.observable_df[OBSERVABLE_TRANSFORMATION] - == LIN - ) - ].index - - else: - observable_ids = petab_problem.observable_df[ - petab_problem.observable_df[NOISE_DISTRIBUTION] == NORMAL - ].index - else: - observable_ids = petab_problem.observable_df.index - - if observable_ids.empty: - raise ValueError( - "Residuals plot is only applicable for normal " - "additive noise assumption" - ) - - if axes is None: - fig, axes = plt.subplots( - 1, 2, sharey=True, figsize=size, width_ratios=[2, 1] - ) - fig.set_layout_engine("tight") - fig.suptitle("Residuals") - - residual_df = calculate_residuals( - measurement_dfs=petab_problem.measurement_df, - simulation_dfs=simulations_df, - observable_dfs=petab_problem.observable_df, - parameter_dfs=petab_problem.parameter_df, - )[0] - - normal_residuals = residual_df[ - residual_df[OBSERVABLE_ID].isin(observable_ids) - ] - simulations_normal = simulations_df[ - simulations_df[OBSERVABLE_ID].isin(observable_ids) - ] - - # compare to standard normal distribution - ks_result = stats.kstest(normal_residuals[RESIDUAL], stats.norm.cdf) - - # plot the residuals plot - axes[0].hlines( - y=0, - xmin=min(simulations_normal[SIMULATION]), - xmax=max(simulations_normal[SIMULATION]), - ls="--", - color="gray", - ) - axes[0].scatter(simulations_normal[SIMULATION], normal_residuals[RESIDUAL]) - axes[0].text( - 0.15, - 0.85, - f"Kolmogorov-Smirnov test results:\n" - f"statistic: {ks_result[0]:.2f}\n" - f"pvalue: {ks_result[1]:.2e} ", - transform=axes[0].transAxes, - ) - axes[0].set_xlabel("simulated values") - axes[0].set_ylabel("residuals") - - # plot histogram - axes[1].hist( - 
normal_residuals[RESIDUAL], density=True, orientation="horizontal" - ) - axes[1].set_xlabel("distribution") - - ymin, ymax = axes[0].get_ylim() - ylim = max(abs(ymin), abs(ymax)) - axes[0].set_ylim(-ylim, ylim) - axes[1].tick_params( - left=False, labelleft=False, right=True, labelright=True - ) - - return axes - - -def plot_goodness_of_fit( - petab_problem: Problem, - simulations_df: Union[str, Path, pd.DataFrame], - size: Tuple = (10, 7), - ax: Optional[plt.Axes] = None, -) -> matplotlib.axes.Axes: - """ - Plot goodness of fit. - - Parameters - ---------- - petab_problem: - A PEtab problem. - simulations_df: - A simulation DataFrame in the PEtab format or path to the simulation - output data file. - size: - Figure size. - ax: - Axis object. - - Returns - ------- - ax: Axis object of the created plot. - """ - if isinstance(simulations_df, (str, Path)): - simulations_df = get_simulation_df(simulations_df) - - if simulations_df is None or petab_problem.measurement_df is None: - raise NotImplementedError( - "Both measurements and simulation data " - "are needed for goodness_of_fit" - ) - - residual_df = calculate_residuals( - measurement_dfs=petab_problem.measurement_df, - simulation_dfs=simulations_df, - observable_dfs=petab_problem.observable_df, - parameter_dfs=petab_problem.parameter_df, - )[0] - slope, intercept, r_value, p_value, std_err = stats.linregress( - petab_problem.measurement_df["measurement"], - simulations_df["simulation"], - ) # x, y - - if ax is None: - fig, ax = plt.subplots(figsize=size) - fig.set_layout_engine("tight") - - ax.scatter( - petab_problem.measurement_df["measurement"], - simulations_df["simulation"], - ) - - ax.axis("square") - xlim = ax.get_xlim() - ylim = ax.get_ylim() - lim = [min([xlim[0], ylim[0]]), max([xlim[1], ylim[1]])] - ax.set_xlim(lim) - ax.set_ylim(lim) - x = np.linspace(lim, 100) - ax.plot(x, x, linestyle="--", color="gray") - ax.plot(x, intercept + slope * x, "r", label="fitted line") - - mse = 
np.mean(np.abs(residual_df["residual"])) - ax.text( - 0.1, - 0.70, - f"$R^2$: {r_value**2:.2f}\n" - f"slope: {slope:.2f}\n" - f"intercept: {intercept:.2f}\n" - f"pvalue: {std_err:.2e}\n" - f"mean squared error: {mse:.2e}\n", - transform=ax.transAxes, - ) - - ax.set_title("Goodness of fit") - ax.set_xlabel("simulated values") - ax.set_ylabel("measurements") - return ax +_deprecated_import_v1(__name__) diff --git a/petab/visualize/plotter.py b/petab/visualize/plotter.py index c2ebe6e4..8b8eeba1 100644 --- a/petab/visualize/plotter.py +++ b/petab/visualize/plotter.py @@ -1,865 +1,9 @@ -"""PEtab visualization plotter classes""" -import os -from abc import ABC, abstractmethod -from typing import Dict, List, Optional, Tuple, Union - -import matplotlib.axes -import matplotlib.ticker as mtick -import numpy as np -import pandas as pd -from matplotlib import pyplot as plt -from mpl_toolkits.axes_grid1 import make_axes_locatable - -from ..C import * -from .plotting import DataPlot, DataProvider, DataSeries, Figure, Subplot - -__all__ = ["Plotter", "MPLPlotter", "SeabornPlotter"] - - -#: Line style (:class:`matplotlib.lines.Line2D` options) for the measurement -# data in line plots -measurement_line_kwargs = { - "linestyle": "-.", - "marker": "x", - "markersize": 10, -} -#: Line style (:class:`matplotlib.lines.Line2D` options) for the simulation -# data in line plots -simulation_line_kwargs = { - "linestyle": "-", - "marker": "o", - "markersize": 10, -} - - -class Plotter(ABC): - """ - Plotter abstract base class. 
- - Attributes - ---------- - figure: - Figure instance that serves as a markup for the figure that - should be generated - data_provider: - Data provider - """ - - def __init__(self, figure: Figure, data_provider: DataProvider): - self.figure = figure - self.data_provider = data_provider - - @abstractmethod - def generate_figure( - self, subplot_dir: Optional[str] = None - ) -> Optional[Dict[str, plt.Subplot]]: - pass - - -class MPLPlotter(Plotter): - """ - Matplotlib wrapper - """ - - def __init__(self, figure: Figure, data_provider: DataProvider): - super().__init__(figure, data_provider) - - @staticmethod - def _error_column_for_plot_type_data(plot_type_data: str) -> Optional[str]: - """Translate PEtab plotTypeData value to column name of internal - data representation - - Parameters - ---------- - plot_type_data: PEtab plotTypeData value (the way replicates should be - handled) - - Returns - ------- - Name of corresponding column - """ - if plot_type_data == MEAN_AND_SD: - return "sd" - if plot_type_data == MEAN_AND_SEM: - return "sem" - if plot_type_data == PROVIDED: - return "noise_model" - return None - - def generate_lineplot( - self, - ax: matplotlib.axes.Axes, - dataplot: DataPlot, - plotTypeData: str, - splitaxes_params: dict, - ) -> Tuple[matplotlib.axes.Axes, matplotlib.axes.Axes]: - """ - Generate line plot. - - It is possible to plot only data or only simulation or both. - - Parameters - ---------- - ax: - Axis object. - dataplot: - Visualization settings for the plot. - plotTypeData: - Specifies how replicates should be handled. 
- splitaxes_params: - - """ - simu_color = None - ( - measurements_to_plot, - simulations_to_plot, - ) = self.data_provider.get_data_to_plot( - dataplot, plotTypeData == PROVIDED - ) - noise_col = self._error_column_for_plot_type_data(plotTypeData) - - label_base = dataplot.legendEntry - - # check if t_inf is there - # todo: if only t_inf, adjust appearance for that case - plot_at_t_inf = ( - measurements_to_plot is not None and measurements_to_plot.inf_point - ) or ( - simulations_to_plot is not None and simulations_to_plot.inf_point - ) - - if ( - measurements_to_plot is not None - and not measurements_to_plot.data_to_plot.empty - ): - # plotting all measurement data - - p = None - if plotTypeData == REPLICATE: - replicates = np.stack( - measurements_to_plot.data_to_plot.repl.values - ) - # sorts according to ascending order of conditions - cond, replicates = zip( - *sorted(zip(measurements_to_plot.conditions, replicates)) - ) - replicates = np.stack(replicates) - - if replicates.ndim == 1: - replicates = np.expand_dims(replicates, axis=1) - - # plot first replicate - p = ax.plot( - cond, - replicates[:, 0], - label=label_base, - **measurement_line_kwargs, - ) - - # plot other replicates with the same color - ax.plot( - cond, - replicates[:, 1:], - **measurement_line_kwargs, - color=p[0].get_color(), - ) - - # construct errorbar-plots: noise specified above - else: - # sorts according to ascending order of conditions - scond, smean, snoise = zip( - *sorted( - zip( - measurements_to_plot.conditions, - measurements_to_plot.data_to_plot["mean"], - measurements_to_plot.data_to_plot[noise_col], - ) - ) - ) - - if np.inf in scond: - # remove inf point - scond = scond[:-1] - smean = smean[:-1] - snoise = snoise[:-1] - - if len(scond) > 0 and len(smean) > 0 and len(snoise) > 0: - # if only t=inf there will be nothing to plot - p = ax.errorbar( - scond, - smean, - snoise, - label=label_base, - **measurement_line_kwargs, - ) - - # simulations should have the same colors if 
both measurements - # and simulations are plotted - simu_color = p[0].get_color() if p else None - - # construct simulation plot - if ( - simulations_to_plot is not None - and not simulations_to_plot.data_to_plot.empty - ): - # markers will be displayed only for points that have measurement - # counterpart - if measurements_to_plot is not None: - meas_conditions = ( - measurements_to_plot.conditions.to_numpy() - if isinstance(measurements_to_plot.conditions, pd.Series) - else measurements_to_plot.conditions - ) - every = [ - condition in meas_conditions - for condition in simulations_to_plot.conditions - ] - else: - every = None - - # sorts according to ascending order of conditions - xs, ys = map( - list, - zip( - *sorted( - zip( - simulations_to_plot.conditions, - simulations_to_plot.data_to_plot["mean"], - ) - ) - ), - ) - - if np.inf in xs: - # remove inf point - xs = xs[:-1] - ys = ys[:-1] - every = every[:-1] if every else None - - if len(xs) > 0 and len(ys) > 0: - p = ax.plot( - xs, - ys, - markevery=every, - label=label_base + " simulation", - color=simu_color, - **simulation_line_kwargs, - ) - # lines at t=inf should have the same colors also in case - # only simulations are plotted - simu_color = p[0].get_color() - - # plot inf points - if plot_at_t_inf: - ax, splitaxes_params["ax_inf"] = self._line_plot_at_t_inf( - ax, - plotTypeData, - measurements_to_plot, - simulations_to_plot, - noise_col, - label_base, - splitaxes_params, - color=simu_color, - ) - - return ax, splitaxes_params["ax_inf"] - - def generate_barplot( - self, - ax: "matplotlib.pyplot.Axes", - dataplot: DataPlot, - plotTypeData: str, - ) -> None: - """ - Generate barplot. - - Parameters - ---------- - ax: - Axis object. - dataplot: - Visualization settings for the plot. - plotTypeData: - Specifies how replicates should be handled. - """ - # TODO: plotTypeData == REPLICATE? 
- noise_col = self._error_column_for_plot_type_data(plotTypeData) - - ( - measurements_to_plot, - simulations_to_plot, - ) = self.data_provider.get_data_to_plot( - dataplot, plotTypeData == PROVIDED - ) - - x_name = dataplot.legendEntry - - if simulations_to_plot: - bar_kwargs = { - "align": "edge", - "width": -1 / 3, - } - else: - bar_kwargs = { - "align": "center", - "width": 2 / 3, - } - - color = plt.rcParams["axes.prop_cycle"].by_key()["color"][0] - - if measurements_to_plot is not None: - ax.bar( - x_name, - measurements_to_plot.data_to_plot["mean"], - yerr=measurements_to_plot.data_to_plot[noise_col], - color=color, - **bar_kwargs, - label="measurement", - ) - - if simulations_to_plot is not None: - bar_kwargs["width"] = -bar_kwargs["width"] - ax.bar( - x_name, - simulations_to_plot.data_to_plot["mean"], - color="white", - edgecolor=color, - **bar_kwargs, - label="simulation", - ) - - def generate_scatterplot( - self, - ax: "matplotlib.pyplot.Axes", - dataplot: DataPlot, - plotTypeData: str, - ) -> None: - """ - Generate scatterplot. - - Parameters - ---------- - ax: - Axis object. - dataplot: - Visualization settings for the plot. - plotTypeData: - Specifies how replicates should be handled. - """ - ( - measurements_to_plot, - simulations_to_plot, - ) = self.data_provider.get_data_to_plot( - dataplot, plotTypeData == PROVIDED - ) - - if simulations_to_plot is None or measurements_to_plot is None: - raise NotImplementedError( - "Both measurements and simulation data " - "are needed for scatter plots" - ) - ax.scatter( - measurements_to_plot.data_to_plot["mean"], - simulations_to_plot.data_to_plot["mean"], - label=getattr(dataplot, LEGEND_ENTRY), - ) - self._square_plot_equal_ranges(ax) - - def generate_subplot( - self, - fig: matplotlib.figure.Figure, - ax: matplotlib.axes.Axes, - subplot: Subplot, - ) -> None: - """ - Generate subplot based on markup provided by subplot. - - Parameters - ---------- - fig: - Figure object. - ax: - Axis object. 
- subplot: - Subplot visualization settings. - """ - # set yScale - if subplot.yScale == LIN: - ax.set_yscale("linear") - elif subplot.yScale == LOG10: - ax.set_yscale("log") - elif subplot.yScale == LOG: - ax.set_yscale("log", base=np.e) - - if subplot.plotTypeSimulation == BAR_PLOT: - for data_plot in subplot.data_plots: - self.generate_barplot(ax, data_plot, subplot.plotTypeData) - - # get rid of duplicate legends - handles, labels = ax.get_legend_handles_labels() - by_label = dict(zip(labels, handles)) - ax.legend(by_label.values(), by_label.keys()) - - x_names = [x.legendEntry for x in subplot.data_plots] - ax.set_xticks(range(len(x_names))) - ax.set_xticklabels(x_names) - - for label in ax.get_xmajorticklabels(): - label.set_rotation(30) - label.set_horizontalalignment("right") - elif subplot.plotTypeSimulation == SCATTER_PLOT: - for data_plot in subplot.data_plots: - self.generate_scatterplot(ax, data_plot, subplot.plotTypeData) - else: - # set xScale - if subplot.xScale == LIN: - ax.set_xscale("linear") - elif subplot.xScale == LOG10: - ax.set_xscale("log") - elif subplot.xScale == LOG: - ax.set_xscale("log", base=np.e) - # equidistant - elif subplot.xScale == "order": - ax.set_xscale("linear") - # check if conditions are monotone decreasing or increasing - if np.all(np.diff(subplot.conditions) < 0): - # monot. decreasing -> reverse - xlabel = subplot.conditions[::-1] - conditions = range(len(subplot.conditions))[::-1] - ax.set_xticks(range(len(conditions)), xlabel) - elif np.all(np.diff(subplot.conditions) > 0): - xlabel = subplot.conditions - conditions = range(len(subplot.conditions)) - ax.set_xticks(range(len(conditions)), xlabel) - else: - raise ValueError( - "Error: x-conditions do not coincide, " - "some are mon. 
increasing, some " - "monotonically decreasing" - ) - - splitaxes_params = self._preprocess_splitaxes(fig, ax, subplot) - for data_plot in subplot.data_plots: - ax, splitaxes_params["ax_inf"] = self.generate_lineplot( - ax, - data_plot, - subplot.plotTypeData, - splitaxes_params=splitaxes_params, - ) - if splitaxes_params["ax_inf"] is not None: - self._postprocess_splitaxes( - ax, splitaxes_params["ax_inf"], splitaxes_params["t_inf"] - ) - - # show 'e' as basis not 2.7... in natural log scale cases - def ticks(y, _): - return rf"$e^{{{np.log(y):.0f}}}$" - - if subplot.xScale == LOG: - ax.xaxis.set_major_formatter(mtick.FuncFormatter(ticks)) - if subplot.yScale == LOG: - ax.yaxis.set_major_formatter(mtick.FuncFormatter(ticks)) - - if subplot.plotTypeSimulation != BAR_PLOT: - ax.legend() - ax.set_title(subplot.plotName) - if subplot.xlim: - ax.set_xlim(subplot.xlim) - if subplot.ylim: - ax.set_ylim(subplot.ylim) - ax.autoscale_view() - - # Beautify plots - ax.set_xlabel(subplot.xLabel) - ax.set_ylabel(subplot.yLabel) - - def generate_figure( - self, - subplot_dir: Optional[str] = None, - format_: str = "png", - ) -> Optional[Dict[str, plt.Subplot]]: - """ - Generate the full figure based on the markup in the figure attribute. - - Parameters - ---------- - subplot_dir: - A path to the folder where single subplots should be saved. - PlotIDs will be taken as file names. - format_: - File format for the generated figure. - (See :py:func:`matplotlib.pyplot.savefig` for supported options). - - Returns - ------- - ax: - Axis object of the created plot. - None: - In case subplots are saved to file. 
- """ - if subplot_dir is None: - # compute, how many rows and columns we need for the subplots - num_row = int(np.round(np.sqrt(self.figure.num_subplots))) - num_col = int(np.ceil(self.figure.num_subplots / num_row)) - - fig, axes = plt.subplots( - num_row, num_col, squeeze=False, figsize=self.figure.size - ) - fig.set_layout_engine("tight") - - for ax in axes.flat[self.figure.num_subplots :]: - ax.remove() - - axes = dict( - zip([plot.plotId for plot in self.figure.subplots], axes.flat) - ) - - for subplot in self.figure.subplots: - if subplot_dir is not None: - fig, ax = plt.subplots(figsize=self.figure.size) - fig.set_layout_engine("tight") - else: - ax = axes[subplot.plotId] - - try: - self.generate_subplot(fig, ax, subplot) - except Exception as e: - raise RuntimeError( - f"Error plotting {getattr(subplot, PLOT_ID)}." - ) from e - - if subplot_dir is not None: - # TODO: why this doesn't work? - plt.tight_layout() - plt.savefig( - os.path.join(subplot_dir, f"{subplot.plotId}.{format_}") - ) - plt.close() - - if subplot_dir is None: - # TODO: why this doesn't work? - plt.tight_layout() - return axes - - @staticmethod - def _square_plot_equal_ranges( - ax: "matplotlib.pyplot.Axes", lim: Optional[Union[List, Tuple]] = None - ) -> "matplotlib.pyplot.Axes": - """ - Square plot with equal range for scatter plots. - - Returns - ------- - Updated axis object. 
- """ - ax.axis("square") - - if lim is None: - xlim = ax.get_xlim() - ylim = ax.get_ylim() - lim = [np.min([xlim[0], ylim[0]]), np.max([xlim[1], ylim[1]])] - - ax.set_xlim(lim) - ax.set_ylim(lim) - - # Same tick mark on x and y - ax.yaxis.set_major_locator(ax.xaxis.get_major_locator()) - - return ax - - @staticmethod - def _line_plot_at_t_inf( - ax: matplotlib.axes.Axes, - plotTypeData: str, - measurements_to_plot: DataSeries, - simulations_to_plot: DataSeries, - noise_col: str, - label_base: str, - split_axes_params: dict, - color=None, - ) -> Tuple[matplotlib.axes.Axes, matplotlib.axes.Axes]: - """ - Plot data at t=inf. - - Parameters - ---------- - ax: - Axis object for the data corresponding to the finite timepoints. - plotTypeData: - The way replicates should be handled. - measurements_to_plot: - Measurements to plot. - simulations_to_plot: - Simulations to plot. - noise_col: - The name of the error column for plot_type_data. - label_base: - Label base. - split_axes_params: - A dictionary of split axes parameters with - - Axis object for the data corresponding to t=inf - - Time value that represents t=inf - - left and right limits for the axis where the data corresponding - to the finite timepoints is plotted - color: - Line color. 
- - Returns - ------- - Two axis objects: for the data corresponding to the finite timepoints - and for the data corresponding to t=inf - """ - ax_inf = split_axes_params["ax_inf"] - t_inf = split_axes_params["t_inf"] - ax_finite_right_limit = split_axes_params["ax_finite_right_limit"] - ax_left_limit = split_axes_params["ax_left_limit"] - - timepoints_inf = [ - ax_finite_right_limit, - t_inf, - ax_finite_right_limit - + (ax_finite_right_limit - ax_left_limit) * 0.2, - ] - - # plot measurements - if measurements_to_plot is not None and measurements_to_plot.inf_point: - measurements_data_to_plot_inf = ( - measurements_to_plot.data_to_plot.loc[np.inf] - ) - - if plotTypeData == REPLICATE: - p = None - if plotTypeData == REPLICATE: - replicates = measurements_data_to_plot_inf.repl - if replicates.ndim == 0: - replicates = np.expand_dims(replicates, axis=0) - - # plot first replicate - p = ax_inf.plot( - timepoints_inf, - [replicates[0]] * 3, - markevery=[1], - label=label_base + " simulation", - color=color, - **measurement_line_kwargs, - ) - - # plot other replicates with the same color - ax_inf.plot( - timepoints_inf, - [replicates[1:]] * 3, - markevery=[1], - color=p[0].get_color(), - **measurement_line_kwargs, - ) - else: - p = ax_inf.plot( - [timepoints_inf[0], timepoints_inf[2]], - [ - measurements_data_to_plot_inf["mean"], - measurements_data_to_plot_inf["mean"], - ], - color=color, - **measurement_line_kwargs, - ) - ax_inf.errorbar( - t_inf, - measurements_data_to_plot_inf["mean"], - measurements_data_to_plot_inf[noise_col], - label=label_base + " simulation", - color=p[0].get_color(), - **measurement_line_kwargs, - ) - - if color is None: - # in case no color was provided from finite time points - # plot and measurements are available corresponding - # simulation should have the same color - color = p[0].get_color() - - # plot simulations - if simulations_to_plot is not None and simulations_to_plot.inf_point: - simulations_data_to_plot_inf = ( - 
simulations_to_plot.data_to_plot.loc[np.inf] - ) - - if plotTypeData == REPLICATE: - replicates = simulations_data_to_plot_inf.repl - if replicates.ndim == 0: - replicates = np.expand_dims(replicates, axis=0) - - # plot first replicate - p = ax_inf.plot( - timepoints_inf, - [replicates[0]] * 3, - markevery=[1], - label=label_base, - color=color, - **simulation_line_kwargs, - ) - - # plot other replicates with the same color - ax_inf.plot( - timepoints_inf, - [replicates[1:]] * 3, - markevery=[1], - color=p[0].get_color(), - **simulation_line_kwargs, - ) - else: - ax_inf.plot( - timepoints_inf, - [simulations_data_to_plot_inf["mean"]] * 3, - markevery=[1], - color=color, - **simulation_line_kwargs, - ) - - ax.set_xlim(right=ax_finite_right_limit) - return ax, ax_inf - - @staticmethod - def _postprocess_splitaxes( - ax: matplotlib.axes.Axes, ax_inf: matplotlib.axes.Axes, t_inf: float - ) -> None: - """ - Postprocess the splitaxes: set axes limits, turn off unnecessary - ticks and plot dashed lines highlighting the gap in the x axis. - - Parameters - ---------- - ax: - Axis object for the data corresponding to the finite timepoints. - ax_inf: - Axis object for the data corresponding to t=inf. 
- t_inf: - Time value that represents t=inf - """ - ax_inf.tick_params(left=False, labelleft=False) - ax_inf.spines["left"].set_visible(False) - ax_inf.set_xticks([t_inf]) - ax_inf.set_xticklabels([r"$t_{\infty}$"]) - - bottom, top = ax.get_ylim() - left, right = ax.get_xlim() - ax.spines["right"].set_visible(False) - ax_inf.set_xlim(right, right + (right - left) * 0.2) - d = (top - bottom) * 0.02 - ax_inf.vlines( - x=right, ymin=bottom + d, ymax=top - d, ls="--", color="gray" - ) # right - ax.vlines( - x=right, ymin=bottom + d, ymax=top - d, ls="--", color="gray" - ) # left - ax_inf.set_ylim(bottom, top) - ax.set_ylim(bottom, top) - - def _preprocess_splitaxes( - self, - fig: matplotlib.figure.Figure, - ax: matplotlib.axes.Axes, - subplot: Subplot, - ) -> Dict: - """ - Prepare splitaxes if data at t=inf should be plotted: compute left and - right limits for the axis where the data corresponding to the finite - timepoints will be plotted, compute time point that will represent - t=inf on the plot, create additional axes for plotting data at t=inf. - """ - - def check_data_to_plot( - data_to_plot: DataSeries, - ) -> Tuple[bool, Optional[float], float]: - """ - Check if there is data available at t=inf and compute maximum and - minimum finite time points that need to be plotted corresponding - to a dataplot. 
- """ - contains_inf = False - max_finite_cond, min_cond = None, np.inf - if data_to_plot is not None and len(data_to_plot.conditions): - contains_inf = np.inf in data_to_plot.conditions - finite_conditions = data_to_plot.conditions[ - data_to_plot.conditions != np.inf - ] - max_finite_cond = ( - np.max(finite_conditions) - if finite_conditions.size - else None - ) - min_cond = min(data_to_plot.conditions) - return contains_inf, max_finite_cond, min_cond - - splitaxes = False - ax_inf = None - t_inf, ax_finite_right_limit, ax_left_limit = None, None, np.inf - for dataplot in subplot.data_plots: - ( - measurements_to_plot, - simulations_to_plot, - ) = self.data_provider.get_data_to_plot( - dataplot, subplot.plotTypeData == PROVIDED - ) - - contains_inf_m, max_finite_cond_m, min_cond_m = check_data_to_plot( - measurements_to_plot - ) - contains_inf_s, max_finite_cond_s, min_cond_s = check_data_to_plot( - simulations_to_plot - ) - - if max_finite_cond_m is not None: - ax_finite_right_limit = ( - max(ax_finite_right_limit, max_finite_cond_m) - if ax_finite_right_limit is not None - else max_finite_cond_m - ) - if max_finite_cond_s is not None: - ax_finite_right_limit = ( - max(ax_finite_right_limit, max_finite_cond_s) - if ax_finite_right_limit is not None - else max_finite_cond_s - ) - - ax_left_limit = min(ax_left_limit, min(min_cond_m, min_cond_s)) - # check if t=inf is contained in any data to be plotted on the - # subplot - if not splitaxes: - splitaxes = contains_inf_m or contains_inf_s - - if splitaxes: - # if t=inf is the only time point in measurements and simulations - # ax_finite_right_limit will be None and ax_left_limit will be - # equal to np.inf - if ax_finite_right_limit is None and ax_left_limit == np.inf: - ax_finite_right_limit = 10 - ax_left_limit = 0 - t_inf = ( - ax_finite_right_limit - + (ax_finite_right_limit - ax_left_limit) * 0.1 - ) - # create axes for t=inf - divider = make_axes_locatable(ax) - ax_inf = divider.new_horizontal(size="10%", 
pad=0.3) - fig.add_axes(ax_inf) - - return { - "ax_inf": ax_inf, - "t_inf": t_inf, - "ax_finite_right_limit": ax_finite_right_limit, - "ax_left_limit": ax_left_limit, - } - - -class SeabornPlotter(Plotter): - """ - Seaborn wrapper. - """ - - def __init__(self, figure: Figure, data_provider: DataProvider): - super().__init__(figure, data_provider) - - def generate_figure( - self, subplot_dir: Optional[str] = None - ) -> Optional[Dict[str, plt.Subplot]]: - pass +"""Deprecated module. Use petab.v1.visualize.plotter instead.""" +from petab import _deprecated_import_v1 +from petab.v1.visualize.plotter import * # noqa: F403, F401, E402 +from petab.v1.visualize.plotter import ( # noqa: F401 + measurement_line_kwargs, + simulation_line_kwargs, +) + +_deprecated_import_v1(__name__) diff --git a/petab/visualize/plotting.py b/petab/visualize/plotting.py index e1f874ce..a675cf51 100644 --- a/petab/visualize/plotting.py +++ b/petab/visualize/plotting.py @@ -1,1102 +1,6 @@ -"""PEtab visualization data selection and visualization settings classes""" -import warnings -from numbers import Number, Real -from pathlib import Path -from typing import Dict, List, Literal, Optional, Tuple, Union +"""Deprecated module. Use petab.v1.visualize.plotting instead.""" +from petab import _deprecated_import_v1 +from petab.v1.visualize.plotting import * # noqa: F403, F401, E402 +from petab.v1.visualize.plotting import DEFAULT_FIGSIZE # noqa: F401 -import numpy as np -import pandas as pd - -from .. import conditions, core, measurements -from ..C import * -from ..problem import Problem -from .helper_functions import ( - create_dataset_id_list_new, - generate_dataset_id_col, -) - -__all__ = [ - "DataSeries", - "DataPlot", - "Subplot", - "Figure", - "DataProvider", - "VisSpecParser", -] - -# for typehints -IdsList = List[str] -NumList = List[int] - -# The default figure size -DEFAULT_FIGSIZE = [20, 15] - -# also for type hints -# TODO: split into dataplot and subplot level dicts? 
-# TODO: add when only python>=3.8 is supported -# class VisDict(TypedDict): -# PLOT_NAME: str -# PLOT_TYPE_SIMULATION: str -# PLOT_TYPE_DATA: str -# X_VALUES: str -# X_OFFSET: List[Number] -# X_LABEL: str -# X_SCALE: str -# Y_VALUES: List[str] -# Y_OFFSET: List[Number] -# Y_LABEL: str -# Y_SCALE: str -# LEGEND_ENTRY: List[Number] -# DATASET_ID: List[str] - - -class DataSeries: - """ - Data for one individual line - """ - - def __init__( - self, - conditions_: Optional[Union[np.ndarray, pd.Series]], - data_to_plot: Optional[pd.DataFrame] = None, - ): - self.data_to_plot = data_to_plot - self.data_to_plot.sort_index(inplace=True) - - self.conditions = conditions_ - self.inf_point = ( - np.inf in self.conditions if self.conditions is not None else False - ) - # sort index for the case that indices of conditions and - # measurements differ. if indep_var='time', conditions is a - # numpy array, if indep_var=observable it's a Series - if isinstance(self.conditions, np.ndarray): - self.conditions.sort() - elif isinstance(self.conditions, pd.Series): - self.conditions.sort_index(inplace=True) - - def add_x_offset(self, offset) -> None: - """ - Offset for the independent variable. - - Parameters - ---------- - offset: - Offset value. - - """ - if self.conditions is not None: - self.conditions += offset - - def add_y_offset(self, offset): - self.data_to_plot["mean"] += offset - self.data_to_plot["repl"] += offset - - def add_offsets(self, x_offset=0, y_offset=0) -> None: - """ - Data offsets. - - Parameters - ---------- - x_offset: - Offset for the independent variable. - y_offset: - Offsets for the observable. - """ - self.add_x_offset(x_offset) - self.add_y_offset(y_offset) - - -class DataPlot: - """ - Visualization specification of a plot of one data series, e.g. for - an individual line on a subplot. - """ - - def __init__(self, plot_settings: dict): - """ - Constructor. 
- - Parameters - ---------- - plot_settings: A plot spec for one dataplot - (only VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS) - """ - for key, val in plot_settings.items(): - setattr(self, key, val) - - if DATASET_ID not in vars(self): - raise ValueError(f"{DATASET_ID} must be specified") - if X_VALUES not in vars(self): # TODO: singular? - setattr(self, X_VALUES, TIME) - if X_OFFSET not in vars(self): - setattr(self, X_OFFSET, 0) - if Y_VALUES not in vars(self): - setattr(self, Y_VALUES, "") - if Y_OFFSET not in vars(self): - setattr(self, Y_OFFSET, 0.0) - if LEGEND_ENTRY not in vars(self): - setattr(self, LEGEND_ENTRY, getattr(self, DATASET_ID)) - - @classmethod - def from_df(cls, plot_spec: pd.DataFrame): - vis_spec_dict = plot_spec.to_dict() - - return cls(vis_spec_dict) - - def __repr__(self): - return f"{self.__class__.__name__}({self.__dict__})" - - -class Subplot: - """ - Visualization specification of a subplot. - """ - - def __init__( - self, - plot_id: str, - plot_settings: dict, - dataplots: Optional[List[DataPlot]] = None, - ): - """ - Constructor. - - Parameters - ---------- - plot_id: - Plot ID. - plot_settings: - Plot spec for a subplot (only VISUALIZATION_DF_SUBPLOT_LEVEL_COLS). - dataplots: - A list of data plots that should be plotted on one subplot. 
- """ - # parameters of a specific subplot - - setattr(self, PLOT_ID, plot_id) - for key, val in plot_settings.items(): - setattr(self, key, val) - - if PLOT_NAME not in vars(self): - setattr(self, PLOT_NAME, "") - if PLOT_TYPE_SIMULATION not in vars(self): - setattr(self, PLOT_TYPE_SIMULATION, LINE_PLOT) - if PLOT_TYPE_DATA not in vars(self): - setattr(self, PLOT_TYPE_DATA, MEAN_AND_SD) - if X_LABEL not in vars(self): - setattr(self, X_LABEL, TIME) # TODO: getattr(self, X_VALUES) - if X_SCALE not in vars(self): - setattr(self, X_SCALE, LIN) - if Y_LABEL not in vars(self): - setattr(self, Y_LABEL, "values") - if Y_SCALE not in vars(self): - setattr(self, Y_SCALE, LIN) - - self.data_plots = dataplots if dataplots is not None else [] - self.xlim = None - self.ylim = None - - @classmethod - def from_df( - cls, - plot_id: str, - vis_spec: pd.DataFrame, - dataplots: Optional[List[DataPlot]] = None, - ): - vis_spec_dict = {} - for col in vis_spec: - if col in VISUALIZATION_DF_SUBPLOT_LEVEL_COLS: - entry = vis_spec.loc[:, col] - entry = np.unique(entry) - if entry.size > 1: - warnings.warn( - f"For {PLOT_ID} {plot_id} in column " - f"{col} contradictory settings ({entry})" - f". 
Proceeding with first entry " - f"({entry[0]}).", - stacklevel=2, - ) - entry = entry[0] - - # check if values are allowed - if ( - col in [Y_SCALE, X_SCALE] - and entry not in OBSERVABLE_TRANSFORMATIONS - ): - raise ValueError( - f"{X_SCALE} and {Y_SCALE} have to be " - f"one of the following: " - + ", ".join(OBSERVABLE_TRANSFORMATIONS) - ) - elif col == PLOT_TYPE_DATA and entry not in PLOT_TYPES_DATA: - raise ValueError( - f"{PLOT_TYPE_DATA} has to be one of the " - f"following: " + ", ".join(PLOT_TYPES_DATA) - ) - elif ( - col == PLOT_TYPE_SIMULATION - and entry not in PLOT_TYPES_SIMULATION - ): - raise ValueError( - f"{PLOT_TYPE_SIMULATION} has to be one of" - f" the following: " + ", ".join(PLOT_TYPES_SIMULATION) - ) - - # append new entry to dict - vis_spec_dict[col] = entry - else: - warnings.warn( - f"Column {col} cannot be used to specify subplot" - f", only settings from the following columns can" - f" be used:" - + ", ".join(VISUALIZATION_DF_SUBPLOT_LEVEL_COLS), - stacklevel=2, - ) - return cls(plot_id, vis_spec_dict, dataplots) - - def add_dataplot(self, dataplot: DataPlot) -> None: - """ - Add data plot. - - Parameters - ---------- - dataplot: - Data plot visualization settings. - - """ - self.data_plots.append(dataplot) - - def set_axes_limits( - self, - xlim: Optional[Tuple[Optional[Real], Optional[Real]]] = None, - ylim: Optional[Tuple[Optional[Real], Optional[Real]]] = None, - ): - """ - Set axes limits for all subplots. If xlim or ylim or any of the tuple - items is None, corresponding limit is left unchanged. - - Parameters - ---------- - xlim: - X axis limits. - ylim: - Y axis limits. - """ - self.xlim = xlim - self.ylim = ylim - - -class Figure: - """ - Visualization specification of a figure. - - Contains information regarding how data should be visualized. - """ - - def __init__( - self, - subplots: Optional[List[Subplot]] = None, - size: Tuple = DEFAULT_FIGSIZE, - title: Optional[Tuple] = None, - ): - """ - Constructor. 
- - Parameters - ---------- - subplots: A list of visualization specifications for each subplot - size: Figure size - title: Figure title - """ - # TODO: Isensee measurements table in doc/examples doesn't correspond - # to documentation: observableTransformation and - # noiseDistribution columns replicateId problem - # TODO: Should we put in the documentation which combination of fields - # must be unique in the measurement table and add such check? - # obs_id + sim_cond_id + preeq_cod_id (if exists) + time + - # replicate_id (if exists)? - self.size = size - self.title = title - self.subplots = subplots if subplots is not None else [] - - @property - def num_subplots(self) -> int: - return len(self.subplots) - - def add_subplot(self, subplot: Subplot) -> None: - """ - Add subplot. - - Parameters - ---------- - subplot: - Subplot visualization settings. - - """ - self.subplots.append(subplot) - - def set_axes_limits( - self, - xlim: Optional[Tuple[Optional[Real], Optional[Real]]] = None, - ylim: Optional[Tuple[Optional[Real], Optional[Real]]] = None, - ) -> None: - """ - Set axes limits for all subplots. If xlim or ylim or any of the tuple - items is None, corresponding limit is left unchanged. - - Parameters - ---------- - xlim: - X axis limits. - ylim: - Y axis limits. - """ - for subplot in self.subplots: - subplot.set_axes_limits(xlim, ylim) - - def save_to_tsv(self, output_file_path: str = "visuSpec.tsv") -> None: - """ - Save full Visualization specification table. - - Note that datasetId column in the resulting table might have been - generated even though datasetId column in Measurement table is missing - or is different. Please, correct it manually. - - Parameters - ---------- - output_file_path: - File path to which the generated visualization specification is - saved. - """ - # TODO: what if datasetIds were generated? 
- - warnings.warn( - f"Note: please check that {DATASET_ID} column " - f"corresponds to {DATASET_ID} column in Measurement " - f"(Simulation) table.", - stacklevel=2, - ) - - visu_dict = {} - for subplot in self.subplots: - subplot_level = { - key: subplot.__dict__[key] - for key in subplot.__dict__ - if key in VISUALIZATION_DF_SUBPLOT_LEVEL_COLS - } - - for dataplot in subplot.data_plots: - dataset_level = { - key: dataplot.__dict__[key] - for key in dataplot.__dict__ - if key in VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS - } - row = {**subplot_level, **dataset_level} - for key, value in row.items(): - if key in visu_dict: - visu_dict[key].append(value) - else: - visu_dict[key] = [row[key]] - visu_df = pd.DataFrame.from_dict(visu_dict) - visu_df.to_csv(output_file_path, sep="\t", index=False) - - -class DataProvider: - """ - Handles data selection. - """ - - def __init__( - self, - exp_conditions: pd.DataFrame, - measurements_data: Optional[pd.DataFrame] = None, - simulations_data: Optional[pd.DataFrame] = None, - ): - self.conditions_data = exp_conditions - - if measurements_data is None and simulations_data is None: - raise TypeError( - "Not enough arguments. Either measurements_data " - "or simulations_data should be provided." - ) - self.measurements_data = measurements_data - self.simulations_data = simulations_data - - @staticmethod - def _matches_plot_spec( - df: pd.DataFrame, plot_spec: "DataPlot", dataset_id - ) -> pd.Series: - """ - Construct an index for subsetting of the dataframe according to what - is specified in plot_spec. - - Parameters - ---------- - df: - A pandas data frame to subset, can be from measurement file or - simulation file. - plot_spec: - A visualization spec from the visualization file. 
- - Returns - ------- - Boolean series that can be used for subsetting of the passed - dataframe - """ - subset = df[DATASET_ID] == dataset_id - if getattr(plot_spec, Y_VALUES) == "": - if len(df.loc[subset, OBSERVABLE_ID].unique()) > 1: - raise ValueError( - f"{Y_VALUES} must be specified in visualization table if " - f"multiple different observables are available." - ) - else: - subset &= df[OBSERVABLE_ID] == getattr(plot_spec, Y_VALUES) - return subset - - def _get_independent_var_values( - self, data_df: pd.DataFrame, dataplot: DataPlot - ) -> Tuple[np.ndarray, str, pd.Series]: - """ - Get independent variable values. - - Parameters - ---------- - data_df: - A pandas data frame to subset, can be from measurement file or - simulation file. - dataplot: - Data plot visualization settings. - - Returns - ------- - col_name_unique: - A name of the column from Measurement (Simulation) table, which - specifies independent variable values (depends on the xValues entry - of visualization specification). - Possible values: - - * TIME (independent variable values will be taken from the TIME - column of Measurement (Simulation) table) - - * SIMULATION_CONDITION_ID (independent variable values will be - taken from one of the columns of Condition table) - - uni_condition_id: - Time points - or - contains all unique condition IDs which should be - plotted together as one dataplot. 
Independent variable values will - be collected for these conditions - conditions_: - An independent variable values or None for the BarPlot case - possible values: time points, None, vales of independent variable - (Parameter or Species, specified in the xValues entry of - visualization specification) for each condition_id in - uni_condition_id - - """ - indep_var = getattr(dataplot, X_VALUES) - - dataset_id = getattr(dataplot, DATASET_ID) - - single_m_data = data_df[ - self._matches_plot_spec(data_df, dataplot, dataset_id) - ] - - # gather simulationConditionIds belonging to datasetId - uni_condition_id, uind = np.unique( - single_m_data[SIMULATION_CONDITION_ID], return_index=True - ) - # keep the ordering which was given by user from top to bottom - # (avoid ordering by names '1','10','11','2',...)' - uni_condition_id = uni_condition_id[np.argsort(uind)] - col_name_unique = SIMULATION_CONDITION_ID - - if indep_var == TIME: - # obtain unique observation times - uni_condition_id = single_m_data[TIME].unique() - col_name_unique = TIME - conditions_ = uni_condition_id - elif indep_var == "condition": - conditions_ = None - else: - # indep_var = parameterOrStateId case ? - # extract conditions (plot input) from condition file - ind_cond = self.conditions_data.index.isin(uni_condition_id) - conditions_ = self.conditions_data[ind_cond][indep_var] - - return uni_condition_id, col_name_unique, conditions_ - - def get_data_series( - self, - data_df: pd.DataFrame, - data_col: Literal["measurement", "simulation"], - dataplot: DataPlot, - provided_noise: bool, - ) -> DataSeries: - """ - Get data to plot from measurement or simulation DataFrame. - - Parameters - ---------- - data_df: measurement or simulation DataFrame - data_col: data column, i.e. 
'measurement' or 'simulation' - dataplot: visualization specification - provided_noise: - True if numeric values for the noise level are provided in the - data table - - Returns - ------- - Data to plot - """ - ( - uni_condition_id, - col_name_unique, - conditions_, - ) = self._get_independent_var_values(data_df, dataplot) - - dataset_id = getattr(dataplot, DATASET_ID) - - # get data subset selected based on provided dataset_id - # and observable_ids - single_m_data = data_df[ - self._matches_plot_spec(data_df, dataplot, dataset_id) - ] - - # create empty dataframe for means and SDs - measurements_to_plot = pd.DataFrame( - columns=["mean", "noise_model", "sd", "sem", "repl"], - index=uni_condition_id, - ) - - for var_cond_id in uni_condition_id: - subset = single_m_data[col_name_unique] == var_cond_id - - # what has to be plotted is selected - data_measurements = single_m_data.loc[subset, data_col] - - # TODO: all this rather inside DataSeries? - # process the data - measurements_to_plot.at[var_cond_id, "mean"] = np.mean( - data_measurements - ) - measurements_to_plot.at[var_cond_id, "sd"] = np.std( - data_measurements - ) - - if provided_noise and np.any(subset): - if ( - len(single_m_data.loc[subset, NOISE_PARAMETERS].unique()) - > 1 - ): - raise NotImplementedError( - f"Datapoints with inconsistent {NOISE_PARAMETERS} " - f"is currently not implemented. Stopping." - ) - tmp_noise = single_m_data.loc[subset, NOISE_PARAMETERS].values[ - 0 - ] - if isinstance(tmp_noise, str): - raise NotImplementedError( - "No numerical noise values provided in the " - "measurement table. Stopping." 
- ) - if ( - isinstance(tmp_noise, Number) - or tmp_noise.dtype == "float64" - ): - measurements_to_plot.at[ - var_cond_id, "noise_model" - ] = tmp_noise - - # standard error of mean - measurements_to_plot.at[var_cond_id, "sem"] = np.std( - data_measurements - ) / np.sqrt(len(data_measurements)) - - # single replicates - measurements_to_plot.at[ - var_cond_id, "repl" - ] = data_measurements.values - - data_series = DataSeries(conditions_, measurements_to_plot) - data_series.add_offsets(dataplot.xOffset, dataplot.yOffset) - return data_series - - def get_data_to_plot( - self, dataplot: DataPlot, provided_noise: bool - ) -> Tuple[DataSeries, DataSeries]: - """ - Get data to plot. - - Parameters - ---------- - dataplot: visualization specification - provided_noise: - True if numeric values for the noise level are provided in the - measurement table - - Returns - ----------- - measurements_to_plot, - simulations_to_plot - """ - measurements_to_plot = None - simulations_to_plot = None - - if self.measurements_data is not None: - measurements_to_plot = self.get_data_series( - self.measurements_data, MEASUREMENT, dataplot, provided_noise - ) - - if self.simulations_data is not None: - simulations_to_plot = self.get_data_series( - self.simulations_data, SIMULATION, dataplot, provided_noise - ) - return measurements_to_plot, simulations_to_plot - - -class VisSpecParser: - """ - Parser of visualization specification provided by user either in the form - of Visualization table or as a list of lists with datasets ids or - observable ids or condition ids. Figure instance is created containing - information regarding how data should be visualized. In addition to the - Figure instance, a DataProvider instance is created that will be - responsible for the data selection and manipulation. 
- """ - - def __init__( - self, - conditions_data: Union[str, Path, pd.DataFrame], - exp_data: Optional[Union[str, Path, pd.DataFrame]] = None, - sim_data: Optional[Union[str, Path, pd.DataFrame]] = None, - ): - if isinstance(conditions_data, (str, Path)): - conditions_data = conditions.get_condition_df(conditions_data) - - # import from file in case experimental data is provided in file - if isinstance(exp_data, (str, Path)): - exp_data = measurements.get_measurement_df(exp_data) - - if isinstance(sim_data, (str, Path)): - sim_data = core.get_simulation_df(sim_data) - - if exp_data is None and sim_data is None: - raise TypeError( - "Not enough arguments. Either measurements_data " - "or simulations_data should be provided." - ) - - self.conditions_data = conditions_data - self.measurements_data = exp_data - self.simulations_data = sim_data - - @classmethod - def from_problem(cls, petab_problem: Problem, sim_data): - return cls( - petab_problem.condition_df, petab_problem.measurement_df, sim_data - ) - - @property - def _data_df(self): - return ( - self.measurements_data - if self.measurements_data is not None - else self.simulations_data - ) - - @staticmethod - def create_subplot( - plot_id: str, subplot_vis_spec: pd.DataFrame - ) -> Subplot: - """ - Create subplot. - - Parameters - ---------- - plot_id: - Plot id. - subplot_vis_spec: - A visualization specification DataFrame that contains specification - for the subplot and corresponding dataplots. 
- - Returns - ------- - Subplot - """ - subplot_columns = [ - col - for col in subplot_vis_spec.columns - if col in VISUALIZATION_DF_SUBPLOT_LEVEL_COLS - ] - subplot = Subplot.from_df( - plot_id, subplot_vis_spec.loc[:, subplot_columns] - ) - - dataplot_cols = [ - col - for col in subplot_vis_spec.columns - if col in VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS - ] - dataplot_spec = subplot_vis_spec.loc[:, dataplot_cols] - - for _, row in dataplot_spec.iterrows(): - data_plot = DataPlot.from_df(row) - subplot.add_dataplot(data_plot) - - return subplot - - def parse_from_vis_spec( - self, - vis_spec: Optional[Union[str, Path, pd.DataFrame]], - ) -> Tuple[Figure, DataProvider]: - """ - Get visualization settings from a visualization specification. - - Parameters - ---------- - vis_spec: - Visualization specification DataFrame in the PEtab format - or a path to a visualization file. - - Returns - ------- - A figure template with visualization settings and a data provider - """ - # import visualization specification, if file was specified - if isinstance(vis_spec, (str, Path)): - vis_spec = core.get_visualization_df(vis_spec) - - if DATASET_ID not in vis_spec.columns: - self._add_dataset_id_col() - vis_spec = self._expand_vis_spec_settings(vis_spec) - else: - if ( - self.measurements_data is not None - and DATASET_ID not in self.measurements_data - ): - raise ValueError( - f"grouping by datasetId was requested, but " - f"{DATASET_ID} column is missing from " - f"measurement table" - ) - if ( - self.simulations_data is not None - and DATASET_ID not in self.simulations_data - ): - raise ValueError( - f"grouping by datasetId was requested, but " - f"{DATASET_ID} column is missing from " - f"simulation table" - ) - - figure = Figure() - - # get unique plotIDs preserving the order from the original vis spec - _, idx = np.unique(vis_spec[PLOT_ID], return_index=True) - plot_ids = vis_spec[PLOT_ID].iloc[np.sort(idx)] - - # loop over unique plotIds - for plot_id in plot_ids: - # get 
indices for specific plotId - ind_plot = vis_spec[PLOT_ID] == plot_id - - subplot = self.create_subplot(plot_id, vis_spec[ind_plot]) - figure.add_subplot(subplot) - - return figure, DataProvider( - self.conditions_data, self.measurements_data, self.simulations_data - ) - - def parse_from_id_list( - self, - ids_per_plot: Optional[List[IdsList]] = None, - group_by: str = "observable", - plotted_noise: Optional[str] = MEAN_AND_SD, - ) -> Tuple[Figure, DataProvider]: - """ - Get visualization settings from a list of ids and a grouping parameter. - - Parameters - ---------- - ids_per_plot: - A list of lists. Each sublist corresponds to a plot, each subplot - contains the Ids of datasets or observables or simulation - conditions for this plot. - e.g. - - :: - - dataset_ids_per_plot = [['dataset_1', 'dataset_2'], - ['dataset_1', 'dataset_4', - 'dataset_5']] - - or - - :: - - cond_id_list = [['model1_data1'], - ['model1_data2', 'model1_data3'], - ['model1_data4', 'model1_data5'], - ['model1_data6']]. - - group_by: - Grouping type. Possible values: 'dataset', 'observable', - 'simulation'. - plotted_noise: - String indicating how noise should be visualized: - ['MeanAndSD' (default), 'MeanAndSEM', 'replicate', 'provided']. - - Returns - ------- - A figure template with visualization settings and a data provider - - """ - if ids_per_plot is None: - # this is the default case. If no grouping is specified, - # all observables are plotted. One observable per plot. 
- unique_obs_list = self._data_df[OBSERVABLE_ID].unique() - ids_per_plot = [[obs_id] for obs_id in unique_obs_list] - - if group_by == "dataset" and DATASET_ID not in self._data_df: - raise ValueError( - f"grouping by datasetId was requested, but " - f"{DATASET_ID} column is missing from data table" - ) - - if group_by != "dataset": - # datasetId_list will be created (possibly overwriting previous - # list - only in the local variable, not in the tsv-file) - self._add_dataset_id_col() - - columns_dict = self._get_vis_spec_dependent_columns_dict( - group_by, ids_per_plot - ) - - columns_dict[PLOT_TYPE_DATA] = [plotted_noise] * len( - columns_dict[DATASET_ID] - ) - - vis_spec_df = pd.DataFrame(columns_dict) - - return self.parse_from_vis_spec(vis_spec_df) - - def _add_dataset_id_col(self) -> None: - """ - Add dataset_id column to the measurement table and simulations table - (possibly overwrite). - """ - if self.measurements_data is not None: - if DATASET_ID in self.measurements_data.columns: - self.measurements_data = self.measurements_data.drop( - DATASET_ID, axis=1 - ) - self.measurements_data.insert( - loc=self.measurements_data.columns.size, - column=DATASET_ID, - value=generate_dataset_id_col(self.measurements_data), - ) - - if self.simulations_data is not None: - if DATASET_ID in self.simulations_data.columns: - self.simulations_data = self.simulations_data.drop( - DATASET_ID, axis=1 - ) - self.simulations_data.insert( - loc=self.simulations_data.columns.size, - column=DATASET_ID, - value=generate_dataset_id_col(self.simulations_data), - ) - - def _get_vis_spec_dependent_columns_dict( - self, group_by: str, id_list: Optional[List[IdsList]] = None - ) -> Dict: - """ - Helper method for creating values for columns PLOT_ID, DATASET_ID, - LEGEND_ENTRY, Y_VALUES for visualization specification file. - - Parameters - ---------- - group_by: - Grouping type. - Possible values: 'dataset', 'observable', 'simulation'. - id_list: - Grouping list. 
Each sublist corresponds to a subplot and - contains the Ids of datasets or observables or simulation - conditions for this subplot. - - Returns - ------- - A dictionary with values for columns PLOT_ID, DATASET_ID, \ - LEGEND_ENTRY, Y_VALUES for visualization specification. - """ - if group_by != "dataset": - dataset_id_list = create_dataset_id_list_new( - self._data_df, group_by, id_list - ) - else: - dataset_id_list = id_list - - dataset_id_column = [ - i_dataset for sublist in dataset_id_list for i_dataset in sublist - ] - - dataset_label_column = [ - self._create_legend(i_dataset) - for sublist in dataset_id_list - for i_dataset in sublist - ] - - # such dataset ids were generated that each dataset_id always - # corresponds to one observable - yvalues_column = [ - self._data_df.loc[ - self._data_df[DATASET_ID] == dataset_id, OBSERVABLE_ID - ].iloc[0] - for sublist in dataset_id_list - for dataset_id in sublist - ] - - # get number of plots and create plotId-lists - plot_id_column = [ - "plot%s" % str(ind + 1) - for ind, inner_list in enumerate(dataset_id_list) - for _ in inner_list - ] - - return { - PLOT_ID: plot_id_column, - DATASET_ID: dataset_id_column, - LEGEND_ENTRY: dataset_label_column, - Y_VALUES: yvalues_column, - } - - def _create_legend(self, dataset_id: str) -> str: - """ - Create a legend for the dataset ids. - - Parameters - ---------- - dataset_id: - Dataset id. - - Returns - ------- - A legend. - """ - # relies on the fact that dataset ids were created based on cond_ids - # and obs_ids. 
Therefore, in the following query all pairs will be - # the same - cond_id, obs_id = self._data_df[ - self._data_df[DATASET_ID] == dataset_id - ][[SIMULATION_CONDITION_ID, OBSERVABLE_ID]].iloc[0, :] - tmp = self.conditions_data.loc[cond_id] - if CONDITION_NAME not in tmp.index or pd.isna(tmp[CONDITION_NAME]): - cond_name = cond_id - else: - cond_name = tmp[CONDITION_NAME] - return f"{cond_name} - {obs_id}" - - def _expand_vis_spec_settings(self, vis_spec: pd.DataFrame): - """ - Expand visualization specification for the case when DATASET_ID is not - in vis_spec.columns. - - Parameters - ------- - vis_spec: - Visualization specification DataFrame in the PEtab format - or a path to a visualization file. - - Returns - ------- - A visualization specification DataFrame. - """ - if DATASET_ID in vis_spec.columns: - raise ValueError( - f"visualization specification expansion is " - f"unnecessary if column {DATASET_ID} is present" - ) - - if vis_spec.empty: - # in case of empty spec all measurements corresponding to each - # observable will be plotted on a separate subplot - observable_ids = self._data_df[OBSERVABLE_ID].unique() - - vis_spec_exp_rows = [ - self._vis_spec_rows_for_obs(obs_id, {PLOT_ID: f"plot{idx}"}) - for idx, obs_id in enumerate(observable_ids) - ] - return pd.concat(vis_spec_exp_rows, ignore_index=True) - - vis_spec_exp_rows = [] - for _, row in vis_spec.iterrows(): - if Y_VALUES in row: - vis_spec_exp_rows.append( - self._vis_spec_rows_for_obs(row[Y_VALUES], row.to_dict()) - ) - else: - observable_ids = self._data_df[OBSERVABLE_ID].unique() - - for obs_id in observable_ids: - vis_spec_exp_rows.append( - self._vis_spec_rows_for_obs(obs_id, row.to_dict()) - ) - return pd.concat(vis_spec_exp_rows, ignore_index=True) - - def _vis_spec_rows_for_obs( - self, obs_id: str, settings: dict - ) -> pd.DataFrame: - """ - Create vis_spec for one observable. 
- - For each dataset_id corresponding to the observable with the specified - id create a vis_spec entry with provided settings. - - Parameters - ---------- - obs_id: - Observable ID. - settings: - Additional visualization settings. For each key that is a - valid visualization specification column name, the setting - will be added to the resulting visualization specification. - - Returns - ------- - A visualization specification DataFrame. - """ - columns_to_expand = [ - PLOT_ID, - PLOT_NAME, - PLOT_TYPE_SIMULATION, - PLOT_TYPE_DATA, - X_VALUES, - X_OFFSET, - X_LABEL, - X_SCALE, - Y_OFFSET, - Y_LABEL, - Y_SCALE, - LEGEND_ENTRY, - ] - - dataset_ids = self._data_df[self._data_df[OBSERVABLE_ID] == obs_id][ - DATASET_ID - ].unique() - n_rows = len(dataset_ids) - columns_dict = {DATASET_ID: dataset_ids, Y_VALUES: [obs_id] * n_rows} - - for column in settings: - if column in columns_to_expand: - columns_dict[column] = [settings[column]] * n_rows - - if LEGEND_ENTRY not in columns_dict: - columns_dict[LEGEND_ENTRY] = [ - self._create_legend(dataset_id) - for dataset_id in columns_dict[DATASET_ID] - ] - return pd.DataFrame(columns_dict) +_deprecated_import_v1(__name__) diff --git a/petab/yaml.py b/petab/yaml.py index 364413f5..8a84221a 100644 --- a/petab/yaml.py +++ b/petab/yaml.py @@ -1,292 +1,7 @@ -"""Code regarding the PEtab YAML config files""" +"""Deprecated module for reading and writing PEtab YAML files. 
-import os -from pathlib import Path -from typing import Any, Dict, List, Optional, Union +Use petab.v1.yaml instead.""" +from petab import _deprecated_import_v1 +from petab.v1.yaml import * # noqa: F403, F401, E402 -import jsonschema -import numpy as np -import yaml -from pandas.io.common import get_handle - -from .C import * # noqa: F403 - -# directory with PEtab yaml schema files -SCHEMA_DIR = Path(__file__).parent / "schemas" -# map of version number to validation schema -SCHEMAS = { - "1": SCHEMA_DIR / "petab_schema.v1.0.0.yaml", - "1.0.0": SCHEMA_DIR / "petab_schema.v1.0.0.yaml", - "2.0.0": SCHEMA_DIR / "petab_schema.v2.0.0.yaml", -} - -__all__ = [ - "validate", - "validate_yaml_syntax", - "validate_yaml_semantics", - "load_yaml", - "is_composite_problem", - "assert_single_condition_and_sbml_file", - "write_yaml", - "create_problem_yaml", -] - - -def validate( - yaml_config: Union[Dict, str, Path], - path_prefix: Union[None, str, Path] = None, -): - """Validate syntax and semantics of PEtab config YAML - - Arguments: - yaml_config: - PEtab YAML config as filename or dict. - path_prefix: - Base location for relative paths. Defaults to location of YAML - file if a filename was provided for ``yaml_config`` or the current - working directory. 
- """ - validate_yaml_syntax(yaml_config) - validate_yaml_semantics(yaml_config=yaml_config, path_prefix=path_prefix) - - -def validate_yaml_syntax( - yaml_config: Union[Dict, str, Path], schema: Union[None, Dict, str] = None -): - """Validate PEtab YAML file syntax - - Arguments: - yaml_config: - PEtab YAML file to validate, as file name or dictionary - schema: - Custom schema for validation - - Raises: - see :func:`jsonschema.validate` - """ - yaml_config = load_yaml(yaml_config) - - if schema is None: - # try get PEtab version from yaml file - # if this is not the available, the file is not valid anyways, - # but let's still use the latest PEtab schema for full validation - version = ( - yaml_config.get(FORMAT_VERSION, None) or list(SCHEMAS.values())[-1] - ) - try: - schema = SCHEMAS[str(version)] - except KeyError as e: - raise ValueError( - "Unknown PEtab version given in problem " - f"specification: {version}" - ) from e - schema = load_yaml(schema) - jsonschema.validate(instance=yaml_config, schema=schema) - - -def validate_yaml_semantics( - yaml_config: Union[Dict, str, Path], - path_prefix: Union[None, str, Path] = None, -): - """Validate PEtab YAML file semantics - - Check for existence of files. Assumes valid syntax. - - Version number and contents of referenced files are not yet checked. - - Arguments: - yaml_config: - PEtab YAML config as filename or dict. - path_prefix: - Base location for relative paths. Defaults to location of YAML - file if a filename was provided for ``yaml_config`` or the current - working directory. - - Raises: - AssertionError: in case of problems - """ - if not path_prefix: - if isinstance(yaml_config, (str, Path)): - path_prefix = os.path.dirname(str(yaml_config)) - else: - path_prefix = "" - - yaml_config = load_yaml(yaml_config) - - def _check_file(_filename: str, _field: str): - if not os.path.isfile(_filename): - raise AssertionError( - f"File '{_filename}' provided as '{_field}' " "does not exist." 
- ) - - # Handles both a single parameter file, and a parameter file that has been - # split into multiple subset files. - for parameter_subset_file in list( - np.array(yaml_config[PARAMETER_FILE]).flat - ): - _check_file( - os.path.join(path_prefix, parameter_subset_file), - parameter_subset_file, - ) - - for problem_config in yaml_config[PROBLEMS]: - for field in [ - SBML_FILES, - CONDITION_FILES, - MEASUREMENT_FILES, - VISUALIZATION_FILES, - OBSERVABLE_FILES, - ]: - if field in problem_config: - for filename in problem_config[field]: - _check_file(os.path.join(path_prefix, filename), field) - - -def load_yaml(yaml_config: Union[Dict, Path, str]) -> Dict: - """Load YAML - - Convenience function to allow for providing YAML inputs as filename, URL - or as dictionary. - - Arguments: - yaml_config: - PEtab YAML config as filename or dict or URL. - - Returns: - The unmodified dictionary if ``yaml_config`` was dictionary. - Otherwise the parsed the YAML file. - """ - # already parsed? all PEtab problem yaml files are dictionaries - if isinstance(yaml_config, dict): - return yaml_config - - with get_handle(yaml_config, mode="r") as io_handle: - data = yaml.safe_load(io_handle.handle) - return data - - -def is_composite_problem(yaml_config: Union[Dict, str, Path]) -> bool: - """Does this YAML file comprise multiple models? - - Arguments: - yaml_config: PEtab configuration as dictionary or YAML file name - """ - yaml_config = load_yaml(yaml_config) - return len(yaml_config[PROBLEMS]) > 1 - - -def assert_single_condition_and_sbml_file(problem_config: Dict) -> None: - """Check that there is only a single condition file and a single SBML - file specified. - - Arguments: - problem_config: - Dictionary as defined in the YAML schema inside the `problems` - list. - Raises: - NotImplementedError: - If multiple condition or SBML files specified. 
- """ - if ( - len(problem_config[SBML_FILES]) > 1 - or len(problem_config[CONDITION_FILES]) > 1 - ): - # TODO https://github.com/ICB-DCM/PEtab/issues/188 - # TODO https://github.com/ICB-DCM/PEtab/issues/189 - raise NotImplementedError( - "Support for multiple models or condition files is not yet " - "implemented." - ) - - -def write_yaml( - yaml_config: Dict[str, Any], filename: Union[str, Path] -) -> None: - """Write PEtab YAML file - - Arguments: - yaml_config: Data to write - filename: File to create - """ - with open(filename, "w") as outfile: - yaml.dump( - yaml_config, outfile, default_flow_style=False, sort_keys=False - ) - - -def create_problem_yaml( - sbml_files: Union[str, Path, List[Union[str, Path]]], - condition_files: Union[str, Path, List[Union[str, Path]]], - measurement_files: Union[str, Path, List[Union[str, Path]]], - parameter_file: Union[str, Path], - observable_files: Union[str, Path, List[Union[str, Path]]], - yaml_file: Union[str, Path], - visualization_files: Optional[ - Union[str, Path, List[Union[str, Path]]] - ] = None, - relative_paths: bool = True, - mapping_files: Union[str, Path, List[Union[str, Path]]] = None, -) -> None: - """Create and write default YAML file for a single PEtab problem - - Arguments: - sbml_files: Path of SBML model file or list of such - condition_files: Path of condition file or list of such - measurement_files: Path of measurement file or list of such - parameter_file: Path of parameter file - observable_files: Path of observable file or list of such - yaml_file: Path to which YAML file should be written - visualization_files: - Optional Path to visualization file or list of such - relative_paths: - whether all paths in the YAML file should be relative to the - location of the YAML file. If ``False``, then paths are left - unchanged. 
- mapping_files: Path of mapping file - """ - if isinstance(sbml_files, (Path, str)): - sbml_files = [sbml_files] - if isinstance(condition_files, (Path, str)): - condition_files = [condition_files] - if isinstance(measurement_files, (Path, str)): - measurement_files = [measurement_files] - if isinstance(observable_files, (Path, str)): - observable_files = [observable_files] - if isinstance(visualization_files, (Path, str)): - visualization_files = [visualization_files] - - if relative_paths: - yaml_file_dir = Path(yaml_file).parent - - def get_rel_to_yaml(paths: Union[List[str], None]): - if paths is None: - return paths - return [ - os.path.relpath(path, start=yaml_file_dir) for path in paths - ] - - sbml_files = get_rel_to_yaml(sbml_files) - condition_files = get_rel_to_yaml(condition_files) - measurement_files = get_rel_to_yaml(measurement_files) - observable_files = get_rel_to_yaml(observable_files) - visualization_files = get_rel_to_yaml(visualization_files) - parameter_file = get_rel_to_yaml([parameter_file])[0] - mapping_files = get_rel_to_yaml(mapping_files) - - problem_dic = { - CONDITION_FILES: condition_files, - MEASUREMENT_FILES: measurement_files, - SBML_FILES: sbml_files, - OBSERVABLE_FILES: observable_files, - } - if mapping_files: - problem_dic[MAPPING_FILES] = mapping_files - - if visualization_files is not None: - problem_dic[VISUALIZATION_FILES] = visualization_files - yaml_dic = { - PARAMETER_FILE: parameter_file, - FORMAT_VERSION: 1, - PROBLEMS: [problem_dic], - } - write_yaml(yaml_dic, yaml_file) +_deprecated_import_v1(__name__) diff --git a/pyproject.toml b/pyproject.toml index 5fcdd9e7..1758476a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,6 +5,84 @@ requires = [ ] build-backend = "setuptools.build_meta" +[project] +name = "petab" +dynamic = ["version", "readme"] +description = "Parameter estimation tabular data" +requires-python = ">=3.10" +dependencies = [ + "numpy>=1.15.1", + "pandas>=1.2.0", + # remove when pandas >= 3, see 
also + # https://github.com/pandas-dev/pandas/issues/54466 + "pyarrow", + "python-libsbml>=5.17.0", + "sympy", + "colorama", + "pyyaml", + "jsonschema", + "antlr4-python3-runtime==4.13.1", +] +license = {text = "MIT License"} +authors = [ + {name = "The PEtab developers"}, +] +maintainers = [ + {name = "Daniel Weindl", email = "sci@danielweindl.de"}, + {name = "Dilan Pathirana", email = "dilan.pathirana@uni-bonn.de"}, + {name = "Maren Philipps", email = "maren.philipps@uni-bonn.de"}, +] + +[project.optional-dependencies] +tests = [ + "pytest", + "pytest-cov", + "simplesbml", + "scipy", + "pysb", +] +quality = [ + "pre-commit", +] +reports = [ + # https://github.com/spatialaudio/nbsphinx/issues/641 + "Jinja2==3.0.3", +] +combine = [ + "python-libcombine>=0.2.6", +] +doc = [ + "sphinx>=3.5.3, !=5.1.0", + "sphinxcontrib-napoleon>=0.7", + "sphinx-markdown-tables>=0.0.15", + "sphinx-rtd-theme>=0.5.1", + "m2r2", + "myst-nb>=0.14.0", + # https://github.com/spatialaudio/nbsphinx/issues/687#issuecomment-1339271312 + "ipython>=7.21.0, !=8.7.0", + "pysb", +] +vis = [ + "matplotlib>=3.6.0", + "seaborn", + "scipy" +] + +[project.scripts] +petablint = "petab.petablint:main" +petab_visualize = "petab.v1.visualize.cli:_petab_visualize_main" + +[project.urls] +Repository = "https://github.com/PEtab-dev/libpetab-python" +Documentation = "https://petab.readthedocs.io/projects/libpetab-python/" + +[tool.setuptools.packages.find] +include = ["petab", "petab.*"] +namespaces = false + +[tool.setuptools.package-data] +petab = ["petab/schemas/*", "petab/visualize/templates/*"] + [tool.ruff] line-length = 79 lint.extend-select = [ @@ -20,9 +98,17 @@ lint.extend-select = [ # TODO: "ANN001", "D", # pydocstyle (PEP 257) ] lint.extend-ignore = ["F403", "F405", "S101"] +lint.exclude = [ + "petab/v1/math/_generated/*", # auto-generated +] [tool.ruff.lint.pydocstyle] convention = "pep257" [tool.ruff.lint.per-file-ignores] "tests/*" = ["T201"] + +[tool.ruff.format] +exclude = [ + 
"petab/math/_generated/*", # auto-generated +] diff --git a/pytest.ini b/pytest.ini index 8d2e5992..11b8918a 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,9 @@ [pytest] filterwarnings = error + # TODO: until tests are reorganized for petab.v1 + ignore::DeprecationWarning + ignore:Support for PEtab2.0 and all of petab.v2 is experimental:UserWarning + ignore:Support for PEtab2.0 is experimental:UserWarning ignore:.*inspect.getargspec\(\) is deprecated.*:DeprecationWarning ignore:.*Passing unrecognized arguments to super\(PyDevIPCompleter6\).*:DeprecationWarning diff --git a/setup.py b/setup.py index 2ff424eb..3ba41deb 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ import os import re -from setuptools import find_namespace_packages, setup +from setuptools import setup def read(fname): @@ -35,68 +35,10 @@ def absolute_links(txt): # sets __version__ exec(read(version_file)) # pylint: disable=W0122 # nosec # noqa: S102 -ENTRY_POINTS = { - "console_scripts": [ - "petablint = petab.petablint:main", - "petab_visualize = petab.visualize.cli:_petab_visualize_main", - ] -} - # project metadata # noinspection PyUnresolvedReferences setup( - name="petab", - version=__version__, - description="Parameter estimation tabular data", long_description=absolute_links(read("README.md")), long_description_content_type="text/markdown", - author="The PEtab developers", - author_email="daniel.weindl@helmholtz-muenchen.de", - url="https://github.com/PEtab-dev/libpetab-python", - packages=find_namespace_packages(exclude=["doc*", "test*"]), - install_requires=[ - "numpy>=1.15.1", - "pandas>=1.2.0", - # remove when pandas >= 3, see also - # https://github.com/pandas-dev/pandas/issues/54466 - "pyarrow", - "python-libsbml>=5.17.0", - "sympy", - "colorama", - "pyyaml", - "jsonschema", - ], - include_package_data=True, - python_requires=">=3.10.0", - entry_points=ENTRY_POINTS, - extras_require={ - "tests": [ - "pytest", - "pytest-cov", - "simplesbml", - "scipy", - "pysb", - ], - 
"quality": [ - "pre-commit", - ], - "reports": [ - # https://github.com/spatialaudio/nbsphinx/issues/641 - "Jinja2==3.0.3", - ], - "combine": [ - "python-libcombine>=0.2.6", - ], - "doc": [ - "sphinx>=3.5.3, !=5.1.0", - "sphinxcontrib-napoleon>=0.7", - "sphinx-markdown-tables>=0.0.15", - "sphinx-rtd-theme>=0.5.1", - "m2r2", - "myst-nb>=0.14.0", - # https://github.com/spatialaudio/nbsphinx/issues/687#issuecomment-1339271312 - "ipython>=7.21.0, !=8.7.0", - ], - "vis": ["matplotlib>=3.6.0", "seaborn", "scipy"], - }, + version=__version__, ) diff --git a/tests/v1/__init__.py b/tests/v1/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/v1/math/__init__.py b/tests/v1/math/__init__.py new file mode 100644 index 00000000..45457ba7 --- /dev/null +++ b/tests/v1/math/__init__.py @@ -0,0 +1 @@ +"""PEtab math handling functionality.""" diff --git a/tests/v1/math/test_math.py b/tests/v1/math/test_math.py new file mode 100644 index 00000000..4b350d4e --- /dev/null +++ b/tests/v1/math/test_math.py @@ -0,0 +1,94 @@ +import importlib.resources +from pathlib import Path + +import numpy as np +import pytest +import sympy as sp +import yaml +from sympy.abc import _clash +from sympy.logic.boolalg import Boolean + +from petab.math import sympify_petab + + +def test_sympify_numpy(): + assert sympify_petab(np.float64(1.0)) == sp.Float(1.0) + + +def test_parse_simple(): + """Test simple numeric expressions.""" + assert float(sympify_petab("1 + 2")) == 3 + assert float(sympify_petab("1 + 2 * 3")) == 7 + assert float(sympify_petab("(1 + 2) * 3")) == 9 + assert float(sympify_petab("1 + 2 * (3 + 4)")) == 15 + assert float(sympify_petab("1 + 2 * (3 + 4) / 2")) == 8 + + +def read_cases(): + """Read test cases from YAML file in the petab_test_suite package.""" + yaml_file = importlib.resources.files("petabtests.cases").joinpath( + str(Path("v2.0.0", "math", "math_tests.yaml")) + ) + with importlib.resources.as_file(yaml_file) as file, open(file) as file: + data = 
yaml.safe_load(file) + + cases = [] + for item in data["cases"]: + expr_str = item["expression"] + if item["expected"] is True or item["expected"] is False: + expected = item["expected"] + else: + try: + expected = float(item["expected"]) + except ValueError: + expected = sp.sympify(item["expected"], locals=_clash) + expected = expected.subs( + { + s: sp.Symbol(s.name, real=True) + for s in expected.free_symbols + } + ) + cases.append((expr_str, expected)) + return cases + + +@pytest.mark.parametrize("expr_str, expected", read_cases()) +def test_parse_cases(expr_str, expected): + """Test PEtab math expressions for the PEtab test suite.""" + result = sympify_petab(expr_str) + if isinstance(result, Boolean): + assert result == expected + else: + try: + result = float(result.evalf()) + assert np.isclose( + result, expected + ), f"{expr_str}: Expected {expected}, got {result}" + except TypeError: + assert ( + result == expected + ), f"{expr_str}: Expected {expected}, got {result}" + + +def test_ids(): + """Test symbols in expressions.""" + assert sympify_petab("bla * 2") == 2.0 * sp.Symbol("bla", real=True) + + +def test_syntax_error(): + """Test exceptions upon syntax errors.""" + # parser error + with pytest.raises(ValueError, match="Syntax error"): + sympify_petab("1 + ") + + # lexer error + with pytest.raises(ValueError, match="Syntax error"): + sympify_petab("0.") + + +def test_complex(): + """Test expressions producing (unsupported) complex numbers.""" + with pytest.raises(ValueError, match="not real-valued"): + sympify_petab("sqrt(-1)") + with pytest.raises(ValueError, match="not real-valued"): + sympify_petab("arctanh(inf)") diff --git a/tests/test_calculate.py b/tests/v1/test_calculate.py similarity index 99% rename from tests/test_calculate.py rename to tests/v1/test_calculate.py index d98896c8..ca93c33a 100644 --- a/tests/test_calculate.py +++ b/tests/v1/test_calculate.py @@ -4,6 +4,7 @@ import pandas as pd import pytest +import petab from petab import ( 
calculate_chi2, calculate_llh, @@ -55,8 +56,8 @@ def model_simple(): return ( measurement_df, - observable_df, - parameter_df, + petab.get_observable_df(observable_df), + petab.get_parameter_df(parameter_df), simulation_df, expected_residuals, expected_residuals_nonorm, diff --git a/tests/test_combine.py b/tests/v1/test_combine.py similarity index 99% rename from tests/test_combine.py rename to tests/v1/test_combine.py index 398b2737..08ad5b77 100644 --- a/tests/test_combine.py +++ b/tests/v1/test_combine.py @@ -9,7 +9,7 @@ # import fixtures pytest_plugins = [ - "tests.test_petab", + "tests.v1.test_petab", ] diff --git a/tests/test_conditions.py b/tests/v1/test_conditions.py similarity index 100% rename from tests/test_conditions.py rename to tests/v1/test_conditions.py diff --git a/tests/test_deprecated.py b/tests/v1/test_deprecated.py similarity index 100% rename from tests/test_deprecated.py rename to tests/v1/test_deprecated.py diff --git a/tests/test_lint.py b/tests/v1/test_lint.py similarity index 97% rename from tests/test_lint.py rename to tests/v1/test_lint.py index cc99f71d..b178a425 100644 --- a/tests/test_lint.py +++ b/tests/v1/test_lint.py @@ -12,7 +12,7 @@ # import fixtures pytest_plugins = [ - "tests.test_petab", + "tests.v1.test_petab", ] @@ -38,7 +38,7 @@ def test_assert_measured_observables_present(): def test_condition_table_is_parameter_free(): with patch( - "petab.get_parametric_overrides" + "petab.v1.get_parametric_overrides" ) as mock_get_parametric_overrides: mock_get_parametric_overrides.return_value = [] assert lint.condition_table_is_parameter_free(pd.DataFrame()) is True @@ -427,8 +427,8 @@ def test_petablint_succeeds(): """Run petablint and ensure we exit successfully for a file that should contain no errors """ - dir_isensee = "../doc/example/example_Isensee/" - dir_fujita = "../doc/example/example_Fujita/" + dir_isensee = "../../doc/example/example_Isensee/" + dir_fujita = "../../doc/example/example_Fujita/" # run with measurement 
file script_path = os.path.abspath(os.path.dirname(__file__)) @@ -642,3 +642,18 @@ def test_parameter_ids_are_unique(): parameter_df.index = ["par0", "par1"] parameter_df.index.name = "parameterId" lint.check_parameter_df(parameter_df) + + +def test_check_positive_bounds_for_scaled_parameters(): + parameter_df = pd.DataFrame( + { + PARAMETER_ID: ["par"], + PARAMETER_SCALE: [LOG10], + ESTIMATE: [1], + LOWER_BOUND: [0.0], + UPPER_BOUND: [1], + } + ).set_index(PARAMETER_ID) + + with pytest.raises(AssertionError, match="positive"): + lint.check_parameter_df(parameter_df) diff --git a/tests/test_mapping.py b/tests/v1/test_mapping.py similarity index 100% rename from tests/test_mapping.py rename to tests/v1/test_mapping.py diff --git a/tests/test_measurements.py b/tests/v1/test_measurements.py similarity index 100% rename from tests/test_measurements.py rename to tests/v1/test_measurements.py diff --git a/tests/test_model_pysb.py b/tests/v1/test_model_pysb.py similarity index 100% rename from tests/test_model_pysb.py rename to tests/v1/test_model_pysb.py diff --git a/tests/test_observables.py b/tests/v1/test_observables.py similarity index 99% rename from tests/test_observables.py rename to tests/v1/test_observables.py index 2897f86f..f9547fec 100644 --- a/tests/test_observables.py +++ b/tests/v1/test_observables.py @@ -10,7 +10,7 @@ # import fixtures pytest_plugins = [ - "tests.test_petab", + "tests.v1.test_petab", ] diff --git a/tests/test_parameter_mapping.py b/tests/v1/test_parameter_mapping.py similarity index 99% rename from tests/test_parameter_mapping.py rename to tests/v1/test_parameter_mapping.py index f1db8c02..e499bd5c 100644 --- a/tests/test_parameter_mapping.py +++ b/tests/v1/test_parameter_mapping.py @@ -7,11 +7,11 @@ import petab from petab.C import * from petab.models.sbml_model import SbmlModel -from petab.parameter_mapping import _apply_parameter_table +from petab.v1.parameter_mapping import _apply_parameter_table # import fixtures pytest_plugins = [ - 
"tests.test_petab", + "tests.v1.test_petab", ] diff --git a/tests/test_parameters.py b/tests/v1/test_parameters.py similarity index 99% rename from tests/test_parameters.py rename to tests/v1/test_parameters.py index a2fa5e66..c28528fe 100644 --- a/tests/test_parameters.py +++ b/tests/v1/test_parameters.py @@ -226,7 +226,7 @@ def test_normalize_parameter_df(): # check if prior parameters match for col in [INITIALIZATION_PRIOR_PARAMETERS, OBJECTIVE_PRIOR_PARAMETERS]: for (_, actual_row), (_, expected_row) in zip( - actual.iterrows(), expected.iterrows() + actual.iterrows(), expected.iterrows(), strict=True ): actual_pars = tuple( [float(val) for val in actual_row[col].split(";")] diff --git a/tests/test_petab.py b/tests/v1/test_petab.py similarity index 98% rename from tests/test_petab.py rename to tests/v1/test_petab.py index ed4ac63a..65700af5 100644 --- a/tests/test_petab.py +++ b/tests/v1/test_petab.py @@ -13,6 +13,7 @@ from yaml import safe_load import petab +import petab.v1 from petab.C import * from petab.models.sbml_model import SbmlModel @@ -110,7 +111,7 @@ def petab_problem(): @pytest.fixture def fujita_model_scaling(): - path = Path(__file__).parent.parent / "doc" / "example" / "example_Fujita" + path = Path(__file__).parents[2] / "doc" / "example" / "example_Fujita" sbml_file = path / "Fujita_model.xml" condition_file = path / "Fujita_experimentalCondition.tsv" @@ -225,7 +226,13 @@ def test_get_priors_from_df(): bounds = [entry[3] for entry in prior_list] assert ( bounds - == list(zip(parameter_df[LOWER_BOUND], parameter_df[UPPER_BOUND]))[:4] + == list( + zip( + parameter_df[LOWER_BOUND], + parameter_df[UPPER_BOUND], + strict=True, + ) + )[:4] ) # give correct value for empty @@ -304,7 +311,7 @@ def test_create_parameter_df( # Test old API with passing libsbml.Model directly with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - parameter_df = petab.create_parameter_df( + parameter_df = petab.v1.create_parameter_df( 
ss_model.model, condition_df_2_conditions, observable_df, diff --git a/tests/test_sbml.py b/tests/v1/test_sbml.py similarity index 100% rename from tests/test_sbml.py rename to tests/v1/test_sbml.py diff --git a/tests/test_simplify.py b/tests/v1/test_simplify.py similarity index 100% rename from tests/test_simplify.py rename to tests/v1/test_simplify.py diff --git a/tests/test_simulate.py b/tests/v1/test_simulate.py similarity index 99% rename from tests/test_simulate.py rename to tests/v1/test_simulate.py index ee85ea08..e23b63cb 100644 --- a/tests/test_simulate.py +++ b/tests/v1/test_simulate.py @@ -1,7 +1,7 @@ """Tests for petab/simulate.py.""" import functools +from collections.abc import Callable from pathlib import Path -from typing import Callable import numpy as np import pandas as pd @@ -26,7 +26,7 @@ def simulate_without_noise(self) -> pd.DataFrame: def petab_problem() -> petab.Problem: """Create a PEtab problem for use in tests.""" petab_yaml_path = ( - Path(__file__).parent.parent.absolute() + Path(__file__).parents[2].absolute() / "doc" / "example" / "example_Fujita" diff --git a/tests/test_visualization.py b/tests/v1/test_visualization.py similarity index 99% rename from tests/test_visualization.py rename to tests/v1/test_visualization.py index 1e67afff..0edd4b78 100644 --- a/tests/test_visualization.py +++ b/tests/v1/test_visualization.py @@ -20,7 +20,7 @@ # Avoid errors when plotting without X server plt.switch_backend("agg") -EXAMPLE_DIR = Path(__file__).parents[1] / "doc" / "example" +EXAMPLE_DIR = Path(__file__).parents[2] / "doc" / "example" @pytest.fixture(scope="function") diff --git a/tests/test_visualization_data_overview.py b/tests/v1/test_visualization_data_overview.py similarity index 92% rename from tests/test_visualization_data_overview.py rename to tests/v1/test_visualization_data_overview.py index 76669f43..1b42fdbb 100644 --- a/tests/test_visualization_data_overview.py +++ b/tests/v1/test_visualization_data_overview.py @@ -10,7 +10,7 
@@ def test_data_overview(): repository""" with TemporaryDirectory() as temp_dir: outfile = Path(temp_dir) / "Fujita.html" - repo_root = Path(__file__).parent.parent + repo_root = Path(__file__).parents[2] yaml_filename = ( repo_root / "doc" / "example" / "example_Fujita" / "Fujita.yaml" ) diff --git a/tests/test_yaml.py b/tests/v1/test_yaml.py similarity index 73% rename from tests/test_yaml.py rename to tests/v1/test_yaml.py index f739c50b..82ab242c 100644 --- a/tests/test_yaml.py +++ b/tests/v1/test_yaml.py @@ -5,7 +5,7 @@ import pytest from jsonschema.exceptions import ValidationError -from petab.yaml import create_problem_yaml, validate +from petab.yaml import create_problem_yaml, get_path_prefix, validate def test_validate(): @@ -17,7 +17,7 @@ def test_validate(): # should be well-formed file_ = ( - Path(__file__).parents[1] + Path(__file__).parents[2] / "doc" / "example" / "example_Fujita" @@ -37,8 +37,10 @@ def test_create_problem_yaml(): observable_file = Path(outdir, "observables.tsv") yaml_file = Path(outdir, "problem.yaml") visualization_file = Path(outdir, "visualization.tsv") + + _create_dummy_sbml_model(sbml_file) + for file in ( - sbml_file, condition_file, measurement_file, parameter_file, @@ -65,13 +67,14 @@ def test_create_problem_yaml(): observable_file2 = Path(outdir, "observables2.tsv") yaml_file2 = Path(outdir, "problem2.yaml") for file in ( - sbml_file2, condition_file2, measurement_file2, observable_file2, ): file.touch() + _create_dummy_sbml_model(sbml_file2) + sbml_files = [sbml_file, sbml_file2] condition_files = [condition_file, condition_file2] measurement_files = [measurement_file, measurement_file2] @@ -85,3 +88,29 @@ def test_create_problem_yaml(): yaml_file2, ) validate(yaml_file2) + + +def test_get_path_prefix(): + assert get_path_prefix("/some/dir/file.yaml") == str(Path("/some/dir")) + assert get_path_prefix("some/dir/file.yaml") == str(Path("some/dir")) + assert ( + get_path_prefix("https://petab.rocks/dir/file.yaml") + == 
"https://petab.rocks/dir" + ) + + +def test_validate_remote(): + yaml_url = ( + "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" + "/main/petabtests/cases/v1.0.0/sbml/0001/_0001.yaml" + ) + + validate(yaml_url) + + +def _create_dummy_sbml_model(sbml_file: Path | str): + import libsbml + + sbml_doc = libsbml.SBMLDocument() + sbml_doc.createModel() + libsbml.writeSBMLToFile(sbml_doc, str(sbml_file)) diff --git a/tests/v2/__init__.py b/tests/v2/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/v2/test_conversion.py b/tests/v2/test_conversion.py new file mode 100644 index 00000000..c309a00e --- /dev/null +++ b/tests/v2/test_conversion.py @@ -0,0 +1,34 @@ +import logging +import tempfile + +from petab.v2.petab1to2 import petab1to2 + + +def test_petab1to2_remote(): + yaml_url = ( + "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" + "/main/petabtests/cases/v1.0.0/sbml/0001/_0001.yaml" + ) + + with tempfile.TemporaryDirectory(prefix="test_petab1to2") as tmpdirname: + # TODO verify that the v2 files match "ground truth" + # in `petabtests/cases/v2.0.0/sbml/0001/_0001.yaml` + petab1to2(yaml_url, tmpdirname) + + +def test_benchmark_collection(): + """Test that we can upgrade all benchmark collection models.""" + import benchmark_models_petab + + logging.basicConfig(level=logging.DEBUG) + + for problem_id in benchmark_models_petab.MODELS: + if problem_id == "Lang_PLOSComputBiol2024": + # Does not pass initial linting + continue + + yaml_path = benchmark_models_petab.get_problem_yaml_path(problem_id) + with tempfile.TemporaryDirectory( + prefix=f"test_petab1to2_{problem_id}" + ) as tmpdirname: + petab1to2(yaml_path, tmpdirname) diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py new file mode 100644 index 00000000..334dc86a --- /dev/null +++ b/tests/v2/test_problem.py @@ -0,0 +1,27 @@ +from petab.v2 import Problem + + +def test_load_remote(): + """Test loading remote files""" + yaml_url = ( + 
"https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" + "/main/petabtests/cases/v2.0.0/sbml/0001/_0001.yaml" + ) + petab_problem = Problem.from_yaml(yaml_url) + + assert ( + petab_problem.measurement_df is not None + and not petab_problem.measurement_df.empty + ) + + assert petab_problem.validate() == [] + + +def test_auto_upgrade(): + yaml_url = ( + "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" + "/main/petabtests/cases/v1.0.0/sbml/0001/_0001.yaml" + ) + problem = Problem.from_yaml(yaml_url) + # TODO check something specifically different in a v2 problem + assert isinstance(problem, Problem) diff --git a/tox.ini b/tox.ini index b7c94b39..d57aa91d 100644 --- a/tox.ini +++ b/tox.ini @@ -13,6 +13,10 @@ description = [testenv:unit] extras = tests,reports,combine,vis +deps= + git+https://github.com/PEtab-dev/petab_test_suite@main + git+https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab.git@master\#subdirectory=src/python + commands = python -m pip install sympy>=1.12.1 python -m pytest --cov=petab --cov-report=xml --cov-append \