diff --git a/petab/v2/normalize.py b/petab/v2/normalize.py new file mode 100644 index 00000000..a2258a29 --- /dev/null +++ b/petab/v2/normalize.py @@ -0,0 +1,160 @@ +"""Functions for normalizing PEtab tables. + +Setting data types, adding missing columns and replacing NA by default values. +""" + +from typing import TypeVar + +import pandas as pd + +from .C import * + +__all__ = [ + "normalize_parameter_df", + "normalize_measurement_df", + "normalize_condition_df", + "normalize_observable_df", +] + +DataFrameOrNone = TypeVar("DataFrameOrNone", pd.DataFrame, None) + + +def normalize_parameter_df( + df: DataFrameOrNone, inplace: bool = False +) -> DataFrameOrNone: + """Normalize parameter table. + + Arguments: + df: Parameter table + inplace: Modify DataFrame in place + Returns: + The updated DataFrame + """ + if df is None: + return + + if not inplace: + df = df.copy() + + col_to_type = { + PARAMETER_ID: str, + PARAMETER_NAME: str, + PARAMETER_SCALE: str, + NOMINAL_VALUE: float, + LOWER_BOUND: float, + UPPER_BOUND: float, + ESTIMATE: bool, + INITIALIZATION_PRIOR_TYPE: str, + INITIALIZATION_PRIOR_PARAMETERS: str, # TODO -> tuple? + OBJECTIVE_PRIOR_TYPE: str, + OBJECTIVE_PRIOR_PARAMETERS: str, # TODO -> tuple? + } + for col, dtype in col_to_type.items(): + if col in df.columns: + df[col] = df[col].astype(dtype) + else: + df[col] = pd.NA + + # TODO only if estimate is True? + df[INITIALIZATION_PRIOR_TYPE] = df[INITIALIZATION_PRIOR_TYPE].fillna( + PARAMETER_SCALE_UNIFORM + ) + # TODO: lb;ub + # df[INITIALIZATION_PRIOR_PARAMETERS] + + return df + + +def normalize_measurement_df( + df: DataFrameOrNone, inplace: bool = False +) -> DataFrameOrNone: + """Normalize measurement table. + + Arguments: + df: Measurement table + inplace: Modify DataFrame in place + Returns: + The updated DataFrame + """ + if df is None: + return + + if not inplace: + df = df.copy() + + col_to_type = { + OBSERVABLE_ID: str, + PREEQUILIBRATION_CONDITION_ID: str, + SIMULATION_CONDITION_ID: str, + MEASUREMENT: float, + TIME: float, + OBSERVABLE_PARAMETERS: str, # TODO -> tuple? + NOISE_PARAMETERS: str, # TODO -> tuple? + } + for col, dtype in col_to_type.items(): + if col in df.columns: + df[col] = df[col].astype(dtype) + else: + df[col] = pd.NA + + return df + + +def normalize_condition_df( + df: DataFrameOrNone, inplace: bool = False +) -> DataFrameOrNone: + """Normalize condition table. + + Arguments: + df: Condition table + inplace: Modify DataFrame in place + Returns: + The updated DataFrame + """ + if df is None: + return + + if not inplace: + df = df.copy() + + # TODO: always as string even if everything is numeric? + # TODO: everything but ID and Name to sympy? + df = df.astype(str) + + return df + + +def normalize_observable_df( + df: DataFrameOrNone, inplace: bool = False +) -> DataFrameOrNone: + """Normalize observable table. + + Arguments: + df: Observable table + inplace: Modify DataFrame in place + Returns: + The updated DataFrame + """ + if df is None: + return + + if not inplace: + df = df.copy() + + col_to_type = { + OBSERVABLE_ID: str, + OBSERVABLE_NAME: str, + OBSERVABLE_FORMULA: str, # TODO -> sympy? + NOISE_FORMULA: str, # TODO -> sympy? + NOISE_DISTRIBUTION: str, + OBSERVABLE_TRANSFORMATION: str, + } + for col, dtype in col_to_type.items(): + if col in df.columns: + df[col] = df[col].astype(dtype) + else: + df[col] = pd.NA + + df[OBSERVABLE_TRANSFORMATION] = df[OBSERVABLE_TRANSFORMATION].fillna(LIN) + df[NOISE_DISTRIBUTION] = df[NOISE_DISTRIBUTION].fillna(NORMAL) + df[NOISE_FORMULA] = df[NOISE_FORMULA].fillna(1.0) diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 612f2571..586812da 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -24,6 +24,7 @@ from ..v1.C import * # noqa: F403 from ..v1.models.model import Model, model_factory from ..v1.yaml import get_path_prefix +from .normalize import * if TYPE_CHECKING: from ..v2.lint import ValidationIssue, ValidationResultList, ValidationTask @@ -717,3 +718,22 @@ def validate( break return validation_results + + def normalize(self): + """Normalize tables. + + This function normalizes the tables in the PEtab problem. + It sets data types, adds missing columns, and replaces NA by default + values. + """ + normalize_condition_df(self.condition_df, inplace=True) + normalize_measurement_df(self.measurement_df, inplace=True) + normalize_parameter_df(self.parameter_df, inplace=True) + normalize_observable_df(self.observable_df, inplace=True) + # TODO + # normalize_mapping_df(self.mapping_df, inplace=True) + + if self.visualization_df is not None: + from petab.v1.visualize.lint import _apply_defaults + + _apply_defaults(self.visualization_df)