Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functions for setting series data types #303

Draft
wants to merge 1 commit into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 160 additions & 0 deletions petab/v2/normalize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
"""Functions for normalizing PEtab tables.

Setting data types, adding missing columns and replacing NA by default values.
"""

from typing import TypeVar

import pandas as pd

from .C import *

# Names exported by ``from .normalize import *``: the four table normalizers
# defined in this module.
__all__ = [
    "normalize_parameter_df",
    "normalize_measurement_df",
    "normalize_condition_df",
    "normalize_observable_df",
]

# Constrained type variable used in the normalizer signatures below, where the
# ``df`` argument and the return value share it (either a DataFrame or None).
DataFrameOrNone = TypeVar("DataFrameOrNone", pd.DataFrame, None)


def normalize_parameter_df(
    df: DataFrameOrNone, inplace: bool = False
) -> DataFrameOrNone:
    """Normalize parameter table.

    Casts the known parameter-table columns to their expected data types,
    adds any of those columns that are missing (filled with ``pd.NA``), and
    replaces missing initialization prior types by
    ``PARAMETER_SCALE_UNIFORM``.

    Missing values in string-typed columns are kept as missing values
    instead of being turned into the literal text ``"nan"``, so that they
    can still be recognized and replaced by defaults.

    Arguments:
        df: Parameter table
        inplace: Modify DataFrame in place
    Returns:
        The updated DataFrame, or ``None`` if ``df`` is ``None``.
    """
    if df is None:
        return None

    if not inplace:
        df = df.copy()

    col_to_type = {
        PARAMETER_ID: str,
        PARAMETER_NAME: str,
        PARAMETER_SCALE: str,
        NOMINAL_VALUE: float,
        LOWER_BOUND: float,
        UPPER_BOUND: float,
        # NOTE(review): ``astype(bool)`` maps a missing value to ``True``;
        # confirm that ESTIMATE can never be NA at this point.
        ESTIMATE: bool,
        INITIALIZATION_PRIOR_TYPE: str,
        INITIALIZATION_PRIOR_PARAMETERS: str,  # TODO -> tuple?
        OBJECTIVE_PRIOR_TYPE: str,
        OBJECTIVE_PRIOR_PARAMETERS: str,  # TODO -> tuple?
    }
    for col, dtype in col_to_type.items():
        if col not in df.columns:
            df[col] = pd.NA
        elif dtype is str:
            # ``astype(str)`` may stringify NaN to "nan", which would hide
            # the missing value from the ``fillna`` below. Mask it back.
            df[col] = df[col].astype(str).where(df[col].notna())
        else:
            df[col] = df[col].astype(dtype)

    # TODO only if estimate is True?
    df[INITIALIZATION_PRIOR_TYPE] = df[INITIALIZATION_PRIOR_TYPE].fillna(
        PARAMETER_SCALE_UNIFORM
    )
    # TODO: lb;ub
    #  df[INITIALIZATION_PRIOR_PARAMETERS]

    return df


def normalize_measurement_df(
    df: DataFrameOrNone, inplace: bool = False
) -> DataFrameOrNone:
    """Normalize measurement table.

    Casts the known measurement-table columns to their expected data types
    and adds any of those columns that are missing (filled with ``pd.NA``).

    Missing values in string-typed columns are kept as missing values
    instead of being turned into the literal text ``"nan"``.

    Arguments:
        df: Measurement table
        inplace: Modify DataFrame in place
    Returns:
        The updated DataFrame, or ``None`` if ``df`` is ``None``.
    """
    if df is None:
        return None

    if not inplace:
        df = df.copy()

    col_to_type = {
        OBSERVABLE_ID: str,
        PREEQUILIBRATION_CONDITION_ID: str,
        SIMULATION_CONDITION_ID: str,
        MEASUREMENT: float,
        TIME: float,
        OBSERVABLE_PARAMETERS: str,  # TODO -> tuple?
        NOISE_PARAMETERS: str,  # TODO -> tuple?
    }
    for col, dtype in col_to_type.items():
        if col not in df.columns:
            df[col] = pd.NA
        elif dtype is str:
            # ``astype(str)`` may stringify NaN to "nan"; mask the originally
            # missing entries back to missing so an empty cell stays empty.
            df[col] = df[col].astype(str).where(df[col].notna())
        else:
            df[col] = df[col].astype(dtype)

    return df


def normalize_condition_df(
    df: DataFrameOrNone, inplace: bool = False
) -> DataFrameOrNone:
    """Normalize condition table.

    Converts every column of the table to strings. The index is left
    untouched.

    Arguments:
        df: Condition table
        inplace: Modify DataFrame in place. If ``True``, the columns of the
            passed DataFrame itself are replaced and that same object is
            returned.
    Returns:
        The updated DataFrame, or ``None`` if ``df`` is ``None``.
    """
    if df is None:
        return None

    # TODO: always as string even if everything is numeric?
    # TODO: everything but ID and Name to sympy?
    if not inplace:
        # ``astype`` already returns a new object, no extra copy needed.
        return df.astype(str)

    # ``df = df.astype(str)`` would only rebind the local name and leave the
    # caller's DataFrame unchanged, so assign column by column instead.
    for col in df.columns:
        df[col] = df[col].astype(str)

    return df


def normalize_observable_df(
    df: DataFrameOrNone, inplace: bool = False
) -> DataFrameOrNone:
    """Normalize observable table.

    Casts the known observable-table columns to strings, adds any of those
    columns that are missing (filled with ``pd.NA``), and replaces missing
    values by defaults: ``LIN`` for the observable transformation,
    ``NORMAL`` for the noise distribution and ``"1.0"`` for the noise
    formula.

    Missing values are kept as missing values during the string conversion
    instead of being turned into the literal text ``"nan"``, so that the
    defaults are actually applied.

    Arguments:
        df: Observable table
        inplace: Modify DataFrame in place
    Returns:
        The updated DataFrame, or ``None`` if ``df`` is ``None``.
    """
    if df is None:
        return None

    if not inplace:
        df = df.copy()

    col_to_type = {
        OBSERVABLE_ID: str,
        OBSERVABLE_NAME: str,
        OBSERVABLE_FORMULA: str,  # TODO -> sympy?
        NOISE_FORMULA: str,  # TODO -> sympy?
        NOISE_DISTRIBUTION: str,
        OBSERVABLE_TRANSFORMATION: str,
    }
    for col, dtype in col_to_type.items():
        if col not in df.columns:
            df[col] = pd.NA
        elif dtype is str:
            # ``astype(str)`` may stringify NaN to "nan", which would hide
            # the missing value from the ``fillna`` calls below.
            df[col] = df[col].astype(str).where(df[col].notna())
        else:
            df[col] = df[col].astype(dtype)

    df[OBSERVABLE_TRANSFORMATION] = df[OBSERVABLE_TRANSFORMATION].fillna(LIN)
    df[NOISE_DISTRIBUTION] = df[NOISE_DISTRIBUTION].fillna(NORMAL)
    # The column is declared as ``str`` above, so the default is a string
    # too rather than a float mixed into a string column.
    df[NOISE_FORMULA] = df[NOISE_FORMULA].fillna("1.0")

    return df
20 changes: 20 additions & 0 deletions petab/v2/problem.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from ..v1.C import * # noqa: F403
from ..v1.models.model import Model, model_factory
from ..v1.yaml import get_path_prefix
from .normalize import *

if TYPE_CHECKING:
from ..v2.lint import ValidationIssue, ValidationResultList, ValidationTask
Expand Down Expand Up @@ -717,3 +718,22 @@ def validate(
break

return validation_results

def normalize(self):
    """Normalize the tables of this PEtab problem.

    Runs the table normalizers on the condition, measurement, parameter and
    observable tables with ``inplace=True``, then applies the visualization
    defaults if a visualization table is set.
    """
    table_normalizers = (
        (normalize_condition_df, self.condition_df),
        (normalize_measurement_df, self.measurement_df),
        (normalize_parameter_df, self.parameter_df),
        (normalize_observable_df, self.observable_df),
        # TODO
        #  (normalize_mapping_df, self.mapping_df),
    )
    for normalize_table, table in table_normalizers:
        normalize_table(table, inplace=True)

    # Nothing more to do without a visualization table.
    if self.visualization_df is None:
        return

    from petab.v1.visualize.lint import _apply_defaults

    _apply_defaults(self.visualization_df)