Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move echem block to its own app submodule #433

Merged
merged 1 commit into from
Aug 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pydatalab/pydatalab/apps/echem/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .blocks import CycleBlock

__all__ = ("CycleBlock",)
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import os
import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, Union

import bokeh
import numpy as np
import pandas as pd
from bson import ObjectId
from navani import echem as ec
Expand All @@ -15,161 +14,12 @@
from pydatalab.logger import LOGGER
from pydatalab.mongo import flask_mongo
from pydatalab.simple_bokeh_plot import mytheme
from pydatalab.utils import reduce_df_size


def reduce_echem_cycle_sampling(df: pd.DataFrame, num_samples: int = 100) -> pd.DataFrame:
    """Downsample cycling data to at most `num_samples` points per half cycle.

    Each half cycle is reduced independently via `reduce_df_size`, which is
    called with `endpoint=True` so that the endpoint values of each half cycle
    are kept.

    Parameters:
        df: The echem dataframe to reduce, which must have cycling data stored
            under a `"half cycle"` column.
        num_samples: The maximum number of sample points to include per half cycle.

    Returns:
        The reduced dataframe, built by concatenating the reduced half cycles
        in group order. An empty dataframe is returned if `df` contains no
        half cycles.

    """
    # Collect the reduced pieces and concatenate once: concatenating inside the
    # loop re-copies the accumulated frame on every iteration, and seeding the
    # concatenation with an empty frame is deprecated in recent pandas.
    reduced_half_cycles = [
        reduce_df_size(half_cycle, num_samples, endpoint=True)
        for _, half_cycle in df.groupby("half cycle")
    ]

    if not reduced_half_cycles:
        # `pd.concat` raises on an empty list, so keep the empty-frame result.
        return pd.DataFrame([])

    return pd.concat(reduced_half_cycles)


def compute_gpcl_differential(
    df: pd.DataFrame,
    mode: str = "dQ/dV",
    smoothing: bool = True,
    polynomial_spline: int = 3,
    s_spline: float = 1e-5,
    window_size_1: int = 101,
    window_size_2: int = 1001,
    polyorder_1: int = 5,
    polyorder_2: int = 5,
    use_normalized_capacity: bool = False,
) -> pd.DataFrame:
    """Compute differential dQ/dV or dV/dQ for the input dataframe.

    The derivative is computed separately for every distinct value in the
    `"half cycle"` column using navani's `dqdv_single_cycle`. Half cycles for
    which that call raises a `TypeError` are skipped.

    Args:
        df: The input dataframe containing the raw cycling data.
        mode: Either 'dQ/dV' or 'dV/dQ'. Invalid inputs will default to 'dQ/dV'.
        smoothing: Whether or not to apply additional smoothing to the output differential curve.
        polynomial_spline: The degree of the B-spline fit used by navani.
        s_spline: The smoothing parameter used by navani.
        window_size_1: The window size for the `savgol` filter when smoothing the capacity.
            Even values are incremented by one before being passed on.
        window_size_2: The window size for the `savgol` filter when smoothing the final differential.
            Even values are incremented by one before being passed on.
        polyorder_1: The polynomial order for the `savgol` filter when smoothing the capacity.
        polyorder_2: The polynomial order for the `savgol` filter when smoothing the final differential.
        use_normalized_capacity: If True, read the capacity from the
            `"capacity (mAh/g)"` column instead of `"capacity (mAh)"`.

    Returns:
        A data frame containing the voltages, capacities and requested differential
        on the reduced cycle list, together with `"full cycle"` and `"half cycle"`
        index columns. If `df` has fewer than two rows it is returned unchanged;
        if every half cycle is skipped an empty dataframe is returned.

    """
    if len(df) < 2:
        LOGGER.debug(
            f"compute_gpcl_differential called on dataframe with length {len(df)}, too small to calculate derivatives"
        )
        return df

    capacity_label = "capacity (mAh/g)" if use_normalized_capacity else "capacity (mAh)"

    if mode.lower().replace("/", "") == "dvdq":
        y_label = "voltage (V)"
        x_label = capacity_label
        yp_label = "dV/dQ (V/mA)"
    else:
        y_label = capacity_label
        x_label = "voltage (V)"
        yp_label = "dQ/dV (mA/V)"

    smoothing_parameters = {
        "polynomial_spline": polynomial_spline,
        "s_spline": s_spline,
        # Window sizes are forced to be odd by bumping even values up by one.
        "window_size_1": window_size_1 if window_size_1 % 2 else window_size_1 + 1,
        "window_size_2": window_size_2 if window_size_2 % 2 else window_size_2 + 1,
        "polyorder_1": polyorder_1,
        "polyorder_2": polyorder_2,
        "final_smooth": smoothing,
    }

    # Per-half-cycle results are gathered here and concatenated once at the end,
    # rather than growing a dataframe with `pd.concat` on every iteration.
    differential_frames = []

    # Loop over distinct half cycles
    for cycle in df["half cycle"].unique():
        # Extract all segments corresponding to this half cycle index
        df_cycle = df[df["half cycle"] == cycle]

        # Compute the desired derivative
        try:
            x, yp, y = ec.dqdv_single_cycle(
                df_cycle[y_label], df_cycle[x_label], **smoothing_parameters
            )
        except TypeError as e:
            LOGGER.debug(
                f"Calculating derivative {mode} of half_cycle {cycle} failed with the following error (likely it is a rest or voltage hold):\n"
                f"{e}\n"
                "Skipping derivative calculation for this half cycle."
            )
            continue

        # Set up an array per cycle segment that stores the cycle and half-cycle index
        cycle_index = df_cycle["full cycle"].max()
        cycle_index_array = np.full(len(x), int(cycle_index), dtype=int)
        half_cycle_index_array = np.full(len(x), int(cycle), dtype=int)

        differential_frames.append(
            pd.DataFrame(
                {
                    x_label: x,
                    y_label: y,
                    yp_label: yp,
                    "full cycle": cycle_index_array,
                    "half cycle": half_cycle_index_array,
                }
            )
        )

    if not differential_frames:
        # `pd.concat` raises on an empty list; return an empty frame instead.
        return pd.DataFrame()

    return pd.concat(differential_frames)


def filter_df_by_cycle_index(
    df: pd.DataFrame, cycle_list: Optional[List[int]] = None
) -> pd.DataFrame:
    """Filter the input dataframe down to the rows of the cycles in `cycle_list`.

    If `"half cycle"` is a column in the df it is used for filtering: each
    entry `n` of `cycle_list` selects the half cycles `2n - 1` and `2n`.
    Otherwise the `"cycle index"` column is matched directly against the
    entries of `cycle_list`.

    Args:
        df: The input dataframe to filter. Must have either a "half cycle" or
            a "cycle index" column.
        cycle_list: The provided list of cycle indices to keep. If `None`, the
            dataframe is returned unfiltered.

    Returns:
        A dataframe with all the data for the selected cycles.

    Raises:
        ValueError: If the dataframe has neither a "half cycle" nor a
            "cycle index" column, or if an entry of `cycle_list` cannot be
            converted to an integer when filtering by half cycle.

    """
    if cycle_list is None:
        return df

    if "half cycle" not in df.columns:
        if "cycle index" not in df.columns:
            raise ValueError(
                "Input dataframe must have either 'half cycle' or 'cycle index' column"
            )
        return df[df["cycle index"].isin(list(cycle_list))]

    try:
        half_cycles = [
            half_cycle
            for item in cycle_list
            for half_cycle in (2 * int(item) - 1, 2 * int(item))
        ]
    except (TypeError, ValueError) as exc:
        # `int()` raises TypeError (not ValueError) for items such as None, so
        # both are caught to report every unparseable entry the same way.
        raise ValueError(
            f"Unable to parse `cycle_list` as integers: {cycle_list}. Error: {exc}"
        ) from exc
    return df[df["half cycle"].isin(half_cycles)]
from .utils import (
compute_gpcl_differential,
filter_df_by_cycle_index,
reduce_echem_cycle_sampling,
)


class CycleBlock(DataBlock):
Expand Down
162 changes: 162 additions & 0 deletions pydatalab/pydatalab/apps/echem/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
from typing import List, Optional

import navani.echem as ec
import numpy as np
import pandas as pd

from pydatalab.logger import LOGGER
from pydatalab.utils import reduce_df_size


def reduce_echem_cycle_sampling(df: pd.DataFrame, num_samples: int = 100) -> pd.DataFrame:
    """Downsample cycling data to at most `num_samples` points per half cycle.

    Each half cycle is reduced independently via `reduce_df_size`, which is
    called with `endpoint=True` so that the endpoint values of each half cycle
    are kept.

    Parameters:
        df: The echem dataframe to reduce, which must have cycling data stored
            under a `"half cycle"` column.
        num_samples: The maximum number of sample points to include per half cycle.

    Returns:
        The reduced dataframe, built by concatenating the reduced half cycles
        in group order. An empty dataframe is returned if `df` contains no
        half cycles.

    """
    # Collect the reduced pieces and concatenate once: concatenating inside the
    # loop re-copies the accumulated frame on every iteration, and seeding the
    # concatenation with an empty frame is deprecated in recent pandas.
    reduced_half_cycles = [
        reduce_df_size(half_cycle, num_samples, endpoint=True)
        for _, half_cycle in df.groupby("half cycle")
    ]

    if not reduced_half_cycles:
        # `pd.concat` raises on an empty list, so keep the empty-frame result.
        return pd.DataFrame([])

    return pd.concat(reduced_half_cycles)


def compute_gpcl_differential(
    df: pd.DataFrame,
    mode: str = "dQ/dV",
    smoothing: bool = True,
    polynomial_spline: int = 3,
    s_spline: float = 1e-5,
    window_size_1: int = 101,
    window_size_2: int = 1001,
    polyorder_1: int = 5,
    polyorder_2: int = 5,
    use_normalized_capacity: bool = False,
) -> pd.DataFrame:
    """Compute differential dQ/dV or dV/dQ for the input dataframe.

    The derivative is computed separately for every distinct value in the
    `"half cycle"` column using navani's `dqdv_single_cycle`. Half cycles for
    which that call raises a `TypeError` are skipped.

    Args:
        df: The input dataframe containing the raw cycling data.
        mode: Either 'dQ/dV' or 'dV/dQ'. Invalid inputs will default to 'dQ/dV'.
        smoothing: Whether or not to apply additional smoothing to the output differential curve.
        polynomial_spline: The degree of the B-spline fit used by navani.
        s_spline: The smoothing parameter used by navani.
        window_size_1: The window size for the `savgol` filter when smoothing the capacity.
            Even values are incremented by one before being passed on.
        window_size_2: The window size for the `savgol` filter when smoothing the final differential.
            Even values are incremented by one before being passed on.
        polyorder_1: The polynomial order for the `savgol` filter when smoothing the capacity.
        polyorder_2: The polynomial order for the `savgol` filter when smoothing the final differential.
        use_normalized_capacity: If True, read the capacity from the
            `"capacity (mAh/g)"` column instead of `"capacity (mAh)"`.

    Returns:
        A data frame containing the voltages, capacities and requested differential
        on the reduced cycle list, together with `"full cycle"` and `"half cycle"`
        index columns. If `df` has fewer than two rows it is returned unchanged;
        if every half cycle is skipped an empty dataframe is returned.

    """
    if len(df) < 2:
        LOGGER.debug(
            f"compute_gpcl_differential called on dataframe with length {len(df)}, too small to calculate derivatives"
        )
        return df

    capacity_label = "capacity (mAh/g)" if use_normalized_capacity else "capacity (mAh)"

    if mode.lower().replace("/", "") == "dvdq":
        y_label = "voltage (V)"
        x_label = capacity_label
        yp_label = "dV/dQ (V/mA)"
    else:
        y_label = capacity_label
        x_label = "voltage (V)"
        yp_label = "dQ/dV (mA/V)"

    smoothing_parameters = {
        "polynomial_spline": polynomial_spline,
        "s_spline": s_spline,
        # Window sizes are forced to be odd by bumping even values up by one.
        "window_size_1": window_size_1 if window_size_1 % 2 else window_size_1 + 1,
        "window_size_2": window_size_2 if window_size_2 % 2 else window_size_2 + 1,
        "polyorder_1": polyorder_1,
        "polyorder_2": polyorder_2,
        "final_smooth": smoothing,
    }

    # Per-half-cycle results are gathered here and concatenated once at the end,
    # rather than growing a dataframe with `pd.concat` on every iteration.
    differential_frames = []

    # Loop over distinct half cycles
    for cycle in df["half cycle"].unique():
        # Extract all segments corresponding to this half cycle index
        df_cycle = df[df["half cycle"] == cycle]

        # Compute the desired derivative
        try:
            x, yp, y = ec.dqdv_single_cycle(
                df_cycle[y_label], df_cycle[x_label], **smoothing_parameters
            )
        except TypeError as e:
            LOGGER.debug(
                f"Calculating derivative {mode} of half_cycle {cycle} failed with the following error (likely it is a rest or voltage hold):\n"
                f"{e}\n"
                "Skipping derivative calculation for this half cycle."
            )
            continue

        # Set up an array per cycle segment that stores the cycle and half-cycle index
        cycle_index = df_cycle["full cycle"].max()
        cycle_index_array = np.full(len(x), int(cycle_index), dtype=int)
        half_cycle_index_array = np.full(len(x), int(cycle), dtype=int)

        differential_frames.append(
            pd.DataFrame(
                {
                    x_label: x,
                    y_label: y,
                    yp_label: yp,
                    "full cycle": cycle_index_array,
                    "half cycle": half_cycle_index_array,
                }
            )
        )

    if not differential_frames:
        # `pd.concat` raises on an empty list; return an empty frame instead.
        return pd.DataFrame()

    return pd.concat(differential_frames)


def filter_df_by_cycle_index(
    df: pd.DataFrame, cycle_list: Optional[List[int]] = None
) -> pd.DataFrame:
    """Filter the input dataframe down to the rows of the cycles in `cycle_list`.

    If `"half cycle"` is a column in the df it is used for filtering: each
    entry `n` of `cycle_list` selects the half cycles `2n - 1` and `2n`.
    Otherwise the `"cycle index"` column is matched directly against the
    entries of `cycle_list`.

    Args:
        df: The input dataframe to filter. Must have either a "half cycle" or
            a "cycle index" column.
        cycle_list: The provided list of cycle indices to keep. If `None`, the
            dataframe is returned unfiltered.

    Returns:
        A dataframe with all the data for the selected cycles.

    Raises:
        ValueError: If the dataframe has neither a "half cycle" nor a
            "cycle index" column, or if an entry of `cycle_list` cannot be
            converted to an integer when filtering by half cycle.

    """
    if cycle_list is None:
        return df

    if "half cycle" not in df.columns:
        if "cycle index" not in df.columns:
            raise ValueError(
                "Input dataframe must have either 'half cycle' or 'cycle index' column"
            )
        return df[df["cycle index"].isin(list(cycle_list))]

    try:
        half_cycles = [
            half_cycle
            for item in cycle_list
            for half_cycle in (2 * int(item) - 1, 2 * int(item))
        ]
    except (TypeError, ValueError) as exc:
        # `int()` raises TypeError (not ValueError) for items such as None, so
        # both are caught to report every unparseable entry the same way.
        raise ValueError(
            f"Unable to parse `cycle_list` as integers: {cycle_list}. Error: {exc}"
        ) from exc
    return df[df["half cycle"].isin(half_cycles)]
2 changes: 1 addition & 1 deletion pydatalab/pydatalab/blocks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Dict, Sequence, Type

from pydatalab.apps.chat.blocks import ChatBlock
from pydatalab.apps.echem import CycleBlock
from pydatalab.apps.eis import EISBlock
from pydatalab.apps.raman import RamanBlock
from pydatalab.apps.tga import MassSpecBlock
Expand All @@ -12,7 +13,6 @@
NMRBlock,
NotSupportedBlock,
)
from pydatalab.blocks.echem_block import CycleBlock

BLOCKS: Sequence[Type[DataBlock]] = (
DataBlock,
Expand Down
2 changes: 1 addition & 1 deletion pydatalab/tests/blocks/test_echem_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest
from navani.echem import echem_file_loader

from pydatalab.blocks.echem_block import (
from pydatalab.apps.echem.utils import (
compute_gpcl_differential,
filter_df_by_cycle_index,
reduce_echem_cycle_sampling,
Expand Down
Loading