diff --git a/beep/structure/biologic.py b/beep/structure/biologic.py index eba19265..fd63250e 100644 --- a/beep/structure/biologic.py +++ b/beep/structure/biologic.py @@ -108,7 +108,10 @@ def from_file(cls, path, mapping_file=None): if "cycle_index" not in columns and not mapping_file: raw["cycle_index"] = [int(float(i)) for i in raw["cycle number"]] elif "cycle_index" not in columns and mapping_file: - raise NotImplementedError("Missing cycle index and step mapping file") + if "Loop" in raw.keys(): + raw["cycle_index"] = get_cycle_index(raw["Ns"], mapping_file, loop_list=raw["Loop"]) + else: + raw["cycle_index"] = get_cycle_index(raw["Ns"], mapping_file) data = dict() for column_name in column_map.keys(): @@ -220,95 +223,66 @@ def parse_metadata(metadata_path): return metadata -def add_cycle_nums_to_file( - technique_csv_file_paths, - technique_serialized_transition_rules_file_paths, - technique_csv_out_file_paths, -): +def get_cycle_index(ns_list, serialized_transition_fp, loop_list=None): """ Processes CSV files generated from several biologic techniques and creates a new set of CSVs with an additional "cycle_index" column. - accepts - - technique_csv_file_paths: list of file paths to Biologic CSVs - - technique_serialized_transition_rules_file_paths: list of file paths to serialized CycleTransitionRules - - technique_csv_out_file_paths: list of filepaths to write new data to - - side-effects - - writes a new CSV file for every entry in csv_and_transition_rules_file_paths - - invariants - - all arguments must be of the same length - - the i-th entry form a logical tuple - - technique files appear in the order in which they were created - e.g. technique 1, then technique 2 etc. - - example call: - add_cycle_nums_to_csvs( - [ - os.path.join(MY_DIR, "protocol1_2a_technique_1.csv"), - os.path.join(MY_DIR, "protocol1_2a_technique_2.csv"), - ], - [ - os.path.join(MY_DIR, "protocol1_technique_1_transition_rules.json"), - os.path.join(MY_DIR, "protocol1_technique_2_transition_rules.json"), - ], - [ - os.path.join(MY_DIR, "protocol1_2a_technique_1_processed.csv"), - os.path.join(MY_DIR, "protocol1_2a_technique_2_processed.csv"), - ] - ) - """ - assert len(technique_csv_file_paths) == len(technique_csv_out_file_paths) - assert len(technique_csv_file_paths) == len( - technique_serialized_transition_rules_file_paths - ) + Args: + df (pandas.DataFrame): data frame of biologic file + serialized_transition_fp (path): path to mapping file containing step transitions where + cycle index should increment + + Returns: + df (pandas.DataFrame): data frame of biologic file with cycle_index and Tech Num added - technique_conversion_filepaths = zip( - technique_csv_file_paths, - technique_serialized_transition_rules_file_paths, - technique_csv_out_file_paths, - ) + """ serializer = CycleTransitionRulesSerializer() cycle_num = 1 - for csv_fp, serialized_transition_fp, csv_out_fp in technique_conversion_filepaths: - with open(serialized_transition_fp, "r") as f: - data = f.read() - cycle_transition_rules = serializer.parse_json(data) - - df = pd.read_csv(csv_fp, sep=";") - - cycle_num += cycle_transition_rules.adv_cycle_on_start - - prev_seq_num = int(df.iloc[0]["Ns"]) - prev_loop_num = int(df.iloc[0]["Loop"]) - cycle_nums = [] - tech_nums = [] - for _, row in df.iterrows(): - seq_num = int(row["Ns"]) - loop_num = int(row["Loop"]) + with open(serialized_transition_fp, "r") as f: + data = f.read() + cycle_transition_rules = serializer.parse_json(data) + + cycle_num += cycle_transition_rules.adv_cycle_on_start + + prev_seq_num = int(ns_list[0]) + if loop_list: + prev_loop_num = int(loop_list[0]) + cycle_nums = [] + tech_nums = [] + # TODO speed up by reducing logic and use list comprehension + if loop_list: + for indx, ns in enumerate(ns_list): + seq_num = int(ns) + loop_num = int(loop_list[indx]) # a transition may occur because of a loop technique or a loop seq, # it is possible to double count cycle advances if we don't handle them separately if loop_num != prev_loop_num: cycle_num += cycle_transition_rules.adv_cycle_on_tech_loop - elif seq_num != prev_seq_num: transition = (prev_seq_num, seq_num) cycle_num += cycle_transition_rules.adv_cycle_seq_transitions.get( transition, 0 ) - prev_loop_num = loop_num prev_seq_num = seq_num - + cycle_nums.append(cycle_num) + tech_nums.append(cycle_transition_rules.tech_num) + else: + for indx, ns in enumerate(ns_list): + seq_num = int(ns) + if seq_num != prev_seq_num: + transition = (prev_seq_num, seq_num) + cycle_num += cycle_transition_rules.adv_cycle_seq_transitions.get( + transition, 0 + ) + prev_seq_num = seq_num cycle_nums.append(cycle_num) tech_nums.append(cycle_transition_rules.tech_num) - df["cycle_index"] = cycle_nums - df["Tech Num"] = tech_nums - df.to_csv(csv_out_fp, sep=";") + return cycle_nums class CycleTransitionRules: diff --git a/beep/structure/tests/test_cyclerpaths.py b/beep/structure/tests/test_cyclerpaths.py index a74bd704..2761a9c5 100644 --- a/beep/structure/tests/test_cyclerpaths.py +++ b/beep/structure/tests/test_cyclerpaths.py @@ -26,7 +26,7 @@ from beep.structure.maccor import MaccorDatapath from beep.structure.neware import NewareDatapath from beep.structure.indigo import IndigoDatapath -from beep.structure.biologic import BiologicDatapath +from beep.structure.biologic import BiologicDatapath, get_cycle_index from beep.structure.battery_archive import BatteryArchiveDatapath from beep.tests.constants import TEST_FILE_DIR @@ -394,6 +394,27 @@ def test_from_txt(self): self.assertAlmostEqual(dp.structured_data["test_time"].min(), 13062.997, 3) self.assertAlmostEqual(dp.structured_data["test_time"].max(), 101972.886, 3) + def test_add_cycle_index(self): + + biologic_file = os.path.join( + TEST_FILE_DIR, "raw", "test_loopsnewoutput_MB_CE1_short10k.csv" + ) + df = pd.read_csv(biologic_file, sep=";") + ns_list = df["Ns"].tolist() + loop_list = df["Loop"].tolist() + biotest_file = os.path.join(TEST_FILE_DIR, "BioTest_000001.000.technique_1_cycle_rules.json") + cycle_index = get_cycle_index(ns_list, biotest_file, loop_list=loop_list) + c_i = pd.Series(cycle_index) + self.assertListEqual([1, 2, 3], c_i.unique().tolist()) + + def test_mapping_file(self): + biologic_file = os.path.join( + TEST_FILE_DIR, "raw", "test_loopsnewoutput_MB_CE1_short10k.txt" + ) + biotest_file = os.path.join(TEST_FILE_DIR, "BioTest_000001.000.technique_1_cycle_rules.json") + dp = BiologicDatapath.from_file(biologic_file, mapping_file=biotest_file) + self.assertIn("cycle_index", dp.raw_data.columns) + self.assertListEqual([1, 2, 3], dp.raw_data["cycle_index"].unique().tolist()) class TestNewareDatapath(unittest.TestCase): # based on RCRT.test_ingestion_neware diff --git a/beep/tests/test_files/BioTest_000001.000.technique_1_cycle_rules.json b/beep/tests/test_files/BioTest_000001.000.technique_1_cycle_rules.json new file mode 100644 index 00000000..ff0f4ce2 --- /dev/null +++ b/beep/tests/test_files/BioTest_000001.000.technique_1_cycle_rules.json @@ -0,0 +1,80 @@ +{ + "tech_num": 1, + "tech_does_loop": false, + "adv_cycle_on_start": 0, + "adv_cycle_on_tech_loop": 0, + "adv_cycle_seq_transitions": [ + { + "source": 5, + "target": 3, + "adv_cycle_count": 1 + }, + { + "source": 5, + "target": 7, + "adv_cycle_count": 1 + }, + { + "source": 16, + "target": 19, + "adv_cycle_count": 1 + }, + { + "source": 21, + "target": 24, + "adv_cycle_count": 1 + }, + { + "source": 26, + "target": 29, + "adv_cycle_count": 1 + }, + { + "source": 31, + "target": 34, + "adv_cycle_count": 1 + }, + { + "source": 39, + "target": 34, + "adv_cycle_count": 1 + } + ], + "debug_adv_cycle_on_step_transitions": [ + { + "source": 6, + "target": 5, + "adv_cycle_count": 1 + }, + { + "source": 6, + "target": 9, + "adv_cycle_count": 1 + }, + { + "source": 17, + "target": 19, + "adv_cycle_count": 1 + }, + { + "source": 20, + "target": 22, + "adv_cycle_count": 1 + }, + { + "source": 23, + "target": 25, + "adv_cycle_count": 1 + }, + { + "source": 26, + "target": 29, + "adv_cycle_count": 1 + }, + { + "source": 34, + "target": 29, + "adv_cycle_count": 1 + } + ] +}