Merge pull request #441 from TRI-AMDD/MAT-2992_create_ability_to_add_cycle_index

Mat 2992 create ability to add cycle index
TRI-AMDD · Oct 28, 2021 · c2e9d5c · c2e9d5c
2 parents 78fee57 + e6befe5
commit c2e9d5c
Show file tree

Hide file tree

Showing 3 changed files with 143 additions and 68 deletions.
diff --git a/beep/structure/biologic.py b/beep/structure/biologic.py
@@ -108,7 +108,10 @@ def from_file(cls, path, mapping_file=None):
         if "cycle_index" not in columns and not mapping_file:
             raw["cycle_index"] = [int(float(i)) for i in raw["cycle number"]]
         elif "cycle_index" not in columns and mapping_file:
-            raise NotImplementedError("Missing cycle index and step mapping file")
+            if "Loop" in raw.keys():
+                raw["cycle_index"] = get_cycle_index(raw["Ns"], mapping_file, loop_list=raw["Loop"])
+            else:
+                raw["cycle_index"] = get_cycle_index(raw["Ns"], mapping_file)
 
         data = dict()
         for column_name in column_map.keys():
@@ -220,95 +223,66 @@ def parse_metadata(metadata_path):
         return metadata
 
 
-def add_cycle_nums_to_file(
-    technique_csv_file_paths,
-    technique_serialized_transition_rules_file_paths,
-    technique_csv_out_file_paths,
-):
+def get_cycle_index(ns_list, serialized_transition_fp, loop_list=None):
     """
     Computes a "cycle_index" value for every row of a biologic file from its
     sequence ("Ns") numbers and, when given, loop ("Loop") numbers, using serialized transition rules.
 
-    accepts
-      - technique_csv_file_paths: list of file paths to Biologic CSVs
-      - technique_serialized_transition_rules_file_paths: list of file paths to serialized CycleTransitionRules
-      - technique_csv_out_file_paths: list of filepaths to write new data to
-
-    side-effects
-       - writes a new CSV file for every entry in csv_and_transition_rules_file_paths
-
-    invariants
-        - all arguments must be of the same length
-        - the i-th entry form a logical tuple
-        - technique files appear in the order in which they were created
-          e.g. technique 1, then technique 2 etc.
-
-    example call:
-    add_cycle_nums_to_csvs(
-        [
-            os.path.join(MY_DIR, "protocol1_2a_technique_1.csv"),
-            os.path.join(MY_DIR, "protocol1_2a_technique_2.csv"),
-        ],
-        [
-            os.path.join(MY_DIR, "protocol1_technique_1_transition_rules.json"),
-            os.path.join(MY_DIR, "protocol1_technique_2_transition_rules.json"),
-        ],
-        [
-            os.path.join(MY_DIR, "protocol1_2a_technique_1_processed.csv"),
-            os.path.join(MY_DIR, "protocol1_2a_technique_2_processed.csv"),
-        ]
-    )
-    """
-    assert len(technique_csv_file_paths) == len(technique_csv_out_file_paths)
-    assert len(technique_csv_file_paths) == len(
-        technique_serialized_transition_rules_file_paths
-    )
+    Args:
+        ns_list (list): sequence numbers (the "Ns" column) of the biologic file, one per row
+        serialized_transition_fp (path): path to mapping file containing step transitions where
+            cycle index should increment
+        loop_list (list): optional loop numbers (the "Loop" column), aligned with ns_list
+
+    Returns:
+        list: cycle index for each entry in ns_list
 
-    technique_conversion_filepaths = zip(
-        technique_csv_file_paths,
-        technique_serialized_transition_rules_file_paths,
-        technique_csv_out_file_paths,
-    )
+    """
 
     serializer = CycleTransitionRulesSerializer()
     cycle_num = 1
-    for csv_fp, serialized_transition_fp, csv_out_fp in technique_conversion_filepaths:
-        with open(serialized_transition_fp, "r") as f:
-            data = f.read()
-            cycle_transition_rules = serializer.parse_json(data)
-
-        df = pd.read_csv(csv_fp, sep=";")
-
-        cycle_num += cycle_transition_rules.adv_cycle_on_start
-
-        prev_seq_num = int(df.iloc[0]["Ns"])
-        prev_loop_num = int(df.iloc[0]["Loop"])
-        cycle_nums = []
-        tech_nums = []
-        for _, row in df.iterrows():
-            seq_num = int(row["Ns"])
-            loop_num = int(row["Loop"])
 
+    with open(serialized_transition_fp, "r") as f:
+        data = f.read()
+        cycle_transition_rules = serializer.parse_json(data)
+
+    cycle_num += cycle_transition_rules.adv_cycle_on_start
+
+    prev_seq_num = int(ns_list[0])
+    if loop_list:
+        prev_loop_num = int(loop_list[0])
+    cycle_nums = []
+    tech_nums = []
+    # TODO speed up by reducing logic and use list comprehension
+    if loop_list:
+        for indx, ns in enumerate(ns_list):
+            seq_num = int(ns)
+            loop_num = int(loop_list[indx])
             # a transition may occur because of a loop technique or a loop seq,
             # it is possible to double count cycle advances if we don't handle them separately
             if loop_num != prev_loop_num:
                 cycle_num += cycle_transition_rules.adv_cycle_on_tech_loop
-
             elif seq_num != prev_seq_num:
                 transition = (prev_seq_num, seq_num)
                 cycle_num += cycle_transition_rules.adv_cycle_seq_transitions.get(
                     transition, 0
                 )
-
             prev_loop_num = loop_num
             prev_seq_num = seq_num
-
+            cycle_nums.append(cycle_num)
+            tech_nums.append(cycle_transition_rules.tech_num)
+    else:
+        for indx, ns in enumerate(ns_list):
+            seq_num = int(ns)
+            if seq_num != prev_seq_num:
+                transition = (prev_seq_num, seq_num)
+                cycle_num += cycle_transition_rules.adv_cycle_seq_transitions.get(
+                    transition, 0
+                )
+            prev_seq_num = seq_num
             cycle_nums.append(cycle_num)
             tech_nums.append(cycle_transition_rules.tech_num)
 
-        df["cycle_index"] = cycle_nums
-        df["Tech Num"] = tech_nums
-        df.to_csv(csv_out_fp, sep=";")
+    return cycle_nums
 
 
 class CycleTransitionRules:

diff --git a/beep/structure/tests/test_cyclerpaths.py b/beep/structure/tests/test_cyclerpaths.py
@@ -26,7 +26,7 @@
 from beep.structure.maccor import MaccorDatapath
 from beep.structure.neware import NewareDatapath
 from beep.structure.indigo import IndigoDatapath
-from beep.structure.biologic import BiologicDatapath
+from beep.structure.biologic import BiologicDatapath, get_cycle_index
 from beep.structure.battery_archive import BatteryArchiveDatapath
 from beep.tests.constants import TEST_FILE_DIR
 
@@ -394,6 +394,27 @@ def test_from_txt(self):
         self.assertAlmostEqual(dp.structured_data["test_time"].min(), 13062.997, 3)
         self.assertAlmostEqual(dp.structured_data["test_time"].max(), 101972.886, 3)
 
+    def test_add_cycle_index(self):
+
+        biologic_file = os.path.join(
+            TEST_FILE_DIR, "raw", "test_loopsnewoutput_MB_CE1_short10k.csv"
+        )
+        df = pd.read_csv(biologic_file, sep=";")
+        ns_list = df["Ns"].tolist()
+        loop_list = df["Loop"].tolist()
+        biotest_file = os.path.join(TEST_FILE_DIR, "BioTest_000001.000.technique_1_cycle_rules.json")
+        cycle_index = get_cycle_index(ns_list, biotest_file, loop_list=loop_list)
+        c_i = pd.Series(cycle_index)
+        self.assertListEqual([1, 2, 3], c_i.unique().tolist())
+
+    def test_mapping_file(self):
+        biologic_file = os.path.join(
+            TEST_FILE_DIR, "raw", "test_loopsnewoutput_MB_CE1_short10k.txt"
+        )
+        biotest_file = os.path.join(TEST_FILE_DIR, "BioTest_000001.000.technique_1_cycle_rules.json")
+        dp = BiologicDatapath.from_file(biologic_file, mapping_file=biotest_file)
+        self.assertIn("cycle_index", dp.raw_data.columns)
+        self.assertListEqual([1, 2, 3], dp.raw_data["cycle_index"].unique().tolist())
 
 class TestNewareDatapath(unittest.TestCase):
     # based on RCRT.test_ingestion_neware

diff --git a/beep/tests/test_files/BioTest_000001.000.technique_1_cycle_rules.json b/beep/tests/test_files/BioTest_000001.000.technique_1_cycle_rules.json
@@ -0,0 +1,80 @@
+{
+  "tech_num": 1,
+  "tech_does_loop": false,
+  "adv_cycle_on_start": 0,
+  "adv_cycle_on_tech_loop": 0,
+  "adv_cycle_seq_transitions": [
+    {
+      "source": 5,
+      "target": 3,
+      "adv_cycle_count": 1
+    },
+    {
+      "source": 5,
+      "target": 7,
+      "adv_cycle_count": 1
+    },
+    {
+      "source": 16,
+      "target": 19,
+      "adv_cycle_count": 1
+    },
+    {
+      "source": 21,
+      "target": 24,
+      "adv_cycle_count": 1
+    },
+    {
+      "source": 26,
+      "target": 29,
+      "adv_cycle_count": 1
+    },
+    {
+      "source": 31,
+      "target": 34,
+      "adv_cycle_count": 1
+    },
+    {
+      "source": 39,
+      "target": 34,
+      "adv_cycle_count": 1
+    }
+  ],
+  "debug_adv_cycle_on_step_transitions": [
+    {
+      "source": 6,
+      "target": 5,
+      "adv_cycle_count": 1
+    },
+    {
+      "source": 6,
+      "target": 9,
+      "adv_cycle_count": 1
+    },
+    {
+      "source": 17,
+      "target": 19,
+      "adv_cycle_count": 1
+    },
+    {
+      "source": 20,
+      "target": 22,
+      "adv_cycle_count": 1
+    },
+    {
+      "source": 23,
+      "target": 25,
+      "adv_cycle_count": 1
+    },
+    {
+      "source": 26,
+      "target": 29,
+      "adv_cycle_count": 1
+    },
+    {
+      "source": 34,
+      "target": 29,
+      "adv_cycle_count": 1
+    }
+  ]
+}