StanfordSpezi · Vicbi · Jul 24, 2024 · Jul 8, 2024 · Jul 8, 2024 · Jul 8, 2024
diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml
@@ -94,9 +94,16 @@ jobs:
 
       - name: Check if version already exists on PyPI/Test PyPI
         run: |
-          VERSION_EXISTS=$(curl -s ${{ needs.determine_environment.outputs.repo }}pypi/spezi_data_pipeline/json
-          | jq -r ".releases
-          | has(\"${{ needs.determine_environment.outputs.version }}\")")
+          # Map the upload endpoint to the matching read-only JSON API endpoint.
+          REPO_URL=${{ needs.determine_environment.outputs.repo }}
+          PACKAGE_VERSION=${{ needs.determine_environment.outputs.version }}
+          if [ "$REPO_URL" == "https://upload.pypi.org/legacy/" ]; then
+            PYPI_URL="https://pypi.org/pypi/spezi_data_pipeline/json"
+          else
+            PYPI_URL="https://test.pypi.org/pypi/spezi_data_pipeline/json"
+          fi
+          RESPONSE=$(curl -s "$PYPI_URL")
+          VERSION_EXISTS=$(echo "$RESPONSE" | jq -r ".releases | has(\"$PACKAGE_VERSION\")")
           if [ "$VERSION_EXISTS" = "true" ]; then
             echo "Version already exists. Exiting."
             exit 1

diff --git a/README.md b/README.md
@@ -182,8 +182,9 @@ visualizer.set_date_range(selected_start_date, selected_end_date)
 figs = visualizer.create_static_plot(processed_fhir_dataframe)
 ```
 
-![daily_steps_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipeline/blob/main/Figures/daily_steps_data_plot.png)
-![heart_rate_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipeline/blob/main/Figures/heart_rate_data_plot.png)
+![daily_steps_data_plot.png](https://raw.githubusercontent.com/StanfordSpezi/SpeziDataPipeline/main/Figures/daily_steps_data_plot.png)
+![heart_rate_data_plot.png](https://raw.githubusercontent.com/StanfordSpezi/SpeziDataPipeline/main/Figures/heart_rate_data_plot.png)
+
 
 ## ECG Observations
 
@@ -209,7 +210,7 @@ visualizer.set_date_range(selected_start_date, selected_end_date)
 figs = visualizer.plot_ecg_subplots(processed_fhir_dataframe)
 ```
 
-![ecg_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipeline/blob/main/Figures/ecg_data_plot.png)
+![ecg_data_plot.png](https://raw.githubusercontent.com/StanfordSpezi/SpeziDataPipeline/main/Figures/ecg_data_plot.png)
 
 
 ### Questionnaire Responses

diff --git a/src/spezi_data_pipeline/data_access/firebase_fhir_data_access.py b/src/spezi_data_pipeline/data_access/firebase_fhir_data_access.py
@@ -35,7 +35,7 @@
 # Standard library imports
 import json
 import os
-from typing import Any
+from typing import Any, Optional
 
 # Related third-party imports
 from dataclasses import dataclass
@@ -77,22 +77,31 @@ class FirebaseFHIRAccess:  # pylint: disable=unused-variable
 
     Attributes:
         project_id (str): Identifier of the Firebase project.
-        service_account_key_file (str): Path to the Firebase service account key file for
+        service_account_key_file (str | None): Path to the Firebase service account key file for
                                         authentication.
-        db (Optional[firestore.Client]): A Firestore client instance for database operations,
+        db (firestore.Client | None): A Firestore client instance for database operations,
                                           initialized upon successful connection.
     """
 
     def __init__(
-        self, project_id: str, service_account_key_file: str | None = None
+        self,
+        project_id: Optional[  # pylint: disable=consider-alternative-union-syntax
+            str
+        ] = None,
+        service_account_key_file: Optional[  # pylint: disable=consider-alternative-union-syntax
+            str
+        ] = None,
+        db: Optional[  # pylint: disable=consider-alternative-union-syntax
+            firestore.Client
+        ] = None,
     ) -> None:
         """
         Initializes the FirebaseFHIRAccess instance with Firebase service account
         credentials and project ID.
         """
         self.project_id = project_id
         self.service_account_key_file = service_account_key_file
-        self.db = None
+        self.db = db  # Caller-supplied Firestore client, or None when not provided
 
     def connect(self) -> None:
         """

diff --git a/src/spezi_data_pipeline/data_exploration/data_explorer.py b/src/spezi_data_pipeline/data_exploration/data_explorer.py
@@ -425,11 +425,22 @@
             )
 
             if row[ColumnNames.ECG_RECORDING.value] is not None:
-                ecg_array = np.array(
-                    row[ColumnNames.ECG_RECORDING.value].split(), dtype=float
-                )
+                if isinstance(row[ColumnNames.ECG_RECORDING.value], list):
+                    ecg_array = np.array(
+                        row[ColumnNames.ECG_RECORDING.value], dtype=float
+                    )
+                else:
+                    ecg_array = np.array(
+                        row[ColumnNames.ECG_RECORDING.value].split(), dtype=float
+                    )
+
                 if row[ColumnNames.ECG_RECORDING_UNIT.value] == ECG_MICROVOLT_UNIT:
                     ecg_array = ecg_array / 1000  # Convert uV to mV
+                elif row[ColumnNames.ECG_RECORDING_UNIT.value] != ECG_MICROVOLT_UNIT:
+                    print(
+                        "ECG units must be in either uV or mV. Check units and plot again."
+                    )
+                    return figures
 
                 sample_rate = row.get(
                     ColumnNames.SAMPLING_FREQUENCY.value, DEFAULT_SAMPLE_RATE_VALUE
@@ -690,34 +701,42 @@
     - None
     """
 
-    df["EffectiveDateTime"] = pd.to_datetime(df["EffectiveDateTime"])
+    df[ColumnNames.EFFECTIVE_DATE_TIME.value] = pd.to_datetime(
+        df[ColumnNames.EFFECTIVE_DATE_TIME.value]
+    )
 
     if start_date is not None and end_date is not None:
         df = df[
-            (df["EffectiveDateTime"] >= start_date)
-            & (df["EffectiveDateTime"] <= end_date)
+            (df[ColumnNames.EFFECTIVE_DATE_TIME.value] >= start_date)
+            & (df[ColumnNames.EFFECTIVE_DATE_TIME.value] <= end_date)
         ]
 
     if isinstance(user_ids, str):
         user_ids = [user_ids]
 
     if user_ids is not None:
-        df = df[df["UserId"].isin(user_ids)]
+        df = df[df[ColumnNames.USER_ID.value].isin(user_ids)]
 
-    counts = df.groupby(["LoincCode", "UserId"]).size().unstack(fill_value=0)
+    counts = (
+        df.groupby([ColumnNames.LOINC_CODE.value, ColumnNames.USER_ID.value])
+        .size()
+        .unstack(fill_value=0)
+    )
 
-    plt.figure(figsize=(40, 50))
-    counts.plot(kind="bar")
-    plt.title("Number of records by Loinc code", fontsize=16)
-    plt.xlabel("Loinc code", fontsize=14)
-    plt.ylabel("Count", fontsize=14)
-    plt.xticks(rotation=45, ha="right", fontsize=12)
+    plt.figure(figsize=(20, 10))
+    ax = counts.plot(kind="bar", stacked=True, figsize=(20, 10))
+    plt.title("Number of Records by LOINC Code", fontsize=20)
+    plt.xlabel("LOINC Code", fontsize=20)
+    plt.ylabel("Count", fontsize=20)
+    plt.xticks(rotation=45, ha="right", fontsize=16)
     plt.legend(
         title="User ID",
-        fontsize=12,
+        fontsize=14,
         title_fontsize=14,
         bbox_to_anchor=(1.05, 1),
         loc="upper left",
     )
     plt.tight_layout()
     plt.show()
+
+    return ax # For test inspection
diff --git a/src/spezi_data_pipeline/data_flattening/fhir_resources_flattener.py b/src/spezi_data_pipeline/data_flattening/fhir_resources_flattener.py
@@ -155,7 +155,7 @@ class ColumnNames(Enum):
         NUMBER_OF_MEASUREMENTS: Number of measurements taken.
         SAMPLING_FREQUENCY: Frequency at which data was sampled.
         SAMPLING_FREQUENCY_UNIT: Unit for the sampling frequency.
-        ELECTROCARDIOGRAM_CLASSIFICATION: Classification of the ECG observation.
+        APPLE_ELECTROCARDIOGRAM_CLASSIFICATION: Classification of the ECG observation.
         HEART_RATE: Observed heart rate.
         HEART_RATE_UNIT: Unit of the observed heart rate.
         ECG_RECORDING_UNIT: Unit for ECG recording data.
@@ -179,10 +179,10 @@ class ColumnNames(Enum):
     NUMBER_OF_MEASUREMENTS = "NumberOfMeasurements"
     SAMPLING_FREQUENCY = "SamplingFrequency"
     SAMPLING_FREQUENCY_UNIT = "SamplingFrequencyUnit"
-    ELECTROCARDIOGRAM_CLASSIFICATION = "ElectrocardiogramClassification"
+    APPLE_ELECTROCARDIOGRAM_CLASSIFICATION = "AppleElectrocardiogramClassification"
     HEART_RATE = "HeartRate"
     HEART_RATE_UNIT = "HeartRateUnit"
-    ECG_RECORDING_UNIT = "ECGDataRecordingUnit"
+    ECG_RECORDING_UNIT = "ECGRecordingUnit"
     ECG_RECORDING = "ECGRecording"
     AUTHORED_DATE = "AuthoredDate"
     QUESTIONNAIRE_TITLE = "QuestionnaireTitle"
@@ -382,7 +382,7 @@ def __init__(self, resource_type: FHIRResourceType):
                 ColumnNames.NUMBER_OF_MEASUREMENTS,
                 ColumnNames.SAMPLING_FREQUENCY,
                 ColumnNames.SAMPLING_FREQUENCY_UNIT,
-                ColumnNames.ELECTROCARDIOGRAM_CLASSIFICATION,
+                ColumnNames.APPLE_ELECTROCARDIOGRAM_CLASSIFICATION,
                 ColumnNames.HEART_RATE,
                 ColumnNames.HEART_RATE_UNIT,
                 ColumnNames.ECG_RECORDING_UNIT,
@@ -586,7 +586,7 @@ def flatten(
                 .get(KeyNames.COMPONENT.value, [{}])[1]
                 .get(KeyNames.VALUE_QUANTITY.value, {})
                 .get(KeyNames.UNIT.value, None),
-                ColumnNames.ELECTROCARDIOGRAM_CLASSIFICATION.value: observation.dict()
+                ColumnNames.APPLE_ELECTROCARDIOGRAM_CLASSIFICATION.value: observation.dict()
                 .get(KeyNames.COMPONENT.value, [{}])[2]
                 .get(KeyNames.VALUE_STRING.value, None),
                 ColumnNames.HEART_RATE.value: observation.dict()

diff --git a/tests/test_data_access.py b/tests/test_data_access.py
@@ -61,6 +61,7 @@ class TestFirebaseFHIRAccess(unittest.TestCase):  # pylint: disable=unused-varia
     def setUp(self):
         self.project_id = "test-project"
         self.service_account_key_file = "/path/to/service/account.json"
+        self.mock_db = MagicMock()
 
     @patch("os.path.exists")
     @patch("os.environ")
@@ -167,7 +168,6 @@ def test_fetch_data_valid_loinc_code(self, mock_firestore):
             "users", "HealthKit", [ECG_RECORDING_LOINC_CODE]
         )
 
-        # Verify
         self.assertIsNotNone(result)
         self.assertEqual(len(result), 0)
 

diff --git a/tests/test_data_exploration.py b/tests/test_data_exploration.py
@@ -48,6 +48,7 @@
     DataExplorer,
     ECGExplorer,
     QuestionnaireResponseExplorer,
+    explore_total_records_number,
 )
 
 USER_ID1 = "user1"
@@ -176,10 +177,12 @@
         user_data = self.fhir_dataframe.df[
             self.fhir_dataframe.df[ColumnNames.USER_ID.value] == USER_ID1
         ]
-        figs = self.explorer.plot_single_user_ecg(user_data, USER_ID1)
-        self.assertIsNotNone(figs)
-        self.assertIsInstance(figs, list)
-        self.assertIsInstance(figs[0], plt.Figure)
+
+        if figs := self.explorer.plot_single_user_ecg(user_data, USER_ID1):
+            self.assertIsInstance(figs[0], plt.Figure)
+            self.assertIsInstance(figs, list)
+        else:
+            self.assertEqual(len(figs), 0)
 
     def test_no_ecg_data(self):
         self.explorer.set_date_range("2024-01-01", "2024-01-31")
@@ -269,5 +272,60 @@
         self.assertIsNone(fig)
 
 
+class TestExploreTotalRecordsNumber(
+    unittest.TestCase
+):  # pylint: disable=unused-variable
+    """
+    Test the explore_total_records_number function.
+
+    This test class ensures that the function behaves correctly by creating a bar plot
+    showing the count of rows with the same LoincCode column value within the specified
+    date range and for the specified user IDs.
+
+    The tests include:
+    - Verifying that the function can handle input data and generate a plot.
+    - Ensuring that plt.show() is called to display the plot.
+    - Checking that the number of bars in the plot corresponds to the number of unique
+      LOINC codes in the input data.
+
+    Methods:
+    - test_explore_total_records_number: Calls the function on a small in-memory
+      DataFrame with plt.show() patched out and checks that one stacked bar is drawn
+      per unique LOINC code.
+    """
+
+    @patch("matplotlib.pyplot.show")
+    def test_explore_total_records_number(self, mock_show):
+        self.addCleanup(plt.close, "all")  # Do not leak figures into other tests
+        data = {
+            ColumnNames.EFFECTIVE_DATE_TIME.value: [
+                "2023-01-01",
+                "2023-01-02",
+                "2023-01-03",
+            ],
+            ColumnNames.USER_ID.value: ["user1", "user2", "user1"],
+            ColumnNames.LOINC_CODE.value: ["code1", "code1", "code2"],
+        }
+        df = pd.DataFrame(data)
+
+        df[ColumnNames.EFFECTIVE_DATE_TIME.value] = pd.to_datetime(
+            df[ColumnNames.EFFECTIVE_DATE_TIME.value]
+        )
+
+        ax = explore_total_records_number(
+            df,
+            start_date="2023-01-01",
+            end_date="2023-01-31",
+            user_ids=["user1", "user2"],
+        )
+
+        mock_show.assert_called_once()
+        num_unique_loinc_codes = df[ColumnNames.LOINC_CODE.value].nunique()
+        num_unique_users = df[ColumnNames.USER_ID.value].nunique()
+        # A stacked bar chart draws one patch per (LOINC code, user) cell.
+        num_bars = len(ax.patches) // num_unique_users
+        self.assertEqual(num_bars, num_unique_loinc_codes)
+
+
 if __name__ == "__main__":
     unittest.main()