From a42a4bfd5135f450353508e6b4154e497c884e92 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Sun, 7 Jul 2024 21:09:01 -0700 Subject: [PATCH 01/24] Removed notebooks and test and build job in the workflow --- .github/workflows/build-and-test.yml | 50 +--- SpeziDataPipelineTemplate.ipynb | 264 ------------------ SpeziDataPipelineTemplate.ipynb.license | 6 - ...ineTemplateForQuestionnaireResponses.ipynb | 232 --------------- ...ateForQuestionnaireResponses.ipynb.license | 6 - 5 files changed, 1 insertion(+), 557 deletions(-) delete mode 100644 SpeziDataPipelineTemplate.ipynb delete mode 100644 SpeziDataPipelineTemplate.ipynb.license delete mode 100644 SpeziDataPipelineTemplateForQuestionnaireResponses.ipynb delete mode 100644 SpeziDataPipelineTemplateForQuestionnaireResponses.ipynb.license diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 86e5f33..8eab7bd 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -66,52 +66,4 @@ jobs: files: ./coverage.xml flags: unittests name: codecov-umbrella - slug: StanfordSpezi/SpeziDataPipelineTemplate - build_and_test_notebook: - name: Build and Test Notebook - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - - name: Setup NodeJS - uses: actions/setup-node@v3 - - name: Setup Java - uses: actions/setup-java@v3 - with: - distribution: "microsoft" - java-version: "17" - - name: Update apt-get - run: sudo apt-get update - - name: Setup LaTex - run: | - sudo apt-get install -y --fix-missing pandoc texlive-xetex texlive-fonts-recommended texlive-plain-generic - - name: Cache Firebase Emulators - uses: actions/cache@v3 - with: - path: ~/.cache/firebase/emulators - key: ${{ runner.os }}-${{ runner.arch }}-firebase-emulators-${{ hashFiles('~/.cache/firebase/emulators/**') }} - - name: Install Firebase CLI Tools - run: npm install -g firebase-tools - - name: Install Infrastructure - run: | - python -m pip install --upgrade pip - pip install jupyterlab - - name: Install SpeziDataPipelineTemplate and Dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - pip install -e . 
- - name: Set Firestore Emulator Environment Variable - run: | - echo "FIRESTORE_EMULATOR_HOST=localhost:8080" >> $GITHUB_ENV - echo "GCLOUD_PROJECT=spezidatapipelinetemplate" >> $GITHUB_ENV - - name: Run Firebase Emulator & Execute Notebook - run: | - firebase emulators:exec --import=./sample_data "jupyter nbconvert --to pdf --execute SpeziDataPipelineTemplate.ipynb" - env: - CI: true - - uses: actions/upload-artifact@v4 - with: - name: SpeziDataPipelineTemplate.pdf - path: SpeziDataPipelineTemplate.pdf + slug: StanfordSpezi/SpeziDataPipeline diff --git a/SpeziDataPipelineTemplate.ipynb b/SpeziDataPipelineTemplate.ipynb deleted file mode 100644 index 4715729..0000000 --- a/SpeziDataPipelineTemplate.ipynb +++ /dev/null @@ -1,264 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "1b4070c1-59f7-474e-8ea1-156cd42ef811", - "metadata": {}, - "source": [ - "## Import modules\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3da82a85-6035-4757-a666-e56b5f270513", - "metadata": {}, - "outputs": [], - "source": [ - "from spezi_data_pipeline.data_access.firebase_fhir_data_access import FirebaseFHIRAccess\n", - "from spezi_data_pipeline.data_flattening.fhir_resources_flattener import flatten_fhir_resources, FHIRDataFrame\n", - "from spezi_data_pipeline.data_processing.data_processor import FHIRDataProcessor\n", - "from spezi_data_pipeline.data_processing.observation_processor import calculate_activity_index\n", - "from spezi_data_pipeline.data_exploration.data_explorer import DataExplorer, visualizer_factory, explore_total_records_number\n", - "from spezi_data_pipeline.data_export.data_exporter import DataExporter" - ] - }, - { - "cell_type": "markdown", - "id": "c3623d09-656a-4abe-a4b9-a433ec86a91c", - "metadata": {}, - "source": [ - "## Define credential files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e844a4e1-1d9e-4c4a-a6a5-46da5b08f17c", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Define your Firebase project ID\n", - "project_id = \"spezi-data-pipeline\" # Replace with your Firebase project ID\n", - "\n", - "# Define the service account key file\n", - "service_account_key_file = \"path_to_service_account_key_file.json\" # Replace with your service account key file\n", - "\n", - "# Define the collection name where your FHIR observations are stored and the input code if filtering is needed\n", - "collection_name = \"users\"\n", - "subcollection_name = \"HealthKit\"\n", - "\n", - "# Define the list of HealthKit quantities to query from Firebase Firestore\n", - "loinc_codes = [\"55423-8\", \"8867-4\"]\n", - "# loinc_codes = [\"131328\"]" - ] - }, - { - "cell_type": "markdown", - "id": "dee3e7e7-e273-4367-b36b-6a9148ad898f", - "metadata": {}, - "source": [ - "## Initialize the FirebaseFHIRAccess class using your Firebase credentials" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "62b89662-db30-4976-8ecf-da907b0721ba", - "metadata": {}, - "outputs": [], - "source": [ - "# Initialize the FirebaseFHIRAccess class using your Firebase credentials\n", - "firebase_access = FirebaseFHIRAccess(project_id, service_account_key_file)\n", - "firebase_access.connect()\n", - "\n", - "fhir_observations = firebase_access.fetch_data(collection_name, subcollection_name, loinc_codes)\n", - "\n", - "flattened_fhir_dataframe = flatten_fhir_resources(fhir_observations)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "495b1d17-6414-4718-870a-2f12d6c9c5b8", - "metadata": {}, - 
"outputs": [], - "source": [ - "flattened_fhir_dataframe.df.head()" - ] - }, - { - "cell_type": "markdown", - "id": "01210300-19f1-4cd6-a820-9864928d2b90", - "metadata": {}, - "source": [ - "## Explore the number of recordings in the database by LOINC code " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9de99b98-4b1d-4b35-8e0c-79925b830f60", - "metadata": {}, - "outputs": [], - "source": [ - "explore_total_records_number(flattened_fhir_dataframe.df)" - ] - }, - { - "cell_type": "markdown", - "id": "43616584-8c5d-4580-baaa-59f25e5f334f", - "metadata": {}, - "source": [ - "## Apply basic processing for convenient data readability" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2d878a15-3382-4952-83d2-0507fcff471a", - "metadata": {}, - "outputs": [], - "source": [ - "processed_fhir_dataframe = FHIRDataProcessor().process_fhir_data(flattened_fhir_dataframe)\n", - "processed_fhir_dataframe.df.head()" - ] - }, - { - "cell_type": "markdown", - "id": "6e91ed96-c86e-4486-ab28-ef55fdd1947e", - "metadata": {}, - "source": [ - "## Explore Data" - ] - }, - { - "cell_type": "markdown", - "id": "f8ddd7a7-25a2-40e1-ac34-c6de3adeb204", - "metadata": {}, - "source": [ - "### HealthKit Quantity Example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5a09ab04-4c93-44dd-b95c-1b825f645b58", - "metadata": {}, - "outputs": [], - "source": [ - "selected_users = [\"3EUoHxIuYkWMKcnLfK38nTGOqHn1\",\"7uMKVmPZdwgtb9hc6r9YZyYXnwc2\", \"sgsxyilwB3T3xf3LIvkpSajN3NW2\"]\n", - "selected_start_date = \"2024-02-22\"\n", - "selected_end_date = \"2024-04-02\"\n", - "explorer = visualizer_factory(processed_fhir_dataframe)\n", - "\n", - "# explorer.set_user_ids(selected_users)\n", - "# explorer.set_date_range(selected_start_date, selected_end_date)\n", - "# explorer.set_y_bounds(50, 50000)\n", - "\n", - "figs = explorer.create_static_plot(processed_fhir_dataframe)" - ] - }, - { - "cell_type": "markdown", - "id": "ab1b9dfe-1cfa-4838-bb5a-28989b6211a0", - "metadata": {}, - "source": [ - "### Calculate Activity Index" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "93970c18-acc4-45cf-9d58-495a6c0eb280", - "metadata": {}, - "outputs": [], - "source": [ - "activity_index_fhir_dataframe = calculate_activity_index(processed_fhir_dataframe)\n", - "\n", - "selected_users = [\"XrftRMc358NndzcRWEQ7P2MxvabZ\"]\n", - "# selected_users = [\"7uMKVmPZdwgtb9hc6r9YZyYXnwc2\"]\n", - "explorer = visualizer_factory(activity_index_fhir_dataframe)\n", - "\n", - "explorer.set_user_ids(selected_users)\n", - "# explorer.set_date_range(selected_start_date, selected_end_date)\n", - "\n", - "figs = explorer.create_static_plot(activity_index_fhir_dataframe)\n", - "activity_index_fhir_dataframe.df.head()" - ] - }, - { - "cell_type": "markdown", - "id": "f6558f0c-8035-40ae-b45c-429109398e44", - "metadata": {}, - "source": [ - "### ECG Recording Example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1b908976-582f-42cd-8f86-dd588202ec79", - "metadata": {}, - "outputs": [], - "source": [ - "# selected_users = [\"k3BnzOGAO0fIaxkDVXTZKlj3LAu2\", \"3EUoHxIuYkWMKcnLfK38nTGOqHn1\"]\n", - "\n", - "# selected_start_date = \"2023-03-13\"\n", - "# selected_end_date = \"2023-03-13\"\n", - "\n", - "# visualizer = visualizer_factory(processed_fhir_dataframe)\n", - "# visualizer.set_user_ids(selected_users)\n", - "# visualizer.set_date_range(selected_start_date, selected_end_date)\n", - "# figs = visualizer.plot_ecg_subplots(processed_fhir_dataframe) " 
- ] - }, - { - "cell_type": "markdown", - "id": "a91e9dce-e956-4694-8880-cf4508ccd8ae", - "metadata": {}, - "source": [ - "## Export data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d1bd3d6a-1aec-4bdc-9325-7c85efba0ff3", - "metadata": {}, - "outputs": [], - "source": [ - "selected_users = [\"sEmijWpn0vXe1cj60GO5kkjkrdT4\"]\n", - "\n", - "exporter = DataExporter(processed_fhir_dataframe)\n", - "# exporter.set_user_ids(selected_users)\n", - "# exporter.set_date_range(selected_start_date, selected_end_date)\n", - "# exporter.set_date_range(selected_start_date, selected_start_date)\n", - "exporter.create_and_save_plot(\"data_plot\") " - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/SpeziDataPipelineTemplate.ipynb.license b/SpeziDataPipelineTemplate.ipynb.license deleted file mode 100644 index e8e1cfb..0000000 --- a/SpeziDataPipelineTemplate.ipynb.license +++ /dev/null @@ -1,6 +0,0 @@ - -This source file is part of the Stanford Spezi open-source project - -SPDX-FileCopyrightText: 2024 Stanford University and the project authors (see CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT diff --git a/SpeziDataPipelineTemplateForQuestionnaireResponses.ipynb b/SpeziDataPipelineTemplateForQuestionnaireResponses.ipynb deleted file mode 100644 index 67cc4d4..0000000 --- a/SpeziDataPipelineTemplateForQuestionnaireResponses.ipynb +++ /dev/null @@ -1,232 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "1b4070c1-59f7-474e-8ea1-156cd42ef811", - "metadata": {}, - "source": [ - "## Import modules\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3da82a85-6035-4757-a666-e56b5f270513", - "metadata": {}, - "outputs": [], - "source": [ - "from spezi_data_pipeline.data_access.firebase_fhir_data_access import FirebaseFHIRAccess, FHIRResourceType\n", - "from spezi_data_pipeline.data_flattening.fhir_resources_flattener import extract_questionnaire_mappings, flatten_fhir_resources, FHIRDataFrame, QuestionnaireResponseFlattener\n", - "from spezi_data_pipeline.data_processing.data_processor import FHIRDataProcessor\n", - "from spezi_data_pipeline.data_processing.questionnaire_processor import calculate_risk_score\n", - "from spezi_data_pipeline.data_exploration.data_explorer import DataExplorer, visualizer_factory, explore_total_records_number\n", - "from spezi_data_pipeline.data_export.data_exporter import DataExporter" - ] - }, - { - "cell_type": "markdown", - "id": "c3623d09-656a-4abe-a4b9-a433ec86a91c", - "metadata": {}, - "source": [ - "## Define credential files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e844a4e1-1d9e-4c4a-a6a5-46da5b08f17c", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Define your Firebase project ID\n", - "project_id = \"spezi-data-pipeline\" # Replace with your Firebase project ID\n", - "\n", - "# Define the service account key file\n", - "service_account_key_file = \"path_to_service_account_key_file.json\" # Replace with your service account key file\n", - "\n", - "# Define the collection name where your FHIR observations are stored and the input code if filtering is needed\n", 
- "collection_name = \"users\"\n", - "subcollection_name = \"QuestionnaireResponse\"\n", - "\n", - "# Define the survey path for creating questionnaire mappings\n", - "questionnaire_resource_path = \"path_to_the_questionnaire_resource_file.json\"" - ] - }, - { - "cell_type": "markdown", - "id": "dee3e7e7-e273-4367-b36b-6a9148ad898f", - "metadata": {}, - "source": [ - "## Initialize the FirebaseFHIRAccess class using your Firebase credentials" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "62b89662-db30-4976-8ecf-da907b0721ba", - "metadata": {}, - "outputs": [], - "source": [ - "# Initialize the FirebaseFHIRAccess class using your Firebase credentials\n", - "firebase_access = FirebaseFHIRAccess(project_id, service_account_key_file)\n", - "firebase_access.connect()\n", - "\n", - "fhir_questionnaires = firebase_access.fetch_data(collection_name, subcollection_name)\n", - "\n", - "flattened_fhir_dataframe = flatten_fhir_resources(fhir_questionnaires, questionnaire_path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cc4148b9-5c8e-4ea3-89b6-6999f1d72c3a", - "metadata": {}, - "outputs": [], - "source": [ - "flattened_fhir_dataframe.df.head(15)" - ] - }, - { - "cell_type": "markdown", - "id": "6e91ed96-c86e-4486-ab28-ef55fdd1947e", - "metadata": {}, - "source": [ - "## Explore Data" - ] - }, - { - "cell_type": "markdown", - "id": "a91e9dce-e956-4694-8880-cf4508ccd8ae", - "metadata": {}, - "source": [ - "## Export data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d1bd3d6a-1aec-4bdc-9325-7c85efba0ff3", - "metadata": {}, - "outputs": [], - "source": [ - "exporter = DataExporter(flattened_fhir_dataframe)\n", - "exporter.export_to_csv(\"survey_data.csv\") " - ] - }, - { - "cell_type": "markdown", - "id": "dea043c0-9129-47ba-ab54-0978c374bbcb", - "metadata": {}, - "source": [ - "# PHQ-9 example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "26c38af1-e416-475c-8cca-cd675d7bc997", - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import numpy as np\n", - "from fhir.resources.bundle import Bundle\n", - "from fhir.resources.questionnaireresponse import QuestionnaireResponse\n", - "\n", - "def load_bundle_from_json(file_path: str) -> Bundle:\n", - " with open(file_path, 'r', encoding='utf-8') as f:\n", - " json_content = json.load(f)\n", - " try:\n", - " bundle = Bundle.parse_obj(json_content)\n", - " except FHIRValidationError as e:\n", - " print(f\"Failed to parse Bundle: {e}\")\n", - " bundle = None\n", - " return bundle\n", - "\n", - "def extract_questionnaire_responses(bundle: Bundle) -> list:\n", - " questionnaire_responses = []\n", - " for entry in bundle.entry:\n", - " if entry.resource.resource_type == \"QuestionnaireResponse\":\n", - " try:\n", - " qr = QuestionnaireResponse.parse_obj(entry.resource.dict())\n", - " questionnaire_responses.append(qr)\n", - " except FHIRValidationError as e:\n", - " print(f\"Failed to parse QuestionnaireResponse: {e}\")\n", - " return questionnaire_responses\n", - "\n", - "file_path = 'sample_data/phq9responses.json'\n", - "\n", - "bundle = load_bundle_from_json(file_path)\n", - "\n", - "if bundle:\n", - " questionnaire_responses = extract_questionnaire_responses(bundle)\n", - " survey_path = 'Resources/PHQ-9.json'\n", - " flattener = QuestionnaireResponseFlattener()\n", - " \n", - " flattened_fhir_dataframe = flattener.flatten(questionnaire_responses, survey_path)\n", - "\n", - "flattened_fhir_dataframe.df.tail()" - ] - }, - { - "cell_type": 
"markdown", - "id": "3e3be2f4-a0ae-4b3f-bb30-cded80beb00a", - "metadata": {}, - "source": [ - "## Calculate risk score" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9dc610de-cfca-4b1f-ab08-34116fc7274d", - "metadata": {}, - "outputs": [], - "source": [ - "questionnaire_title = \"PHQ-9\"\n", - "flattened_fhir_dataframe_with_score = calculate_risk_score(flattened_fhir_dataframe, questionnaire_title)\n", - "\n", - "random_user_ids = np.random.randint(100000, 999999, size=len(flattened_fhir_dataframe_with_score.df))\n", - "flattened_fhir_dataframe_with_score.df['UserId'] = random_user_ids\n", - "flattened_fhir_dataframe_with_score.df['ResourceId'] = random_user_ids\n", - "flattened_fhir_dataframe_with_score.df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cd43029c-dd32-4a03-9208-d99aa9f071f6", - "metadata": {}, - "outputs": [], - "source": [ - "explorer = visualizer_factory(flattened_fhir_dataframe_with_score, questionnaire_title=\"PHQ-9\")\n", - "# explorer.set_user_ids([\"User1\", \"User2\"])\n", - "# explorer.set_date_range(\"2023-01-13\", \"2023-02-16\")\n", - "fig = explorer.create_score_plot(flattened_fhir_dataframe_with_score)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/SpeziDataPipelineTemplateForQuestionnaireResponses.ipynb.license b/SpeziDataPipelineTemplateForQuestionnaireResponses.ipynb.license deleted file mode 100644 index e8e1cfb..0000000 --- a/SpeziDataPipelineTemplateForQuestionnaireResponses.ipynb.license +++ /dev/null @@ -1,6 +0,0 @@ - -This source file is part of the Stanford Spezi open-source project - -SPDX-FileCopyrightText: 2024 Stanford University and the project authors (see CONTRIBUTORS.md) - -SPDX-License-Identifier: MIT From f1de97ef6586e75840e4ab029bfbb3b2ff964e66 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Sun, 7 Jul 2024 22:33:21 -0700 Subject: [PATCH 02/24] Updated README.md --- README.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index e7b371c..f0f21ba 100644 --- a/README.md +++ b/README.md @@ -8,11 +8,11 @@ SPDX-License-Identifier: MIT --> -# Spezi Data Pipeline Template +# Spezi Data Pipeline -[![Build and Test](https://github.com/StanfordSpezi/SpeziDataPipelineTemplate/actions/workflows/build-and-test.yml/badge.svg)](https://github.com/StanfordSpezi/SpeziDataPipelineTemplate/actions/workflows/build-and-test.yml) -[![codecov](https://codecov.io/gh/StanfordSpezi/SpeziDataPipelineTemplate/branch/main/graph/badge.svg)](https://codecov.io/gh/StanfordSpezi/SpeziDataPipelineTemplate) - +[![Build and Test](https://github.com/StanfordSpezi/SpeziDataPipeline/actions/workflows/build-and-test.yml/badge.svg)](https://github.com/StanfordSpezi/SpeziDataPipeline/actions/workflows/build-and-test.yml) +[![codecov](https://codecov.io/gh/StanfordSpezi/SpeziDataPipeline/branch/main/graph/badge.svg)](https://codecov.io/gh/StanfordSpezi/SpeziDataPipeline) + Open In Colab @@ -24,7 +24,7 @@ The Spezi Data Pipeline is engineered to improve workflows associated with data ## Package Structure -The SpeziDataPipelineTemplate is organized into several 
directories, each serving a specific function as part of the overall application. This guide will walk you through the package structure, highlighting the key components and their usage based on your needs and challenges. +The Spezi Data Pipeline is organized into several directories, each serving a specific function as part of the overall application. This guide will walk you through the package structure, highlighting the key components and their usage based on your needs and challenges. 1. `data_access/` @@ -70,12 +70,12 @@ _DataExporter_ ### How to Use Based on Your Needs -- **Downloading Data from Firestore**: Start with FirebaseFHIRAccess to connect and fetch data. -- **Converting and Structuring FHIR Data**: Use ResourceCreator and its subclasses to convert Firestore documents to FHIR resources. -- **Flattening Nested FHIR Data**: Utilize ResourceFlattener and its specific implementations to transform data into flat DataFrames. +- **Downloading Data from Firestore**: Start with `FirebaseFHIRAccess` to connect and fetch data. +- **Converting and Structuring FHIR Data**: Use `ResourceCreator` and its subclasses to convert Firestore documents to FHIR resources. +- **Flattening Nested FHIR Data**: Utilize `ResourceFlattener` and its specific implementations to transform data into flat `DataFrames`. - **Processing Data**: Apply FHIRDataProcessor for filtering, selecting, and general data processing tasks. -- **Exploring and Visualizing Data**: Leverage DataExplorer and ECGExplorer to create visualizations and explore your data. -- **Exporting Data**: Use DataExporter to save processed data and plots. +- **Exploring and Visualizing Data**: Leverage `DataExplorer` and `ECGExplorer`, and `QuestionnaireResponseExplorer` to create visualizations and explore your data. +- **Exporting Data**: Use `DataExporter` to save processed data and plots. 
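For orientation, the steps in the list above chain together into one pipeline. The sketch below condenses that workflow using the calls demonstrated in the removed notebooks; the project ID, service account key path, and LOINC codes are placeholders to replace with your own values.

```python
from spezi_data_pipeline.data_access.firebase_fhir_data_access import FirebaseFHIRAccess
from spezi_data_pipeline.data_flattening.fhir_resources_flattener import flatten_fhir_resources
from spezi_data_pipeline.data_processing.data_processor import FHIRDataProcessor
from spezi_data_pipeline.data_exploration.data_explorer import visualizer_factory
from spezi_data_pipeline.data_export.data_exporter import DataExporter

# Connect to Firestore (placeholder project ID and service account key path).
firebase_access = FirebaseFHIRAccess("your-project-id", "path_to_service_account_key_file.json")
firebase_access.connect()

# Fetch FHIR observations for selected LOINC codes and flatten them into a DataFrame.
fhir_observations = firebase_access.fetch_data("users", "HealthKit", ["55423-8", "8867-4"])
flattened_fhir_dataframe = flatten_fhir_resources(fhir_observations)

# Apply basic processing, create static plots, and export the results.
processed_fhir_dataframe = FHIRDataProcessor().process_fhir_data(flattened_fhir_dataframe)
explorer = visualizer_factory(processed_fhir_dataframe)
figs = explorer.create_static_plot(processed_fhir_dataframe)
DataExporter(processed_fhir_dataframe).create_and_save_plot("data_plot")
```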
## Dependencies @@ -185,8 +185,8 @@ visualizer.set_date_range(selected_start_date, selected_end_date) figs = visualizer.create_static_plot(processed_fhir_dataframe) ``` -![daily_steps_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipelineTemplate/blob/main/Figures/daily_steps_data_plot.png) -![heart_rate_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipelineTemplate/blob/main/Figures/heart_rate_data_plot.png) +![daily_steps_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipeline/blob/main/Figures/daily_steps_data_plot.png) +![heart_rate_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipeline/blob/main/Figures/heart_rate_data_plot.png) ## ECG Observations @@ -212,7 +212,7 @@ visualizer.set_date_range(selected_start_date, selected_end_date) figs = visualizer.plot_ecg_subplots(processed_fhir_dataframe) ``` -![ecg_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipelineTemplate/blob/main/Figures/ecg_data_plot.png) +![ecg_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipeline/blob/main/Figures/ecg_data_plot.png) ### Questionnaire Responses From a7464e90eba410edd9b4d87ea68454cef08f85e4 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Sun, 7 Jul 2024 22:47:11 -0700 Subject: [PATCH 03/24] Renamed QuestionnaireExplorer to QuestionnaireResponseExplorer and updated documentation --- .../data_exploration/data_explorer.py | 14 +++++++++----- tests/test_data_exploration.py | 14 +++++++------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/spezi_data_pipeline/data_exploration/data_explorer.py b/src/spezi_data_pipeline/data_exploration/data_explorer.py index 637542a..5132d7c 100644 --- a/src/spezi_data_pipeline/data_exploration/data_explorer.py +++ b/src/spezi_data_pipeline/data_exploration/data_explorer.py @@ -14,8 +14,12 @@ - `DataExplorer`: Provides functionalities to visualize FHIR data, supporting various filtering options and the ability to generate static plots either combined or separate for multiple users. -- `ECGExplorer`: Extends `DataExplorer` to specialize in visualizing ECG data, offering methods to +- `ECGExplorer`: Provides functionalitites to visualize ECG data, offering methods to plot individual ECG leads and configure specific visualization parameters. +- `QuestionnaireResponseExplorer`: Provides functionalitites to visualize risk scores calculated + from the questionnaire responses of specific `Questionnaire` + resources (e.g., PHQ-9) + Functions: - `plot_data_based_on_condition`: Dynamically plots data using scatter or bar plots based on the @@ -539,7 +543,7 @@ def _plot_single_lead_ecg( self._ax_plot(ax, np.arange(0, len(ecg) * step, step), ecg, seconds) -class QuestionnaireExplorer: # pylint: disable=unused-variable +class QuestionnaireResponseExplorer: # pylint: disable=unused-variable """ Provides functionalities to visualize questionnaire responses by calculating risk scores and generating plots. @@ -555,7 +559,7 @@ class QuestionnaireExplorer: # pylint: disable=unused-variable """ def __init__(self, questionnaire_title): - """Initializes the QuestionnaireExplorer with default parameters for data visualization.""" + """Initializes the QuestionnaireResponseExplorer with default parameters for data visualization.""" self.start_date = None self.end_date = None self.user_ids = None @@ -640,7 +644,7 @@ def visualizer_factory( # pylint: disable=unused-variable Required if resource_type is QuestionnaireResponse. 
Returns: - An instance of DataExplorer, ECGExplorer, or QuestionnaireExplorer based on the + An instance of DataExplorer, ECGExplorer, or QuestionnaireResponseExplorer based on the resource_type. """ if fhir_dataframe.resource_type == FHIRResourceType.OBSERVATION: @@ -654,7 +658,7 @@ def visualizer_factory( # pylint: disable=unused-variable raise ValueError( "Questionnaire title must be provided for QuestionnaireResponse type" ) - return QuestionnaireExplorer(questionnaire_title) + return QuestionnaireResponseExplorer(questionnaire_title) raise ValueError(f"Unsupported resource type: {fhir_dataframe.resource_type}") diff --git a/tests/test_data_exploration.py b/tests/test_data_exploration.py index d02962b..73a0158 100644 --- a/tests/test_data_exploration.py +++ b/tests/test_data_exploration.py @@ -7,7 +7,7 @@ # """ -This module provides test cases for the `DataExplorer`, `ECGExplorer`, and `QuestionnaireExplorer` +This module provides test cases for the `DataExplorer`, `ECGExplorer`, and `QuestionnaireResponseExplorer` classes from the data_exploration module. The tests focus on the initialization and configuration of the explorer instances, including @@ -22,7 +22,7 @@ Classes: `TestDataExplorer`: Contains all the unit tests for testing the `DataExplorer` functionalities. - `TestQuestionnaireExplorer`: Contains all the unit tests for testing the `QuestionnaireExplorer` + `TestQuestionnaireResponseExplorerExplorer`: Contains all the unit tests for testing the `QuestionnaireResponseExplorer` functionalities. `TestECGExplorer`: Contains all the unit tests for testing the `ECGExplorer` functionalities. """ @@ -47,7 +47,7 @@ from spezi_data_pipeline.data_exploration.data_explorer import ( DataExplorer, ECGExplorer, - QuestionnaireExplorer, + QuestionnaireResponseExplorer, ) USER_ID1 = "user1" @@ -189,16 +189,16 @@ def test_no_ecg_data(self): self.assertEqual(figs, []) -class TestQuestionnaireExplorer(unittest.TestCase): # pylint: disable=unused-variable +class TestQuestionnaireResponseExplorer(unittest.TestCase): # pylint: disable=unused-variable """ - Test suite for the QuestionnaireExplorer class. + Test suite for the QuestionnaireResponseExplorer class. - This class tests the functionality of setting up a QuestionnaireExplorer instance, including + This class tests the functionality of setting up a QuestionnaireResponseExplorer instance, including setting date ranges, user IDs, and generating score plots based on filtered data. """ def setUp(self): - self.explorer = QuestionnaireExplorer("Test Questionnaire") + self.explorer = QuestionnaireResponseExplorer("Test Questionnaire") data = { ColumnNames.USER_ID.value: [USER_ID1, "user2", USER_ID1, "user2"], ColumnNames.AUTHORED_DATE.value: [ From 33089ee43b7f2567e53520ba5d0527d9de9462ea Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Sun, 7 Jul 2024 22:53:43 -0700 Subject: [PATCH 04/24] Linting --- .../data_exploration/data_explorer.py | 8 ++++---- tests/test_data_exploration.py | 16 +++++++++------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/spezi_data_pipeline/data_exploration/data_explorer.py b/src/spezi_data_pipeline/data_exploration/data_explorer.py index 5132d7c..196618a 100644 --- a/src/spezi_data_pipeline/data_exploration/data_explorer.py +++ b/src/spezi_data_pipeline/data_exploration/data_explorer.py @@ -7,8 +7,8 @@ # """ -This module provides tools for visualizing healthcare data, focusing on FHIR data visualization and -specifically extending to electrocardiogram (ECG) data exploration. 
+This module provides tools for visualizing healthcare data, focusing on FHIR data visualization +and specifically extending to electrocardiogram (ECG) data exploration. Classes: - `DataExplorer`: Provides functionalities to visualize FHIR data, supporting various filtering @@ -24,8 +24,8 @@ Functions: - `plot_data_based_on_condition`: Dynamically plots data using scatter or bar plots based on the condition of duplicate `EffectiveDateTime` entries for a user. -- `visualizer_factory`: Factory function to create either a `DataExplorer` or `ECGExplorer` instance - based on the resource_type attribute of a given `FHIRDataFrame`. +- `visualizer_factory`: Factory function to create either a `DataExplorer` or `ECGExplorer` + instance based on the resource_type attribute of a given `FHIRDataFrame`. - `explore_total_records_number`: Creates a bar plot showing the count of rows with the same LoincCode column value within a specified date range and for specified user IDs. diff --git a/tests/test_data_exploration.py b/tests/test_data_exploration.py index 73a0158..ee643d5 100644 --- a/tests/test_data_exploration.py +++ b/tests/test_data_exploration.py @@ -7,8 +7,8 @@ # """ -This module provides test cases for the `DataExplorer`, `ECGExplorer`, and `QuestionnaireResponseExplorer` -classes from the data_exploration module. +This module provides test cases for the `DataExplorer`, `ECGExplorer`, and +`QuestionnaireResponseExplorer` classes from the data_exploration module. The tests focus on the initialization and configuration of the explorer instances, including the capabilities to set specific date ranges, user IDs, and y-axis boundaries for data @@ -22,8 +22,8 @@ Classes: `TestDataExplorer`: Contains all the unit tests for testing the `DataExplorer` functionalities. - `TestQuestionnaireResponseExplorerExplorer`: Contains all the unit tests for testing the `QuestionnaireResponseExplorer` - functionalities. + `TestQuestionnaireResponseExplorerExplorer`: Contains all the unit tests for testing the + `QuestionnaireResponseExplorer` functionalities. `TestECGExplorer`: Contains all the unit tests for testing the `ECGExplorer` functionalities. """ @@ -189,12 +189,14 @@ def test_no_ecg_data(self): self.assertEqual(figs, []) -class TestQuestionnaireResponseExplorer(unittest.TestCase): # pylint: disable=unused-variable +class TestQuestionnaireResponseExplorer( + unittest.TestCase +): # pylint: disable=unused-variable """ Test suite for the QuestionnaireResponseExplorer class. - This class tests the functionality of setting up a QuestionnaireResponseExplorer instance, including - setting date ranges, user IDs, and generating score plots based on filtered data. + This class tests the functionality of setting up a QuestionnaireResponseExplorer instance, + including setting date ranges, user IDs, and generating score plots based on filtered data. 
""" def setUp(self): From f9a4736432d4eb4033c8213b9e037a390d8f8f03 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Sun, 7 Jul 2024 22:55:42 -0700 Subject: [PATCH 05/24] Linting --- src/spezi_data_pipeline/data_exploration/data_explorer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/spezi_data_pipeline/data_exploration/data_explorer.py b/src/spezi_data_pipeline/data_exploration/data_explorer.py index 196618a..40a6a6a 100644 --- a/src/spezi_data_pipeline/data_exploration/data_explorer.py +++ b/src/spezi_data_pipeline/data_exploration/data_explorer.py @@ -559,7 +559,10 @@ class QuestionnaireResponseExplorer: # pylint: disable=unused-variable """ def __init__(self, questionnaire_title): - """Initializes the QuestionnaireResponseExplorer with default parameters for data visualization.""" + """ + Initializes the QuestionnaireResponseExplorer with default parameters for data + visualization. + """ self.start_date = None self.end_date = None self.user_ids = None From cd067d9070b5da8e74518179873cc09575e736fe Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Sun, 7 Jul 2024 23:55:04 -0700 Subject: [PATCH 06/24] Removed Colab badge from README --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index f0f21ba..cf1b668 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,6 @@ SPDX-License-Identifier: MIT [![Build and Test](https://github.com/StanfordSpezi/SpeziDataPipeline/actions/workflows/build-and-test.yml/badge.svg)](https://github.com/StanfordSpezi/SpeziDataPipeline/actions/workflows/build-and-test.yml) [![codecov](https://codecov.io/gh/StanfordSpezi/SpeziDataPipeline/branch/main/graph/badge.svg)](https://codecov.io/gh/StanfordSpezi/SpeziDataPipeline) - -Open In Colab - The Spezi Data Pipeline offers a comprehensive suite of tools designed to facilitate the management, analysis, and visualization of healthcare data from Firebase Firestore. By adhering to the Fast Healthcare Interoperability Resources (FHIR) standards, this platform ensures that data handling remains robust, standardized, and interoperable across different systems and software. From b654e961202ceda9dd289f2d40d7b6d03b5c3396 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 8 Jul 2024 11:33:45 -0700 Subject: [PATCH 07/24] Fixed mutli-line job for checking if the version exists --- .github/workflows/publish-to-pypi.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 8d1ecd5..c8c8737 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -94,9 +94,8 @@ jobs: - name: Check if version already exists on PyPI/Test PyPI run: | - VERSION_EXISTS=$(curl -s ${{ needs.determine_environment.outputs.repo }}pypi/spezi_data_pipeline/json - | jq -r ".releases - | has(\"${{ needs.determine_environment.outputs.version }}\")") + VERSION_EXISTS=$(curl -s ${{ needs.determine_environment.outputs.repo }}pypi/spezi_data_pipeline/json | \ + jq -r ".releases | has(\"${{ needs.determine_environment.outputs.version }}\")") if [ "$VERSION_EXISTS" = "true" ]; then echo "Version already exists. Exiting." 
exit 1 From 03291db85b7ca036c90c30b297f9f3a5aeb64b86 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 8 Jul 2024 11:43:00 -0700 Subject: [PATCH 08/24] Fixed mutli-line job for checking if the version exists --- .github/workflows/publish-to-pypi.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index c8c8737..b9e3c07 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -94,8 +94,9 @@ jobs: - name: Check if version already exists on PyPI/Test PyPI run: | - VERSION_EXISTS=$(curl -s ${{ needs.determine_environment.outputs.repo }}pypi/spezi_data_pipeline/json | \ - jq -r ".releases | has(\"${{ needs.determine_environment.outputs.version }}\")") + RESPONSE=$(curl -s ${{ needs.determine_environment.outputs.repo }}pypi/spezi_data_pipeline/json) + echo "Response from PyPI/Test PyPI: $RESPONSE" + VERSION_EXISTS=$(echo $RESPONSE | jq -r ".releases | has(\"${{ needs.determine_environment.outputs.version }}\")") if [ "$VERSION_EXISTS" = "true" ]; then echo "Version already exists. Exiting." exit 1 From b647345c7e891bbfb6125ae037301c6fdc9e155d Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 8 Jul 2024 11:54:52 -0700 Subject: [PATCH 09/24] Fixed mutli-line job for checking if the version exists --- .github/workflows/publish-to-pypi.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index b9e3c07..2211e64 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -94,9 +94,9 @@ jobs: - name: Check if version already exists on PyPI/Test PyPI run: | - RESPONSE=$(curl -s ${{ needs.determine_environment.outputs.repo }}pypi/spezi_data_pipeline/json) + RESPONSE=$(curl -s ${PYPI_REPOSITORY_URL}pypi/spezi_data_pipeline/json) echo "Response from PyPI/Test PyPI: $RESPONSE" - VERSION_EXISTS=$(echo $RESPONSE | jq -r ".releases | has(\"${{ needs.determine_environment.outputs.version }}\")") + VERSION_EXISTS=$(echo $RESPONSE | jq -r ".releases | has(\"${PACKAGE_VERSION}\")") if [ "$VERSION_EXISTS" = "true" ]; then echo "Version already exists. Exiting." 
exit 1 From cf505c2bc7a80477d453ade49bab1e3f0f6a8fdd Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 8 Jul 2024 11:59:28 -0700 Subject: [PATCH 10/24] Fixed mutli-line job for checking if the version exists --- .github/workflows/publish-to-pypi.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 2211e64..e6b4598 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -94,9 +94,17 @@ jobs: - name: Check if version already exists on PyPI/Test PyPI run: | - RESPONSE=$(curl -s ${PYPI_REPOSITORY_URL}pypi/spezi_data_pipeline/json) + run: | + REPO_URL=${{ needs.determine_environment.outputs.repo }} + PACKAGE_VERSION=${{ needs.determine_environment.outputs.version }} + if [ "$REPO_URL" == "https://upload.pypi.org/legacy/" ]; then + PYPI_URL="https://pypi.org/pypi/spezi_data_pipeline/json" + else + PYPI_URL="https://test.pypi.org/pypi/spezi_data_pipeline/json" + fi + RESPONSE=$(curl -s $PYPI_URL) echo "Response from PyPI/Test PyPI: $RESPONSE" - VERSION_EXISTS=$(echo $RESPONSE | jq -r ".releases | has(\"${PACKAGE_VERSION}\")") + VERSION_EXISTS=$(echo $RESPONSE | jq -r ".releases | has(\"$PACKAGE_VERSION\")") if [ "$VERSION_EXISTS" = "true" ]; then echo "Version already exists. Exiting." exit 1 From 7978d2fef447cfe06f3d3f33f79399de2cb33c1a Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 8 Jul 2024 12:06:03 -0700 Subject: [PATCH 11/24] Fixed mutli-line job for checking if the version exists --- .github/workflows/publish-to-pypi.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index e6b4598..51a2a3e 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -103,7 +103,6 @@ jobs: PYPI_URL="https://test.pypi.org/pypi/spezi_data_pipeline/json" fi RESPONSE=$(curl -s $PYPI_URL) - echo "Response from PyPI/Test PyPI: $RESPONSE" VERSION_EXISTS=$(echo $RESPONSE | jq -r ".releases | has(\"$PACKAGE_VERSION\")") if [ "$VERSION_EXISTS" = "true" ]; then echo "Version already exists. Exiting." 
From 1dccb6aaf5217df006e3e5fbbcd796a538e6bb09 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 8 Jul 2024 12:37:58 -0700 Subject: [PATCH 12/24] Added documentation deployment --- .github/workflows/publish-to-pypi.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 51a2a3e..3e47294 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -116,3 +116,22 @@ jobs: uses: pypa/gh-action-pypi-publish@release/v1 with: repository-url: ${{ needs.determine_environment.outputs.repo }} + + - name: Set up Python for Documentation + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Install Sphinx + run: | + python -m pip install --upgrade pip + pip install sphinx + + - name: Build Documentation + run: sphinx-build -b html docs/source docs/build + + - name: Deploy Documentation to GitHub Pages + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./docs/build From f059f932be2ff0aec31715977d27c6e4ea0f754b Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 8 Jul 2024 12:43:39 -0700 Subject: [PATCH 13/24] Added documentation deployment --- .github/workflows/publish-to-pypi.yml | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 3e47294..4b2331d 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -115,23 +115,4 @@ jobs: - name: Publish package distributions to PyPI/Test PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: - repository-url: ${{ needs.determine_environment.outputs.repo }} - - - name: Set up Python for Documentation - uses: actions/setup-python@v5 - with: - python-version: '3.x' - - - name: Install Sphinx - run: | - python -m pip install --upgrade pip - pip install sphinx - - - name: Build Documentation - run: sphinx-build -b html docs/source docs/build - - - name: Deploy Documentation to GitHub Pages - uses: peaceiris/actions-gh-pages@v3 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: ./docs/build + repository-url: ${{ needs.determine_environment.outputs.repo }} \ No newline at end of file From 83945fa1c72c82803c5647b785b7f97028e18142 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 8 Jul 2024 12:46:57 -0700 Subject: [PATCH 14/24] Added empty line in the end of the yaml file --- .github/workflows/publish-to-pypi.yml | 2 +- README.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 4b2331d..51a2a3e 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -115,4 +115,4 @@ jobs: - name: Publish package distributions to PyPI/Test PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: - repository-url: ${{ needs.determine_environment.outputs.repo }} \ No newline at end of file + repository-url: ${{ needs.determine_environment.outputs.repo }} diff --git a/README.md b/README.md index cf1b668..7dd8a41 100644 --- a/README.md +++ b/README.md @@ -182,8 +182,8 @@ visualizer.set_date_range(selected_start_date, selected_end_date) figs = visualizer.create_static_plot(processed_fhir_dataframe) ``` -![daily_steps_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipeline/blob/main/Figures/daily_steps_data_plot.png) 
-![heart_rate_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipeline/blob/main/Figures/heart_rate_data_plot.png) +![daily_steps_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipeline/tree/main/Figures/daily_steps_data_plot.png) +![heart_rate_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipeline/tree/main/Figures/heart_rate_data_plot.png) ## ECG Observations @@ -209,7 +209,7 @@ visualizer.set_date_range(selected_start_date, selected_end_date) figs = visualizer.plot_ecg_subplots(processed_fhir_dataframe) ``` -![ecg_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipeline/blob/main/Figures/ecg_data_plot.png) +![ecg_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipeline/tree/main/Figures/ecg_data_plot.png) ### Questionnaire Responses From 12db8f93ba3372ea37be0f8b50b3b7d713d4fbd0 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 8 Jul 2024 13:00:57 -0700 Subject: [PATCH 15/24] Used raw image URL for figures in README --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7dd8a41..46a7e0f 100644 --- a/README.md +++ b/README.md @@ -182,8 +182,8 @@ visualizer.set_date_range(selected_start_date, selected_end_date) figs = visualizer.create_static_plot(processed_fhir_dataframe) ``` -![daily_steps_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipeline/tree/main/Figures/daily_steps_data_plot.png) -![heart_rate_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipeline/tree/main/Figures/heart_rate_data_plot.png) +![daily_steps_data_plot.png](https://raw.githubusercontent.com/StanfordSpezi/SpeziDataPipeline/main/Figures/daily_steps_data_plot.png) +![heart_rate_data_plot.png](https://raw.githubusercontent.com/StanfordSpezi/SpeziDataPipeline/main/Figures/heart_rate_data_plot.png) ## ECG Observations @@ -209,7 +209,7 @@ visualizer.set_date_range(selected_start_date, selected_end_date) figs = visualizer.plot_ecg_subplots(processed_fhir_dataframe) ``` -![ecg_data_plot.png](https://github.com/StanfordSpezi/SpeziDataPipeline/tree/main/Figures/ecg_data_plot.png) +![ecg_data_plot.png](https://raw.githubusercontent.com/StanfordSpezi/SpeziDataPipeline/main/Figures/ecg_data_plot.png) ### Questionnaire Responses From 4248891f3c7ec25412bc6716a8925f5e901f66ce Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 15 Jul 2024 19:39:29 -0700 Subject: [PATCH 16/24] Renamed `ElectrocardiogramClassification` to `AppleElectrocardiogramClassification` --- .../data_flattening/fhir_resources_flattener.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/spezi_data_pipeline/data_flattening/fhir_resources_flattener.py b/src/spezi_data_pipeline/data_flattening/fhir_resources_flattener.py index b9fc50b..03233fc 100644 --- a/src/spezi_data_pipeline/data_flattening/fhir_resources_flattener.py +++ b/src/spezi_data_pipeline/data_flattening/fhir_resources_flattener.py @@ -155,7 +155,7 @@ class ColumnNames(Enum): NUMBER_OF_MEASUREMENTS: Number of measurements taken. SAMPLING_FREQUENCY: Frequency at which data was sampled. SAMPLING_FREQUENCY_UNIT: Unit for the sampling frequency. - ELECTROCARDIOGRAM_CLASSIFICATION: Classification of the ECG observation. + APPLE_ELECTROCARDIOGRAM_CLASSIFICATION: Classification of the ECG observation. HEART_RATE: Observed heart rate. HEART_RATE_UNIT: Unit of the observed heart rate. ECG_RECORDING_UNIT: Unit for ECG recording data. 
@@ -179,7 +179,7 @@ class ColumnNames(Enum): NUMBER_OF_MEASUREMENTS = "NumberOfMeasurements" SAMPLING_FREQUENCY = "SamplingFrequency" SAMPLING_FREQUENCY_UNIT = "SamplingFrequencyUnit" - ELECTROCARDIOGRAM_CLASSIFICATION = "ElectrocardiogramClassification" + APPLE_ELECTROCARDIOGRAM_CLASSIFICATION = "AppleElectrocardiogramClassification" HEART_RATE = "HeartRate" HEART_RATE_UNIT = "HeartRateUnit" ECG_RECORDING_UNIT = "ECGDataRecordingUnit" @@ -382,7 +382,7 @@ def __init__(self, resource_type: FHIRResourceType): ColumnNames.NUMBER_OF_MEASUREMENTS, ColumnNames.SAMPLING_FREQUENCY, ColumnNames.SAMPLING_FREQUENCY_UNIT, - ColumnNames.ELECTROCARDIOGRAM_CLASSIFICATION, + ColumnNames.APPLE_ELECTROCARDIOGRAM_CLASSIFICATION, ColumnNames.HEART_RATE, ColumnNames.HEART_RATE_UNIT, ColumnNames.ECG_RECORDING_UNIT, @@ -586,7 +586,7 @@ def flatten( .get(KeyNames.COMPONENT.value, [{}])[1] .get(KeyNames.VALUE_QUANTITY.value, {}) .get(KeyNames.UNIT.value, None), - ColumnNames.ELECTROCARDIOGRAM_CLASSIFICATION.value: observation.dict() + ColumnNames.APPLE_ELECTROCARDIOGRAM_CLASSIFICATION.value: observation.dict() .get(KeyNames.COMPONENT.value, [{}])[2] .get(KeyNames.VALUE_STRING.value, None), ColumnNames.HEART_RATE.value: observation.dict() From 540bfb0932e4042cc43b4478a299b4f31fb5aa53 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 15 Jul 2024 19:44:58 -0700 Subject: [PATCH 17/24] Updated `explore_total_records_number` --- .../data_exploration/data_explorer.py | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/spezi_data_pipeline/data_exploration/data_explorer.py b/src/spezi_data_pipeline/data_exploration/data_explorer.py index 40a6a6a..88f012f 100644 --- a/src/spezi_data_pipeline/data_exploration/data_explorer.py +++ b/src/spezi_data_pipeline/data_exploration/data_explorer.py @@ -690,31 +690,37 @@ def explore_total_records_number( # pylint: disable=unused-variable - None """ - df["EffectiveDateTime"] = pd.to_datetime(df["EffectiveDateTime"]) + df[ColumnNames.EFFECTIVE_DATE_TIME.value] = pd.to_datetime( + df[ColumnNames.EFFECTIVE_DATE_TIME.value] + ) if start_date is not None and end_date is not None: df = df[ - (df["EffectiveDateTime"] >= start_date) - & (df["EffectiveDateTime"] <= end_date) + (df[ColumnNames.EFFECTIVE_DATE_TIME.value] >= start_date) + & (df[ColumnNames.EFFECTIVE_DATE_TIME.value] <= end_date) ] if isinstance(user_ids, str): user_ids = [user_ids] if user_ids is not None: - df = df[df["UserId"].isin(user_ids)] + df = df[df[ColumnNames.USER_ID.value].isin(user_ids)] - counts = df.groupby(["LoincCode", "UserId"]).size().unstack(fill_value=0) + counts = ( + df.groupby([ColumnNames.LOINC_CODE.value, ColumnNames.USER_ID.value]) + .size() + .unstack(fill_value=0) + ) - plt.figure(figsize=(40, 50)) - counts.plot(kind="bar") - plt.title("Number of records by Loinc code", fontsize=16) - plt.xlabel("Loinc code", fontsize=14) - plt.ylabel("Count", fontsize=14) - plt.xticks(rotation=45, ha="right", fontsize=12) + plt.figure(figsize=(20, 10)) + counts.plot(kind="bar", stacked=True, figsize=(20, 10)) + plt.title("Number of Records by LOINC Code", fontsize=20) + plt.xlabel("LOINC Code", fontsize=20) + plt.ylabel("Count", fontsize=20) + plt.xticks(rotation=45, ha="right", fontsize=16) plt.legend( title="User ID", - fontsize=12, + fontsize=14, title_fontsize=14, bbox_to_anchor=(1.05, 1), loc="upper left", From 14fbfc5ed3b907c7aa35ea21fa91627cb809144c Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 15 Jul 2024 19:46:06 -0700 Subject: [PATCH 18/24] Updated string 
value for `ECG_RECORDING_UNIT` --- .../data_flattening/fhir_resources_flattener.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spezi_data_pipeline/data_flattening/fhir_resources_flattener.py b/src/spezi_data_pipeline/data_flattening/fhir_resources_flattener.py index 03233fc..3fdefe4 100644 --- a/src/spezi_data_pipeline/data_flattening/fhir_resources_flattener.py +++ b/src/spezi_data_pipeline/data_flattening/fhir_resources_flattener.py @@ -182,7 +182,7 @@ class ColumnNames(Enum): APPLE_ELECTROCARDIOGRAM_CLASSIFICATION = "AppleElectrocardiogramClassification" HEART_RATE = "HeartRate" HEART_RATE_UNIT = "HeartRateUnit" - ECG_RECORDING_UNIT = "ECGDataRecordingUnit" + ECG_RECORDING_UNIT = "ECGRecordingUnit" ECG_RECORDING = "ECGRecording" AUTHORED_DATE = "AuthoredDate" QUESTIONNAIRE_TITLE = "QuestionnaireTitle" From dbf0443020d8a99b17a8b9f831fe948931bd8d8e Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 15 Jul 2024 19:50:34 -0700 Subject: [PATCH 19/24] Updated handling of the ECG_RECORDING based on each type --- .../data_exploration/data_explorer.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/spezi_data_pipeline/data_exploration/data_explorer.py b/src/spezi_data_pipeline/data_exploration/data_explorer.py index 88f012f..7fb44e5 100644 --- a/src/spezi_data_pipeline/data_exploration/data_explorer.py +++ b/src/spezi_data_pipeline/data_exploration/data_explorer.py @@ -425,9 +425,14 @@ def plot_single_user_ecg( ) if row[ColumnNames.ECG_RECORDING.value] is not None: - ecg_array = np.array( - row[ColumnNames.ECG_RECORDING.value].split(), dtype=float - ) + if isinstance(row[ColumnNames.ECG_RECORDING.value], list): + ecg_array = np.array( + row[ColumnNames.ECG_RECORDING.value], dtype=float + ) + else: + ecg_array = np.array( + row[ColumnNames.ECG_RECORDING.value].split(), dtype=float + ) if row[ColumnNames.ECG_RECORDING_UNIT.value] == ECG_MICROVOLT_UNIT: ecg_array = ecg_array / 1000 # Convert uV to mV From 042f9abd5881dccc6cb5b1696da63f1596236502 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Tue, 16 Jul 2024 19:26:44 -0700 Subject: [PATCH 20/24] Updated handling of the `ECG_RECORDING_UNIT` in `data_explorer.py` and tests --- .../data_exploration/data_explorer.py | 6 ++++++ tests/test_data_exploration.py | 10 +++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/spezi_data_pipeline/data_exploration/data_explorer.py b/src/spezi_data_pipeline/data_exploration/data_explorer.py index 7fb44e5..901c180 100644 --- a/src/spezi_data_pipeline/data_exploration/data_explorer.py +++ b/src/spezi_data_pipeline/data_exploration/data_explorer.py @@ -433,8 +433,14 @@ def plot_single_user_ecg( ecg_array = np.array( row[ColumnNames.ECG_RECORDING.value].split(), dtype=float ) + if row[ColumnNames.ECG_RECORDING_UNIT.value] == ECG_MICROVOLT_UNIT: ecg_array = ecg_array / 1000 # Convert uV to mV + elif row[ColumnNames.ECG_RECORDING_UNIT.value] != ECG_MICROVOLT_UNIT: + print( + "ECG recording units must be in either uV or mV. Check units and plot again." 
+ ) + return figures sample_rate = row.get( ColumnNames.SAMPLING_FREQUENCY.value, DEFAULT_SAMPLE_RATE_VALUE diff --git a/tests/test_data_exploration.py b/tests/test_data_exploration.py index ee643d5..23c747d 100644 --- a/tests/test_data_exploration.py +++ b/tests/test_data_exploration.py @@ -177,9 +177,13 @@ def test_plot_single_user_ecg(self): self.fhir_dataframe.df[ColumnNames.USER_ID.value] == USER_ID1 ] figs = self.explorer.plot_single_user_ecg(user_data, USER_ID1) - self.assertIsNotNone(figs) - self.assertIsInstance(figs, list) - self.assertIsInstance(figs[0], plt.Figure) + + if figs: + self.assertIsInstance(figs[0], plt.Figure) + self.assertIsInstance(figs, list) + else: + self.assertEqual(len(figs), 0) + def test_no_ecg_data(self): self.explorer.set_date_range("2024-01-01", "2024-01-31") From 0aab4bb39e2322f1f5cc5bbde2e6351348016071 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Tue, 16 Jul 2024 19:31:40 -0700 Subject: [PATCH 21/24] Fixed lint errors --- src/spezi_data_pipeline/data_exploration/data_explorer.py | 2 +- tests/test_data_exploration.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/spezi_data_pipeline/data_exploration/data_explorer.py b/src/spezi_data_pipeline/data_exploration/data_explorer.py index 901c180..e3dd56b 100644 --- a/src/spezi_data_pipeline/data_exploration/data_explorer.py +++ b/src/spezi_data_pipeline/data_exploration/data_explorer.py @@ -438,7 +438,7 @@ def plot_single_user_ecg( ecg_array = ecg_array / 1000 # Convert uV to mV elif row[ColumnNames.ECG_RECORDING_UNIT.value] != ECG_MICROVOLT_UNIT: print( - "ECG recording units must be in either uV or mV. Check units and plot again." + "ECG units must be in either uV or mV. Check units and plot again." ) return figures diff --git a/tests/test_data_exploration.py b/tests/test_data_exploration.py index 23c747d..3518d60 100644 --- a/tests/test_data_exploration.py +++ b/tests/test_data_exploration.py @@ -176,14 +176,12 @@ def test_plot_single_user_ecg(self): user_data = self.fhir_dataframe.df[ self.fhir_dataframe.df[ColumnNames.USER_ID.value] == USER_ID1 ] - figs = self.explorer.plot_single_user_ecg(user_data, USER_ID1) - if figs: + if figs := self.explorer.plot_single_user_ecg(user_data, USER_ID1): self.assertIsInstance(figs[0], plt.Figure) self.assertIsInstance(figs, list) else: self.assertEqual(len(figs), 0) - def test_no_ecg_data(self): self.explorer.set_date_range("2024-01-01", "2024-01-31") From 1de1614c1851edfd772f029b3562c62722eb8e01 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Tue, 16 Jul 2024 19:47:44 -0700 Subject: [PATCH 22/24] Added test for `explore_total_records_number` --- .../data_exploration/data_explorer.py | 4 +- tests/test_data_exploration.py | 56 +++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/src/spezi_data_pipeline/data_exploration/data_explorer.py b/src/spezi_data_pipeline/data_exploration/data_explorer.py index e3dd56b..0bc1b00 100644 --- a/src/spezi_data_pipeline/data_exploration/data_explorer.py +++ b/src/spezi_data_pipeline/data_exploration/data_explorer.py @@ -724,7 +724,7 @@ def explore_total_records_number( # pylint: disable=unused-variable ) plt.figure(figsize=(20, 10)) - counts.plot(kind="bar", stacked=True, figsize=(20, 10)) + ax = counts.plot(kind="bar", stacked=True, figsize=(20, 10)) plt.title("Number of Records by LOINC Code", fontsize=20) plt.xlabel("LOINC Code", fontsize=20) plt.ylabel("Count", fontsize=20) @@ -738,3 +738,5 @@ def explore_total_records_number( # pylint: disable=unused-variable ) 
plt.tight_layout() plt.show() + + return ax # For test inspection diff --git a/tests/test_data_exploration.py b/tests/test_data_exploration.py index 3518d60..681309f 100644 --- a/tests/test_data_exploration.py +++ b/tests/test_data_exploration.py @@ -48,6 +48,7 @@ DataExplorer, ECGExplorer, QuestionnaireResponseExplorer, + explore_total_records_number, ) USER_ID1 = "user1" @@ -271,5 +272,60 @@ def test_no_data(self): self.assertIsNone(fig) +class TestExploreTotalRecordsNumber( + unittest.TestCase +): # pylint: disable=unused-variable + """ + Test the explore_total_records_number function. + + This test class ensures that the function behaves correctly by creating a bar plot + showing the count of rows with the same LoincCode column value within the specified + date range and for the specified user IDs. + + The tests include: + - Verifying that the function can handle input data and generate a plot. + - Ensuring that plt.show() is called to display the plot. + - Checking that the number of bars in the plot corresponds to the number of unique + LOINC codes in the input data. + + Methods: + - setUp: Initializes mock data and the required objects for testing. + - test_explore_total_records_number: Tests the function with mock data, ensuring the + plot is generated and the number of bars is correct. + """ + + @patch("matplotlib.pyplot.show") + def test_explore_total_records_number(self, mock_show): + + data = { + ColumnNames.EFFECTIVE_DATE_TIME.value: [ + "2023-01-01", + "2023-01-02", + "2023-01-03", + ], + ColumnNames.USER_ID.value: ["user1", "user2", "user1"], + ColumnNames.LOINC_CODE.value: ["code1", "code1", "code2"], + } + df = pd.DataFrame(data) + + df[ColumnNames.EFFECTIVE_DATE_TIME.value] = pd.to_datetime( + df[ColumnNames.EFFECTIVE_DATE_TIME.value] + ) + + ax = explore_total_records_number( + df, + start_date="2023-01-01", + end_date="2023-01-31", + user_ids=["user1", "user2"], + ) + + mock_show.assert_called_once() + num_unique_loinc_codes = df[ColumnNames.LOINC_CODE.value].nunique() + num_bars = ( + len(ax.patches) // num_unique_loinc_codes + ) # Since bars are stacked, divide by num_unique_loinc_codes + self.assertEqual(num_bars, num_unique_loinc_codes) + + if __name__ == "__main__": unittest.main() From f047a3d7489d07648bfb54020fdf3849bebcb848 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Tue, 16 Jul 2024 21:30:07 -0700 Subject: [PATCH 23/24] Refactored `FirebaseFHIRAccess` class to accept a Firestore client instance as an optional parameter --- .../data_access/firebase_fhir_data_access.py | 13 ++++++++----- tests/test_data_access.py | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/spezi_data_pipeline/data_access/firebase_fhir_data_access.py b/src/spezi_data_pipeline/data_access/firebase_fhir_data_access.py index 4895abf..005ba56 100644 --- a/src/spezi_data_pipeline/data_access/firebase_fhir_data_access.py +++ b/src/spezi_data_pipeline/data_access/firebase_fhir_data_access.py @@ -35,7 +35,7 @@ # Standard library imports import json import os -from typing import Any +from typing import Any, Optional # Related third-party imports from dataclasses import dataclass @@ -77,14 +77,17 @@ class FirebaseFHIRAccess: # pylint: disable=unused-variable Attributes: project_id (str): Identifier of the Firebase project. - service_account_key_file (str): Path to the Firebase service account key file for + service_account_key_file (str | None): Path to the Firebase service account key file for authentication. 
- db (Optional[firestore.Client]): A Firestore client instance for database operations, + db (firestore.Client | None): A Firestore client instance for database operations, initialized upon successful connection. """ def __init__( - self, project_id: str, service_account_key_file: str | None = None + self, + project_id: str, + service_account_key_file: Optional[str] = None, # pylint: disable=consider-alternative-union-syntax + db: Optional[firestore.client] = None, # pylint: disable=consider-alternative-union-syntax ) -> None: """ Initializes the FirebaseFHIRAccess instance with Firebase service account @@ -92,7 +95,7 @@ def __init__( """ self.project_id = project_id self.service_account_key_file = service_account_key_file - self.db = None + self.db = db def connect(self) -> None: """ diff --git a/tests/test_data_access.py b/tests/test_data_access.py index 1a772a6..5db4177 100644 --- a/tests/test_data_access.py +++ b/tests/test_data_access.py @@ -61,6 +61,7 @@ class TestFirebaseFHIRAccess(unittest.TestCase): # pylint: disable=unused-varia def setUp(self): self.project_id = "test-project" self.service_account_key_file = "/path/to/service/account.json" + self.mock_db = MagicMock() @patch("os.path.exists") @patch("os.environ") @@ -167,7 +168,6 @@ def test_fetch_data_valid_loinc_code(self, mock_firestore): "users", "HealthKit", [ECG_RECORDING_LOINC_CODE] ) - # Verify self.assertIsNotNone(result) self.assertEqual(len(result), 0) From 341a09179d2faeed1223f408fcca6814424fa89d Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Wed, 17 Jul 2024 11:32:09 -0700 Subject: [PATCH 24/24] Made `project_id` an Optional argument in FirebaseFHIRAccess initialization --- .../data_access/firebase_fhir_data_access.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/spezi_data_pipeline/data_access/firebase_fhir_data_access.py b/src/spezi_data_pipeline/data_access/firebase_fhir_data_access.py index 005ba56..9088a94 100644 --- a/src/spezi_data_pipeline/data_access/firebase_fhir_data_access.py +++ b/src/spezi_data_pipeline/data_access/firebase_fhir_data_access.py @@ -85,9 +85,15 @@ class FirebaseFHIRAccess: # pylint: disable=unused-variable def __init__( self, - project_id: str, - service_account_key_file: Optional[str] = None, # pylint: disable=consider-alternative-union-syntax - db: Optional[firestore.client] = None, # pylint: disable=consider-alternative-union-syntax + project_id: Optional[ # pylint: disable=consider-alternative-union-syntax + str + ] = None, + service_account_key_file: Optional[ # pylint: disable=consider-alternative-union-syntax + str + ] = None, + db: Optional[ # pylint: disable=consider-alternative-union-syntax + firestore.client + ] = None, ) -> None: """ Initializes the FirebaseFHIRAccess instance with Firebase service account
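The refactor in the last two patches makes both `project_id` and the Firestore client injectable. Below is a minimal sketch of how the updated constructor could be exercised, assuming only the keyword names visible in the diff (`project_id`, `service_account_key_file`, `db`); whether `connect()` short-circuits when a client is supplied is not shown in this excerpt.

```python
from unittest.mock import MagicMock

from spezi_data_pipeline.data_access.firebase_fhir_data_access import FirebaseFHIRAccess

# Production path: authenticate with a service account key, as before.
access = FirebaseFHIRAccess(
    project_id="your-project-id",  # placeholder
    service_account_key_file="path_to_service_account_key_file.json",  # placeholder
)
access.connect()

# Test path: inject a pre-built or mocked Firestore client directly, bypassing
# credential handling (mirrors the MagicMock set up in test_data_access.py).
firebase_access_with_client = FirebaseFHIRAccess(db=MagicMock())
```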