Merge pull request #125 from uc-cdis/chore/add-vlmd-unit-tests

Chore/add unit tests for vlmd-submission-tools
uc-cdis · Oct 10, 2023 · 278249f · 278249f
2 parents 3ce7146 + 96e9bad
commit 278249f
Show file tree

Hide file tree

Showing 20 changed files with 1,533 additions and 201 deletions.
diff --git a/.github/workflows/ci_vlmd.yaml b/.github/workflows/ci_vlmd.yaml
@@ -2,16 +2,15 @@ name: CI Workflow
 
 on:
   push:
-    branches:
-      - master
     paths:
-      - vlmd-submission-tools/
+      - vlmd-submission-tools/**
+      - .github/workflows/ci_vlmd.yaml
   pull_request:
-    branches:
-      - master
+    paths:
+      - vlmd-submission-tools/**
 
 jobs:
-  test:
+  vlmd-wf-test:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v1
@@ -28,9 +27,10 @@ jobs:
     - name: Install dependencies
       working-directory: ./vlmd-submission-tools
       run: |
-        python -m pip install --upgrade pip poetry
-        poetry install
-    - name: Test with unittest
+        pip install poetry
+        poetry install -vv --no-interaction
+        poetry show -vv
+    - name: Test with pytest
       working-directory: ./vlmd-submission-tools
       run: |
-        poetry run python -m unittest discover -v -s tests/
+        poetry run pytest -vv --cov=vlmd_submission_tools tests
diff --git a/.secrets.baseline b/.secrets.baseline
@@ -3,7 +3,7 @@
     "files": "^.secrets.baseline$",
     "lines": null
   },
-  "generated_at": "2023-09-12T16:27:46Z",
+  "generated_at": "2023-09-28T19:27:35Z",
   "plugins_used": [
     {
       "name": "AWSKeyDetector"
@@ -468,11 +468,35 @@
     ],
     "vlmd-submission-tools/poetry.lock": [
       {
-        "hashed_secret": "300b593b449a1c9030d236dac2ad355ab188b9cb",
+        "hashed_secret": "5b240644452ed40dfe194673b7db6b641971c720",
         "is_verified": false,
-        "line_number": 654,
+        "line_number": 1221,
         "type": "Hex High Entropy String"
       }
+    ],
+    "vlmd-submission-tools/tests/test_common_utils.py": [
+      {
+        "hashed_secret": "8318df9ecda039deac9868adf1944a29a95c7114",
+        "is_verified": false,
+        "line_number": 100,
+        "type": "Secret Keyword"
+      }
+    ],
+    "vlmd-submission-tools/tests/test_subcommand_get_dictionary_url.py": [
+      {
+        "hashed_secret": "8318df9ecda039deac9868adf1944a29a95c7114",
+        "is_verified": false,
+        "line_number": 151,
+        "type": "Secret Keyword"
+      }
+    ],
+    "vlmd-submission-tools/tests/test_subcommand_upload_dictionary_to_mds.py": [
+      {
+        "hashed_secret": "8318df9ecda039deac9868adf1944a29a95c7114",
+        "is_verified": false,
+        "line_number": 129,
+        "type": "Secret Keyword"
+      }
     ]
   },
   "version": "0.13.1",

diff --git a/vlmd-submission-tools/poetry.lock b/vlmd-submission-tools/poetry.lock
diff --git a/vlmd-submission-tools/pyproject.toml b/vlmd-submission-tools/pyproject.toml
@@ -25,6 +25,9 @@ boto3 = "^1.26.122"
 kubernetes = "^26.1.0"
 petl = "^1.7.12"
 frictionless = "^5.12.1"
+parameterized = "^0.9.0"
+pytest = "^7.4.2"
+pytest-cov = "^4.1.0"
 
 [tool.poetry.dev-dependencies]
 

diff --git a/vlmd-submission-tools/tests/templates/template_submission.csv b/vlmd-submission-tools/tests/templates/template_submission.csv
@@ -0,0 +1,7 @@
+name,title,description,type,format,constraints.maxLength,constraints.pattern,constraints.minimum,constraints.maximum,ordered,missingValues,trueValues,falseValues,repo_link,cde_id,ontology_id,encoding,constraints.enum
+participant_id,Participant Id,Unique identifier for participant,string,,,[A-Z][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9],,,,,,,,,,,
+race,Race,Self-reported race,integer,,,,,,,99,,,,NLM=Fakc6Jy2x|NLM=m1_atF7L7U,,1=White|2=Black or African American|3=American Indian or Alaska Native|4=Native| 5=Hawaiian or Other Pacific Islander|6=Asian|7=Some other race|8=Multiracial|99=Not reported,1|2|3|4|5|6|7|8
+age,Age,What is your age? (age at enrollment),integer,years,,,0,90,,,,,,,,,
+hispanic,"Hispanic, Latino, or Spanish Origin","Are you of Hispanic, Latino, or Spanish origin?",boolean,,,,,,,Not reported,No,Yes,,,,,
+sex_at_birth,Sex at Birth,The self-reported sex of the participant/subject at birth,string,,,,,,,Prefer not to answer|Unknown,,,,NLM=ezelurehr2,,,Male|Female|Intersex|None of these describe me|Prefer not to answer|Unknown
+SU4,Heroin Days Used,During the past 30 days how many days did you use heroin (alone or mixed with other drugs)? ] [Write 0 days if no use],integer,days,,,,,,,,,,,is=CHEBI=27808|is=RXNORM=3304,,
diff --git a/vlmd-submission-tools/tests/templates/template_submission.json b/vlmd-submission-tools/tests/templates/template_submission.json
@@ -0,0 +1,107 @@
+[
+    {
+        "name": "participant_id",
+        "title": "Participant Id",
+        "description": "Unique identifier for participant",
+        "type": "string",
+        "constraints": {
+            "pattern": "[A-Z][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9]"
+        }
+    },
+    {
+        "name": "race",
+        "title": "Race",
+        "description": "Self-reported race",
+        "type": "integer",
+        "cde_id": [
+            {
+                "source": "NLM",
+                "id": "Fakc6Jy2x"
+            },
+            {
+                "source": "NLM",
+                "id": "m1_atF7L7U"
+            }
+        ],
+        "encoding": {
+            "1": "White",
+            "2": "Black or African American",
+            "3": "American Indian or Alaska Native",
+            "4": "Native",
+            "5": "Hawaiian or Other Pacific Islander",
+            "6": "Asian",
+            "7": "Some other race",
+            "8": "Multiracial",
+            "99": "Not reported"
+        },
+        "constraints": {
+            "enum": [
+                "1",
+                "2",
+                "3",
+                "4",
+                "5",
+                "6",
+                "7",
+                "8"
+            ]
+        }
+    },
+    {
+        "name": "age",
+        "title": "Age",
+        "description": "What is your age? (age at enrollment)",
+        "type": "integer",
+        "format": "years",
+        "constraints": {
+            "maximum": 90
+        }
+    },
+    {
+        "name": "hispanic",
+        "title": "Hispanic, Latino, or Spanish Origin",
+        "description": "Are you of Hispanic, Latino, or Spanish origin?",
+        "type": "boolean"
+    },
+    {
+        "name": "sex_at_birth",
+        "title": "Sex at Birth",
+        "description": "The self-reported sex of the participant/subject at birth",
+        "type": "string",
+        "cde_id": [
+            {
+                "source": "NLM",
+                "id": "ezelurehr2"
+            }
+        ],
+        "constraints": {
+            "enum": [
+                "Male",
+                "Female",
+                "Intersex",
+                "None of these describe me",
+                "Prefer not to answer",
+                "Unknown"
+            ]
+        }
+    },
+    {
+        "name": "SU4",
+        "title": "Heroin Days Used",
+        "description": "During the past 30 days how many days did you use heroin (alone or mixed with other drugs)? ] [Write 0 days if no use]",
+        "type": "integer",
+        "format": "days",
+        "ontology_id": [
+            {
+                "relation": "is",
+                "source": "CHEBI",
+                "id": "27808"
+            },
+            {
+                "relation": "is",
+                "source": "RXNORM",
+                "id": "3304"
+            }
+        ]
+    }
+]
diff --git a/vlmd-submission-tools/tests/templates/template_submission.tsv b/vlmd-submission-tools/tests/templates/template_submission.tsv
@@ -0,0 +1,7 @@
+name	title	description	type
+participant_id	Participant Id	Unique identifier for participant	string
+race	Race	Self-reported race	integer
+age	Age	What is your age? (age at enrollment)	integer
+hispanic	"Hispanic	 Latino	 or Spanish Origin"	"Are you of Hispanic	 Latino	 or Spanish origin?"	boolean
+sex_at_birth	Sex at Birth	The self-reported sex of the participant/subject at birth	string
+SU4	Heroin Days Used	During the past 30 days how many days did you use heroin (alone or mixed with other drugs)? ] [Write 0 days if no use]	integer
diff --git a/vlmd-submission-tools/tests/templates/template_submission_bad_format.csv b/vlmd-submission-tools/tests/templates/template_submission_bad_format.csv
@@ -0,0 +1,7 @@
+name,title,description,type,format,constraints.maxLength,constraints.pattern,constraints.minimum,constraints.maximum,ordered,missingValues,trueValues,falseValues,repo_link,cde_id,ontology_id,encoding,constraints.enum
+participant_id,Participant Id,Unique identifier for participant,string,9999,,[A-Z][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9],,,,,,,,,,,
+race,Race,Self-reported race,integer,,,,,,,99,,,,NLM=Fakc6Jy2x|NLM=m1_atF7L7U,,1=White|2=Black or African American|3=American Indian or Alaska Native|4=Native| 5=Hawaiian or Other Pacific Islander|6=Asian|7=Some other race|8=Multiracial|99=Not reported,1|2|3|4|5|6|7|8
+age,Age,What is your age? (age at enrollment),integer,years,,,0,90,,,,,,,,,
+hispanic,"Hispanic, Latino, or Spanish Origin","Are you of Hispanic, Latino, or Spanish origin?",boolean,,,,,,,Not reported,No,Yes,,,,,
+sex_at_birth,Sex at Birth,The self-reported sex of the participant/subject at birth,string,,,,,,,Prefer not to answer|Unknown,,,,NLM=ezelurehr2,,,Male|Female|Intersex|None of these describe me|Prefer not to answer|Unknown
+SU4,Heroin Days Used,During the past 30 days how many days did you use heroin (alone or mixed with other drugs)? ] [Write 0 days if no use],integer,days,,,,,,,,,,,is=CHEBI=27808|is=RXNORM=3304,,
diff --git a/vlmd-submission-tools/tests/templates/template_submission_minimal.json b/vlmd-submission-tools/tests/templates/template_submission_minimal.json
@@ -0,0 +1,41 @@
+{
+    "title": "Minimal Example VLMD",
+    "description": "This is a minimally filled out template",
+    "data_dictionary": [
+        {
+            "name": "participant_id",
+            "description": "Unique identifier for participant",
+            "type": "string"
+        },
+        {
+            "name": "race",
+            "description": "Self-reported race",
+            "type": "integer"
+        },
+        {
+            "name": "age",
+            "description": "What is your age? (age at enrollment)",
+            "type": "integer"
+        },
+        {
+            "name": "hispanic",
+            "description": "Are you of Hispanic, Latino, or Spanish origin?",
+            "type": "boolean"
+        },
+        {
+            "name": "sex_at_birth",
+            "description": "The self-reported sex of the participant/subject at birth",
+            "type": "string"
+        },
+        {
+            "name": "SU4",
+            "description": "During the past 30 days how many days did you use heroin (alone or mixed with other drugs)? ] [Write 0 days if no use]",
+            "type": "integer"
+        },
+        {
+            "name": "pulse_rate",
+            "description": "Heart rate measured at systemic artery",
+            "type": "number"
+        }
+    ]
+}
diff --git a/vlmd-submission-tools/tests/test_common_utils.py b/vlmd-submission-tools/tests/test_common_utils.py
@@ -0,0 +1,109 @@
+import base64
+from unittest.mock import MagicMock, patch
+
+import json
+import pytest
+import requests
+
+from vlmd_submission_tools.common import utils
+
+class TestCommonsUtils():
+
+    @patch('kubernetes.config.load_kube_config')
+    @patch('kubernetes.client.CoreV1Api.read_namespaced_secret')
+    def test_get_client_secret(self, mocked_kube_client, mocked_kube_config):
+        # method should parse out the client_id and client_secret from the kubernetes secret
+        client_secret_name="my_g3auto_secret"
+        client_secret_key="fence_client_credentials.json"
+        client_id_config="my_client_id"
+        client_secret_config="my_client_secret"
+        namespace = "default"
+
+        expected_client_id = "test_client_id"
+        expected_client_key = "test_client_key"
+
+        expected_secret_json = {
+            client_id_config: expected_client_id,
+            client_secret_config: expected_client_key
+        }
+        mocked_kube_secret = json.dumps(expected_secret_json).encode('utf-8')
+        mocked_kube_secret = base64.b64encode(mocked_kube_secret)
+        mocked_kube_client.return_value = MagicMock(data={client_secret_key: mocked_kube_secret})
+        mocked_kube_config.return_value = MagicMock()
+
+        response = utils.get_client_secret(
+            client_secret_name, client_secret_key, client_id_config, client_secret_config, namespace
+        )
+        assert response == (expected_client_id, expected_client_key)
+
+
+    @patch('requests.get')
+    def test_check_mds_study_id(self, mocked_post):
+        # method should parse the existing data dictionaries out of a mds request
+        hostname = "mycommons.planx-pla.net"
+        study_id = "my_study_id"
+        expected_data_dictionaries = {
+            "my first dictionary": "guid1",
+            "my second dictionary": "guid2"
+        }
+        mock_mds_response = MagicMock(requests.Response)
+        mock_mds_response.status_code = 200
+        mock_mds_response.json.return_value = {
+            "_guid_type": "discovery_metadata",
+            "data_dictionaries": expected_data_dictionaries
+        }
+        mocked_post.return_value = mock_mds_response
+
+        result = utils.check_mds_study_id(study_id, hostname)
+        assert result == expected_data_dictionaries
+
+
+    @patch('requests.get')
+    def test_check_mds_study_id_exception(self, mocked_post):
+        # test study_id is missing or _guid_type is not 'discovery_metadata'
+
+        hostname = "mycommons.planx-pla.net"
+        study_id = "my_study_id"
+        expected_data_dictionaries = {
+            "my first dictionary": "guid1",
+            "my second dictionary": "guid2"
+        }
+        # mds returns 404
+        mock_mds_response = MagicMock(requests.Response)
+        mock_mds_response.status_code = 404
+        mock_mds_response.json.return_value = {
+            "_guid_type": "discovery_metadata",
+            "data_dictionaries": expected_data_dictionaries
+        }
+        mocked_post.return_value = mock_mds_response
+        expected_error = f"Study ID {study_id} not found in MDS"
+        with pytest.raises(ValueError, match=expected_error):
+            utils.check_mds_study_id(study_id, hostname)
+
+        # _guid_type is not discovery_metadata
+        mock_mds_response.status_code = 200
+        mock_mds_response.json.return_value = {
+            "_guid_type": "some_other_guid_type",
+            "data_dictionaries": expected_data_dictionaries
+        }
+        mocked_post.return_value = mock_mds_response
+        expected_error = "Study ID is not dicovery metadata"
+        with pytest.raises(ValueError, match=expected_error):
+            utils.check_mds_study_id(study_id, hostname)
+
+
+    @patch('requests.post')
+    def test_get_client_token(self, mocked_post):
+        # method should parse out the token from the fence response
+        hostname = "mycommons.planx-pla.net"
+        client_id = "client_id"
+        client_secret = "client_secret"
+        expected_token = "my_token"
+        # mock the fence response for requesting token
+        mock_fence_response = MagicMock(requests.Response)
+        mock_fence_response.status_code = 200
+        mock_fence_response.json.return_value = {"access_token": expected_token}
+        mocked_post.return_value = mock_fence_response
+
+        result = utils.get_client_token(hostname, client_id, client_secret)
+        assert result == expected_token