Merge pull request #281 from 4dn-dcic/kmp_schemas_from_vapp

Allow ff_utils.get_schema and ff_utils.get_schemas to take a portal_vapp
4dn-dcic · Sep 7, 2023 · b079275 · b079275
2 parents 2a684c0 + 0963900
commit b079275
Show file tree

Hide file tree

Showing 6 changed files with 200 additions and 19 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -7,6 +7,23 @@ Change Log
 ----------
 
 
+7.10.0
+======
+
+* In ``ff_utils``:
+
+  * New arguments ``portal_env=`` and ``portal_vapp`` to ``get_schema``
+    for function ``get_schema`` and ``get_schemas``.
+
+* In ``s3_utils``:
+
+  * Fix a failing test (caused by an environmental change, no functional change).
+
+* In ``license_utils``:
+
+  * Allow C4 infrastructure to use the ``chardet`` library.
+
+
 7.9.0
 =====
 

diff --git a/dcicutils/ff_utils.py b/dcicutils/ff_utils.py
@@ -8,7 +8,7 @@
 
 from collections import namedtuple
 from elasticsearch.exceptions import AuthorizationException
-from typing import Optional, Dict, List
+from typing import Dict, List, Optional
 from urllib.parse import parse_qs, urlencode, urlparse, urlunparse
 from . import s3_utils, es_utils
 from .common import (
@@ -17,7 +17,7 @@
     # S3BucketName, S3KeyName,
 )
 from .lang_utils import disjoined_list
-from .misc_utils import PRINT, to_camel_case, remove_suffix
+from .misc_utils import PRINT, to_camel_case, remove_suffix, VirtualApp
 
 
 # TODO (C4-92, C4-102): Probably to centralize this information in env_utils. Also figure out relation to CGAP.
@@ -419,7 +419,7 @@ def search_result_generator(page_generator):
     but where a page size of 3 is used with start position 0. That call will return A,C,E. The
     user may expect G,I on the second page, but before it can be done, suppose an element D is
     indexed and that the stored data is A,C,D,E,G,I,K,M. Requesting data from start position 0 would
-    now return A,C,D but we already had the first page, so we request data starting at position 3
+    now return A,C,D, but we already had the first page, so we request data starting at position 3
     for the second page and get E,G,I.  That means our sequence of return values would be A,C,E,E,G,I,K,M,
     or, in other words, showing a duplication. To avoid this, we keep track of the IDs we've seen
     and show only the first case of each element, so A,C,E,G,I,K,M. (We won't see the D, but we weren't
@@ -647,7 +647,7 @@ def get_associated_qc_metrics(uuid, key=None, ff_env=None, include_processed_fil
                               include_raw_files=False,
                               include_supplementary_files=False):
     """
-    Given a uuid of an experimentSet return a dictionary of dictionaries with each dictionary
+    Given a UUID of an experimentSet return a dictionary of dictionaries with each dictionary
     representing a quality metric.
 
     Args:
@@ -942,41 +942,90 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth):
                 yield hit['_source']  # yield individual items from ES
 
 
-def get_schema(name, key=None, ff_env=None) -> Dict:
+def resolve_portal_env(ff_env: Optional[str], portal_env: Optional[str],
+                       portal_vapp: Optional[VirtualApp]) -> Optional[str]:
+    """
+    Resolves which of ff_env and portal_env to use (after doing consistency checking).
+    There are two consistency checks performed, for which an error is raised on failure:
+        1. If neither ff_env= and portal_env= is None, the values must be compatible.
+        2. If either ff_env= or portal_env= is not None, portal_vapp= must be None.
+
+    The intent is that callers will do:
+        portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp)
+    and then afterward not have to worry that arguments are inconsistent.
+
+    Args:
+        ff_env:      an environment name or None
+        portal_env:  an environment name or None
+        portal_vapp: a VirtualApp or None
+    """
+    if ff_env:
+        if portal_env and portal_env != ff_env:
+            raise ValueError("You may not supply both portal_env= and ff_env= together.")
+        portal_env = ff_env
+    if portal_env and portal_vapp:
+        env_arg_name = 'ff_env=' if ff_env else 'portal_env='
+        raise ValueError(f"You may not supply both portal_vapp= and {env_arg_name} together.")
+    return portal_env
+
+
+def get_schema(name, key=None, ff_env: Optional[str] = None, portal_env: Optional[str] = None,
+               portal_vapp: Optional[VirtualApp] = None) -> Dict:
     """
     Gets the schema definition with the given name.
 
+    Only one of portal_env= (or ff_env=) or portal_vapp= can be provided. This determines how the schemas are obtained.
+
     Args:
         name (str):   a schema name (CamelCase or snake_case), or None
         key (dict):   standard ff_utils authentication key
-        ff_env (str): standard ff environment string
+        ff_env (str): standard environment string (deprecated, please prefer portal_env=)
+        portal_env:   standard environment string (compatible replacement for ff_env=)
+        portal_vapp:  a VirtualApp or None
 
     Returns:
         dict: contains key schema names and value item class names
     """
-    auth = get_authentication_with_server(key, ff_env)
-    url = f"profiles/{to_camel_case(name)}.json"
-    schema = get_metadata(url, key=auth, add_on='frame=raw')
-    return schema
+    portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp)
+    base_url = f"profiles/{to_camel_case(name)}.json"
+    add_on = 'frame=raw'
+    if portal_vapp:
+        full_url = f"{base_url}?{add_on}"
+        res = portal_vapp.get(full_url)
+        return get_response_json(res)
+    else:
+        schema = get_metadata(obj_id=base_url, key=key, ff_env=portal_env, add_on=add_on)
+        return schema
 
 
-def get_schemas(key=None, ff_env=None, *, allow_abstract=True, require_id=False) -> Dict[str, Dict]:
+def get_schemas(key=None, ff_env: Optional[str] = None, *, allow_abstract: bool = True, require_id: bool = False,
+                portal_env: Optional[str] = None, portal_vapp: Optional[VirtualApp] = None) -> Dict[str, Dict]:
     """
     Gets a dictionary of all schema definitions.
     By default, this returns all schemas, but the allow_abstract= and require_id= keywords allow limited filtering.
 
+    Only one of portal_env= (or ff_env=) or portal_vapp= can be provided. This determines how the schemas are obtained.
+
     Args:
         key (dict):               standard ff_utils authentication key
-        ff_env (str):             standard ff environment string
+        ff_env (str):             standard environment string (deprecated, please prefer portal_env=)
+        portal_env:               standard environment string (compatible replacement for ff_env=)
+        portal_vapp:              a VirtualApp or None
         allow_abstract (boolean): controls whether abstract schemas can be returned (default True, return them)
         require_id (boolean):     controls whether a '$id' field is required for schema to be included
                                   (default False, include even if no $id)
 
     Returns:
         dict: a mapping from keys that are schema names to schema definitions
     """
-    auth = get_authentication_with_server(key, ff_env)
-    schemas: Dict[str, Dict] = get_metadata('profiles/', key=auth, add_on='frame=raw')
+    portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp)
+    base_url = 'profiles/'
+    add_on = 'frame=raw'
+    if portal_vapp:
+        full_url = f"{base_url}?{add_on}"
+        schemas: Dict[str, Dict] = portal_vapp.get(full_url)
+    else:
+        schemas: Dict[str, Dict] = get_metadata(obj_id=base_url, key=key, ff_env=portal_env, add_on=add_on)
     filtered_schemas = {}
     for schema_name, schema in schemas.items():
         if allow_abstract or not schema.get('isAbstract'):

diff --git a/dcicutils/license_utils.py b/dcicutils/license_utils.py
@@ -810,6 +810,12 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
             'pytest-timeout',  # MIT Licensed
         ],
 
+        # Linking = With Restrictions, Private Use = Yes
+        # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
+        'GNU Lesser General Public License v2 or later (LGPLv2+)': [
+            'chardet'  # used at runtime during server operation (ingestion), but not modified or distributed
+        ],
+
         # Linking = With Restrictions, Private Use = Yes
         # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
         'GNU Lesser General Public License v3 or later (LGPLv3+)': [

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dcicutils"
-version = "7.9.0"
+version = "7.10.0"
 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
 authors = ["4DN-DCIC Team <[email protected]>"]
 license = "MIT"

diff --git a/test/test_ff_utils.py b/test/test_ff_utils.py
@@ -1316,6 +1316,111 @@ def get_it():
         time.sleep(2)
 
 
+@pytest.mark.unit
+def test_get_schema_with_vapp():
+
+    sample_vapp = mock.MagicMock()
+    sample_schema_metadata = {"foo": "foo-schema", "bar": "bar-schema"}
+    sample_auth = mock.MagicMock()
+
+    with pytest.raises(ValueError) as exc:
+        ff_utils.get_schema('User', ff_env='foo', portal_env='bar')
+    assert str(exc.value) == 'You may not supply both portal_env= and ff_env= together.'
+
+    with pytest.raises(ValueError) as exc:
+        ff_utils.get_schema('User', ff_env='foo', portal_vapp=sample_vapp)
+    assert str(exc.value) == 'You may not supply both portal_vapp= and ff_env= together.'
+
+    with pytest.raises(ValueError) as exc:
+        ff_utils.get_schema('User', portal_env='foo', portal_vapp=sample_vapp)
+    assert str(exc.value) == 'You may not supply both portal_vapp= and portal_env= together.'
+
+    for env_args in [{}, {'portal_env': 'foo'}, {'ff_env': 'foo'}]:
+
+        with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata:
+            with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server:
+
+                expected_env = list(env_args.items())[0][1] if env_args else None
+
+                mock_get_metadata.return_value = sample_schema_metadata
+                mock_get_authentication_with_server.return_value = sample_auth
+
+                # When called with no vapp, get_metadata is consulted (after getting auth info)
+                assert ff_utils.get_schema('User', **env_args) == sample_schema_metadata
+
+                mock_get_authentication_with_server.assert_not_called()
+                mock_get_metadata.assert_called_once_with(obj_id='profiles/User.json', key=None, ff_env=expected_env,
+                                                          add_on='frame=raw')
+
+                sample_vapp.get.assert_not_called()
+
+    sample_vapp.get.assert_not_called()
+
+    with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata:
+        with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server:
+
+            sample_vapp.get.return_value = MockResponse(200, json=sample_schema_metadata)
+
+            assert ff_utils.get_schema('User', portal_vapp=sample_vapp) == sample_schema_metadata
+
+            mock_get_authentication_with_server.assert_not_called()
+            mock_get_metadata.assert_not_called()
+
+            sample_vapp.get.assert_called_once_with('profiles/User.json?frame=raw')
+
+
+@pytest.mark.unit
+def test_get_schemas_with_vapp():
+
+    sample_vapp = mock.MagicMock()
+    sample_schema_metadata = {"foo": {"$id": "Foo.json"}, "bar": {"$id": "Bar.json"}}
+    sample_auth = mock.MagicMock()
+
+    with pytest.raises(ValueError) as exc:
+        ff_utils.get_schemas(ff_env='foo', portal_env='bar')
+    assert str(exc.value) == 'You may not supply both portal_env= and ff_env= together.'
+
+    with pytest.raises(ValueError) as exc:
+        ff_utils.get_schemas(ff_env='foo', portal_vapp=sample_vapp)
+    assert str(exc.value) == 'You may not supply both portal_vapp= and ff_env= together.'
+
+    with pytest.raises(ValueError) as exc:
+        ff_utils.get_schemas(portal_env='foo', portal_vapp=sample_vapp)
+    assert str(exc.value) == 'You may not supply both portal_vapp= and portal_env= together.'
+
+    for env_args in [{}, {'portal_env': 'foo'}, {'ff_env': 'foo'}]:
+
+        with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata:
+            with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server:
+
+                expected_env = list(env_args.items())[0][1] if env_args else None
+
+                mock_get_metadata.return_value = sample_schema_metadata
+                mock_get_authentication_with_server.return_value = sample_auth
+
+                assert ff_utils.get_schemas(**env_args) == sample_schema_metadata
+
+                mock_get_authentication_with_server.assert_not_called()
+                mock_get_metadata.assert_called_once_with(obj_id='profiles/', key=None, ff_env=expected_env,
+                                                          add_on='frame=raw')
+
+                sample_vapp.get.assert_not_called()
+
+    sample_vapp.get.assert_not_called()
+
+    with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata:
+        with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server:
+
+            sample_vapp.get.return_value = sample_schema_metadata
+
+            assert ff_utils.get_schemas(portal_vapp=sample_vapp) == sample_schema_metadata
+
+            mock_get_authentication_with_server.assert_not_called()
+            mock_get_metadata.assert_not_called()
+
+            sample_vapp.get.assert_called_once_with('profiles/?frame=raw')
+
+
 def test_get_schemas_options():
 
     mocked_schemas = {
@@ -1350,9 +1455,10 @@ def mocked_schemas_subset(keys):
 
         with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata:
 
-            def mocked_get_metadata(url, key, add_on):
-                assert url == "profiles/"  # this is the web API to ask for all profiles
-                assert key == 'some-auth'  # we assume auth is tested elsewhere
+            def mocked_get_metadata(obj_id, key, ff_env, add_on):
+                assert obj_id == "profiles/"  # this is the web API to ask for all profiles
+                assert key is None  # it would get looked up
+                assert ff_env is None  # it would get looked up, too
                 assert add_on == "frame=raw"  # we presently always send this
                 return mocked_schemas
 

diff --git a/test/test_s3_utils.py b/test/test_s3_utils.py
@@ -402,14 +402,17 @@ def test_s3utils_get_higlass_key(portal_env):
     #     assert key[dict_key]
 
 
+_GOOGLE_FOURFRONT_PROJECT_ID = "fourfront-396315"
+
+
 @pytest.mark.integrated
 @using_fresh_ff_state_for_testing()
 def test_s3utils_get_google_key():
     s3u = s3Utils(env='staging')
     keys = s3u.get_google_key()
     assert isinstance(keys, dict)
     assert keys['type'] == 'service_account'
-    assert keys["project_id"] == "fourdn-fourfront"
+    assert keys["project_id"] == _GOOGLE_FOURFRONT_PROJECT_ID
     for dict_key in ['private_key_id', 'private_key', 'client_email', 'client_id', 'auth_uri', 'client_x509_cert_url']:
         assert keys[dict_key]