diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 8cae87006..b74dac182 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,6 +7,23 @@ Change Log
 ----------
 
+7.9.0
+=====
+
+* In ``misc_utils``:
+
+  * New function ``to_camel_case`` that can take either snake_case or CamelCase input.
+
+* In ``qa_utils``:
+
+  * New function ``is_subdict`` for asymmetric testing of dictionary equivalence.
+
+* In ``ff_utils``:
+
+  * New function ``get_schema`` that will pull down an individual schema definition.
+  * New function ``get_schemas`` that will pull down all schema definitions.
+
+
 7.8.0
 =====
 
diff --git a/dcicutils/ff_utils.py b/dcicutils/ff_utils.py
index 37a0439db..101db132e 100644
--- a/dcicutils/ff_utils.py
+++ b/dcicutils/ff_utils.py
@@ -6,7 +6,6 @@ import time
 
 from collections import namedtuple
-from dcicutils.lang_utils import disjoined_list
 from elasticsearch.exceptions import AuthorizationException
 from typing import Optional, Dict, List
 from urllib.parse import parse_qs, urlencode, urlparse, urlunparse
@@ -16,7 +15,8 @@
     AnyAuthDict, AuthDict, SimpleAuthPair, AuthData, AnyAuthData, PortalEnvName,
     # S3BucketName, S3KeyName,
 )
-from .misc_utils import PRINT
+from .lang_utils import disjoined_list
+from .misc_utils import PRINT, to_camel_case
 
 
 # TODO (C4-92, C4-102): Probably to centralize this information in env_utils. Also figure out relation to CGAP.
@@ -281,7 +281,7 @@ def get_metadata(obj_id, key=None, ff_env=None, check_queue=False, add_on=''):
     Function to get metadata for a given obj_id (uuid or @id, most likely).
     Either takes a dictionary form authentication (MUST include 'server')
     or a string fourfront-environment.
-    Also a boolean 'check_queue', which if True
+    Also, a boolean 'check_queue', which, if True,
     will use information from the queues and/or datastore=database to
     ensure that the metadata is accurate.
     Takes an optional string add_on that should contain things like
@@ -421,7 +421,7 @@ def search_result_generator(page_generator):
     now return A,C,D but we already had the first page, so we request data starting at position 3
     for the second page and get E,G,I. That means our sequence of return values would be A,C,E,E,G,I,K,M, or,
     in other words, showing a duplication. To avoid this, we keep track of the IDs we've seen
-    and show only the first case of each element, so A,C,E,G,I,K,M. (We won't see the D but we weren't
+    and show only the first case of each element, so A,C,E,G,I,K,M. (We won't see the D, but we weren't
     going to see it anyway, and it wasn't available the time we started, so the timing was already close.)
 
     Unfortunately, we aren't so lucky for deletion, though that happens more rarely. That will cause
@@ -687,7 +687,7 @@ def get_associated_qc_metrics(uuid, key=None, ff_env=None, include_processed_fil
 
     resp = get_metadata(uuid, key=key, ff_env=ff_env)
 
-    # Checks wheter the input is a experiment or experimentset otherwise throws an error
+    # Checks whether the input is an Experiment or ExperimentSet, and otherwise throws an error.
     if 'ExperimentSet' not in resp['@type']:
         raise TypeError('Expected ExperimentSet')
 
@@ -862,7 +862,7 @@ def get_es_metadata(uuids, es_client=None, filters=None, sources=None, chunk_siz
             sources = ['embedded.files.uuid']
         i.e. getting all fields for lab in embedded frame
             sources = ['embedded.lab.*']
-        i.e. for getting a only object frame
+        i.e. for getting only an object frame
             sources = ['object.*']
     chunk_size: Integer chunk_size may be used to control the number of uuids that are
@@ -870,7 +870,7 @@ def get_es_metadata(uuids, es_client=None, filters=None, sources=None, chunk_siz
         ES reads to timeout.
     is_generator: Boolean is_generator will return a generator for individual results if True;
-        if False (default), returns a list of results.
+        if False (the default), returns a list of results.
     key: authentication key for ff_env (see get_authentication_with_server)
     ff_env: authentication by env (needs system variables)
     """
@@ -941,6 +941,40 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth):
             yield hit['_source']  # yield individual items from ES
 
 
+def get_schemas(key=None, ff_env=None) -> Dict[str, Dict]:
+    """
+    Gets a dictionary of all schema definitions.
+
+    Args:
+        key (dict): standard ff_utils authentication key
+        ff_env (str): standard ff environment string
+
+    Returns:
+        dict: a mapping from keys that are schema names to schema definitions
+    """
+    auth = get_authentication_with_server(key, ff_env)
+    schemas = get_metadata('profiles/', key=auth, add_on='frame=raw')
+    return schemas
+
+
+def get_schema(name, key=None, ff_env=None) -> Dict:
+    """
+    Gets the schema definition with the given name.
+
+    Args:
+        name (str): a schema name (CamelCase or snake_case), or None
+        key (dict): standard ff_utils authentication key
+        ff_env (str): standard ff environment string
+
+    Returns:
+        dict: the schema definition with the given name
+    """
+    auth = get_authentication_with_server(key, ff_env)
+    url = f"profiles/{to_camel_case(name)}.json"
+    schema = get_metadata(url, key=auth, add_on='frame=raw')
+    return schema
+
+
 def get_schema_names(key=None, ff_env=None):
     """
     Create a dictionary of all schema names to item class names
@@ -1034,7 +1068,7 @@ def remove_keys(my_dict, remove_list):
     chunk = 100  # chunk the requests - don't want to hurt es performance
 
     while uuid_list:
-        uuids_to_check = []  # uuids to add to uuid_list if not if not in item_uuids
+        uuids_to_check = []  # uuids to add to uuid_list if not in item_uuids
 
         # get the next page of data, recreating the es_client if need be
         try:
@@ -1121,7 +1155,7 @@ def _get_page(*, page, key=None, ff_env=None):
 
 def get_health_page(key=None, ff_env=None):
     """
-    Simple function to return the json for a FF health page
+    Simple function to return the json for an environment's health page
     """
     return _get_page(page='/health', key=key, ff_env=ff_env)
 
@@ -1143,7 +1177,7 @@ def get_indexing_status(key=None, ff_env=None):
 
 def get_counts_summary(env):
     """ Returns a named tuple given an FF name to check representing the counts state.
CountSummary - are_even: boolean on whether or not counts are even + are_even: boolean that is True if counts are even and False otherwise summary_total: raw value of counts """ totals = get_counts_page(ff_env=env) @@ -1182,7 +1216,7 @@ def execute_search(self, index, query, is_generator=False, page_size=200): :arg index: index to search under :arg query: query to run - :arg is_generator: boolean on whether or not to use a generator + :arg is_generator: boolean that is True if a generator is requested and otherwise False :arg page_size: if using a generator, how many results to give per request :returns: list of results of query or None @@ -1194,7 +1228,7 @@ def execute_search(self, index, query, is_generator=False, page_size=200): def search_es_metadata(index, query, key=None, ff_env=None, is_generator=False): """ - Executes a lucene search query on on the ES Instance for this + Executes a lucene search query on the ES Instance for this environment. NOTE: It is okay to use this function directly but for repeat usage please use @@ -1204,7 +1238,7 @@ def search_es_metadata(index, query, key=None, ff_env=None, is_generator=False): :arg query: dictionary of query :arg key: optional, 2-tuple authentication key (access_key_id, secret) :arg ff_env: ff_env to use - :arg is_generator: boolean on whether or not to use a generator + :arg is_generator: boolean that is True if a generator is requested and otherwise False :returns: list of results of query or None """ @@ -1484,7 +1518,7 @@ def dump_results_to_json(store, folder): def parse_s3_bucket_and_key_url(url: str) -> (str, str): - """ Parses the given s3 URL into its pair of bucket, key + """ Parses the given s3 URL into its pair of (bucket, key). Note that this function works the way it does because of how these urls end up in our database. Eventually we should clean this up. Format: diff --git a/dcicutils/misc_utils.py b/dcicutils/misc_utils.py index fd0747d43..88c228c7f 100644 --- a/dcicutils/misc_utils.py +++ b/dcicutils/misc_utils.py @@ -1331,6 +1331,16 @@ def snake_case_to_camel_case(s, separator='_'): return s.title().replace(separator, '') +def to_camel_case(s): + """ + Converts a string that might be in snake_case or CamelCase into CamelCase. + """ + if s[:1].isupper() and '_' not in s: + return s + else: + return snake_case_to_camel_case(s) + + def capitalize1(s): """ Capitalizes the first letter of a string and leaves the others alone. diff --git a/dcicutils/qa_utils.py b/dcicutils/qa_utils.py index 6a04fa510..472b198eb 100644 --- a/dcicutils/qa_utils.py +++ b/dcicutils/qa_utils.py @@ -10,6 +10,7 @@ import functools import hashlib import io +import json import logging import os import pytest @@ -720,7 +721,7 @@ def mock_action_handler(self, wrapped_action, *args, **kwargs): texts = remove_suffix('\n', text).split('\n') last_text = texts[-1] result = wrapped_action(text, **kwargs) # noQA - This call to print is low-level implementation - # This only captures non-file output output. + # This only captures non-file output. file = kwargs.get('file') if file is None: file = sys.stdout @@ -853,7 +854,7 @@ def __init__(self, *, region_name=None, boto3=None, **kwargs): self._aws_secret_access_key = kwargs.get("aws_secret_access_key") self._aws_region = region_name - # These is specific for testing. + # This is specific for testing. self._aws_credentials_dir = None # FYI: Some things to note about how boto3 (and probably any AWS client) reads AWS credentials/region. 
@@ -915,7 +916,7 @@ def put_credentials_for_testing(self, self._aws_secret_access_key = aws_secret_access_key self._aws_region = region_name - # These is specific for testing. + # This is specific for testing. self._aws_credentials_dir = aws_credentials_dir @staticmethod @@ -2270,8 +2271,7 @@ def create_object_for_testing(self, object_content: str, *, Bucket: str, Key: st def upload_fileobj(self, Fileobj, Bucket, Key, **kwargs): # noqa - Uppercase argument names are chosen by AWS self.check_for_kwargs_required_by_mock("upload_fileobj", Bucket=Bucket, Key=Key, **kwargs) data = Fileobj.read() - PRINT("Uploading %s (%s bytes) to bucket %s key %s" - % (Fileobj, len(data), Bucket, Key)) + PRINT(f"Uploading {Fileobj} ({len(data)} bytes) to bucket {Bucket} key {Key}") with self.s3_files.open(os.path.join(Bucket, Key), 'wb') as fp: fp.write(data) @@ -2284,8 +2284,7 @@ def download_fileobj(self, Bucket, Key, Fileobj, **kwargs): # noqa - Uppercase self.check_for_kwargs_required_by_mock("download_fileobj", Bucket=Bucket, Key=Key, **kwargs) with self.s3_files.open(os.path.join(Bucket, Key), 'rb') as fp: data = fp.read() - PRINT("Downloading bucket %s key %s (%s bytes) to %s" - % (Bucket, Key, len(data), Fileobj)) + PRINT(f"Downloading bucket {Bucket} key {Key} ({len(data)} bytes) to {Fileobj}") Fileobj.write(data) def download_file(self, Bucket, Key, Filename, **kwargs): # noqa - Uppercase argument names are chosen by AWS @@ -2382,7 +2381,7 @@ def head_bucket(self, Bucket): # noQA - AWS argument naming style raise ClientError(operation_name='HeadBucket', error_response={ # noQA - PyCharm wrongly complains about this dictionary "Error": {"Code": "404", "Message": "Not Found"}, - "ResponseMetadata": {"HTTPStatusCode": 404}, + "ResponseMetadata": self.compute_mock_response_metadata(http_status_code=404), }) def get_object_tagging(self, Bucket, Key): @@ -2645,7 +2644,7 @@ def list_objects(self, Bucket, Prefix=None): # noQA - AWS argument naming style } def list_objects_v2(self, Bucket): # noQA - AWS argument naming style - # This is different but similar to list_objects. However we don't really care about that. + # This is different but similar to list_objects. However, we don't really care about that. return self.list_objects(Bucket=Bucket) def copy_object(self, CopySource, Bucket, Key, CopySourceVersionId=None, @@ -2698,7 +2697,7 @@ def _copy_object(self, CopySource, Bucket, Key, CopySourceVersionId, StorageClas new_storage_class = target_storage_class if (copy_in_place and GlacierUtils.transition_involves_glacier_restoration(source_storage_class, target_storage_class)): - new_storage_class = None # For a restoration, the don't update the glacier data. It's restored elsewhere. + new_storage_class = None # For a restoration, don't update the glacier data. It's restored elsewhere. target_attribute_block.restore_temporarily(delay_seconds=self.RESTORATION_DELAY_SECONDS, duration_days=1, storage_class=target_storage_class) PRINT(f"Set up restoration {target_attribute_block.restoration}") @@ -2806,6 +2805,7 @@ def _delete_versioned_object(self, s3_filename, version_id) -> Dict[str, Any]: def restore_object(self, Bucket, Key, RestoreRequest, VersionId: Optional[str] = None, StorageClass: Optional[S3StorageClass] = None): + # TODO: VersionId is unused in the arglist. Is that OK? 
-kmp 19-Aug-2023 duration_days: int = RestoreRequest.get('Days') storage_class: S3StorageClass = StorageClass or self.storage_class s3_filename = f"{Bucket}/{Key}" @@ -3047,8 +3047,8 @@ def known_bug_expected(jira_ticket=None, fixed=False, error_class=None): with known_bug_expected(jira_ticket="TST-00001", error_class=RuntimeError, fixed=True): ... stuff that fails ... - If the previously-expected error (now thought to be fixed) happens, an error will result so it's easy to tell - if there's been a regression. + If the previously-expected error (now thought to be fixed) happens, an error will result + so that it's easy to tell if there's been a regression. Parameters: @@ -3088,7 +3088,7 @@ def client_failer(operation_name, code=400): def fail(message, code=code): raise ClientError( { # noQA - PyCharm wrongly complains about this dictionary - "Error": {"Message": message, "Code": code} + "Error": {"Message": message, "Code": code} # noQA - Boto3 declares a string here but allows int code }, operation_name=operation_name) return fail @@ -3473,3 +3473,79 @@ def mocked_input(*args, **kwargs): inputs.append(item) yield assert not inputs, "Did not use all inputs." + + +def is_subdict(json1, json2, desc1="json1", desc2="json2", verbose=True): + """ + Does asymmetric testing of dictionary equivalence, assuring json2 has all the content of json1, + even if not vice versa. In other words, the dictionary structure is equivalent to the extent + that (recursively) all dictionary keys on the left occur in the right hand side even if not + necessarily all dictionary keys on the right occur in the left. + + For example, + x = {"foo": 3} + y = {"foo": 3, "bar": 4} + is_subdict(x, y) is True + is_subdict(y, x) is False + + The desc1 and desc2 can be provided to help with verbose mode, identifying what is on the left + and what is on the right. + + :param json1: a JSON structure, the outer part of which is a dictionary + :param json2: a JSON structure, the outer part of which is a dictionary + :param desc1: a name or brief description for the json1 (default "json1") + :param desc2: a name or brief description for the json2 (default "json2") + :param verbose: a boolean (default True) that controls whether the comparison is verbose, showing + output explaining failures or near-failures when True, and otherwise, if False, not showing such output. 
+ """ + + + def out(x): + if verbose: + PRINT(x) + + def sorted_set_repr(x): + return f"{{{repr(sorted(x))[1:-1]}}}" + + def recurse(json1, json2, path=""): + if isinstance(json1, dict) and isinstance(json2, dict): + k1 = set(json1.keys()) + k2 = set(json2.keys()) + result = k1 <= k2 + if result: + if k1 != k2: + out(f"Non-fatal keyword mismatch at {path!r}:") + out(f" {desc1} keys: {sorted_set_repr(k1)}") + out(f" {desc2} keys: {sorted_set_repr(k2)}") + result = all(recurse(value, json2[key], path=f"{path}.{key}") + for key, value in json1.items()) + if not result: + # out(f"Recursive failure at {path!r} in object comparison") + pass + else: + out(f"Failed at {path!r} in object comparison due to key set mismatch:") + out(f" {desc1} keys: {sorted_set_repr(k1)}") + out(f" {desc2} keys: {sorted_set_repr(k2)}") + elif isinstance(json1, list) and isinstance(json2, list): + len1 = len(json1) + len2 = len(json2) + result = len1 == len2 + if not result: + out(f"Failed at {path!r} in list comparison due to length mismatch: {len1} vs {len2}") + else: + result = all(recurse(json1[i], json2[i], path=f"{path}[{i}]") for i in range(len1)) + if not result: + # out(f"Recursive failure at {path!r} in list comparison") + pass + elif type(json1) == type(json2): + result = json1 == json2 + if not result: + out(f"Failed at {path!r} due to value mismatch: {json.dumps(json1)} != {json.dumps(json2)}") + else: + result = False + if not result: + out(f"Mismatch at {path}.") + out(f" {desc1}: {json1}") + out(f" {desc2}: {json2}") + return result + return recurse(json1, json2) diff --git a/pyproject.toml b/pyproject.toml index f31d26f91..70f90b624 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "7.8.0" +version = "7.9.0" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" diff --git a/test/test_ff_utils.py b/test/test_ff_utils.py index 3037e0ad3..145d3bc86 100644 --- a/test/test_ff_utils.py +++ b/test/test_ff_utils.py @@ -1,4 +1,6 @@ +import base64 import copy +import hashlib import json import os import pytest @@ -11,7 +13,7 @@ from dcicutils.ff_mocks import mocked_s3utils_with_sse, TestScenarios, RequestsTestRecorder from dcicutils.misc_utils import make_counter, remove_prefix, remove_suffix, check_true from dcicutils.qa_utils import ( - check_duplicated_items_by_key, ignored, raises_regexp, MockResponse, MockBoto3, MockBotoSQSClient, + check_duplicated_items_by_key, ignored, raises_regexp, MockResponse, MockBoto3, MockBotoSQSClient, is_subdict, ) from types import GeneratorType from unittest import mock @@ -177,7 +179,7 @@ def test_url_params_functions(): def test_unified_authentication_decoding(integrated_ff): """ - Test that we decode the various formats and locations of keys and secrets in an uniform way. + Test that we decode the various formats and locations of keys and secrets in a uniform way. """ any_id, any_secret, any_env = 'any-id', 'any-secret', 'any-env' @@ -369,7 +371,7 @@ def test_unified_authentication_integrated(integrated_ff): @using_fresh_ff_deployed_state_for_testing() def test_unified_authentication_prod_envs_integrated_only(): # This is ONLY an integration test. There is no unit test functionality tested here. - # All of the functionality used here is already tested elsewhere. + # All functionality used here is already tested elsewhere. 
# Fourfront prod ff_prod_auth = ff_utils.unified_authentication(ff_env="data") @@ -473,7 +475,7 @@ def compute_mock_queue_attribute(self, QueueUrl, Attribute): # noQA - Amazon AW @pytest.mark.flaky def test_stuff_in_queues_integrated(integrated_ff): """ - Gotta index a bunch of stuff to make this work + We need to index a bunch of stuff to make this work """ search_res = ff_utils.search_metadata('search/?limit=all&type=File', key=integrated_ff['ff_key']) # just take the first handful @@ -552,7 +554,7 @@ def test_get_metadata_unit(): ts = TestScenarios - # The first part of this function sets up some common tools and then we test various scenarios + # The first part of this function sets up some common tools, and then we test various scenarios. counter = make_counter() # used to generate some sample data in mock calls unsupplied = object() # used in defaulting to prove an argument wasn't called @@ -579,7 +581,7 @@ def mocked_authorized_request(url, auth=None, ff_env=None, verb='GET', (a) that the mock uses a URL on the 'bar' scenario environment ('http://fourfront-bar.example/') so that we know some other authorized request wasn't also attempted that wasn't expecting this mock. (b) that the retry_fxn was not passed, since this mock doesn't know what to do with that - (c) that it's a GET operation, since we're not prepared to store anything and we're testing a getter function + (c) that it's a GET operation, since we're not prepared to store anything, and we're testing a getter function (d) that proper authorization for the 'bar' scenario was given It returns mock data that identifies itself as what was asked for. """ @@ -621,7 +623,7 @@ def test_it(n, check_queue=None, expect_suffix="", **kwargs): res_w_key = ff_utils.get_metadata(test_item, key=ts.bar_env_auth_dict, check_queue=check_queue, **kwargs) # Check that the data flow back from our mock authorized_request call did what we expect assert res_w_key == {'@id': test_item_id, 'mock_data': n} - # Check that the call out to the mock authorized_request is the thing we think. + # Check that the call-out to the mock authorized_request is the thing we think. # In particular, we expect that # (a) this is a GET # (b) it has appropriate auth @@ -1005,8 +1007,8 @@ def check_search_metadata(integrated_ff, url, expect_shortfall=False): if url != '': # replace stub with actual url from integrated_ff url = integrated_ff['ff_key']['server'] + '/' - # Note that we do some some .reset() calls on a mock that are not needed when servicing the integration test, - # but they are harmless and it seemed pointless to make it conditional. -kmp 15-Jan-2021 + # Note that we do some .reset() calls on a mock that are not needed when servicing the integration test, + # but they are harmless, and it seemed pointless to make it conditional. -kmp 15-Jan-2021 InsertingMockedSearchItems.reset() search_res = ff_utils.search_metadata(url + 'search/?limit=all&type=File', key=integrated_ff['ff_key']) assert isinstance(search_res, list) @@ -1290,6 +1292,141 @@ def test_get_health_page(integrated_ff): assert bad_health_res and 'error' in bad_health_res +@pytest.mark.xfail(reason=("Probably long-standing problem, recently discovered." + " System functions OK in spite of it. 
Tracked as C4-1087.")) +@pytest.mark.integrated +def test_get_schema_consistent(integrated_ff): + + def get_it(): + return ff_utils.get_schema('User', + key=integrated_ff['ff_key'], + ff_env=integrated_ff['ff_env']) + + first_try = get_it() + + for i in range(5): + print(f"Try {i}...") + nth_try = get_it() + assert first_try == nth_try + time.sleep(2) + + +@pytest.mark.integrated +@pytest.mark.flaky +def test_get_schema_and_get_schemas(integrated_ff): + + def tidy(x: dict) -> dict: + assert isinstance(x, dict) + # We have observed some variability in the facets and return values of get_metadata, + # so this is intended to paper over that since it seems irrelevant to what we're + # wanting to find out here. We'll track that other bug (C4-1087) elsewhere. + tidied = x.copy() + tidied.pop('facets', None) + tidied.pop('columns', None) + return tidied + + user_schema = ff_utils.get_schema('User', + key=integrated_ff['ff_key'], + ff_env=integrated_ff['ff_env']) + user_schema = tidy(user_schema) + + assert user_schema.get('title') == 'User' + assert user_schema.get('type') == 'object' + assert isinstance(user_schema.get('properties'), dict) + + experiment_set_schema_snake = ff_utils.get_schema('experiment_set', + key=integrated_ff['ff_key'], + ff_env=integrated_ff['ff_env']) + experiment_set_schema_snake = tidy(experiment_set_schema_snake) + assert experiment_set_schema_snake.get('title') == 'Experiment Set' + + experiment_set_schema_camel = ff_utils.get_schema('ExperimentSet', + key=integrated_ff['ff_key'], + ff_env=integrated_ff['ff_env']) + experiment_set_schema_camel = tidy(experiment_set_schema_camel) + assert experiment_set_schema_camel.get('title') == 'Experiment Set' + + all_schemas = ff_utils.get_schemas(key=integrated_ff['ff_key'], + ff_env=integrated_ff['ff_env']) + # We have these already loaded, so check them in full... + user_schema_from_all_schemas = all_schemas.get('User') + user_schema_from_all_schemas = tidy(user_schema_from_all_schemas) + + experiment_set_schema_from_all_schemas = all_schemas.get('ExperimentSet') + experiment_set_schema_from_all_schemas = tidy(experiment_set_schema_from_all_schemas) + + assert is_subdict(desc1=" profiles/User.keys()", + desc2="profiles/['User'].keys()", + json1=user_schema, + json2=user_schema_from_all_schemas) + + assert is_subdict(desc1=" profiles/ExperimentSet.keys()", + desc2="profiles/['ExperimentSet'].keys()", + json1=experiment_set_schema_camel, + json2=experiment_set_schema_from_all_schemas) + + assert is_subdict(desc1=" profiles/experiment_set.keys()", + desc2="profiles/['ExperimentSet'].keys()", + json1=experiment_set_schema_snake, + json2=experiment_set_schema_from_all_schemas) + + # Do some random spot-checking... + assert 'Lab' in all_schemas + assert 'ExperimentType' in all_schemas + + +@pytest.mark.xfail(reason=("Probably long-standing problem, recently discovered." + " System functions OK in spite of it. Tracked as C4-1087.")) +@pytest.mark.integrated +def test_get_metadata_consistent(integrated_ff): + + def get_it(): + return ff_utils.get_metadata('profiles/User.json', + key=integrated_ff['ff_key'], + ff_env=integrated_ff['ff_env']) + + first_try = get_it() + + for i in range(5): + print(f"Try {i}...") + nth_try = get_it() + try: + assert first_try == nth_try + except AssertionError: + is_subdict(first_try, nth_try, verbose=True) # This is called for output side-effect + raise # Go ahead and continue raising. We just wanted more debugging info. 
+        time.sleep(2)
+
+
+@pytest.mark.xfail(reason=("Probably long-standing problem, recently discovered."
+                           " System functions OK in spite of it. Tracked as C4-1087."))
+@pytest.mark.integrated
+def test_get_metadata_consistent_noting_pattern(integrated_ff):
+
+    print()
+
+    def hash(json):
+        return base64.b64encode(hashlib.md5(str(json).encode('utf-8')).digest())
+
+    def get_it():
+        return ff_utils.get_metadata('profiles/User.json',
+                                     key=integrated_ff['ff_key'],
+                                     ff_env=integrated_ff['ff_env'])
+
+    first_try = get_it()
+    print(f"first_try={hash(first_try)}")
+
+    failed = False
+
+    for i in range(2, 10):
+        nth_try = get_it()
+        print(f"Try {i}: {hash(nth_try)}")
+        failed = failed or not is_subdict(nth_try, first_try)
+        time.sleep(2)
+
+    assert not failed
+
+
 @pytest.mark.integrated
 @pytest.mark.flaky
 def test_get_schema_names(integrated_ff):
@@ -1599,7 +1736,7 @@ def get_mocked_result(*, kind, dirname, uuid, ignored_kwargs=None):
 @pytest.mark.unit
 def test_get_qc_metrics_logic_unit():
     """
-    End to end test on 'get_associated_qc_metrics' to check the logic of the fuction to make sure
+    End-to-end test on 'get_associated_qc_metrics' to check the logic of the function to make sure
     it is getting the qc metrics.
     """
     with mock.patch("dcicutils.ff_utils.get_metadata") as mock_get_metadata:
diff --git a/test/test_misc_utils.py b/test/test_misc_utils.py
index 895a25757..a07c6d234 100644
--- a/test/test_misc_utils.py
+++ b/test/test_misc_utils.py
@@ -30,7 +30,7 @@
     classproperty, classproperty_cached, classproperty_cached_each_subclass, Singleton, NamedObject,
     obsolete, ObsoleteError, CycleError, TopologicalSorter, keys_and_values_to_dict, dict_to_keys_and_values,
     is_c4_arn, deduplicate_list, chunked, parse_in_radix, format_in_radix, managed_property, future_datetime,
-    MIN_DATETIME, MIN_DATETIME_UTC, INPUT, builtin_print, map_chunked,
+    MIN_DATETIME, MIN_DATETIME_UTC, INPUT, builtin_print, map_chunked, to_camel_case,
 )
 from dcicutils.qa_utils import (
     Occasionally, ControlledTime, override_environ as qa_override_environ, MockFileSystem, printed_output,
@@ -1978,6 +1978,23 @@ def test_snake_case_to_camel_case_hyphenated(token, expected):
     assert snake_case_to_camel_case(token, separator='-') == expected
 
 
+@pytest.mark.parametrize('token, expected', [
+    ('variant_sample', 'VariantSample'),
+    ('variant', 'Variant'),
+    ('_variant_', 'Variant'),
+    ('__variant', 'Variant'),
+    ('higlass_view_config', 'HiglassViewConfig'),
+    ('a_b_c_d', 'ABCD'),
+    ('', ''),
+    ('oneverylongthing1234567895_d', 'Oneverylongthing1234567895D'),
+    ('x_m_l_container', 'XMLContainer'),
+    ('X_M_L_Container', 'XMLContainer'),
+])
+def test_to_camel_case(token, expected):
+    assert to_camel_case(token) == expected
+    assert to_camel_case(expected) == expected  # make sure it's stable
+
+
 @pytest.mark.parametrize('token, expected', [
     ('', ''),
     ('x', 'X'),
diff --git a/test/test_qa_utils.py b/test/test_qa_utils.py
index d369dc16f..0c2a87bd7 100644
--- a/test/test_qa_utils.py
+++ b/test/test_qa_utils.py
@@ -20,7 +20,7 @@
 from dcicutils.lang_utils import there_are
 from dcicutils.misc_utils import Retry, PRINT, file_contents, REF_TZ, local_attrs, ignored
 from dcicutils.qa_utils import (
-    mock_not_called, override_environ, override_dict, show_elapsed_time, timed,
+    mock_not_called, override_environ, override_dict, show_elapsed_time, timed, is_subdict,
     ControlledTime, Occasionally, RetryManager, MockFileSystem, NotReallyRandom, MockUUIDModule, MockedCommandArgs,
     MockResponse, printed_output, MockBotoS3Client, MockKeysNotImplemented,
MockBoto3, known_bug_expected, raises_regexp, VersionChecker, check_duplicated_items_by_key, guess_local_timezone_for_testing, @@ -2256,3 +2256,66 @@ def test_mock_boto3_iam_role_collection(): collection = MockBoto3IamRoleCollection() assert collection["Roles"] == [] assert collection["Foo"] is None + + +def test_is_subdict(): + + print() # start on fresh line + + for same in [{}, {"foo": 3}, {"foo": [1, "x", {"bar": 17}]}]: + with printed_output() as printed: + assert is_subdict(same, same) + assert printed.lines == [] + + for verbose in [False, True]: + with printed_output() as printed: + assert is_subdict({"foo": 3}, {"foo": 3, "bar": 4}, verbose=verbose) + if verbose: + assert printed.lines == [ + "Non-fatal keyword mismatch at '':", + " json1 keys: {'foo'}", + " json2 keys: {'bar', 'foo'}", + ] + else: + assert printed.lines == [] + + for verbose in [True, False]: + with printed_output() as printed: + assert not is_subdict({"foo": 3, "bar": {"x": 3, "y": 4}}, + {"foo": 3, "bar": {"x": 3, "y": 5, "baz": 0}}, verbose=verbose) + if verbose: + assert printed.lines == [ + "Non-fatal keyword mismatch at '.bar':", + " json1 keys: {'x', 'y'}", + " json2 keys: {'baz', 'x', 'y'}", + "Failed at '.bar.y' due to value mismatch: 4 != 5", + # "Recursive failure at '.bar' in object comparison", + # "Recursive failure at '' in object comparison", + ] + else: + assert printed.lines == [] + + for verbose in [True, False]: + with printed_output() as printed: + assert not is_subdict({"foo": 3, "bar": [1, 2, 3]}, + {"foo": 3, "bar": [1, 2, 3, 4]}, verbose=verbose) + if verbose: + assert printed.lines == [ + "Failed at '.bar' in list comparison due to length mismatch: 3 vs 4", + # "Recursive failure at '' in object comparison" + ] + else: + assert printed.lines == [] + + for verbose in [True, False]: + with printed_output() as printed: + assert not is_subdict({"foo": 3, "baz": [1, 2, 3]}, + {"foo": 3, "bar": [1, 2, 3, 4]}, verbose=verbose) + if verbose: + assert printed.lines == [ + "Failed at '' in object comparison due to key set mismatch:", + " json1 keys: {'baz', 'foo'}", + " json2 keys: {'bar', 'foo'}", + ] + else: + assert printed.lines == []
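
For orientation, here is a minimal usage sketch of the helpers introduced in this changeset. It is illustrative only and not part of the diff; the credentials shown in the comments are hypothetical placeholders, and the schema calls need a real portal to run.

    from dcicutils.ff_utils import get_schema, get_schemas
    from dcicutils.misc_utils import to_camel_case
    from dcicutils.qa_utils import is_subdict

    # to_camel_case accepts either snake_case or CamelCase and returns CamelCase,
    # which is why get_schema can take item type names written in either style.
    assert to_camel_case('experiment_set') == 'ExperimentSet'
    assert to_camel_case('ExperimentSet') == 'ExperimentSet'

    # is_subdict is asymmetric: every key in the first argument must appear in the
    # second (recursively) with an equal value, but not necessarily vice versa.
    assert is_subdict({'foo': 3}, {'foo': 3, 'bar': 4})
    assert not is_subdict({'foo': 3, 'bar': 4}, {'foo': 3})

    # With real portal credentials (the key dict below is a hypothetical placeholder),
    # individual and bulk schema retrieval would look like this:
    #     auth = {'key': 'some-key-id', 'secret': 'some-secret', 'server': 'https://portal.example.org'}
    #     user_schema = get_schema('user', key=auth)   # a single schema definition (dict)
    #     all_schemas = get_schemas(key=auth)          # mapping of schema name -> definition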