From f8af15dfa5a16176b4018497653c6d36560eb4ac Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Thu, 12 Dec 2024 13:15:54 -0800 Subject: [PATCH] -> Proper condensation of values --- cohorts/metadata_counting.py | 25 ++++++++++++++----------- google_helpers/bigquery/utils.py | 1 + 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/cohorts/metadata_counting.py b/cohorts/metadata_counting.py index b148ea5f..6afaa1e7 100644 --- a/cohorts/metadata_counting.py +++ b/cohorts/metadata_counting.py @@ -947,18 +947,21 @@ def get_full_case_metadata(ids, source_type, source): elif idx in sample_idx: col_name = sample_idx data_store = case['samples'] - val = val.split("|") if isinstance(val, str) and re.search(r'\|', val) else val + val = val.split("|") if isinstance(val, str) and re.search(r'\|', val) else [val] if col_name[idx] not in data_store: - data_store[col_name[idx]] = val - else: - if isinstance(data_store[col_name[idx]], list): - if isinstance(val, list): - data_store[col_name[idx]].extend(val) - else: - data_store[col_name[idx]].append(val) - data_store[col_name[idx]] = list(set(data_store[col_name[idx]])) - elif data_store[col_name[idx]] != val: - data_store[col_name[idx]] = [data_store[col_name[idx]], val] + data_store[col_name[idx]] = [] + data_store[col_name[idx]].extend(val) + for case, case_data in cases.items(): + for data_type, data in case_data.items(): + if data_type != id_type: + for col_name, vals in data.items(): + data[col_name] = list(set(vals)) + if None in data[col_name]: + data[col_name].remove(None) + if len(data[col_name]) == 1: + data[col_name] = vals[0] + elif not len(data[col_name]): + data[col_name] = "N/A" not_found = [x for x in ids if x not in cases] diff --git a/google_helpers/bigquery/utils.py b/google_helpers/bigquery/utils.py index af86c9e0..02fc3966 100644 --- a/google_helpers/bigquery/utils.py +++ b/google_helpers/bigquery/utils.py @@ -151,6 +151,7 @@ def build_bq_filter_and_params(filters, comb_with='AND', param_suffix=None, with # If the values are arrays we assume the first value in the first array is indicative of all # other values (since we don't support multi-typed fields) type_check = values[0] if type(values[0]) is not list else values[0][0] + type_check = values[0] if type(values[0]) is not list else values[0][0] parameter_type = ( 'STRING' if ( type(type_check) not in [int, float, complex] and re.compile(r'[^0-9\.,]', re.UNICODE).search(