Skip to content

Commit

Permalink
Merge pull request #631 from isb-cgc/isb-cgc-prod-sp
Browse files Browse the repository at this point in the history
Sprint 27 release
  • Loading branch information
s-paquette authored Jun 15, 2018
2 parents 8e28332 + 2d7dae3 commit 9a0e7b2
Show file tree
Hide file tree
Showing 11 changed files with 769 additions and 433 deletions.
41 changes: 38 additions & 3 deletions accounts/sa_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,12 @@ def unregister_all_gcp_sa(user_id, gcp_id):
def unregister_sa(user_id, sa_name):
st_logger = StackDriverLogger.build_from_django_settings()

sa = ServiceAccount.objects.get(service_account=sa_name, active=1)
sa = ServiceAccount.objects.get(service_account=sa_name)
    # rapid multi-clicks on button can cause this sa to be inactive already. Nothing to be done...
if not sa.active:
st_logger.write_struct_log_entry(SERVICE_ACCOUNT_LOG_NAME, {
'message': '[STATUS] Attempted to remove INACTIVE SA {0}'.format(str(sa.service_account))})
return
saads = ServiceAccountAuthorizedDatasets.objects.filter(service_account=sa)

st_logger.write_text_log_entry(SERVICE_ACCOUNT_LOG_NAME, "[STATUS] User {} is unregistering SA {}".format(
Expand Down Expand Up @@ -629,7 +634,7 @@ def unregister_sa(user_id, sa_name):
'message': '[ERROR] There was an error in removing SA {0} from Google Group {1}.'.format(
str(saad.service_account.service_account), saad.authorized_dataset.acl_google_group)})
st_logger.write_struct_log_entry(SERVICE_ACCOUNT_LOG_NAME, {
'message': '[ERROR] {}}.'.format(str(e))})
'message': '[ERROR] {}.'.format(str(e))})
logger.error('[ERROR] There was an error in removing SA {0} from Google Group {1}: {2}'.format(
str(saad.service_account.service_account), saad.authorized_dataset.acl_google_group, e))
logger.exception(e)
Expand Down Expand Up @@ -1091,10 +1096,10 @@ def demo_process_success(auth, user_id, saml_response):
st_logger.write_text_log_entry(LOG_NAME_ERA_LOGIN_VIEW,
"[ERROR] Failed to publish to PubSub topic: {}".format(str(e)))

retval.messages.append(warn_message)
return retval



def deactivate_nih_add_to_open(user_id, user_email):
try:
nih_user = NIH_User.objects.get(user_id=user_id, linked=True)
Expand Down Expand Up @@ -1146,3 +1151,33 @@ def get_nih_user_details(user_id):
return user_details


def verify_user_is_in_gcp(user_id, gcp_id):
    """Check whether the Django user with ID `user_id` is listed as a 'user:'
    member in the IAM policy bindings of Google Cloud project `gcp_id`.

    Returns True only when the user's email is found in the project's IAM
    policy. On any error — including an unknown user_id — membership cannot
    be confirmed, so the user is conservatively treated as NOT a member and
    False is returned.
    """
    user_in_gcp = False
    user_email = None
    try:
        user_email = User.objects.get(id=user_id).email
        crm_service = get_special_crm_resource()

        iam_policy = crm_service.projects().getIamPolicy(resource=gcp_id, body={}).execute()
        bindings = iam_policy['bindings']
        for val in bindings:
            members = val['members']
            for member in members:
                # IAM members are prefixed with their type, e.g. 'user:someone@example.com'
                if member.startswith('user:'):
                    if user_email.lower() == member.split(':')[1].lower():
                        user_in_gcp = True
                        break
            if user_in_gcp:
                # Match found; no need to scan the remaining bindings.
                break

    except Exception as e:
        user = None
        # isinstance() rather than exact type comparison, so subclasses of
        # ObjectDoesNotExist (e.g. User.DoesNotExist) are handled too.
        if isinstance(e, ObjectDoesNotExist):
            user = str(user_id)
            logger.error("[ERROR] While validating user {} membership in GCP {}:".format(user, gcp_id))
            logger.error("Could not find user with ID {}!".format(user))
        else:
            user = user_email
            logger.error("[ERROR] While validating user {} membership in GCP {}:".format(user, gcp_id))
            logger.exception(e)
        # logger.warn is a deprecated alias for logger.warning
        logger.warning("[WARNING] Because we can't confirm if user {} is in GCP {} we must assume they're not.".format(user, gcp_id))
        user_in_gcp = False

    return user_in_gcp
216 changes: 143 additions & 73 deletions cohorts/metadata_counting.py

Large diffs are not rendered by default.

81 changes: 14 additions & 67 deletions cohorts/metadata_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
from uuid import uuid4
from django.conf import settings


debug = settings.DEBUG # RO global for this file

logger = logging.getLogger('main_logger')
Expand Down Expand Up @@ -607,76 +606,23 @@ def format_for_display(item):

return formatted_item


# Builds a BQ API v2 QueryParameter set and WHERE clause string from a set of filters of the form:
# {
# 'field_name': [<value>,...]
# }
# Breaks out '<ATTR> IS NULL'
# 2+ values are converted to IN (<value>,...)
# Filters must already be pre-bucketed or formatted
# TODO: add support for BETWEEN
# TODO: add support for <>=
def build_bq_filter_and_params(filters):
    """Build a BigQuery API v2 QueryParameter set and WHERE clause string from
    a set of filters of the form: { 'field_name': [<value>, ...], ... }

    The string value 'None' is broken out into an '<ATTR> IS NULL' clause;
    a single remaining value becomes a scalar '= @param' comparison, and 2+
    values become 'IN UNNEST(@param)' with an ARRAY parameter. Values must
    already be pre-bucketed or formatted.

    Returns a dict with keys 'filter_string' (clauses joined with AND) and
    'parameters' (list of BQ queryParameters dicts).

    TODO: add support for BETWEEN
    TODO: add support for <>=
    """
    result = {
        'filter_string': '',
        'parameters': []
    }

    # Hoisted out of the loop: matches any character that is not a digit or a
    # dot, i.e. flags values which cannot be typed as INT64. (r'' + re.UNICODE
    # replaces the Python-2-only ur'' literal, which is a syntax error in Py3.)
    non_numeric = re.compile(r'[^0-9\.]', re.UNICODE)

    filter_set = []

    for attr, values in filters.items():
        filter_string = ''
        query_param = {
            'name': attr,
            'parameterType': {},
            'parameterValue': {}
        }
        # Work on a copy so the caller's filter lists are never mutated.
        values = list(values)
        if 'None' in values:
            values.remove('None')
            filter_string = "{} IS NULL".format(attr)

        if len(values) > 0:
            if len(filter_string):
                filter_string += " OR "
            if len(values) == 1:
                # Scalar param
                query_param['parameterType']['type'] = ('STRING' if non_numeric.search(values[0]) else 'INT64')
                query_param['parameterValue']['value'] = values[0]
                filter_string += "{} = @{}".format(attr, attr)
            else:
                # Array param; element type is inferred from the first value.
                query_param['parameterType']['type'] = "ARRAY"
                query_param['parameterValue'] = {'arrayValues': [{'value': x} for x in values]}
                query_param['parameterType']['arrayType'] = {'type': ('STRING' if non_numeric.search(values[0]) else 'INT64')}
                filter_string += "{} IN UNNEST(@{})".format(attr, attr)

        filter_set.append('({})'.format(filter_string))
        result['parameters'].append(query_param)

    result['filter_string'] = " AND ".join(filter_set)

    return result

# Construct WHERE clauses for BigQuery and CloudSQL based on a set of filters
# If the names of the columns differ across the 2 platforms, the alt_key_map can be
# used to map a filter 'key' to a different column name
def build_where_clause(filters, alt_key_map=False, program=None, for_files=False):
def build_where_clause(filters, alt_key_map=False, program=None, for_files=False, comb_with='OR'):
first = True
query_str = ''
big_query_str = '' # todo: make this work for non-string values -- use {}.format
value_tuple = ()
key_order = []
keyType = None
gene = None

grouped_filters = None

for key, value in filters.items():
gene = None
invert = False

if isinstance(value, dict) and 'values' in value:
value = value['values']

Expand All @@ -695,6 +641,7 @@ def build_where_clause(filters, alt_key_map=False, program=None, for_files=False
keyType = key.split(':')[0]
if keyType == 'MUT':
gene = key.split(':')[2]
invert = bool(key.split(':')[3] == 'NOT')
key = key.split(':')[-1]

# Multitable filter lists don't come in as string as they can contain arbitrary text in values
Expand All @@ -714,20 +661,20 @@ def build_where_clause(filters, alt_key_map=False, program=None, for_files=False
if first:
first = False
else:
big_query_str += ' AND'
big_query_str += ' {}'.format(comb_with)

big_query_str += " %s = '{hugo_symbol}' AND " % 'Hugo_Symbol'
big_query_str += " (%s = '{hugo_symbol}' AND " % 'Hugo_Symbol'
params['gene'] = gene

if(key == 'category'):
if value == 'any':
big_query_str += '%s IS NOT NULL' % 'Variant_Classification'
big_query_str += '%s IS NOT NULL)' % 'Variant_Classification'
params['var_class'] = ''
else:
big_query_str += '%s IN ({var_class})' % 'Variant_Classification'
big_query_str += '%s {}IN ({var_class}))'.format('Variant_Classification', "NOT " if invert else "")
values = MOLECULAR_CATEGORIES[value]['attrs']
else:
big_query_str += '%s IN ({var_class})' % 'Variant_Classification'
big_query_str += '%s {}IN ({var_class}))'.format('Variant_Classification', "NOT " if invert else "")
values = value

if value != 'any':
Expand Down Expand Up @@ -854,7 +801,7 @@ def build_where_clause(filters, alt_key_map=False, program=None, for_files=False


def sql_simple_number_by_200(value, field):
if debug: print >> sys.stderr, 'Called ' + sys._getframe().f_code.co_name
if debug: logger.debug('[DEBUG] Called ' + sys._getframe().f_code.co_name)
result = ''

if isinstance(value, basestring):
Expand Down Expand Up @@ -1011,7 +958,7 @@ def sql_bmi_by_ranges(value):


def sql_age_by_ranges(value, bin_by_five=False):
if debug: print >> sys.stderr,'Called '+sys._getframe().f_code.co_name
if debug: logger.debug('[DEBUG] Called '+sys._getframe().f_code.co_name)
result = ''
if isinstance(value, basestring):
value = [value]
Expand Down Expand Up @@ -1064,7 +1011,7 @@ def sql_age_by_ranges(value, bin_by_five=False):


def gql_age_by_ranges(q, key, value):
if debug: print >> sys.stderr,'Called '+sys._getframe().f_code.co_name
if debug: logger.debug('[DEBUG] Called '+sys._getframe().f_code.co_name)
result = ''
if not isinstance(value, basestring):
# value is a list of ranges
Expand Down Expand Up @@ -1111,7 +1058,7 @@ def submit_bigquery_job(bq_service, project_id, query_body, batch=False):
job_data = {
'jobReference': {
'projectId': project_id,
'job_id': str(uuid4())
'jobId': str(uuid4())
},
'configuration': {
'query': {
Expand Down
38 changes: 27 additions & 11 deletions cohorts/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from django.db.models import Q
from django.utils.html import escape
from projects.models import Project, Program, User_Feature_Definitions
from django.core.exceptions import ObjectDoesNotExist
from sharing.models import Shared_Resource
from metadata_helpers import fetch_metadata_value_set, fetch_program_data_types, MOLECULAR_DISPLAY_STRINGS

Expand Down Expand Up @@ -301,23 +302,38 @@ def format_filters_for_display(cls, filters):
prog_data_types = None

for cohort_filter in filters:
prog_id = Program.objects.get(name=cohort_filter['program'], is_public=True, active=True).id
if prog_id not in prog_vals:
prog_vals[prog_id] = fetch_metadata_value_set(prog_id)
if prog_id not in prog_dts:
prog_dts[prog_id] = fetch_program_data_types(prog_id, True)

prog_values = prog_vals[prog_id]
prog_data_types = prog_dts[prog_id]
prog = None
prog_id = None
is_private = False
try:
prog_id = Program.objects.get(name=cohort_filter['program'], is_public=True, active=True).id
except ObjectDoesNotExist:
is_private = True

if not is_private:
if prog_id not in prog_vals:
prog_vals[prog_id] = fetch_metadata_value_set(prog_id)
if prog_id not in prog_dts:
prog_dts[prog_id] = fetch_program_data_types(prog_id, True)

prog_values = prog_vals[prog_id]
prog_data_types = prog_dts[prog_id]

if 'MUT:' in cohort_filter['name']:
cohort_filter['displ_name'] = cohort_filter['name'].split(':')[2].upper() + ' [' + cohort_filter['name'].split(':')[1].upper() + ',' + string.capwords(cohort_filter['name'].split(':')[3])
cohort_filter['displ_val'] = (MOLECULAR_DISPLAY_STRINGS['values'][cohort_filter['value']] if cohort_filter['name'].split(':')[3] != 'category' else MOLECULAR_DISPLAY_STRINGS['categories'][cohort_filter['value']]) + ']'
cohort_filter['displ_name'] = ("NOT(" if 'NOT:' in cohort_filter['name'] else '') \
+ cohort_filter['name'].split(':')[2].upper() \
+ ' [' + cohort_filter['name'].split(':')[1].upper() + ',' \
+ string.capwords(cohort_filter['name'].split(':')[-1])
cohort_filter['displ_val'] = (
MOLECULAR_DISPLAY_STRINGS['values'][cohort_filter['value']] if cohort_filter['name'].split(':')[-1] != 'category'
else MOLECULAR_DISPLAY_STRINGS['categories'][cohort_filter['value']]) \
+ ']' \
+ (")" if 'NOT:' in cohort_filter['name'] else '')
elif cohort_filter['name'] == 'data_type':
cohort_filter['displ_name'] = 'Data Type'
cohort_filter['displ_val'] = prog_data_types[cohort_filter['value']]
else:
if cohort_filter['name'] not in prog_values:
if not prog_values or cohort_filter['name'] not in prog_values:
cohort_filter['displ_name'] = cohort_filter['name']
cohort_filter['displ_val'] = cohort_filter['value']
else:
Expand Down
Loading

0 comments on commit 9a0e7b2

Please sign in to comment.