Skip to content

Commit

Permalink
Merge pull request #631 from isb-cgc/isb-cgc-prod-sp
Browse files Browse the repository at this point in the history
Sprint 27 release
  • Loading branch information
s-paquette authored Jun 15, 2018
2 parents 8e28332 + 2d7dae3 commit 9a0e7b2
Show file tree
Hide file tree
Showing 11 changed files with 769 additions and 433 deletions.
41 changes: 38 additions & 3 deletions accounts/sa_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,12 @@ def unregister_all_gcp_sa(user_id, gcp_id):
def unregister_sa(user_id, sa_name):
st_logger = StackDriverLogger.build_from_django_settings()

sa = ServiceAccount.objects.get(service_account=sa_name, active=1)
sa = ServiceAccount.objects.get(service_account=sa_name)
    # rapid multi-clicks on button can cause this sa to be inactive already. Nothing to be done...
if not sa.active:
st_logger.write_struct_log_entry(SERVICE_ACCOUNT_LOG_NAME, {
'message': '[STATUS] Attempted to remove INACTIVE SA {0}'.format(str(sa.service_account))})
return
saads = ServiceAccountAuthorizedDatasets.objects.filter(service_account=sa)

st_logger.write_text_log_entry(SERVICE_ACCOUNT_LOG_NAME, "[STATUS] User {} is unregistering SA {}".format(
Expand Down Expand Up @@ -629,7 +634,7 @@ def unregister_sa(user_id, sa_name):
'message': '[ERROR] There was an error in removing SA {0} from Google Group {1}.'.format(
str(saad.service_account.service_account), saad.authorized_dataset.acl_google_group)})
st_logger.write_struct_log_entry(SERVICE_ACCOUNT_LOG_NAME, {
'message': '[ERROR] {}}.'.format(str(e))})
'message': '[ERROR] {}.'.format(str(e))})
logger.error('[ERROR] There was an error in removing SA {0} from Google Group {1}: {2}'.format(
str(saad.service_account.service_account), saad.authorized_dataset.acl_google_group, e))
logger.exception(e)
Expand Down Expand Up @@ -1091,10 +1096,10 @@ def demo_process_success(auth, user_id, saml_response):
st_logger.write_text_log_entry(LOG_NAME_ERA_LOGIN_VIEW,
"[ERROR] Failed to publish to PubSub topic: {}".format(str(e)))

retval.messages.append(warn_message)
return retval



def deactivate_nih_add_to_open(user_id, user_email):
try:
nih_user = NIH_User.objects.get(user_id=user_id, linked=True)
Expand Down Expand Up @@ -1146,3 +1151,33 @@ def get_nih_user_details(user_id):
return user_details


def verify_user_is_in_gcp(user_id, gcp_id):
    """Check whether the Django user with ID `user_id` is listed as a 'user:'
    member in the IAM policy bindings of Google Cloud project `gcp_id`.

    Returns True only when the user's email is found in the project's IAM
    policy. On any error — including an unknown user_id — membership cannot
    be confirmed, so the user is conservatively treated as NOT a member and
    False is returned.
    """
    user_in_gcp = False
    user_email = None
    try:
        user_email = User.objects.get(id=user_id).email
        crm_service = get_special_crm_resource()

        iam_policy = crm_service.projects().getIamPolicy(resource=gcp_id, body={}).execute()
        bindings = iam_policy['bindings']
        for val in bindings:
            members = val['members']
            for member in members:
                # IAM members are prefixed with their type, e.g. 'user:someone@example.com'
                if member.startswith('user:'):
                    if user_email.lower() == member.split(':')[1].lower():
                        user_in_gcp = True
                        break
            if user_in_gcp:
                # Match found; no need to scan the remaining bindings.
                break

    except Exception as e:
        user = None
        # isinstance() rather than exact type comparison, so subclasses of
        # ObjectDoesNotExist (e.g. User.DoesNotExist) are handled too.
        if isinstance(e, ObjectDoesNotExist):
            user = str(user_id)
            logger.error("[ERROR] While validating user {} membership in GCP {}:".format(user, gcp_id))
            logger.error("Could not find user with ID {}!".format(user))
        else:
            user = user_email
            logger.error("[ERROR] While validating user {} membership in GCP {}:".format(user, gcp_id))
            logger.exception(e)
        # logger.warn is a deprecated alias for logger.warning
        logger.warning("[WARNING] Because we can't confirm if user {} is in GCP {} we must assume they're not.".format(user, gcp_id))
        user_in_gcp = False

    return user_in_gcp
216 changes: 143 additions & 73 deletions cohorts/metadata_counting.py

Large diffs are not rendered by default.

81 changes: 14 additions & 67 deletions cohorts/metadata_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
from uuid import uuid4
from django.conf import settings


debug = settings.DEBUG # RO global for this file

logger = logging.getLogger('main_logger')
Expand Down Expand Up @@ -607,76 +606,23 @@ def format_for_display(item):

return formatted_item


# Builds a BQ API v2 QueryParameter set and WHERE clause string from a set of filters of the form:
# {
# 'field_name': [<value>,...]
# }
# Breaks out '<ATTR> IS NULL'
# 2+ values are converted to IN (<value>,...)
# Filters must already be pre-bucketed or formatted
# TODO: add support for BETWEEN
# TODO: add support for <>=
def build_bq_filter_and_params(filters):
    """Build a BigQuery API v2 QueryParameter set and WHERE clause string from
    a set of filters of the form: { 'field_name': [<value>, ...], ... }

    The string value 'None' is broken out into an '<ATTR> IS NULL' clause;
    a single remaining value becomes a scalar '= @param' comparison, and 2+
    values become 'IN UNNEST(@param)' with an ARRAY parameter. Values must
    already be pre-bucketed or formatted.

    Returns a dict with keys 'filter_string' (clauses joined with AND) and
    'parameters' (list of BQ queryParameters dicts).

    TODO: add support for BETWEEN
    TODO: add support for <>=
    """
    result = {
        'filter_string': '',
        'parameters': []
    }

    # Hoisted out of the loop: matches any character that is not a digit or a
    # dot, i.e. flags values which cannot be typed as INT64. (r'' + re.UNICODE
    # replaces the Python-2-only ur'' literal, which is a syntax error in Py3.)
    non_numeric = re.compile(r'[^0-9\.]', re.UNICODE)

    filter_set = []

    for attr, values in filters.items():
        filter_string = ''
        query_param = {
            'name': attr,
            'parameterType': {},
            'parameterValue': {}
        }
        # Work on a copy so the caller's filter lists are never mutated.
        values = list(values)
        if 'None' in values:
            values.remove('None')
            filter_string = "{} IS NULL".format(attr)

        if len(values) > 0:
            if len(filter_string):
                filter_string += " OR "
            if len(values) == 1:
                # Scalar param
                query_param['parameterType']['type'] = ('STRING' if non_numeric.search(values[0]) else 'INT64')
                query_param['parameterValue']['value'] = values[0]
                filter_string += "{} = @{}".format(attr, attr)
            else:
                # Array param; element type is inferred from the first value.
                query_param['parameterType']['type'] = "ARRAY"
                query_param['parameterValue'] = {'arrayValues': [{'value': x} for x in values]}
                query_param['parameterType']['arrayType'] = {'type': ('STRING' if non_numeric.search(values[0]) else 'INT64')}
                filter_string += "{} IN UNNEST(@{})".format(attr, attr)

        filter_set.append('({})'.format(filter_string))
        result['parameters'].append(query_param)

    result['filter_string'] = " AND ".join(filter_set)

    return result

# Construct WHERE clauses for BigQuery and CloudSQL based on a set of filters
# If the names of the columns differ across the 2 platforms, the alt_key_map can be
# used to map a filter 'key' to a different column name
def build_where_clause(filters, alt_key_map=False, program=None, for_files=False):
def build_where_clause(filters, alt_key_map=False, program=None, for_files=False, comb_with='OR'):
first = True
query_str = ''
big_query_str = '' # todo: make this work for non-string values -- use {}.format
value_tuple = ()
key_order = []
keyType = None
gene = None

grouped_filters = None

for key, value in filters.items():
gene = None
invert = False

if isinstance(value, dict) and 'values' in value:
value = value['values']

Expand All @@ -695,6 +641,7 @@ def build_where_clause(filters, alt_key_map=False, program=None, for_files=False
keyType = key.split(':')[0]
if keyType == 'MUT':
gene = key.split(':')[2]
invert = bool(key.split(':')[3] == 'NOT')
key = key.split(':')[-1]

# Multitable filter lists don't come in as string as they can contain arbitrary text in values
Expand All @@ -714,20 +661,20 @@ def build_where_clause(filters, alt_key_map=False, program=None, for_files=False
if first:
first = False
else:
big_query_str += ' AND'
big_query_str += ' {}'.format(comb_with)

big_query_str += " %s = '{hugo_symbol}' AND " % 'Hugo_Symbol'
big_query_str += " (%s = '{hugo_symbol}' AND " % 'Hugo_Symbol'
params['gene'] = gene

if(key == 'category'):
if value == 'any':
big_query_str += '%s IS NOT NULL' % 'Variant_Classification'
big_query_str += '%s IS NOT NULL)' % 'Variant_Classification'
params['var_class'] = ''
else:
big_query_str += '%s IN ({var_class})' % 'Variant_Classification'
big_query_str += '%s {}IN ({var_class}))'.format('Variant_Classification', "NOT " if invert else "")
values = MOLECULAR_CATEGORIES[value]['attrs']
else:
big_query_str += '%s IN ({var_class})' % 'Variant_Classification'
big_query_str += '%s {}IN ({var_class}))'.format('Variant_Classification', "NOT " if invert else "")
values = value

if value != 'any':
Expand Down Expand Up @@ -854,7 +801,7 @@ def build_where_clause(filters, alt_key_map=False, program=None, for_files=False


def sql_simple_number_by_200(value, field):
if debug: print >> sys.stderr, 'Called ' + sys._getframe().f_code.co_name
if debug: logger.debug('[DEBUG] Called ' + sys._getframe().f_code.co_name)
result = ''

if isinstance(value, basestring):
Expand Down Expand Up @@ -1011,7 +958,7 @@ def sql_bmi_by_ranges(value):


def sql_age_by_ranges(value, bin_by_five=False):
if debug: print >> sys.stderr,'Called '+sys._getframe().f_code.co_name
if debug: logger.debug('[DEBUG] Called '+sys._getframe().f_code.co_name)
result = ''
if isinstance(value, basestring):
value = [value]
Expand Down Expand Up @@ -1064,7 +1011,7 @@ def sql_age_by_ranges(value, bin_by_five=False):


def gql_age_by_ranges(q, key, value):
if debug: print >> sys.stderr,'Called '+sys._getframe().f_code.co_name
if debug: logger.debug('[DEBUG] Called '+sys._getframe().f_code.co_name)
result = ''
if not isinstance(value, basestring):
# value is a list of ranges
Expand Down Expand Up @@ -1111,7 +1058,7 @@ def submit_bigquery_job(bq_service, project_id, query_body, batch=False):
job_data = {
'jobReference': {
'projectId': project_id,
'job_id': str(uuid4())
'jobId': str(uuid4())
},
'configuration': {
'query': {
Expand Down
38 changes: 27 additions & 11 deletions cohorts/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from django.db.models import Q
from django.utils.html import escape
from projects.models import Project, Program, User_Feature_Definitions
from django.core.exceptions import ObjectDoesNotExist
from sharing.models import Shared_Resource
from metadata_helpers import fetch_metadata_value_set, fetch_program_data_types, MOLECULAR_DISPLAY_STRINGS

Expand Down Expand Up @@ -301,23 +302,38 @@ def format_filters_for_display(cls, filters):
prog_data_types = None

for cohort_filter in filters:
prog_id = Program.objects.get(name=cohort_filter['program'], is_public=True, active=True).id
if prog_id not in prog_vals:
prog_vals[prog_id] = fetch_metadata_value_set(prog_id)
if prog_id not in prog_dts:
prog_dts[prog_id] = fetch_program_data_types(prog_id, True)

prog_values = prog_vals[prog_id]
prog_data_types = prog_dts[prog_id]
prog = None
prog_id = None
is_private = False
try:
prog_id = Program.objects.get(name=cohort_filter['program'], is_public=True, active=True).id
except ObjectDoesNotExist:
is_private = True

if not is_private:
if prog_id not in prog_vals:
prog_vals[prog_id] = fetch_metadata_value_set(prog_id)
if prog_id not in prog_dts:
prog_dts[prog_id] = fetch_program_data_types(prog_id, True)

prog_values = prog_vals[prog_id]
prog_data_types = prog_dts[prog_id]

if 'MUT:' in cohort_filter['name']:
cohort_filter['displ_name'] = cohort_filter['name'].split(':')[2].upper() + ' [' + cohort_filter['name'].split(':')[1].upper() + ',' + string.capwords(cohort_filter['name'].split(':')[3])
cohort_filter['displ_val'] = (MOLECULAR_DISPLAY_STRINGS['values'][cohort_filter['value']] if cohort_filter['name'].split(':')[3] != 'category' else MOLECULAR_DISPLAY_STRINGS['categories'][cohort_filter['value']]) + ']'
cohort_filter['displ_name'] = ("NOT(" if 'NOT:' in cohort_filter['name'] else '') \
+ cohort_filter['name'].split(':')[2].upper() \
+ ' [' + cohort_filter['name'].split(':')[1].upper() + ',' \
+ string.capwords(cohort_filter['name'].split(':')[-1])
cohort_filter['displ_val'] = (
MOLECULAR_DISPLAY_STRINGS['values'][cohort_filter['value']] if cohort_filter['name'].split(':')[-1] != 'category'
else MOLECULAR_DISPLAY_STRINGS['categories'][cohort_filter['value']]) \
+ ']' \
+ (")" if 'NOT:' in cohort_filter['name'] else '')
elif cohort_filter['name'] == 'data_type':
cohort_filter['displ_name'] = 'Data Type'
cohort_filter['displ_val'] = prog_data_types[cohort_filter['value']]
else:
if cohort_filter['name'] not in prog_values:
if not prog_values or cohort_filter['name'] not in prog_values:
cohort_filter['displ_name'] = cohort_filter['name']
cohort_filter['displ_val'] = cohort_filter['value']
else:
Expand Down
Loading

0 comments on commit 9a0e7b2

Please sign in to comment.