Merge pull request #392 from isb-cgc/isb-cgc-prod-sp
Sprint 20 Release
s-paquette authored Oct 13, 2017
2 parents 6bc2ade + e98f746 commit 07107cc
Showing 6 changed files with 387 additions and 82 deletions.
1 change: 1 addition & 0 deletions accounts/urls.py
@@ -38,6 +38,7 @@
url(r'^users/(?P<user_id>\d+)/verify_gcp/$', views.verify_gcp, name='verify_gcp'),
url(r'^users/(?P<user_id>\d+)/register_sa/$', views.register_sa, name='register_sa'),
url(r'^users/(?P<user_id>\d+)/verify_sa/$', views.verify_sa, name='verify_sa'),
url(r'^users/(?P<user_id>\d+)/adjust_sa/$', views.register_sa, name='adjust_sa'),
url(r'^users/(?P<user_id>\d+)/delete_sa/(?P<sa_id>\d+)/$', views.delete_sa, name='delete_sa'),
url(r'^users/(?P<user_id>\d+)/register_bucket/(?P<gcp_id>\d+)/$', views.register_bucket, name='register_bucket'),
url(r'^users/(?P<user_id>\d+)/delete_bucket/(?P<bucket_id>\d+)/$', views.delete_bucket, name='delete_bucket'),
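The new adjust_sa route deliberately reuses the register_sa view; an adjustment is distinguished from a first-time registration by the sa_id GET parameter and the is_adjust POST flag rather than by a separate handler. A minimal sketch of resolving the route with Django's reverse() (the user ID 42 is hypothetical, and this assumes the accounts URLconf is included without a namespace):

    from django.core.urlresolvers import reverse  # Django 1.x import path, matching this era of the codebase

    adjust_url = reverse('adjust_sa', kwargs={'user_id': 42})
    # POSTing here with is_adjust='true' routes into views.register_sa's adjustment path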
111 changes: 88 additions & 23 deletions accounts/views.py
@@ -35,6 +35,7 @@
from googleapiclient.errors import HttpError
from models import *
from projects.models import User_Data_Tables
from django.utils.html import escape

from dataset_utils.dataset_access_support_factory import DatasetAccessSupportFactory
from .utils import ServiceAccountBlacklist, is_email_in_iam_roles
@@ -409,7 +410,7 @@ def user_gcp_delete(request, user_id, gcp_id):
return redirect('user_gcp_list', user_id=request.user.id)


def verify_service_account(gcp_id, service_account, datasets, user_email, is_refresh=False):
def verify_service_account(gcp_id, service_account, datasets, user_email, is_refresh=False, is_adjust=False, remove_all=False):
# Only verify for protected datasets
dataset_objs = AuthorizedDataset.objects.filter(id__in=datasets, public=False)
dataset_obj_names = dataset_objs.values_list('name', flat=True)
@@ -442,26 +443,39 @@ def verify_service_account(gcp_id, service_account, datasets, user_email, is_ref
# Refreshes require a service account to exist, and you cannot register an account that already exists with the same datasets
try:
sa = ServiceAccount.objects.get(service_account=service_account)
if not is_refresh:
if is_adjust or not is_refresh:
reg_change = False
# If there are private datasets requested, it might not be a duplicate
if len(dataset_objs):
saads = AuthorizedDataset.objects.filter(id__in=ServiceAccountAuthorizedDatasets.objects.filter(service_account=sa).values_list('authorized_dataset', flat=True), public=False).values_list('whitelist_id',flat=True)
ads = dataset_objs.values_list('whitelist_id', flat=True)
# Only if the lengths of the 2 dataset lists are the same do we need to check them against one another
if not reg_change:
# Check the private datasets to see if there's a registration change
saads = AuthorizedDataset.objects.filter( id__in=ServiceAccountAuthorizedDatasets.objects.filter(service_account=sa).values_list('authorized_dataset', flat=True), public=False).values_list('whitelist_id', flat=True)

# If we're removing all datasets and one or more are currently registered, this is automatically a registration change
if remove_all and len(saads):
reg_change = True
else:
if len(dataset_objs) or len(saads):
ads = dataset_objs.values_list('whitelist_id', flat=True)
# A private dataset missing from either list means this is a registration change
for ad in ads:
if ad not in saads:
reg_change = True
# but if there are not, it's only not a duplicate if the public dataset isn't yet registered
else:
reg_change = (len(AuthorizedDataset.objects.filter(id__in=ServiceAccountAuthorizedDatasets.objects.filter(service_account=sa).values_list('authorized_dataset', flat=True), public=True)) <= 0)
# If this isn't a refresh and the requested datasets aren't changing, we don't need to re-register
if not reg_change:
for saad in saads:
if saad not in ads:
reg_change = True
else:
reg_change = (len(AuthorizedDataset.objects.filter(id__in=ServiceAccountAuthorizedDatasets.objects.filter(service_account=sa).values_list('authorized_dataset', flat=True), public=True)) <= 0)
# If this isn't a refresh but the requested datasets aren't changing (except to be removed), we don't need to do anything
if not reg_change:
return {'message': 'Service account {} already exists with these datasets, and so does not need to be registered'.format(str(service_account))}
return {
'message': 'Service account {} already exists with these datasets, and so does not need to be {}.'.format(str(service_account),('re-registered' if not is_adjust else 'adjusted')),
'level': 'warning'
}
except ObjectDoesNotExist:
if is_refresh:
return {'message': 'Service account {} was not found so cannot be refreshed.'.format(str(service_account))}
if is_refresh or is_adjust:
return {
'message': 'Service account {} was not found so cannot be {}.'.format(str(service_account), ("adjusted" if is_adjust else "refreshed")),
'level': 'error'
}
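
# A minimal sketch (not part of this change) of the duplicate check above,
# reduced to plain sets. The whitelist IDs below are hypothetical, and the
# real code additionally falls back to a public-dataset lookup when both
# lists are empty:
def _is_registration_change_sketch(requested_ids, registered_ids, remove_all=False):
    # Removing all datasets is a change as soon as anything is registered.
    if remove_all:
        return len(registered_ids) > 0
    # A private dataset on one side but not the other is a change.
    return set(requested_ids) != set(registered_ids)

# _is_registration_change_sketch(['phs000178'], ['phs000178'])               -> False (duplicate)
# _is_registration_change_sketch(['phs000178'], ['phs000178', 'phs000218'])  -> True (one is being dropped)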


# 1. GET ALL USERS ON THE PROJECT.
@@ -602,7 +616,15 @@ def verify_sa(request, user_id):
user_sa = request.POST.get('user_sa')
datasets = request.POST.getlist('datasets')
is_refresh = bool(request.POST.get('is_refresh') == 'true')
result = verify_service_account(gcp_id, user_sa, datasets, user_email, is_refresh)
is_adjust = bool(request.POST.get('is_adjust') == 'true')
remove_all = bool(request.POST.get('select-datasets') == 'remove')

# If we have received a 'remove all' request, there's nothing to verify, so set the datasets to empty
if remove_all:
datasets = []

result = verify_service_account(gcp_id, user_sa, datasets, user_email, is_refresh, is_adjust, remove_all)

if 'message' in result.keys():
status = '400'
st_logger.write_struct_log_entry(SERVICE_ACCOUNT_LOG_NAME, {'message': '{0}: {1}'.format(user_sa, result['message'])})
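
# Hypothetical POST fields exercising the new remove-all path (names are the
# ones read above; values are invented examples):
example_post = {
    'gcp_id': 'my-gcp-project',
    'user_sa': 'svc@my-gcp-project.iam.gserviceaccount.com',
    'is_refresh': 'false',
    'is_adjust': 'true',
    'select-datasets': 'remove',   # clears the datasets list before verification
}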
@@ -631,29 +653,46 @@ def register_sa(request, user_id):
st_logger = StackDriverLogger.build_from_django_settings()

try:
# This is a Service Account dataset adjustment or an initial load of the service account registration page
if request.GET.get('sa_id') or request.GET.get('gcp_id'):
template = 'GenespotRE/register_sa.html'
context = {
'authorized_datasets': AuthorizedDataset.objects.filter(public=False)
}

if request.GET.get('sa_id'):
template = 'GenespotRE/adjust_sa.html'
service_account = ServiceAccount.objects.get(id=request.GET.get('sa_id'))
context['gcp_id'] = service_account.google_project.project_id
context['sa_datasets'] = service_account.get_auth_datasets()
context['sa_id'] = service_account.service_account
else:
context['gcp_id'] = escape(request.GET.get('gcp_id'))

if request.GET.get('gcp_id'):
authorized_datasets = AuthorizedDataset.objects.filter(public=False)
return render(request, template, context)

context = {'gcp_id': request.GET.get('gcp_id'),
'authorized_datasets': authorized_datasets}
return render(request, 'GenespotRE/register_sa.html', context)
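# The branch above renders one of two templates (IDs below are hypothetical):
#   GET .../register_sa/?gcp_id=my-gcp-project  -> GenespotRE/register_sa.html (new registration)
#   GET .../register_sa/?sa_id=7                -> GenespotRE/adjust_sa.html (adjust an existing account)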
# This is an attempt to formally register the service account, post verification
elif request.POST.get('gcp_id'):
user_email = request.user.email
gcp_id = request.POST.get('gcp_id')
user_sa = request.POST.get('user_sa')
datasets = request.POST.get('datasets').split(',')
is_refresh = bool(request.POST.get('is_refresh') == 'true')
is_adjust = bool(request.POST.get('is_adjust') == 'true')
remove_all = bool(request.POST.get('remove_all') == 'true')
user_gcp = GoogleProject.objects.get(project_id=gcp_id)

# If we've received a remove-all request, ignore any provided datasets
if remove_all:
datasets = ['']

if len(datasets) == 1 and datasets[0] == '':
datasets = []
else:
datasets = map(int, datasets)

# VERIFY AGAIN JUST IN CASE USER TRIED TO GAME THE SYSTEM
result = verify_service_account(gcp_id, user_sa, datasets, user_email, is_refresh)
logger.info("[STATUS] result of verification for {}: {}".format(user_sa,str(result)))
result = verify_service_account(gcp_id, user_sa, datasets, user_email, is_refresh, is_adjust)

# If the verification was successful, finalize access
if result['all_user_datasets_verified']:
@@ -696,6 +735,32 @@ def register_sa(request, user_id):
st_logger.write_struct_log_entry(SERVICE_ACCOUNT_LOG_NAME, {'message': '{0}: There was an error in adding the service account to Google Group {1}. {2}'.format(str(service_account_obj.service_account), dataset.acl_google_group, e)})
logger.info(e)

# If we're adjusting, check for currently authorized private datasets not in the incoming set, and delete those entries.
if is_adjust:
saads = ServiceAccountAuthorizedDatasets.objects.filter(service_account=service_account_obj).filter(authorized_dataset__public=0)
for saad in saads:
if saad.authorized_dataset not in protected_datasets or remove_all:
try:
directory_service, http_auth = get_directory_resource()
directory_service.members().delete(groupKey=saad.authorized_dataset.acl_google_group,
memberKey=saad.service_account.service_account).execute(
http=http_auth)
st_logger.write_struct_log_entry(SERVICE_ACCOUNT_LOG_NAME, {
'message': '{0}: Attempting to delete service account from Google Group {1}.'.format(
saad.service_account.service_account, saad.authorized_dataset.acl_google_group)})
logger.info("Attempting to delete service account {} from group {}. "
"If an error message doesn't follow, they were successfully deleted"
.format(saad.service_account.service_account,
saad.authorized_dataset.acl_google_group))
except HttpError as e:
st_logger.write_struct_log_entry(SERVICE_ACCOUNT_LOG_NAME, {
'message': '{0}: There was an error in removing the service account from Google Group {1}.'.format(
str(saad.service_account.service_account), saad.authorized_dataset.acl_google_group)})
logger.error("[ERROR] When trying to remove a service account from a Google Group:")
logger.exception(e)

saad.delete()

return redirect('user_gcp_list', user_id=user_id)

# if verification was unsuccessful, report errors, and revoke current access if there is any
161 changes: 159 additions & 2 deletions cohorts/metadata_counting.py
@@ -26,6 +26,7 @@
from metadata_helpers import *
from projects.models import Program, Project, User_Data_Tables, Public_Metadata_Tables
from google_helpers.bigquery_service import authorize_credentials_with_Google
from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned

BQ_ATTEMPT_MAX = 10

@@ -38,7 +39,7 @@
WHITELIST_RE = settings.WHITELIST_RE
BQ_SERVICE = None

logger = logging.getLogger(__name__)
logger = logging.getLogger('main_logger')

USER_DATA_ON = settings.USER_DATA_ON
BIG_QUERY_API_URL = settings.BASE_API_URL + '/_ah/api/bq_api/v1'
@@ -588,6 +589,11 @@ def count_public_metadata(user, cohort_id=None, inc_filters=None, program_id=Non
data_counts[row[1]]['counts'][int(row[0])] = int(row[2])
data_counts[row[1]]['total'] += int(row[2])

# Make sure GROUP_CONCAT has enough space--it can get big
cursor.execute("""
SET SESSION group_concat_max_len = 1000000;
""")

if len(params) > 0:
cursor.execute(data_avail_query, params)
else:
@@ -728,7 +734,8 @@ def count_public_metadata(user, cohort_id=None, inc_filters=None, program_id=Non
return counts_and_total

except Exception as e:
logger.error(traceback.format_exc())
logger.error("[ERROR] While counting public metadata: ")
logger.exception(e)
finally:
if cursor: cursor.close()
if db and db.open: db.close()
@@ -824,4 +831,154 @@ def user_metadata_counts(user, user_data_filters, cohort_id):
logger.exception(e)
logger.error(traceback.format_exc())


def validate_and_count_barcodes(barcodes, user_id):

tmp_validation_table = 'tmp_val_table_{}_'.format(user_id) + make_id(6)

db = None
cursor = None

barcode_index_map = {}

TEMP_TABLE_CREATION = """
CREATE TEMPORARY TABLE {}
(
INDEX (sample_barcode),
case_barcode VARCHAR(100),
sample_barcode VARCHAR(100),
program VARCHAR(50)
);
""".format(tmp_validation_table)

insertion_stmt = """
INSERT INTO {} (case_barcode,sample_barcode,program) VALUES
""".format(tmp_validation_table)

validation_query = """
SELECT ts.case_barcode AS provided_case, ts.sample_barcode AS provided_sample, ts.program AS provided_program,
COALESCE(msc.case_barcode, mss.case_barcode) AS found_case,
COALESCE(msc.sample_barcode, mss.sample_barcode) AS found_sample,
COALESCE(msc.program_name, mss.program_name) AS found_program,
COALESCE(msc.project_short_name, mss.project_short_name) AS found_project
FROM {} ts
LEFT JOIN {} msc
ON ts.case_barcode = msc.case_barcode
LEFT JOIN {} mss
ON ts.sample_barcode = mss.sample_barcode
WHERE ts.program = %s AND (ts.sample_barcode = msc.sample_barcode OR ts.sample_barcode IS NULL OR ts.case_barcode IS NULL)
"""

count_query = """
SELECT COUNT(DISTINCT cs.{})
FROM (
SELECT ts.case_barcode AS provided_case, ts.sample_barcode AS provided_sample, ts.program AS provided_program,
COALESCE(msc.case_barcode, mss.case_barcode) AS found_case,
COALESCE(msc.sample_barcode, mss.sample_barcode) AS found_sample,
COALESCE(msc.program_name, mss.program_name) AS found_program
FROM {} ts
LEFT JOIN {} msc
ON ts.case_barcode = msc.case_barcode
LEFT JOIN {} mss
ON ts.sample_barcode = mss.sample_barcode
WHERE ts.program = %s AND (ts.sample_barcode = msc.sample_barcode OR ts.sample_barcode IS NULL OR ts.case_barcode IS NULL)
) cs
"""

try:
db = get_sql_connection()
cursor = db.cursor()
db.autocommit(True)

cursor.execute(TEMP_TABLE_CREATION)

insertion_stmt += (",".join(['(%s,%s,%s)'] * len(barcodes)))

param_vals = ()

result = {
'valid_barcodes': [],
'invalid_barcodes': [],
'counts': [],
'messages': []
}

for barcode in barcodes:
param_vals += ((None if not len(barcode['case']) else barcode['case']), (None if not len(barcode['sample']) else barcode['sample']), barcode['program'], )
barcode_index_map[barcode['case']+"{}"+barcode['sample']+"{}"+barcode['program']] = []
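# The "{}" fragments are just an unlikely separator used to build a composite
# dict key from (case, sample, program); the same scheme is rebuilt when rows
# come back, so provided and found barcodes can be matched up again.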

cursor.execute(insertion_stmt, param_vals)

programs = set([x['program'] for x in barcodes])

projects_to_lookup = {}

for program in programs:

try:
prog_obj = Program.objects.get(name=program, active=1, is_public=True)
program_tables = Public_Metadata_Tables.objects.get(program=prog_obj)
except ObjectDoesNotExist:
logger.info("[STATUS] While validating barcodes for cohort creation, saw an invalid program: {}".format(program))
result['messages'].append('An invalid program was supplied: {}'.format(program))
continue

program_query = validation_query.format(tmp_validation_table, program_tables.samples_table, program_tables.samples_table)
cursor.execute(program_query, (program,))

row_eval = []

for row in cursor.fetchall():
if row[3]:
barcode_index_map[(row[0] if row[0] else '')+"{}"+(row[1] if row[1] else '')+"{}"+row[2]].append(
{'case': row[3], 'sample': row[4], 'program': row[5], 'program_id': prog_obj.id, 'project': row[6].split('-',1)[-1]}
)
if row[5] not in projects_to_lookup:
projects_to_lookup[row[5]] = {}
projects_to_lookup[row[5]][row[6].split('-',1)[-1]] = None

count_obj = {
'cases': 0,
'samples': 0,
'program': program
}

for val in ['found_sample','found_case']:
cursor.execute(count_query.format(val,tmp_validation_table,program_tables.samples_table,program_tables.samples_table), (program,))
for row in cursor.fetchall():
count_obj[val.replace('found_','')+'s'] = row[0]

result['counts'].append(count_obj)

# Convert the project names into project IDs
for prog in projects_to_lookup:
proj_names = projects_to_lookup[prog].keys()
projects = Project.objects.filter(name__in=proj_names, program=Program.objects.get(name=prog, active=1))
for proj in projects:
projects_to_lookup[prog][proj.name] = proj.id

for key in barcode_index_map:
entries = barcode_index_map[key]
for barcode in entries:
barcode['project'] = projects_to_lookup[barcode['program']][barcode['project']]

for barcode in barcodes:
if len(barcode_index_map[barcode['case']+"{}"+barcode['sample']+"{}"+barcode['program']]):
for found_barcode in barcode_index_map[barcode['case']+"{}"+barcode['sample']+"{}"+barcode['program']]:
if found_barcode not in result['valid_barcodes']:
result['valid_barcodes'].append(found_barcode)
else:
result['invalid_barcodes'].append(barcode)

cursor.execute("""DROP TEMPORARY TABLE IF EXISTS {}""".format(tmp_validation_table))

except Exception as e:
logger.error("[ERROR] While validating barcodes: ")
logger.exception(e)
finally:
if cursor: cursor.close()
if db and db.open: db.close()

return result

'''------------------------------------- End metadata counting methods -------------------------------------'''
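
For reference, a hypothetical call to the new validate_and_count_barcodes (barcode values are invented; the dict shapes are the ones the function reads and returns):

    barcodes = [
        {'case': 'TCGA-01-0628', 'sample': 'TCGA-01-0628-11A', 'program': 'TCGA'},
        {'case': '', 'sample': 'TCGA-02-0001-01C', 'program': 'TCGA'},  # case may be left blank
    ]
    result = validate_and_count_barcodes(barcodes, user_id=1)
    # result['valid_barcodes'] / result['invalid_barcodes'] list what did and
    # didn't match; result['counts'] holds per-program case/sample totals and
    # result['messages'] any program-level errors.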