Merge pull request #80 from ImagingDataCommons/idc_v16
IDC v16, post-release updates
bcli4d authored Sep 18, 2023
2 parents 2700fee + 6254c45 commit 62a0f69
Showing 5 changed files with 14 additions and 24 deletions.
7 changes: 1 addition & 6 deletions bq/bq_IO/upload_psql_to_bq.py
@@ -307,9 +307,4 @@ def upload_to_bq(args, tables):
  else:
      successlogger.info(f'{table} upload completed in {time()-b:.2f}s')
  else:
-     successlogger.info(f'{table} upload completed in {time() - b:.2f}s')
-
-
-
-
-
+     successlogger.info(f'{table} upload completed in {time() - b:.2f}s')
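The surviving line keeps a single elapsed-time log; the deleted lines were a duplicate of it followed by stray blank lines. A minimal sketch of the same time-and-log pattern, assuming a generic logger and a hypothetical upload callable rather than the repository's actual upload_to_bq internals:

import logging
from time import time

successlogger = logging.getLogger('success')
logging.basicConfig(level=logging.INFO)

def timed_upload(table, upload_fn):
    # Record a start time, run the (hypothetical) upload, then log elapsed seconds
    b = time()
    upload_fn(table)
    successlogger.info(f'{table} upload completed in {time() - b:.2f}s')

if __name__ == '__main__':
    timed_upload('example_table', lambda t: None)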
3 changes: 1 addition & 2 deletions bq/bq_IO/upload_psql_to_bq.vnext.dev.py
@@ -22,7 +22,7 @@
  import argparse
  import settings
  from upload_psql_to_bq import upload_to_bq, upload_version, upload_collection, upload_patient, upload_study, \
-     upload_series, upload_instance, upload_table, create_all_joined, create_idc_all_joined
+     upload_series, upload_instance, upload_table
  from google.cloud import bigquery
  from utilities.bq_helpers import create_BQ_dataset

@@ -32,7 +32,6 @@
      'collection': {"func":upload_collection, "order_by":"collection_id"},
      'collection_id_map': {"func": upload_table, "order_by": "idc_webapp_collection_id"},
      'collection_patient': {"func": upload_table, "order_by": "collection_uuid"},
-     'idc_all_joined': {"func": create_idc_all_joined, "order_by": ""},
      'idc_collection': {"func": upload_table, "order_by": "collection_id"},
      'idc_instance': {"func": upload_table, "order_by": "sop_instance_uid"},
      'idc_patient': {"func": upload_table, "order_by": "submitter_case_id"},
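The mapping above registers each table name with an upload function and an ORDER BY column; this change drops the idc_all_joined entry along with the create_idc_all_joined import. A minimal sketch of that dispatch pattern, with placeholder uploaders standing in for the real ones (the function bodies and the args object here are illustrative only):

def upload_table(args, table, order_by):
    # Placeholder for the repository's real per-table uploader
    print(f'uploading {table} ordered by {order_by}')

def upload_collection(args, table, order_by):
    print(f'uploading collection table {table} ordered by {order_by}')

TABLES = {
    'collection': {'func': upload_collection, 'order_by': 'collection_id'},
    'idc_instance': {'func': upload_table, 'order_by': 'sop_instance_uid'},
    'idc_patient': {'func': upload_table, 'order_by': 'submitter_case_id'},
}

def upload_all(args):
    # Dispatch each registered table to its upload function
    for table, spec in TABLES.items():
        spec['func'](args, table, spec['order_by'])

upload_all(args=None)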
@@ -29,13 +29,9 @@
  parser.add_argument('--version', default=settings.CURRENT_VERSION, help='IDC version for which to build the table')
  parser.add_argument('--target', default='dev', help="dev or prod")
  parser.add_argument('--merged', default=True, help='True if premerge buckets have been merged in dev buckets')
- # parser.add_argument('--src_project', default='idc-dev-etl')
  parser.add_argument('--dst_project', default=f'{settings.DEV_PROJECT}')
- # parser.add_argument('--dev_bqdataset_name', default=f'idc_v{args.version}_dev', help='BQ dataset containing development tables')
- # parser.add_argument('--pub_bqdataset_name', default=f'idc_v{args.version}_pub', help='BQ dataset containing public tables')
  parser.add_argument('--trg_bqdataset_name', default=f'idc_v{settings.CURRENT_VERSION}_pub', help='BQ dataset of resulting table')
  parser.add_argument('--bqtable_name', default='auxiliary_metadata', help='BQ table name')
- parser.add_argument('--temp_license_table_name', default='temp_licenses', help='BQ table name')
  args = parser.parse_args()

  args.access ='Public' # Fixed value
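This hunk, from the third changed file in the commit (its header is not shown in this view), trims the script's CLI to the arguments still in use, dropping three commented-out options and the temp_licenses table name. A minimal runnable sketch of the surviving argument set, using a stand-in settings object because the repository's settings module is not shown (the version and project values are assumptions):

import argparse
from types import SimpleNamespace

# Stand-in for the repository's settings module; values are illustrative
settings = SimpleNamespace(CURRENT_VERSION=16, DEV_PROJECT='idc-dev-etl')

parser = argparse.ArgumentParser()
parser.add_argument('--version', default=settings.CURRENT_VERSION, help='IDC version for which to build the table')
parser.add_argument('--target', default='dev', help='dev or prod')
parser.add_argument('--merged', default=True, help='True if premerge buckets have been merged in dev buckets')
parser.add_argument('--dst_project', default=f'{settings.DEV_PROJECT}')
parser.add_argument('--trg_bqdataset_name', default=f'idc_v{settings.CURRENT_VERSION}_pub', help='BQ dataset of resulting table')
parser.add_argument('--bqtable_name', default='auxiliary_metadata', help='BQ table name')
args = parser.parse_args([])  # empty list so the sketch runs without command-line input

args.access = 'Public'  # fixed value, as in the script
print(args)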
6 changes: 3 additions & 3 deletions gcs/empty_staging_buckets.py
@@ -25,12 +25,12 @@

  if __name__ == '__main__':
      parser = argparse.ArgumentParser()
-     parser.add_argument('--processes', default=1, help="Number of concurrent processes")
+     parser.add_argument('--processes', default=16, help="Number of concurrent processes")
      parser.add_argument('--batch', default=1000, help='Size of batch assigned to each process')
      parser.add_argument('--project', default='idc-pdp-staging')

      args = parser.parse_args()

-     for bucket in ['public_datasets_idc_staging', 'idc-open-cr-staging', 'idc-open-idc1-staging']:
+     for bucket in ['public-datasets-idc-staging', 'idc-open-cr-staging', 'idc-open-idc1-staging']:
          args.bucket = bucket
-         del_all_instances (args)
+         del_all_instances (args)
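These edits raise the default worker count from 1 to 16 and correct the first staging bucket name to its hyphenated form. del_all_instances itself is not part of this diff, so the following is only a sketch of batched, multi-process blob deletion with google-cloud-storage under assumed behavior, not the repository's implementation:

from multiprocessing import Pool
from google.cloud import storage

def delete_batch(work):
    # Each worker process deletes one batch of blob names from the bucket
    project, bucket_name, blob_names = work
    bucket = storage.Client(project=project).bucket(bucket_name)
    for name in blob_names:
        bucket.blob(name).delete()

def del_all_blobs(project, bucket_name, processes=16, batch=1000):
    # List every blob once, split the names into batches, and fan the batches
    # out to a pool of worker processes
    client = storage.Client(project=project)
    names = [blob.name for blob in client.list_blobs(bucket_name)]
    batches = [(project, bucket_name, names[i:i + batch]) for i in range(0, len(names), batch)]
    with Pool(processes) as pool:
        pool.map(delete_batch, batches)

if __name__ == '__main__':
    # Destructive; example call left commented out:
    # del_all_blobs('idc-pdp-staging', 'public-datasets-idc-staging')
    pass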
18 changes: 9 additions & 9 deletions gcs/validate_buckets/validate_bucket_mp.py
@@ -138,15 +138,15 @@ def check_all_instances_mp(args, premerge=False):
  expected_blobs = set(open(args.expected_blobs).read().splitlines())
  # json.dump(psql_blobs, open(args.blob_names), 'w')

- # try:
- #     # found_blobs = set(open(args.found_blobs).read().splitlines())
- #     found_blobs = open(f'{successlogger.handlers[0].baseFilename}').read().splitlines()
- #     progresslogger.info(f'Already have found blobs')
- # except:
- #     progresslogger.info(f'Getting found blobs')
- #     get_found_blobs_in_bucket(args)
- #     found_blobs = open(f'{successlogger.handlers[0].baseFilename}').read().splitlines()
- #     # json.dump(psql_blobs, open(args.blob_names), 'w')
+ try:
+     found_blobs = set(open(args.found_blobs).read().splitlines())
+     # found_blobs = open(f'{successlogger.handlers[0].baseFilename}').read().splitlines()
+     progresslogger.info(f'Already have found blobs')
+ except:
+     progresslogger.info(f'Getting found blobs')
+     get_found_blobs_in_bucket(args)
+     found_blobs = open(f'{successlogger.handlers[0].baseFilename}').read().splitlines()
+     # json.dump(psql_blobs, open(args.blob_names), 'w')


  progresslogger.info(f'Getting found blobs')
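The restored try/except replaces its commented-out predecessor: the set of blob names found in the bucket is read from a cache file when one exists, and otherwise regenerated by listing the bucket and rereading the resulting log. A minimal sketch of that cache-or-recompute pattern, with a placeholder cache path and listing callable rather than the script's successlogger machinery:

def load_found_blobs(cache_path, list_bucket):
    # Reuse the cached listing if the file exists; otherwise rebuild and cache it
    try:
        with open(cache_path) as f:
            return set(f.read().splitlines())
    except FileNotFoundError:
        names = list_bucket()
        with open(cache_path, 'w') as f:
            f.write('\n'.join(names))
        return set(names)

if __name__ == '__main__':
    found = load_found_blobs('/tmp/found_blobs.txt', lambda: ['a/1.dcm', 'b/2.dcm'])
    print(len(found))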
