diff --git a/Makefile b/Makefile index 1df4ead96..d0acb3d9d 100644 --- a/Makefile +++ b/Makefile @@ -217,7 +217,7 @@ check-env: BEAGLE_AUTH_LDAP_SERVER_URI \ BEAGLE_LIMS_PASSWORD \ BEAGLE_LIMS_USERNAME; do \ - [ -z "$$(printenv BEAGLE_LIMS_USERNAME)" ] && echo ">>> env variable $$i is not set; some features may not work" || : ; done + [ -z "$$(printenv $$i)" ] && echo ">>> env variable $$i is not set; some features may not work" || : ; done # start the RabbitMQ server in the background rabbitmq-start: $(LOG_DIR_ABS) @@ -436,10 +436,11 @@ file-get: http://$(DJANGO_BEAGLE_IP):$(DJANGO_BEAGLE_PORT)/v0/fs/files/?filename=$(REQFILE) # start a Roslin run for a given request in the Beagle db -run-request: +run-request: $(AUTH_FILE) + @token=$$( jq -r '.token' "$(AUTH_FILE)" ) && \ curl -H "Content-Type: application/json" \ -X POST \ - -H "Authorization: Bearer $(TOKEN)" \ + -H "Authorization: Bearer $$token" \ --data '{"request_ids":["$(REQID)"], "pipeline_name": "roslin"}' \ http://$(DJANGO_BEAGLE_IP):$(DJANGO_BEAGLE_PORT)/v0/run/request/ @@ -467,12 +468,20 @@ $(DEMO_INPUT): $(INPUT_TEMPLATE) $(AUTH_FILE) .PHONY: $(DEMO_INPUT) # submit a demo Roslin run using the dev Roslin pipeline entry in the database -demo-run: register-dev-pipeline $(DEMO_INPUT) +# submit using the API endpoint; bypasses the Operator +demo-run-api: register-dev-pipeline $(DEMO_INPUT) @python manage.py loaddata fixtures/tests/juno_roslin_demo2.file.json @python manage.py loaddata fixtures/tests/juno_roslin_demo2.filemetadata.json @python manage.py loaddata fixtures/tests/roslin_reference_files.json @$(MAKE) run-request-api REQID=DemoRequest1 REQJSON=$(DEMO_INPUT) +# submit using standard request; uses the Operator +demo-run: register-dev-pipeline $(DEMO_INPUT) + @python manage.py loaddata fixtures/tests/juno_roslin_demo2.file.json + @python manage.py loaddata fixtures/tests/juno_roslin_demo2.filemetadata.json + @python manage.py loaddata fixtures/tests/roslin_reference_files.json + $(MAKE) 
run-request REQID=DemoRequest1 + # check if the ports needed for services and servers are already in use on this system ifeq ($(UNAME), Darwin) # On macOS High Sierra, use this command: lsof -nP -i4TCP:$PORT | grep LISTEN diff --git a/runner/operator/roslin_operator/construct_roslin_pair.py b/runner/operator/roslin_operator/construct_roslin_pair.py index d9326b36d..7a5097069 100644 --- a/runner/operator/roslin_operator/construct_roslin_pair.py +++ b/runner/operator/roslin_operator/construct_roslin_pair.py @@ -5,7 +5,8 @@ from .bin.make_sample import remove_with_caveats from .bin.pair_request import compile_pairs - +class InvalidAssay(Exception): + pass # TODO: generalize def load_references(): @@ -50,7 +51,6 @@ def format_sample(data): return sample - def construct_roslin_jobs(samples): samples, error_samples = remove_with_caveats(samples) pairs = compile_pairs(samples) @@ -67,7 +67,18 @@ def construct_roslin_jobs(samples): job['pair'] = [tumor_sample, normal_sample] references = convert_references(project_id, assay) job.update(references) - roslin_jobs.append(job) + + job_metadata = {} + job_metadata['assay'] = assay + job_metadata['request_id'] = project_id + job_metadata['tumor'] = {} + job_metadata['tumor']['igo_id'] = tumor['igo_id'] + job_metadata['tumor']['patient_id'] = tumor['patient_id'] + job_metadata['normal'] = {} + job_metadata['normal']['igo_id'] = normal['igo_id'] + job_metadata['normal']['patient_id'] = normal['patient_id'] + + roslin_jobs.append((job, job_metadata)) return roslin_jobs, error_samples @@ -85,13 +96,11 @@ def get_curated_bams(assay,request_files): array.append({'class': 'File', 'location': str(bam)}) return array - -def get_baits_and_targets(assay, roslin_resources): - # probably need similar rules for whatever "Exome" string is in rquest - targets = roslin_resources['targets'] - - target_assay = assay - +def get_target_assay(assay): + """ + Return a target assay label for a provided assay; the provided assay may not exactly match 
the desired target assay so resolve it here to the desired output value + """ + target_assay = None if assay.find("IMPACT410") > -1: target_assay = "IMPACT410_b37" if assay.find("IMPACT468") > -1: @@ -104,6 +113,15 @@ def get_baits_and_targets(assay, roslin_resources): target_assay = "IMPACT468_08390" if assay.find("IMPACT468+Poirier_RB1_intron_V2") > -1: target_assay = "IMPACT468_08050" + if target_assay == None: + raise InvalidAssay(assay) + return(target_assay) + +def get_baits_and_targets(assay, roslin_resources): + # probably need similar rules for whatever "Exome" string is in request + targets = roslin_resources['targets'] + + target_assay = get_target_assay(assay) if target_assay in targets: return {"bait_intervals": {"class": "File", 'location': str(targets[target_assay]['baits_list'])}, diff --git a/runner/operator/roslin_operator/roslin_operator.py b/runner/operator/roslin_operator/roslin_operator.py index 51ffc526a..716a3a7fb 100644 --- a/runner/operator/roslin_operator/roslin_operator.py +++ b/runner/operator/roslin_operator/roslin_operator.py @@ -5,7 +5,7 @@ from .construct_roslin_pair import construct_roslin_jobs from .bin.pair_request import compile_pairs from .bin.make_sample import build_sample - +from pprint import pprint class RoslinOperator(Operator): @@ -43,11 +43,25 @@ def get_jobs(self): roslin_inputs, error_samples = construct_roslin_jobs(samples) number_of_inputs = len(roslin_inputs) - for i, job in enumerate(roslin_inputs): + for i, job_items in enumerate(roslin_inputs): + job = job_items[0] + job_metadata = job_items[1] tumor_sample_name = job['pair'][0]['ID'] normal_sample_name = job['pair'][1]['ID'] name = "ROSLIN %s, %i of %i" % (self.request_id, i + 1, number_of_inputs) - roslin_jobs.append((APIRunCreateSerializer( - data={'app': self.get_pipeline_id(), 'inputs': roslin_inputs, 'name': name, - 'tags': {'requestId': self.request_id}}), job)) + data = { + 'app': self.get_pipeline_id(), + 'inputs': roslin_inputs, + 'name': name, +
'tags': {'requestId': self.request_id}, + 'output_metadata': { + 'assay': job_metadata['assay'], + 'request_id': job_metadata['request_id'], + 'tumor_igo_id': job_metadata['tumor']['igo_id'], + 'tumor_patient_id': job_metadata['tumor']['patient_id'], + 'normal_igo_id': job_metadata['normal']['igo_id'], + 'normal_patient_id': job_metadata['normal']['patient_id'], + } + } + roslin_jobs.append((APIRunCreateSerializer(data = data), job)) return roslin_jobs diff --git a/runner/tests/operator/roslin_operator/test_construct_roslin_pair.py b/runner/tests/operator/roslin_operator/test_construct_roslin_pair.py index a12285889..891adeaa4 100644 --- a/runner/tests/operator/roslin_operator/test_construct_roslin_pair.py +++ b/runner/tests/operator/roslin_operator/test_construct_roslin_pair.py @@ -7,6 +7,9 @@ from uuid import UUID from django.test import TestCase from runner.operator.roslin_operator.construct_roslin_pair import construct_roslin_jobs +from runner.operator.roslin_operator.construct_roslin_pair import get_baits_and_targets +from runner.operator.roslin_operator.construct_roslin_pair import get_target_assay +from runner.operator.roslin_operator.construct_roslin_pair import InvalidAssay from runner.operator.roslin_operator.bin.make_sample import build_sample from file_system.models import File, FileMetadata, FileGroup, FileType from django.conf import settings @@ -55,5 +58,54 @@ def test_construct_roslin_jobs1(self): samples.append(build_sample(igo_id_group[igo_id])) roslin_inputs, error_samples = construct_roslin_jobs(samples) + # pprint(">>> roslin_inputs: ") + # print(json.dumps(roslin_inputs, indent = 4)) expected_inputs = json.load(open(os.path.join(settings.TEST_FIXTURE_DIR, "10075_D_single_TN_pair.roslin.input.json"))) self.assertTrue(roslin_inputs == expected_inputs) + + def test_get_baits_and_targets1(self): + """ + Test that the correct baits and targets are returned for a given assay + """ + roslin_resources = 
json.load(open("runner/operator/roslin_operator/reference_jsons/roslin_resources.json", 'rb')) + targets = roslin_resources['targets'] + + # invalid assay raises InvalidAssay + with self.assertRaises(InvalidAssay): + get_baits_and_targets(assay = "foo", roslin_resources = roslin_resources) + + # known combinations of assay label pattern vs. true assay type to use for targets lookup + combinations = [ + ("IMPACT410", "IMPACT410_b37"), + ("IMPACT468", "IMPACT468_b37"), + ("IMPACT341", "IMPACT341_b37"), + ("IDT_Exome_v1_FP", "IDT_Exome_v1_FP_b37"), + ("IMPACT468+08390", "IMPACT468_08390"), + ("IMPACT468+Poirier_RB1_intron_V2", "IMPACT468_08050") + ] + + for find_assay, target_assay in combinations: + expected_targets = {"bait_intervals": {"class": "File", 'location': str(targets[target_assay]['baits_list'])}, + "target_intervals": {"class": "File", 'location': str(targets[target_assay]['targets_list'])}, + "fp_intervals": {"class": "File", 'location': str(targets[target_assay]['FP_intervals'])}, + "fp_genotypes": {"class": "File", 'location': str(targets[target_assay]['FP_genotypes'])}} + self.assertEqual( get_baits_and_targets(assay = find_assay, roslin_resources = roslin_resources), expected_targets) + + def test_get_target_assay1(self): + """ + Test that the correct target assay label is returned for a given assay label which might be different from the actual target assay to use + """ + with self.assertRaises(InvalidAssay): + get_target_assay(assay = "foo") + + # known combinations of assay label pattern vs.
true assay type to use for targets lookup + combinations = [ + ("IMPACT410", "IMPACT410_b37"), + ("IMPACT468", "IMPACT468_b37"), + ("IMPACT341", "IMPACT341_b37"), + ("IDT_Exome_v1_FP", "IDT_Exome_v1_FP_b37"), + ("IMPACT468+08390", "IMPACT468_08390"), + ("IMPACT468+Poirier_RB1_intron_V2", "IMPACT468_08050") + ] + for find_assay, target_assay in combinations: + self.assertEqual( get_target_assay(assay = find_assay), target_assay) diff --git a/runner/tests/serializers/__init__.py b/runner/tests/serializers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/runner/tests/serializers/test_serializers.py b/runner/tests/serializers/test_serializers.py new file mode 100644 index 000000000..1ea9e5ff2 --- /dev/null +++ b/runner/tests/serializers/test_serializers.py @@ -0,0 +1,63 @@ +""" +Tests for serializers +""" +from django.test import TestCase +from uuid import UUID +from runner.serializers import APIRunCreateSerializer +from runner.models import Run + +class TestSerializers(TestCase): + fixtures = [ + "file_system.filegroup.json", + "file_system.filetype.json", + "file_system.storage.json", + "runner.pipeline.json" + ] + + def test_create_run_serializer1(self): + """ + Test that the API Run Create Serializer works and creates a Run + """ + # start with 0 runs in the database + self.assertEqual(len(Run.objects.all()), 0) + + # data to pass to serializer + data = { + 'app': 'cb5d793b-e650-4b7d-bfcd-882858e29cc5', + 'inputs': [], + 'name': 'ROSLIN 10075_D, 1 of 1', + 'tags': {'requestId': '10075_D'} + } + + # run the serializer + serializer = APIRunCreateSerializer(data = data) + serializer.is_valid() + run = serializer.save() + + # should be a Run in the database now + self.assertEqual(len(Run.objects.all()), 1) + + run_instance = Run.objects.all()[0] + self.assertEqual(run_instance.app_id, UUID('cb5d793b-e650-4b7d-bfcd-882858e29cc5')) + self.assertTrue(run_instance.name.startswith(data['name'])) + self.assertEqual(run_instance.tags, {'requestId':
'10075_D'}) + self.assertEqual(run_instance.status, 0) + + def test_create_run_with_output_metadata1(self): + """ + Test that output_metadata propagates to the Run instance created + """ + data = { + 'app': 'cb5d793b-e650-4b7d-bfcd-882858e29cc5', + 'inputs': [], + 'name': 'foo Run', + 'output_metadata': {'assay':'IMPACT486'} + } + serializer = APIRunCreateSerializer(data = data) + serializer.is_valid() + run = serializer.save() + run_instance = Run.objects.all()[0] + self.assertEqual(run_instance.app_id, UUID('cb5d793b-e650-4b7d-bfcd-882858e29cc5')) + self.assertTrue(run_instance.name.startswith(data['name'])) + self.assertEqual(run_instance.status, 0) + self.assertEqual(run_instance.output_metadata, data['output_metadata'])