Skip to content

Commit

Permalink
Merge pull request #1386 from mskcc/feature/update_mainfest_op
Browse files Browse the repository at this point in the history
Feature/update manifest operator
  • Loading branch information
buehlere authored Dec 9, 2024
2 parents 563d541 + b4e318f commit 7d131b3
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 20 deletions.
10 changes: 8 additions & 2 deletions file_system/helper/access_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,14 @@ def construct_csv(self):
"""

# get fastq metadata for a given request
fastqs = File.objects.filter(file_group__slug="lims", request_id__in=self.request_ids)
fastq_metadata = [fastq.filemetadata_set.values()[0]["metadata"] for fastq in fastqs]
files = FileRepository.filter(
file_group=settings.IMPORT_FILE_GROUP,
metadata={settings.IGO_COMPLETE_METADATA_KEY: True},
filter_redact=True,
).all()
fastqs = files.filter(metadata__igoRequestId__in=self.request_ids)
fastq_metadata = [fastq.metadata for fastq in fastqs]
fastq_metadata = sorted(fastq_metadata, key=lambda d: d["cmoSampleName"])
cmoPatientId = set([fastq["cmoPatientId"] for fastq in fastq_metadata])
# get DMP BAM file group
dmp_bams = FileRepository.filter(file_group=settings.DMP_BAM_FILE_GROUP)
Expand Down
11 changes: 3 additions & 8 deletions runner/operator/manifest/access_manifest_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,18 +98,13 @@ def write_to_file(self, fname, s):
Writes manifest csv to temporary location, registers it as tmp file
:return: manifest csv path
"""
# Split the string into rows using "\r\n" as the delimiter
rows = s.split("\r\n")
# Split each row into columns using "," as the delimiter
data = [row.split(",") for row in rows]
# tmp file creation
# output path
tmpdir = os.path.join(settings.BEAGLE_SHARED_TMPDIR, str(uuid.uuid4()))
Path(tmpdir).mkdir(parents=True, exist_ok=True)
output = os.path.join(tmpdir, fname)
# write csv to tmp file group
with open(output, "w+", newline="") as csvfile:
writer = csv.writer(csvfile)
writer.writerows(data)
with open(output, mode="w", encoding="utf-8", newline="") as file:
file.write(s)
# register output as tmp file
self.register_temp_file(output)
# return with juno formatting
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@
from beagle_etl.models import Operator
from file_system.models import File, FileMetadata
from runner.operator.operator_factory import OperatorFactory
import datetime
import glob
import shutil

# general fixtures
COMMON_FIXTURES = [
Expand All @@ -32,10 +29,8 @@
class TestAcessManifestOperator(TestCase):
# test db
fixtures = [os.path.join(ROOT_DIR, f) for f in COMMON_FIXTURES]
# variables to help check operator output
expected_csv_content = [
'igoRequestId,primaryId,cmoPatientId,cmoSampleName,dmpPatientId,dmpImpactSamples,dmpAccessSamples,baitSet,libraryVolume,investigatorSampleId,preservation,species,libraryConcentrationNgul,tissueLocation,sampleClass,sex,cfDNA2dBarcode,sampleOrigin,tubeId,tumorOrNormal,captureConcentrationNm,oncotreeCode,dnaInputNg,collectionYear,captureInputNg\n13893_B,13893_B_1,ALLANT2,C-ALLANT2-N001-d01,P-0000002,P-0000005-T01-IM6;P-0000004-T01-IM6,,MSK-ACCESS-v1_0-probesAllwFP,25.0,P-1234567-N00-XS1,EDTA-Streck,,69.0,,Blood,M,8042889270,Whole Blood,,Normal,14.49275362,,200.0,,999.99999978\n13893_B,13893_B_3,ALLANT,C-ALLANT-N001-d01,P-0000001,P-0000002-T01-IM6;P-0000001-T01-IM6,,MSK-ACCESS-v1_0-probesAllwFP,25.0,P-1234567-N00-XS1,EDTA-Streck,,102.5,,Blood,M,8042889270,Whole Blood,,Normal,9.756097561,,200.0,,1000.0000000025001\n13893_B,13893_B_2,ALLANT3,C-ALLANT3-N003-d02,,,,MSK-ACCESS-v1_0-probesAllwFP,25.0,P-1234567-N00-XS1,EDTA-Streck,,74.5,,Blood,M,8042889270,Whole Blood,,Normal,13.42281879,,200.0,,999.999999855\n""\n'
]
header_control = "igoRequestId,primaryId,cmoPatientId,cmoSampleName,dmpPatientId,dmpImpactSamples,dmpAccessSamples,baitSet,libraryVolume,investigatorSampleId,preservation,species,libraryConcentrationNgul,tissueLocation,sampleClass,sex,cfDNA2dBarcode,sampleOrigin,tubeId,tumorOrNormal,captureConcentrationNm,oncotreeCode,dnaInputNg,collectionYear,captureInputNg"
id_control = "C-ALLANT-N001-d01"

def test_access_manifest_operator(self):
"""
Expand All @@ -60,6 +55,9 @@ def test_access_manifest_operator(self):
self.assertEqual(len(input_json["manifest_data"]), 2)
# Check contents
manifest_path = input_json["manifest_data"]["location"].replace("juno:", "")
with open(manifest_path, "r") as file:
csv_string = file.read()
self.assertEqual(csv_string, self.expected_csv_content[i])
with open(manifest_path, mode="r", newline="", encoding="utf-8") as file:
content = file.read()
header = content.split("\r\n")[0]
id = content.split("\r\n")[1].split(",")[3]
self.assertEqual(header, self.header_control)
self.assertEqual(id, self.id_control)

0 comments on commit 7d131b3

Please sign in to comment.