Merge pull request #25 from broadinstitute/dp-demux

add --append_run_id to illumina_demux
broadinstitute · Jun 9, 2020 · 00e55c9
2 parents c67f9ff + 6ce3fa3
commit 00e55c9
Showing 1 changed file with 14 additions and 4 deletions.
diff --git a/illumina.py b/illumina.py
@@ -62,6 +62,9 @@ def parser_illumina_demux(parser=argparse.ArgumentParser()):
     parser.add_argument('--read_structure',
                         help='Override read structure (default: read from RunInfo.xml).',
                         default=None)
+    parser.add_argument('--append_run_id',
+                        help='If specified, output filenames will include the flowcell ID and lane number.',
+                        action='store_true')
 
     for opt in tools.picard.ExtractIlluminaBarcodesTool.option_list:
         if opt not in ('read_structure', 'num_processors'):
@@ -96,7 +99,6 @@ def main_illumina_demux(args):
         while handling the various required input formats. Also can
         read Illumina BCL directories, tar.gz BCL directories.
     '''
-    # TO DO: read BCL or tar.gz BCL directories from S3 / object store.
 
     # prepare
     illumina = IlluminaDirectory(args.inDir)
@@ -118,10 +120,14 @@ def main_illumina_demux(args):
         read_structure = args.read_structure
     else:
         read_structure = runinfo.get_read_structure()
+    if args.append_run_id:
+        run_id = "{}.{}".format(flowcell, args.lane)
+    else:
+        run_id = None
     if args.sampleSheet:
-        samples = SampleSheet(args.sampleSheet, only_lane=args.lane)
+        samples = SampleSheet(args.sampleSheet, only_lane=args.lane, append_run_id=run_id)
     else:
-        samples = illumina.get_SampleSheet(only_lane=args.lane)
+        samples = illumina.get_SampleSheet(only_lane=args.lane, append_run_id=run_id)
 
 
     link_locs=False
@@ -718,13 +724,14 @@ class SampleSheet(object):
         tab-delimited versions as well.
     '''
 
-    def __init__(self, infile, use_sample_name=True, only_lane=None, allow_non_unique=False):
+    def __init__(self, infile, use_sample_name=True, only_lane=None, allow_non_unique=False, append_run_id=None):
         self.fname = infile
         self.use_sample_name = use_sample_name
         if only_lane is not None:
             only_lane = str(only_lane)
         self.only_lane = only_lane
         self.allow_non_unique = allow_non_unique
+        self.append_run_id = append_run_id
         self.rows = []
         self._detect_and_load_sheet(infile)
 
@@ -857,6 +864,9 @@ def _detect_and_load_sheet(self, infile):
                     row['run'] += '.r' + str(unique_count[row['library']])
             else:
                 raise SampleSheetError('non-unique library IDs in this lane', infile)
+        if self.append_run_id:
+            for row in self.rows:
+                row['run'] += '.' + self.append_run_id
 
         # escape sample, run, and library IDs to be filename-compatible
         for row in self.rows: