Skip to content

Commit

Permalink
Merge pull request #25 from broadinstitute/dp-demux
Browse files (browse the repository at this point in the history)
add --append_run_id to illumina_demux
  • Loading branch information
dpark01 authored Jun 9, 2020
2 parents: c67f9ff + 6ce3fa3; commit: 00e55c9
Showing 1 changed file with 14 additions and 4 deletions.
18 changes: 14 additions & 4 deletions illumina.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -62,6 +62,9 @@ def parser_illumina_demux(parser=argparse.ArgumentParser()):
parser.add_argument('--read_structure',
help='Override read structure (default: read from RunInfo.xml).',
default=None)
parser.add_argument('--append_run_id',
help='If specified, output filenames will include the flowcell ID and lane number.',
action='store_true')

for opt in tools.picard.ExtractIlluminaBarcodesTool.option_list:
if opt not in ('read_structure', 'num_processors'):
Expand Down Expand Up @@ -96,7 +99,6 @@ def main_illumina_demux(args):
while handling the various required input formats. Also can
read Illumina BCL directories, tar.gz BCL directories.
'''
# TO DO: read BCL or tar.gz BCL directories from S3 / object store.

# prepare
illumina = IlluminaDirectory(args.inDir)
Expand All @@ -118,10 +120,14 @@ def main_illumina_demux(args):
read_structure = args.read_structure
else:
read_structure = runinfo.get_read_structure()
if args.append_run_id:
run_id = "{}.{}".format(flowcell, args.lane)
else:
run_id = None
if args.sampleSheet:
samples = SampleSheet(args.sampleSheet, only_lane=args.lane)
samples = SampleSheet(args.sampleSheet, only_lane=args.lane, append_run_id=run_id)
else:
samples = illumina.get_SampleSheet(only_lane=args.lane)
samples = illumina.get_SampleSheet(only_lane=args.lane, append_run_id=run_id)


link_locs=False
Expand Down Expand Up @@ -718,13 +724,14 @@ class SampleSheet(object):
tab-delimited versions as well.
'''

def __init__(self, infile, use_sample_name=True, only_lane=None, allow_non_unique=False):
def __init__(self, infile, use_sample_name=True, only_lane=None, allow_non_unique=False, append_run_id=None):
self.fname = infile
self.use_sample_name = use_sample_name
if only_lane is not None:
only_lane = str(only_lane)
self.only_lane = only_lane
self.allow_non_unique = allow_non_unique
self.append_run_id = append_run_id
self.rows = []
self._detect_and_load_sheet(infile)

Expand Down Expand Up @@ -857,6 +864,9 @@ def _detect_and_load_sheet(self, infile):
row['run'] += '.r' + str(unique_count[row['library']])
else:
raise SampleSheetError('non-unique library IDs in this lane', infile)
if self.append_run_id:
for row in self.rows:
row['run'] += '.' + self.append_run_id

# escape sample, run, and library IDs to be filename-compatible
for row in self.rows:
Expand Down

0 comments on commit 00e55c9

Please sign in to comment.