Skip to content

Commit

Permalink
In cmd.py argparse common_args(), set default number of threads to al…
Browse files Browse the repository at this point in the history
…l available if --threads is unspecified (#104)

* In cmd.py argparse common_args(), set default number of threads to all available if --threads is unspecified

In cmd.py argparse common_args(), set the default number of threads to all available if `--threads` is unspecified. Previously, if the threads arg was None, it was up to the consuming function to set the thread count to all available; with this change, the new default is to use all available cores. Additionally, this sanitizes the user-requested thread count via util.misc.sanitize_thread_count(), if a value is specified. This was already the behavior in most multi-threaded functions, via separate calls to util.misc.sanitize_thread_count() wherever a threads arg is consumed. Those separate calls could potentially be refactored out if we relied solely on the argparse interface, but they should be preserved for Python-import usage of the same functions (including some test cases). Changing the default causes no behavior change where the existing separate sanitize_thread_count() calls are used. This also corrects a call to count_and_sort_barcodes() where the threads arg was not being passed.

* Add pandas to Python dependencies
  • Loading branch information
tomkinsc committed Jun 7, 2024
1 parent f91e419 commit 853bea1
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 11 deletions.
4 changes: 2 additions & 2 deletions illumina.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,7 @@ def parser_common_barcodes(parser=argparse.ArgumentParser()):
parser.add_argument('--JVMmemory',
help='JVM virtual memory size (default: %(default)s)',
default=tools.picard.ExtractIlluminaBarcodesTool.jvmMemDefault)
util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
util.cmd.common_args(parser, (('threads',None), ('loglevel', None), ('version', None), ('tmp_dir', None)))
util.cmd.attach_main(parser, main_common_barcodes)
return parser

Expand Down Expand Up @@ -506,7 +506,7 @@ def main_common_barcodes(args):
except IndexError:
barcode2_len = 0

count_and_sort_barcodes(barcodes_tmpdir, args.outSummary, barcode1_len, barcode2_len, args.truncateToLength, args.includeNoise, args.omitHeader)
count_and_sort_barcodes(barcodes_tmpdir, args.outSummary, barcode1_len, barcode2_len, args.truncateToLength, args.includeNoise, args.omitHeader, args.threads)

# clean up
os.unlink(barcode_file)
Expand Down
6 changes: 3 additions & 3 deletions read_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -919,7 +919,7 @@ def _merge_fastqs_and_mvicuna(lb, files):

return readList

def rmdup_mvicuna_bam(inBam, outBam, JVMmemory=None):
def rmdup_mvicuna_bam(inBam, outBam, JVMmemory=None, threads=None):
''' Remove duplicate reads from BAM file using M-Vicuna. The
primary advantage to this approach over Picard's MarkDuplicates tool
is that Picard requires that input reads are aligned to a reference,
Expand All @@ -943,7 +943,7 @@ def rmdup_mvicuna_bam(inBam, outBam, JVMmemory=None):
# For each library, merge FASTQs and run rmdup for entire library
readListAll = mkstempfname('.keep_reads_all.txt')
per_lb_read_lists = []
with concurrent.futures.ProcessPoolExecutor(max_workers=util.misc.available_cpu_count()) as executor:
with concurrent.futures.ProcessPoolExecutor(max_workers=threads or util.misc.available_cpu_count()) as executor:
futures = [executor.submit(_merge_fastqs_and_mvicuna, lb, files) for lb, files in lb_to_files.items()]
for future in concurrent.futures.as_completed(futures):
log.info("mvicuna finished processing library")
Expand Down Expand Up @@ -972,7 +972,7 @@ def parser_rmdup_mvicuna_bam(parser=argparse.ArgumentParser()):
default=tools.picard.FilterSamReadsTool.jvmMemDefault,
help='JVM virtual memory size (default: %(default)s)'
)
util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None)))
util.cmd.common_args(parser, (('threads',None), ('loglevel', None), ('version', None), ('tmp_dir', None)))
util.cmd.attach_main(parser, rmdup_mvicuna_bam, split_args=True)
return parser

Expand Down
11 changes: 5 additions & 6 deletions util/cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import util.version
import util.file
import util.misc

__author__ = "[email protected]"
__version__ = util.version.get_version()
Expand Down Expand Up @@ -76,15 +77,13 @@ def common_args(parser, arglist=(('tmp_dir', None), ('loglevel', None))):
the end, even if there's a failure.""",
default=False)
elif k == 'threads':
if v is None:
text_default = "all available cores"
else:
text_default = v
# if v is None, sanitize_thread_count() sets count to all available cores
thread_count = util.misc.sanitize_thread_count(v)
parser.add_argument('--threads',
dest="threads",
type=int,
help="Number of threads (default: {})".format(text_default),
default=v)
help="Number of threads; by default all cores are used",
default=thread_count)
elif k == 'version':
if not v:
v = __version__
Expand Down

0 comments on commit 853bea1

Please sign in to comment.