Merge pull request #26 from broadinstitute/ct-downsample-jvm-parsefix
JVMmemory parse fix for downsample_bams
tomkinsc committed Jun 9, 2020
2 parents 00e55c9 + a77edd5 commit 4eb88a8
Showing 2 changed files with 30 additions and 4 deletions.
9 changes: 5 additions & 4 deletions read_utils.py
@@ -442,9 +442,10 @@ def downsample_bams(data_pairs, downsample_target, threads=None, JVMmemory=None)
     downsamplesam = tools.picard.DownsampleSamTool()
     workers = util.misc.sanitize_thread_count(threads)
     JVMmemory = JVMmemory if JVMmemory else tools.picard.DownsampleSamTool.jvmMemDefault
-    jvm_worker_memory = str(max(1,int(JVMmemory.rstrip("g"))/workers))+'g'
+
+    jvm_worker_memory_mb = str(int(util.misc.convert_size_str(JVMmemory,"m")[:-1])//workers)+"m"
     with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
-        future_to_file = {executor.submit(downsamplesam.downsample_to_approx_count, *(list(fp)+[downsample_target]), JVMmemory=jvm_worker_memory): fp[0] for fp in data_pairs}
+        future_to_file = {executor.submit(downsamplesam.downsample_to_approx_count, *(list(fp)+[downsample_target]), JVMmemory=jvm_worker_memory_mb): fp[0] for fp in data_pairs}
         for future in concurrent.futures.as_completed(future_to_file):
             f = future_to_file[future]
             try:
@@ -455,9 +456,9 @@ def downsample_bams(data_pairs, downsample_target, threads=None, JVMmemory=None)
 def dedup_bams(data_pairs, threads=None, JVMmemory=None):
     workers = util.misc.sanitize_thread_count(threads)
     JVMmemory = JVMmemory if JVMmemory else tools.picard.DownsampleSamTool.jvmMemDefault
-    jvm_worker_memory = str(max(1,int(JVMmemory.rstrip("g"))/workers))+'g'
+    jvm_worker_memory_mb = str(int(util.misc.convert_size_str(JVMmemory,"m")[:-1])//workers)+"m"
     with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
-        future_to_file = {executor.submit(rmdup_mvicuna_bam, *fp, JVMmemory=jvm_worker_memory): fp[0] for fp in data_pairs}
+        future_to_file = {executor.submit(rmdup_mvicuna_bam, *fp, JVMmemory=jvm_worker_memory_mb): fp[0] for fp in data_pairs}
         for future in concurrent.futures.as_completed(future_to_file):
             f = future_to_file[future]
             try:
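
Aside (not part of the commit): a minimal sketch of what the parse fix changes. The old per-worker expression used Python 3's true division, so it could produce a fractional size string that java's -Xmx flag does not parse; the new code converts the budget to megabytes and integer-divides it across workers, so the result is always a whole-number spec. Illustrative values only (JVMmemory="4g", workers=3):

# hypothetical values for illustration; convert_size_str is added in util/misc.py below
import util.misc
JVMmemory, workers = "4g", 3
old = str(max(1, int(JVMmemory.rstrip("g")) / workers)) + 'g'
# -> "1.3333333333333333g" -- fractional, not a valid JVM heap size
new = str(int(util.misc.convert_size_str(JVMmemory, "m")[:-1]) // workers) + "m"
# -> "1365m" -- always an integer number of megabytes
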
25 changes: 25 additions & 0 deletions util/misc.py
@@ -562,3 +562,28 @@ def wraps(f):
 def unwrap(f):
     """Find the original function under layers of wrappers"""
     return f if not hasattr(f, '__wrapped__') else unwrap(f.__wrapped__)
+
+def convert_size_str(input_size_str, output_unit="m", round_number=True):
+    """ intended to convert a jvm-style size spec to an int value of the desired unit """
+    unit2factor = {'k': 1024, 'm': 1024**2, 'g': 1024**3, 't': 1024**4}
+    output_unit = output_unit.lower()
+
+    size_spec_pattern = re.compile(r"(.*)([kmgt])")
+
+    m = re.search(size_spec_pattern, input_size_str)
+    if m:
+        if m.group(1) and m.group(2):
+            input_size=float(m.group(1))
+            input_unit=m.group(2).lower()
+
+            if input_unit not in unit2factor.keys():
+                raise TypeError("Error parsing size from string: %s" % input_size_str)
+
+            # convert to bytes
+            size_in_bytes = input_size * unit2factor[input_unit]
+
+            # convert to desired size
+            if round_number:
+                return str(round(max(1,float(size_in_bytes)/unit2factor[output_unit])))+output_unit
+            else:
+                return str(float(size_in_bytes)/unit2factor[output_unit])+output_unit
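
For reference, a quick usage sketch of the helper added above (illustrative values, not part of the commit):

# assumes the repo root is on PYTHONPATH so util.misc is importable
from util.misc import convert_size_str
convert_size_str("4g")                            # -> "4096m" (default output unit is megabytes)
convert_size_str("512m", "k")                     # -> "524288k"
convert_size_str("4g", "g", round_number=False)   # -> "4.0g"
# the callers in read_utils.py strip the unit suffix before splitting across workers:
int(convert_size_str("4g", "m")[:-1]) // 3        # -> 1365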
