Skip to content

Commit

Permalink
fix: align mg assembly rules to expected outputs
Browse files Browse the repository at this point in the history
  • Loading branch information
Ryan Routsong committed Feb 1, 2024
1 parent 0efd53f commit 725f091
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 16 deletions.
20 changes: 20 additions & 0 deletions config/skyline.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"sif": "/data/openomics/SIFs/",
"mounts": {
"kaiju": {
"to": "/opt/kaiju",
"from": "/data/openomics/references/weave/kaiju/kaiju_db_nr_euk_2023-05-10",
"mode": "ro"
},
"kraken2" : {
"to": "/opt/kraken2",
"from": "/data/openomics/references/weave/kraken2/k2_pluspfp_20230605",
"mode": "ro"
},
"fastq_screen" : {
"to": "/fdb/fastq_screen/FastQ_Screen_Genomes",
"from": "/data/openomics/references/weave/FastQ_Screen_Genomes",
"mode": "ro"
}
}
}
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ terminaltables
pyyaml
tabulate
progressbar
python-dateutil
13 changes: 9 additions & 4 deletions scripts/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ def get_current_server():
re_biowulf_head = (r"biowulf\.nih\.gov", "biowulf")
re_biowulf_compute = (r"cn\d{4}", "biowulf")

# locus hostnames
re_locus_head = (r"ai\-submit\d{1}", "locus")
re_locus_compute = (r"ai\-hpcn\d{3}", "locus")
# skyline hostnames
re_skyline_head = (r"ai-hpc(submit|n)(\d+)?", "skyline")
re_skyline_compute = (r"ai-hpc(submit|n)(\d+)?", "skyline")

host_profiles = [re_bigsky, re_biowulf_compute, re_biowulf_head, re_locus_compute, re_locus_head]
host_profiles = [re_bigsky, re_biowulf_compute, re_biowulf_head, re_skyline_head, re_skyline_compute]

host = None
for pat, this_host in host_profiles:
Expand Down Expand Up @@ -157,6 +157,11 @@ def get_bigsky_seq_dirs():
"seqroot": "/data/RTB_GRS/SequencerRuns/",
"seq": get_biowulf_seq_dirs(),
"profile": Path(Path(__file__).parent.parent, "utils", "profiles", "biowulf").resolve(),
},
"skyline": {
"seqroot": "/data/rtb_grs/SequencerRuns/",
"seq": get_bigsky_seq_dirs(),
"profile": Path(Path(__file__).parent.parent, "utils", "profiles", "skyline").resolve(),
}
}

Expand Down
16 changes: 10 additions & 6 deletions scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,10 @@ def exec_snakemake(popen_cmd, local=False, dry_run=False, env=None, cwd=None):
else:
popen_kwargs['cwd'] = str(Path.cwd())

parent_jobid = None
if local or dry_run:
proc = Popen(popen_cmd, stdout=PIPE, stderr=STDOUT, **popen_kwargs)
parent_jobid = None
popen_kwargs['env'].update(os.environ)
proc = Popen(map(str, popen_cmd), stdout=PIPE, stderr=STDOUT, **popen_kwargs)
for line in proc.stdout:
lutf8 = line.decode('utf-8')
jid_search = re.search(r"external jobid \'(\d+)\'", lutf8, re.MULTILINE)
Expand All @@ -139,15 +140,16 @@ def exec_snakemake(popen_cmd, local=False, dry_run=False, env=None, cwd=None):
def mk_sbatch_script(wd, cmd):
if not Path(wd, 'logs', 'masterjob').exists():
Path(wd, 'logs', 'masterjob').mkdir(mode=0o755, parents=True)
shebang = "#!/bin/bash --login" if host == 'skyline' else '#!/bin/bash'
master_job_script = \
f"""
#!/bin/bash
{shebang}
#SBATCH --job-name=weave_masterjob
#SBATCH --output={wd}/logs/masterjob/%x_%j.out
#SBATCH --error={wd}/logs/masterjob/%x_%j.err
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=2
#SBATCH --time=02-00:00:00
#SBATCH --time=05-00:00:00
#SBATCH --export=ALL
#SBATCH --mem=16g
""".lstrip()
Expand All @@ -169,7 +171,9 @@ def get_mods(init=False):
mod_cmd.append('source /gs1/apps/user/rmlspack/share/spack/setup-env.sh')
mod_cmd.append('spack load [email protected]')
mod_cmd.append('source activate snakemake7-19-1')
else:
elif host == 'skyline':
mod_cmd.append('source /data/openomics/bin/dependencies.sh')
elif host == 'biowulf':
if init:
mod_cmd.append('source /etc/profile.d/modules.sh')
else:
Expand Down Expand Up @@ -259,7 +263,7 @@ def exec_pipeline(configs, dry_run=False, local=False):
top_env['SINGULARITY_CACHEDIR'] = str(Path(this_config['out_to'], '.singularity').absolute())
this_cmd = [
"snakemake",
"-pr",
"-pr", "--cores", "all",
"--use-singularity",
"--rerun-incomplete",
"--keep-incomplete",
Expand Down
15 changes: 13 additions & 2 deletions weave
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ def run(args):
pairs = ['1', '2'] if sample_sheet.is_paired_end else ['1']

# ~~~ demultiplexing configuration ~~~
bcls = [x for x in Path(rundir).rglob('*.bcl.*') if not 'tmp' in str(x)]
bcls = [x.absolute() for x in Path(rundir).rglob('*.bcl.*') if not 'tmp' in str(x)]
if not bcls:
bcls = [x for x in Path(rundir).rglob('*.cbcl') if not 'tmp' in str(x)]
bcls = [x.absolute() for x in Path(rundir).rglob('*.cbcl') if not 'tmp' in str(x)]
exec_config['sample_sheet'].append(str(sample_sheet.path))
exec_config['bcl_files'].append(bcls)
exec_config['demux_data'].append(files.check_if_demuxed(rundir))
Expand Down Expand Up @@ -124,4 +124,15 @@ if __name__ == '__main__':
parser_cache.set_defaults(func = get_cache)
parser_unlock.set_defaults(func = unlock_dir)
args = main_parser.parse_args()

if not hasattr(args, 'func'):
    # No sub-command was selected, so no handler was attached via
    # set_defaults(func=...). Show the help text of the top-level parser and
    # of every sub-command parser, separated by '---', then exit successfully.
    #
    # ArgumentParser.print_help() writes the help to stdout itself and
    # returns None; wrapping it in print() would emit a stray "None" line
    # after each help text, so it is called directly here.
    for position, parser in enumerate((main_parser, parser_run, parser_cache, parser_unlock)):
        if position:
            print('---')
        parser.print_help()
    # Raise SystemExit directly instead of relying on the `exit` helper that
    # is injected by the `site` module (absent under `python -S`).
    raise SystemExit(0)

args.func(args)
8 changes: 4 additions & 4 deletions workflow/qc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ rule fastqc_untrimmed:
fqreport = config['out_to'] + "/" + config["project"] + "/{sids}/fastqc_untrimmed/{sids}_R{rnums}_" + trim_input_suffix + "_fastqc.zip",
params:
output_dir = lambda w: config['out_to'] + "/" + config["project"] + "/" + w.sids + "/fastqc_untrimmed/"
log: config['out_to'] + "/logs/" + "/" + config["project"] + "/fastqc_untrimmed/{sids}_R{rnums}.log"
log: config['out_to'] + "/logs/" + config["project"] + "/fastqc_untrimmed/{sids}_R{rnums}.log"
threads: 4
containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.1.sif"
containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.2.sif"
resources: mem_mb = 8096
shell:
"""
Expand All @@ -42,7 +42,7 @@ rule fastqc_trimmed:
fqreport = config['out_to'] + "/" + config["project"] + "/{sids}/fastqc_trimmed/{sids}_trimmed_R{rnums}_fastqc.zip",
params:
output_dir = lambda w: config['out_to'] + "/" + config["project"] + "/" + w.sids + "/fastqc_trimmed/"
containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.1.sif"
containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.2.sif"
threads: 4
resources: mem_mb = 8096
log: config['out_to'] + "/logs/" + config["project"] + "/fastqc_trimmed/{sids}_R{rnums}.log"
Expand Down Expand Up @@ -124,7 +124,7 @@ rule multiqc_report:
input_dir = config['out_to'],
output_dir = config['out_to'] + "/" + config["project"] + "/multiqc/",
report_title = "Run: " + config["run_ids"] + ", Project: " + config["project"],
containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.1.sif"
containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.2.sif"
threads: 4
resources: mem_mb = 8096
log:
Expand Down

0 comments on commit 725f091

Please sign in to comment.