diff --git a/config/skyline.json b/config/skyline.json new file mode 100644 index 0000000..f1f4b00 --- /dev/null +++ b/config/skyline.json @@ -0,0 +1,20 @@ +{ + "sif": "/data/openomics/SIFs/", + "mounts": { + "kaiju": { + "to": "/opt/kaiju", + "from": "/data/openomics/references/weave/kaiju/kaiju_db_nr_euk_2023-05-10", + "mode": "ro" + }, + "kraken2" : { + "to": "/opt/kraken2", + "from": "/data/openomics/references/weave/kraken2/k2_pluspfp_20230605", + "mode": "ro" + }, + "fastq_screen" : { + "to": "/fdb/fastq_screen/FastQ_Screen_Genomes", + "from": "/data/openomics/references/weave/FastQ_Screen_Genomes", + "mode": "ro" + } + } +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 326097d..29ef7c1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ terminaltables pyyaml tabulate progressbar +python-dateutil diff --git a/scripts/config.py b/scripts/config.py index e9f50e2..2d58b4f 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -20,11 +20,11 @@ def get_current_server(): re_biowulf_head = (r"biowulf\.nih\.gov", "biowulf") re_biowulf_compute = (r"cn\d{4}", "biowulf") - # locus hostnames - re_locus_head = (r"ai\-submit\d{1}", "locus") - re_locus_compute = (r"ai\-hpcn\d{3}", "locus") + # skyline hostnames + re_skyline_head = (r"ai-hpc(submit|n)(\d+)?", "skyline") + re_skyline_compute = (r"ai-hpc(submit|n)(\d+)?", "skyline") - host_profiles = [re_bigsky, re_biowulf_compute, re_biowulf_head, re_locus_compute, re_locus_head] + host_profiles = [re_bigsky, re_biowulf_compute, re_biowulf_head, re_skyline_head, re_skyline_compute] host = None for pat, this_host in host_profiles: @@ -157,6 +157,11 @@ def get_bigsky_seq_dirs(): "seqroot": "/data/RTB_GRS/SequencerRuns/", "seq": get_biowulf_seq_dirs(), "profile": Path(Path(__file__).parent.parent, "utils", "profiles", "biowulf").resolve(), + }, + "skyline": { + "seqroot": "/data/rtb_grs/SequencerRuns/", + "seq": get_bigsky_seq_dirs(), + "profile": 
Path(Path(__file__).parent.parent, "utils", "profiles", "skyline").resolve(), } } diff --git a/scripts/utils.py b/scripts/utils.py index 29fda6a..6093a79 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -113,9 +113,10 @@ def exec_snakemake(popen_cmd, local=False, dry_run=False, env=None, cwd=None): else: popen_kwargs['cwd'] = str(Path.cwd()) + parent_jobid = None if local or dry_run: - proc = Popen(popen_cmd, stdout=PIPE, stderr=STDOUT, **popen_kwargs) - parent_jobid = None + popen_kwargs['env'].update(os.environ) + proc = Popen(map(str, popen_cmd), stdout=PIPE, stderr=STDOUT, **popen_kwargs) for line in proc.stdout: lutf8 = line.decode('utf-8') jid_search = re.search(r"external jobid \'(\d+)\'", lutf8, re.MULTILINE) @@ -139,15 +140,16 @@ def exec_snakemake(popen_cmd, local=False, dry_run=False, env=None, cwd=None): def mk_sbatch_script(wd, cmd): if not Path(wd, 'logs', 'masterjob').exists(): Path(wd, 'logs', 'masterjob').mkdir(mode=0o755, parents=True) + shebang = "#!/bin/bash --login" if host == 'skyline' else '#!/bin/bash' master_job_script = \ f""" - #!/bin/bash + {shebang} #SBATCH --job-name=weave_masterjob #SBATCH --output={wd}/logs/masterjob/%x_%j.out #SBATCH --error={wd}/logs/masterjob/%x_%j.err #SBATCH --ntasks=1 #SBATCH --cpus-per-task=2 - #SBATCH --time=02-00:00:00 + #SBATCH --time=05-00:00:00 #SBATCH --export=ALL #SBATCH --mem=16g """.lstrip() @@ -169,7 +171,9 @@ def get_mods(init=False): mod_cmd.append('source /gs1/apps/user/rmlspack/share/spack/setup-env.sh') mod_cmd.append('spack load miniconda3@4.11.0') mod_cmd.append('source activate snakemake7-19-1') - else: + elif host == 'skyline': + mod_cmd.append('source /data/openomics/bin/dependencies.sh') + elif host == 'biowulf': if init: mod_cmd.append('source /etc/profile.d/modules.sh') else: @@ -259,7 +263,7 @@ def exec_pipeline(configs, dry_run=False, local=False): top_env['SINGULARITY_CACHEDIR'] = str(Path(this_config['out_to'], '.singularity').absolute()) this_cmd = [ "snakemake", - "-pr", + 
"-pr", "--cores", "all", "--use-singularity", "--rerun-incomplete", "--keep-incomplete", diff --git a/weave b/weave index 95af79b..e4d69f5 100755 --- a/weave +++ b/weave @@ -26,9 +26,9 @@ def run(args): pairs = ['1', '2'] if sample_sheet.is_paired_end else ['1'] # ~~~ demultiplexing configuration ~~~ - bcls = [x for x in Path(rundir).rglob('*.bcl.*') if not 'tmp' in str(x)] + bcls = [x.absolute() for x in Path(rundir).rglob('*.bcl.*') if not 'tmp' in str(x)] if not bcls: - bcls = [x for x in Path(rundir).rglob('*.cbcl') if not 'tmp' in str(x)] + bcls = [x.absolute() for x in Path(rundir).rglob('*.cbcl') if not 'tmp' in str(x)] exec_config['sample_sheet'].append(str(sample_sheet.path)) exec_config['bcl_files'].append(bcls) exec_config['demux_data'].append(files.check_if_demuxed(rundir)) @@ -124,4 +124,15 @@ if __name__ == '__main__': parser_cache.set_defaults(func = get_cache) parser_unlock.set_defaults(func = unlock_dir) args = main_parser.parse_args() + + if not hasattr(args, 'func'): + main_parser.print_help() + print('---') + parser_run.print_help() + print('---') + parser_cache.print_help() + print('---') + parser_unlock.print_help() + exit(0) + args.func(args) \ No newline at end of file diff --git a/workflow/qc.smk b/workflow/qc.smk index 68a8fb9..11632bb 100644 --- a/workflow/qc.smk +++ b/workflow/qc.smk @@ -23,9 +23,9 @@ rule fastqc_untrimmed: fqreport = config['out_to'] + "/" + config["project"] + "/{sids}/fastqc_untrimmed/{sids}_R{rnums}_" + trim_input_suffix + "_fastqc.zip", params: output_dir = lambda w: config['out_to'] + "/" + config["project"] + "/" + w.sids + "/fastqc_untrimmed/" - log: config['out_to'] + "/logs/" + "/" + config["project"] + "/fastqc_untrimmed/{sids}_R{rnums}.log" + log: config['out_to'] + "/logs/" + config["project"] + "/fastqc_untrimmed/{sids}_R{rnums}.log" threads: 4 - containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.1.sif" + containerized: config["resources"]["sif"] + 
"weave_ngsqc_0.0.2.sif" resources: mem_mb = 8096 shell: """ @@ -42,7 +42,7 @@ rule fastqc_trimmed: fqreport = config['out_to'] + "/" + config["project"] + "/{sids}/fastqc_trimmed/{sids}_trimmed_R{rnums}_fastqc.zip", params: output_dir = lambda w: config['out_to'] + "/" + config["project"] + "/" + w.sids + "/fastqc_trimmed/" - containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.1.sif" + containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.2.sif" threads: 4 resources: mem_mb = 8096 log: config['out_to'] + "/logs/" + config["project"] + "/fastqc_trimmed/{sids}_R{rnums}.log" @@ -124,7 +124,7 @@ rule multiqc_report: input_dir = config['out_to'], output_dir = config['out_to'] + "/" + config["project"] + "/multiqc/", report_title = "Run: " + config["run_ids"] + ", Project: " + config["project"], - containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.1.sif" + containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.2.sif" threads: 4 resources: mem_mb = 8096 log: