Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dp24 gc content #28

Merged
merged 20 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file modified bin/BedTools.py
100644 → 100755
Empty file.
Empty file modified bin/extract_contaminants_by_type.py
100644 → 100755
Empty file.
Empty file modified bin/filter_barcode_blast_results.py
100644 → 100755
Empty file.
33 changes: 33 additions & 0 deletions bin/gc_content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env python3
"""
Script for finding the GC content of each sequence in a multiFASTA file

Written by Eerik Aunin @eeaunin

Adapted by Damon-Lee Pointon @DLBPointon
"""

import argparse
import general_purpose_functions as gpf


def main(fasta_path):
    """Print the GC fraction of every sequence in a (multi)FASTA file.

    For each ``(header, sequence)`` pair yielded by
    ``gpf.read_fasta_in_chunks``, one tab-separated line is written to
    stdout: the first whitespace-delimited token of the header, followed by
    the fraction of ``G``/``C`` characters (case-insensitive) over the full
    sequence length, formatted with six decimal places.

    A zero-length sequence has no defined GC fraction; it is reported as
    ``NA`` rather than aborting the whole run.

    Args:
        fasta_path: Path to the input FASTA file.
    """
    for header, seq in gpf.read_fasta_in_chunks(fasta_path):
        seq_id = header.split()[0]
        seq_len = len(seq)
        if seq_len > 0:
            upper_seq = seq.upper()
            gc_count = upper_seq.count("G") + upper_seq.count("C")
            gc_content_string = "{:.6f}".format(gc_count / seq_len)
        else:
            # Formatting None with a float spec raises TypeError, so an empty
            # record needs an explicit placeholder.
            gc_content_string = "NA"
        print("{}\t{}".format(seq_id, gc_content_string))


if __name__ == "__main__":
    # Command-line entry point: one positional FASTA path, plus -v to print
    # the script version and exit.
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("-v", action="version", version="1.0")
    cli.add_argument("fasta_path", type=str, help="Path to input FASTA file")
    main(cli.parse_args().fasta_path)
Empty file modified bin/organelle_contamination_recommendation.py
100644 → 100755
Empty file.
Empty file modified bin/pacbio_barcode_check.py
100644 → 100755
Empty file.
20 changes: 3 additions & 17 deletions bin/reformat_blast_outfmt6.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,6 @@
in_data = gpf.ll(in_path)

for line in in_data:
split_line = line.split()
assert len(split_line) == 14
output_line = "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}".format(
split_line[0],
split_line[4],
split_line[5],
split_line[6],
split_line[7],
split_line[8],
split_line[9],
split_line[10],
split_line[11],
split_line[12],
split_line[13],
split_line[2],
)
print(output_line)
s = line.split()
assert len(s) == 14
print("\t".join(s[0:1] + s[4:] + s[2:3]))
Empty file modified bin/reformat_diamond_outfmt6.py
100644 → 100755
DLBPointon marked this conversation as resolved.
Show resolved Hide resolved
Empty file.
5 changes: 5 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,9 @@ process {
ext.prefix = { "${meta.id}_euk" }
}

// Settings applied to any process whose fully-qualified name matches
// <scope>:<scope>:GENERATE_GENOME:GNU_SORT.
withName: '.*:.*:GENERATE_GENOME:GNU_SORT' {
// Exposed to the module as task.ext.prefix: the sample id with a "_sorted" suffix.
ext.prefix = { "${meta.id}_sorted"}
// Exposed to the module as task.ext.args. Presumably forwarded to GNU sort:
// key on field 2 only, numeric comparison, reversed order — confirm against the module script.
ext.args = { '-k2,2 -nr' }
}

}
7 changes: 7 additions & 0 deletions conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@

----------------------------------------------------------------------------------------
*/
// Process-wide default for this profile: run at most one instance of each
// process at a time.
process {
maxForks = 1
}

// Executor-wide limit for this profile: the executor handles at most one
// task at a time.
executor {
queueSize=1
}

params {
config_profile_name = 'Full test profile'
Expand Down
16 changes: 16 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@
"git_sha": "76cc4938c1f6ea5c7d83fed1eeffc146787f9543",
"installed_by": ["modules"]
},
"custom/getchromsizes": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"],
"patch": "modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff"
},
"diamond/blastx": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
Expand All @@ -35,6 +41,11 @@
"git_sha": "5a35af8b60d45425c4b9193e567d16b614d93dbe",
"installed_by": ["modules"]
},
"gnu/sort": {
"branch": "master",
"git_sha": "88f6e982fb8bd40488d837b3b08a65008e602840",
"installed_by": ["modules"]
},
"fcs/fcsgx": {
"branch": "master",
"git_sha": "8c4542e5d421c4690cf1fa6ec729e9304763fdaf",
Expand Down Expand Up @@ -66,6 +77,11 @@
"git_sha": "a1ffbc1fd87bd5a829e956cc26ec9cc53af3e817",
"installed_by": ["modules"]
},
"samtools/faidx": {
"branch": "master",
"git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe",
"installed_by": ["modules"]
},
"samtools/index": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
Expand Down
7 changes: 4 additions & 3 deletions modules/local/blast_chunk_to_full.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process BLAST_CHUNK_TO_FULL {
tag "${meta.id}"
label 'process_low'

conda "conda-forge::python=3.9 conda-forge::pandas=1.5.2"
conda "conda-forge::python=3.9"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
'quay.io/biocontainers/pandas:1.5.2' }"
'https://depot.galaxyproject.org/singularity/python:3.9' :
'biocontainers/python:3.9' }"

input:
tuple val(meta), path(chunked)
Expand All @@ -22,6 +22,7 @@ process BLAST_CHUNK_TO_FULL {

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
blast_hit_chunk_coords_to_full_coords: \$(blast_hit_chunk_coords_to_full_coords.py -v)
END_VERSIONS
"""
Expand Down
8 changes: 5 additions & 3 deletions modules/local/blast_get_top_hits.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process BLAST_GET_TOP_HITS {
tag "${meta.id}"
label 'process_low'

conda "conda-forge::python=3.9 conda-forge::pandas=1.5.2"
conda "conda-forge::python=3.9"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
'quay.io/biocontainers/pandas:1.5.2' }"
'https://depot.galaxyproject.org/singularity/python:3.9' :
'biocontainers/python:3.9' }"

input:
tuple val(meta), path(outfmt6)
Expand All @@ -21,6 +21,7 @@ process BLAST_GET_TOP_HITS {

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
blast_get_top_hits: \$(blast_get_top_hits.py -v)
END_VERSIONS
"""
Expand All @@ -31,6 +32,7 @@ process BLAST_GET_TOP_HITS {

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
reformat_blast_outfmt6: \$(blast_get_top_hits.py -v)
END_VERSIONS
"""
Expand Down
8 changes: 5 additions & 3 deletions modules/local/check_barcode.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process CHECK_BARCODE {
tag "${meta.id}"
label 'process_low'

conda "conda-forge::python=3.9 conda-forge::pandas=1.5.2"
conda "conda-forge::python=3.9"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
'quay.io/biocontainers/pandas:1.5.2' }"
'https://depot.galaxyproject.org/singularity/python:3.9' :
'biocontainers/python:3.9' }"

input:
tuple val(meta) , path(barcodes)
Expand All @@ -27,6 +27,7 @@ process CHECK_BARCODE {

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
pacbio_barcode_check: \$(pacbio_barcode_check.py -v)
END_VERSIONS
"""
Expand All @@ -37,6 +38,7 @@ process CHECK_BARCODE {

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
pacbio_barcode_check: \$(pacbio_barcode_check.py -v)
END_VERSIONS
"""
Expand Down
8 changes: 5 additions & 3 deletions modules/local/extract_contaminants.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process EXTRACT_CONTAMINANTS {
tag "${meta.id}"
label 'process_low'

conda "conda-forge::python=3.9 conda-forge::pandas=1.5.2"
conda "conda-forge::python=3.9"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
'quay.io/biocontainers/pandas:1.5.2' }"
'https://depot.galaxyproject.org/singularity/python:3.9' :
'biocontainers/python:3.9' }"

input:
tuple val(meta), path(fasta)
Expand All @@ -24,6 +24,7 @@ process EXTRACT_CONTAMINANTS {

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
extract_contaminants_by_type: \$(extract_contaminants_by_type.py -v)
END_VERSIONS
"""
Expand All @@ -35,6 +36,7 @@ process EXTRACT_CONTAMINANTS {

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
extract_contaminants_by_type: \$(extract_contaminants_by_type.py -v)
END_VERSIONS
"""
Expand Down
8 changes: 5 additions & 3 deletions modules/local/filter_barcode.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process FILTER_BARCODE {
tag "${meta.id}"
label 'process_low'

conda "conda-forge::python=3.9 conda-forge::pandas=1.5.2"
conda "conda-forge::python=3.9"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
'quay.io/biocontainers/pandas:1.5.2' }"
'https://depot.galaxyproject.org/singularity/python:3.9' :
'biocontainers/python:3.9' }"

input:
tuple val(meta), path(fasta)
Expand All @@ -28,6 +28,7 @@ process FILTER_BARCODE {

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
filter_barcode_blast_results: \$(filter_barcode_blast_results.py -v)
END_VERSIONS
"""
Expand All @@ -41,6 +42,7 @@ process FILTER_BARCODE {

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
filter_barcode_blast_results: \$(filter_barcode_blast_results.py -v)
END_VERSIONS
"""
Expand Down
8 changes: 5 additions & 3 deletions modules/local/format_diamond_outfmt6.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process REFORMAT_FULL_OUTFMT6 {
tag "${meta.id}"
label 'process_low'

conda "conda-forge::python=3.9 conda-forge::pandas=1.5.2"
conda "conda-forge::python=3.9"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
'quay.io/biocontainers/pandas:1.5.2' }"
'https://depot.galaxyproject.org/singularity/python:3.9' :
'biocontainers/python:3.9' }"

input:
tuple val(meta), path(diamond_blast)
Expand All @@ -21,6 +21,7 @@ process REFORMAT_FULL_OUTFMT6 {

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
reformat_diamond_outfmt6: \$(reformat_diamond_outfmt6.py -v)
END_VERSIONS
"""
Expand All @@ -32,6 +33,7 @@ process REFORMAT_FULL_OUTFMT6 {

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
reformat_diamond_outfmt6: \$(reformat_diamond_outfmt6.py -v)
END_VERSIONS
"""
Expand Down
39 changes: 39 additions & 0 deletions modules/local/gc_content.nf
DLBPointon marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Runs gc_content.py on a FASTA file and captures its stdout (one
// "<sequence id>\t<GC fraction>" line per sequence) in <prefix>-gc.txt.
process GC_CONTENT {
    tag "${meta.id}"
    label 'process_low'

    conda "conda-forge::python=3.9"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/python:3.9' :
        'biocontainers/python:3.9' }"

    input:
    tuple val(meta), path(fasta)

    output:
    tuple val(meta), path( "*-gc.txt" ) , emit: txt
    path "versions.yml"                 , emit: versions

    script:
    // Output name defaults to the sample id unless ext.prefix is configured.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    gc_content.py ${fasta} > ${prefix}-gc.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        python: \$(python --version | sed 's/Python //g')
        gc_content: \$(gc_content.py -v)
    END_VERSIONS
    """

    stub:
    // The stub must create a file matching the declared "*-gc.txt" output,
    // otherwise a -stub run fails on the missing output.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}-gc.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        python: \$(python --version | sed 's/Python //g')
        gc_content: \$(gc_content.py -v)
    END_VERSIONS
    """
}
40 changes: 40 additions & 0 deletions modules/local/get_largest_scaff.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Reads the first line of the input file and exposes its second tab-separated
// field as the environment variable `largest_scaff`, emitted as `scaff_size`.
// NOTE(review): presumably the input is a size table already sorted largest-first — confirm against the caller.
process GET_LARGEST_SCAFF {

tag "$meta.id"
label 'process_low'

conda "conda-forge::coreutils=9.1"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
'docker.io/ubuntu:20.04' }"

input:
tuple val( meta ), path( file )

output:
// The value is captured from the task's shell environment rather than from a file.
env largest_scaff , emit: scaff_size
path "versions.yml" , emit: versions

shell:
def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
// Dollar-slashy string: ${file}, ${task.process} and $VERSION are interpolated
// by Groovy, while the backslash in $'\t' is passed through to bash unchanged.
$/
largest_scaff=`head -n 1 "${file}" | cut -d$'\t' -f2`

cat <<-END_VERSIONS > versions.yml
"${task.process}":
coreutils: $VERSION
END_VERSIONS
/$

stub:
// NOTE(review): `prefix` is defined but never used in this stub.
def prefix = task.ext.prefix ?: "${meta.id}"
def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
// Emits a fixed placeholder size instead of reading the input file.
"""
largest_scaff=1000000

cat <<-END_VERSIONS > versions.yml
"${task.process}":
coreutils: $VERSION
END_VERSIONS
"""
}
1 change: 1 addition & 0 deletions modules/local/get_lineage_for_kraken.nf
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ process GET_LINEAGE_FOR_KRAKEN {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
pandas: \$(pip list | grep "pandas" | sed 's/[[:blank:]]//g' | sed 's/pandas//g')
general_purpose_functions.py: \$(general_purpose_functions.py --version | cut -d' ' -f2)
get_lineage_for_kraken_results.py: \$(get_lineage_for_kraken_results.py --version | cut -d' ' -f2)
END_VERSIONS
Expand Down
Loading