From f1daf2f39b2cab296bdb30b362b3b361826ed090 Mon Sep 17 00:00:00 2001 From: "Luis M. Rodriguez-R" Date: Wed, 17 Jan 2024 20:49:31 +0100 Subject: [PATCH] Versions are now captured in the result JSONs Towards #143 --- lib/miga/cli/action/add_result.rb | 23 ++++++- lib/miga/result.rb | 8 ++- lib/miga/result/versions.rb | 17 +++++ scripts/assembly.bash | 16 ++++- scripts/cds.bash | 10 ++- scripts/distances.bash | 98 ++++++++++++++++++++++++++++- scripts/essential_genes.bash | 15 ++++- scripts/mytaxa.bash | 19 +++++- scripts/mytaxa_scan.bash | 17 ++++- scripts/read_quality.bash | 8 ++- scripts/ssu.bash | 20 +++++- scripts/stats.bash | 12 +++- scripts/taxonomy.bash | 100 +++++++++++++++++++++++++++++- scripts/trimmed_fasta.bash | 12 +++- scripts/trimmed_reads.bash | 32 ++++++++-- 15 files changed, 379 insertions(+), 28 deletions(-) create mode 100644 lib/miga/result/versions.rb diff --git a/lib/miga/cli/action/add_result.rb b/lib/miga/cli/action/add_result.rb index 20504f50..63194aa0 100644 --- a/lib/miga/cli/action/add_result.rb +++ b/lib/miga/cli/action/add_result.rb @@ -5,13 +5,17 @@ class MiGA::Cli::Action::AddResult < MiGA::Cli::Action def parse_cli - cli.defaults = { force: false } + cli.defaults = { force: false, stdin_versions: false } cli.parse do |opt| cli.opt_object(opt, [:project, :dataset_opt, :result]) opt.on( '-f', '--force', 'Force re-indexing of the result even if it\'s already registered' ) { |v| cli[:force] = v } + opt.on( + '--stdin-versions', + 'Read Software versions from STDIN' + ) { |v| cli[:stdin_versions] = v } end end @@ -21,5 +25,22 @@ def perform cli.say "Registering result: #{cli[:result]}" r = obj.add_result(cli[:result], true, force: cli[:force]) raise 'Cannot add result, incomplete expected files' if r.nil? + + # Add Software version data + if cli[:stdin_versions] + versions = {} + sw = nil + $stdin.each do |ln| + ln = ln.chomp.strip + if ln =~ /^=> (.*)/ + sw = $1 + versions[sw] = '' + else + versions[sw] += ln + end + end + r.add_versions(versions) + r.save + end end end diff --git a/lib/miga/result.rb b/lib/miga/result.rb index f661095e..2d9cd454 100644 --- a/lib/miga/result.rb +++ b/lib/miga/result.rb @@ -3,6 +3,7 @@ require 'miga/result/dates' require 'miga/result/source' require 'miga/result/stats' +require 'miga/result/versions' ## # The result from a task run. It can be project-wide or dataset-specific. @@ -10,6 +11,7 @@ class MiGA::Result < MiGA::MiGA include MiGA::Result::Dates include MiGA::Result::Source include MiGA::Result::Stats + include MiGA::Result::Versions # Class-level class << self @@ -151,7 +153,11 @@ def add_files(files) ## # Initialize and #save empty result def create - @data = { created: Time.now.to_s, stats: {}, files: {} } + @data = { + created: Time.now.to_s, + stats: {}, files: {}, + versions: { 'MiGA' => MiGA::VERSION.join('.') } + } save end diff --git a/lib/miga/result/versions.rb b/lib/miga/result/versions.rb new file mode 100644 index 00000000..9bf5911e --- /dev/null +++ b/lib/miga/result/versions.rb @@ -0,0 +1,17 @@ +require 'miga/result/base' + +## +# Helper module including functions for results to handle software versions +module MiGA::Result::Versions + ## + # Add version information for the Software used by this result + def add_versions(versions) + versions.each { |k, v| self[:versions][k] = v } + end + + ## + # Get list of software and their versions as raw text (Markdown) + def versions_md + versions.map { |k, v| "- #{k}: #{v}" }.join("\n") + end +end diff --git a/scripts/assembly.bash b/scripts/assembly.bash index ffe6b095..ad66ae66 100755 --- a/scripts/assembly.bash +++ b/scripts/assembly.bash @@ -10,6 +10,7 @@ cd "$PROJECT/data/05.assembly" miga date > "$DATASET.start" # Interpose (if needed) +interpose=no TF="../04.trimmed_fasta" b=$DATASET if [[ -s "$TF/${b}.2.fasta" || -s "$TF/${b}.2.fasta.gz" ]] ; then @@ -22,6 +23,7 @@ if [[ -s "$TF/${b}.2.fasta" || -s "$TF/${b}.2.fasta.gz" ]] ; then gzip -cd "$TF/${b}.${s}.fasta.gz" > "${b}.${s}.tmp" fi done + interpose=yes FastA.interpose.pl "$cr" "$b".[12].tmp rm "$b".[12].tmp miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta -f @@ -62,5 +64,17 @@ FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \ # Finalize miga date > "$DATASET.done" -miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f +cat < MiGA +$(miga --version) +$( + if [[ "$interpose" == "yes" ]] ; then + echo "=> Enveomics Collection: FastA.interpose.pl" + echo "version unknown" + fi +) +=> IDBA-UD +version unknown +VERSIONS diff --git a/scripts/cds.bash b/scripts/cds.bash index e0b67f7e..35ca5b6d 100755 --- a/scripts/cds.bash +++ b/scripts/cds.bash @@ -68,6 +68,12 @@ for ext in gff3 faa fna ; do done # Finalize -miga date > "$DATASET.done" -miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f +miga date > "${DATASET}.done" +cat < MiGA +$(miga --version) +=> Prodigal +$(prodigal -v 2>&1 | grep . | perl -pe 's/^Prodigal //') +VERSIONS diff --git a/scripts/distances.bash b/scripts/distances.bash index 943c3493..7b3feb77 100755 --- a/scripts/distances.bash +++ b/scripts/distances.bash @@ -22,5 +22,99 @@ fi ruby -I "$MIGA/lib" "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET" # Finalize -miga date > "$DATASET.done" -miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f +fastaai=no +aai=no +ani=no +blast=no +blat=no +diamond=no +fastani=no +case $(miga option -P "$PROJECT" -k haai_p) in + fastaai) + fastaai=yes + ;; + diamond) + diamond=yes + aai=yes + ;; + blast) + blast=yes + aai=yes + ;; +esac + +case $(miga option -P "$PROJECT" -k aai_p) in + diamond) + diamond=yes + aai=yes + ;; + blast) + blast=yes + aai=yes + ;; +esac + +case $(miga option -P "$PROJECT" -k ani_p) in + blast) + blast=yes + ani=yes + ;; + blat) + blat=yes + ani=yes + ;; + fastani) + fastani=yes + ;; +esac + + +miga date > "${DATASET}.done" +cat < MiGA +$(miga --version) +$( + if [[ "$fastaai" == "yes" ]] ; then + echo "=> FastAAI" + fastaai version 2>&1 | perl -pe 's/.*=//' + fi +) +$( + if [[ "$fastani" == "yes" ]] ; then + echo "=> FastANI" + fastANI --version 2>&1 | grep . | perl -pe 's/^version //' + fi +) +$( + if [[ "$aai" == "yes" ]] ; then + echo "=> Enveomics Collection: aai.rb" + aai.rb --version 2>&1 | perl -pe 's/.*: //' + fi +) +$( + if [[ "$ani" == "yes" ]] ; then + echo "=> Enveomics Collection: ani.rb" + ani.rb --version 2>&1 | perl -pe 's/.*: //' + fi +) +$( + if [[ "$blast" == "yes" ]] ; then + echo "=> NCBI BLAST+" + blastp -version 2>&1 | tail -n 1 | perl -pe 's/.*: blast //' + fi +) +$( + if [[ "$blat" == "yes" ]] ; then + echo "=> BLAT" + blat 2>&1 | head -n 1 | perl -pe 's/.* v\. //' | perl -pe 's/ fast .*//' + fi +) +$( + if [[ "$diamond" == "yes" ]] ; then + echo "=> Diamond" + diamond --version 2>&1 | perl -pe 's/^diamond version //' + fi +) +VERSIONS + diff --git a/scripts/essential_genes.bash b/scripts/essential_genes.bash index 6e36f9e2..a5bb97ea 100755 --- a/scripts/essential_genes.bash +++ b/scripts/essential_genes.bash @@ -70,4 +70,17 @@ fi # Finalize miga date > "${DATASET}.done" -miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f +cat < MiGA +$(miga --version) +=> Enveomics Collection: HMM.essential.rb +$(HMM.essential.rb --version 2>&1 | perl -pe 's/.*: //') +$( + if [[ "$NOMULTI" -eq "1" ]] ; then + echo "=> FastAAI" + fastaai version 2>&1 | perl -pe 's/.*=//' + fi +) +VERSIONS + diff --git a/scripts/mytaxa.bash b/scripts/mytaxa.bash index 32be681d..10c10485 100755 --- a/scripts/mytaxa.bash +++ b/scripts/mytaxa.bash @@ -98,5 +98,20 @@ else fi # Finalize -miga date > "$DATASET.done" -miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f +miga date > "${DATASET}.done" +cat < MiGA +$(miga --version) +$( + if [[ "$MIGA_MYTAXA" != "no" && "$MULTI" -eq "1" ]] ; then + echo "=> MyTaxa" + MyTaxa | grep Version: | perl -pe 's/.*: //' + echo "=> Diamond" + diamond --version 2>&1 | perl -pe 's/^diamond version //' + echo "=> Krona" + ktImportText | head -n 2 | tail -n 1 | awk '{ print $3 }' + fi +) +VERSIONS + diff --git a/scripts/mytaxa_scan.bash b/scripts/mytaxa_scan.bash index 59e0795b..41dcee0c 100755 --- a/scripts/mytaxa_scan.bash +++ b/scripts/mytaxa_scan.bash @@ -97,5 +97,18 @@ else fi # Finalize -miga date > "$DATASET.done" -miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f +miga date > "${DATASET}.done" +cat < MiGA +$(miga --version) +$( + if [[ "$MIGA_MYTAXA" != "no" && "$NOMULTI" -eq "1" ]] ; then + echo "=> MyTaxa" + MyTaxa | grep Version: | perl -pe 's/.*: //' + echo "=> Diamond" + diamond --version 2>&1 | perl -pe 's/^diamond version //' + fi +) +VERSIONS + diff --git a/scripts/read_quality.bash b/scripts/read_quality.bash index 2adfc225..7021d5f8 100755 --- a/scripts/read_quality.bash +++ b/scripts/read_quality.bash @@ -19,6 +19,10 @@ for s in 1 2 ; do done # Finalize -miga date > "$DATASET.done" -miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f +miga date > "${DATASET}.done" +cat < MiGA +$(miga --version) +VERSIONS diff --git a/scripts/ssu.bash b/scripts/ssu.bash index 6ae2abbd..a5ad7771 100755 --- a/scripts/ssu.bash +++ b/scripts/ssu.bash @@ -65,4 +65,22 @@ fi # Finalize miga date > "${DATASET}.done" -miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f +cat < MiGA +$(miga --version) +$( + if [[ -s $fa ]] ; then + echo "=> barrnap" + barrnap --version 2>&1 | perl -pe 's/^barrnap //' + echo "=> bedtools" + bedtools --version 2>&1 | perl -pe 's/^bedtools //' + echo "=> Enveomics Collection" + echo "version unknown" + echo "=> RDP Naive Bayes Classifier" + tail -n 1 "${DATASET}.rdp.tsv" | perl -pe 's/.*: //' + echo "=> tRNAscan-SE" + tRNAscan-SE -h 2>&1 | head -n 2 | tail -n 1 | perl -pe 's/^tRNAscan-SE //' + fi +) +VERSIONS diff --git a/scripts/stats.bash b/scripts/stats.bash index 8d5a2c70..b3c37ace 100755 --- a/scripts/stats.bash +++ b/scripts/stats.bash @@ -12,11 +12,17 @@ cd "$DIR" miga date > "${DATASET}.start" # Calculate statistics -for i in raw_reads trimmed_fasta assembly cds essential_genes distances taxonomy ssu ; do +for i in raw_reads trimmed_fasta assembly \ + cds essential_genes distances taxonomy ssu ; do echo "# $i" miga stats --compute-and-save --ignore-empty -P "$PROJECT" -D "$DATASET" -r $i done # Finalize -miga date > "$DATASET.done" -miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f +miga date > "${DATASET}.done" +cat < MiGA +$(miga --version) +VERSIONS + diff --git a/scripts/taxonomy.bash b/scripts/taxonomy.bash index 25bb8cf1..637a882e 100755 --- a/scripts/taxonomy.bash +++ b/scripts/taxonomy.bash @@ -16,5 +16,101 @@ ruby -I "$MIGA/lib" \ "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET" run_taxonomy=1 # Finalize -miga date > "$DATASET.done" -miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f +fastaai=no +aai=no +ani=no +blast=no +blat=no +diamond=no +fastani=no +REF_PROJECT=$(miga option -P "$PROJECT" -k ref_project) +if [[ -S "$REF_PROJECT" ]] ; then + case $(miga option -P "$REF_PROJECT" -k haai_p) in + fastaai) + fastaai=yes + ;; + diamond) + diamond=yes + aai=yes + ;; + blast) + blast=yes + aai=yes + ;; + esac + + case $(miga option -P "$REF_PROJECT" -k aai_p) in + diamond) + diamond=yes + aai=yes + ;; + blast) + blast=yes + aai=yes + ;; + esac + + case $(miga option -P "$REF_PROJECT" -k ani_p) in + blast) + blast=yes + ani=yes + ;; + blat) + blat=yes + ani=yes + ;; + fastani) + fastani=yes + ;; + esac +fi + +miga date > "${DATASET}.done" +cat < MiGA +$(miga --version) +$( + if [[ "$fastaai" == "yes" ]] ; then + echo "=> FastAAI" + fastaai version 2>&1 | perl -pe 's/.*=//' + fi +) +$( + if [[ "$fastani" == "yes" ]] ; then + echo "=> FastANI" + fastANI --version 2>&1 | grep . | perl -pe 's/^version //' + fi +) +$( + if [[ "$aai" == "yes" ]] ; then + echo "=> Enveomics Collection: aai.rb" + aai.rb --version 2>&1 | perl -pe 's/.*: //' + fi +) +$( + if [[ "$ani" == "yes" ]] ; then + echo "=> Enveomics Collection: ani.rb" + ani.rb --version 2>&1 | perl -pe 's/.*: //' + fi +) +$( + if [[ "$blast" == "yes" ]] ; then + echo "=> NCBI BLAST+" + blastp -version 2>&1 | tail -n 1 | perl -pe 's/.*: blast //' + fi +) +$( + if [[ "$blat" == "yes" ]] ; then + echo "=> BLAT" + blat 2>&1 | head -n 1 | perl -pe 's/.* v\. //' | perl -pe 's/ fast .*//' + fi +) +$( + if [[ "$diamond" == "yes" ]] ; then + echo "=> Diamond" + diamond --version 2>&1 | perl -pe 's/^diamond version //' + fi +) +VERSIONS + diff --git a/scripts/trimmed_fasta.bash b/scripts/trimmed_fasta.bash index 978b1da5..4824b6b8 100755 --- a/scripts/trimmed_fasta.bash +++ b/scripts/trimmed_fasta.bash @@ -32,6 +32,14 @@ for x in 1.fasta 2.fasta SingleReads.fa CoupledReads.fa ; do done # Finalize -miga date > "$DATASET.done" -miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f +miga date > "${DATASET}.done" +cat < MiGA +$(miga --version) +=> Enveomics Collection: FastQ.maskQual.rb +$(FastQ.maskQual.rb --version | perl -pe 's/.* //') +=> Enveomics Collection: FastA.interpose.pl +version unknown +VERSIONS diff --git a/scripts/trimmed_reads.bash b/scripts/trimmed_reads.bash index 30fb508f..ef901524 100755 --- a/scripts/trimmed_reads.bash +++ b/scripts/trimmed_reads.bash @@ -36,15 +36,19 @@ if [[ -s "$b.2.fastq.gz" ]] ; then $CMD -1 "$b.1.fastq.gz" -2 "$b.2.fastq.gz" for s in 1 2 ; do mv "$b/${s}.post_trim_${b}.${s}.fq.gz" "${b}.${s}.clipped.fastq.gz" - mv "$b/${s}.pre_trim_QC_${b}.${s}.html" "../03.read_quality/${b}.pre.${s}.html" - mv "$b/${s}.post_trim_QC_${b}.${s}.html" "../03.read_quality/${b}.post.${s}.html" + mv "$b/${s}.pre_trim_QC_${b}.${s}.html" \ + "../03.read_quality/${b}.pre.${s}.html" + mv "$b/${s}.post_trim_QC_${b}.${s}.html" \ + "../03.read_quality/${b}.post.${s}.html" done else # Unpaired $CMD -u "$b.1.fastq.gz" mv "$b/unpaired.post_trim_${b}.1.fq.gz" "${b}.1.clipped.fastq.gz" - mv "$b/unpaired.pre_trim_QC_${b}.1.html" "../03.read_quality/${b}.pre.1.html" - mv "$b/unpaired.post_trim_QC_${b}.1.html" "../03.read_quality/${b}.post.1.html" + mv "$b/unpaired.pre_trim_QC_${b}.1.html" \ + "../03.read_quality/${b}.pre.1.html" + mv "$b/unpaired.post_trim_QC_${b}.1.html" \ + "../03.read_quality/${b}.post.1.html" fi mv "$b/Subsample_Adapter_Detection.stats.txt" \ "../03.read_quality/$b.adapters.txt" @@ -54,6 +58,22 @@ rm -r "$b" rm -f "$b".[12].fastq.gz # Finalize -miga date > "$DATASET.done" -miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f +miga date > "${DATASET}.done" +cat < MiGA +$(miga --version) +=> Enveomics Collection: FastQ.tag.rb +$(FastQ.tag.rb --version | perl -pe 's/.* //') +=> Multitrim +version unknown +=> FaQCs +$(FaQCs --version 2>&1 | perl -pe 's/.*: //') +=> Seqtk +$(seqtk 2>&1 | grep Version | perl -pe 's/.*: //') +=> Fastp +$(fastp --version 2>&1 | perl -pe 's/^fastp //') +=> Falco +$(falco -V 2>&1 | tee) +VERSIONS