Skip to content

Commit

Permalink
Versions are now captured in the result JSONs
Browse files Browse the repository at this point in the history
Towards #143
  • Loading branch information
lmrodriguezr committed Jan 17, 2024
1 parent c75c334 commit f1daf2f
Show file tree
Hide file tree
Showing 15 changed files with 379 additions and 28 deletions.
23 changes: 22 additions & 1 deletion lib/miga/cli/action/add_result.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@

class MiGA::Cli::Action::AddResult < MiGA::Cli::Action
##
# Set the CLI defaults and declare the command-line options: the object
# options for +:project+, +:dataset_opt+, and +:result+ (registered through
# +cli.opt_object+), plus the +--force+ and +--stdin-versions+ flags
def parse_cli
  # All defaults are declared in a single assignment; both flags are off
  # unless passed explicitly
  cli.defaults = { force: false, stdin_versions: false }
  cli.parse do |opt|
    cli.opt_object(opt, [:project, :dataset_opt, :result])
    opt.on(
      '-f', '--force',
      'Force re-indexing of the result even if it\'s already registered'
    ) { |v| cli[:force] = v }
    opt.on(
      '--stdin-versions',
      'Read Software versions from STDIN'
    ) { |v| cli[:stdin_versions] = v }
  end
end

Expand All @@ -21,5 +25,22 @@ def perform
cli.say "Registering result: #{cli[:result]}"
r = obj.add_result(cli[:result], true, force: cli[:force])
raise 'Cannot add result, incomplete expected files' if r.nil?

# Add Software version data
#
# When --stdin-versions is set, STDIN is read as a sequence of entries:
#   => Software name
#   version text
# Each line starting with "=> " opens a new entry, and the following lines
# are concatenated (without separator) as that entry's version text. The
# collected Hash is then stored in the result and the result is saved
if cli[:stdin_versions]
  versions = {}
  sw = nil
  $stdin.each do |ln|
    ln = ln.chomp.strip
    if ln =~ /^=> (.*)/
      sw = $1
      versions[sw] = ''
    elsif sw
      # Only append once a "=> " header has been seen: text before the first
      # header has no software to attach to, and versions[nil] += ln would
      # raise NoMethodError
      versions[sw] += ln
    end
  end
  r.add_versions(versions)
  r.save
end
end
end
8 changes: 7 additions & 1 deletion lib/miga/result.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
require 'miga/result/dates'
require 'miga/result/source'
require 'miga/result/stats'
require 'miga/result/versions'

##
# The result from a task run. It can be project-wide or dataset-specific.
class MiGA::Result < MiGA::MiGA
include MiGA::Result::Dates
include MiGA::Result::Source
include MiGA::Result::Stats
include MiGA::Result::Versions

# Class-level
class << self
Expand Down Expand Up @@ -151,7 +153,11 @@ def add_files(files)
##
# Initialize and #save empty result. The initial data holds the creation
# time (as String), empty +stats+ and +files+ hashes, and a +versions+ hash
# seeded with the running MiGA version
def create
  @data = {
    created: Time.now.to_s,
    stats: {}, files: {},
    versions: { 'MiGA' => MiGA::VERSION.join('.') }
  }
  save
end

Expand Down
17 changes: 17 additions & 0 deletions lib/miga/result/versions.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
require 'miga/result/base'

##
# Helper module including functions for results to handle software versions
module MiGA::Result::Versions
  ##
  # Add version information for the Software used by this result.
  # +versions+ is a Hash of software name => version text; its entries are
  # stored in the result's +:versions+ hash, overwriting existing keys.
  #
  # If the result has no +:versions+ hash yet, an empty one is created first.
  # NOTE(review): that initialization relies on the including class
  # responding to +[]=+; it is only reached when +:versions+ is missing,
  # a case that previously raised NoMethodError on nil
  def add_versions(versions)
    self[:versions] ||= {}
    versions.each { |k, v| self[:versions][k] = v }
  end

  ##
  # Get list of software and their versions as raw text (Markdown), with one
  # "- name: version" bullet per line. Returns an empty String if the result
  # has no +:versions+ data
  def versions_md
    # Read through self[:versions] (as #add_versions does) instead of a
    # +versions+ reader, which this module does not define
    (self[:versions] || {}).map { |k, v| "- #{k}: #{v}" }.join("\n")
  end
end
16 changes: 15 additions & 1 deletion scripts/assembly.bash
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ cd "$PROJECT/data/05.assembly"
miga date > "$DATASET.start"

# Interpose (if needed)
interpose=no
TF="../04.trimmed_fasta"
b=$DATASET
if [[ -s "$TF/${b}.2.fasta" || -s "$TF/${b}.2.fasta.gz" ]] ; then
Expand All @@ -22,6 +23,7 @@ if [[ -s "$TF/${b}.2.fasta" || -s "$TF/${b}.2.fasta.gz" ]] ; then
gzip -cd "$TF/${b}.${s}.fasta.gz" > "${b}.${s}.tmp"
fi
done
interpose=yes
FastA.interpose.pl "$cr" "$b".[12].tmp
rm "$b".[12].tmp
miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta -f
Expand Down Expand Up @@ -62,5 +64,17 @@ FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \

# Finalize
miga date > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
cat <<VERSIONS \
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
=> MiGA
$(miga --version)
$(
if [[ "$interpose" == "yes" ]] ; then
echo "=> Enveomics Collection: FastA.interpose.pl"
echo "version unknown"
fi
)
=> IDBA-UD
version unknown
VERSIONS

10 changes: 8 additions & 2 deletions scripts/cds.bash
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ for ext in gff3 faa fna ; do
done

# Finalize
miga date > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
miga date > "${DATASET}.done"
cat <<VERSIONS \
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
=> MiGA
$(miga --version)
=> Prodigal
$(prodigal -v 2>&1 | grep . | perl -pe 's/^Prodigal //')
VERSIONS

98 changes: 96 additions & 2 deletions scripts/distances.bash
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,99 @@ fi
ruby -I "$MIGA/lib" "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET"

# Finalize
miga date > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
fastaai=no
aai=no
ani=no
blast=no
blat=no
diamond=no
fastani=no
case $(miga option -P "$PROJECT" -k haai_p) in
fastaai)
fastaai=yes
;;
diamond)
diamond=yes
aai=yes
;;
blast)
blast=yes
aai=yes
;;
esac

case $(miga option -P "$PROJECT" -k aai_p) in
diamond)
diamond=yes
aai=yes
;;
blast)
blast=yes
aai=yes
;;
esac

case $(miga option -P "$PROJECT" -k ani_p) in
blast)
blast=yes
ani=yes
;;
blat)
blat=yes
ani=yes
;;
fastani)
fastani=yes
;;
esac


miga date > "${DATASET}.done"
cat <<VERSIONS \
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
=> MiGA
$(miga --version)
$(
if [[ "$fastaai" == "yes" ]] ; then
echo "=> FastAAI"
fastaai version 2>&1 | perl -pe 's/.*=//'
fi
)
$(
if [[ "$fastani" == "yes" ]] ; then
echo "=> FastANI"
fastANI --version 2>&1 | grep . | perl -pe 's/^version //'
fi
)
$(
if [[ "$aai" == "yes" ]] ; then
echo "=> Enveomics Collection: aai.rb"
aai.rb --version 2>&1 | perl -pe 's/.*: //'
fi
)
$(
if [[ "$ani" == "yes" ]] ; then
echo "=> Enveomics Collection: ani.rb"
ani.rb --version 2>&1 | perl -pe 's/.*: //'
fi
)
$(
if [[ "$blast" == "yes" ]] ; then
echo "=> NCBI BLAST+"
blastp -version 2>&1 | tail -n 1 | perl -pe 's/.*: blast //'
fi
)
$(
if [[ "$blat" == "yes" ]] ; then
echo "=> BLAT"
blat 2>&1 | head -n 1 | perl -pe 's/.* v\. //' | perl -pe 's/ fast .*//'
fi
)
$(
if [[ "$diamond" == "yes" ]] ; then
echo "=> Diamond"
diamond --version 2>&1 | perl -pe 's/^diamond version //'
fi
)
VERSIONS

15 changes: 14 additions & 1 deletion scripts/essential_genes.bash
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,17 @@ fi

# Finalize
miga date > "${DATASET}.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
cat <<VERSIONS \
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
=> MiGA
$(miga --version)
=> Enveomics Collection: HMM.essential.rb
$(HMM.essential.rb --version 2>&1 | perl -pe 's/.*: //')
$(
if [[ "$NOMULTI" -eq "1" ]] ; then
echo "=> FastAAI"
fastaai version 2>&1 | perl -pe 's/.*=//'
fi
)
VERSIONS

19 changes: 17 additions & 2 deletions scripts/mytaxa.bash
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,20 @@ else
fi

# Finalize
miga date > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
miga date > "${DATASET}.done"
cat <<VERSIONS \
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
=> MiGA
$(miga --version)
$(
if [[ "$MIGA_MYTAXA" != "no" && "$MULTI" -eq "1" ]] ; then
echo "=> MyTaxa"
MyTaxa | grep Version: | perl -pe 's/.*: //'
echo "=> Diamond"
diamond --version 2>&1 | perl -pe 's/^diamond version //'
echo "=> Krona"
ktImportText | head -n 2 | tail -n 1 | awk '{ print $3 }'
fi
)
VERSIONS

17 changes: 15 additions & 2 deletions scripts/mytaxa_scan.bash
Original file line number Diff line number Diff line change
Expand Up @@ -97,5 +97,18 @@ else
fi

# Finalize
miga date > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
miga date > "${DATASET}.done"
cat <<VERSIONS \
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
=> MiGA
$(miga --version)
$(
if [[ "$MIGA_MYTAXA" != "no" && "$NOMULTI" -eq "1" ]] ; then
echo "=> MyTaxa"
MyTaxa | grep Version: | perl -pe 's/.*: //'
echo "=> Diamond"
diamond --version 2>&1 | perl -pe 's/^diamond version //'
fi
)
VERSIONS

8 changes: 6 additions & 2 deletions scripts/read_quality.bash
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ for s in 1 2 ; do
done

# Finalize
miga date > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
miga date > "${DATASET}.done"
cat <<VERSIONS \
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
=> MiGA
$(miga --version)
VERSIONS

20 changes: 19 additions & 1 deletion scripts/ssu.bash
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,22 @@ fi

# Finalize
miga date > "${DATASET}.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
cat <<VERSIONS \
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
=> MiGA
$(miga --version)
$(
if [[ -s $fa ]] ; then
echo "=> barrnap"
barrnap --version 2>&1 | perl -pe 's/^barrnap //'
echo "=> bedtools"
bedtools --version 2>&1 | perl -pe 's/^bedtools //'
echo "=> Enveomics Collection"
echo "version unknown"
echo "=> RDP Naive Bayes Classifier"
tail -n 1 "${DATASET}.rdp.tsv" | perl -pe 's/.*: //'
echo "=> tRNAscan-SE"
tRNAscan-SE -h 2>&1 | head -n 2 | tail -n 1 | perl -pe 's/^tRNAscan-SE //'
fi
)
VERSIONS
12 changes: 9 additions & 3 deletions scripts/stats.bash
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,17 @@ cd "$DIR"
miga date > "${DATASET}.start"

# Calculate statistics
for i in raw_reads trimmed_fasta assembly cds essential_genes distances taxonomy ssu ; do
for i in raw_reads trimmed_fasta assembly \
cds essential_genes distances taxonomy ssu ; do
echo "# $i"
miga stats --compute-and-save --ignore-empty -P "$PROJECT" -D "$DATASET" -r $i
done

# Finalize
miga date > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
miga date > "${DATASET}.done"
cat <<VERSIONS \
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
=> MiGA
$(miga --version)
VERSIONS

Loading

0 comments on commit f1daf2f

Please sign in to comment.