Skip to content

Commit

Permalink
Merge pull request nf-core#113 from nf-core/time
Browse files Browse the repository at this point in the history
Time
  • Loading branch information
luisas authored Mar 20, 2024
2 parents e2013ed + 45c1cd5 commit 83de935
Show file tree
Hide file tree
Showing 10 changed files with 258 additions and 33 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ Initial release of nf-core/multiplesequencealign, created with the [nf-core](htt
[#77](https://github.com/nf-core/multiplesequencealign/issues/77) - Add module zip
[#93](https://github.com/nf-core/multiplesequencealign/pull/93) - Add multiqc basic support. Add custom params validation. Add basic shiny app.
[#100](https://github.com/nf-core/multiplesequencealign/pull/100) - Add support for optional stats and evals. Clean tests.
[#111](https://github.com/nf-core/multiplesequencealign/pull/111) - Add Readme documentation. Add nf-test for the pipeline.
[#110](https://github.com/nf-core/multiplesequencealign/issues/110) - Add Readme documentation. Add nf-test for the pipeline.
[#76](https://github.com/nf-core/multiplesequencealign/issues/76) - Add reading of trace files for shiny app.

### `Fixed`

Expand Down
18 changes: 16 additions & 2 deletions bin/app.py → bin/shiny_app/shiny_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,25 @@
import pandas as pd
import matplotlib.pyplot as plt
import sys

import os
import shiny_app_merge_score_and_trace as ms

# Style
sns.set(context="talk", style="white", font_scale=0.8)


# Locate the summary table
# ----------------------------------------------------------------------------
# The table that already carries the trace columns is preferred. When it is
# absent it is built from the plain table plus the trace file; without a
# trace file the plain table is read instead.
summary_report = "./shiny_data_with_trace.csv"
trace = "./trace.txt"

if not os.path.exists(summary_report):
    summary_report_no_trace = "./shiny_data.csv"
    if not os.path.exists(trace):
        # Nothing to merge: fall back to the table without trace columns.
        summary_report = summary_report_no_trace
    else:
        # Writes the merged table to the `summary_report` path.
        ms.merge_data_and_trace(summary_report_no_trace, trace, summary_report)

try:
inputfile = pd.read_csv(summary_report)
Expand All @@ -33,6 +43,10 @@
"tc": "total column score (TC)",
"perc_sim": "sequences avg similarity",
"seq_length_mean": "sequence length (mean)",
"time_tree": "tree time (min)",
"time_align": "alignment time (min)",
"memory_tree": "tree memory (GB)",
"memory_align": "alignment memory (GB)"
}

app_ui = ui.page_fluid(
Expand Down
99 changes: 99 additions & 0 deletions bin/shiny_app/shiny_app_merge_score_and_trace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import pandas as pd

def convert_time(time):
    """Convert a duration string from the execution trace to minutes.

    A duration is one or more whitespace-separated components, each a number
    followed by a unit suffix: ``ms``, ``s``, ``m``, ``h`` or ``d``
    (for example ``"250ms"``, ``"12.5s"``, ``"3m"`` or ``"1h 2m 30s"``).
    The components are converted to minutes and summed.

    Args:
        time: Duration string. ``None`` and any other non-string value
            (such as a missing value produced by pandas) is returned
            unchanged.

    Returns:
        The duration in minutes as a ``float``. A string in which some
        component has no recognised unit suffix (including the empty
        string) is returned unchanged.

    Raises:
        ValueError: If the text in front of a unit suffix is not a number.
    """
    if not isinstance(time, str):
        return time

    # (suffix, multiplier, divisor) giving minutes. "ms" has to be tried
    # before "s" and "m", because a millisecond token also ends in "s".
    units = (
        ("ms", 1, 60000),
        ("s", 1, 60),
        ("m", 1, 1),
        ("h", 60, 1),
        ("d", 1440, 1),
    )

    tokens = time.split()
    if not tokens:
        return time

    minutes = 0.0
    for token in tokens:
        for suffix, multiplier, divisor in units:
            if token.endswith(suffix):
                minutes += float(token[: -len(suffix)]) * multiplier / divisor
                break
        else:
            # A component without a known unit: leave the value untouched.
            return time
    return minutes

def convert_memory(memory):
    """Convert a memory-size string to gigabytes.

    The value is a number followed by a unit suffix (``TB``, ``GB``, ``MB``,
    ``KB`` or ``B``), with optional whitespace between them, for example
    ``"1.5 GB"`` or ``"250KB"``.

    Args:
        memory: Size string. ``None`` and any other non-string value (such
            as a missing value produced by pandas) is returned unchanged.

    Returns:
        The size in GB as a ``float``. A string without a recognised unit
        suffix is returned unchanged.

    Raises:
        ValueError: If the text in front of the unit suffix is not a number.
    """
    if not isinstance(memory, str):
        return memory

    # (suffix, multiplier, divisor) giving GB. Plain "B" is tried last
    # because every other suffix ends in "B" as well.
    # NOTE(review): units are scaled in steps of 1000, as before; confirm
    # whether the trace producer uses 1024-based units instead.
    units = (
        ("TB", 1000, 1),
        ("GB", 1, 1),
        ("MB", 1, 1000),
        ("KB", 1, 1000000),
        ("B", 1, 1000000000),
    )

    text = memory.strip()
    for suffix, multiplier, divisor in units:
        if text.endswith(suffix):
            return float(text[: -len(suffix)]) * multiplier / divisor
    return memory

def cleanTrace(trace):
    """Add columns parsed from the ``name`` column to a trace table.

    Each ``name`` is read as ``<qualified name> (<tag>)``. The following
    columns are added to ``trace`` itself:

    * ``tag``: the text between the parentheses
    * ``id``: the first whitespace-separated word of the tag
    * ``args``: whatever follows ``args:`` inside the tag
    * ``full_name``: the text before the opening parenthesis, stripped
    * ``process``: the last ``:``-separated part of ``full_name``
    * ``subworkflow``: the second-to-last ``:``-separated part

    Finally every ``'null'`` cell in the table is replaced by ``pd.NA``.

    Args:
        trace: DataFrame with a ``name`` column; it is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    def extract_element(row, nelement):
        # Pick one ':'-separated component of a qualified name.
        return row.split(':')[nelement]

    # Split once on the opening parenthesis: left part is the qualified
    # name, right part starts with the tag.
    name_parts = trace.name.str.split('(', expand=True)
    tag = name_parts[1].str.split(')', expand=True)[0]
    full_name = name_parts[0].str.strip()

    trace["tag"] = tag
    trace["id"] = tag.str.split(expand=True)[0]
    trace["args"] = tag.str.split("args:", expand=True)[1]
    trace["full_name"] = full_name
    trace["process"] = full_name.apply(extract_element, nelement=-1)
    trace["subworkflow"] = full_name.apply(extract_element, nelement=-2)

    trace.replace('null', pd.NA, inplace=True)
    return trace

def prep_tree_trace(trace):
    """Extract the guide-tree rows of a cleaned trace table.

    Keeps the rows whose ``subworkflow`` is ``"COMPUTE_TREES"`` and returns
    them with the columns ``id``, ``args_tree``, ``tree``, ``time_tree``,
    ``memory_tree`` and ``cpus_tree``.

    Args:
        trace: DataFrame with at least the columns ``subworkflow``, ``id``,
            ``args``, ``process``, ``realtime``, ``rss`` and ``cpus``.
            It is not modified.

    Returns:
        A new DataFrame. ``tree`` is the process name without
        ``_GUIDETREE``; ``time_tree`` and ``memory_tree`` are passed through
        ``convert_time`` and ``convert_memory``.
    """
    # Work on an explicit copy so that the column assignments below never
    # target a view of the caller's frame (avoids SettingWithCopyWarning).
    trace_trees = trace.loc[
        trace["subworkflow"] == "COMPUTE_TREES",
        ["id", "args", "process", "realtime", "rss", "cpus"],
    ].copy()

    trace_trees = trace_trees.rename(
        columns={
            "args": "args_tree",
            "process": "tree",
            "realtime": "time_tree",
            "rss": "memory_tree",
            "cpus": "cpus_tree",
        }
    )
    # The tree name is the process name without its _GUIDETREE suffix.
    trace_trees["tree"] = trace_trees["tree"].str.replace("_GUIDETREE", "", regex=False)
    trace_trees = trace_trees.replace('null', pd.NA)

    trace_trees["time_tree"] = trace_trees["time_tree"].apply(convert_time)
    trace_trees["memory_tree"] = trace_trees["memory_tree"].apply(convert_memory)
    return trace_trees

def prep_align_trace(trace):
    """Extract the alignment rows of a cleaned trace table.

    Keeps the rows whose ``subworkflow`` is ``"ALIGN"`` and returns them
    with the columns ``id``, ``args_aligner``, ``aligner``, ``time_align``,
    ``memory_align`` and ``cpus_align``.

    Args:
        trace: DataFrame with at least the columns ``subworkflow``, ``id``,
            ``args``, ``process``, ``realtime``, ``rss`` and ``cpus``.
            It is not modified.

    Returns:
        A new DataFrame. ``aligner`` is the process name without
        ``_ALIGN``; ``time_align`` and ``memory_align`` are passed through
        ``convert_time`` and ``convert_memory``.
    """
    # Work on an explicit copy so that the column assignments below never
    # target a view of the caller's frame (avoids SettingWithCopyWarning).
    trace_align = trace.loc[
        trace["subworkflow"] == "ALIGN",
        ["id", "args", "process", "realtime", "rss", "cpus"],
    ].copy()

    trace_align = trace_align.rename(
        columns={
            "args": "args_aligner",
            "process": "aligner",
            "realtime": "time_align",
            "rss": "memory_align",
            "cpus": "cpus_align",
        }
    )
    # The aligner name is the process name without its _ALIGN suffix.
    trace_align["aligner"] = trace_align["aligner"].str.replace("_ALIGN", "", regex=False)
    trace_align = trace_align.replace('null', pd.NA)

    trace_align["time_align"] = trace_align["time_align"].apply(convert_time)
    trace_align["memory_align"] = trace_align["memory_align"].apply(convert_memory)
    return trace_align


def merge_data_and_trace(data_file, trace_file, out_file_name):
    """Join the score table with tree and alignment trace columns.

    Args:
        data_file: Path of the CSV score table.
        trace_file: Path of the tab-separated trace file.
        out_file_name: Path the merged CSV is written to (without index).

    Returns:
        None. The result is only written to ``out_file_name``.
    """
    scores = pd.read_csv(data_file)
    annotated_trace = cleanTrace(pd.read_csv(trace_file, sep='\t'))

    tree_columns = prep_tree_trace(annotated_trace)
    align_columns = prep_align_trace(annotated_trace)

    # Left joins keep every score row, whether or not a trace row matches.
    # NOTE(review): a score row is repeated when several trace rows share
    # its key - confirm the trace holds one row per key.
    merged = scores.merge(
        tree_columns, on=["id", "tree", "args_tree"], how="left"
    ).merge(
        align_columns, on=["id", "aligner", "args_aligner"], how="left"
    )

    merged.to_csv(out_file_name, index=False)

9 changes: 4 additions & 5 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ process {
// ------------------------------------

withName: "FAMSA_GUIDETREE"{
tag = { "${meta.id} args:${meta.args_tree}" }
ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}" }
ext.args = { "${meta.args_tree}" == "null" ? '' : "${meta.args_tree}" }
publishDir = [
Expand All @@ -102,6 +103,7 @@ process {
}

withName: "CLUSTALO_GUIDETREE"{
tag = { "${meta.id} args:${meta.args_tree}" }
ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}" }
ext.args = { "${meta.args_tree}" == "null" ? '' : "${meta.args_tree}" }
publishDir = [
Expand All @@ -125,6 +127,7 @@ process {
}

withName: "MAFFT|CLUSTALO_ALIGN|TCOFFEE_ALIGN|MUSCLE5_SUPER5|TCOFFEE3D_ALIGN|FAMSA_ALIGN|LEARNMSA_ALIGN|REGRESSIVE"{
// Task label: sample id, guide tree, tree arguments and aligner arguments.
// Every value comes from the meta map; the tree arguments were previously
// referenced as a bare `args_tree`, unlike the other selectors in this file.
tag = { "${meta.id} tree:${meta.tree} argstree:${meta.args_tree} args:${meta.args_aligner}" }
ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" }
ext.args = { "${meta.args_aligner}" == "null" ? '' : "${meta.args_aligner}" }
if ( params.skip_compress ) {
Expand Down Expand Up @@ -227,16 +230,12 @@ process {
// ------------------------------------
withName: 'PREPARE_SHINY' {
publishDir = [
path: { "${params.outdir}/shiny" },
path: { "${params.outdir}/shiny_app" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}





withName: 'ZIP' {
ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" }
publishDir = [
Expand Down
7 changes: 4 additions & 3 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,6 @@ workflow NFCORE_MULTIPLESEQUENCEALIGN {
emit:
multiqc_report = MULTIPLESEQUENCEALIGN.out.multiqc



}
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -105,7 +103,10 @@ workflow {
params.outdir,
params.monochrome_logs,
params.hook_url,
NFCORE_MULTIPLESEQUENCEALIGN.out.multiqc_report
NFCORE_MULTIPLESEQUENCEALIGN.out.multiqc_report,
"${params.outdir}/shiny_app",
"${params.outdir}/pipeline_info",
params.shiny_trace_mode
)
}

Expand Down
9 changes: 5 additions & 4 deletions modules/local/prepare_shiny.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ process PREPARE_SHINY {
path (app)

output:
tuple val (meta), path("shiny_data.csv"), emit: data
path ("shiny_app.py"), emit: app
path ("shiny_data.csv"), emit: data
path ("shiny_app*"), emit: app
path ("run.sh"), emit: run
path "versions.yml", emit: versions

Expand All @@ -23,8 +23,9 @@ process PREPARE_SHINY {
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
"""
mv $table shiny_data.csv
mv $app shiny_app.py
cp $table shiny_data.csv
cp $app/* .
rm $app
echo "shiny run --reload shiny_app.py" > run.sh
chmod +x run.sh
Expand Down
10 changes: 6 additions & 4 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,6 @@ params {
multiqc_methods_description = null
skip_multiqc = false

// Shiny options
shiny_app = "${projectDir}/bin/app.py"
skip_shiny = false

// Boilerplate options
outdir = null
publish_dir_mode = 'copy'
Expand All @@ -53,6 +49,11 @@ params {
help = false
version = false

// Shiny options
shiny_app = "${projectDir}/bin/shiny_app"
skip_shiny = false
shiny_trace_mode = "all" // all, latest

// Config options
config_profile_name = null
config_profile_description = null
Expand Down Expand Up @@ -233,6 +234,7 @@ report {
trace {
enabled = true
file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt"
fields = 'task_id, hash,native_id,name,status,exit,realtime,%cpu,rss,peak_rss,vmem,peak_vmem,rchar,wchar,cpus,start'
}
dag {
enabled = true
Expand Down
8 changes: 6 additions & 2 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,13 @@
},
"shiny_app": {
"type": "string",
"format": "file-path",
"fa_icon": "fas fa-fast-forward",
"format": "directory-path",
"fa_icon": "fas fa-folder-open",
"description": "Directory containing the shiny app files."
},
"shiny_trace_mode": {
"type": "string",
"description": "Shiny trace mode to use. Accepted values: 'all', 'latest'."
}
}
},
Expand Down
Loading

0 comments on commit 83de935

Please sign in to comment.