From a537ee49980e2c709495dba313558056f6893eeb Mon Sep 17 00:00:00 2001 From: Elmar Pruesse Date: Wed, 6 May 2015 13:14:49 +0200 Subject: [PATCH 01/13] implement split heatmaps and chao1 display - added some debugging code - fix no stop on err() - cbind_max and rbind_max to merge gtables (originals seem broken) - g_get as generic "get me this part of a ggplot" - loading meta-data - read.phyloFlash now creates a list (instead of a data.frame) - error msg if no taxa were merged (min-ntu-count) - separated clustering from plot function - distinct color schemes for split heatmaps - heatmap splitting configurable by regex - rewrote plot.phyloFlash, using [cr]bind_max instead of gtable_add_rows - layout mostly clean now - showing multiple legends for split heatmap view - split regex, shortening of names, scaling and clustering now configurable from cmdline - using log-scale for heatmap color --- phyloFlash_heatmap.R | 386 +++++++++++++++++++++++++++++-------------- 1 file changed, 265 insertions(+), 121 deletions(-) diff --git a/phyloFlash_heatmap.R b/phyloFlash_heatmap.R index d475276..422917d 100755 --- a/phyloFlash_heatmap.R +++ b/phyloFlash_heatmap.R @@ -23,6 +23,14 @@ # along with this program. # If not, see . +debugCode = quote({ + dump.frames(); + cat(paste(" ", 1L:length(last.dump), ": ", + names(last.dump), sep = ""),"", + sep = "\n", file=stderr()) +}); +options(warn=2, keep.source=TRUE, error = debugCode); + library(optparse); load_libraries <- function() { @@ -54,7 +62,7 @@ msg <- function(...,lvl=2) { # 3 info # 4 debug - if (lvl=="error") { + if (lvl=="0") { stop(...); } else if (pf_logLevel >= lvl) { cat(...,'\n'); @@ -65,35 +73,54 @@ warn <- function(...) msg(lvl=1,...); info <- function(...) msg(lvl=3,...); debug <- function(...) msg(lvl=4,...); -### ggplot helpers - -# extract just the legend from a ggplot object -g_get_legend <- function(a.gplot) { - # from http://stackoverflow.com/questions/11883844/ - # "inserting-a-table-under-the-legend-in-a-ggplot2-histogram" - tmp <- ggplot_gtable(ggplot_build(a.gplot)); - leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box"); - legend <- tmp$grobs[[leg]]; - return(legend); +## workaround for not working rbind(gtable...) +## adapted from http://stackoverflow.com/questions/24234791 +rbind_max <- function(...,size=grid::unit.pmax){ + bind2 <- function (x, y) { + stopifnot(ncol(x) == ncol(y)) + if (nrow(x) == 0) return(y) + if (nrow(y) == 0) return(x) + y$layout$t <- y$layout$t + nrow(x) + y$layout$b <- y$layout$b + nrow(x) + x$layout <- rbind(x$layout, y$layout) + x$heights <- gtable:::insert.unit(x$heights, y$heights) + x$rownames <- c(x$rownames, y$rownames) + if (is.function(size)) { + x$widths <- do.call(size, list(x$widths, y$widths)) + } + x$grobs <- append(x$grobs, y$grobs) + x + } + Reduce(bind2, list(...)) } -g_get_map <- function(a.gplot) { - gb <- ggplotGrob(a.gplot); - return(gtable_filter(gb,pattern="axis-l|panel")); +cbind_max <- function(...,size=grid::unit.pmax){ + ##http://stackoverflow.com/questions/24234791 + bind2 <- function (x, y) { + stopifnot(nrow(x) == nrow(y)) + if (ncol(x) == 0) return(y) + if (ncol(y) == 0) return(x) + y$layout$l <- y$layout$l + ncol(x) + y$layout$r <- y$layout$r + ncol(x) + x$layout <- rbind(x$layout, y$layout) + x$widths <- gtable:::insert.unit(x$widths, y$widths) + x$colnames <- c(x$colnames, y$colnames) + if (is.function(size)) { + x$heights <- do.call(size, list(x$heights, y$heights)) + } + x$grobs <- append(x$grobs, y$grobs) + x + } + Reduce(bind2, list(...)) } -# hide the legend in a ggplot object -g_hide_legend <- function(a.gplot) { - return (a.gplot + theme(legend.position = "none")); +# extract a grob from a ggplot/gtable +g_get <- function(pat, obj) { + if (is.ggplot(obj)) obj <- ggplotGrob(obj); + if (!is.grob(obj)) err("not a grob?!"); + return (gtable_filter(obj,pattern=pat)); } -g_hide_x_axis <- function(a.gplot) { - return (a.gplot + theme(axis.text.x=element_blank()) -# + theme(plot.margin =unit(c(0,0,0,0),"null")) - ); - return -} - # prepare a pure dendrogram plot from a dendro_data object # @param bool vertical If true, plot has leaves as rows. # @param bool labels If true, plot includes labels @@ -150,11 +177,11 @@ g_make_dendro_plot <- function(dendro, vertical=TRUE, labels=TRUE) { p <- p + scale_x_continuous(expand=c(expandFactor,0)); } - return (p); + return (p); } -# load phyloFlash data +# loads phyloFlash output files into R read.phyloFlash <- function(files) { ### Extract library names from command line # remove .phyloFlash...csv @@ -172,7 +199,6 @@ read.phyloFlash <- function(files) { msg("Loading CSV files..."); for (lib in libs) { fileName <- paste(lib, ".phyloFlash.NTUabundance.csv", sep=""); - info("Reading: ",fileName); fileData <- read.csv(fileName); @@ -185,21 +211,39 @@ read.phyloFlash <- function(files) { } else { NTUcounts <- merge(x=NTUcounts, y=fileData, by="NTU", all=TRUE); } + + # read meta-data + fileName <- paste(lib, ".phyloFlash.report.csv", sep=""); + info("Reading: ", fileName); + fileData <- read.csv(fileName, col.names=c("key",lib)); + if (!exists("MetaData")) { + MetaData <- fileData; + } else { + MetaData <- merge(x=MetaData, y=fileData, by="key"); + } } - + + pfData <- list(); # result is a list + + # turn data.frame into matrix, get dimnames right ntu_names <- NTUcounts$NTU; sample_names <- colnames(NTUcounts[,-1]); - NTUcounts <- as.matrix(NTUcounts[,-1]); rownames(NTUcounts) <- ntu_names; - - # turn NA into 0 - NTUcounts[is.na(NTUcounts)] <- 0; + NTUcounts[is.na(NTUcounts)] <- 0; # turn NA into 0 + pfData$data <- list(NTUcounts); - return (NTUcounts); + # turn key column into row names, transpose + pfData$meta <- MetaData[,-1]; + rownames(pfData$meta) <- MetaData[,1]; + pfData$meta <- data.frame(t(pfData$meta)); + + return (pfData); } +# shortens taxnames to last to group names shorten_taxnames <- function(data) { + if (is.list(data)) return (lapply(data, shorten_taxnames)); # convert NTU name column into rownames shortnames <- lapply(rownames(data), function(x) gsub(".*;(.*;.*)","\\1", x)); @@ -208,13 +252,35 @@ shorten_taxnames <- function(data) { return(data); } +# splits matrix using regex (returns list) +split_by_name <- function(data, re_list) { + names <- rownames(data[[1]]); + groups <- rep(0, length(names)); + i <- 1; + for (pat in re_list) { + groups[grepl(pat, names) & groups==0] = i; + i <- i+1; + } + sd <- split(data.frame(data), groups); + return (lapply(sd, as.matrix)); +} + ### remove taxa observed rarely merge_low_counts <- function(data, thres=50, other="Other") { + if (is.list(data)) { + return(lapply(data, merge_low_counts, thres, other)); + } + msg("A total of",nrow(data),"taxa were observed.", "Merging taxa with <", thres, "observations into \"", other, "\"."); ndata <- data[rowSums(data) >= thres,]; + + if (length(ndata) == length(data)) { + msg("No taxa to merge"); + return(data); + } odata <- colSums(data[rowSums(data) < thres,]); odata <- matrix(odata, ncol=length(odata), @@ -225,18 +291,44 @@ merge_low_counts <- function(data, thres=50, other="Other") { msg("Removed taxa with <",thres,"observations.", nrow(data),"taxa left."); - return(data); + return (data); } +# scales matrix columns to percent scale_to_percent <- function(mat) { - return(scale(mat, center=FALSE, scale=colSums(mat)) * 100); + if (is.list(mat)) + return (lapply(mat, scale_to_percent)); + return (scale(mat, center=FALSE, scale=colSums(mat)) * 100); } -cluster <- function(mat,method="ward") { - return(as.dendrogram(hclust(dist(mat), method))); +# cluster, create dendrograms and reorder data +cluster <- function(pf, method="ward") { + mkdendro <- function(mat) { + return(as.dendrogram(hclust(dist(mat), method))); + } + + ## re-join data if list + joined = do.call(rbind, pf$data); + ## create horizontal clusters + pf$col_dendro <- mkdendro(t(joined)); + ## re-order meta-data + pf$meta <- pf$meta[order.dendrogram(pf$col_dendro),]; + + ## create vertical clusters + pf$row_dendro <- lapply(pf$data, mkdendro); + ## re-order data matrices + rorder <- function(mat, dendr) { + return (mat[order.dendrogram(dendr), + order.dendrogram(pf$col_dendro)]); + } + + pf$data <- mapply(rorder, pf$data, pf$row_dendro, SIMPLIFY=FALSE); + + return(pf); } -g_make_heatmap <- function(mat, colorScheme=defaultColorScheme) { +g_make_heatmap <- function(mat, n, colorScheme=defaultColorScheme, angle=90, hjust=0,vjust=0.6) { + highcol = c("steelblue","indianred")[n] ## factorize dims matNames <- attr(mat, "dimnames"); df <- as.data.frame(mat); @@ -244,82 +336,102 @@ g_make_heatmap <- function(mat, colorScheme=defaultColorScheme) { df$y.variable <- matNames[[1]]; df$y.variable <- with(df, factor(y.variable,levels=y.variable,ordered=TRUE)); - x=mean(mat); - mat <- melt(df, id.vars="y.variable"); + + breaks = c(0,25,50,75,100); heatMapPlot <- ggplot(mat, aes(variable,y.variable)) + geom_tile(aes(fill=value)) + #scale_fill_gradientn(colours = colorScheme) + #scale_fill_gradientn(colours = rainbow(3)) + #scale_fill_gradient2(low="blue", mid="green", high="red", midpoint = 30)+ - scale_fill_gradient(low="white", high="steelblue")+ + scale_fill_gradient(low="white", high=highcol, + trans="log", #breaks=breaks, labels=breaks, + na.value="white") + labs(x = NULL, y = NULL) + scale_x_discrete(expand=c(0,0)) + scale_y_discrete(expand=c(0,0)) + - theme(axis.text.x = element_text(angle=90)); -# theme(plot.margin = unit(c(0,0,1,1), "cm")); - + theme(axis.text.x = element_text(angle=angle, hjust=hjust,vjust=vjust), + axis.ticks.length = unit(0,"null")); return(heatMapPlot); } +gtable_text_row <- function(strvec) { + grobs <- lapply(strvec, function(str) { + textGrob(str,gp=gpar(fontsize=8)) + }) + + g <- gtable_row("textrow", grobs); + gt <- gtable(heights=unit(1,"lines"), widths=unit(0,"null")); + gt <- gtable_add_grob(gt, g, t=1, l=1); +} -plot.phyloFlash <- function(pdata) { - row_dendro <- cluster(pdata); - col_dendro <- cluster(t(pdata)); +plot.phyloFlash <- function(pf) { + nmaps <- length(pf$data); + rows <- sapply(pf$data,nrow); - ## reorder to match clustering - pdata <- pdata [order.dendrogram(row_dendro), - order.dendrogram(col_dendro)]; + # empty table + zero <- gtable(widths=unit(0,"null"),heights=unit(0,"null")); + zero1 <- gtable(widths=unit(1,"null"),heights=unit(0,"null")); + + ## get heatmaps and labels + gg_heatmaps <- mapply(g_make_heatmap, pf$data, c(1:length(pf$data)), SIMPLIFY=FALSE); + gr_heatmaps <- mapply(g_get, rep("panel|axis-l", nmaps), gg_heatmaps); + ## merge below each other + g <- do.call(rbind_max, gr_heatmaps); + ## scale heights by number of rows + g$heights = g$heights * (rows/sum(rows)); + + ## get trees over samples + gr_trees <- lapply(pf$row_dendro, g_make_dendro_plot); + gr_trees <- lapply(gr_trees, function(x) g_get("panel",x)) + ## merge into one column + gr_trees <- do.call(rbind_max, gr_trees); + ## add to right of heatmaps + g<-cbind_max(g, gr_trees,size=1); + + g <- gtable_add_row_space(g, unit(.2,"lines")); + + + ## add row at top + gr_legends <- lapply(gg_heatmaps, function(x) { + g_get("guides", g_get("guide-box", x)$grobs[[1]]) }) + gr_legends <- do.call(cbind_max, gr_legends) + + gr_legend <- gtable_add_grob(zero, gr_legends , t=1, l=1) + gr_legend$heights = max(gr_legends$heights) + gr_legend$widths = sum(gr_legends$widths) + ##gr_legend <- g_get("guide-box", gg_heatmaps[[1]]); + ##gr_legend <- g_get("guides", gr_legend$grobs[[1]]); + gr_sampleTree <- g_get("panel", g_make_dendro_plot(pf$col_dendro, FALSE, TRUE)); + gr_sampleTree$heights=unit(0.1,"null") - heatmap <- g_make_heatmap(pdata); - ntuTree <- g_make_dendro_plot(row_dendro, TRUE, FALSE); - sampleTree <- g_make_dendro_plot(col_dendro, FALSE, FALSE); + top_row <- cbind_max(zero, gr_sampleTree, gr_legend);#gr_legend); - g <- g_make_grid(heatmap, ntuTree, sampleTree); - g <- g_add_to_grid(g, heatmap, ntuTree); - g <- g_add_to_grid(g, heatmap, ntuTree); - return(g); -} + g<-rbind_max(top_row, g); -g_make_grid <- function(heatMap, ntuTree, sampleTree) { - ## start with the raw heatmap (no legend) - g <- ggplotGrob(g_hide_legend(heatMap)); + chao <- pf$meta$NTU.Chao1.richness.estimate; + chao <- round(as.numeric(as.character(chao))) - ## add a column to the right - g <- gtable_add_cols(g, unit(5, "cm")); - ## put in the dendrogram for the NTUs - g <- gtable_add_grob(g, ggplotGrob(ntuTree), - t=3, b=3, l=6, r=6); - - ## add a row above - g <- gtable_add_rows(g, unit(5, "cm"),0); - ## put the dendrogram with the sample clustering in - g <- gtable_add_grob(g, ggplotGrob(sampleTree), - t=1, b=1, l=4, r=4); - ## put the legend in the upper right corner - g <- gtable_add_grob(g, g_get_legend(heatMap), - t=1, b=1, l=6, r=6); + gr_chao_grob <- textGrob("Chao1",x=unit(.99,"npc"),just="right",gp=gpar(fontsize=8)) + gr_chao_lab <- gtable_add_grob(zero1,gr_chao_grob,t=1,l=1,r=1,b=1); + chao_row <- cbind_max(gr_chao_lab, gtable_text_row(chao), zero); + g<-rbind_max(g,chao_row); + + # add row at bottom + gr_sample_labels <- g_get("axis-b", gg_heatmaps[[1]]); + bottom_row <- cbind_max(zero,gr_sample_labels,zero); + g<-rbind_max(g, bottom_row); + + g <- gtable_add_row_space(g, unit(.1,"lines")); + g <- gtable_add_col_space(g, unit(.1,"lines")); + g <- gtable_add_padding(g, unit(.3,"lines")); + return (g); } -g_add_to_grid <- function(g, heatMap, ntuTree) { - ## add some space - g <- gtable_add_rows(g, unit(.2,"lines"),nrow(g)-3); - - ## add another heatmap - g <- gtable_add_rows(g, unit(1, "null"),nrow(g)-3); - ro <- nrow(g)-3; - g <- gtable_add_grob(g, g_get_map(heatMap), - t=ro, b=ro, l=3, r=4); - g <- gtable_add_grob(g, ggplotGrob(ntuTree), - t=ro, b=ro, l=6, r=6); - - return (g); -} - pF_main <- function() { options <- list( make_option( @@ -341,14 +453,40 @@ pF_main <- function() { help="Sum NTUs with less counts in pseudo NTU \"Other\". Default %default." ), make_option( - "--no-split-euks", - action="store_false", - help="Do not show Eukaryotes in separate plot section" + "--no-split", + action="store_true", + default=FALSE, + help="Do not split heatmap" + ), + make_option( + "--split-regex", + default="Eukaryota", + type="character", + help="Split heatmap using this regex on taxa", + ), + make_option( + "--no-shorten-names", + action="store_true", + default=FALSE, + help="Do not shorten taxa names to last two groups", + ), + make_option( + "--no-scaling", + action="store_true", + default=FALSE, + help="Do not scale columns to percentages" + ), + make_option( + "--hclust-method", + default="ward", + help="Use this method for hclust clustering. Can be: + ward, single, complete, average, mcquitty, median or centroid. + Default is %default." ), make_option( "--out", default="out.png", - help="Name of output file. Must end in .png or .pdf. Default %default." + help="Name of output file. Must end in .png or .pdf. Default is %default." ), make_option( "--out-size", @@ -361,7 +499,8 @@ pF_main <- function() { option_list=options, usage="usage: %prog [options] [files]", description=" -Generates a heatmap plot from multiple phyloFlash result sets. +Generates a heatmap plot from multiple phyloFlash result sets. For more control, +source this file from R. Files: A list of files and/or directories that will be searched @@ -370,14 +509,10 @@ Files: conf <- parse_args(parser, positional_arguments = TRUE); - # fixme: unparsed "-*" type options - if (length(conf$args)==0) { print_help(parser); quit(status=2); } - - #str(conf$options); # set loglevel if (conf$options$quiet) { @@ -386,39 +521,48 @@ Files: pf_setLogLevel(3); } + info("Loading libraries"); load_libraries(); - pdata <- read.phyloFlash(conf$args); - pdata <- shorten_taxnames(pdata); - pdata <- merge_low_counts(pdata, thres=conf$options$"min-ntu-count"); - pdata <- scale_to_percent(pdata); + pf <- read.phyloFlash(conf$args); - g <- plot.phyloFlash(pdata); - - png(file="out.png", width=1280,height=1024); - #svg(file="out.svg"); - #png(file="out.png",width=1280,height=1280); - #pdf(file="out.pdf"); - grid.newpage(); - grid.draw(g); - dev.off(); + ## split by domain + if (!conf$options$"no-split") { + pat <- strsplit(conf$options$"split-regex",",")[[1]]; + pf$data <- split_by_name(pf$data, pat); + } + if (!conf$options$"no-shorten-names") { + pf$data <- shorten_taxnames(pf$data); + } - png(file="out2.png", width=1280,height=1024); - grid.newpage(); - gtable_show_layout(g); - dev.off(); + pf$data <- merge_low_counts(pf$data, + thres=conf$options$"min-ntu-count"); + if (!conf$options$"no-scaling") { + pf$data <- scale_to_percent(pf$data); + } - + pf <- cluster(pf, method=conf$options$"hclust-method"); + g <- plot.phyloFlash(pf); + outdim = as.integer(strsplit(conf$options$"out-size","x")[[1]]); + switch(strsplit(conf$options$out, "[.]")[[1]][-1], + png = png(file = conf$options$out, + width=outdim[1], height = outdim[2]), + svg = svg(file = conf$options$out, + width=outdim[1], height = outdim[2]), + pdf = pdf(file = conf$options$out, + width=outdim[1], height = outdim[2]) + ); + grid.newpage(); + grid.draw(g); + dev.off(); + msg("Brief summary of counts:"); if (pf_logLevel >= 2) { ### "cat(summary(...))" does - print(summary(pdata)); + print(summary(pf$data)); } - - print(colSums(pdata)); - } # if we are run as a script from the cmdline From f0de78c7ec3e9f8b1fe19244391c7a6bfdd30769 Mon Sep 17 00:00:00 2001 From: Elmar Pruesse Date: Thu, 7 May 2015 04:32:01 +0200 Subject: [PATCH 02/13] style cleanup (indenting, no tabs, etc...) --- phyloFlash.pl | 377 +++++++++++++++++++++++--------------------------- 1 file changed, 170 insertions(+), 207 deletions(-) diff --git a/phyloFlash.pl b/phyloFlash.pl index 690c451..bf63743 100755 --- a/phyloFlash.pl +++ b/phyloFlash.pl @@ -152,7 +152,8 @@ =head1 COPYRIGHT AND LICENSE # change these to match your installation -my $DBHOME = "$FindBin::RealBin/119";#HGV: edited to point to locally created db dir release version 119 +my $DBHOME = "$FindBin::RealBin/119"; +#HGV: edited to point to locally created db dir release version 119 # constants my $version = 'phyloFlash v2.0'; @@ -175,7 +176,7 @@ =head1 COPYRIGHT AND LICENSE my $crlf = 0; # csv line terminator my $skip_emirge = 0; # Flag - skip Emirge step? (default = 0, no) my $skip_spades = 0; # Flag - skip SPAdes assembly? (default = 0, no) -my $sc = 0; # Flag - single cell data? (default = 0, no) +my $sc = 0; # Flag - single cell data? (default = 0, no) my $check_env = 0; # Check environment (runs check_environment subroutine only) my @tools_list; # Array to store list of tools required # (0 will be turned into "\n" in parsecmdline) @@ -210,45 +211,26 @@ sub welcome { # Specify list of required tools. Run this subroutine AFTER parse_cmdline() sub process_required_tools { # binaries needed by phyloFlash - @tools_list = ( - bbmap => "bbmap.sh", - barrnap => "$FindBin::RealBin/barrnap-HGV/bin/barrnap_HGV", - vsearch => "vsearch", - mafft => "mafft", - fastaFromBed => "fastaFromBed", - sed => "sed", - grep => "grep", - awk => "awk", - cat => "cat", - plotscript => "$FindBin::RealBin/phyloFlash_plotscript.R" + require_tools( + bbmap => "bbmap.sh", + barrnap => "$FindBin::RealBin/barrnap-HGV/bin/barrnap_HGV", + vsearch => "vsearch", + mafft => "mafft", + fastaFromBed => "fastaFromBed", + sed => "sed", + grep => "grep", + awk => "awk", + cat => "cat", + plotscript => "$FindBin::RealBin/phyloFlash_plotscript.R" ); - if ($skip_spades == 0) { - @tools_list = (@tools_list, - spades => "spades.py"); - } - if ($skip_emirge == 0) { - @tools_list = (@tools_list, - emirge => "emirge.py", - emirge_amp => "emirge_amplicon.py", - emirge_rename_fasta => "emirge_rename_fasta.py"); - } - require_tools(@tools_list); - #require_tools(( - # bbmap => "bbmap.sh", - # spades => "spades.py", - # barrnap => "$FindBin::RealBin/barrnap-HGV/bin/barrnap_HGV", - # emirge => "emirge.py", - # emirge_amp => "emirge_amplicon.py", - # emirge_rename_fasta => "emirge_rename_fasta.py", - # vsearch => "vsearch", - # mafft => "mafft", - # fastaFromBed => "fastaFromBed", - # sed => "sed", - # grep => "grep", - # awk => "awk", - # cat => "cat", - # plotscript => "$FindBin::RealBin/phyloFlash_plotscript.R" - # )); + if ($skip_spades == 0) { + require_tools(spades => "spades.py"); + } + if ($skip_emirge == 0) { + require_tools(emirge => "emirge.py", + emirge_amp => "emirge_amplicon.py", + emirge_rename_fasta => "emirge_rename_fasta.py"); + } } # parse arguments passed on commandline and do some @@ -268,7 +250,7 @@ sub parse_cmdline { 'crlf' => \$crlf, 'skip_emirge' => \$skip_emirge, 'skip_spades' => \$skip_spades, - 'sc' => \$sc, + 'sc' => \$sc, 'check_env' => \$check_env, 'help' => sub { pod2usage(1) }, 'man' => sub { pod2usage(-exitval=>0, -verbose=>2) }, @@ -413,26 +395,26 @@ sub print_report { } if ($skip_spades == 0) { - ## Print the table of SSU assembly-based taxa to report file - print {$fh} "---\n"; - print {$fh} "SSU assembly based taxa:\n"; - print {$fh} "OTU\tcoverage\tdbHit\ttaxonomy\t%id\talnlen\tevalue\n"; - - foreach my $arr (@ssuassem_results_sorted) { - print {$fh} join("\t", @$arr)."\n"; - } + ## Print the table of SSU assembly-based taxa to report file + print {$fh} "---\n"; + print {$fh} "SSU assembly based taxa:\n"; + print {$fh} "OTU\tcoverage\tdbHit\ttaxonomy\t%id\talnlen\tevalue\n"; + + foreach my $arr (@ssuassem_results_sorted) { + print {$fh} join("\t", @$arr)."\n"; + } } - + if ($skip_emirge == 0) { - ## Print the table of SSU reconstruction-based taxa to report file - print {$fh} "---\n"; - print {$fh} "SSU reconstruction based taxa:\n"; - print {$fh} "OTU\tratio\tdbHit\ttaxonomy\t%id\talnlen\tevalue\n"; - foreach my $arr (@ssurecon_results_sorted) { - print {$fh} join("\t", @$arr)."\n"; - } + ## Print the table of SSU reconstruction-based taxa to report file + print {$fh} "---\n"; + print {$fh} "SSU reconstruction based taxa:\n"; + print {$fh} "OTU\tratio\tdbHit\ttaxonomy\t%id\talnlen\tevalue\n"; + foreach my $arr (@ssurecon_results_sorted) { + print {$fh} join("\t", @$arr)."\n"; + } } - + close($fh); } @@ -461,6 +443,7 @@ sub write_csv { "NTUs observed three or more times",$xtons[2], "NTU Chao1 richness estimate",$chao1 )); + open_or_die(\$fh, ">", "$libraryNAME.phyloFlash.report.csv"); while ($#report > 0) { print {$fh} shift(@report).",".shift(@report).$crlf; @@ -607,16 +590,13 @@ () $SSU_ratio_pc = $SSU_ratio * 100; msg("mapping rate: $SSU_ratio_pc%"); - + if ($SSU_total_pairs * 2 * $readlength < 1800) { msg("WARNING: mapping coverage lower than 1x,\n reconstruction with SPADES and Emirge disabled."); $skip_emirge = 1; $skip_spades = 1; } - - - } sub bbmap_hitstats_parse { @@ -671,14 +651,14 @@ sub bbmap_hitstats_parse { map { [$_, $taxa_from_hitmaps{$_}] } keys %taxa_from_hitmaps; - $xtons[2] = $#taxa_from_hitmaps_sorted +1; - + $xtons[2] = $#taxa_from_hitmaps_sorted +1; + if ($xtons[1] > 0) { - $chao1 = - $xtons[2] + - ($xtons[0] * $xtons[0]) / 2 / $xtons[1]; + $chao1 = + $xtons[2] + + ($xtons[0] * $xtons[0]) / 2 / $xtons[1]; } else { - $chao1 = 'n.d.'; + $chao1 = 'n.d.'; } msg("done..."); } @@ -696,7 +676,6 @@ sub spades_run { } msg ("kmers for SPAdes are ".$kmer); - my $args; if ($sc == 1) { $args = '--sc '; @@ -720,104 +699,95 @@ sub spades_parse { # getting spades output and reformatting it... msg("getting 16S phylotypes and their coverages..."); - # only run if spades assembly is not empty! - if (-s "$libraryNAME.spades/scaffolds.fasta") { - - # run barrnap once for each domain - - # if single cell data - accept partial rRNAs down to 0.1 - # and use lower e-value setting - my $b_args = " --evalue 1e-100 --reject 0.6 " ; - if ($sc == 1) { - $b_args = " --evalue 1e-20 --reject 0.1 "; - } - - foreach ('bac', 'arch', 'euk') { - run_prog("barrnap", + #spades scaffolds file is empty, settting skip_spades variable to avoid + #further processing + if (! -s "$libraryNAME.spades/scaffolds.fasta") { + msg("no phylotypes assembled with SPAdes"); + $skip_spades = 1; + system ("rm ./$libraryNAME.spades -r"); + } + + # run barrnap once for each domain + # if single cell data - accept partial rRNAs down to 0.1 + # and use lower e-value setting + my $b_args = " --evalue 1e-100 --reject 0.6 " ; + if ($sc == 1) { + $b_args = " --evalue 1e-20 --reject 0.1 "; + } + + foreach ('bac', 'arch', 'euk') { + run_prog("barrnap", $b_args. "--kingdom $_ --gene ssu --threads $cpus " . "$libraryNAME.spades/scaffolds.fasta", "$libraryNAME.scaffolds.$_.gff", "$libraryNAME.barrnap.out"); - } - - - # now merge multi-hits on the same scaffold-and-strand by picking - # the lowest start and highest stop position. - - my %ssus; - # pre-filter with grep for speed - my $fh; - open_or_die(\$fh, "-|", - "grep -hE '16S_rRNA\|18S_rRNA' ". - "$libraryNAME.scaffolds.bac.gff ". - "$libraryNAME.scaffolds.arch.gff ". - "$libraryNAME.scaffolds.euk.gff"); - while (my $row = <$fh>) { - my @cols = split("\t", $row); - # gff format: - # 0 seqname, 1 source, 2 feature, 3 start, 4 end, - # 5 score, 6 strand, 7 frame, 8 attribute - - my $seqname = $cols[0]; - my $start = $cols[3]; - my $stop = $cols[4]; - my $strand = $cols[6]; - - # put our desired output fasta name into "feature" col 3 - # the may be "bug" using / mixing bed and gff formats - # but it saves us messing with the fasta afterwards - $seqname =~ m/NODE_([0-9]*)_.*cov_([0-9\\.]*)_/; - $cols[2] = "$libraryNAME.PFspades_$1_$2"; - - # do the actual merging, left most start and right most stop wins - if (exists $ssus{$seqname.$strand}) { - my $old_start = $ssus{$seqname.$strand}[3]; - my $old_stop = $ssus{$seqname.$strand}[4]; - $cols[3] = ($start < $old_start) ? $start : $old_start; - $cols[4] = ($stop > $old_stop) ? $stop : $old_stop; - } - $ssus{$seqname.$strand} = [@cols]; - } - close($fh); - - open_or_die(\$fh, ">", "tmp.$libraryNAME.scaffolds.final.gff"); - for my $key (sort keys %ssus) { - print $fh join("\t",@{$ssus{$key}}); - } - close($fh); - - # fastaFromBed will build a .fai index from the source .fasta - # However, it does not notice if the .fasta changed. So we - # delete the .fai if it is older than the .fasta. - if ( -e "$libraryNAME.spades/scaffolds.fasta.fai" && - file_is_newer("$libraryNAME.spades/scaffolds.fasta", - "$libraryNAME.spades/scaffolds.fasta.fai")) { - unlink("$libraryNAME.spades/scaffolds.fasta.fai"); - } - - # extract rrna fragments from spades scaffolds accoding to gff - run_prog("fastaFromBed", - " -fi $libraryNAME.spades/scaffolds.fasta " - . "-bed tmp.$libraryNAME.scaffolds.final.gff " - . "-fo $libraryNAME.spades_rRNAs.final.fasta " - . "-s -name", - "tmp.$libraryNAME.fastaFromBed.out", - "&1"); - - msg("done..."); - } - - #spades scaffolds file is empty, settting skip_spades variable to avoid - #further processing - - else - { - msg("no phylotypes assembled with SPAdes"); - $skip_spades = 1; - system ("rm ./$libraryNAME.spades -r"); + + # now merge multi-hits on the same scaffold-and-strand by picking + # the lowest start and highest stop position. + + my %ssus; + # pre-filter with grep for speed + my $fh; + open_or_die(\$fh, "-|", + "grep -hE '16S_rRNA\|18S_rRNA' ". + "$libraryNAME.scaffolds.bac.gff ". + "$libraryNAME.scaffolds.arch.gff ". + "$libraryNAME.scaffolds.euk.gff"); + while (my $row = <$fh>) { + my @cols = split("\t", $row); + # gff format: + # 0 seqname, 1 source, 2 feature, 3 start, 4 end, + # 5 score, 6 strand, 7 frame, 8 attribute + + my $seqname = $cols[0]; + my $start = $cols[3]; + my $stop = $cols[4]; + my $strand = $cols[6]; + + # put our desired output fasta name into "feature" col 3 + # the may be "bug" using / mixing bed and gff formats + # but it saves us messing with the fasta afterwards + $seqname =~ m/NODE_([0-9]*)_.*cov_([0-9\\.]*)_/; + $cols[2] = "$libraryNAME.PFspades_$1_$2"; + + # do the actual merging, left most start and right most stop wins + if (exists $ssus{$seqname.$strand}) { + my $old_start = $ssus{$seqname.$strand}[3]; + my $old_stop = $ssus{$seqname.$strand}[4]; + $cols[3] = ($start < $old_start) ? $start : $old_start; + $cols[4] = ($stop > $old_stop) ? $stop : $old_stop; + } + $ssus{$seqname.$strand} = [@cols]; + } + close($fh); + + open_or_die(\$fh, ">", "tmp.$libraryNAME.scaffolds.final.gff"); + for my $key (sort keys %ssus) { + print $fh join("\t",@{$ssus{$key}}); + } + close($fh); + + # fastaFromBed will build a .fai index from the source .fasta + # However, it does not notice if the .fasta changed. So we + # delete the .fai if it is older than the .fasta. + if ( -e "$libraryNAME.spades/scaffolds.fasta.fai" && + file_is_newer("$libraryNAME.spades/scaffolds.fasta", + "$libraryNAME.spades/scaffolds.fasta.fai")) { + unlink("$libraryNAME.spades/scaffolds.fasta.fai"); } + + # extract rrna fragments from spades scaffolds accoding to gff + run_prog("fastaFromBed", + " -fi $libraryNAME.spades/scaffolds.fasta " + . "-bed tmp.$libraryNAME.scaffolds.final.gff " + . "-fo $libraryNAME.spades_rRNAs.final.fasta " + . "-s -name", + "tmp.$libraryNAME.fastaFromBed.out", + "&1"); + + msg("done..."); } @@ -933,7 +903,7 @@ sub vsearch_best_match { . " $libraryNAME.spades_rRNAs.final.fasta", "$libraryNAME.all.final.fasta"); } - + if (-s "$libraryNAME.all.final.fasta") { run_prog("vsearch", "-usearch_global $libraryNAME.all.final.fasta " @@ -1009,30 +979,23 @@ sub mafft_run { msg("creating alignment and tree..."); if ($skip_spades == 0 && $skip_emirge == 1) { - run_prog("cat", - "$libraryNAME.all.dbhits.NR97.fa " - . "$libraryNAME.spades_rRNAs.final.fasta ", -# . "$libraryNAME.emirge.final.fasta ", - "$libraryNAME.SSU.collection.fasta"); + run_prog("cat", + "$libraryNAME.all.dbhits.NR97.fa " + . "$libraryNAME.spades_rRNAs.final.fasta ", + "$libraryNAME.SSU.collection.fasta"); - } - - elsif ($skip_spades == 1 && $skip_emirge == 0) { - run_prog("cat", - "$libraryNAME.all.dbhits.NR97.fa " - #. "$libraryNAME.spades_rRNAs.final.fasta " - . "$libraryNAME.emirge.final.fasta ", - "$libraryNAME.SSU.collection.fasta"); + } elsif ($skip_spades == 1 && $skip_emirge == 0) { + run_prog("cat", + "$libraryNAME.all.dbhits.NR97.fa " + . "$libraryNAME.emirge.final.fasta ", + "$libraryNAME.SSU.collection.fasta"); - } - - elsif ($skip_spades == 0 && $skip_emirge == 0) { - run_prog("cat", - "$libraryNAME.all.dbhits.NR97.fa " - . "$libraryNAME.spades_rRNAs.final.fasta " - . "$libraryNAME.emirge.final.fasta ", - "$libraryNAME.SSU.collection.fasta"); - + } elsif ($skip_spades == 0 && $skip_emirge == 0) { + run_prog("cat", + "$libraryNAME.all.dbhits.NR97.fa " + . "$libraryNAME.spades_rRNAs.final.fasta " + . "$libraryNAME.emirge.final.fasta ", + "$libraryNAME.SSU.collection.fasta"); } run_prog("mafft", @@ -1052,12 +1015,12 @@ sub mafft_run { sub clean_up { # cleanup of intermediate files and folders msg("cleaning temp files..."); - if ($skip_spades == 0) - { system ("rm ./$libraryNAME.spades -r"); - } - if ($skip_emirge == 0) - { system ("rm ./$libraryNAME -r"); - } + if ($skip_spades == 0) { + system ("rm ./$libraryNAME.spades -r"); + } + if ($skip_emirge == 0) { + system ("rm ./$libraryNAME -r"); + } system ("rm tmp.* -r"); msg("done..."); } @@ -1067,18 +1030,18 @@ sub run_plotscript { #FIXME: crashes if .inserthistorgram contains no lines if ($skip_spades + $skip_emirge == 2) { - run_prog("plotscript", - "--args NULL " - . "$libraryNAME.inserthistogram ", - "tmp.$libraryNAME.plotscript.out", - "&1"); + run_prog("plotscript", + "--args NULL " + . "$libraryNAME.inserthistogram ", + "tmp.$libraryNAME.plotscript.out", + "&1"); } else { - run_prog("plotscript", - "--args $libraryNAME.SSU.collection.fasta.tree " - . "$libraryNAME.inserthistogram ", - "tmp.$libraryNAME.plotscript.out", - "&1"); + run_prog("plotscript", + "--args $libraryNAME.SSU.collection.fasta.tree " + . "$libraryNAME.inserthistogram ", + "tmp.$libraryNAME.plotscript.out", + "&1"); } } @@ -1445,18 +1408,18 @@ sub write_report_html { bbmap_fast_filter_parse(); bbmap_hitstats_parse(); if ($skip_spades == 0) { # Run SPAdes if not explicitly skipped - spades_run(); - spades_parse(); + spades_run(); + spades_parse(); } if ($skip_emirge == 0) { # Run Emirge if not explicitly skipped - emirge_run(); - emirge_parse(); + emirge_run(); + emirge_parse(); } if ($skip_spades + $skip_emirge < 2) { # If at least one of either SPAdes or Emirge is activated, parse results - vsearch_best_match(); - vsearch_parse(); - vsearch_cluster(); - mafft_run(); + vsearch_best_match(); + vsearch_parse(); + vsearch_cluster(); + mafft_run(); } $runtime = $timer->minutes; From 132ec421d00bf23aea0a05cc9dbdd2aa6de3ee40 Mon Sep 17 00:00:00 2001 From: Elmar Pruesse Date: Thu, 7 May 2015 04:45:48 +0200 Subject: [PATCH 03/13] improve message for missing tools showing the full file name we're looking for now --- PhyloFlash.pm | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/PhyloFlash.pm b/PhyloFlash.pm index 7acfa5a..e981fd5 100644 --- a/PhyloFlash.pm +++ b/PhyloFlash.pm @@ -150,20 +150,21 @@ will abort, asking the user to fix the prerequisites. =cut sub check_environment { - my $error = 0; + my @missing; + msg("Checking for required tools."); foreach my $prog (keys %progs) { my $progname = $progs{$prog}; if ($progs{$prog} = can_run($progname)) { msg("Using $prog found at \"".$progs{$prog}."\"."); } else { - $error = 1; + push @missing, " $prog ($progname)"; } } - if ($error == 1) { + if (@missing) { msg("Unable to find all required tools. These are missing:"); - foreach my $prog (keys %progs) { - msg(" ".$prog) if (!defined($progs{$prog})); + foreach my $prog (@missing) { + msg($prog); } die "Please make sure these are installed and in your PATH.\n\n"; } else { From 362b0a647f9a8bea5f18b6a3924dcb4f01c286e8 Mon Sep 17 00:00:00 2001 From: Elmar Pruesse Date: Thu, 7 May 2015 05:03:54 +0200 Subject: [PATCH 04/13] add some fatal error messages - removed some commented out code - check against neither-remote-nor-present condition for unived and silva databases - be more accepting about SILVA version string. now anything but "_" goes. (including the current "119.1") - test and exit if the version couldn't be extracted --- phyloFlash_makedb.pl | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/phyloFlash_makedb.pl b/phyloFlash_makedb.pl index 4675f12..bc5ccef 100755 --- a/phyloFlash_makedb.pl +++ b/phyloFlash_makedb.pl @@ -122,26 +122,40 @@ =head1 COPYRIGHT AND LICENSE ### MAIN ### if ($use_remote==1 && $univec_file eq "") { - #run_stage(msg => "downloading lastest UniVec DB from NCBI", msg("downloading latest univec from ncbi"); $univec_file = file_download($univec_url); - #my $univec_file = "UniVec"; } elsif ($use_remote==0 && $univec_file ne "") { msg("using local copy of univec: $univec_file"); } +else { + msg("No univec file found and downloading disabled.\n" + ."Aborting."); + exit(); +} if ($use_remote==1 && $silva_file eq "") { msg("downloading latest SSU RefNR from www.arb-silva.de"); $silva_file = file_download($silva_url); - #my $silva_file = "SILVA_119_SSURef_Nr99_tax_silva_trunc.fasta.gz"; } elsif ($use_remote==0 && $silva_file ne "") { msg("using local copy of Silva SSU RefNR: $silva_file"); } +else { + msg("No SILVA database found and downloading disabled.\n" + ."Aborting."); + exit(); +} # extract SILVA version -my ($silva_release) = ($silva_file =~ m/SILVA_(\d+)_/); +my ($silva_release) = ($silva_file =~ m/SILVA_([^_]+)_/); + +if (!$&) { + msg("ERROR: Unable to extract version from SILVA database filename:"); + msg(" Expected 'SILVA__...' in '$silva_file'."); + msg(" Aborting."); + exit(); +} # create database directory my $dbdir = "./".$silva_release."/"; From ba2969eb9fcad75a1c5a03faef9526f7d2b3b619 Mon Sep 17 00:00:00 2001 From: Elmar Pruesse Date: Thu, 7 May 2015 05:39:17 +0200 Subject: [PATCH 05/13] add err() for fatal messages and replace "die" - standard format including timestamp - no line number though --- PhyloFlash.pm | 62 ++++++++++++++++++++++++++++---------------- phyloFlash.pl | 13 +++------- phyloFlash_makedb.pl | 18 +++++-------- 3 files changed, 50 insertions(+), 43 deletions(-) diff --git a/PhyloFlash.pm b/PhyloFlash.pm index e981fd5..b423378 100644 --- a/PhyloFlash.pm +++ b/PhyloFlash.pm @@ -4,6 +4,9 @@ use strict; package PhyloFlash; use Exporter qw(import); use Time::Piece; +use Text::Wrap; + +$Text::Wrap::huge = "overflow"; =head1 NAME @@ -28,6 +31,7 @@ our @ISA = qw(Exporter); our @EXPORT = qw( get_cpus msg + err file_is_newer open_or_die csv_escape @@ -60,9 +64,22 @@ Logs a message to STDERR with time stamp prefix. =cut sub msg { - my $t = localtime; - my $line = "[".$t->hms."] @_\n"; - print STDERR $line; + my $t = localtime; + my $line = wrap ("[".$t->hms."] ", " ", + join "\n", @_); + print STDERR $line."\n"; +} + +=item err($msg) + +Logs an error message to STDERR and dies + +=cut +sub err { + my @msg = (@_,"Aborting."); + $msg[0] = "FATAL: ".$msg[0]; + msg(@msg); + exit(3); } =item file_is_newer ($file1, $file2) @@ -99,7 +116,7 @@ sub open_or_die { } open($$fh, $mode, $fname) - or die "Failed to $msg '$fname': $!"; + or err("Failed to $msg '$fname': $!"); } =item csv_escape ($var) @@ -166,7 +183,7 @@ sub check_environment { foreach my $prog (@missing) { msg($prog); } - die "Please make sure these are installed and in your PATH.\n\n"; + err("Please make sure these are installed and in your PATH.\n\n"); } else { msg("All required tools found."); } @@ -203,15 +220,16 @@ sub run_prog { if (not exists $progs{$prog}) { msg("trying to launch unknown tool \"$prog\". pls add to %progs"); - $progs{$prog} = can_run($prog) or die "Failed to find $prog"; + $progs{$prog} = can_run($prog) or err("Failed to find $prog"); } my $cmd = $progs{$prog}." ".$args; $cmd .= " >".$redir_stdout if ($redir_stdout); $cmd .= " 2>".$redir_stderr if ($redir_stderr); - msg("executing [$cmd]"); + msg("running subcommand:","$cmd"); system($cmd) == 0 - or die "Couldn't launch [$cmd]: $!/$?"; + or err("Tool execution failed!.", + "Error was '$!' and return code '$?'"); # FIXME: print tail of stderr if redirected } @@ -225,7 +243,7 @@ sub file_md5 { my $file = shift; my $fh; open($fh, "<", $file) - or die "Unable to open $file."; + or err("Unable to open $file."); my $ctx = Digest::MD5->new; $ctx->addfile($fh); close($fh); @@ -236,7 +254,7 @@ sub file_md5 { Fetches the contents of a file from FTP. $ftp must be a connected Net::FTP object and $pattern a file pattern relative to the -current path of $ftp. If the file exists, the contents are returned. +current path of $ftp. If the file exists, the contents are returned. Otherwise returns an empty string =cut @@ -252,7 +270,8 @@ sub ftp_read_var { open($fh, '>', \$out); my $file = shift(@$files); $ftp->get($file, $fh) - or die "Could not download $file", $ftp->message; + or err("Could not download file '$file'.", + "FTP error: ".$ftp->message); close($fh); return $out; @@ -298,25 +317,25 @@ sub file_download { Debug => 0, Timeout => 600 )) - or die "Could not connect to $server"; + or err("Could not connect to $server"); $ftp->login("anonymous", "-phyloFlash") - or die "Could not login to $server:", $ftp->message; + or err("Could not login to $server:", $ftp->message); $ftp->binary(); $ftp->pasv(); msg(" Finding $path$pat"); $ftp->cwd($path) - or die "Could not enter path '\$path\': ", $ftp->message; + or err("Could not enter path '\$path\': ", $ftp->message); my $files = $ftp->ls($pat) - or die "Could not list files matching \'$pat'\ in \'$path\': ", - $ftp->message; - die "No files found?!" + or err("Could not list files matching \'$pat'\ in \'$path\': ", + $ftp->message); + err("No files found?!") if (@$files == 0); msg(" Multiple files found?! Using first of ".join(@$files)) if (@$files > 1); my $file = shift(@$files); my $file_size = $ftp->size($file) - or die "Could not get file size:", $ftp->message; + or err("Could not get file size:", $ftp->message); msg(" Found $file ($file_size bytes)"); # try downloading md5 @@ -377,10 +396,10 @@ sub file_download { print STDERR "|" . "-" x 75 . "|\n"; $ftp->hash(\*STDERR, $ftp->size($file)/76); $ftp->get($file) - or die "Failed to download $file:", $ftp->message; + or err("Failed to download $file:", $ftp->message); print STDERR "\n"; - die "File size mismatch?!" + err("File size mismatch?!") if (-s $file != $file_size); if ($file_md5 eq "") { # had no md5 @@ -394,8 +413,7 @@ sub file_download { return $file; } - msg(" MD5 sum mismatch: '$file_md5' != '$local_md5'"); - die; + err(" MD5 sum mismatch: '$file_md5' != '$local_md5'"); } =item fasta_copy_except ($source, $dest, @accs) diff --git a/phyloFlash.pl b/phyloFlash.pl index bf63743..3683229 100755 --- a/phyloFlash.pl +++ b/phyloFlash.pl @@ -260,9 +260,9 @@ sub parse_cmdline { # check correct $libraryNAME if ($check_env == 1) { process_required_tools(); - check_environment(); - exit; + check_environment(); # will die on failure } + pod2usage("Please specify output file basename with -lib") if !defined($libraryNAME); pod2usage("\nArgument to -lib may not be empty") @@ -326,13 +326,8 @@ sub parse_cmdline { } # check surplus arguments - if ($ARGV[0]) { - print "Commandline contains extra words:"; - foreach (@ARGV) { - print "$_\n"; - } - die("aborting"); - } + err("Command line contains extra words:", @ARGV) + if ($ARGV[0]); } sub print_report { diff --git a/phyloFlash_makedb.pl b/phyloFlash_makedb.pl index bc5ccef..5295709 100755 --- a/phyloFlash_makedb.pl +++ b/phyloFlash_makedb.pl @@ -129,9 +129,7 @@ =head1 COPYRIGHT AND LICENSE msg("using local copy of univec: $univec_file"); } else { - msg("No univec file found and downloading disabled.\n" - ."Aborting."); - exit(); + err("No univec file found and downloading disabled."); } if ($use_remote==1 && $silva_file eq "") { @@ -142,31 +140,27 @@ =head1 COPYRIGHT AND LICENSE msg("using local copy of Silva SSU RefNR: $silva_file"); } else { - msg("No SILVA database found and downloading disabled.\n" - ."Aborting."); - exit(); + err("No SILVA database found and downloading disabled."); } # extract SILVA version my ($silva_release) = ($silva_file =~ m/SILVA_([^_]+)_/); if (!$&) { - msg("ERROR: Unable to extract version from SILVA database filename:"); - msg(" Expected 'SILVA__...' in '$silva_file'."); - msg(" Aborting."); - exit(); + err("Unable to extract version from SILVA database filename:", + "Expected 'SILVA__...' in '$silva_file'."); } # create database directory my $dbdir = "./".$silva_release."/"; if (! -e $dbdir) { mkdir $dbdir - or die "Failed to create dir $silva_release", $!; + or err("Failed to create dir $silva_release", $!); } msg("unpacking SILVA database"); anyuncompress $silva_file => "$dbdir/SILVA_SSU.fasta" - or die "unpacking failed: $AnyUncompressError\n"; + or err("unpacking failed: $AnyUncompressError"); my @lsu_in_ssh = find_LSU("$dbdir/SILVA_SSU.fasta"); From 89d1563e4fe205477bdbd405988cee5b6e077124 Mon Sep 17 00:00:00 2001 From: Elmar Pruesse Date: Sun, 10 May 2015 04:30:58 +0200 Subject: [PATCH 06/13] add some test data for heatmap --- test_files/double.phyloFlash.NTUabundance.csv | 8 ++++++++ test_files/double.phyloFlash.report.csv | 0 test_files/even.phyloFlash.NTUabundance.csv | 8 ++++++++ test_files/even.phyloFlash.report.csv | 0 test_files/linear.phyloFlash.NTUabundance.csv | 8 ++++++++ test_files/linear.phyloFlash.report.csv | 0 6 files changed, 24 insertions(+) create mode 100644 test_files/double.phyloFlash.NTUabundance.csv create mode 100644 test_files/double.phyloFlash.report.csv create mode 100644 test_files/even.phyloFlash.NTUabundance.csv create mode 100644 test_files/even.phyloFlash.report.csv create mode 100644 test_files/linear.phyloFlash.NTUabundance.csv create mode 100644 test_files/linear.phyloFlash.report.csv diff --git a/test_files/double.phyloFlash.NTUabundance.csv b/test_files/double.phyloFlash.NTUabundance.csv new file mode 100644 index 0000000..6a2b447 --- /dev/null +++ b/test_files/double.phyloFlash.NTUabundance.csv @@ -0,0 +1,8 @@ +NTU, reads +A;P1;X1,10 +A;P1;X2,20 +A;P2;X3,40 +B;P3;X4,80 +B;P3;X5,160 +E;P4;X6,320 +E;P5;X7,640 diff --git a/test_files/double.phyloFlash.report.csv b/test_files/double.phyloFlash.report.csv new file mode 100644 index 0000000..e69de29 diff --git a/test_files/even.phyloFlash.NTUabundance.csv b/test_files/even.phyloFlash.NTUabundance.csv new file mode 100644 index 0000000..d1ca334 --- /dev/null +++ b/test_files/even.phyloFlash.NTUabundance.csv @@ -0,0 +1,8 @@ +NTU, reads +A;P1;X1,100 +A;P1;X2,100 +A;P2;X3,100 +B;P3;X4,100 +B;P3;X5,100 +E;P4;X6,100 +E;P5;X7,100 diff --git a/test_files/even.phyloFlash.report.csv b/test_files/even.phyloFlash.report.csv new file mode 100644 index 0000000..e69de29 diff --git a/test_files/linear.phyloFlash.NTUabundance.csv b/test_files/linear.phyloFlash.NTUabundance.csv new file mode 100644 index 0000000..a424618 --- /dev/null +++ b/test_files/linear.phyloFlash.NTUabundance.csv @@ -0,0 +1,8 @@ +NTU, reads +A;P1;X1,50 +A;P1;X2,100 +A;P2;X3,150 +B;P3;X4,200 +B;P3;X5,250 +E;P4;X6,300 +E;P5;X7,450 diff --git a/test_files/linear.phyloFlash.report.csv b/test_files/linear.phyloFlash.report.csv new file mode 100644 index 0000000..e69de29 From 67f885a993f36dcde12afac03069eca7e5fb97f3 Mon Sep 17 00:00:00 2001 From: Elmar Pruesse Date: Sun, 10 May 2015 04:47:40 +0200 Subject: [PATCH 07/13] cleanup - add some more messages to show progress - move code a bit - remove some comments --- phyloFlash_heatmap.R | 120 +++++++++++++++++++++---------------------- 1 file changed, 58 insertions(+), 62 deletions(-) diff --git a/phyloFlash_heatmap.R b/phyloFlash_heatmap.R index 422917d..fe53ce4 100755 --- a/phyloFlash_heatmap.R +++ b/phyloFlash_heatmap.R @@ -23,6 +23,7 @@ # along with this program. # If not, see . +## more useful errors on cmdline (traceback) debugCode = quote({ dump.frames(); cat(paste(" ", 1L:length(last.dump), ": ", @@ -42,16 +43,7 @@ load_libraries <- function() { library(gtable); } - - -defaultColorScheme <- c("#313695", "#4575B4", "#74ADD1", "#ABD9E9", - "#E0F3F8", "#FFFFBF", "#FEE090", "#FDAE61", - "#F46D43", "#D73027", "#A50026"); - -#### helper functions - ### logging - pf_logLevel <- 2; pf_setLogLevel <- function(x) { assign("pf_logLevel", x, .GlobalEnv); } msg <- function(...,lvl=2) { @@ -95,7 +87,6 @@ rbind_max <- function(...,size=grid::unit.pmax){ } cbind_max <- function(...,size=grid::unit.pmax){ - ##http://stackoverflow.com/questions/24234791 bind2 <- function (x, y) { stopifnot(nrow(x) == nrow(y)) if (ncol(x) == 0) return(y) @@ -180,6 +171,45 @@ g_make_dendro_plot <- function(dendro, vertical=TRUE, labels=TRUE) { return (p); } +# makes a ggplot heatmap from a matrix +g_make_heatmap <- function(mat, n, angle=90, hjust=0,vjust=0.6) { + highcol = c("steelblue","indianred")[n] + ## factorize dims + matNames <- attr(mat, "dimnames"); + df <- as.data.frame(mat); + colnames(df) <- matNames[[2]]; + df$y.variable <- matNames[[1]]; + df$y.variable <- with(df, factor(y.variable,levels=y.variable,ordered=TRUE)); + + mat <- melt(df, id.vars="y.variable"); + + heatMapPlot <- ggplot(mat, aes(variable,y.variable)) + + geom_tile(aes(fill=value)) + + scale_fill_gradient(low="white", high=highcol, trans="log", + na.value="white") + + labs(x = NULL, y = NULL) + + scale_x_discrete(expand=c(0,0)) + + scale_y_discrete(expand=c(0,0)) + + theme(axis.text.x = element_text(angle=angle, hjust=hjust,vjust=vjust), + axis.ticks.length = unit(0,"null")); + + return(heatMapPlot); +} + +# makes a grob containing a row of strings from a string vector +gtable_text_row <- function(strvec) { + grobs <- lapply(strvec, function(str) { + textGrob(str,gp=gpar(fontsize=8)) + }) + gt <- gtable(heights=unit(1,"lines"), widths=unit(0,"null")); + + if (length(strvec) == 0) return(gt); + + g <- gtable_row("textrow", grobs); + gt <- gtable_add_grob(gt, g, t=1, l=1); + + gt +} # loads phyloFlash output files into R read.phyloFlash <- function(files) { @@ -197,7 +227,7 @@ read.phyloFlash <- function(files) { msg(unlist(libs), fill=77); msg("Loading CSV files..."); - for (lib in libs) { + for (lib in libs) tryCatch({ fileName <- paste(lib, ".phyloFlash.NTUabundance.csv", sep=""); info("Reading: ",fileName); fileData <- read.csv(fileName); @@ -221,7 +251,9 @@ read.phyloFlash <- function(files) { } else { MetaData <- merge(x=MetaData, y=fileData, by="key"); } - } + }, warning = function(e) { + err("Error in read.phyloFlash:\n",e$message); + }); pfData <- list(); # result is a list @@ -327,46 +359,7 @@ cluster <- function(pf, method="ward") { return(pf); } -g_make_heatmap <- function(mat, n, colorScheme=defaultColorScheme, angle=90, hjust=0,vjust=0.6) { - highcol = c("steelblue","indianred")[n] - ## factorize dims - matNames <- attr(mat, "dimnames"); - df <- as.data.frame(mat); - colnames(df) <- matNames[[2]]; - df$y.variable <- matNames[[1]]; - df$y.variable <- with(df, factor(y.variable,levels=y.variable,ordered=TRUE)); - - mat <- melt(df, id.vars="y.variable"); - - breaks = c(0,25,50,75,100); - - heatMapPlot <- ggplot(mat, aes(variable,y.variable)) + - geom_tile(aes(fill=value)) + - #scale_fill_gradientn(colours = colorScheme) + - #scale_fill_gradientn(colours = rainbow(3)) + - #scale_fill_gradient2(low="blue", mid="green", high="red", midpoint = 30)+ - scale_fill_gradient(low="white", high=highcol, - trans="log", #breaks=breaks, labels=breaks, - na.value="white") + - labs(x = NULL, y = NULL) + - scale_x_discrete(expand=c(0,0)) + - scale_y_discrete(expand=c(0,0)) + - theme(axis.text.x = element_text(angle=angle, hjust=hjust,vjust=vjust), - axis.ticks.length = unit(0,"null")); - - return(heatMapPlot); -} - -gtable_text_row <- function(strvec) { - grobs <- lapply(strvec, function(str) { - textGrob(str,gp=gpar(fontsize=8)) - }) - - g <- gtable_row("textrow", grobs); - gt <- gtable(heights=unit(1,"lines"), widths=unit(0,"null")); - gt <- gtable_add_grob(gt, g, t=1, l=1); -} - +# creates a plot from a phyloFlash "object" plot.phyloFlash <- function(pf) { nmaps <- length(pf$data); rows <- sapply(pf$data,nrow); @@ -393,7 +386,6 @@ plot.phyloFlash <- function(pf) { g <- gtable_add_row_space(g, unit(.2,"lines")); - ## add row at top gr_legends <- lapply(gg_heatmaps, function(x) { g_get("guides", g_get("guide-box", x)$grobs[[1]]) }) @@ -402,8 +394,6 @@ plot.phyloFlash <- function(pf) { gr_legend <- gtable_add_grob(zero, gr_legends , t=1, l=1) gr_legend$heights = max(gr_legends$heights) gr_legend$widths = sum(gr_legends$widths) - ##gr_legend <- g_get("guide-box", gg_heatmaps[[1]]); - ##gr_legend <- g_get("guides", gr_legend$grobs[[1]]); gr_sampleTree <- g_get("panel", g_make_dendro_plot(pf$col_dendro, FALSE, TRUE)); gr_sampleTree$heights=unit(0.1,"null") @@ -462,7 +452,7 @@ pF_main <- function() { "--split-regex", default="Eukaryota", type="character", - help="Split heatmap using this regex on taxa", + help="Split heatmap using this regex on taxa. Default '%default'", ), make_option( "--no-shorten-names", @@ -528,6 +518,7 @@ Files: ## split by domain if (!conf$options$"no-split") { + msg("Splitting data according to regex ", conf$options$"split-regex"); pat <- strsplit(conf$options$"split-regex",",")[[1]]; pf$data <- split_by_name(pf$data, pat); } @@ -537,14 +528,24 @@ Files: pf$data <- merge_low_counts(pf$data, thres=conf$options$"min-ntu-count"); + if (!conf$options$"no-scaling") { + msg("Rescaling counts to percentages"); pf$data <- scale_to_percent(pf$data); } + msg("Clustering..."); pf <- cluster(pf, method=conf$options$"hclust-method"); + msg("Brief summary of counts:"); + if (pf_logLevel >= 2) { + print(summary(pf$data)); + } + + msg("Creating plot..."); g <- plot.phyloFlash(pf); + msg("Printing plot..."); outdim = as.integer(strsplit(conf$options$"out-size","x")[[1]]); switch(strsplit(conf$options$out, "[.]")[[1]][-1], png = png(file = conf$options$out, @@ -554,15 +555,10 @@ Files: pdf = pdf(file = conf$options$out, width=outdim[1], height = outdim[2]) ); - + grid.newpage(); grid.draw(g); dev.off(); - - msg("Brief summary of counts:"); - if (pf_logLevel >= 2) { ### "cat(summary(...))" does - print(summary(pf$data)); - } } # if we are run as a script from the cmdline From f0e53024102260d162a673a6373154f6da936103 Mon Sep 17 00:00:00 2001 From: Elmar Pruesse Date: Sun, 10 May 2015 04:48:38 +0200 Subject: [PATCH 08/13] remove legend title ("value"... not helpful) --- phyloFlash_heatmap.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/phyloFlash_heatmap.R b/phyloFlash_heatmap.R index fe53ce4..edbeb3a 100755 --- a/phyloFlash_heatmap.R +++ b/phyloFlash_heatmap.R @@ -191,7 +191,8 @@ g_make_heatmap <- function(mat, n, angle=90, hjust=0,vjust=0.6) { scale_x_discrete(expand=c(0,0)) + scale_y_discrete(expand=c(0,0)) + theme(axis.text.x = element_text(angle=angle, hjust=hjust,vjust=vjust), - axis.ticks.length = unit(0,"null")); + axis.ticks.length = unit(0,"null"), + legend.title=element_blank())); return(heatMapPlot); } From f55bea41bf31409aa86ad87d00ffb5ad797c16b4 Mon Sep 17 00:00:00 2001 From: Elmar Pruesse Date: Sun, 10 May 2015 04:49:20 +0200 Subject: [PATCH 09/13] make legend breaks rounded integers (instead of long floats "20.001293") --- phyloFlash_heatmap.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/phyloFlash_heatmap.R b/phyloFlash_heatmap.R index edbeb3a..1ebb75b 100755 --- a/phyloFlash_heatmap.R +++ b/phyloFlash_heatmap.R @@ -183,9 +183,12 @@ g_make_heatmap <- function(mat, n, angle=90, hjust=0,vjust=0.6) { mat <- melt(df, id.vars="y.variable"); + breaks <- function(x) axisTicks(log10(range(x, na.rm = TRUE)), log = TRUE); + heatMapPlot <- ggplot(mat, aes(variable,y.variable)) + geom_tile(aes(fill=value)) + - scale_fill_gradient(low="white", high=highcol, trans="log", + scale_fill_gradient(low="white", high=highcol, trans="log", + breaks=breaks, na.value="white") + labs(x = NULL, y = NULL) + scale_x_discrete(expand=c(0,0)) + From 3cb710ff169c992c30717bf50c45e82486a0f054 Mon Sep 17 00:00:00 2001 From: Elmar Pruesse Date: Sun, 10 May 2015 04:50:43 +0200 Subject: [PATCH 10/13] let white in heatmap be "1", not min(matrix) --- phyloFlash_heatmap.R | 1 + 1 file changed, 1 insertion(+) diff --git a/phyloFlash_heatmap.R b/phyloFlash_heatmap.R index 1ebb75b..7cf4e38 100755 --- a/phyloFlash_heatmap.R +++ b/phyloFlash_heatmap.R @@ -189,6 +189,7 @@ g_make_heatmap <- function(mat, n, angle=90, hjust=0,vjust=0.6) { geom_tile(aes(fill=value)) + scale_fill_gradient(low="white", high=highcol, trans="log", breaks=breaks, + limits=c(1,max(mat$value)), na.value="white") + labs(x = NULL, y = NULL) + scale_x_discrete(expand=c(0,0)) + From 429e8901796eedd3fa00a9bce9a8a093b936c488 Mon Sep 17 00:00:00 2001 From: Elmar Pruesse Date: Sun, 10 May 2015 04:57:02 +0200 Subject: [PATCH 11/13] fix typo --- phyloFlash_heatmap.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phyloFlash_heatmap.R b/phyloFlash_heatmap.R index 7cf4e38..25d96dd 100755 --- a/phyloFlash_heatmap.R +++ b/phyloFlash_heatmap.R @@ -196,7 +196,7 @@ g_make_heatmap <- function(mat, n, angle=90, hjust=0,vjust=0.6) { scale_y_discrete(expand=c(0,0)) + theme(axis.text.x = element_text(angle=angle, hjust=hjust,vjust=vjust), axis.ticks.length = unit(0,"null"), - legend.title=element_blank())); + legend.title=element_blank()); return(heatMapPlot); } From 91c55603c0ed5b591f50fd0c6cda849053b88d76 Mon Sep 17 00:00:00 2001 From: Elmar Pruesse Date: Mon, 11 May 2015 11:18:45 +0200 Subject: [PATCH 12/13] allow configuring order of rows --- phyloFlash_heatmap.R | 87 ++++++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 32 deletions(-) diff --git a/phyloFlash_heatmap.R b/phyloFlash_heatmap.R index 25d96dd..9e23d19 100755 --- a/phyloFlash_heatmap.R +++ b/phyloFlash_heatmap.R @@ -113,9 +113,9 @@ g_get <- function(pat, obj) { } # prepare a pure dendrogram plot from a dendro_data object -# @param bool vertical If true, plot has leaves as rows. +# @param bool axis 1:4=below,left,above,right # @param bool labels If true, plot includes labels -g_make_dendro_plot <- function(dendro, vertical=TRUE, labels=TRUE) { +g_make_dendro_plot <- function(dendro, axis, labels=TRUE) { ddata <- dendro_data(dendro, type="rectangle"); # Unexpanded, the dendrogram will span maximum width. That is, @@ -135,20 +135,19 @@ g_make_dendro_plot <- function(dendro, vertical=TRUE, labels=TRUE) { aes(x=x, y=y, xend=xend, yend=yend)) + theme_dendro() + labs(x = NULL, y = NULL) + - scale_y_continuous(expand=c(0,0)) + + scale_y_continuous(expand=c(0,0), + trans=c("reverse","identity")[(axis+1)%/%2]) + theme(axis.ticks.length = unit(0,"null"), axis.ticks.margin = unit(0,"null") ); # flip if vertical and add 1 mm space on the outer # edge to make sure the outmost connection is visible - if (vertical) { - p <- p + coord_flip() + - theme(plot.margin = unit(c(0,1,0,0),"mm")) - } else { - p <- p + - theme(plot.margin = unit(c(1,0,0,0),"mm")) + + if (axis %% 2 == 0) { + p <- p + coord_flip() } + # add the appropriate scale configuration # if labels are to be shown, pull them from the @@ -159,7 +158,7 @@ g_make_dendro_plot <- function(dendro, vertical=TRUE, labels=TRUE) { expand = c(expandFactor,0), breaks = 1:length(ddata$labels$label), labels = ddata$labels$label); - if (vertical) { + if (axis %% 2 == 0) { p <- p + theme(axis.text.y = element_text(angle = 0, hjust = 1)); } else { p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1)); @@ -365,31 +364,36 @@ cluster <- function(pf, method="ward") { } # creates a plot from a phyloFlash "object" -plot.phyloFlash <- function(pf) { +plot.phyloFlash <- function(pf, + row.order=c("tree","map","chao","labels"), + col.order=c("labels","map","tree")) { + if (is.character(row.order)) { + row.order = strsplit(row.order,",")[[1]]; + } + if (is.character(col.order)) { + col.order = strsplit(col.order,",")[[1]]; + } + nmaps <- length(pf$data); - rows <- sapply(pf$data,nrow); # empty table zero <- gtable(widths=unit(0,"null"),heights=unit(0,"null")); zero1 <- gtable(widths=unit(1,"null"),heights=unit(0,"null")); ## get heatmaps and labels - gg_heatmaps <- mapply(g_make_heatmap, pf$data, c(1:length(pf$data)), SIMPLIFY=FALSE); - gr_heatmaps <- mapply(g_get, rep("panel|axis-l", nmaps), gg_heatmaps); + gg_heatmaps <- mapply(g_make_heatmap, pf$data, c(1:length(pf$data)), SIMPLIFY=FALSE); + gr_heatmaps <- mapply(g_get, rep("panel|axis-l", nmaps), gg_heatmaps); ## merge below each other - g <- do.call(rbind_max, gr_heatmaps); + gr_heatmaps <- do.call(rbind_max, gr_heatmaps); ## scale heights by number of rows - g$heights = g$heights * (rows/sum(rows)); + nrows <- sapply(pf$data,nrow); + gr_heatmaps$heights = gr_heatmaps$heights * (nrows/sum(nrows)); ## get trees over samples - gr_trees <- lapply(pf$row_dendro, g_make_dendro_plot); + gr_trees <- lapply(pf$row_dendro, function(x) g_make_dendro_plot(x,axis=4)); gr_trees <- lapply(gr_trees, function(x) g_get("panel",x)) ## merge into one column gr_trees <- do.call(rbind_max, gr_trees); - ## add to right of heatmaps - g<-cbind_max(g, gr_trees,size=1); - - g <- gtable_add_row_space(g, unit(.2,"lines")); ## add row at top gr_legends <- lapply(gg_heatmaps, function(x) { @@ -399,26 +403,29 @@ plot.phyloFlash <- function(pf) { gr_legend <- gtable_add_grob(zero, gr_legends , t=1, l=1) gr_legend$heights = max(gr_legends$heights) gr_legend$widths = sum(gr_legends$widths) - gr_sampleTree <- g_get("panel", g_make_dendro_plot(pf$col_dendro, FALSE, TRUE)); + axis = ifelse(match("tree",row.order) < match("map",row.order),3,1); + gr_sampleTree <- g_get("panel", g_make_dendro_plot(pf$col_dendro, axis=axis)); gr_sampleTree$heights=unit(0.1,"null") - top_row <- cbind_max(zero, gr_sampleTree, gr_legend);#gr_legend); - - g<-rbind_max(top_row, g); - chao <- pf$meta$NTU.Chao1.richness.estimate; chao <- round(as.numeric(as.character(chao))) gr_chao_grob <- textGrob("Chao1",x=unit(.99,"npc"),just="right",gp=gpar(fontsize=8)) gr_chao_lab <- gtable_add_grob(zero1,gr_chao_grob,t=1,l=1,r=1,b=1); - - chao_row <- cbind_max(gr_chao_lab, gtable_text_row(chao), zero); - g<-rbind_max(g,chao_row); - + # add row at bottom gr_sample_labels <- g_get("axis-b", gg_heatmaps[[1]]); + + tree_row <- cbind_max(zero, gr_sampleTree, gr_legend); + chao_row <- cbind_max(gr_chao_lab, gtable_text_row(chao), zero); bottom_row <- cbind_max(zero,gr_sample_labels,zero); - g<-rbind_max(g, bottom_row); + + g <- cbind_max(gr_heatmaps, gr_trees,size=1); + g <- gtable_add_row_space(g, unit(.2,"lines")); + + gr_rows <- list(tree_row, g, chao_row, bottom_row); + gr_rows <- gr_rows[match(row.order, c("tree","map","chao","labels"))]; + g <- do.call(rbind_max,gr_rows); g <- gtable_add_row_space(g, unit(.1,"lines")); g <- gtable_add_col_space(g, unit(.1,"lines")); @@ -478,6 +485,20 @@ pF_main <- function() { ward, single, complete, average, mcquitty, median or centroid. Default is %default." ), + make_option( + "--row-order", + default="tree,map,chao,labels", + help="Order of rows, separated by commas. Valid terms are: + tree, map, chao and labels. + Default is %default." + ), + make_option( + "--col-order", + default="labels,map,tree", + help="Order of columns, separated by commas. Valid terms are: + labels, map and tree. + Default is %default." + ), make_option( "--out", default="out.png", @@ -548,7 +569,9 @@ Files: } msg("Creating plot..."); - g <- plot.phyloFlash(pf); + g <- plot.phyloFlash(pf, + row.order=conf$options$"row-order", + col.order=conf$options$"col-order"); msg("Printing plot..."); outdim = as.integer(strsplit(conf$options$"out-size","x")[[1]]); From f0ae17ad945880d38713e8046f609cfdfc2383c9 Mon Sep 17 00:00:00 2001 From: Elmar Pruesse Date: Mon, 11 May 2015 11:19:23 +0200 Subject: [PATCH 13/13] fix Rplot.pdf being generated always (last value in R function is always returned, and printed if possible, so we are now returning an invisible 1 at the end of the main) --- phyloFlash_heatmap.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/phyloFlash_heatmap.R b/phyloFlash_heatmap.R index 9e23d19..f97cd63 100755 --- a/phyloFlash_heatmap.R +++ b/phyloFlash_heatmap.R @@ -587,6 +587,8 @@ Files: grid.newpage(); grid.draw(g); dev.off(); + + invisible(1); } # if we are run as a script from the cmdline