From c5ae82e1e216a7c1c10b1104faa00c96137c6952 Mon Sep 17 00:00:00 2001 From: Paul Jonas Jost <70631928+PaulJonasJost@users.noreply.github.com> Date: Tue, 13 Aug 2024 09:25:09 +0200 Subject: [PATCH] Donload, report and code for violin plot (#298) * Download, report and code for violin plot * Code snippets batch corr (#307) * change order of batch correction * allow for none + batch correction * add prepreocessing assignment if none as preprocessing * fix bug - PCA snippet functions --- program/shinyApp/R/C.R | 1 + program/shinyApp/R/fun_getCodeSnippets.R | 435 +++++++++++++---------- program/shinyApp/R/pca/server.R | 1 - program/shinyApp/R/pre_processing/ui.R | 66 +++- program/shinyApp/server.R | 241 ++++++++++--- 5 files changed, 501 insertions(+), 243 deletions(-) diff --git a/program/shinyApp/R/C.R b/program/shinyApp/R/C.R index 361c1285..8e448533 100644 --- a/program/shinyApp/R/C.R +++ b/program/shinyApp/R/C.R @@ -19,6 +19,7 @@ PADJUST_METHOD <<- list( ) CODE_DOWNLOAD_PREFACE <<- " + # Load the data ---- # The following will try to detect the directory of the file and load the data # this is succesfull if diff --git a/program/shinyApp/R/fun_getCodeSnippets.R b/program/shinyApp/R/fun_getCodeSnippets.R index bfb9fd0c..30954870 100644 --- a/program/shinyApp/R/fun_getCodeSnippets.R +++ b/program/shinyApp/R/fun_getCodeSnippets.R @@ -1,4 +1,3 @@ - getPlotCode <- function(numberOfScenario) { PreProcessing_Procedure <- par_tmp[[session$token]]$PreProcessing_Procedure row_selection <- par_tmp[[session$token]]$row_selection @@ -151,142 +150,145 @@ selected <- unique( assay(res_tmp$data) <- as.data.frame(pareto.matrix) ' } - if(par_tmp[[session$token]]['BatchColumn'] != "NULL" & PreProcessing_Procedure != "vst_DESeq"){ - stringSource <- c(stringSource, "sva") - string_batchCorrection <- 'res_tmp$data_batch_corrected <- res_tmp$data - assay(res_tmp$data_batch_corrected) <- sva::ComBat( - dat = assay(res_tmp$data_batch_corrected), - batch = as.factor(colData(res_tmp$data_batch_corrected)[,par_tmp["BatchColumn"]]) - ) - ' - # copy string to a new one and replace all orccurences of res_tmp$data with res_tmp$data_batch_corrected - stringPreProcessing_batch <- stringPreProcessing - stringPreProcessing_batch <- gsub("res_tmp$data","res_tmp$data_batch_corrected",stringPreProcessing_batch) - string_batchCorrection <- paste0(prequel_stringPreProcessing,"\n", string_batchCorrection) - } else if (par_tmp[[session$token]]['BatchColumn'] != "NULL" & PreProcessing_Procedure == "vst_DESeq") { - stringPreProcessing_batch <- stringPreProcessing - stringPreProcessing_batch <- gsub("res_tmp$data","res_tmp$data_batch_corrected",stringPreProcessing_batch) - stringPreProcessing_batch <- gsub("par_tmp$DESeq_formula","par_tmp$DESeq_formula_batch",stringPreProcessing_batch) - string_batchCorrection <- paste0(prequel_stringPreProcessing,"\n", string_batchCorrection) - } else { - string_batchCorrection <- '' - } - stringPreProcessing <- paste0(prequel_stringPreProcessing,"\n", string_batchCorrection, "\n", stringPreProcessing) - if (par_tmp[[session$token]]['BatchColumn'] != "NULL") { - stringPreProcessing <- paste0( - stringPreProcessing, "\n", - "# uncomment this line to use batch corrected data\n# res_tmp$data <- res_tmp$data_batch_corrected\n" - ) - } }else{ - stringPreProcessing <- '' + prequel_stringPreProcessing <- c("") + stringPreProcessing <- 'res_tmp$data <- tmp_data_selected' } + if(par_tmp[[session$token]]['BatchColumn'] != "NULL" & PreProcessing_Procedure != "vst_DESeq"){ + string_batchCorrection <- ' +res_tmp$data_batch_corrected <- res_tmp$data +assay(res_tmp$data_batch_corrected) <- sva::ComBat( + dat = assay(res_tmp$data_batch_corrected), + batch = as.factor(colData(res_tmp$data_batch_corrected)[,par_tmp$BatchColumn]) +) +' + } else if (par_tmp[[session$token]]['BatchColumn'] != "NULL" & PreProcessing_Procedure == "vst_DESeq") { + # TODO: formula needs to be updated + string_batchCorrection <- ' +dds_batch <- DESeq2::DESeqDataSetFromMatrix( + countData = assay(res_tmp$data), + colData = colData(res_tmp$data), + design = as.formula(par_tmp$DESeq_formula_batch) + ) +de_seq_result_batch <- DESeq2::DESeq(dds_batch) +res_tmp$DESeq_obj_batch <- de_seq_result_batch +dds_vst_batch <- vst( +object = de_seq_result, +blind = TRUE +) +# Note that the following command overwrites the current data with the batch corrected data +# if you want to recover the non batch corrected data you can check dds_vst +assay(res_tmp$data) <- as.data.frame(assay(de_seq_result_batch)) + ' + } else { + string_batchCorrection <- '' + } + stringPreProcessing <- paste0(prequel_stringPreProcessing,"\n", stringPreProcessing, "\n", string_batchCorrection) + if (par_tmp[[session$token]]['BatchColumn'] != "NULL") { + stringPreProcessing <- paste0( + stringPreProcessing, "\n", + "# uncomment this line to use non - batch corrected data\nres_tmp$data <- res_tmp$data_batch_corrected\n" + ) + } + ## Plot Code ---- + ## PreProcessing ---- + + if(numberOfScenario == 0.5){ + stringtosave <- ' +violin_plot <- function(data, color_by){ + # create a violin plot based on the provided summarized experiment. Colors by + # the provided color_by column and returns the plot + data_frame <- as.data.frame(assay(data)) + data_frame <- reshape2::melt(data_frame, variable.name="Sample", value.name="Counts") + data_frame <- merge(data_frame, colData(data), by.x = "Sample", by.y = "row.names") + plot2return <- ggplot(data_frame, aes(x = Sample, y = Counts, fill = data_frame[[color_by]])) + + geom_violin(trim = T, color = "black") + + CUSTOM_THEME + + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + + labs(title = "Count distribution per sample", + x = "Sample", + y = "Counts", + fill = color_by + ) + return(plot2return) +} +raw_violin <- violin_plot( + res_tmp$data_original[par_tmp[["entities_selected"]],par_tmp[["samples_selected"]]], + par_tmp$violin_color +) + ggtitle("Count distribution per sample - raw") + theme(legend.position = "none") +preprocessed_violin <- violin_plot( + res_tmp$data, + par_tmp$violin_color +) + ggtitle("Count distribution per sample - preprocessed") + theme(legend.position = "none") +lapply(ls(pattern="violin"), get) +' + } + ## PCA ---- if(numberOfScenario >= 1 & numberOfScenario < 9){ # Calculate all necessary intermediate data sets prequel_stringtosave <- ' - pca <- prcomp( - x = as.data.frame(t(assay(res_tmp$data))), - center = T, - scale. = FALSE - ) +pca <- prcomp( + x = as.data.frame(t(assay(res_tmp$data))), + center = T, + scale. = FALSE +) - pcaData <- data.frame(pca$x,colData(res_tmp$data)) -# Annotation (important for plotly) - if(!any(colnames(pcaData) == "global_ID")){ - pcaData$global_ID <- rownames(pcaData) - } - if(!is.null(par_tmp$PCA$PCA_anno_tooltip)){ - adj2colname <- gsub(" ",".",par_tmp$PCA$PCA_anno_tooltip) - pcaData$chosenAnno <- pcaData[,adj2colname] - }else{ - pcaData$chosenAnno <- pcaData$global_ID - } -# Scree Plot calculations +pcaData <- data.frame(pca$x,colData(res_tmp$data)) +# Annotation (important for plotly - but does not cause harm) +if(!any(colnames(pcaData) == "global_ID")){ + pcaData$global_ID <- rownames(pcaData) +} +if(!is.null(par_tmp$PCA$PCA_anno_tooltip)){ + adj2colname <- gsub(" ",".",par_tmp$PCA$PCA_anno_tooltip) + pcaData$chosenAnno <- pcaData[,adj2colname] +}else{ + pcaData$chosenAnno <- pcaData$global_ID +} + +# Scree Plot/Variance explained calculations var_explained_df <- data.frame( - PC = paste0("PC",1:ncol(pca$x)), - var_explained = (pca$sdev)^2/sum((pca$sdev)^2) + PC = paste0("PC",1:ncol(pca$x)), + var_explained = (pca$sdev)^2/sum((pca$sdev)^2) ) var_explained_df$Var <- paste0(round(var_explained_df$var_explained,4)*100,"%") var_explained_df$PC <- factor(var_explained_df$PC,levels = paste0("PC",1:ncol(pca$x))) percentVar <- round(100 * var_explained_df$var_explained, digits = 1) -names(percentVar)<- var_explained_df$PC - -# Loadings calculations -LoadingsDF <- data.frame( - entitie = rownames(pca$rotation), - Loading = pca$rotation[,par_tmp$PCA$x_axis_selection] -) -LoadingsDF <- LoadingsDF[order(LoadingsDF$Loading,decreasing = T),] -LoadingsDF <- rbind( -LoadingsDF[nrow(LoadingsDF):(nrow(LoadingsDF) - par_tmp$PCA$bottomSlider),], - LoadingsDF[par_tmp$PCA$topSlider:1,] -) -LoadingsDF$entitie <- factor(LoadingsDF$entitie,levels = rownames(LoadingsDF)) -if(!is.null(par_tmp$PCA$EntitieAnno_Loadings)){ - LoadingsDF$entitie=factor( - make.unique(as.character(rowData(res_tmp$data)[rownames(LoadingsDF),par_tmp$PCA$EntitieAnno_Loadings])), - levels = make.unique(as.character(rowData(res_tmp$data)[rownames(LoadingsDF),par_tmp$PCA$EntitieAnno_Loadings])) - ) -} - -if(is.null(par_tmp$PCA$nPCAs_to_look_at)){ - df_loadings <- data.frame( - entity = row.names(pca$rotation), - pca$rotation[, 1:2] - ) -}else{ - df_loadings <- data.frame( - entity = row.names(pca$rotation), - pca$rotation[, 1:par_tmp$PCA$nPCAs_to_look_at] - ) -} - -df_loadings_filtered <- as.matrix(df_loadings[,-1]) >= abs(par_tmp$PCA$filterValue) -entitiesToInclude <- apply(df_loadings_filtered, 1, any) - -df_loadings <- df_loadings[entitiesToInclude,] %>% - tidyr::gather(key = "PC", value = "loading", -entity) - -if(!is.null(par_tmp$PCA$EntitieAnno_Loadings_matrix)){ - df_loadings$chosenAnno <- factor( - make.unique(as.character(rowData(res_tmp$data)[unique(df_loadings$entity),par_tmp$PCA$EntitieAnno_Loadings_matrix])), - levels = make.unique(as.character(rowData(res_tmp$data)[unique(df_loadings$entity),par_tmp$PCA$EntitieAnno_Loadings_matrix])) - ) -}else{ - df_loadings$chosenAnno <- df_loadings$entity -} -' +names(percentVar)<- var_explained_df$PC' if (numberOfScenario == 1) { - stringtosave = 'pca_plot <- ggplot(pcaData, aes(x = pcaData[,par_tmp$PCA$x_axis_selection], - y = pcaData[,par_tmp$PCA$y_axis_selection], - color=pcaData[,par_tmp$PCA$coloring_options], - label=global_ID, - global_ID=global_ID, - chosenAnno=chosenAnno)) + + stringtosave = ' +pca_plot <- ggplot(pcaData, + aes(x = pcaData[,par_tmp$PCA$x_axis_selection], + y = pcaData[,par_tmp$PCA$y_axis_selection], + color=pcaData[,par_tmp$PCA$coloring_options], + label=global_ID, + global_ID=global_ID, + chosenAnno=chosenAnno)) + geom_point(size =3)+ scale_color_manual(name = par_tmp$PCA$coloring_options,values=par_tmp$PCA$colorTheme)+ - xlab(paste0(names(percentVar[par_tmp$PCA$x_axis_selection]),": ",percentVar[par_tmp$PCA$x_axis_selection], "% variance")) + - ylab(paste0(names(percentVar[par_tmp$PCA$y_axis_selection]),": ", percentVar[par_tmp$PCA$y_axis_selection], "% variance")) + - coord_fixed()+ - CUSTOM_THEME+ - theme(aspect.ratio = 1)+ - ggtitle(par_tmp$PCA$customTitle)' + xlab(paste0(names(percentVar[par_tmp$PCA$x_axis_selection]),": ",percentVar[par_tmp$PCA$x_axis_selection], "% variance")) + + ylab(paste0(names(percentVar[par_tmp$PCA$y_axis_selection]),": ", percentVar[par_tmp$PCA$y_axis_selection], "% variance")) + + coord_fixed()+ + CUSTOM_THEME+ + theme(aspect.ratio = 1)+ + ggtitle(par_tmp$PCA$customTitle)' } if (numberOfScenario == 2) { - stringtosave = 'pca_plot <- ggplot(pcaData, aes(x = pcaData[,par_tmp$PCA$x_axis_selection], - y = pcaData[,par_tmp$PCA$y_axis_selection], - color=pcaData[,par_tmp$PCA$coloring_options], - label=global_ID, - global_ID=global_ID, - chosenAnno=chosenAnno)) + - geom_point(size =3)+ - scale_color_discrete(name = par_tmp$PCA$coloring_options)+ + stringtosave = ' +pca_plot <- ggplot(pcaData, + aes(x = pcaData[,par_tmp$PCA$x_axis_selection], + y = pcaData[,par_tmp$PCA$y_axis_selection], + color=pcaData[,par_tmp$PCA$coloring_options], + label=global_ID, + global_ID=global_ID, + chosenAnno=chosenAnno)) + + geom_point(size =3) + + scale_color_discrete(name = par_tmp$PCA$coloring_options) + xlab(paste0(names(percentVar[par_tmp$PCA$x_axis_selection]),": ",percentVar[par_tmp$PCA$x_axis_selection], "% variance")) + ylab(paste0(names(percentVar[par_tmp$PCA$y_axis_selection]),": ", percentVar[par_tmp$PCA$y_axis_selection], "% variance")) + coord_fixed()+ @@ -295,14 +297,16 @@ if(!is.null(par_tmp$PCA$EntitieAnno_Loadings_matrix)){ ggtitle(par_tmp$PCA$customTitle)' } if (numberOfScenario == 3) { - stringtosave = 'pca_plot <- ggplot(pcaData, aes(x = pcaData[,par_tmp$PCA$x_axis_selection], - y = pcaData[,par_tmp$PCA$y_axis_selection], - color=pcaData[,par_tmp$PCA$coloring_options], - label=global_ID, - global_ID=global_ID, - chosenAnno=chosenAnno)) + - geom_point(size =3)+ - scale_color_manual(values=par_tmp$PCA$colorTheme, + stringtosave = ' +pca_plot <- ggplot(pcaData, + aes(x = pcaData[,par_tmp$PCA$x_axis_selection], + y = pcaData[,par_tmp$PCA$y_axis_selection], + color=pcaData[,par_tmp$PCA$coloring_options], + label=global_ID, + global_ID=global_ID, + chosenAnno=chosenAnno)) + + geom_point(size =3) + + scale_color_manual(values=par_tmp$PCA$colorTheme, name = par_tmp$PCA$coloring_options)+ xlab(paste0(names(percentVar[par_tmp$PCA$x_axis_selection]),": ",percentVar[par_tmp$PCA$x_axis_selection], "% variance")) + ylab(paste0(names(percentVar[par_tmp$PCA$y_axis_selection]),": ", percentVar[par_tmp$PCA$y_axis_selection], "% variance")) + @@ -312,71 +316,78 @@ if(!is.null(par_tmp$PCA$EntitieAnno_Loadings_matrix)){ ggtitle(par_tmp$PCA$customTitle)' } if (numberOfScenario == 4) { - stringtosave = 'pca_plot <- ggplot(pcaData, aes(x = pcaData[,par_tmp$PCA$x_axis_selection], - y = pcaData[,par_tmp$PCA$y_axis_selection], - color=pcaData[,par_tmp$PCA$coloring_options], - label=global_ID, - global_ID=global_ID, - chosenAnno=chosenAnno)) + - geom_point(size =3)+ - scale_color_manual(name = par_tmp$PCA$coloring_options,values=par_tmp$PCA$colorTheme)+ + stringtosave = ' +pca_plot <- ggplot(pcaData, + aes(x = pcaData[,par_tmp$PCA$x_axis_selection], + y = pcaData[,par_tmp$PCA$y_axis_selection], + color=pcaData[,par_tmp$PCA$coloring_options], + label=global_ID, + global_ID=global_ID, + chosenAnno=chosenAnno)) + + geom_point(size =3)+ + scale_color_manual(name = par_tmp$PCA$coloring_options,values=par_tmp$PCA$colorTheme)+ xlab(paste0(names(percentVar[par_tmp$PCA$x_axis_selection]),": ",percentVar[par_tmp$PCA$x_axis_selection], "% variance")) + ylab(paste0(names(percentVar[par_tmp$PCA$y_axis_selection]),": ", percentVar[par_tmp$PCA$y_axis_selection], "% variance")) + coord_fixed()+ CUSTOM_THEME+ theme(aspect.ratio = 1)+ - ggtitle(par_tmp$PCA$customTitle)+geom_segment(data=df_out_r[which(df_out_r$feature!=""),], - aes(x=0, y=0, xend=v1, yend=v2), - arrow=arrow(type="closed",unit(0.01, "inches"),ends = "both"), - #linetype="solid", - #alpha=0.5, - color="#ab0521")' + ggtitle(par_tmp$PCA$customTitle)+ + geom_segment(data=df_out_r[which(df_out_r$feature!=""),], + aes(x=0, y=0, xend=v1, yend=v2), + arrow=arrow(type="closed",unit(0.01, "inches"),ends = "both"), + #linetype="solid", + #alpha=0.5, + color="#ab0521")' } if (numberOfScenario == 5) { - stringtosave = 'pca_plot <- ggplot(pcaData, aes(x = pcaData[,par_tmp$PCA$PCA$x_axis_selection], - y = pcaData[,par_tmp$PCA$PCA$y_axis_selection], - color=pcaData[,par_tmp$PCA$PCA$coloring_options], - label=global_ID, - global_ID=global_ID, - chosenAnno=chosenAnno)) + - geom_point(size =3)+ - scale_color_discrete(name = par_tmp$PCA$coloring_options)+ - xlab(paste0(names(percentVar[par_tmp$PCA$x_axis_selection]),": ",percentVar[par_tmp$PCA$x_axis_selection], "% variance")) + + stringtosave = ' +pca_plot <- ggplot(pcaData, + aes(x = pcaData[,par_tmp$PCA$PCA$x_axis_selection], + y = pcaData[,par_tmp$PCA$PCA$y_axis_selection], + color=pcaData[,par_tmp$PCA$PCA$coloring_options], + label=global_ID, + global_ID=global_ID, + chosenAnno=chosenAnno)) + + geom_point(size =3)+ + scale_color_discrete(name = par_tmp$PCA$coloring_options)+ + xlab(paste0(names(percentVar[par_tmp$PCA$x_axis_selection]),": ",percentVar[par_tmp$PCA$x_axis_selection], "% variance")) + ylab(paste0(names(percentVar[par_tmp$PCA$y_axis_selection]),": ", percentVar[par_tmp$PCA$y_axis_selection], "% variance")) + coord_fixed()+ CUSTOM_THEME+ theme(aspect.ratio = 1)+ ggtitle(par_tmp$PCA$customTitle)+ geom_segment(data=df_out_r[which(df_out_r$feature!=""),], - aes(x=0, y=0, xend=v1, yend=v2), - arrow=arrow(type="closed",unit(0.01, "inches"),ends = "both"), - #linetype="solid", - #alpha=0.5, - color="#ab0521")' + aes(x=0, y=0, xend=v1, yend=v2), + arrow=arrow(type="closed",unit(0.01, "inches"),ends = "both"), + #linetype="solid", + #alpha=0.5, + color="#ab0521")' } if (numberOfScenario == 6) { - stringtosave = 'pca_plot <- ggplot(pcaData, aes(x = pcaData[,par_tmp$PCA$x_axis_selection], - y = pcaData[,par_tmp$PCA$y_axis_selection], - color=pcaData[,par_tmp$PCA$coloring_options], - label=global_ID, - global_ID=global_ID, - chosenAnno=chosenAnno)) + - geom_point(size =3)+ - scale_color_manual(values=par_tmp$PCA$colorTheme, - name = par_tmp$PCA$coloring_options)+ - xlab(paste0(names(percentVar[par_tmp$PCA$x_axis_selection]),": ",percentVar[par_tmp$PCA$x_axis_selection], "% variance")) + - ylab(paste0(names(percentVar[par_tmp$PCA$y_axis_selection]),": ", percentVar[par_tmp$PCA$y_axis_selection], "% variance")) + - coord_fixed()+ - CUSTOM_THEME+ - theme(aspect.ratio = 1)+ - ggtitle(par_tmp$PCA$customTitle)+geom_segment(data=df_out_r[which(df_out_r$feature!=""),], - aes(x=0, y=0, xend=v1, yend=v2), - arrow=arrow(type="closed",unit(0.01, "inches"),ends = "both"), - #linetype="solid", - #alpha=0.5, - color="#ab0521")' + stringtosave = ' +pca_plot <- ggplot(pcaData, + aes(x = pcaData[,par_tmp$PCA$x_axis_selection], + y = pcaData[,par_tmp$PCA$y_axis_selection], + color=pcaData[,par_tmp$PCA$coloring_options], + label=global_ID, + global_ID=global_ID, + chosenAnno=chosenAnno)) + + geom_point(size =3)+ +scale_color_manual(values=par_tmp$PCA$colorTheme, + name = par_tmp$PCA$coloring_options)+ +xlab(paste0(names(percentVar[par_tmp$PCA$x_axis_selection]),": ",percentVar[par_tmp$PCA$x_axis_selection], "% variance")) + +ylab(paste0(names(percentVar[par_tmp$PCA$y_axis_selection]),": ", percentVar[par_tmp$PCA$y_axis_selection], "% variance")) + +coord_fixed()+ +CUSTOM_THEME+ +theme(aspect.ratio = 1)+ +ggtitle(par_tmp$PCA$customTitle)+geom_segment(data=df_out_r[which(df_out_r$feature!=""),], + aes(x=0, y=0, xend=v1, yend=v2), + arrow=arrow(type="closed",unit(0.01, "inches"),ends = "both"), + #linetype="solid", + #alpha=0.5, + color="#ab0521")' } ### Scree if (numberOfScenario == 7) { @@ -389,27 +400,73 @@ if(!is.null(par_tmp$PCA$EntitieAnno_Loadings_matrix)){ } ### Loadings single if (numberOfScenario == 8) { - stringtosave = 'plotOut=ggplot(LoadingsDF,aes(x = Loading,y = entitie)) + - geom_col(aes(fill = Loading)) + - scale_y_discrete( - breaks = LoadingsDF$entitie, - labels = stringr::str_wrap(gsub("\\\\.[0-9].*$","",LoadingsDF$entitie),20)) + - scale_fill_gradient2(low = "#277d6a",mid = "white",high = "orange") + - ylab(ifelse(is.null(par_tmp$PCA$EntitieAnno_Loadings),"",par_tmp$PCA$EntitieAnno_Loadings)) + - xlab(paste0("Loadings: ",par_tmp$PCA$x_axis_selection)) + - CUSTOM_THEME' + stringtosave = ' +# Loadings calculations +LoadingsDF <- data.frame( + entitie = rownames(pca$rotation), + Loading = pca$rotation[,par_tmp$PCA$x_axis_selection] +) +LoadingsDF <- LoadingsDF[order(LoadingsDF$Loading,decreasing = T),] +LoadingsDF <- rbind( +LoadingsDF[nrow(LoadingsDF):(nrow(LoadingsDF) - par_tmp$PCA$bottomSlider),], + LoadingsDF[par_tmp$PCA$topSlider:1,] +) +LoadingsDF$entitie <- factor(LoadingsDF$entitie,levels = rownames(LoadingsDF)) +if(!is.null(par_tmp$PCA$EntitieAnno_Loadings)){ + LoadingsDF$entitie=factor( + make.unique(as.character(rowData(res_tmp$data)[rownames(LoadingsDF),par_tmp$PCA$EntitieAnno_Loadings])), + levels = make.unique(as.character(rowData(res_tmp$data)[rownames(LoadingsDF),par_tmp$PCA$EntitieAnno_Loadings])) + ) +} +singleLoadings_plot=ggplot(LoadingsDF,aes(x = Loading,y = entitie)) + +geom_col(aes(fill = Loading)) + +scale_y_discrete( + breaks = LoadingsDF$entitie, + labels = stringr::str_wrap(gsub("\\\\.[0-9].*$","",LoadingsDF$entitie),20)) + +scale_fill_gradient2(low = "#277d6a",mid = "white",high = "orange") + +ylab(ifelse(is.null(par_tmp$PCA$EntitieAnno_Loadings),"",par_tmp$PCA$EntitieAnno_Loadings)) + +xlab(paste0("Loadings: ",par_tmp$PCA$x_axis_selection)) + +CUSTOM_THEME' } ### Loadings matrix if (numberOfScenario == 8.1) { - stringtosave = 'LoadingsMatrix_plot <- ggplot(df_loadings, - aes(x = PC,y = chosenAnno,fill = loading)) + - geom_raster() + - scale_fill_gradientn( - colors = c("#277d6a", "white", "orange"), - limits = c(-max(df_loadings$loading),max(df_loadings$loading)) - ) + - labs(x = "PCs", y = "entity", fill = "Loading") + - CUSTOM_THEME' + stringtosave = ' +if(is.null(par_tmp$PCA$nPCAs_to_look_at)){ + df_loadings <- data.frame( + entity = row.names(pca$rotation), + pca$rotation[, 1:2] + ) +}else{ + df_loadings <- data.frame( + entity = row.names(pca$rotation), + pca$rotation[, 1:par_tmp$PCA$nPCAs_to_look_at] + ) +} + +df_loadings_filtered <- as.matrix(df_loadings[,-1]) >= abs(par_tmp$PCA$filterValue) +entitiesToInclude <- apply(df_loadings_filtered, 1, any) + +df_loadings <- df_loadings[entitiesToInclude,] %>% + tidyr::gather(key = "PC", value = "loading", -entity) + +if(!is.null(par_tmp$PCA$EntitieAnno_Loadings_matrix)){ + df_loadings$chosenAnno <- factor( + make.unique(as.character(rowData(res_tmp$data)[unique(df_loadings$entity),par_tmp$PCA$EntitieAnno_Loadings_matrix])), + levels = make.unique(as.character(rowData(res_tmp$data)[unique(df_loadings$entity),par_tmp$PCA$EntitieAnno_Loadings_matrix])) + ) +}else{ + df_loadings$chosenAnno <- df_loadings$entity +} + +LoadingsMatrix_plot <- ggplot(df_loadings, +aes(x = PC,y = chosenAnno,fill = loading)) + +geom_raster() + +scale_fill_gradientn( +colors = c("#277d6a", "white", "orange"), +limits = c(-max(df_loadings$loading),max(df_loadings$loading)) +) + +labs(x = "PCs", y = "entity", fill = "Loading") + +CUSTOM_THEME' } stringtosave <- paste0(prequel_stringtosave,"\n",stringtosave,"\n","lapply(ls(pattern='plot'), get)") diff --git a/program/shinyApp/R/pca/server.R b/program/shinyApp/R/pca/server.R index 3b5e741b..07d7b071 100644 --- a/program/shinyApp/R/pca/server.R +++ b/program/shinyApp/R/pca/server.R @@ -480,7 +480,6 @@ pca_Server <- function(id, data, params, row_select){ }, content = function(file){ envList <- list( - res_tmp = res_tmp[[session$token]], par_tmp = par_tmp[[session$token]] ) diff --git a/program/shinyApp/R/pre_processing/ui.R b/program/shinyApp/R/pre_processing/ui.R index b33711e7..91c4e88f 100644 --- a/program/shinyApp/R/pre_processing/ui.R +++ b/program/shinyApp/R/pre_processing/ui.R @@ -41,16 +41,66 @@ pre_processing_main_panel <- mainPanel( htmlOutput(outputId = "Statisitcs_Data"), HTML(text = "
"), fluidRow( - column(6, - h4("Raw Data"), - plotOutput("raw_violin_plot"), - plotOutput("raw_kde_plot") + column( + 6, + h4("Raw Data"), + plotOutput("raw_violin_plot"), + plotOutput("raw_kde_plot") ), - column(6, - h4("Pre-processed Data"), - plotOutput("preprocessed_violin_plot"), - plotOutput("preprocessed_kde_plot") + column( + 6, + h4("Pre-processed Data"), + plotOutput("preprocessed_violin_plot"), ) + ), + splitLayout( + style = "border: 1px solid silver:", cellWidths = c("70%", "30%"), + NULL, + actionButton( + inputId = "only2Report_Preprocess", + label = "Send only to Report", + class = "btn-info" + ) + ) %>% helper(type = "markdown", content = "SampleCorr_Downloads"), + splitLayout( + style = "border: 1px solid silver:", cellWidths = c("70%", "30%"), + NULL, + downloadButton( + outputId = "getR_Code_Preprocess", + label = "Get underlying R code and data", + icon = icon(name = "code") + ) + ), + splitLayout( + style = "border: 1px solid silver:", cellWidths = c("70%", "30%"), + NULL, + downloadButton( + outputId = "SavePlot_Preprocess", + label = "Save plot", + class = "btn-info" + ) + ), + splitLayout( + style = "border: 1px solid silver:", cellWidths = c("70%", "30%"), + NULL, + radioGroupButtons( + inputId = "file_ext_Preprocess", + label = "File Type:", + choices = c(".png", ".tiff", ".pdf"), + selected = ".png" + ) + ), + splitLayout( + style = "border: 1px solid silver:", cellWidths = c("50%", "50%"), + cellArgs = list(style = "padding: 5px"), + div(textAreaInput( + inputId = "NotesPreprocessedData", + label = "Notes:", + placeholder = NOTES_PlACEHOLDER, + width = "1000px" + ) %>% helper(type = "markdown", content = "TakingNotesMD_help"), + helpText(NOTES_HELP)), + NULL ) ) ) diff --git a/program/shinyApp/server.R b/program/shinyApp/server.R index 1776e3d0..1736d0b2 100644 --- a/program/shinyApp/server.R +++ b/program/shinyApp/server.R @@ -928,13 +928,34 @@ server <- function(input,output,session){ res_tmp[[session$token]]$data <<- res_tmp[[session$token]]$data[rownames(res_tmp[[session$token]]$data),] par_tmp[[session$token]]['BatchColumn'] <<- input$BatchEffect_Column - # Batch correction before preprocessing + # preprocessing + print(paste0("Do chosen Preprocessing:",input$PreProcessing_Procedure)) + tryCatch({ + if(input$PreProcessing_Procedure == "vst_DESeq"){ + res_tmp[[session$token]]$data <<- deseq_processing( + data = res_tmp[[session$token]]$data, + omic_type = par_tmp[[session$token]]$omic_type, + formula_main = input$DESeq_formula_main, + formula_sub = input$DESeq_formula_sub, + session_token = session$token, + batch_correct = F + ) + } else { + res_tmp[[session$token]]$data <<- preprocessing( + data = res_tmp[[session$token]]$data, + omic_type = par_tmp[[session$token]]$omic_type, + procedure = input$PreProcessing_Procedure + ) + } + }, error = function(e){ + error_modal(e) + req(FALSE) + }) + + # Batch correction after preprocessing if (input$BatchEffect_Column != "NULL" & input$PreProcessing_Procedure != "vst_DESeq") { tryCatch({ - res_tmp[[session$token]]$data_batch_corrected <<- prefiltering( - res_tmp[[session$token]]$data, - par_tmp[[session$token]]$omic_type - ) + res_tmp[[session$token]]$data_batch_corrected <<- res_tmp[[session$token]]$data assay(res_tmp[[session$token]]$data_batch_corrected) <<- sva::ComBat( dat = assay(res_tmp[[session$token]]$data_batch_corrected), batch = as.factor(colData(res_tmp[[session$token]]$data_batch_corrected)[,input$BatchEffect_Column]) @@ -948,13 +969,13 @@ server <- function(input,output,session){ } else if (input$BatchEffect_Column != "NULL" & input$PreProcessing_Procedure == "vst_DESeq"){ tryCatch({ res_tmp[[session$token]]$data_batch_corrected <<- deseq_processing( - data = tmp_data_selected, - omic_type = par_tmp[[session$token]]$omic_type, - formula_main = input$DESeq_formula_main, - formula_sub = c(input$DESeq_formula_sub, input$BatchEffect_Column), - session_token = session$token, - batch_correct = T - ) + data = tmp_data_selected, + omic_type = par_tmp[[session$token]]$omic_type, + formula_main = input$DESeq_formula_main, + formula_sub = c(input$DESeq_formula_sub, input$BatchEffect_Column), + session_token = session$token, + batch_correct = T + ) }, error = function(e){ error_modal( e, additional_text = paste0( @@ -962,7 +983,7 @@ server <- function(input,output,session){ "in the design matrix (one or more factors informing about another one).", "Make sure the batch effect column is correct and ", "that the design matrix is not singular!" - ) + ) ) req(FALSE) }) @@ -970,37 +991,6 @@ server <- function(input,output,session){ res_tmp[[session$token]]$data_batch_corrected <<- NULL } - # preprocessing - print(paste0("Do chosen Preprocessing:",input$PreProcessing_Procedure)) - tryCatch({ - if(input$PreProcessing_Procedure == "vst_DESeq"){ - res_tmp[[session$token]]$data <<- deseq_processing( - data = res_tmp[[session$token]]$data, - omic_type = par_tmp[[session$token]]$omic_type, - formula_main = input$DESeq_formula_main, - formula_sub = input$DESeq_formula_sub, - session_token = session$token, - batch_correct = F - ) - } else { - res_tmp[[session$token]]$data <<- preprocessing( - data = res_tmp[[session$token]]$data, - omic_type = par_tmp[[session$token]]$omic_type, - procedure = input$PreProcessing_Procedure - ) - if (!is.null(res_tmp[[session$token]]$data_batch_corrected)) { - res_tmp[[session$token]]$data_batch_corrected <<- preprocessing( - data = res_tmp[[session$token]]$data_batch_corrected, - omic_type = par_tmp[[session$token]]$omic_type, - procedure = input$PreProcessing_Procedure - ) - } - } - }, error = function(e){ - error_modal(e) - req(FALSE) - }) - if(input$PreProcessing_Procedure == "filterOnly"){ addWarning <- "Only Filtering of low abundant is done only if Transcriptomics or Metabolomics was chosen
" } else if(input$PreProcessing_Procedure == "none"){ @@ -1058,6 +1048,7 @@ server <- function(input,output,session){ violin_plot(res_tmp[[session$token]]$data, color_by = input$violin_color) }) + par_tmp[[session$token]]['violin_color'] <<- input$violin_color waiter$hide() return("Pre-Processing successfully") }) @@ -1083,7 +1074,9 @@ server <- function(input,output,session){ fun_LogIt( message = paste0( "**PreProcessing** - Preprocessing procedure -specific (user-chosen): ", - ifelse(input$PreProcessing_Procedure == "vst_DESeq",paste0(input$PreProcessing_Procedure, "~",input$DESeq_formula_main),input$PreProcessing_Procedure) + ifelse(input$PreProcessing_Procedure == "vst_DESeq", + paste0(input$PreProcessing_Procedure, "~",input$DESeq_formula_main), + input$PreProcessing_Procedure) ) ) if(input$BatchEffect_Column != "NULL"){ @@ -1119,6 +1112,164 @@ server <- function(input,output,session){ }) output$debug <- renderText(dim(res_tmp[[session$token]]$data)) + ## Preprocessing save, Report and Code Snippet + output$SavePlot_Preprocess <- downloadHandler( + filename = function() { + paste0("Preprocessing_ViolinPlot_", Sys.Date(), input$file_ext_Preprocess) + }, + content = function(file) { + # Create individual plots + raw_plot <- violin_plot( + res_tmp[[session$token]]$data_original[par_tmp[[session$token]][['entities_selected']], par_tmp[[session$token]][['samples_selected']]], + color_by = input$violin_color + ) + ggtitle("Count distribution per sample - raw") + theme(legend.position = "none") + + preprocessed_plot <- violin_plot( + res_tmp[[session$token]]$data, + color_by = input$violin_color + ) + ggtitle("Count distribution per sample - preprocessed") + + # Arrange the plots side by side with more space for the right plot + combined_plot <- grid.arrange( + raw_plot, + preprocessed_plot, + ncol = 2, + widths = c(1, 1.3) + ) + + # Save the combined plot + ggsave( + filename = file, + plot = combined_plot, + width = 16, # Increase the width of the figure + height = 8, # Adjust height if necessary + units = "in", + device = gsub("\\.","",input$file_ext_Preprocess) + ) + + on.exit({ + file_path <- paste0("/www/",session$token,"/") + tmp_filename <- paste0( + getwd(), + file_path, + paste0( + "Preprocessing_ViolinPlot_", + format(Sys.time(), "%Y_%m_%d_%H_%M_%S"), + input$file_ext_Preprocess + ) + ) + raw_plot <- violin_plot( + res_tmp[[session$token]]$data_original[par_tmp[[session$token]][['entities_selected']], par_tmp[[session$token]][['samples_selected']]], + color_by = input$violin_color + ) + ggtitle("Count distribution per sample - raw") + theme(legend.position = "none") + + preprocessed_plot <- violin_plot( + res_tmp[[session$token]]$data, + color_by = input$violin_color + ) + ggtitle("Count distribution per sample - preprocessed") + + # Arrange the plots side by side with more space for the right plot + combined_plot <- grid.arrange( + raw_plot, + preprocessed_plot, + ncol = 2, + widths = c(1, 1.3) + ) + ggsave( + filename = tmp_filename, + plot = combined_plot, + width = 16, # Increase the width of the figure + height = 8, # Adjust height if necessary + units = "in", + device = gsub("\\.","",input$file_ext_Preprocess) + ) + + fun_LogIt(message = "## PreProcessing Violin Plot{.tabset .tabset-fade}") + fun_LogIt(message = "### Info") + fun_LogIt(message = paste0("**PreProcess** - The Samples were plotted after: ",input$violin_color)) + fun_LogIt( + message = paste0("**PreProcess** - ![Violin Plot](",tmp_filename,")") + ) + # no publication snippet as thats already in the log + }) + } + ) + + observeEvent(input$only2Report_Preprocess,{ + notificationID <- showNotification("Saving...",duration = 0) + tmp_filename <- paste0( + getwd(), + file_path, + paste0( + "Preprocessing_ViolinPlot_", + format(Sys.time(), "%Y_%m_%d_%H_%M_%S"), + input$file_ext_Preprocess + ) + ) + raw_plot <- violin_plot( + res_tmp[[session$token]]$data_original[par_tmp[[session$token]][['entities_selected']], par_tmp[[session$token]][['samples_selected']]], + color_by = input$violin_color + ) + ggtitle("Count distribution per sample - raw") + theme(legend.position = "none") + + preprocessed_plot <- violin_plot( + res_tmp[[session$token]]$data, + color_by = input$violin_color + ) + ggtitle("Count distribution per sample - preprocessed") + + # Arrange the plots side by side with more space for the right plot + combined_plot <- grid.arrange( + raw_plot, + preprocessed_plot, + ncol = 2, + widths = c(1, 1.3) + ) + ggsave( + filename = tmp_filename, + plot = combined_plot, + width = 16, # Increase the width of the figure + height = 8, # Adjust height if necessary + units = "in", + device = gsub("\\.","",input$file_ext_Preprocess) + ) + fun_LogIt(message = "## PreProcessing Violin Plot{.tabset .tabset-fade}") + fun_LogIt(message = "### Info") + fun_LogIt(message = paste0("**PreProcess** - The Samples were plotted after: ",input$violin_color)) + fun_LogIt( + message = paste0("**PreProcess** - ![Violin Plot](",tmp_filename,")") + ) + # no publication snippet as thats already in the log + removeNotification(notificationID) + showNotification("Saved!",type = "message", duration = 1) + }) + + output$getR_Code_Preprocess <- downloadHandler( + filename = function() { + paste0("ShinyOmics_Rcode2Reproduce_", Sys.Date(), ".zip") + }, + content = function(file) { + envList <- list( + res_tmp = res_tmp[[session$token]], + par_tmp = par_tmp[[session$token]] + ) + temp_directory <- file.path(tempdir(), as.integer(Sys.time())) + dir.create(temp_directory) + + write( + getPlotCode(0.5), + file.path(temp_directory, "Code.R") + ) + + saveRDS(envList, file.path(temp_directory, "Data.RDS")) + + zip::zip( + zipfile = file, + files = dir(temp_directory), + root = temp_directory + ) + }, + contentType = "application/zip" + ) + # Sample Correlation ---- # calling server without reactive it will be init upon start, with no update # of respective data inputs hence need of at least one reactive!