Merge branch 'master' of github.com:seoanezonjic/ExpHunterSuite

seoanezonjic · Sep 6, 2024 · c4df4cf · c4df4cf
2 parents d80743e + c609d79
commit c4df4cf
Show file tree

Hide file tree

Showing 14 changed files with 220 additions and 54 deletions.
diff --git a/R/factor_mining.R b/R/factor_mining.R
@@ -117,12 +117,42 @@ get_cluster_string_assoc <- function(res.hcpc, string_factors){
 	return(all_factor_clusters)
 }
 
-parse_eigenvectors <- function(eigenvectors) {
+parse_eigenvectors <- function(eigenvectors, eig_abs_cutoff = NULL) {
+
 	parsed_eigvec <- list()
 	for (Dim in names(eigenvectors)) {
 		pDim <- gsub("Dim","PC",Dim)
-		parsed_eigvec[[paste("positive", pDim, sep = ".")]] <- eigenvectors[[Dim]]
-		parsed_eigvec[[paste("negative", pDim, sep = ".")]] <- eigenvectors[[Dim]] * -1
+		eigen_vec <- eigenvectors[[Dim]]
+
+		if (!is.null(eig_abs_cutoff))
+			eigen_vec <- eigen_vec[abs(eigen_vec) > eig_abs_cutoff]
+			# eigen_vec[abs(eigen_vec) < eig_abs_cutoff] <- 0
+
+		parsed_eigvec[[paste("positive", pDim, sep = ".")]] <- eigen_vec
+		parsed_eigvec[[paste("negative", pDim, sep = ".")]] <- eigen_vec * -1
 	}
 	return(parsed_eigvec)
+}
+
+get_and_parse_pca_eigenvectors <- 	function(pca_res, cor_pval_cutoff = 0.05, eig_abs_cutoff = NULL){
+	dimnames(pca_res$pca_data$svd$V) <- dimnames(pca_res$pca_data$var$cor)
+	eigenvectors <- name_all_columns(as.data.frame(pca_res$pca_data$svd$V))
+	correlations <- pca_res$pca_data$var$cor
+	n <- nrow(pca_res$pca_data$ind$cos2)
+	cor_sig <- cor_pval(correlations, n)
+	eigenvectors <- filter_eigenvectors(eigenvectors, cor_sig, pval_cutoff = cor_pval_cutoff)
+	eigenvectors <- parse_eigenvectors(eigenvectors, eig_abs_cutoff = eig_abs_cutoff)
+	return(eigenvectors)
+
+}
+
+
+filter_eigenvectors <- function(eigenvectors, cor_sig, pval_cutoff) {
+ 	eigenvec_fil <- lapply(names(eigenvectors), function(dimension) {
+		eigenvec <- eigenvectors[[dimension]]
+		significance <- cor_sig[,dimension]
+	  eigenvec[significance <= pval_cutoff] 
+	})
+	names(eigenvec_fil) <- names(eigenvectors)
+	return(eigenvec_fil)
 }
diff --git a/R/functional_analysis_library.R b/R/functional_analysis_library.R
@@ -283,6 +283,7 @@ multienricher_topGO <- function(all_funsys, genes_list, universe=NULL,
   organism_info, gene_id="entrez", p_value_cutoff = 0.05, algorithm = "classic", statistic = "fisher", 
   nodeSize = 5, task_size=1, workers=1, scoreOrder = "increasing") {
 
+
   #checking input format
   if (!is.list(genes_list)) 
     genes_list <- list(genes_list)
@@ -308,7 +309,6 @@ multienricher_topGO <- function(all_funsys, genes_list, universe=NULL,
 multi_topGOTest <- function(funsys, genes_list, nodeSize = 5, org_db, 
                             universe = NULL, gene_id = "entrez",p_value_cutoff = 0.05, algorithm = "classic", statistic = "fisher",
                             scoreOrder = "increasing", workers = 1, task_size = 1, geneSel = NULL) {
-
   if(is.null(universe) && statistic == "fisher") {
     go_to_genes <- topGO::annFUN.org(funsys, mapping = org_db, ID = gene_id)
     universe <- unique(unlist(go_to_genes))
@@ -331,26 +331,32 @@ multi_topGOTest <- function(funsys, genes_list, nodeSize = 5, org_db,
       return(l_genes)
     }   
   })
+  all_names <- unique(unlist(lapply(genes_list, names)))
+  if (statistic == "fisher") {
+    rand <- factor(c(1,2))
+  } else {
+    rand <- seq(0,1,0.1)
+  }
+
+  all_genes <- sample(rand, length(all_names), replace = TRUE)
+  names(all_genes) <-  all_names 
   # Create environment variables used for initialization of the topGOdata obj
   topGO::groupGOTerms()
+  enriched_cats <- parallel_list(genes_list, function(l_genes) {
+  if(length(l_genes) == 0)
+    return(data.frame())
   GOdata <- new("topGOdata",
                ontology = funsys,
-               allGenes = genes_list[[1]],
+               allGenes = l_genes,
                nodeSize = nodeSize,
                mapping = org_db,
                geneSel = geneSel,
                annotationFun = topGO::annFUN.org,
                ID = gene_id)
-  enriched_cats <- parallel_list(genes_list, function(l_genes) {
-    if (statistic == "fisher") {
-      l_GOdata <- topGO::updateGenes(object = GOdata, geneList = l_genes)
-    } else {
-      l_GOdata <- topGO::updateGenes(object = GOdata, geneList = l_genes, geneSel = geneSel)
-
-    }
-    result <- topGO::runTest(l_GOdata, algorithm = algorithm, 
+
+    result <- topGO::runTest(GOdata, algorithm = algorithm, 
     statistic = statistic, scoreOrder = scoreOrder)
-    res_table <- topGO::GenTable(l_GOdata, 
+    res_table <- topGO::GenTable(GOdata, 
                                  p.value = result,
                                  topNodes = length(result@score), 
                                  format.FUN = function(x, dig, eps){x})

diff --git a/R/io_handling.R b/R/io_handling.R
@@ -89,10 +89,7 @@ load_hunter_folder <- function(path = NULL){
     })
     names(dgh_exp_results[["all_data_normalized"]]) <- sapply(strsplit(packages_results,"_"), utils::tail, 1)
 
-    dgh_exp_results$pca_all_genes <- read.table(file.path(path, "PCA_results/all_genes_eigenvectors.txt"), 
-                                                row.names = 1, header = TRUE)
-    dgh_exp_results$pca_degs<- read.table(file.path(path, "PCA_results/prevalent_eigenvectors.txt"), 
-                                                row.names = 1, header = TRUE)
+    dgh_exp_results$pca_data <- readRDS(file.path(path, "PCA_results/pca_data.rds"))
 
     return(dgh_exp_results)
 }

diff --git a/R/main_functional_hunter.R b/R/main_functional_hunter.R
@@ -141,14 +141,15 @@ main_functional_hunter <- function(
         }
     }
 
-
+    if (!is.null(hunter_results$pca_data)) {
     # prepare PCA data for KS
-    pca_eigenvectors <- list(pca_all_genes  = hunter_results$pca_all_genes,
-                             pca_degs = hunter_results$pca_degs)
+        pca_eigenvectors <-  lapply(hunter_results$pca_data, 
+                                    get_and_parse_pca_eigenvectors, 
+                                    cor_pval = 1, 
+                                    eig_abs_cutoff = NULL)
 
-    pca_eigenvectors <- lapply(pca_eigenvectors, name_all_columns)
+    }
 
-    pca_eigenvectors <- lapply(pca_eigenvectors, parse_eigenvectors)
     ############################################################
     ##                                                        ##
     ##                     PERFORM ENRICHMENTS                ## 
@@ -201,16 +202,20 @@ main_functional_hunter <- function(
                                                    p_value_cutoff = pthreshold)
     }
 
-    func_results$PCA_enrichments <- lapply(pca_eigenvectors, 
-                                           multienricher_topGO, 
-                                           all_funsys = enrich_dbs,
-                                           organism_info = current_organism_info,
-                                           p_value_cutoff = pthreshold,
-                                           algorithm = "classic", 
-                                           statistic = "ks",
-                                           gene_id = "ensembl", 
-                                           scoreOrder = "decreasing")
-
+    if (!is.null(hunter_results$pca_data)) {
+
+        func_results$PCA_enrichments <- lapply(pca_eigenvectors, 
+                                                multienricher_topGO, 
+                                                all_funsys = enrich_dbs,
+                                                organism_info = current_organism_info,
+                                                p_value_cutoff = pthreshold,
+                                                algorithm = "classic",
+                                                statistic = "t",
+                                                gene_id = "ensembl", 
+                                                scoreOrder = "decreasing",
+                                                workers = cores, 
+                                                task_size = task_size)
+    }
     ############################################################
     ##                                                        ##
     ##                     EXPORT DATA                        ## 
@@ -220,6 +225,6 @@ main_functional_hunter <- function(
     # Reorder rows by combined FDR
     DEGH_results <- DEGH_results[order(DEGH_results[,"combined_FDR"]),] 
     func_results$DEGH_results_annot <- DEGH_results
-
+    func_results$pca_data <- hunter_results$pca_data
     return(func_results)
 }
diff --git a/R/statistics_functions.R b/R/statistics_functions.R
@@ -339,3 +339,9 @@ ct_from_qual <- function(qual_table, col_ref = 1, col_sub = 2){
   } 
   return(all_ct)
 }
+
+
+cor_pval <- function(r, n) {
+  t_statistic <- r * sqrt((n - 2) / (1 - r^2))
+  2 * pt(-abs(t_statistic), df = n - 2)
+}
diff --git a/R/write_report.R b/R/write_report.R
@@ -92,6 +92,7 @@ write_pca_data <- function(PCA_res, output_files){
        write_general_pca(prevalent_pca, pca_output, "prevalent_")
     }
 
+    saveRDS(PCA_res, file = file.path(pca_output, "pca_data.rds"))
 }
 
 write_general_pca <- function(pca_data, output_files, tag = ""){

diff --git a/inst/scripts/functional_Hunter.R b/inst/scripts/functional_Hunter.R
@@ -13,7 +13,7 @@ if( Sys.getenv('DEGHUNTER_MODE') == 'DEVELOPMENT' ){
     custom_libraries <- c('general_functions.R', 
         'functional_analysis_library.R', 'plotting_functions.R', 
         'main_functional_hunter.R', "io_handling.R", "plotting_functions.R", 
-        "write_report.R")
+        "write_report.R", "factor_mining.R", "statistics_functions.R")
     for (lib in custom_libraries){
         source(file.path(root_path, 'R', lib))
     }
@@ -215,6 +215,7 @@ func_results <- main_functional_hunter(
        clusters_flag = clusters_flag
 )
 
+
 write_enrich_files(func_results, opt$output_files)
 
 write_functional_report(hunter_results = hunter_results, 

diff --git a/inst/templates/dea_results.Rmd b/inst/templates/dea_results.Rmd
@@ -181,6 +181,7 @@ pca_data <- PCA_res$DEGs$pca_data
 dim_to_keep <- PCA_res$DEGs$dim_to_keep
 dim_data <- PCA_res$DEGs$dim_data
 dim_data_merged <- PCA_res$DEGs$dim_data_merged
+res.hcpc <- PCA_res$DEGs$res.hcpc
 
 ```
 

diff --git a/inst/templates/expression_qc.Rmd b/inst/templates/expression_qc.Rmd
@@ -55,6 +55,7 @@ pca_data <- PCA_res$all_genes$pca_data
 dim_to_keep <- PCA_res$all_genes$dim_to_keep
 dim_data <- PCA_res$all_genes$dim_data
 dim_data_merged <- PCA_res$all_genes$dim_data_merged
+res.hcpc <- PCA_res$all_genes$res.hcpc
 
 ```
 

diff --git a/inst/templates/facto_miner.Rmd b/inst/templates/facto_miner.Rmd
@@ -10,6 +10,7 @@ This is a PCA plot of the count values normalized following the default method a
 Graphical representation of PCA dimensions. The bars represent the percentage of total variance that summarize each dimension. 
 The line measures the percentage of total variance accumulated in previous dimensions. The color distinguishes between significan or no significant dimensions. 
 Only significant dimensions will be considered in the following plots.
+
 ```{r, echo = FALSE, warning =FALSE, message=FALSE, results = 'asis', fig.width = 5, fig.height = 4 }
 inertia <- as.data.frame(pca_data$eig)[,c(2,3)]
 inertia$names <- factor(gsub("comp ", "PC", rownames(inertia)), levels= gsub("comp ", "PC", rownames(inertia)))
@@ -21,11 +22,28 @@ ggplot2::labs(x = "", y = "Percentage of total variance")+
 ggplot2::scale_y_continuous(sec.axis = ggplot2::sec_axis(~./100, name = "Cumulative percentage of variance", labels = scales::percent)) + 
 ggplot2::theme(legend.position = "bottom", axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))
 
+```
 
 
+#### **Distribution of Eigenvectors**
+The eigenvector contains the weights of each gene for the PC. Here are represented the distributions of the weights of each eigenvector.
+The vertical lines represent the quantiles. 
 
-```
+```{r , echo = FALSE, warning =FALSE, message=FALSE, results = 'asis', fig.width = 5, fig.height = 4 }
 
+eig_dist <- pca_data$svd$V
+colnames(eig_dist) <- colnames(pca_data$var$cor)
+eig_dist <- reshape2::melt(eig_dist)
+
+ggplot2::ggplot(eig_dist, ggplot2::aes(x = value, 
+                                            y = Var2)) + 
+ggridges::geom_density_ridges(jittered_points = TRUE, 
+                              position = "raincloud", 
+                              alpha = 0.7, 
+                              scale = 0.9, 
+                              quantile_lines = TRUE) + 
+ggplot2::theme_classic()
+```
 
 #### **Comparison of significant dimensions significant dimensions**
 
@@ -105,7 +123,6 @@ factoextra::fviz_pca_biplot(pca_data, repel = TRUE,select.var = list(name=numeri
 
 ```{r , echo = FALSE, warning =FALSE, message=FALSE, results = 'asis'}
 cat(paste("#### **Hierarchical clustering of individuals using first", dim_to_keep, "significant PCA dimensions**",sep=' '))
-res.hcpc <- FactoMineR::HCPC(pca_data, graph = FALSE)
 plot(res.hcpc , choice="tree", title="")
 
 ```

diff --git a/inst/templates/func_initial_details.Rmd b/inst/templates/func_initial_details.Rmd
@@ -1,14 +1,9 @@
-```{r, include = FALSE} 
-# Load necessary packages
-#require(ggplot2)
-#require(knitr)
-#require(clusterProfiler)
-```
+
 
 ## **Used data in this analysis**
 Specifically, in this experiment set, known experiment labels are:
 
-`r paste(knitr::knit(text = paste(sample_classes, collapse = "\n")), collapse = "\n")`
+`r paste(knitr::knit(text = paste(sample_classes[-1], collapse = "\n")), collapse = "\n")`
 
 ## **General description**
 This report contains all the functional information that was requested by the options when functional_Hunter.R was executed.

diff --git a/inst/templates/functional_report.Rmd b/inst/templates/functional_report.Rmd
@@ -45,6 +45,7 @@ require(DOSE)
 ```{r child="func_top_genes.Rmd"}
 ```
 
+
 ```{r ORA_analysis, echo=FALSE, results='asis', message=FALSE, warning=FALSE}
 
 
@@ -73,7 +74,21 @@ for(funsys in names(flags_gsea)) {
 }
 cat(unlist(res_gsea), sep = '\n')
 
+```
+
+```{r PCA_enr, echo=FALSE, results='asis', message=FALSE, warning=FALSE}
+res_PCA <- list()
+
+for(funsys in names(flags_ora)) {
+  if (funsys %in% c("BP", "MF", "CC")){
+    pca_enr_all <- func_results$PCA_enrichments$all_genes[[funsys]]
+    pca_enr_degs <- func_results$PCA_enrichments$DEGs[[funsys]]
+    pca_enr <- knitr::knit_expand("pca_enr.Rmd")
+    res_PCA[[funsys]] <- knitr::knit(text=pca_enr, quiet=TRUE)
+  }
+}
 
+cat(unlist(res_PCA), sep = '\n')
 ```
 
 ## **Values of options passed to the Functional Hunter main function**

diff --git a/inst/templates/pca_enr.Rmd b/inst/templates/pca_enr.Rmd
@@ -0,0 +1,59 @@
+```{r , echo=FALSE, results='asis', message=FALSE, warning=FALSE}
+	print_go_tables <- function(all_enr) {
+		cat("\n\n\n<br>")
+			cat('<div style="display:flex; flex-wrap: wrap;">')
+		for (dimension in names(all_enr)) {
+
+			cat('<div style ="width:50%">')
+			cat(paste0("\n**",dimension,"**<br>"))
+			top_GOs <- head(all_enr[[dimension]], 10)
+			if (nrow(top_GOs) != 0){
+				top_GOs <- top_GOs[,c("GO.ID","Term","p.value","fdr")]
+				top_GOs[,c("p.value","fdr")]<- format(top_GOs[,c("p.value","fdr")], scientific = TRUE, digits = 3)
+				print(htmltools::tagList(
+					DT::datatable(top_GOs, filter = 'top', rownames = FALSE, extensions = c('Buttons','ColReorder'),
+		                    options = list(
+		                      colReorder = TRUE,
+		                      dom = 'lftBip',
+		                        buttons = c('copy', 'csv', 'excel')))
+				))
+			} else {
+				cat("\n**No enrichments found**\n")
+			}
+			cat("</div>")
+
+		}	
+		cat("</div>")
+	}
+```
+```{r "{{funsys}}_enr", echo=FALSE, results='asis', message=FALSE, warning=FALSE}
+cat("\n## **{{funsys}} - Enrichments for Principal Components**\n")
+
+cat("### **Functional enrichments of PCs using all genes**\n")
+
+last_dim <- 1
+while (last_dim < func_results$pca_data$all_genes$dim_to_keep){
+	plot(FactoMineR::plot.PCA(func_results$pca_data$all_genes$pca_data, 
+						invisible = "quali", 
+						axis = c(last_dim, last_dim + 1), 
+						title = paste0(last_dim, " and ", last_dim + 1, " PCs")))
+	last_dim <- last_dim + 2
+} 
+
+print_go_tables(pca_enr_all)
+
+cat("### **Functional enrichments of PCs using only DEGs**\n")
+
+last_dim <- 1
+while (last_dim < func_results$pca_data$all_genes$dim_to_keep){
+	plot(FactoMineR::plot.PCA(func_results$pca_data$DEGs$pca_data, 
+		invisible = "quali", 
+		axis = c(last_dim, last_dim + 1), 
+		title = paste0(last_dim, " and ", last_dim + 1, " PCs")))
+	last_dim <- last_dim + 2
+} 
+
+print_go_tables(pca_enr_degs)
+
+
+```