Skip to content

Commit

Permalink
Merge branch 'master' of github.com:seoanezonjic/ExpHunterSuite
Browse files Browse the repository at this point in the history
  • Loading branch information
AEstebanMar committed Sep 6, 2024
2 parents d80743e + c609d79 commit c4df4cf
Show file tree
Hide file tree
Showing 14 changed files with 220 additions and 54 deletions.
36 changes: 33 additions & 3 deletions R/factor_mining.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,42 @@ get_cluster_string_assoc <- function(res.hcpc, string_factors){
return(all_factor_clusters)
}

parse_eigenvectors <- function(eigenvectors) {
parse_eigenvectors <- function(eigenvectors, eig_abs_cutoff = NULL) {

parsed_eigvec <- list()
for (Dim in names(eigenvectors)) {
pDim <- gsub("Dim","PC",Dim)
parsed_eigvec[[paste("positive", pDim, sep = ".")]] <- eigenvectors[[Dim]]
parsed_eigvec[[paste("negative", pDim, sep = ".")]] <- eigenvectors[[Dim]] * -1
eigen_vec <- eigenvectors[[Dim]]

if (!is.null(eig_abs_cutoff))
eigen_vec <- eigen_vec[abs(eigen_vec) > eig_abs_cutoff]
# eigen_vec[abs(eigen_vec) < eig_abs_cutoff] <- 0

parsed_eigvec[[paste("positive", pDim, sep = ".")]] <- eigen_vec
parsed_eigvec[[paste("negative", pDim, sep = ".")]] <- eigen_vec * -1
}
return(parsed_eigvec)
}

get_and_parse_pca_eigenvectors <- function(pca_res, cor_pval_cutoff = 0.05, eig_abs_cutoff = NULL){
dimnames(pca_res$pca_data$svd$V) <- dimnames(pca_res$pca_data$var$cor)
eigenvectors <- name_all_columns(as.data.frame(pca_res$pca_data$svd$V))
correlations <- pca_res$pca_data$var$cor
n <- nrow(pca_res$pca_data$ind$cos2)
cor_sig <- cor_pval(correlations, n)
eigenvectors <- filter_eigenvectors(eigenvectors, cor_sig, pval_cutoff = cor_pval_cutoff)
eigenvectors <- parse_eigenvectors(eigenvectors, eig_abs_cutoff = eig_abs_cutoff)
return(eigenvectors)

}


filter_eigenvectors <- function(eigenvectors, cor_sig, pval_cutoff) {
eigenvec_fil <- lapply(names(eigenvectors), function(dimension) {
eigenvec <- eigenvectors[[dimension]]
significance <- cor_sig[,dimension]
eigenvec[significance <= pval_cutoff]
})
names(eigenvec_fil) <- names(eigenvectors)
return(eigenvec_fil)
}
28 changes: 17 additions & 11 deletions R/functional_analysis_library.R
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ multienricher_topGO <- function(all_funsys, genes_list, universe=NULL,
organism_info, gene_id="entrez", p_value_cutoff = 0.05, algorithm = "classic", statistic = "fisher",
nodeSize = 5, task_size=1, workers=1, scoreOrder = "increasing") {


#checking input format
if (!is.list(genes_list))
genes_list <- list(genes_list)
Expand All @@ -308,7 +309,6 @@ multienricher_topGO <- function(all_funsys, genes_list, universe=NULL,
multi_topGOTest <- function(funsys, genes_list, nodeSize = 5, org_db,
universe = NULL, gene_id = "entrez",p_value_cutoff = 0.05, algorithm = "classic", statistic = "fisher",
scoreOrder = "increasing", workers = 1, task_size = 1, geneSel = NULL) {

if(is.null(universe) && statistic == "fisher") {
go_to_genes <- topGO::annFUN.org(funsys, mapping = org_db, ID = gene_id)
universe <- unique(unlist(go_to_genes))
Expand All @@ -331,26 +331,32 @@ multi_topGOTest <- function(funsys, genes_list, nodeSize = 5, org_db,
return(l_genes)
}
})
all_names <- unique(unlist(lapply(genes_list, names)))
if (statistic == "fisher") {
rand <- factor(c(1,2))
} else {
rand <- seq(0,1,0.1)
}

all_genes <- sample(rand, length(all_names), replace = TRUE)
names(all_genes) <- all_names
# Create environment variables used for initialization of the topGOdata obj
topGO::groupGOTerms()
enriched_cats <- parallel_list(genes_list, function(l_genes) {
if(length(l_genes) == 0)
return(data.frame())
GOdata <- new("topGOdata",
ontology = funsys,
allGenes = genes_list[[1]],
allGenes = l_genes,
nodeSize = nodeSize,
mapping = org_db,
geneSel = geneSel,
annotationFun = topGO::annFUN.org,
ID = gene_id)
enriched_cats <- parallel_list(genes_list, function(l_genes) {
if (statistic == "fisher") {
l_GOdata <- topGO::updateGenes(object = GOdata, geneList = l_genes)
} else {
l_GOdata <- topGO::updateGenes(object = GOdata, geneList = l_genes, geneSel = geneSel)

}
result <- topGO::runTest(l_GOdata, algorithm = algorithm,

result <- topGO::runTest(GOdata, algorithm = algorithm,
statistic = statistic, scoreOrder = scoreOrder)
res_table <- topGO::GenTable(l_GOdata,
res_table <- topGO::GenTable(GOdata,
p.value = result,
topNodes = length(result@score),
format.FUN = function(x, dig, eps){x})
Expand Down
5 changes: 1 addition & 4 deletions R/io_handling.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,7 @@ load_hunter_folder <- function(path = NULL){
})
names(dgh_exp_results[["all_data_normalized"]]) <- sapply(strsplit(packages_results,"_"), utils::tail, 1)

dgh_exp_results$pca_all_genes <- read.table(file.path(path, "PCA_results/all_genes_eigenvectors.txt"),
row.names = 1, header = TRUE)
dgh_exp_results$pca_degs<- read.table(file.path(path, "PCA_results/prevalent_eigenvectors.txt"),
row.names = 1, header = TRUE)
dgh_exp_results$pca_data <- readRDS(file.path(path, "PCA_results/pca_data.rds"))

return(dgh_exp_results)
}
Expand Down
37 changes: 21 additions & 16 deletions R/main_functional_hunter.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,14 +141,15 @@ main_functional_hunter <- function(
}
}


if (!is.null(hunter_results$pca_data)) {
# prepare PCA data for KS
pca_eigenvectors <- list(pca_all_genes = hunter_results$pca_all_genes,
pca_degs = hunter_results$pca_degs)
pca_eigenvectors <- lapply(hunter_results$pca_data,
get_and_parse_pca_eigenvectors,
cor_pval = 1,
eig_abs_cutoff = NULL)

pca_eigenvectors <- lapply(pca_eigenvectors, name_all_columns)
}

pca_eigenvectors <- lapply(pca_eigenvectors, parse_eigenvectors)
############################################################
## ##
## PERFORM ENRICHMENTS ##
Expand Down Expand Up @@ -201,16 +202,20 @@ main_functional_hunter <- function(
p_value_cutoff = pthreshold)
}

func_results$PCA_enrichments <- lapply(pca_eigenvectors,
multienricher_topGO,
all_funsys = enrich_dbs,
organism_info = current_organism_info,
p_value_cutoff = pthreshold,
algorithm = "classic",
statistic = "ks",
gene_id = "ensembl",
scoreOrder = "decreasing")

if (!is.null(hunter_results$pca_data)) {

func_results$PCA_enrichments <- lapply(pca_eigenvectors,
multienricher_topGO,
all_funsys = enrich_dbs,
organism_info = current_organism_info,
p_value_cutoff = pthreshold,
algorithm = "classic",
statistic = "t",
gene_id = "ensembl",
scoreOrder = "decreasing",
workers = cores,
task_size = task_size)
}
############################################################
## ##
## EXPORT DATA ##
Expand All @@ -220,6 +225,6 @@ main_functional_hunter <- function(
# Reorder rows by combined FDR
DEGH_results <- DEGH_results[order(DEGH_results[,"combined_FDR"]),]
func_results$DEGH_results_annot <- DEGH_results

func_results$pca_data <- hunter_results$pca_data
return(func_results)
}
6 changes: 6 additions & 0 deletions R/statistics_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -339,3 +339,9 @@ ct_from_qual <- function(qual_table, col_ref = 1, col_sub = 2){
}
return(all_ct)
}


cor_pval <- function(r, n) {
t_statistic <- r * sqrt((n - 2) / (1 - r^2))
2 * pt(-abs(t_statistic), df = n - 2)
}
1 change: 1 addition & 0 deletions R/write_report.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ write_pca_data <- function(PCA_res, output_files){
write_general_pca(prevalent_pca, pca_output, "prevalent_")
}

saveRDS(PCA_res, file = file.path(pca_output, "pca_data.rds"))
}

write_general_pca <- function(pca_data, output_files, tag = ""){
Expand Down
3 changes: 2 additions & 1 deletion inst/scripts/functional_Hunter.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ if( Sys.getenv('DEGHUNTER_MODE') == 'DEVELOPMENT' ){
custom_libraries <- c('general_functions.R',
'functional_analysis_library.R', 'plotting_functions.R',
'main_functional_hunter.R', "io_handling.R", "plotting_functions.R",
"write_report.R")
"write_report.R", "factor_mining.R", "statistics_functions.R")
for (lib in custom_libraries){
source(file.path(root_path, 'R', lib))
}
Expand Down Expand Up @@ -215,6 +215,7 @@ func_results <- main_functional_hunter(
clusters_flag = clusters_flag
)


write_enrich_files(func_results, opt$output_files)

write_functional_report(hunter_results = hunter_results,
Expand Down
1 change: 1 addition & 0 deletions inst/templates/dea_results.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ pca_data <- PCA_res$DEGs$pca_data
dim_to_keep <- PCA_res$DEGs$dim_to_keep
dim_data <- PCA_res$DEGs$dim_data
dim_data_merged <- PCA_res$DEGs$dim_data_merged
res.hcpc <- PCA_res$DEGs$res.hcpc
```

Expand Down
1 change: 1 addition & 0 deletions inst/templates/expression_qc.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ pca_data <- PCA_res$all_genes$pca_data
dim_to_keep <- PCA_res$all_genes$dim_to_keep
dim_data <- PCA_res$all_genes$dim_data
dim_data_merged <- PCA_res$all_genes$dim_data_merged
res.hcpc <- PCA_res$all_genes$res.hcpc
```

Expand Down
21 changes: 19 additions & 2 deletions inst/templates/facto_miner.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ This is a PCA plot of the count values normalized following the default method a
Graphical representation of PCA dimensions. The bars represent the percentage of total variance that summarize each dimension.
The line measures the percentage of total variance accumulated in previous dimensions. The color distinguishes between significan or no significant dimensions.
Only significant dimensions will be considered in the following plots.

```{r, echo = FALSE, warning =FALSE, message=FALSE, results = 'asis', fig.width = 5, fig.height = 4 }
inertia <- as.data.frame(pca_data$eig)[,c(2,3)]
inertia$names <- factor(gsub("comp ", "PC", rownames(inertia)), levels= gsub("comp ", "PC", rownames(inertia)))
Expand All @@ -21,11 +22,28 @@ ggplot2::labs(x = "", y = "Percentage of total variance")+
ggplot2::scale_y_continuous(sec.axis = ggplot2::sec_axis(~./100, name = "Cumulative percentage of variance", labels = scales::percent)) +
ggplot2::theme(legend.position = "bottom", axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))
```


#### **Distribution of Eigenvectors**
The eigenvector contains the weights of each gene for the PC. Here are represented the distributions of the weights of each eigenvector.
The vertical lines represent the quantiles.

```
```{r , echo = FALSE, warning =FALSE, message=FALSE, results = 'asis', fig.width = 5, fig.height = 4 }
eig_dist <- pca_data$svd$V
colnames(eig_dist) <- colnames(pca_data$var$cor)
eig_dist <- reshape2::melt(eig_dist)
ggplot2::ggplot(eig_dist, ggplot2::aes(x = value,
y = Var2)) +
ggridges::geom_density_ridges(jittered_points = TRUE,
position = "raincloud",
alpha = 0.7,
scale = 0.9,
quantile_lines = TRUE) +
ggplot2::theme_classic()
```

#### **Comparison of significant dimensions significant dimensions**

Expand Down Expand Up @@ -105,7 +123,6 @@ factoextra::fviz_pca_biplot(pca_data, repel = TRUE,select.var = list(name=numeri

```{r , echo = FALSE, warning =FALSE, message=FALSE, results = 'asis'}
cat(paste("#### **Hierarchical clustering of individuals using first", dim_to_keep, "significant PCA dimensions**",sep=' '))
res.hcpc <- FactoMineR::HCPC(pca_data, graph = FALSE)
plot(res.hcpc , choice="tree", title="")
```
Expand Down
9 changes: 2 additions & 7 deletions inst/templates/func_initial_details.Rmd
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
```{r, include = FALSE}
# Load necessary packages
#require(ggplot2)
#require(knitr)
#require(clusterProfiler)
```


## **Used data in this analysis**
Specifically, in this experiment set, known experiment labels are:

`r paste(knitr::knit(text = paste(sample_classes, collapse = "\n")), collapse = "\n")`
`r paste(knitr::knit(text = paste(sample_classes[-1], collapse = "\n")), collapse = "\n")`

## **General description**
This report contains all the functional information that was requested by the options when functional_Hunter.R was executed.
Expand Down
15 changes: 15 additions & 0 deletions inst/templates/functional_report.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ require(DOSE)
```{r child="func_top_genes.Rmd"}
```


```{r ORA_analysis, echo=FALSE, results='asis', message=FALSE, warning=FALSE}
Expand Down Expand Up @@ -73,7 +74,21 @@ for(funsys in names(flags_gsea)) {
}
cat(unlist(res_gsea), sep = '\n')
```

```{r PCA_enr, echo=FALSE, results='asis', message=FALSE, warning=FALSE}
res_PCA <- list()
for(funsys in names(flags_ora)) {
if (funsys %in% c("BP", "MF", "CC")){
pca_enr_all <- func_results$PCA_enrichments$all_genes[[funsys]]
pca_enr_degs <- func_results$PCA_enrichments$DEGs[[funsys]]
pca_enr <- knitr::knit_expand("pca_enr.Rmd")
res_PCA[[funsys]] <- knitr::knit(text=pca_enr, quiet=TRUE)
}
}
cat(unlist(res_PCA), sep = '\n')
```

## **Values of options passed to the Functional Hunter main function**
Expand Down
59 changes: 59 additions & 0 deletions inst/templates/pca_enr.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
```{r , echo=FALSE, results='asis', message=FALSE, warning=FALSE}
print_go_tables <- function(all_enr) {
cat("\n\n\n<br>")
cat('<div style="display:flex; flex-wrap: wrap;">')
for (dimension in names(all_enr)) {
cat('<div style ="width:50%">')
cat(paste0("\n**",dimension,"**<br>"))
top_GOs <- head(all_enr[[dimension]], 10)
if (nrow(top_GOs) != 0){
top_GOs <- top_GOs[,c("GO.ID","Term","p.value","fdr")]
top_GOs[,c("p.value","fdr")]<- format(top_GOs[,c("p.value","fdr")], scientific = TRUE, digits = 3)
print(htmltools::tagList(
DT::datatable(top_GOs, filter = 'top', rownames = FALSE, extensions = c('Buttons','ColReorder'),
options = list(
colReorder = TRUE,
dom = 'lftBip',
buttons = c('copy', 'csv', 'excel')))
))
} else {
cat("\n**No enrichments found**\n")
}
cat("</div>")
}
cat("</div>")
}
```
```{r "{{funsys}}_enr", echo=FALSE, results='asis', message=FALSE, warning=FALSE}
cat("\n## **{{funsys}} - Enrichments for Principal Components**\n")
cat("### **Functional enrichments of PCs using all genes**\n")
last_dim <- 1
while (last_dim < func_results$pca_data$all_genes$dim_to_keep){
plot(FactoMineR::plot.PCA(func_results$pca_data$all_genes$pca_data,
invisible = "quali",
axis = c(last_dim, last_dim + 1),
title = paste0(last_dim, " and ", last_dim + 1, " PCs")))
last_dim <- last_dim + 2
}
print_go_tables(pca_enr_all)
cat("### **Functional enrichments of PCs using only DEGs**\n")
last_dim <- 1
while (last_dim < func_results$pca_data$all_genes$dim_to_keep){
plot(FactoMineR::plot.PCA(func_results$pca_data$DEGs$pca_data,
invisible = "quali",
axis = c(last_dim, last_dim + 1),
title = paste0(last_dim, " and ", last_dim + 1, " PCs")))
last_dim <- last_dim + 2
}
print_go_tables(pca_enr_degs)
```
Loading

0 comments on commit c4df4cf

Please sign in to comment.