Skip to content

Commit

Permalink
Use ggkegg for visualisation of all KEGG pathway diagrams (#200)
Browse files Browse the repository at this point in the history
* implemented a new `color_kegg_pathway()` function using `ggkegg` to create colored KEGG pathway ggplot objects (instead of using `KEGGREST` to obtain the colored PNG files, which no longer works #169)
* renamed `visualize_hsa_KEGG()` to `visualize_KEGG_diagram()` to reflect that it can now handle KEGG pathway enrichment results from any organism
* updated the `visualize_term_interactions()` and `visualize_KEGG_diagram()` functions so that they now return a list of ggplot objects (named by term ID)
* updated the `get_kegg_gsets()` function to also use `ggkegg` to fetch the genes of each pathway
* removed unneeded dependencies: `magick`, `KEGGgraph` and `KEGGREST`
  • Loading branch information
egeulgen authored Apr 26, 2024
1 parent 8194f94 commit 5bb9800
Show file tree
Hide file tree
Showing 25 changed files with 352 additions and 724 deletions.
9 changes: 4 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: pathfindR
Type: Package
Title: Enrichment Analysis Utilizing Active Subnetworks
Version: 2.3.1.9000
Version: 2.3.1.9001
Authors@R: c(person("Ege", "Ulgen",
role = c("cre", "cph"),
email = "[email protected]",
Expand Down Expand Up @@ -45,19 +45,18 @@ Imports:
ggraph,
ggupset,
fpc,
ggkegg,
grDevices,
httr,
igraph,
R.utils,
magick,
msigdbr,
KEGGREST,
KEGGgraph,
knitr
Depends: R (>= 4.0),
pathfindR.data (>= 2.0)
Suggests:
testthat (>= 2.3.2),
covr,
mockery
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
VignetteBuilder: knitr
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ export(score_terms)
export(summarize_enrichment_results)
export(term_gene_graph)
export(term_gene_heatmap)
export(visualize_KEGG_diagram)
export(visualize_active_subnetworks)
export(visualize_hsa_KEGG)
export(visualize_term_interactions)
export(visualize_terms)
import(doParallel)
Expand Down
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# pathfindR (development version)

## Major Changes

- implemented a new `color_kegg_pathway()` function using `ggkegg` to create colored KEGG pathway ggplot objects (instead of using `KEGGREST` to obtain the colored PNG files, which no longer works #169)
- renamed `visualize_hsa_KEGG()` to `visualize_KEGG_diagram()` to reflect that it can now handle KEGG pathway enrichment results from any organism
- updated the `visualize_term_interactions()` and `visualize_KEGG_diagram()` functions so that they now return a list of ggplot objects (named by term ID)
- updated the `get_kegg_gsets()` function to also use `ggkegg` to fetch the genes of each pathway
- removed unneeded dependencies: `magick`, `KEGGgraph` and `KEGGREST`

# pathfindR 2.3.1

## Minor Changes and Bug Fixes
Expand Down
2 changes: 1 addition & 1 deletion R/comparison.R
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ combined_results_graph <- function(combined_df, selected_terms = "common", use_d
p <- p + ggplot2::scale_size(range = c(5, 10), breaks = round(seq(round(min(igraph::V(g)$size)),
round(max(igraph::V(g)$size)), length.out = 4)), name = size_label)
p <- p + ggplot2::theme_void()
p <- p + suppressWarnings(ggraph::geom_node_text(ggplot2::aes_(label = ~name),
p <- p + suppressWarnings(ggraph::geom_node_text(ggplot2::aes(label = .data$name),
nudge_y = 0.2, repel = TRUE, max.overlaps = 20))

vertex_cols <- c(`Common term` = "#FCCA46", `A-only term` = "#9FB8AD", `B-only term` = "#619B8A",
Expand Down
71 changes: 36 additions & 35 deletions R/data_generation.R
Original file line number Diff line number Diff line change
Expand Up @@ -171,45 +171,46 @@ gset_list_from_gmt <- function(path2gmt, descriptions_idx = 2) {
#' \item{descriptions - A named vector containing the descriptions for each KEGG pathway}
#' }
get_kegg_gsets <- function(org_code = "hsa") {
# Builds KEGG pathway gene sets for the organism code `org_code` and returns
# list(gene_sets = <pathway id -> genes>, descriptions = <pathway id -> text>).
#
# NOTE(review): this span looks like a rendered diff whose +/- markers were
# stripped, so statements of a KEGGREST-based variant and of an httr/ggkegg-based
# variant are interleaved -- confirm against the repository which statements
# belong to the current function. Read literally as one body, execution ends at
# the first top-level `return(result)` and every statement after it is
# unreachable.

# created named list, eg: path:map00010: 'Glycolysis / Gluconeogenesis'
pathways_list <- KEGGREST::keggList("pathway", org_code)

# make them into KEGG-style pathway identifiers
pathway_codes <- sub("path:", "", names(pathways_list))

# parse pathway genes
genes_by_pathway <- lapply(pathway_codes, function(pwid) {
pw <- KEGGREST::keggGet(pwid)

## get gene symbols
all_entries <- pw[[1]]$GENE
# pathways without a GENE field contribute NULL (length 0) and are filtered
# out later by the length check
if (is.null(all_entries)) {
return(NULL)
}
# `tmp` is a length-2 logical mask that R recycles over `all_entries`, so it
# keeps every other element: odd positions by default, even positions when the
# second entry contains ";"
tmp <- c(TRUE, FALSE)
if (grepl(";", all_entries[2])) {
tmp <- c(FALSE, TRUE)
}
pw <- all_entries[tmp]

pw <- sub(";.+", "", pw) ## discard any description
# keep only entries made of letters, digits, "_" or "-", optionally ending in "@"
pw <- pw[grep("^[A-Za-z0-9_-]+(\\@)?$", pw)] ## remove mistaken lines
pw <- unique(pw) ## keep unique genes
return(pw)
})

names(genes_by_pathway) <- pathway_codes
message("Grab a cup of coffee, this will take a while...")

# remove empty gene sets (metabolic pathways)
kegg_genes <- genes_by_pathway[vapply(genes_by_pathway, length, 1) != 0]
# request the pathway list for `org_code` from the KEGG REST endpoint and read
# the response body as text
url <- paste0("https://rest.kegg.jp/list/pathway/", org_code)
result <- httr::GET(url)
result <- httr::content(result, "text")

kegg_descriptions <- pathways_list
names(kegg_descriptions) <- sub("path:", "", names(kegg_descriptions))
# drop everything after the last "-": the inner sub() rewrites the final "-" as
# "&", the outer sub() then removes from " & " to the end of the string
kegg_descriptions <- sub(" & .*$", "", sub("-([^-]*)$", "&\\1", kegg_descriptions))
kegg_descriptions <- kegg_descriptions[names(kegg_descriptions) %in% names(kegg_genes)]
# one response line per pathway; each line is split on tabs, the first field is
# used as the pathway id and the second as its description ("id" and
# "description" are only the character(1) templates required by vapply)
parsed_result <- strsplit(result, "\n")[[1]]
pathway_ids <- vapply(parsed_result, function(x) unlist(strsplit(x, "\t"))[1], "id")
pathway_descriptons <- vapply(parsed_result, function(x) unlist(strsplit(x, "\t"))[2], "description")
names(pathway_descriptons) <- pathway_ids

result <- list(gene_sets = kegg_genes, descriptions = kegg_descriptions)
return(result)
genes_by_pathway <- lapply(pathway_ids, function(pw_id) {
# fetch the pathway into the session temp directory without using a cache;
# presumably an igraph object is returned (return_tbl_graph = FALSE), since
# igraph::V() is applied to it below -- TODO confirm
pathways_graph <- ggkegg::pathway(pid = pw_id, directory = tempdir(), use_cache = FALSE, return_tbl_graph = FALSE)
# keep the names of vertices whose type is "gene"; a name may hold several
# space-separated ids, so split them into individual ids
all_pw_gene_ids <- igraph::V(pathways_graph)$name[igraph::V(pathways_graph)$type == "gene"]
all_pw_gene_ids <- unlist(strsplit(all_pw_gene_ids, " "))
all_pw_gene_ids <- unique(all_pw_gene_ids)
# NOTE(review): only a literal "hsa:" prefix is removed, regardless of
# `org_code` -- confirm whether organisms other than "hsa" are meant to work here
all_pw_gene_ids <- sub("^hsa:", "", all_pw_gene_ids)

# NOTE(review): ids are looked up in the human map org.Hs.eg.db::org.Hs.egSYMBOL;
# ids without a match come back as NA (ifnotfound = NA) and are dropped below
all_pw_gene_symbols <- AnnotationDbi::mget(
all_pw_gene_ids, org.Hs.eg.db::org.Hs.egSYMBOL, ifnotfound = NA
)

all_pw_gene_symbols <- unique(unname(unlist(all_pw_gene_symbols)))
all_pw_gene_symbols <- all_pw_gene_symbols[!is.na(all_pw_gene_symbols)]

return(all_pw_gene_symbols)
})

names(genes_by_pathway) <- pathway_ids

# remove empty gene sets (e.g. pure metabolic pathways)
kegg_genes <- genes_by_pathway[vapply(genes_by_pathway, length, 1) != 0]

kegg_descriptions <- pathway_descriptons
# drop everything after the last "-": the inner sub() rewrites the final "-" as
# "&", the outer sub() then removes from " & " to the end of the string
kegg_descriptions <- sub(" & .*$", "", sub("-([^-]*)$", "&\\1", kegg_descriptions))
# keep descriptions only for pathways that still have a gene set
kegg_descriptions <- kegg_descriptions[names(kegg_descriptions) %in% names(kegg_genes)]

result <- list(gene_sets = kegg_genes, descriptions = kegg_descriptions)
return(result)
}

#' Retrieve Reactome Pathway Gene Sets
Expand Down
4 changes: 2 additions & 2 deletions R/scoring.R
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,8 @@ plot_scores <- function(score_matrix, cases = NULL, label_samples = TRUE, case_t
score_df$Type <- factor(score_df$Type, levels = c(case_title, control_title))
}

g <- ggplot2::ggplot(score_df, ggplot2::aes_(x = ~Sample, y = ~Term))
g <- g + ggplot2::geom_tile(ggplot2::aes_(fill = ~scores), color = "white")
g <- ggplot2::ggplot(score_df, ggplot2::aes(x = .data$Sample, y = .data$Term))
g <- g + ggplot2::geom_tile(ggplot2::aes(fill = .data$scores), color = "white")
g <- g + ggplot2::scale_fill_gradient2(low = low, mid = mid, high = high)
g <- g + ggplot2::theme(axis.title.x = ggplot2::element_blank(), axis.title.y = ggplot2::element_blank(),
axis.text.x = ggplot2::element_text(angle = 45, hjust = 1), legend.title = ggplot2::element_text(size = 10),
Expand Down
Loading

0 comments on commit 5bb9800

Please sign in to comment.