Skip to content

Commit

Permalink
public code and data
Browse files Browse the repository at this point in the history
  • Loading branch information
fritzbayer committed Oct 24, 2023
1 parent 45c2f35 commit cc45d67
Show file tree
Hide file tree
Showing 2,381 changed files with 38,027 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

*.history
*.Rhistory
*.DS_Store
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Network-based clustering unveils interconnected landscapes of genomic and clinical features across myeloid malignancies

This repository contains supplementary information, data and code for the manuscript:

Bayer et al. 2023, "Network-based clustering unveils interconnected landscapes of genomic and clinical features across myeloid malignancies"
84 changes: 84 additions & 0 deletions analysis/aic_analysis.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# This script creates a figure for AIC analysis, used to determine the optimal number of clusters.

# Load required libraries
library(ggplot2)
library(reshape2)
library(clustNet)
library(extrafont)

# NOTE: the workspace is intentionally not cleared here. rm(list = ls()) would
# delete every object in the caller's session when this script is sourced, and
# it does not reset loaded packages or options. Every object used below is
# assigned by this script; run it in a fresh R session for a clean state.

# Helper Function to Load AIC Matrix
#
# Collects the `testAIC` value stored in each result file into a matrix with
# one row per entry of `cluster_range` and one column per entry of `chi_range`.
#
# Arguments:
#   cluster_range  Vector of cluster counts k (one matrix row each).
#   chi_range      Vector of chi values (one matrix column each).
#   file_path      Prefix pasted verbatim in front of
#                  "aic_k<k>_chi<chi>.rds"; a directory must therefore end
#                  with a path separator.
#
# Returns:
#   A length(cluster_range) x length(chi_range) matrix. A cell stays NA when
#   its file does not exist or when the stored `newclustermembership` does not
#   contain exactly k distinct values.
load_aic_matrix <- function(cluster_range, chi_range, file_path) {
  aic_matrix <- matrix(NA, nrow = length(cluster_range), ncol = length(chi_range))

  for (k_idx in seq_along(cluster_range)) {
    k <- cluster_range[k_idx]

    for (i_idx in seq_along(chi_range)) {
      file <- paste0(file_path, "aic_k", k, "_chi", chi_range[i_idx], ".rds")
      if (!file.exists(file)) {
        next
      }

      # Deserialize the result once and reuse it for both fields.
      result <- readRDS(file)
      n_clusters <- length(levels(as.factor(result$newclustermembership)))

      # Only keep the AIC when the fit really uses k clusters.
      if (n_clusters == k) {
        aic_matrix[k_idx, i_idx] <- result$testAIC
      }
    }
  }

  aic_matrix
}

# Helper Function to Plot AIC Heatmap
#
# Builds (but does not draw) a tile heatmap from a matrix of AIC differences.
#
# Arguments:
#   aics      Matrix to plot; after melting, its first dimension is mapped to
#             the x axis (labelled chi) and its second to the y axis
#             (labelled k).
#   minK      Lowest y-axis break.
#   maxK      Highest y-axis break.
#   chiVec    Accepted for interface compatibility; not used in the body.
#   AICrange  Upper limit of the fill scale; the colour midpoint sits at half
#             of it.
#
# Returns:
#   The ggplot object.
plot_aic_heatmap <- function(aics, minK, maxK, chiVec, AICrange) {
  long_aics <- melt(aics)

  fill_scale <- scale_fill_gradient2(
    high = rgb(0.98, 0.98, 1), low = "#117777",
    mid = "#88BBBB", space = "Lab", na.value = "grey75",
    midpoint = AICrange / 2, limit = c(0, AICrange), name = "AIC\nchange\n"
  )

  text_theme <- theme(
    axis.title.x = element_text(vjust = -1),
    axis.title.y = element_text(angle = 0, hjust = -0.5, vjust = 0.505),
    axis.text.x = element_text(angle = 0, vjust = 0.5, size = 20, hjust = 0.6),
    axis.text.y = element_text(angle = 0, vjust = 0.5, size = 20, hjust = 1),
    legend.text = element_text(size = 20),
    axis.title = element_text(size = 30),
    legend.title = element_text(size = 24)
  )

  heatmap_plot <- ggplot(data = long_aics, aes(Var1, Var2, fill = value)) +
    geom_tile() +
    xlab(expression(chi)) +
    ylab("k") +
    fill_scale +
    scale_y_continuous(breaks = c(minK:maxK)) +
    theme_minimal() +
    text_theme +
    theme(legend.key.size = unit(2, "line")) +
    theme(plot.margin = unit(c(-0.3, -0.3, 0.4, 0.4), "cm"))

  heatmap_plot
}

# Main Script

# Initialize Parameters
cluster_range <- 5:18      # candidate numbers of clusters (k)
chi_range <- 0:40 * 0.1    # candidate chi values
file_path <- "../euler_AIC/euler_results/"

# Load AIC Matrix (rows follow cluster_range, columns follow chi_range)
aic_matrix <- load_aic_matrix(cluster_range, chi_range, file_path)

# Prepare and Plot AIC Heatmap
# Axis bounds are derived from cluster_range so the two cannot drift apart.
minK <- min(cluster_range)
maxK <- max(cluster_range)
AICrange <- 40

# Smallest AIC per chi value. A column without any loaded value yields NA
# instead of the Inf (plus warning) that min(..., na.rm = TRUE) would give;
# the resulting differences are NA either way.
column_min <- function(values) {
  if (all(is.na(values))) NA_real_ else min(values, na.rm = TRUE)
}
minaics <- apply(aic_matrix, 2, column_min)

# After transposing, rows are chi values; the vector is recycled down the
# rows, so every entry is reduced by the minimum of its own chi value.
aics <- t(aic_matrix) - minaics

# Cap the differences at the top of the colour scale.
topaics <- AICrange
aics[aics > topaics] <- topaics
rownames(aics) <- chi_range
colnames(aics) <- cluster_range

ggheatmap <- plot_aic_heatmap(aics, minK, maxK, chi_range, AICrange)

# Save Plot to PDF
loadfonts()
pdf("~/Desktop/aic_analysis2.pdf", height = 8, width = 16, family = "Arial", paper = "special", onefile = FALSE)
# Explicit print(): a bare `ggheatmap` is only drawn by top-level autoprint,
# which does not happen under source(), leaving the PDF empty.
print(ggheatmap)
dev.off()


96 changes: 96 additions & 0 deletions analysis/barplot_of_clusters.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# get bar plots of cluster assignment

# Load required libraries
library(reshape2)
library(ggplot2)
library(ggpubr)
library(RColorBrewer)

# NOTE: the workspace is intentionally not cleared here. rm(list = ls()) would
# delete every object in the caller's session when this script is sourced, and
# it does not reset loaded packages or options. Every object used below is
# assigned by this script; run it in a fresh R session for a clean state.

# read data
cluster_results <- readRDS("../results/euler_memberships.rds")
mutation_covariate_data <- readRDS("../data/aml_data.rds")

membership <- cluster_results$clustermembership
diagnosis <- mutation_covariate_data$Dx

# The counts below pair the two vectors by position.
# NOTE(review): assumes element i of both refers to the same patient -- confirm.
stopifnot(length(membership) == length(diagnosis))

# Cluster sizes; explicit print() so the table also shows under source().
print(table(membership))

k_clust <- length(levels(as.factor(membership)))

# Patients per cluster (rows A, B, ...) and diagnosis (columns).
# Clusters are matched against the labels 1..k_clust. Missing values and any
# diagnosis outside the four listed ones are not counted.
cancer_types <- c("AML", "MDS", "MPN", "CMML")
counts <- table(
  factor(membership, levels = seq_len(k_clust)),
  factor(diagnosis, levels = cancer_types)
)

# Store as a plain integer matrix (not a table object) so that melt() treats
# it like a matrix further down.
cancer_table <- matrix(
  as.integer(counts),
  nrow = k_clust,
  ncol = length(cancer_types),
  dimnames = list(LETTERS[seq_len(k_clust)], cancer_types)
)


mycolor <- c("#DD7788", "#771122", "#DDDD77", "#117777")

# Long format of the count matrix: Var1 = cluster, Var2 = cancer type,
# value = number of patients. Shared by both plots below.
cancer_counts_long <- melt(cancer_table)

# Stacked bar plot of patients per cluster, coloured by cancer type.
#
# Arguments:
#   counts_long      Melted count table with columns Var1, Var2 and value.
#   palette          Fill/outline colours, one per cancer type.
#   y_label          Y-axis title.
#   legend_title     Title of the fill legend.
#   legend_position  Passed to theme(legend.position = ...); "none" hides the
#                    legend.
#
# Returns:
#   The ggplot object.
make_cluster_barplot <- function(counts_long, palette, y_label,
                                 legend_title, legend_position) {
  # NOTE(review): label = TRUE together with lab.col = NA is kept from the
  # original; presumably the bar labels are meant to stay invisible -- confirm.
  ggbarplot(counts_long, "Var1", "value",
            fill = "Var2", color = "Var2", palette = palette,
            label = TRUE, lab.col = NA) +
    xlab("Clusters") +
    ylab(y_label) +
    # "none" replaces the deprecated `col = FALSE` for dropping the outline
    # colour legend.
    guides(fill = guide_legend(title = legend_title), colour = "none") +
    theme(legend.position = legend_position,
          axis.line = element_blank(),
          axis.text.y = element_blank(), # remove y axis labels
          axis.ticks.y = element_blank(),
          axis.ticks.x = element_blank())
}

# Plot without legend ("none" is the documented value; "off" is not).
p2 <- make_cluster_barplot(cancer_counts_long, mycolor,
                           y_label = "Patients per cluster",
                           legend_title = "Cancer Type",
                           legend_position = "none")
print(p2)

# save plot
saveRDS(p2, "../figures/barplot.rds")

# Same plot with a bottom legend; only used to extract that legend.
p22 <- make_cluster_barplot(cancer_counts_long, mycolor,
                            y_label = "",
                            legend_title = "Cancer type",
                            legend_position = "bottom")
print(p22)

cluster_legend <- get_legend(p22)

# save legend
saveRDS(cluster_legend, "../figures/bar_legend_ct.rds")


# df_cancer_table <- melt(cancer_table)
#
# df_cancer_table$Var1 <- factor(df_cancer_table$Var1, # Change ordering manually
# levels = c("A","H","F","I","C","E","D","B","G"))
#
# p2 <- ggbarplot(df_cancer_table, "Var1", "value",
# fill = "Var2", color = "Var2", palette = mycolor,
# label = TRUE, lab.col = NA)+
# # label = TRUE, lab.col = "white", lab.pos = "in")+
# xlab("Clusters") +
# # ylab("Number of Patients") +
# ylab("") +
# guides(fill=guide_legend(title="Cancer Type"),col = FALSE)+
# theme(legend.position="off",
# axis.line=element_blank(),
# axis.text.y=element_blank(), #remove y axis labels
# axis.ticks.y=element_blank(),
# axis.ticks.x=element_blank()); p2
#
# # save plot
# saveRDS(p2, "../figures/barplot.rds")


Loading

0 comments on commit cc45d67

Please sign in to comment.