forked from zhengtaoxiao/Single-Cell-Metabolic-Landscape
-
Notifications
You must be signed in to change notification settings - Fork 48
/
Copy pathutils.R
80 lines (75 loc) · 2.47 KB
/
utils.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
##gmtPathways is copied from fgsea package.
gmtPathways <- function(gmt.file) {
pathwayLines <- strsplit(readLines(gmt.file), "\t")
pathways <- lapply(pathwayLines, tail, -2)
names(pathways) <- sapply(pathwayLines, head, 1)
pathways
}
## get_os is copied from https://github.com/r-lib/rappdirs/blob/master/R/utils.r#L1
get_os <- function() {
if (.Platform$OS.type == "windows") {
"win"
} else if (Sys.info()["sysname"] == "Darwin") {
"mac"
} else if (.Platform$OS.type == "unix") {
"unix"
} else {
stop("Unknown OS")
}
}
##from : https://github.com/mikelove/DESeq2/blob/master/R/core.R
estimateSizeFactorsForMatrix <- function(counts, locfunc=stats::median,
geoMeans, controlGenes) {
if (missing(geoMeans)) {
incomingGeoMeans <- FALSE
loggeomeans <- rowMeans(log(counts))
} else {
incomingGeoMeans <- TRUE
if (length(geoMeans) != nrow(counts)) {
stop("geoMeans should be as long as the number of rows of counts")
}
loggeomeans <- log(geoMeans)
}
if (all(is.infinite(loggeomeans))) {
stop("every gene contains at least one zero, cannot compute log geometric means")
}
sf <- if (missing(controlGenes)) {
apply(counts, 2, function(cnts) {
exp(locfunc((log(cnts) - loggeomeans)[is.finite(loggeomeans) & cnts > 0]))
})
} else {
if ( !( is.numeric(controlGenes) | is.logical(controlGenes) ) ) {
stop("controlGenes should be either a numeric or logical vector")
}
loggeomeansSub <- loggeomeans[controlGenes]
apply(counts[controlGenes,,drop=FALSE], 2, function(cnts) {
exp(locfunc((log(cnts) - loggeomeansSub)[is.finite(loggeomeansSub) & cnts > 0]))
})
}
if (incomingGeoMeans) {
# stabilize size factors to have geometric mean of 1
sf <- sf/exp(mean(log(sf)))
}
sf
}
##calculate how many pathways of one gene involved.
num_of_pathways <- function (gmtfile,overlapgenes){
pathways <- gmtPathways(gmtfile)
pathway_names <- names(pathways)
filter_pathways <- list()
for (p in pathway_names){
genes <- pathways[[p]]
common_genes <- intersect(genes,overlapgenes)
if(length(common_genes>=5)){
filter_pathways[[p]] <- common_genes
}
}
all_genes <- unique(as.vector(unlist(filter_pathways)))
gene_times <- data.frame(num =rep(0,length(all_genes)),row.names = all_genes)
for(p in pathway_names){
for(g in filter_pathways[[p]]){
gene_times[g,"num"] = gene_times[g,"num"]+1
}
}
gene_times
}