-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcombine_annotated_psi.R
74 lines (57 loc) · 2.19 KB
/
combine_annotated_psi.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env Rscript
library("optparse")
library(data.table)
library(tidyverse)
library(tidytext)
# Command-line interface.
#   -f / --folder : directory containing the per-sample parsed CSV files
#                   (required; there is no usable default).
#   -o / --out    : path of the combined output file (default "out.txt").
option_list <- list(
  make_option(c("-f", "--folder"), type = "character", default = NULL,
              help = "folder with the parsed csvs", metavar = "character"),
  make_option(c("-o", "--out"), type = "character", default = "out.txt",
              help = "output file name", metavar = "character")
)
opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

# Fail early with a readable message instead of letting a NULL or
# non-existent folder surface later as an obscure list.files()/fread() error.
if (is.null(opt$folder)) {
  print_help(opt_parser)
  stop("--folder must be supplied", call. = FALSE)
}
if (!dir.exists(opt$folder)) {
  stop("--folder is not an existing directory: ", opt$folder, call. = FALSE)
}
# combine_annotated_psi = function(folder,output){
# suffix = "_normalized_annotated.csv"
# # everything has a suffix that we define, use this for removing later
# files = list.files(folder,full.names = TRUE,pattern = suffix)
#
#
# # progress-bar pattern found via a web search
# pb = txtProgressBar(min = 2, max = length(files), initial = 2)
# mydata = fread(files[1])
# for(f in 2:length(files)){
# t = fread(files[f])
# t = t %>% dplyr::select(-index,-clusters,-gene_id_junction)
# mydata = unique(full_join(mydata, t, by = c("seqnames","start","end","strand_junction","type")))
# rm(t)
# setTxtProgressBar(pb,f)
# }
#
# fwrite(mydata,output)
#
# }
# combine_annotated_psi(folder = opt$folder, output = opt$out)
#' Combine per-sample annotated PSI tables into one long-format file.
#'
#' Reads every file in `folder` whose name ends in
#' "_normalized_annotated.csv". For each table, the name of the 9th column is
#' stored in a new `sample` column and that 9th column is renamed to "PSI"
#' (presumably the 9th column holds the per-sample PSI values and is named
#' after the sample -- confirm against the upstream script that writes these
#' files). All tables are then stacked row-wise, a `paste_into_igv_junction`
#' column of the form "seqnames:start-end" is added, and the result is written
#' to `output` with `fwrite()`.
#'
#' @param folder Directory containing the `*_normalized_annotated.csv` files.
#' @param output Path of the combined file to write.
#' @return The value of `fwrite()`; called for its side effect of writing
#'   `output`.
combine_annotated_psi_long <- function(folder, output) {
  suffix <- "_normalized_annotated.csv"
  # Position of the column whose name is recorded as the sample.
  psi_col <- 9L

  # list.files() takes a regular expression, so escape the dot and anchor the
  # match at the end of the name to really select on the suffix.
  files <- list.files(folder, full.names = TRUE,
                      pattern = "_normalized_annotated\\.csv$")
  if (length(files) == 0L) {
    stop("no files ending in '", suffix, "' found in folder: ", folder,
         call. = FALSE)
  }

  # Read one table and reshape it to the common (..., PSI, sample) layout.
  read_one <- function(path) {
    tbl <- fread(path)
    if (ncol(tbl) < psi_col) {
      stop("expected at least ", psi_col, " columns in ", path,
           " but found ", ncol(tbl), call. = FALSE)
    }
    sample_name <- colnames(tbl)[psi_col]
    set(tbl, j = "sample", value = sample_name)
    setnames(tbl, sample_name, "PSI")
    tbl
  }

  # txtProgressBar() requires max > min, so start from 0: this works for any
  # number of files >= 1 (the previous min = 2 failed for one or two files).
  pb <- txtProgressBar(min = 0, max = length(files), initial = 0)
  on.exit(close(pb), add = TRUE)

  # Collect the tables in a preallocated list and bind once at the end rather
  # than growing the result with rbind() on every iteration.
  tables <- vector("list", length(files))
  for (i in seq_along(files)) {
    tables[[i]] <- read_one(files[i])
    setTxtProgressBar(pb, i)
  }
  # use.names = TRUE matches what rbind() does for data.tables.
  mydata <- rbindlist(tables, use.names = TRUE)

  mydata[, paste_into_igv_junction := paste0(seqnames, ":", start, "-", end)]
  # my_sparse_data = cast_sparse(mydata, paste_into_igv_junction,sample, PSI)
  # saveRDS(my_sparse_data,output)
  fwrite(mydata, output)
}
# Script entry point: combine the tables found under --folder and write the
# result to --out.
combine_annotated_psi_long(opt$folder, opt$out)