-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathae.R
executable file
·139 lines (100 loc) · 3.94 KB
/
ae.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
################################################################################
#
# File name: ae.R
#
# Authors: Jacek Marzec ( [email protected] ), code modified from Rossalind Cutts
#
# Barts Cancer Institute,
# Queen Mary, University of London
# Charterhouse Square, London EC1M 6BQ
#
################################################################################
################################################################################
#
# Description: Getting data from ArrayExpress using ArrayExpress package.
#
# Command line use: R --file=./ae.R --args "E-MEXP-993" "raw|processed|full" "/scratch/jack/data/PhD/raw/Affy_U133Plus2" - this will get the raw data files if they exist
#
# First arg: ArrayExpress id
# Second arg: type of data to download: "raw", "processed" or "full" (both raw and processed)
# Third arg: the directory for the data
#
################################################################################
#===============================================================================
# Functions
#===============================================================================
##### Prepare object to write into a file
# Puts the row names of x into an additional first column, so that the result
# can be written with row.names=FALSE and still carry the row identifiers.
# The header of the added column is an empty string; the remaining columns keep
# the column names of x.
#
# x: object with row and column names (e.g. an expression matrix)
# Returns x with its row names bound on as the first column.
prepare2write <- function (x) {
  header <- c("", colnames(x))
  labelled <- cbind(rownames(x), x)
  colnames(labelled) <- header
  labelled
}
#===============================================================================
# Main
#===============================================================================
library("ArrayExpress")
##### Read the command line arguments
# The complete command line is kept in Args because it is reported in
# parameters.txt below
Args <- commandArgs()

# Only the values following "--args" are user parameters. Selecting them with
# trailingOnly = TRUE avoids relying on fixed positions in the full command
# line, which shift when R is started with a different set of options
UserArgs <- commandArgs(trailingOnly = TRUE)

if (length(UserArgs) < 3) {
  stop(
    "Expected 3 arguments after --args: ",
    "<ArrayExpress id> <raw|processed|full> <data directory>",
    call. = FALSE
  )
}

ArrayExpressID <- UserArgs[1]
FileType <- UserArgs[2]

# Data are stored in a subdirectory named after the ArrayExpress id
DataDir <- file.path(UserArgs[3], ArrayExpressID)

print(ArrayExpressID)
print(FileType)
print(DataDir)

##### Set/create a directory for data download
if (!file.exists(DataDir)) {
  dir.create(DataDir, recursive = TRUE)
}
setwd(DataDir)

##### Report used parameters to a file
write(Args, file = "parameters.txt", append = FALSE, sep = "\t")
##### Get experiment info and data
rawset <- ArrayExpress(ArrayExpressID, path = DataDir, save = FALSE)

##### Get raw|processed or both (full) files from ArrayExpress
files <- getAE(ArrayExpressID, type = FileType, path = DataDir)

##### Get processed expression matrix
if (FileType == "processed" || FileType == "full") {

  ##### Identify the columns to extract
  cn <- getcolproc(files)

  # The second reported column is assumed to hold the expression values, so it
  # has to exist before it can be used
  if (length(cn) < 2) {
    stop(
      "Fewer than two columns were reported for the processed files of ",
      ArrayExpressID,
      call. = FALSE
    )
  }

  # Report the column that is actually used (cn[2]), not every candidate
  cat(paste0(
    "Expression measurements will be extracted from column named: ",
    cn[2], "\n"
  ))

  ##### Create the object from the second column
  proset <- procset(files, cn[2])

  write.table(
    prepare2write(exprs(proset)),
    file = paste0(ArrayExpressID, "_processed.txt"),
    sep = "\t",
    row.names = FALSE,
    quote = FALSE
  )

  ##### Remove the processed data files for individual samples
  # unlink() is used instead of a shell "rm" so that the clean-up does not
  # depend on the shell; the pattern is resolved in the working directory
  unlink(Sys.glob("*sample_table.txt"))
}
##### Get experiment phenodata
# rawset is either a single object or a list of objects. In the list case the
# phenodata tables are stacked row-wise and the sample names are concatenated
# in list order. An empty list leaves both phenodata and samples as NULL.
phenodata <- NULL
samples <- NULL

if (is.list(rawset)) {

  # Names of the list elements are dropped so that they cannot leak into the
  # row names produced by rbind() or into the sample names
  sets <- unname(rawset)

  phenodata <- do.call(
    rbind,
    lapply(sets, function(eset) pData(phenoData(eset)))
  )
  samples <- unlist(lapply(sets, sampleNames), use.names = FALSE)

} else {

  phenodata <- pData(phenoData(rawset))
  samples <- sampleNames(rawset)
}
##### Generate target file
# The target file starts with the "Source.Name" column of the phenodata and is
# extended with every following "Characteristics" and "Factor" column, in the
# order in which they appear in the phenodata
targetFile <- NULL
pheno_names <- names(phenodata)

for (i in seq_along(pheno_names)) {

  column_name <- pheno_names[i]

  if (grepl("Source.Name", column_name)) {
    # A "Source.Name" column (re)starts the table
    targetFile <- data.frame(phenodata[i])
  } else if (grepl("Characteristics", column_name) ||
             grepl("Factor", column_name)) {
    targetFile <- data.frame(targetFile, phenodata[i])
  }
}

##### Add sample names and the corresponding data files
if (FileType == "raw" || FileType == "full") {
  data_files <- files$rawFiles
} else {
  data_files <- files$processedFiles
}

# NOTE(review): a NULL file list would be dropped by data.frame(), after which
# the "FileName" header below would be assigned to an annotation column. An
# explicit NA keeps the second column in place - confirm that NA is the desired
# value when getAE() reports no files of the requested type
if (is.null(data_files)) {
  data_files <- NA_character_
}

targetFile <- data.frame(samples, data_files, targetFile)

##### Tidy up the column names
# Double dots are replaced with an underscore first, then the remaining single
# dots are removed
names(targetFile) <- gsub("[.]", "", gsub("[.]{2}", "_", names(targetFile)))
names(targetFile)[1:2] <- c("Name", "FileName")

write.table(
  targetFile,
  file = paste0(ArrayExpressID, "_target.txt"),
  sep = "\t",
  row.names = FALSE,
  quote = FALSE
)

##### Remove the compressed raw|processed data files
# unlink() is used instead of a shell "rm" so that the clean-up does not
# depend on the shell; the pattern is resolved in the working directory
unlink(Sys.glob("*.zip"))