Skip to content

Commit

Permalink
renamed data dir to avoid issues and added correlation coeffiecient p…
Browse files Browse the repository at this point in the history
…lots
  • Loading branch information
elswob committed Jul 9, 2015
1 parent 8dc155c commit 91d00ec
Show file tree
Hide file tree
Showing 8 changed files with 52 additions and 47 deletions.
8 changes: 6 additions & 2 deletions R/pam50_correlation_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@ multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
#' @param p50 The PAM50 results text file
#' @param id The ID of the samples to extract
#' @param name The name to represent the samples
cor_plot = function(p50,id,name){
cor_plot = function(p,id,name){
print(paste0("Generating correlation plots - ",id," - ",name))
p$X=rownames(p)
print(head(p))
#get colors
#myPal <- brewer.pal(6, "Spectral")
myPal = c("red","green","blue","orange","purple","black")
Expand Down Expand Up @@ -78,11 +81,12 @@ cor_plot = function(p50,id,name){

p$X <- factor(p$X, levels = p[order(p$Call), "X"])
fm=melt(p,id.vars=c("X","Call"),measure.vars=subtypes)
print(head(fm))
#set the colours for the ticks
colorOrder = myPal[as.character(p[order(p$Call),]$Call)]
p2=plotter("Call")

pdf(paste0(name,".pdf"))
pdf(paste0(inputDir,"/",name,"_cor_coef_plot.pdf"))
multiplot(p1,p2,cols=2)
dev.off()
}
Expand Down
7 changes: 3 additions & 4 deletions R/subtypePrediction_distributed.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,13 @@
#8. Assign the class of the most highly correlated centroid to each sample

pam50_wrapper=function(inputDir,inputFile,short){
calibrationFile<- "./Data/mediansPerDataset_v2.txt"
trainCentroids<- "./Data/pam50_centroids.txt"
trainFile<<- "./Data/220arrays_nonUBCcommon+12normal_50g.txt"
calibrationFile<- "./pam50_data/mediansPerDataset_v2.txt"
trainCentroids<- "./pam50_data/pam50_centroids.txt"
trainFile<<- "./pam50_data/220arrays_nonUBCcommon+12normal_50g.txt"
proliferationGenes<-c("CCNB1","UBE2C","BIRC5","KNTC2","CDC20","PTTG1","RRM2","MKI67","TYMS","CEP55","CDCA1")
stdArray<-T # just for visualization, and only set to F if many missing genes
predFiles<- paste(inputDir,inputFile,sep="/")


###
# some constants
###
Expand Down
80 changes: 40 additions & 40 deletions R/subtypePrediction_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ medianCtr<-function(x){
}

pcaEA<-function(x,classes,size=1,showLegend=T,legendloc="topright",mainStr="",startPC=1,stopPC=2,showNames=T,showClasses=F,axisExpansion=0,groupColors=NA){

features<- dim(x)[1]
samples<- dim(x)[2]
sampleNames<- dimnames(x)[[2]]
featureNames<-dimnames(x)[[1]]
x<-apply(x,2,as.numeric)

#principal components plots
data.pca<-prcomp(as.matrix(x))

Expand All @@ -55,15 +55,15 @@ pcaEA<-function(x,classes,size=1,showLegend=T,legendloc="topright",mainStr="",st
legendColors[k]<-k
}
}

if(length(groupColors)==1){
gr.labels<-as.numeric(gr.labels)
}

#plot 2pcs by each other
i<-startPC
j<-stopPC

#graphing parameters
par(lab=c(3,4,3))
par(mgp=c(.3,.5,.0))
Expand All @@ -77,7 +77,7 @@ pcaEA<-function(x,classes,size=1,showLegend=T,legendloc="topright",mainStr="",st
xmax<-max(data.pca$rotation[,i])+abs(axisExpansion*max(data.pca$rotation[,i]))
ymin<-min(data.pca$rotation[,j])-abs(axisExpansion*min(data.pca$rotation[,j]))
ymax<-max(data.pca$rotation[,j])+abs(axisExpansion*max(data.pca$rotation[,j]))
plot(data.pca$rotation[,i],data.pca$rotation[,j], xlab=strX, ylab=strY,
plot(data.pca$rotation[,i],data.pca$rotation[,j], xlab=strX, ylab=strY,
main=strM, col=gr.labels,cex=size,pch="",xlim=c(xmin,xmax),ylim=c(ymin,ymax))
if(showNames){
text(data.pca$rotation[,i],data.pca$rotation[,j],labels=names(data.pca$rotation[,i]),cex=size*.6)
Expand All @@ -96,26 +96,26 @@ pcaEA<-function(x,classes,size=1,showLegend=T,legendloc="topright",mainStr="",st
cols <- function(lowi = "green", highi = "red", ncolors = 20) {
low <- col2rgb(lowi)/255
high <- col2rgb("black")/255
col1 <- rgb(seq(low[1], high[1], len = ncolors), seq(low[2],
col1 <- rgb(seq(low[1], high[1], len = ncolors), seq(low[2],
high[2], len = ncolors), seq(low[3], high[3], len = ncolors))
low <- col2rgb("black")/255
high <- col2rgb(highi)/255
col2 <- rgb(seq(low[1], high[1], len = ncolors), seq(low[2],
col2 <- rgb(seq(low[1], high[1], len = ncolors), seq(low[2],
high[2], len = ncolors), seq(low[3], high[3], len = ncolors))
col<-c(col1[1:(ncolors-1)],col2)
return(col)
}

myHeatmap<-function(x,t.colors=NA,fileName="cluster.cdt",linkage="complete",distance="pearson",contrast=2,returnSampleClust=F,rowNames=NA,rightMar=7,bottomMar=1,colNames=NA){

temp<-hclust2treeview(x,method=distance,file=fileName,link=linkage,keep.hclust=T)
gTree<-temp[[1]]
sTree<-temp[[2]]

imageVals<-x
imageVals[x > contrast] <- contrast
imageVals[x < -1 * contrast] <- -1 * contrast

if(sum(is.na(t.colors))>0){
heatmap(imageVals,Rowv=as.dendrogram(gTree),Colv=as.dendrogram(sTree),
col=cols(),labCol=colNames, scale="none",
Expand Down Expand Up @@ -165,26 +165,26 @@ collapseIDs<-function(x,allids=row.names(x),method="mean"){
ids<- levels(as.factor(allids))
x.col<- NULL

if(length(ids)==dim(x)[1]){
if(length(ids)==dim(x)[1]){
dimnames(x)[[1]]<-allids
return(x)
return(x)
}

for(i in 1:length(ids)){
if(sum(allids==ids[i])>1){
indices <- allids==ids[i]
indices <- allids==ids[i]
if(method=="mean"){
vals<-apply(x[indices,],2,mean,na.rm=T)
}
if(method=="median"){
vals<-apply(x[indices,],2,median,na.rm=T)
}
if(method=="stdev"){
if(method=="stdev"){
temp<- x[indices,]
stdevs<- apply(temp,1,sd,na.rm=T)
vals<- temp[match(max(stdevs),stdevs),]
}
if(method=="iqr"){
if(method=="iqr"){
temp<- x[indices,]
iqrs<- apply(temp,1,function(x){quantile(x,.75,na.rm=T)-quantile(x,.25,na.rm=T)})
vals<- temp[match(max(iqrs),iqrs),]
Expand All @@ -197,7 +197,7 @@ collapseIDs<-function(x,allids=row.names(x),method="mean"){

dimnames(x.col)<- list(ids,dimnames(x)[[2]])
return(x.col)

}


Expand All @@ -214,26 +214,26 @@ readarray<-function(dataFile,designFile=NA,hr=1,impute=T,method="mean"){
ids<-x[,1]
xd<-x[,-1]
xd<-apply(xd,2,as.numeric)
xd<-collapseIDs(xd,ids,method)
xd<-collapseIDs(xd,ids,method)
}else{
sampleNames<-as.vector(t(x[1,-1]))
x<-x[-1,]

classes<-x[1:(headerRows-1),]
dimnames(classes)[[1]]<-classes[,1]
classes<-classes[,-1]
classes[classes==""]<-NA
classes<-t(classes)
rownames(classes)<-sampleNames
classes<-as.data.frame(classes)

xd<-x[(-1:-(headerRows-1)),]
ids<-as.vector(t(xd[,1]))
xd<-xd[,-1]
xd<-apply(xd,2,as.numeric)
xd<-collapseIDs(xd,ids,method)
}

features<- dim(xd)[1]
samples<- dim(xd)[2]
geneNames<-rownames(xd)
Expand All @@ -249,15 +249,15 @@ readarray<-function(dataFile,designFile=NA,hr=1,impute=T,method="mean"){
x<-x[sort.list(rownames(x)),]
classes<-as.data.frame(x)
}

if(sum(apply(xd,2,is.na))>0 & impute){
library(impute)
allAnn<-dimnames(xd)
data.imputed<-impute.knn(as.matrix(xd))$data
xd<-data.imputed[1:features,]
dimnames(xd)<-allAnn
}

return(list(xd=xd, classes=classes, nfeatures=features, nsamples=samples, fnames=geneNames, snames=sampleNames))
}

Expand All @@ -269,26 +269,26 @@ sspPredict<-function(x,classes="",y,nGenes="",priors="equal",std=F,distm="euclid
samples<- dim(x)[2]
sampleNames<- dimnames(x)[[2]]
featureNames<- dimnames(x)[[1]]

#parse the test file - same as train file but no rows of classes
tdataMatrix<-y
tfeatures<- dim(y)[1]
tsamples<- dim(y)[2]
tsampleNames<- dimnames(y)[[2]]
tfeatureNames<- dimnames(y)[[1]]

#dimnames(tdataMatrix)[[2]]<-paste("x",seq(1,471))
temp <- overlapSets(dataMatrix,tdataMatrix)
dataMatrix <- temp$x
tdataMatrix <- temp$y
sfeatureNames<-row.names(dataMatrix)

# standardize both sets
if(std){
dataMatrix<-standardize(dataMatrix)
tdataMatrix<-standardize(tdataMatrix)
}

if(!centroids){
thisClass <- as.vector(classes[,1])
nClasses<-nlevels(as.factor(thisClass))
Expand All @@ -299,21 +299,21 @@ sspPredict<-function(x,classes="",y,nGenes="",priors="equal",std=F,distm="euclid
thisClass<-as.numeric(thisClass)
dataMatrix <- dataMatrix[,!(is.na(thisClass))]
thisClass <- thisClass[!(is.na(thisClass))]

scores<-apply(dataMatrix,1,bwss,thisClass)
trainscores<-vector()
for(j in 1:dim(dataMatrix)[1]){
trainscores<-vector()
for(j in 1:dim(dataMatrix)[1]){
trainscores[j]<-scores[[row.names(dataMatrix)[j]]]$bss / scores[[row.names(dataMatrix)[j]]]$wss
}

dataMatrix<-dataMatrix[sort.list(trainscores,decreasing=T),]
tdataMatrix<-tdataMatrix[sort.list(trainscores,decreasing=T),]
tdataMatrix<-tdataMatrix[sort.list(trainscores,decreasing=T),]

if(nGenes==""){
nGenes<-dim(dataMatrix)[1]
}
print(paste("Number of genes used:",nGenes))

dataMatrix<-dataMatrix[1:nGenes,]
tdataMatrix<-tdataMatrix[1:nGenes,]

Expand All @@ -322,15 +322,15 @@ sspPredict<-function(x,classes="",y,nGenes="",priors="equal",std=F,distm="euclid
centroids[,j]<-apply(dataMatrix[,thisClass==j],1,mean)
}
dimnames(centroids)<-list(row.names(dataMatrix),NULL)

}else{
nGenes<-dim(dataMatrix)[1]
print(paste("Number of genes used:",nGenes))
centroids<-dataMatrix
nClasses<-dim(centroids)[2]
classLevels<-dimnames(centroids)[[2]]
}

distances<-matrix(ncol=nClasses,nrow=dim(tdataMatrix)[2])
for(j in 1:nClasses){
if(distm=="euclidean"){
Expand All @@ -343,14 +343,14 @@ sspPredict<-function(x,classes="",y,nGenes="",priors="equal",std=F,distm="euclid
distances[,j]<-t(-1*cor(cbind(centroids[,j],tdataMatrix),method="spearman",use="pairwise.complete.obs"))[2:(dim(tdataMatrix)[2]+1)]
}
}

scores<-apply(distances,1,min)
prediction<-vector(length=tsamples)
for(i in 1:tsamples){
prediction[i]<-classLevels[match(scores[i],distances[i,])]
}
names(prediction)<-tsampleNames

return(list(predictions=prediction,testData=tdataMatrix,distances=distances,centroids=centroids))
}

4 changes: 3 additions & 1 deletion R/subtype_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#installifnot("gplots")
#installifnot("heatmap.plus")
#installifnot("GMD")
#installifnot("reshape")
#biocIfnot("limma")
#biocIfnot("org.Hs.eg.db")
#biocIfnot("genefu")
Expand All @@ -23,6 +24,7 @@ library(gplots)
library(org.Hs.eg.db)
library(genefu)
library(heatmap.plus)
library(reshape)
#library(GMD)

####################################################################
Expand Down Expand Up @@ -367,7 +369,7 @@ run_p50=function(){
dev.off()

#plot the correlation coefficients
cor_plot(pam.res,".","Samples")
cor_plot(pam.res,".",short)
}

################# SCMGENE #######################
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 comments on commit 91d00ec

Please sign in to comment.