-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcalcPredScore.R
129 lines (113 loc) · 4.21 KB
/
calcPredScore.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#' Score the test drug-target pairs from latent factors and evaluate them
#'
#' When `K > 0`, the latent vector of every test drug (target) that is not
#' listed in `knownDrugIndex` (`knownTargetIndex`) is replaced by the
#' similarity-weighted average of the latent vectors of its most similar
#' known drugs (targets). The score matrix is then
#' `sigmoid(alpha * UV' + beta * simDrug %*% UV' + gamma * UV' %*% simTarget)`
#' and the entries at (`testIndexRow`, `testIndexCol`) are handed to
#' `calAUPR()` together with `testLabel`.
#'
#' `sigmoid()` and `calAUPR()` are not defined in this block; they must be
#' available in the calling environment.
#'
#' @param U Row (drug) latent matrix, one row per drug.
#' @param V Column (target) latent matrix, one row per target.
#' @param simDrug Drug similarity matrix; diagonal elements are zeros.
#' @param simTarget Target similarity matrix; diagonal elements are zeros.
#' @param knownDrugIndex Row indices of `U` treated as known drugs.
#' @param knownTargetIndex Row indices of `V` treated as known targets.
#' @param testIndexRow Row (drug) index of each test pair.
#' @param testIndexCol Column (target) index of each test pair.
#' @param K Number of nearest neighbours used for smoothing; `0` disables
#'   smoothing, negative values are an error.
#' @param testLabel Labels of the test pairs, passed through to `calAUPR()`.
#' @param thisAlpha Weight of the plain `U %*% t(V)` term.
#' @param thisBeta Weight of the `simDrug %*% (U %*% t(V))` term.
#' @param thisGamma Weight of the `(U %*% t(V)) %*% simTarget` term.
#' @return The value returned by `calAUPR(testLabel, score)` (described in the
#'   original source as a list of AUC and AUPR).
calcPredScore <- function(U,
                          V,
                          simDrug,
                          simTarget,
                          knownDrugIndex,
                          knownTargetIndex,
                          testIndexRow,
                          testIndexCol,
                          K = 5,
                          testLabel,
                          thisAlpha,
                          thisBeta,
                          thisGamma) {
  if (K < 0) {
    stop("K MUST be '>=' 0! \n")
  }

  # Replace the latent rows of test entities that are not in `knownIndex`
  # by the similarity-weighted mean of their nearest known neighbours.
  # Shared by the drug side (U, simDrug) and the target side (V, simTarget).
  smoothNewRows <- function(latent, sim, knownIndex, testIndex) {
    # drop = FALSE keeps a matrix even when there is a single known entity
    simKnown <- sim[, knownIndex, drop = FALSE]
    numKnown <- length(knownIndex)
    smoothed <- latent
    # iterating over the values is safe for an empty test index
    for (indexCurr in unique(testIndex)) {
      if (indexCurr %in% knownIndex) {
        next
      }
      simNew <- simKnown[indexCurr, ] # vector over known entities
      # neighbours whose rank falls in the top K; rank() averages ties, so
      # ties at the cut-off can select more or fewer than K neighbours
      indexNeig <- which(rank(simNew) > (numKnown - K))
      simCurr <- simNew[indexNeig]
      simTotal <- sum(simCurr)
      # No neighbour selected or all weights zero: the weighted mean would be
      # 0/0 = NaN and would spread to every score through the matrix
      # products below, so keep the entity's current latent vector instead.
      if (isTRUE(simTotal == 0)) {
        next
      }
      # neighbours are read from the original matrix, never from `smoothed`
      latentKnown <- latent[knownIndex[indexNeig], , drop = FALSE]
      # vec %*% matrix => 1-row matrix
      smoothed[indexCurr, ] <- (simCurr %*% latentKnown) / simTotal
    }
    smoothed
  }

  if (K > 0) {
    Unew <- smoothNewRows(U, simDrug, knownDrugIndex, testIndexRow)
    Vnew <- smoothNewRows(V, simTarget, knownTargetIndex, testIndexCol)
  } else {
    # K = 0: no smoothing, use the latent factors as they are
    Unew <- U
    Vnew <- V
  }

  UVt <- Unew %*% t(Vnew)
  val <- thisAlpha * UVt +
    thisBeta * (simDrug %*% UVt) +
    thisGamma * (UVt %*% simTarget)
  # sigmoid() replaced exp(val) / (1 + exp(val)) for numerical stability
  score <- sigmoid(val)
  # two-column matrix index: one (row, col) pair per test entry
  testSetIndex <- cbind(testIndexRow, testIndexCol)
  calAUPR(testLabel, score[testSetIndex])
}