-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredict.R
85 lines (67 loc) · 2.26 KB
/
predict.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
args=commandArgs(trailingOnly=TRUE)
# print(length(args))
if (length(args) != 3 )
stop("Invalid number of arguments to Rscript.")
script_path <- args[1]
input_file <- args[2]
output_path <- args[3]
# print(script_path)
# print(input_file)
# print(output_path)
convertRI <- function(diff){
if(diff < 0.1)
return(1)
return(ceiling(round(diff*10, digits = 0)/2))
}
library(reshape2, quietly=TRUE)
library(caret, quietly=TRUE)
A_col <- unlist(read.table(paste0(script_path, "A_col.txt")))
svm_L <- readRDS(paste0(script_path, "svm_L_model.rds"))
In <- read.table(input_file, comment.char = "", sep = "\t", header = TRUE)
if(length(unique(In$SAMPLE))>1)
stop("More than 1 samples are found in the VCF file.")
Ind <- In[,c("POS", "REF", "ALT", "QUAL", "SAMPLE", "AO", "DP")]
Ind$AO[Ind$AO=="."] <- 0
Ind$DP[Ind$DP=="."] <- 0
Ind <- transform(Ind, DP = as.numeric(DP))
Ind <- transform(Ind, AO = as.numeric(AO))
Ind$MUT <- paste(Ind$REF, Ind$POS, Ind$ALT, sep = "")
Ind$VAL <- Ind$AO/Ind$DP
Ind$VAL[Ind$VAL == "NaN"] <- 0.0000
Ind$VAL <- round(Ind$VAL, digits = 4)
Ind_l <- dcast(Ind[,c("SAMPLE", "MUT", "VAL")], SAMPLE~MUT, value.var = "VAL")
data_2 <- data.frame(matrix(ncol = length(A_col), nrow = nrow(Ind_l)))
colnames(data_2) <- A_col
for(i in colnames(data_2)){
if(i %in% colnames(Ind_l)){
data_2[1,i] <- Ind_l[1,i]
}else{
data_2[1,i] <- 0.00
}
}
pred_prob <- predict(svm_L, data_2, type="prob")
prob <- unname(unlist(pred_prob))
diff <- max(prob)-max(prob[prob!=max(prob)]) #highest - second highest
RI <- convertRI(diff)
out <- pred_prob
colnames(out)[1:3] <- c("MDR", "Susceptible", "XDR")
out[,c("MDR", "Susceptible", "XDR")] <- round(out[,c("MDR", "Susceptible", "XDR")], digits = 4)
if(predict(svm_L, data_2)== "X") {
out$Class <- "XDR"
} else if(predict(svm_L, data_2)== "M") {
out$Class <- "MDR"
} else {
out$Class <- "Susceptible"
}
out$Sample <- Ind_l$SAMPLE
out$RI <- RI
out <- out[c(5,1:4,6)]
cat("\n")
print(out)
cat("\n")
write.table(out, file = paste0(output_path, "prediction.tsv"), row.names = FALSE)
# cat("\n")
# cat("\tSAMPLE =", Ind_l$SAMPLE, "\n")
# cat("\tPredicted class =", suppressWarnings(names(sort(pred_prob_svm_R_ind, decreasing=TRUE)))[1], "\n")
# cat("\tProbability =", max(prob), "\n")
# cat("\tRelability Index (RI) =", RI, "\n\n")