forked from ilarsf/gwasTools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadd_rsID.r
71 lines (58 loc) · 2.28 KB
/
add_rsID.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/Rscript
#Copyright (c) 2018 Brooke Wolford
# Lab of Dr. Cristen Willer and Dr. Mike Boehnke
# University of Michigan
options(stringsAsFactors=F)
library(data.table)
library(optparse)
library(tidyverse)
# Command-line interface ----
# Three of the four flags default to the empty string, which the
# required-argument check further down uses to detect that they were omitted.
option_list <- list(
  make_option(
    "--input",
    type = "character",
    default = "",
    help = "Input file, tab delimited, can be gzipped"
  ),
  make_option(
    "--output",
    type = "character",
    default = "",
    help = "Name for output file"
  ),
  make_option(
    "--col",
    type = "character",
    default = "SNP",
    help = "name of column with SNP ID formatted X:XXXXX_X/X [default='SNP']"
  ),
  make_option(
    "--dbsnp",
    type = "character",
    default = "",
    help = "Bed file from dbsnp with columns chr, posS, posE, rsID"
  )
)
parser <- OptionParser(
  usage = "%prog [options]",
  option_list = option_list,
  description = "This script adds an rsID column to a .txt file with results from BOLT-LMM."
)
# The parsed flag values live in the `options` element of the result.
parsed <- parse_args(parser, positional_arguments = 0)
opt <- parsed$options
# Echo the effective settings so they end up in the run log.
print(opt)
# Abort unless every required flag was given a non-empty value ----
required_values <- c(opt$input, opt$output, opt$dbsnp)
if (!all(nzchar(required_values))) {
  stop("Please provide --input and --output and --dbsnp arguments\n")
}
# Name of the column that holds the variant ID (from --col).
snp_col <- opt$col
# Read the two input tables ----

#' Read a delimited text file with fread(), decompressing it first if gzipped.
#'
#' @param path Path to a plain-text or gzip-compressed (`.gz`) table.
#' @param header Passed to `fread()`: `TRUE` if the first row holds column
#'   names, `FALSE` otherwise.
#' @return The table as a tibble.
read_table_maybe_gz <- function(path, header) {
  # The extension test is escaped and anchored: the previous pattern '.gz'
  # treated '.' as a wildcard and matched anywhere in the path.
  if (grepl("\\.gz$", path)) {
    # cmd= (data.table >= 1.11.6) is the supported way to hand fread() a shell
    # command. shQuote() keeps paths with spaces or shell metacharacters
    # intact. `gzip -dc` is equivalent to zcat but also works where zcat
    # expects a .Z suffix (e.g. macOS).
    tbl <- fread(cmd = paste("gzip -dc", shQuote(path)), header = header)
  } else {
    tbl <- fread(path, header = header)
  }
  as_tibble(tbl)
}

# GWAS results: first row holds the column names.
file <- read_table_maybe_gz(opt$input, header = TRUE)
message("Opened --input file")

# dbSNP BED file: no header row, so the columns are named here.
dbsnp <- read_table_maybe_gz(opt$dbsnp, header = FALSE)
message("Opened --dbsnp file")
if (ncol(dbsnp) != 4L) {
  stop(
    "--dbsnp file must have exactly 4 columns (chr, posS, posE, rsID); found ",
    ncol(dbsnp),
    call. = FALSE
  )
}
names(dbsnp) <- c("chr", "posS", "posE", "rsID")
# Split the variant ID into join keys and attach rsIDs ----
# Remember the original column layout so the output keeps it (plus rsID).
df_cols <- names(file)

# Fail early with a clear message if --col does not name a real column.
if (!(snp_col %in% df_cols)) {
  stop(
    "Column '", snp_col, "' given by --col was not found in --input",
    call. = FALSE
  )
}

# IDs are formatted CHR:POS_ALLELES (see --col help). remove = FALSE keeps the
# original ID column untouched, so it never has to be rebuilt afterwards; the
# previous rebuild step referenced a hard-coded `SNP` column and therefore
# broke for any other --col value.
file <- file %>%
  separate(!!snp_col, c("snp", "alleles"), "_", remove = FALSE) %>%
  separate(snp, c("chr", "posE"), ":") %>%
  mutate(
    # as.is = TRUE avoids factor conversion (R < 4.0) and the "as.is should be
    # specified" warning (R >= 4.0). chr is then forced to character so the
    # join key has the same type in both tables even when only one of them
    # contains non-numeric chromosomes such as X.
    chr = as.character(type.convert(chr, as.is = TRUE)),
    posE = type.convert(posE, as.is = TRUE)
  )

# Inner join on chromosome and end position: variants without a dbSNP match
# are dropped from the result.
joined <- inner_join(
  file,
  mutate(dbsnp, chr = as.character(chr)),
  by = c("chr", "posE")
)

# Drop the helper columns: original columns in original order, then rsID.
final <- select(joined, one_of(c(df_cols, "rsID")))
# Write the annotated table to --output: tab-separated, with a header row,
# no row names and no quoting.
write.table(
  x = final,
  file = opt$output,
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)