-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathagilentProbeMap.sh
36 lines (24 loc) · 1.9 KB
/
agilentProbeMap.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
########################################################################
# Agilent microarray probe sequence mapping to reference transcriptomes#
# >> based on personal communication; uses the script #
# prepareMicroarrayProbes.py from #
# https://github.com/MWSchmid/microarray #
# Diana Coman Schmid #
# Eawag 2015 #
########################################################################
# Map probe sequences from the zebrafish Agilent microarray platform to the
# zebrafish transcriptome (Zv9 cDNA) and extract the probe <-> ID mappings.
#
# Every step below consumes files written by the step before it, so stop at
# the first failing command (or unset variable) instead of carrying on with
# missing or stale intermediate files.
set -eu

# Helper script invoked for the BUILD / TABTOFASTA / ALIGN / EXTRACT steps.
prepare_probes=./MicroarrayCrossPlatform/prepareMicroarrayProbes.py
# Raw Agilent array file that holds the probe annotation columns.
array_file=Cy3_AA_High_Cy5_AA_Lind_Ctrl_253100010016_2_2.txt

# Build the bowtie index from the Zv9 cDNA FASTA.
python "$prepare_probes" BUILD Danio_rerio.Zv9.cdna.all.fa Danio_rerioZv9cdna_index

# Get probe ID and nucleotide sequence information.
# Print the first lines of the raw file so the header layout can be inspected.
head "$array_file"
# Remove the first 9 lines (not needed) ...
sed '1,9d' "$array_file" > zebrafish_seq_probe_tmp.txt
# ... and keep only TAB-separated fields 10 and 13 (the "ProbeUID" and
# "Sequence" columns).
cut -f 10,13 zebrafish_seq_probe_tmp.txt > zebrafish_seq_probe.txt
# Write a second copy with the two fields swapped and separated by one space.
# NOTE(review): zebrafish_probe_seq.txt is not read by any later step in this
# script (TABTOFASTA below reads zebrafish_seq_probe.txt) -- confirm which
# file was intended.
awk '{ print $2 " " $1}' zebrafish_seq_probe.txt > zebrafish_probe_seq.txt

# Check and format (TAB delim.) the file fields, writing a FASTA file.
# The numeric arguments "1 2 1" are passed to the helper unchanged; their
# meaning is defined by prepareMicroarrayProbes.py -- TODO confirm.
python "$prepare_probes" TABTOFASTA zebrafish_seq_probe.txt 1 2 1 zebrafish_seq_probe.fasta

# Align the probes to the reference transcriptome ("cDNA").
# NOTE(review): BUILD above is given the index name without a directory,
# while ALIGN reads it from ./zebrafish/genome/ -- verify the index is
# actually located where ALIGN expects it.
python "$prepare_probes" ALIGN ./zebrafish/genome/Danio_rerioZv9cdna_index zebrafish_seq_probe.fasta unaligned.txt aligned.txt

# Extract the ID mappings.
python "$prepare_probes" EXTRACT aligned.txt probeNameToID.txt IDtoProbeName.txt