-
Notifications
You must be signed in to change notification settings - Fork 0
/
miscellaneous.py
55 lines (45 loc) · 1.83 KB
/
miscellaneous.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# Miscellaneous helper functions and one-off data-preparation scripts.
import csv
import os
def findFileName(start, target_dir):
    """Return the name of an entry in target_dir whose name starts with start.

    Entries are examined in sorted order so the result is deterministic when
    several names match (os.listdir itself returns entries in arbitrary
    order).

    :param start: string that the filename starts with
    :param target_dir: directory to search (only its direct entries are listed)
    :return: the matching entry name as returned by os.listdir (a bare name,
        not joined with target_dir), or None if no entry matches
    """
    for filename in sorted(os.listdir(target_dir)):
        if filename.startswith(start):
            return filename
    # Make the "no match" outcome explicit instead of falling off the end.
    return None
def parseFiles(
    list_file="/home/aliu/Projects/CausalAssociation/data/rawData/SRR_USE_LIST.txt",
    prefix='/proj/omics4tb2/aliu/projects/causalAssociation/data/rawData/',
):
    """Print the fastq file paths for the identifiers in list_file.

    Each line of list_file is turned into ``prefix + line + '.fastq'`` and the
    resulting paths are printed on a single line, joined by commas.

    :param list_file: text file with one identifier per line
    :param prefix: string prepended to every identifier (used verbatim, so it
        should end with a path separator)
    :return: None; the comma-separated paths are written to stdout
    """
    # NOTE(review): the default list_file spells the directory
    # "CausalAssociation" while combineCSVFiles uses "causalAssociation";
    # confirm which casing exists on disk.
    # The with-block closes the handle; the original left it open.
    with open(list_file, "r") as handle:
        identifiers = handle.read().splitlines()
    print(",".join(prefix + identifier + '.fastq' for identifier in identifiers))
def combineCSVFiles(
    srr_list_file="/home/aliu/Projects/causalAssociation/data/rawData/SRR_USE_LIST.txt",
    gcm_list_file="/home/aliu/Projects/causalAssociation/data/rawData/GCM_USE_LIST",
    output_file="/home/aliu/Projects/causalAssociation/data/rawData/COMBINED.csv",
):
    """Combine SRR and GCM data into a single tab-delimited file.

    Row i of the output is row i of gcm_list_file with line i of
    srr_list_file appended as an extra last column. SRR lines beyond the
    number of GCM rows are ignored.

    :param srr_list_file: text file with one SRR identifier per line
    :param gcm_list_file: tab-delimited file, one record per line
    :param output_file: path of the file to write; written with the csv
        'excel-tab' dialect (tab-separated despite the default .csv name)
    :return: None
    :raises IndexError: if srr_list_file has fewer lines than gcm_list_file
        has rows; nothing is written in that case
    """
    with open(srr_list_file, "r") as srr_handle:
        srr_identifiers = srr_handle.read().splitlines()

    # newline='' lets the csv module do its own newline handling.
    with open(gcm_list_file, newline='') as gcm_handle:
        gcm_rows = list(csv.reader(gcm_handle, delimiter='\t'))

    # Fail before touching the output file, with a message that names the
    # mismatch (the index-based loop raised a bare IndexError here).
    if len(srr_identifiers) < len(gcm_rows):
        raise IndexError(
            "SRR list has %d entries but GCM list has %d rows"
            % (len(srr_identifiers), len(gcm_rows))
        )

    combined = [row + [srr] for row, srr in zip(gcm_rows, srr_identifiers)]

    with open(output_file, 'w', newline='') as out_handle:
        csv.writer(out_handle, dialect='excel-tab').writerows(combined)
if __name__ == "__main__":
    # Running this module as a script builds the combined SRR/GCM table.
    combineCSVFiles()