-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfastq_upsampling.py
executable file
·63 lines (51 loc) · 1.91 KB
/
fastq_upsampling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#! /usr/bin/env python3
#Anne-Sophie Denommé-Pichon
#AGPLv3
import sys
def upsample(upsampling_read_count, read_id):
    """
    Copy a FASTQ stream from stdin to stdout, then append copies of one read.

    The input is consumed four lines at a time (title, sequence, '+' line,
    quality) and every complete record is echoed unchanged. A trailing
    incomplete record is dropped. Each title is split on ':' and must have
    at least six fields; fields 2, 3 and 4 (called lane, tile and x here)
    must be integers, field 5 (y) is kept as text.

    After the input is exhausted, ``upsampling_read_count`` extra records are
    written with the sequence and quality of the selected read. Their titles
    keep the first two ':'-separated fields of the selected read's title and
    use ``max_lane + i``, ``max_tile + i``, ``max_x + i`` (i = 1..count) so
    they cannot collide with a title already seen in the input.

    upsampling_read_count : number of additional reads
    read_id : id of the read you want to repeat; it is compared with the
              whole title line (trailing whitespace stripped), so it must be
              given exactly as it appears in the file. If several records
              match, the last one is used.

    Raises ValueError if additional reads are requested but no record with
    title ``read_id`` was found (the input has already been echoed by then).
    """
    max_lane = 0
    max_tile = 0
    max_x = 0
    last_y = None
    selected = None  # (title, sequence, quality) of the last matching record
    stream = sys.stdin

    try:
        while True:  # endless loop reading the lines four at a time
            title_line = next(stream).rstrip()
            sequence_line = next(stream).rstrip()
            plus_line = next(stream).rstrip()
            quality_line = next(stream).rstrip()

            # Echo the record before parsing it, so the pass-through output
            # does not depend on the title being well-formed.
            print(title_line)
            print(sequence_line)
            print(plus_line)
            print(quality_line)

            fields = title_line.split(':')
            max_lane = max(max_lane, int(fields[2]))
            max_tile = max(max_tile, int(fields[3]))
            max_x = max(max_x, int(fields[4]))
            # NOTE(review): the y field of the generated titles is the one of
            # the last record read, not of the duplicated read - confirm that
            # this is intended.
            last_y = fields[5]

            if title_line == read_id:
                selected = (title_line, sequence_line, quality_line)
    except StopIteration:  # end of input reached: stop reading
        pass

    if upsampling_read_count <= 0:
        return
    if selected is None:
        raise ValueError("read id {!r} not found in input".format(read_id))

    selected_title, selected_sequence, selected_quality = selected
    title_prefix = selected_title.split(':')[0:2]
    for i in range(1, upsampling_read_count + 1):
        new_fields = [str(max_lane + i), str(max_tile + i), str(max_x + i), last_y]
        print(':'.join(title_prefix + new_fields))
        print(selected_sequence)
        print('+')  # always a bare '+', whatever the original separator line held
        print(selected_quality)
if __name__ == '__main__':
    # Command-line entry point: expects exactly two arguments, the number of
    # additional reads and the title of the read to repeat. The FASTQ data is
    # read from stdin and written to stdout.
    usage = "Usage: fastq_upsampling.py <upsampling_read_count> <read_id> < input.fastq > output.fastq"
    if len(sys.argv) != 3:
        print(usage, file=sys.stderr)
        sys.exit(1)
    try:
        requested_count = int(sys.argv[1])
    except ValueError:
        # A non-integer count is a usage error, not a crash with a traceback.
        print(usage, file=sys.stderr)
        sys.exit(1)
    upsample(requested_count, sys.argv[2])