-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathS03_preprocess.py
66 lines (52 loc) · 1.66 KB
/
S03_preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
import tgt
import numpy as np
# Multiplier applied to TextGrid interval times when get_alignment converts
# them to frame indices (presumably the audio sample rate in Hz, with times
# in seconds — confirm against the corpus).
sampling_rate = 22050
# Divisor applied to the resulting sample positions to obtain frame indices
# (presumably the analysis hop size in samples — confirm against the feature
# extraction settings).
hop_length = 256
def get_alignment(tier, *, sample_rate=None, hop_size=None):
    """Turn a phone tier into phone labels and per-phone frame counts.

    Silence intervals before the first spoken phone and after the last one
    are dropped; silence intervals between spoken phones are kept.

    Args:
        tier: Object whose ``_objects`` attribute iterates over intervals
            exposing ``start_time``, ``end_time`` and ``text``.
        sample_rate: Optional override for the module-level
            ``sampling_rate``. ``None`` (the default) uses the module value.
        hop_size: Optional override for the module-level ``hop_length``.
            ``None`` (the default) uses the module value.

    Returns:
        A tuple ``(phones, durations, start_time, end_time)``:
        ``phones`` is the list of kept labels, ``durations`` the matching
        list of integer frame counts, ``start_time`` the start of the first
        spoken phone and ``end_time`` the end of the last spoken phone.
        When the tier holds no spoken phone the result is ``([], [], 0, 0)``.
    """
    # Fall back to the module-wide settings unless the caller overrides them.
    if sample_rate is None:
        sample_rate = sampling_rate
    if hop_size is None:
        hop_size = hop_length

    silences = ("sil", "sp", "spn")

    def frame_index(seconds):
        # Each boundary is rounded on its own (instead of rounding the
        # interval length) so the counts of back-to-back intervals add up
        # to the rounded position of the final boundary without drift.
        return np.round(seconds * sample_rate / hop_size)

    phones = []
    durations = []
    start_time = 0
    end_time = 0
    # Length of `phones` right after the most recent spoken phone; anything
    # stored beyond this index is trailing silence.
    spoken_end = 0

    for interval in tier._objects:
        begin, finish, label = (
            interval.start_time,
            interval.end_time,
            interval.text,
        )
        is_silence = label in silences

        # Nothing kept yet: skip leading silences, and remember where the
        # first spoken phone begins.
        if not phones:
            if is_silence:
                continue
            start_time = begin

        phones.append(label)
        if not is_silence:
            end_time = finish
            spoken_end = len(phones)

        durations.append(int(frame_index(finish) - frame_index(begin)))

    # Trim trailing silences.
    return phones[:spoken_end], durations[:spoken_end], start_time, end_time
if __name__ == "__main__":
    # Single-utterance run: read one aligned TextGrid, print its phone
    # sequence and the shape of its duration array, then save the durations.

    # wav_path and text_path are not read anywhere below.
    wav_path = "DATA/test_data/speaker_01/test_0.wav"
    text_path = "DATA/test_data/speaker_01/test_0.lab"
    tg_path = "DATA/test_aligned/speaker_01/test_0.TextGrid"
    dur_filename = "DATA/duration/speaker_01/test_0.npy"

    textgrid = tgt.io.read_textgrid(tg_path)
    phone, duration, start, end = get_alignment(
        textgrid.get_tier_by_name("phones")
    )

    # Phone sequence wrapped in braces, labels separated by single spaces.
    text = "{" + " ".join(phone) + "}"
    duration = np.array(duration)
    print(text)
    print(duration.shape)

    # np.save does not create missing parent directories, so make sure the
    # output folder exists before writing.
    os.makedirs(os.path.dirname(dur_filename), exist_ok=True)
    np.save(dur_filename, duration)