# noisyspeech_synthesizer.cfg
# Configuration for generating Noisy Speech Dataset
# - sampling_rate: Specify the sampling rate. Default is 16 kHz
# - audioformat: default is .wav
# - audio_length: Minimum length, in seconds, of each generated audio clip (noisy and clean speech); clips are built by augmenting utterances.
# - silence_length: Duration of silence introduced between clean speech utterances.
# - total_hours: Total number of hours of data required. Units are in hours.
# - snr_lower: Lower bound for SNR required (default: 0 dB)
# - snr_upper: Upper bound for SNR required (default: 40 dB)
# - target_level_lower: Lower bound for the target audio level before audiowrite (default: -35 dB)
# - target_level_upper: Upper bound for the target audio level before audiowrite (default: -15 dB)
# - total_snrlevels: Number of SNR levels required (default: 5, which means there are 5 levels between snr_lower and snr_upper)
# - clean_activity_threshold: Activity threshold for clean speech
# - noise_activity_threshold: Activity threshold for noise
# - fileindex_start: Starting file ID that will be used in filenames
# - fileindex_end: Last file ID that will be used in filenames
# - is_test_set: Set it to True if it is the test set, else False for the training set
# - noise_dir: Specify the directory path to all noise files
# - speech_dir: Specify the directory path to all clean speech files
# - noisy_destination: Specify path to the destination directory to store noisy speech
# - clean_destination: Specify path to the destination directory to store clean speech
# - noise_destination: Specify path to the destination directory to store noise
# - log_dir: Specify path to the directory to store all the log files
# Configuration for unit tests
# - snr_test: Set to True if SNR test is required, else False
# - norm_test: Set to True if Normalization test is required, else False
# - sampling_rate_test: Set to True if Sampling Rate test is required, else False
# - clipping_test: Set to True if Clipping test is required, else False
# - unit_tests_log_dir: Specify path to the directory where you want to store logs
[noisy_speech]
# All keys in this section use the "key: value" form; do not mix in "key=value".

# Audio format and clip layout (audio_length is in seconds).
sampling_rate: 16000
audioformat: *.wav
audio_length: 30
silence_length: 0.2
total_hours: 500

# SNR range (dB), target level range (dB) and number of SNR levels.
snr_lower: -5
snr_upper: 20
# NOTE(review): randomize_snr has no entry in the header comments; presumably it
# selects a random SNR between snr_lower and snr_upper when True -- confirm
# against the consuming script.
randomize_snr: True
target_level_lower: -35
target_level_upper: -15
total_snrlevels: 21

# Activity thresholds for clean speech and noise.
clean_activity_threshold: 0.6
noise_activity_threshold: 0.0

# File numbering and train/test selection.
fileindex_start: None
fileindex_end: None
is_test_set: False

# Input and output directories.
# NOTE(review): the paths in this section use backslash separators; presumably
# they need adjusting when the generator runs on a non-Windows platform -- confirm.
noise_dir: datasets\noise
speech_dir: datasets\clean\read_speech
# NOTE(review): noise_types_excluded has no entry in the header comments; None
# presumably means that no noise type is excluded -- confirm.
noise_types_excluded: None
noisy_destination: datasets\training_set_sept12\noisy
clean_destination: datasets\training_set_sept12\clean
noise_destination: datasets\training_set_sept12\noise
log_dir: logs

# Config: add singing voice to clean speech
# use_singing_data: 0 for no, 1 for yes
use_singing_data: 1
clean_singing: datasets\clean\singing_voice
# NOTE(review): the commented-out path below looks like an alternative value
# for clean_singing -- confirm or remove.
#datasets\clean_singing\VocalSet11\FULL
# singing_choice: 1 for only male, 2 for only female, 3 (default) for both male and female
singing_choice: 3

# Config: add emotional data to clean speech
# use_emotion_data: 0 for no, 1 for yes
use_emotion_data: 1
clean_emotion: datasets\clean\emotional_speech

# Config: add Chinese (mandarin) data to clean speech
# use_mandarin_data: 0 for no, 1 for yes
use_mandarin_data: 1
clean_mandarin: datasets\clean\mandarin_speech

# Config: add reverb to clean speech
# rir_choice: 1 for only real rir, 2 for only synthetic rir, 3 (default) use both real and synthetic
rir_choice: 3
# lower bound of t60 range in seconds
lower_t60: 0.3
# upper bound of t60 range in seconds
upper_t60: 1.3
rir_table_csv: datasets\acoustic_params\RIR_table_simple.csv
clean_speech_t60_csv: datasets\acoustic_params\cleanspeech_table_t60_c50.csv
# Disabled key: percentage of clean speech convolved with RIR.
# The description is kept on its own line so that uncommenting the key does not
# pull trailing comment text into its value.
# percent_for_adding_reverb: 0.5

# Unit tests config
snr_test: True
norm_test: True
sampling_rate_test: True
clipping_test: True
unit_tests_log_dir: unittests_logs