# noisyspeech_synthesizer.cfg

# Configuration for generating Noisy Speech Dataset

# - sampling_rate: Specify the sampling rate. Default is 16 kHz
# - audioformat: default is .wav
# - audio_length: Minimum length, in seconds, of each audio clip (noisy and clean speech) that will be generated by augmenting utterances.
# - silence_length: Duration of silence introduced between clean speech utterances.
# - total_hours: Total number of hours of data required. Units are in hours. 
# - snr_lower: Lower bound for SNR required (default: 0 dB)
# - snr_upper: Upper bound for SNR required (default: 40 dB)
# - target_level_lower: Lower bound for the target audio level before audiowrite (default: -35 dB)
# - target_level_upper: Upper bound for the target audio level before audiowrite (default: -15 dB)
# - total_snrlevels: Number of SNR levels required (default: 5, which means there are 5 levels between snr_lower and snr_upper)
# - clean_activity_threshold: Activity threshold for clean speech
# - noise_activity_threshold: Activity threshold for noise
# - fileindex_start: Starting file ID that will be used in filenames
# - fileindex_end: Last file ID that will be used in filenames
# - is_test_set: Set it to True if it is the test set, else False for the training set
# - noise_dir: Specify the directory path to all noise files
# - speech_dir: Specify the directory path to all clean speech files
# - noisy_destination: Specify path to the destination directory to store noisy speech
# - clean_destination: Specify path to the destination directory to store clean speech
# - noise_destination: Specify path to the destination directory to store noise
# - log_dir: Specify path to the directory to store all the log files

# Configuration for unit tests
# - snr_test: Set to True if SNR test is required, else False
# - norm_test: Set to True if Normalization test is required, else False
# - sampling_rate_test: Set to True if Sampling Rate test is required, else False
# - clipping_test: Set to True if Clipping test is required, else False
# - unit_tests_log_dir: Specify path to the directory where you want to store logs

[noisy_speech]

# All settings live in this single section. The delimiter is "=" on every
# line, and each note sits on its own line above the key it describes.

# Sampling rate in Hz
sampling_rate = 16000
audioformat = *.wav
# Minimum length of each generated audio clip, in seconds
audio_length = 30
# Silence inserted between clean speech utterances (presumably seconds -- confirm)
silence_length = 0.2
# Total amount of data to generate, in hours
total_hours = 500
# SNR bounds in dB
snr_lower = -5
snr_upper = 20
# NOTE(review): presumably picks the SNR at random within the bounds above
# rather than from fixed levels -- confirm against the consuming script
randomize_snr = True
# Target audio level bounds in dB, applied before audiowrite
target_level_lower = -35
target_level_upper = -15
# Number of SNR levels between snr_lower and snr_upper
total_snrlevels = 21
# Activity thresholds for clean speech and for noise
clean_activity_threshold = 0.6
noise_activity_threshold = 0.0
# First and last file ID used in filenames
fileindex_start = None
fileindex_end = None
# True for the test set, False for the training set
is_test_set = False

# Input directories for noise and clean speech files
noise_dir = datasets\noise
speech_dir = datasets\clean\read_speech
# NOTE(review): presumably names noise categories to skip; None appears to
# mean "exclude nothing" -- confirm against the consuming script
noise_types_excluded = None

# Output directories for noisy speech, clean speech, noise, and log files
noisy_destination = datasets\training_set_sept12\noisy
clean_destination = datasets\training_set_sept12\clean
noise_destination = datasets\training_set_sept12\noise
log_dir = logs

# Config: add singing voice to clean speech
# 0 for no, 1 for yes
use_singing_data = 1
# Alternative path (disabled): datasets\clean_singing\VocalSet11\FULL
clean_singing = datasets\clean\singing_voice
# 1 for only male, 2 for only female, 3 (default) for both male and female
singing_choice = 3

# Config: add emotional data to clean speech
# 0 for no, 1 for yes
use_emotion_data = 1
clean_emotion = datasets\clean\emotional_speech

# Config: add Chinese (mandarin) data to clean speech
# 0 for no, 1 for yes
use_mandarin_data = 1
clean_mandarin = datasets\clean\mandarin_speech

# Config: add reverb to clean speech
# 1 for only real rir, 2 for only synthetic rir, 3 (default) use both real and synthetic
rir_choice = 3
# Lower bound of t60 range in seconds
lower_t60 = 0.3
# Upper bound of t60 range in seconds
upper_t60 = 1.3
rir_table_csv = datasets\acoustic_params\RIR_table_simple.csv
clean_speech_t60_csv = datasets\acoustic_params\cleanspeech_table_t60_c50.csv
# Disabled setting: percentage of clean speech convolved with RIR
# percent_for_adding_reverb = 0.5

# Unit tests config
# Each flag: True if the corresponding test is required, else False
snr_test = True
norm_test = True
sampling_rate_test = True
clipping_test = True

# Directory where unit test logs are stored
unit_tests_log_dir = unittests_logs