-
Notifications
You must be signed in to change notification settings - Fork 4
/
config_default.yaml
85 lines (75 loc) · 4.66 KB
/
config_default.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#=================================================================================================
#
# config file to build a clean dataset of species songs from xeno-canto audio recordings
#
# authors : Félix Michaud and Sylvain Haupert
#
#=================================================================================================
# Fixed random seed
RANDOM_SEED: 1979
################################
# XENO-CANTO PARAMETERS
################################
# Xeno-canto query settings and the metadata file they produce.
PARAMS_XC:
  # Number of files to extract for each species
  NUM_FILES: 20
  # Xeno-canto parameters, one "tag:value" term per list entry.
  # Each term is single-quoted so it is unambiguously read as one string
  # (the double quotes in q:"A" are part of the value).
  PARAM_XC_LIST:
    - 'len:20-60'
    - 'q:"A"'
    - 'type:song'
  # The file will contain all the metadata collected from Xeno-canto
  CSV_XC_FILE: 'xc_metadata.csv'
################################
# ROIS EXTRACTION PARAMETERS
################################
# Parameters of the ROI (region of interest) extraction step.
PARAMS_EXTRACT:
  # --- Function ---
  # Function used to extract the ROIs
  # {bambird.extract_rois_core, bambird.extract_rois_full_sig}.
  # !FUNC is a custom tag: the loader reading this file must define it.
  FUNC: !FUNC bambird.extract_rois_full_sig

  # --- Audio resampling ---
  # Sampling frequency in Hz
  SAMPLE_RATE: 48000

  # --- Audio preprocess ---
  # Low frequency in Hz of the bandpass filter applied to the audio
  LOW_FREQ: 250
  # High frequency in Hz of the bandpass filter applied to the audio
  HIGH_FREQ: 12000
  # Butterworth filter order.
  # NOTE(review): presumably the order of the bandpass filter applied to the
  # audio (LOW_FREQ..HIGH_FREQ); the previous comment was a copy of the
  # FILTER_ORDER description below — confirm.
  BUTTER_ORDER: 1
  # Max duration of the audio file (in seconds)
  AUDIO_DURATION: 60
  # Split the audio signal in chunks of duration = CHUNK_DURATION (in seconds)
  CHUNK_DURATION: 10
  # Overlap ratio between consecutive chunks
  # (0 => no overlap, 0.75 => 75% of overlap between 2 consecutive chunks)
  OVLP: 0.5

  # --- Spectrogram ---
  # Mode used to remove the background {'mean', 'median'}
  MODE_RMBCKG: median
  # Number of points used for the STFT to compute the spectrogram
  NFFT: 1024
  # Number of points used to compute the running mean of the noise profile
  N_RUNNING_MEAN: 10

  # --- Hysteretic thresholding parameters (in dB) ---
  # NOTE(review): presumably MASK_PARAM1 is the high threshold and
  # MASK_PARAM2 the low threshold — confirm against the extraction code.
  MASK_PARAM1: 26
  MASK_PARAM2: 10

  # --- Select and merge bbox parameters ---
  # Ratio Y/X between the height (Y in px) and the width (X in px) of the ROI
  MAX_RATIO_YX: 7
  # Minimum event duration in s
  MIN_DURATION: 0.1
  # Overlapping time margin in s on the left side of the ROI to merge
  MARGIN_T_LEFT: 0.2
  # Overlapping time margin in s on the right side of the ROI to merge
  MARGIN_T_RIGHT: 0.2
  # Overlapping frequency margin in Hz on the bottom of the ROI to merge
  MARGIN_F_BOTTOM: 250
  # Overlapping frequency margin in Hz on the top of the ROI to merge
  MARGIN_F_TOP: 250

  # --- Save ROIs parameters ---
  # Time margin in s around the ROI
  MARGIN_T: 0.1
  # Frequency margin in Hz around the ROI
  MARGIN_F: 250
  # Butterworth filter order to select the bandwidth corresponding to the ROI
  FILTER_ORDER: 5
################################
# FEATURES EXTRACTION PARAMETERS
################################
# Parameters of the features extraction step.
PARAMS_FEATURES:
  # --- Audio resampling ---
  # Sampling frequency in Hz
  SAMPLE_RATE: 48000

  # --- Audio preprocess ---
  # Low frequency in Hz of the bandpass filter applied to the audio
  LOW_FREQ: 250
  # High frequency in Hz of the bandpass filter applied to the audio
  HIGH_FREQ: 12000
  # Butterworth filter order.
  # NOTE(review): presumably the order of the bandpass filter applied to the
  # audio (LOW_FREQ..HIGH_FREQ); the previous comment described it as the
  # filter "to select the bandwidth corresponding to the ROI" — confirm.
  BUTTER_ORDER: 5

  # --- Spectrogram ---
  # Number of points of the spectrogram
  NFFT: 1024
  # Resolution of the shapes {low, med, high}
  SHAPE_RES: 'high'
################################
# CLUSTERING PARAMETERS
################################
# Parameters of the clustering step.
PARAMS_CLUSTER:
  # Features used to cluster. Available choices:
  # {'shp', 'centroid_f', 'peak_f', 'duration_t', 'bandwidth_f',
  #  'bandwidth_min_f', 'bandwidth_max_f', 'min_f', 'max_f'}
  FEATURES:
    - 'shp'
    - 'centroid_f'
    - 'peak_f'
    - 'duration_t'
    - 'bandwidth_f'
    - 'bandwidth_min_f'
    - 'bandwidth_max_f'
  # Scaler method to prepare the features before the clustering
  # {STANDARDSCALER, ROBUSTSCALER, MINMAXSCALER}
  SCALER: MINMAXSCALER
  # Minimum number of ROIs to form a cluster, in % of the total number of
  # ROIs {number or blank}.
  # NOTE(review): the previous comment gave the range as "between 0 and 1",
  # which contradicts the "%" unit and the default of 5; presumably the
  # valid range is 0-100 — confirm against the clustering code.
  PERCENTAGE_PTS: 5
  # Minimum number of ROIs to form a cluster {integer or blank}.
  # Written as an explicit null (same value as leaving it blank).
  # NOTE(review): presumably only used when PERCENTAGE_PTS is blank — confirm.
  MIN_PTS: null
  # Maximum distance between elements in a single cluster
  # {a number or 'auto'}
  EPS: auto
  # Clustering method that allows some points to be outside of any cluster
  # {HDBSCAN, DBSCAN}
  METHOD: DBSCAN
  # Keep all the clusters, or only the biggest cluster while the rest is
  # discarded as noise {BIGGEST, ALL}
  KEEP: BIGGEST