diff --git a/src/Audio911.jl b/src/Audio911.jl index 69ae733..9bc926b 100644 --- a/src/Audio911.jl +++ b/src/Audio911.jl @@ -1,11 +1,10 @@ module Audio911 -using DSP -using FFTW +using FFTW, DSP using LinearAlgebra using Parameters using SpecialFunctions -using Statistics +using Statistics, Roots using NaNStatistics using PyCall @@ -40,20 +39,22 @@ include("fft/spectral.jl") include("utils/speech_detector.jl") include("utils/in_out.jl") include("utils/trimaudio.jl") +# wavelets +include("wavelet/cwt.jl") # structures -export signal_setup, signal_data -# main functions -export takeFFT, lin_spectrogram, mel_spectrogram, _mfcc, spectral_features, f0 +export AudioSetup, AudioData, AudioObj +# audio features +export audio_features_obj, get_feature + # utility functions export speech_detector export load_audio, save_audio, trim_audio, normalize_audio -# audio features audioFeaturesExtractor -export audio_features_extractor - +# wavelets +export cwt, cwt_windowing +get_fft! # TODO patch -extractfeatures = takeFFT +extractfeatures = export extractfeatures - end # module Audio911 diff --git a/src/audioFeaturesExtractor.jl b/src/audioFeaturesExtractor.jl index 2840d88..d63f98b 100644 --- a/src/audioFeaturesExtractor.jl +++ b/src/audioFeaturesExtractor.jl @@ -1,394 +1,387 @@ -function audio_features_extractor( - x::AbstractVector{T}; - sr::Int64, - profile::Symbol = :all, - - # fft - fft_length::Int64 = 256, - window_type::Vector{Symbol} = [:hann, :periodic], - window_length::Int64 = fft_length, - overlap_length::Int64 = Int(round(fft_length * 0.500)), - # window_length::Int64 = Int(round(0.03 * sr)), - # overlap_length::Int64 = Int(round(0.02 * sr)), - window_norm::Bool = false, - - # spectrum - frequency_range::Vector{Int64} = Int[0, sr/2], - spectrum_type::Symbol = :power, # :power, :magnitude - - # mel - mel_style::Symbol = :htk, # :htk, :slaney - mel_bands::Int64 = 26, - filterbank_design_domain::Symbol = :linear, - filterbank_normalization::Symbol = :bandwidth, # :bandwidth, :area, :none - frequency_scale::Symbol = :mel, - - # mfcc - num_coeffs::Int64 = 13, - normalization_type::Symbol = :dithered, # :standard, :dithered - rectification::Symbol = :log, - log_energy_source::Symbol = :standard, # :standard (after windowing), :mfcc - log_energy_pos::Symbol = :none, #:append, :replace, :none - delta_window_length::Int64 = 9, - delta_matrix::Symbol = :transposed, # :standard, :transposed - - # spectral - spectral_spectrum::Symbol = :linear # :linear, :mel +""" +Audio911 + +può essere utilizzata in 2 modi differenti: +######################################################################################################### +1-creare un oggetto audio_obj + +audio = audio_features_obj(x, sr) + +audio.get_fft() +audio.get_lin_spec() +audio.get_mel_spec() +audio.get_mfcc() +audio.get_spectrals() +audio.get_f0() +audio.get_features() + +######################################################################################################### +2-utilizzare l'invocazione get_feature per ottenere le features separatamente + +costruzione: +get_feature( + x AbstractFloat file audio mono + sr Int64 frequenza di campionamento + feat Symbol audio feature da estrarre + :fft fast fourier transform (da implementare meglio) + :lin spettrogramma lineare + :mel spettrogramma mel + :mfcc coefficienti mfcc e relative delta e deltadelta + :spectrals le feature spettrali: + centroid, crest, decrease, entropy, flatness, flux, kurtosis, rolloff, skewness, slope, spread + :f0 frequenza fondamentale + kwargs... + +############################################################################################## +paramentri addizionali +sia per l'oggetto audio, che per la chiamata a feature singola + +# fft +fft_length::Int64 = 256, +dimensione finestra fft, valori consigliati: 256, 512, 1024 + +window_type::Tuple{Symbol, Symbol} = (:hann, :periodic), +window_length::Int64 = fft_length, +overlap_length::Int64 = round(Int, fft_length * 0.500), +parametri relativi alla finestrazione della fft +di default audio911 usa una finestra di tipo hann, anzichè hamming +con una finestra della stessa dimensione della finestra fft +e un overlap pari alla metà del suo valore + +i valori standard sarebbero questi +# window_length::Int64 = Int(round(0.03 * sr)), +# overlap_length::Int64 = Int(round(0.02 * sr)), + +window_norm::Bool = false, normalizzazione delle finestre + +# spectrum +frequency_range::Tuple{Int64, Int64} = (0, floor(Int, sr / 2)), +limiti banda, importantissimi per isolare la porzione di spettro dove si prevede di recuperare l'informazione + +spectrum_type::Symbol = :power, # :power, :magnitude +tipo di spettro, di default :power, molto raramente si usa magnitude + +# mel +mel_style::Symbol = :htk, # :htk, :slaney +tipo di banco filtro dello spettrogramma mel +la tipologia htk surclassa la tipologia slaney in tutti i nostri esperimenti + +mel_bands::Int64 = 26, +numero di bande che compongono lo spettrogramma mel. 26 è il valore di default +ma non c'è un vero e proprio standard di questo valore. +da ricordare che nel caso si voglia utilizzare anche la mfcc, +i coefficienti della mfcc vengono calcolati sulle prime bande dello spettrogramma +quindi una variazione di questo valore comporta anche un diverso funzionamento della mfcc +il cui valore "num_coeff" andrà opportunamente tarato. + +filterbank_design_domain::Symbol = :linear, +filterbank_normalization::Symbol = :bandwidth, # :bandwidth, :area, :none +frequency_scale::Symbol = :mel, +l'implementazione corretta di questi paramentri è da completare + +# mfcc +num_coeffs::Int64 = 13, +numero delle bande mfcc, vedi sopra + +normalization_type::Symbol = :dithered, # :standard, :dithered +paramentro preso da audioflux: +i valori dei coefficienti mfcc, se vanno sotto la soglia di 1e-8 +vengono normalizzati a questo valore. +mentre matlab non ha una soglia limite. +dithered > audiofluz, standard > matlab + +rectification::Symbol = :log, +log_energy_source::Symbol = :standard, # :standard (after windowing), :mfcc +log_energy_pos::Symbol = :none, #:append, :replace, :none +spesso viene salvato, nella mfcc, il valore del volume in log. +questi parametri definiscono dove viene calcolata e dove salvarla: +:append viene creato un n-esimo coefficiente, :replace la log energy va a sostituire il primo coefficiente mfcc +se ne sconsiglia comunque l'uso. + +delta_window_length::Int64 = 9, +finestra di calcolo della derivata + +delta_matrix::Symbol = :transposed, # :standard, :transposed +preso da audioflux che calcola le delta sull'asse delle frequenze anzichè sull'asse temporale +potrebbe sembrare un errore, ma potrebbe anche non esserlo + +# spectral +spectral_spectrum::Symbol = :lin, # :lin, :mel +si può scegliere su che spettrogramma calcolare le spectral features: se partendo dal lineare o dal mel + +# f0 +f0_method::Symbol = :nfc, +f0_range::Tuple{Int64, Int64} = (50, 400), +median_filter_length::Int64 = 1 +Questi paramentri sono in fase di studio +""" +################################################################################ +# audio object # +################################################################################ + +function audio_features_obj( + x::AbstractVector{Float64}, + sr::Int64; + + # profile::Symbol = :all, + + # fft + fft_length::Int64 = 256, + window_type::Tuple{Symbol, Symbol} = (:hann, :periodic), + window_length::Int64 = fft_length, + overlap_length::Int64 = round(Int, fft_length * 0.500), + # window_length::Int64 = Int(round(0.03 * sr)), + # overlap_length::Int64 = Int(round(0.02 * sr)), + window_norm::Bool = false, + + # spectrum + frequency_range::Tuple{Int64, Int64} = (0, floor(Int, sr / 2)), + spectrum_type::Symbol = :power, # :power, :magnitude + + # mel + mel_style::Symbol = :htk, # :htk, :slaney + mel_bands::Int64 = 26, + filterbank_design_domain::Symbol = :linear, + filterbank_normalization::Symbol = :bandwidth, # :bandwidth, :area, :none + frequency_scale::Symbol = :mel, + + # mfcc + num_coeffs::Int64 = 13, + normalization_type::Symbol = :dithered, # :standard, :dithered + rectification::Symbol = :log, + log_energy_source::Symbol = :standard, # :standard (after windowing), :mfcc + log_energy_pos::Symbol = :none, #:append, :replace, :none + delta_window_length::Int64 = 9, + delta_matrix::Symbol = :transposed, # :standard, :transposed + + # spectral + spectral_spectrum::Symbol = :lin, # :lin, :mel + + # f0 + f0_method::Symbol = :nfc, + f0_range::Tuple{Int64, Int64} = (50, 400), + median_filter_length::Int64 = 1 +) + setup = AudioSetup( + sr = sr, + + # fft + fft_length = fft_length, + window_type = window_type, + window_length = window_length, + overlap_length = overlap_length, + window_norm = window_norm, + + # spectrum + frequency_range = frequency_range, + spectrum_type = spectrum_type, + + # mel + mel_style = mel_style, + mel_bands = mel_bands, + filterbank_design_domain = filterbank_design_domain, + filterbank_normalization = filterbank_normalization, + frequency_scale = frequency_scale, + + # mfcc + num_coeffs = num_coeffs, + normalization_type = normalization_type, + rectification = rectification, + log_energy_source = log_energy_source, + log_energy_pos = log_energy_pos, + delta_window_length = delta_window_length, + delta_matrix = delta_matrix, + + # spectral + spectral_spectrum = spectral_spectrum, + + # f0 + f0_method = f0_method, + f0_range = f0_range, + median_filter_length = median_filter_length + ) + + # preemphasis + # zi = 2 * x[1] - x[2] + # filt!(x, [1.0, -0.97], 1.0, x, [zi]) + # normalize + # x = x ./ maximum(abs.(x)) + + data = AudioData( + x = x, + ) + + return AudioObj(setup, data) +end + +function audio_features_obj( + x::AbstractVector{T}, + sr::Int64; + kwargs... ) where {T <: AbstractFloat} + audio_features_obj(Float64.(x), sr; kwargs...) +end - setup = signal_setup( - sr = sr, - - # fft - window_type = window_type, - window_length = window_length, - overlap_length = overlap_length, - window_norm = window_norm, - - # spectrum - frequency_range = frequency_range, - spectrum_type = spectrum_type, - - # mel - mel_style = mel_style, - mel_bands = mel_bands, - filterbank_design_domain = filterbank_design_domain, - filterbank_normalization = filterbank_normalization, - frequency_scale = frequency_scale, - - # mfcc - num_coeffs = num_coeffs, - normalization_type = normalization_type, - rectification = rectification, - log_energy_source = log_energy_source, - log_energy_pos = log_energy_pos, - delta_window_length = delta_window_length, - delta_matrix = delta_matrix, - - # spectral - spectral_spectrum = spectral_spectrum, - ) - - # convert to Float64 - x = Float64.(x) - - # preemphasis - # not siutable for our kind of experiments. - # zi = 2 * x[1] - x[2] - # filt!(x, [1.0, -0.97], 1.0, x, [zi]) - # normalize - # x = x ./ maximum(abs.(x)) - - data = signal_data( - x = x, - ) - - takeFFT(data, setup) - mel_spectrogram(data, setup) - _mfcc(data, setup) - f0(data, setup) # pay attention to fft length! - - # TODO verificare che il sample sia di lunghezza superiore a fft_length - - if profile == :full - setup.frequency_range=Int[0, 1000] # verifica che 1000 < sr/2 - lin_spectrogram(data, setup) - spectral_features(data, setup) - - vcat( - ( - data.mel_spectrogram', - data.mfcc_coeffs', - data.mfcc_delta', - data.mfcc_deltadelta', - data.spectral_centroid', - data.spectral_crest', - data.spectral_decrease', - data.spectral_entropy', - data.spectral_flatness', - data.spectral_flux', - data.spectral_kurtosis', - data.spectral_rolloff', - data.spectral_skewness', - data.spectral_slope', - data.spectral_spread', - data.f0', - )..., - ) - - elseif profile == :gender - setup.frequency_range=Int[0, 1000] # verifica che 1000 < sr/2 - lin_spectrogram(data, setup) - spectral_features(data, setup) - - vcat(( - data.mel_spectrogram[:, 1:13]', - data.mfcc_coeffs', - # data.mfcc_delta', - # data.mfcc_deltadelta', - data.spectral_centroid', - data.spectral_crest', - data.spectral_decrease', - data.spectral_entropy', - data.spectral_flatness', - data.spectral_flux', - data.spectral_kurtosis', - data.spectral_rolloff', - data.spectral_skewness', - data.spectral_slope', - data.spectral_spread', - data.f0', - )...) - - elseif profile == :age - lin_spectrogram(data, setup) - spectral_features(data, setup) - - vcat(( - # data.mel_spectrogram', - data.mfcc_coeffs', - # data.mfcc_delta', - # data.mfcc_deltadelta', - data.spectral_centroid', - data.spectral_crest', - data.spectral_decrease', - # data.spectral_entropy', - data.spectral_flatness', - data.spectral_flux', - # data.spectral_kurtosis', - # data.spectral_rolloff', - # data.spectral_skewness', - # data.spectral_slope', - # data.spectral_spread', - data.f0', - )...) - - elseif profile == :speaker - setup.frequency_range=Int[0, 1000] # verifica che 1000 < sr/2 - lin_spectrogram(data, setup) - spectral_features(data, setup) - - vcat(( - # data.mel_spectrogram', - data.mfcc_coeffs', - # data.mfcc_delta', - # data.mfcc_deltadelta', - data.spectral_centroid', - # data.spectral_crest', - data.spectral_decrease', - # data.spectral_entropy', - data.spectral_flatness', - # data.spectral_flux', - # data.spectral_kurtosis', - # data.spectral_rolloff', - # data.spectral_skewness', - # data.spectral_slope', - # data.spectral_spread', - data.f0', - )...) - - elseif profile == :experimental - setup.frequency_range=Int[0, 1000] # verifica che 1000 < sr/2 - lin_spectrogram(data, setup) - spectral_features(data, setup) - - vcat(( - # data.mel_spectrogram', - data.mfcc_coeffs', - # data.mfcc_delta', - # data.mfcc_deltadelta', - data.spectral_centroid', - # data.spectral_crest', - data.spectral_decrease', - # data.spectral_entropy', - data.spectral_flatness', - # data.spectral_flux', - # data.spectral_kurtosis', - # data.spectral_rolloff', - # data.spectral_skewness', - # data.spectral_slope', - # data.spectral_spread', - data.f0', - )...) - - else - error("Unknown feature extraction profile: $profile.") - end +################################################################################ +# stand alone functions # +################################################################################ +function get_fft(setup::AudioSetup, data::AudioData) + get_fft!(setup, data) + + return data.fft end -# bitmask approach -# 1 - mel spectrogram # every audio parameter is defaulted to optimized value -# 3 - linear spectrogram -# 4 - mfcc -# 5 - delta -# 6 - delta delta -# 7 - centroid -# 8 - crest -# 9 - ecrease -# 10 - entropy -# 11 - flatness -# 12 - flux -# 13 - kurtosis -# 14 - rolloff -# 15 - skewness -# 16 - slope -# 17 - spread -# 18 - f0 - -# :full = features_bitmask = UInt8[ -# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -# 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -# 1, -# 1, -# 1, -# 1, -# 1, -# 1, -# 1, -# 1, -# 1, -# 1, -# 1, -# 1 -# ] - -# function audio_features_extractor( -# x::AbstractVector{T}; -# sr::Int64, -# features_bitmask::Vector{UInt8}=UInt8[ -# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -# 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -# 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -# 1, -# 1, -# 1, -# 1, -# 1, -# 1, -# 1, -# 1, -# 1, -# 1, -# 1, -# 1 -# ], - -# # fft -# fft_length::Int64 = 256, -# window_type::Vector{Symbol}=[:hann, :periodic], -# window_length::Int64=fft_length, -# overlap_length::Int64=Int(round(fft_length * 0.500)), -# # window_length::Int64 = Int(round(0.03 * sr)), -# # overlap_length::Int64 = Int(round(0.02 * sr)), -# window_norm::Bool=:false, - -# # spectrum -# frequency_range::Vector{Int64}=Int[0, sr/2], -# spectrum_type::Symbol=:power, - -# # mel -# mel_style::Symbol=:htk, -# mel_bands::Int64=26, -# filterbank_design_domain::Symbol=:linear, -# filterbank_normalization::Symbol=:bandwidth, -# frequency_scale::Symbol=:mel, - -# # mfcc -# num_coeffs::Int64=13, -# normalization_type::Symbol=:dithered, -# rectification::Symbol=:log, -# log_energy_source::Symbol=:standard, -# log_energy_pos::Symbol=:replace, -# delta_window_length::Int64=9, -# delta_matrix::Symbol=:transposed, - -# # spectral -# spectral_spectrum::Symbol=:linear -# ) where {T<:AbstractFloat} - -# # function get_fft_length(sr::Int64) -# # sr <= 4000 && return 128 -# # sr <= 8000 && return 256 -# # sr <= 16000 && return 512 -# # return 1024 -# # end - -# # fft_length = get_fft_length(sr) - -# setup = signal_setup( -# sr=sr, - -# # fft -# window_type=window_type, -# window_length=window_length, -# overlap_length=overlap_length, -# window_norm=window_norm, - -# # spectrum -# frequency_range=frequency_range, -# spectrum_type=spectrum_type, - -# # mel -# mel_style=mel_style, -# mel_bands=mel_bands, -# filterbank_design_domain=filterbank_design_domain, -# filterbank_normalization=filterbank_normalization, -# frequency_scale=frequency_scale, - -# # mfcc -# num_coeffs=num_coeffs, -# normalization_type=normalization_type, -# rectification=rectification, -# log_energy_source=log_energy_source, -# log_energy_pos=log_energy_pos, -# delta_window_length=delta_window_length, -# delta_matrix=delta_matrix, - -# # spectral -# spectral_spectrum=spectral_spectrum -# ) - -# # convert to Float64 -# x = Float64.(x) - -# # preemphasis -# # not siutable for our kind of experiments, maybe for speaker recognition: needs to look over it. -# # zi = 2 * x[1] - x[2] -# # filt!(x, [1.0, -0.97], 1.0, x, [zi]) -# # normalize -# # x = x ./ maximum(abs.(x)) - -# data = signal_data( -# x=x -# ) - -# takeFFT(data, setup) -# lin_spectrogram(data, setup) -# mel_spectrogram(data, setup) -# _mfcc(data, setup) -# spectral_features(data, setup) -# f0(data, setup) # pay attention to fft length! - -# full_feats = vcat(( -# data.mel_spectrogram', -# data.mfcc_coeffs', -# data.mfcc_delta', -# data.mfcc_deltadelta', -# data.spectral_centroid', -# data.spectral_crest', -# data.spectral_decrease', -# data.spectral_entropy', -# data.spectral_flatness', -# data.spectral_flux', -# data.spectral_kurtosis', -# data.spectral_rolloff', -# data.spectral_skewness', -# data.spectral_slope', -# data.spectral_spread', data.f0' -# )...) -# end \ No newline at end of file +function get_lin_spec(setup::AudioSetup, data::AudioData) + get_fft!(setup, data) + lin_spectrogram!(setup, data) + + return data.lin_spectrogram, setup.lin_frequencies +end + +function get_mel_spec(setup::AudioSetup, data::AudioData) + get_fft!(setup, data) + get_mel_spec!(setup, data) + + return data.mel_spectrogram, setup.mel_frequencies +end + +function get_mfcc(setup::AudioSetup, data::AudioData) + get_fft!(setup, data) + get_mel_spec!(setup, data) + get_mfcc!(setup, data) + get_mfcc_deltas!(setup, data) + + return data.mfcc_coeffs, data.mfcc_delta, data.mfcc_deltadelta +end + +function get_spectrals(setup::AudioSetup, data::AudioData) + get_fft!(setup, data) + if setup.spectral_spectrum == :lin + lin_spectrogram!(setup, data) + elseif setup.spectral_spectrum == :mel + get_mel_spec!(setup, data) + else + error("setup.spectral_spectrum must be :lin or :mel.") + end + get_spectrals!(setup, data) + + return [ + data.spectral_centroid, + data.spectral_crest, + data.spectral_decrease, + data.spectral_entropy, + data.spectral_flatness, + data.spectral_flux, + data.spectral_kurtosis, + data.spectral_rolloff, + data.spectral_skewness, + data.spectral_slope, + data.spectral_spread + ] +end + +function get_f0(setup::AudioSetup, data::AudioData) + get_f0!(setup, data) + + return data.f0 +end + +################################################################################ +# stand alone functions caller # +################################################################################ +function get_feature( + x::AbstractVector{Float64}, + sr::Int64, + feat::Symbol; + + # profile::Symbol = :all, + + # fft + fft_length::Int64 = 256, + window_type::Tuple{Symbol, Symbol} = (:hann, :periodic), + window_length::Int64 = fft_length, + overlap_length::Int64 = round(Int, fft_length * 0.500), + # window_length::Int64 = Int(round(0.03 * sr)), + # overlap_length::Int64 = Int(round(0.02 * sr)), + window_norm::Bool = false, + + # spectrum + frequency_range::Tuple{Int64, Int64} = (0, floor(Int, sr / 2)), + spectrum_type::Symbol = :power, # :power, :magnitude + + # mel + mel_style::Symbol = :htk, # :htk, :slaney + mel_bands::Int64 = 26, + filterbank_design_domain::Symbol = :linear, + filterbank_normalization::Symbol = :bandwidth, # :bandwidth, :area, :none + frequency_scale::Symbol = :mel, + + # mfcc + num_coeffs::Int64 = 13, + normalization_type::Symbol = :dithered, # :standard, :dithered + rectification::Symbol = :log, + log_energy_source::Symbol = :standard, # :standard (after windowing), :mfcc + log_energy_pos::Symbol = :none, #:append, :replace, :none + delta_window_length::Int64 = 9, + delta_matrix::Symbol = :transposed, # :standard, :transposed + + # spectral + spectral_spectrum::Symbol = :lin # :lin, :mel +) + setup = AudioSetup( + sr = sr, + + # fft + fft_length = fft_length, + window_type = window_type, + window_length = window_length, + overlap_length = overlap_length, + window_norm = window_norm, + + # spectrum + frequency_range = frequency_range, + spectrum_type = spectrum_type, + + # mel + mel_style = mel_style, + mel_bands = mel_bands, + filterbank_design_domain = filterbank_design_domain, + filterbank_normalization = filterbank_normalization, + frequency_scale = frequency_scale, + + # mfcc + num_coeffs = num_coeffs, + normalization_type = normalization_type, + rectification = rectification, + log_energy_source = log_energy_source, + log_energy_pos = log_energy_pos, + delta_window_length = delta_window_length, + delta_matrix = delta_matrix, + + # spectral + spectral_spectrum = spectral_spectrum + ) + + data = AudioData( + x = x, + ) + + calc = Dict([ + :fft => get_fft, + :lin => get_lin_spec, + :mel => get_mel_spec, + :mfcc => get_mfcc, + :spectrals => get_spectrals, + :f0 => get_f0]) + + return calc[feat](setup, data) +end + +function get_feature( + x::AbstractVector{T}, + sr::Int64, + feat::Symbol; + kwargs... +) where {T <: AbstractFloat} + get_feature(Float64.(x), sr, feat; kwargs...) +end \ No newline at end of file diff --git a/src/fft/f0.jl b/src/fft/f0.jl index ed6b1af..3b4533a 100644 --- a/src/fft/f0.jl +++ b/src/fft/f0.jl @@ -1,5 +1,3 @@ -# using FFTW - function get_candidates( domain::AbstractArray{T}, edge::AbstractVector{Int64}, @@ -16,7 +14,7 @@ end function i_clip( x::AbstractVector{T}, - range::Vector{Int64} = [50, 400], + range::Tuple{Int64, Int64} = (50, 400), ) where {T <: AbstractFloat} x[x. 1, :min_peak_distance => 1) - if method == :nfc - edge = Int.(round.(setup.sr ./ reverse(f0_range))) + if setup.f0_method == :nfc + edge = Int.(round.(setup.sr ./ reverse(reduce(vcat, getindex.(setup.f0_range))))) r = size(y, 1) mxl = min(edge[end], r - 1) m2 = nextpow(2, 2 * r - 1) @@ -82,13 +80,13 @@ function f0( # conf = peak./sum(abs(peak),2) ## TODO - # elseif method == :srh - # elseif method == :pef - # elseif method == :cep - # elseif method == :lhs + # elseif setup.f0_method == :srh + # elseif setup.f0_method == :pef + # elseif setup.f0_method == :cep + # elseif setup.f0_method == :lhs end # force pitch estimate inside band edges - frq_0 = i_clip(vec(frq_0), f0_range) + frq_0 = i_clip(vec(frq_0), setup.f0_range) data.f0 = vec(frq_0[1:num_hops_final, :]) end \ No newline at end of file diff --git a/src/fft/fft.jl b/src/fft/fft.jl index 53c374e..a109d94 100644 --- a/src/fft/fft.jl +++ b/src/fft/fft.jl @@ -20,10 +20,10 @@ function get_onesided_fft_range(fft_length::Int64) end end # get_onesided_fft_range -function takeFFT( - data::signal_data, - setup::signal_setup -) +################################################################################ +# main # +################################################################################ +function get_fft!(setup::AudioSetup, data::AudioData) # TODO validate FFT length, validate overlap length (audioFeatureExtractor.m line 1324) setup.fft_length = setup.window_length # definisce la fft pari alla finestra @@ -48,6 +48,9 @@ function takeFFT( # else # data.fft = abs.(Z) # end + + ######### NON è nel punto corretto: la fft deve essere a numeri complessi, poi convertita in reali nel calcolo degli spettrogrammi!!! + ########### DA CAMBIARE ANCHE la struttura dati. setup.spectrum_type == :power ? data.fft = real(Z .* conj(Z)) : data.fft = abs.(Z) # log energy @@ -64,37 +67,4 @@ function takeFFT( data.log_energy = log.(log_energy) end - -end # takeFFT(data, setup) - -function takeFFT( - x::AbstractArray{T}, - sr::Int64; - fft_length::Int64=256, - window_type::Vector{Symbol}=[:hann, :periodic], - window_length::Int64=Int(round(0.03 * sr)), - overlap_length::Int64=Int(round(0.02 * sr)), - window_norm::Bool=true, - frequency_range::Vector{Int64}=[0, Int(floor(sr /2))], - spectrum_type::Bool=:power -) where {T<:AbstractFloat} - # setup and data structures definition - setup = signal_Setup( - sr=sr, - fft_length=fft_length, - window_type=window_type, - window_length=window_length, - overlap_length=overlap_length, - window_norm=window_norm, - frequency_range=frequency_range, - spectrum_type=spectrum_type - ) - - data = signal_data( - x=Float64.(x) - ) - - takeFFT(data, setup) - - return data.fft, setup.fft_frequencies -end # takeFFT(kwarg...) \ No newline at end of file +end # get_fft! \ No newline at end of file diff --git a/src/fft/lin.jl b/src/fft/lin.jl index 1a0a0dc..45743d2 100644 --- a/src/fft/lin.jl +++ b/src/fft/lin.jl @@ -10,9 +10,12 @@ function get_lin_norm_factor(spectrum_type::Symbol, fft_window::Vector{Float64}) end end -function lin_spectrogram( - data::signal_data, - setup::signal_setup +################################################################################ +# main # +################################################################################ +function lin_spectrogram!( + setup::AudioSetup, + data::AudioData, ) # trim to desired range bin_low = Int(ceil(setup.frequency_range[1] * setup.fft_length / setup.sr + 1)) @@ -68,6 +71,6 @@ function lin_spectrogram( # if (setup.bins[end] == floor(setup.fft_length / 2 + 1) && rem(setup.fft_length, 2) != 0) # linear_fc[end] = setup.sr * (setup.fft_length - 1) / (2 * setup.fft_length) # end - data.lin_spectrogram = data.lin_spectrogram' - + + data.lin_spectrogram = transpose(data.lin_spectrogram) end # lin_spectrogram \ No newline at end of file diff --git a/src/fft/mel.jl b/src/fft/mel.jl index 473055e..36c3967 100644 --- a/src/fft/mel.jl +++ b/src/fft/mel.jl @@ -1,9 +1,9 @@ function hz2mel( - hz::Vector{Int64}, + hz::Tuple{Int64, Int64}, mel_style::Symbol = :htk # :htk, :slaney ) if mel_style == :htk - mel = 2595 * log10.(1 .+ hz / 700) + mel = 2595 * log10.(1 .+ reduce(vcat, getindex.(hz)) / 700) else # slaney linStep = 200 / 3 logStep = log(6.4) / 27 @@ -51,14 +51,14 @@ end ### da generalizzare per ### frequency_scale :mel, :bark, :erb ### filterbanl_design_domain :linear, :warped (da verificare se serve) -function designMelFilterBank(data::signal_data, setup::signal_setup) +function designMelFilterBank(data::AudioData, setup::AudioSetup) # set the design domain ### da implementare in futuro setup.filterbank_design_domain == :linear ? design_domain = :linear : design_domain = setup.frequency_scale # compute band edges # TODO da inserire il caso :erb e :bark - + melRange = hz2mel(setup.frequency_range, setup.mel_style) # mimic audioflux linear mel_style @@ -67,7 +67,7 @@ function designMelFilterBank(data::signal_data, setup::signal_setup) setup.band_edges = lin_fq[1:(setup.mel_bands + 2)] else setup.band_edges = mel2hz( - LinRange(melRange[1], melRange[end], setup.mel_bands + 2), setup.mel_style) + LinRange(melRange[1], melRange[end], setup.mel_bands + 2), setup.mel_style) end ### parte esclusiva per mel filterbank si passa a file designmelfilterbank.m @@ -196,62 +196,12 @@ function audioDelta( end end -# function mel_spectrogram( -# x::AbstractArray{T}, -# sr::Int64; -# # default setup -# # fft -# window_type::Symbol=:hann, -# window_length::Int=Int(round(0.03 * sr)), -# overlap_length::Int=Int(round(0.02 * sr)), - -# # mel -# mel_bands::Int=32, -# mel_style::Symbol=:slaney, # :htk, :slaney -# frequency_range::Vector{Int64}=[0, Int(round(sr / 2))], -# filterbank_normalization::Symbol=:bandwidth, -# spectrum_type::Symbol=:power, - -# # filterbank_design_domain::Symbol=:linear, # settato, ma si usa? -# # windowNormalization::Bool=true, # settato, ma si usa? -# # oneSided::Bool=true # default, non viene parametrizzato -# ) where {T<:AbstractFloat} - -# # setup and data structures definition -# setup = signal_setup( -# sr=sr, - -# # fft -# window_type=window_type, -# window_length=window_length, -# overlap_length=overlap_length, - -# # linear spectrum -# lin_frequency_range=[0.0, sr / 2], - -# # mel -# mel_bands=mel_bands, -# mel_style=mel_style, -# frequency_range=Float64.(frequency_range), -# filterbank_normalization=filterbank_normalization, -# spectrum_type=spectrum_type, - -# # filterbank_design_domain=filterbank_design_domain, # settato, ma si usa? -# # windowNormalization=windowNormalization, # settato, ma si usa? -# # oneSided=oneSided # default, non viene parametrizzato -# ) - -# data = signal_data( -# x=Float64.(x) -# ) - -# takeFFT(data, setup) -# melSpectrogram(data, setup) -# end - -function mel_spectrogram( - data::signal_data, - setup::signal_setup +################################################################################ +# main # +################################################################################ +function get_mel_spec!( + setup::AudioSetup, + data::AudioData ) designMelFilterBank(data, setup) @@ -267,12 +217,12 @@ function mel_spectrogram( # error("magnitude not yet implemented.") # end - data.mel_spectrogram = data.mel_spectrogram' + data.mel_spectrogram = transpose(data.mel_spectrogram) end # melSpectrogram -function _mfcc( - data::signal_data, - setup::signal_setup +function get_mfcc!( + setup::AudioSetup, + data::AudioData ) # Design DCT matrix DCTmatrix = create_DCT_matrix(setup.mel_bands) @@ -318,77 +268,14 @@ function _mfcc( elseif (setup.log_energy_pos == :replace) data.mfcc_coeffs = hcat(data.log_energy, data.mfcc_coeffs[:, 2:end]) end +end - # METTERE IL CASO CHE le delta vengono calcolate solo se necessario +function get_mfcc_deltas!( + setup::AudioSetup, + data::AudioData +) data.mfcc_delta = audioDelta( data.mfcc_coeffs, setup.delta_window_length, setup.delta_matrix) data.mfcc_deltadelta = audioDelta( data.mfcc_delta, setup.delta_window_length, setup.delta_matrix) -end - -# function mfcc( -# x::AbstractArray{T}, -# sr::Int64; - -# # default setup -# # fft -# window_type::Symbol=:hann, -# window_length::Int=Int(round(0.03 * sr)), -# overlap_length::Int=Int(round(0.02 * sr)), -# window_norm::Bool=true, - -# # mel -# mel_bands::Int=32, -# mel_style::Symbol=:slaney, # :htk, :slaney -# frequency_range::Vector{Int64}=[0, Int(round(sr / 2))], -# filterbank_normalization::Symbol=:bandwidth, -# spectrum_type::Symbol=:power, - -# # mfcc -# num_coeffs::Int=13, -# rectification::Symbol=:log, -# log_energy_pos::Symbol=:append, -# delta_window_length::Int=9, - -# # filterbank_design_domain::Symbol=:linear, # settato, ma si usa? -# # oneSided::Bool=true # default, non viene parametrizzato -# ) where {T<:AbstractFloat} - -# # setup and data structures definition -# setup = signal_setup( -# sr=sr, - -# # fft -# window_type=window_type, -# window_length=window_length, -# overlap_length=overlap_length, -# window_norm=window_norm, - -# # linear spectrum -# lin_frequency_range=[0.0, sr / 2], - -# # mel -# mel_bands=mel_bands, -# mel_style=mel_style, -# frequency_range=Float64.(frequency_range), -# filterbank_normalization=filterbank_normalization, -# spectrum_type=spectrum_type, - -# # mfcc -# num_coeffs=num_coeffs, -# rectification=rectification, -# log_energy_pos=log_energy_pos, -# delta_window_length=delta_window_length, - -# # filterbank_design_domain=filterbank_design_domain, # settato, ma si usa? -# # oneSided=oneSided # default, non viene parametrizzato -# ) - -# data = signal_data( -# x=Float64.(x) -# ) - -# takeFFT(data, setup) -# melSpectrogram(data, setup) -# _mfcc(data, setup) -# end # mfcc \ No newline at end of file +end \ No newline at end of file diff --git a/src/fft/spectral.jl b/src/fft/spectral.jl index eddde0e..866e0ec 100644 --- a/src/fft/spectral.jl +++ b/src/fft/spectral.jl @@ -1,17 +1,12 @@ -# using LinearAlgebra - -# include("../signalDataStructure.jl") -# include("fft.jl") - -function get_spectrum(data::signal_data, setup::signal_setup) +function get_spectrum(setup::AudioSetup, data::AudioData) setup.spectral_spectrum == :mel && return data.mel_spectrogram', setup.mel_frequencies - setup.spectral_spectrum == :linear && return data.lin_spectrogram', setup.lin_frequencies + setup.spectral_spectrum == :lin && return data.lin_spectrogram', setup.lin_frequencies error("Unknown spectral spectrum") end function spectral_crest( s::AbstractArray{Float64}, - data::signal_data, + data::AudioData, sum_x1::Vector{Float64}, arithmetic_mean::Vector{Float64} ) @@ -23,14 +18,14 @@ function spectral_crest( data.spectral_crest = vec(peak ./ arithmetic_mean') end -function spectral_decrease(s::AbstractArray{Float64}, data::signal_data) +function spectral_decrease(s::AbstractArray{Float64}, data::AudioData) # calculate decrease data.spectral_decrease = vec(real(sum((s[2:end, :] .- s[1, :]') ./ (1:size(s, 1)-1), dims=1) ./ sum(s[2:end, :], dims=1))) end function spectral_entropy( s::AbstractArray{Float64}, - data::signal_data, + data::AudioData, sum_x1::Vector{Float64} ) # calculate entropy @@ -41,7 +36,7 @@ end function spectral_flatness( s::AbstractArray{Float64}, - data::signal_data, + data::AudioData, sum_x1::Vector{Float64}, arithmetic_mean::Vector{Float64} ) @@ -50,7 +45,7 @@ function spectral_flatness( data.spectral_flatness = vec(geometric_mean ./ arithmetic_mean') end -function spectral_flux(s::AbstractArray{Float64}, data::signal_data) +function spectral_flux(s::AbstractArray{Float64}, data::AudioData) initial_condition = s[:, 1] # calculate flux temp = diff(hcat(initial_condition, s), dims=2) @@ -63,7 +58,7 @@ end function spectral_kurtosis( s::AbstractArray{Float64}, - data::signal_data, + data::AudioData, # sum_x1::Vector{Float64}, # centroid::Vector{Float64}, higher_moment_tmp::AbstractArray{Float64}, @@ -80,7 +75,7 @@ function spectral_kurtosis( data.spectral_kurtosis = vec(sum(higher_momement_num .* higher_moment_tmp, dims=1) ./ (higher_moment_denom .* data.spectral_spread)') end -function spectral_rolloff(s::AbstractArray{Float64}, data::signal_data, fft_frequencies::Vector{Float64}) +function spectral_rolloff(s::AbstractArray{Float64}, data::AudioData, fft_frequencies::Vector{Float64}) # calculate rolloff point threshold = 0.95 c = cumsum(s, dims=1) @@ -94,7 +89,7 @@ end function spectral_skewness( s::AbstractArray{Float64}, - data::signal_data, + data::AudioData, sum_x1::Vector{Float64}, centroid::Vector{Float64}, higher_moment_denom::Vector{Float64}, @@ -112,7 +107,7 @@ end function spectral_slope( s::AbstractArray{Float64}, - data::signal_data, + data::AudioData, sum_x1::Vector{Float64}, arithmetic_mean::Vector{Float64}, fft_frequencies::Vector{Float64} @@ -123,39 +118,14 @@ function spectral_slope( data.spectral_slope = vec(real(sum(X_minus_mu_X .* f_minus_mu_f, dims=1) ./ sum(f_minus_mu_f .^ 2))) end -# function spectral_features( -# x::AbstractArray{T}, -# sr::Int64; -# window_length::Int64=Int(round(0.03 * sr)), -# overlap_length::Int64=Int(round(0.02 * sr)), -# frequency_range::Vector{Int64}=[0, Int(round(sr / 2))], -# window_type::Symbol=:hann, -# # windowNormalization::Bool=true, -# # oneSided::Bool=true -# ) where {T<:AbstractFloat} - -# # options and data structures definition -# options = signal_setup( -# sr=sr, -# window_length=window_length, -# overlap_length=overlap_length, -# frequency_range=Float64.(frequency_range), -# window_type=window_type, -# spectrum_type=:power, -# # windowNormalization=windowNormalization, -# # oneSided=oneSided -# ) - -# data = signal_data( -# x=Float64.(x) -# ) - -# takeFFT(data, options) -# spectral_features(data, options) -# end - -function spectral_features(data::signal_data, setup::signal_setup) - s, freq = get_spectrum(data, setup) +################################################################################ +# main # +################################################################################ +function get_spectrals!( + setup::AudioSetup, + data::AudioData + ) + s, freq = get_spectrum(setup, data) # common data size_x1 = size(s, 1) @@ -177,47 +147,4 @@ function spectral_features(data::signal_data, setup::signal_setup) spectral_skewness(s, data, sum_x1, data.spectral_centroid, higher_moment_denom, higher_momement_num) spectral_decrease(s, data) spectral_slope(s, data, sum_x1, arithmetic_mean, freq) -end - -function spectral_spread( - x::AbstractArray{T}, - sr::Int64; - fft_length::Int64=256, - window_type::Vector{Symbol}=[:hann, :periodic], - window_length::Int64=Int(round(0.03 * sr)), - overlap_length::Int64=Int(round(0.02 * sr)), - window_norm::Bool=true, - frequency_range::Vector{Int64}=[0, Int(floor(sr / 2))], - spectrum_type::Symbol=:magnitude -) where {T<:AbstractFloat} - # setup and data structures definition - setup_spread = signal_setup( - sr=sr, - fft_length=fft_length, - window_type=window_type, - window_length=window_length, - overlap_length=overlap_length, - window_norm=window_norm, - frequency_range=frequency_range, - spectrum_type=spectrum_type - ) - - data_spread = signal_data( - x=Float64.(x) - ) - - takeFFT(data_spread, setup_spread) - lin_spectrogram(data_spread, setup_spread) - - s, freq = data_spread.lin_spectrogram', setup_spread.lin_frequencies - - sum_x1 = vec(sum(s, dims=1)) - data_spread.spectral_centroid = vec(sum(s .* freq, dims=1) ./ sum_x1') - data_spread.spectral_centroid = replace!(data_spread.spectral_centroid, NaN => 0) - higher_moment_tmp = freq .- data_spread.spectral_centroid' - - data_spread.spectral_spread = vec(sqrt.(sum((higher_moment_tmp .^ 2) .* s, dims=1) ./ sum_x1')) - - return data_spread.spectral_spread -end - +end \ No newline at end of file diff --git a/src/signalDataStructure.jl b/src/signalDataStructure.jl index c187d7b..c9304ed 100644 --- a/src/signalDataStructure.jl +++ b/src/signalDataStructure.jl @@ -1,23 +1,23 @@ """ Audio911 signal data structures - uses package Parameter for @with_kw mutable struct + uses package Parameters for @with_kw mutable struct - signal_setup stores all datas that has to be shared in Audio911 module - signal_data stores all results from signal analysis + AudioSetup stores all datas that has to be shared in Audio911 module + AudioData stores all results from signal analysis """ -@with_kw mutable struct signal_setup +@with_kw mutable struct AudioSetup sr::Int64 # fft fft_length::Int64 = 0 - window_type::Vector{Symbol} = [:hann, :periodic] + window_type::Tuple{Symbol, Symbol} = (:hann, :periodic) window_length::Int64 = 0 overlap_length::Int64 = 0 window_norm::Bool = true # spectrum - frequency_range::Vector{Int64} = [] + frequency_range::Tuple{Int64, Int64} = (0, 0) lin_frequencies::Vector{Float64} = [] band_edges::AbstractVector{AbstractFloat} = [] spectrum_type::Symbol = :power # :power, :magnitude @@ -40,10 +40,15 @@ delta_matrix::Symbol = :standard # :standard, :transposed # spectral - spectral_spectrum::Symbol = :linear # :linear, :mel + spectral_spectrum::Symbol = :lin # :lin, :mel + + # f0 + f0_method::Symbol = :nfc + f0_range::Tuple{Int64, Int64} = (50, 400) + median_filter_length::Int64 = 1 end -@with_kw mutable struct signal_data +@with_kw mutable struct AudioData x::AbstractVector{Float64} = [] # fft @@ -64,9 +69,6 @@ end mfcc_deltadelta::AbstractArray{Float64} = [] log_energy::Vector{Float64} = [] - # f0 - f0::Vector{Float64} = [] - # spectral spectral_centroid::Vector{Float64} = [] spectral_crest::Vector{Float64} = [] @@ -79,117 +81,308 @@ end spectral_skewness::Vector{Float64} = [] spectral_slope::Vector{Float64} = [] spectral_spread::Vector{Float64} = [] + + # f0 + f0::Vector{Float64} = [] end -mutable struct SignalSetup - sr::Int +# reference: +# https://www.functionalnoise.com/pages/2023-01-31-julia-class/ - # fft - fft_length::Int - window_type::Tuple{Symbol, Symbol} # [:hann, :periodic] - window_length::Int - overlap_length::Int - window_norm::Bool +# TODO metti a posto gli output dei metodi - # spectrum - frequency_range::Vector{Int} - lin_frequencies::Vector{AbstractFloat} - band_edges::AbstractVector{AbstractFloat} - spectrum_type::Symbol - - # # mel - # mel_style::Symbol = :htk # :htk, :slaney - # mel_bands::Int64 = 26 - # mel_frequencies::Vector{Float64} = [] - # filterbank_design_domain::Symbol = :linear - # filterbank_normalization::Symbol = :bandwidth # :bandwidth, :area, :none - # frequency_scale::Symbol = :mel # TODO :mel, :bark, :erb - - # # mfcc - # num_coeffs::Int64 = 13 - # normalization_type::Symbol = :standard # :standard, :dithered - # rectification::Symbol = :log # :log, :cubic_root - # log_energy_source::Symbol = :standard # :standard (after windowing), :mfcc - # log_energy_pos::Symbol = :append #:append, :replace, :none - # delta_window_length::Int64 = 9 - # delta_matrix::Symbol = :standard # :standard, :transposed - - # # spectral - # spectral_spectrum::Symbol = :linear # :linear, :mel - - function SignalSetup(; - sr::Int, - fft_length::Int, - window_type::Tuple{Symbol, Symbol} = (:hann, :periodic), - window_length::Int = 0, - overlap_length::Int = 0, - window_norm::Bool = false, - # spectrum - frequency_range::Vector{Int} = [], - lin_frequencies::Vector{AbstractFloat} = [], - band_edges::Vector{AbstractFloat} = [], - spectrum_type::Symbol=:power, - ) - if window_type[1] ∉ (:hann, :hamming, :blackman, :flattopwin, :rect) - error("Unknown window_type $window_type[1].") +mutable struct AudioObj + setup::AudioSetup + data::AudioData + + const get_fft::Function + const get_lin_spec::Function + const get_mel_spec::Function + const get_mfcc::Function + const get_spectrals::Function + const get_f0::Function + const get_features::Function + + function get_fft(self::AudioObj) + if isempty(self.data.fft) + get_fft!(self.setup, self.data) + end + + return self.data.fft end - if window_type[2] ∉ (:periodic, :symmetric) - error("window_type second parameter must be :periodic or :symmetric.") + + function get_lin_spec(self::AudioObj) + if isempty(self.data.lin_spectrogram) + if isempty(self.data.fft) + get_fft!(self.setup, self.data) + end + lin_spectrogram!(self.setup, self.data) + end + + return self.data.lin_spectrogram, self.setup.lin_frequencies + end + + function get_mel_spec(self::AudioObj) + if isempty(self.data.mel_spectrogram) + if isempty(self.data.fft) + get_fft!(self.setup, self.data) + end + get_mel_spec!(self.setup, self.data) + end + + return self.data.mel_spectrogram, self.setup.mel_frequencies + end + + function get_mfcc(self::AudioObj) + if isempty(self.data.mfcc_coeffs) + if isempty(self.data.mel_spectrogram) + if isempty(self.data.fft) + get_fft!(self.setup, self.data) + end + get_mel_spec!(self.setup, self.data) + end + get_mfcc!(self.setup, self.data) + get_mfcc_deltas!(self.setup, self.data) + end + + return self.data.mfcc_coeffs, self.data.mfcc_delta, self.data.mfcc_deltadelta + end + + function get_spectrals(self::AudioObj) + if self.setup.spectral_spectrum == :lin && isempty(self.data.lin_spectrogram) + if isempty(self.data.fft) + get_fft!(self.setup, self.data) + end + lin_spectrogram!(self.setup, self.data) + elseif self.setup.spectral_spectrum == :mel && isempty(self.data.mel_spectrogram) + if isempty(self.data.fft) + get_fft!(self.setup, self.data) + end + mel_spectrogram!(self.setup, self.data) + end + if isempty(self.data.spectral_centroid) + get_spectrals!(self.setup, self.data) + end + + return [ + self.data.spectral_centroid, + self.data.spectral_crest, + self.data.spectral_decrease, + self.data.spectral_entropy, + self.data.spectral_flatness, + self.data.spectral_flux, + self.data.spectral_kurtosis, + self.data.spectral_rolloff, + self.data.spectral_skewness, + self.data.spectral_slope, + self.data.spectral_spread + ] + end + + function get_f0(self::AudioObj) + if isempty(self.data.f0) + get_f0!(self.setup, self.data) + end + + return self.data.fft end - if window_length == 0 - window_length = fft_length - elseif window_length < fft_length - error("window_length can't be smaller than fft_length.") + function get_features(self::AudioObj) + if isempty(self.data.fft) + get_fft!(self.setup, self.data) + end + if isempty(self.data.mel_spectrogram) + get_mel_spec!(self.setup, self.data) + end + if isempty(self.data.mfcc_coeffs) + get_mfcc!(self.setup, self.data) + get_mfcc_deltas!(self.setup, self.data) + end + if self.setup.spectral_spectrum == :lin && isempty(self.data.lin_spectrogram) + lin_spectrogram!(self.setup, self.data) + end + if isempty(self.data.spectral_centroid) + get_spectrals!(self.setup, self.data) + end + if isempty(self.data.f0) + get_f0!(self.setup, self.data) + end + + return vcat(( + self.data.mel_spectrogram', + self.data.mfcc_coeffs', + self.data.mfcc_delta', + self.data.mfcc_deltadelta', + self.data.spectral_centroid', + self.data.spectral_crest', + self.data.spectral_decrease', + self.data.spectral_entropy', + self.data.spectral_flatness', + self.data.spectral_flux', + self.data.spectral_kurtosis', + self.data.spectral_rolloff', + self.data.spectral_skewness', + self.data.spectral_slope', + self.data.spectral_spread', + self.data.f0' + )...) end - if overlap_length == 0 - overlap_length=Int(round(FFTLength * 0.500)) - elseif overlap_length > window_length - error("overlap_length can't be greater than window_length.") + function AudioObj(setup::AudioSetup, data::AudioData) + obj = new( + setup, data, + () -> get_fft(obj), + () -> get_lin_spec(obj), + () -> get_mel_spec(obj), + () -> get_mfcc(obj), + () -> get_spectrals(obj), + () -> get_f0(obj), + () -> get_features(obj) + ) + # return obj end +end - # if isempty(frequency_range) +mutable struct MyClass + myInt::Int + + # we have these `const` fields since Julia 1.8 + const print_int::Function + const set_int!::Function + + function print_int(self::MyClass) + println("hello, I have myInt: $(self.myInt)") + end - if spectrum_type ∉ (:power, :magnitude) - error("spectrum_type parameter must be symbol, :power or :magnitude.") + function set_int!(self::MyClass, new_int::Int) + self.myInt = new_int + return self end - new( - sr, - - # fft - fft_length, - window_type, - window_length, - overlap_length, - window_norm, - - # spectrum - frequency_range, - lin_frequencies, - band_edges, - spectrum_type, - - # # mel - # mel_style::Symbol = :htk # :htk, :slaney - # mel_bands::Int64 = 26 - # mel_frequencies::Vector{Float64} = [] - # filterbank_design_domain::Symbol = :linear - # filterbank_normalization::Symbol = :bandwidth # :bandwidth, :area, :none - # frequency_scale::Symbol = :mel # TODO :mel, :bark, :erb - - # # mfcc - # num_coeffs::Int64 = 13 - # normalization_type::Symbol = :standard # :standard, :dithered - # rectification::Symbol = :log # :log, :cubic_root - # log_energy_source::Symbol = :standard # :standard (after windowing), :mfcc - # log_energy_pos::Symbol = :append #:append, :replace, :none - # delta_window_length::Int64 = 9 - # delta_matrix::Symbol = :standard # :standard, :transposed - - # # spectral - # spectral_spectrum::Symbol = :linear # :linear, :mel + function MyClass(int::Int) + obj = new( + int, + () -> print_int(obj), + (new_int,) -> set_int!(obj, new_int) ) + return obj end -end \ No newline at end of file +end + +################################################################################ +# new data structures # +# TODO # +################################################################################ + +# mutable struct SignalSetup +# sr::Int + +# # fft +# fft_length::Int +# window_type::Tuple{Symbol, Symbol} # (:hann, :periodic) +# window_length::Int +# overlap_length::Int +# window_norm::Bool + +# # spectrum +# frequency_range::Tuple{Int64, Int64} +# lin_frequencies::Vector{AbstractFloat} +# band_edges::AbstractVector{AbstractFloat} +# spectrum_type::Symbol + +# # # mel +# # mel_style::Symbol = :htk # :htk, :slaney +# # mel_bands::Int64 = 26 +# # mel_frequencies::Vector{Float64} = [] +# # filterbank_design_domain::Symbol = :linear +# # filterbank_normalization::Symbol = :bandwidth # :bandwidth, :area, :none +# # frequency_scale::Symbol = :mel # TODO :mel, :bark, :erb + +# # # mfcc +# # num_coeffs::Int64 = 13 +# # normalization_type::Symbol = :standard # :standard, :dithered +# # rectification::Symbol = :log # :log, :cubic_root +# # log_energy_source::Symbol = :standard # :standard (after windowing), :mfcc +# # log_energy_pos::Symbol = :append #:append, :replace, :none +# # delta_window_length::Int64 = 9 +# # delta_matrix::Symbol = :standard # :standard, :transposed + +# # # spectral +# # spectral_spectrum::Symbol = :linear # :linear, :mel + +# function SignalSetup(; +# sr::Int, +# fft_length::Int, +# window_type::Tuple{Symbol, Symbol} = (:hann, :periodic), +# window_length::Int = 0, +# overlap_length::Int = 0, +# window_norm::Bool = false, +# # spectrum +# frequency_range::Tuple{Int64, Int64} = (0, 0), +# lin_frequencies::Vector{AbstractFloat} = [], +# band_edges::Vector{AbstractFloat} = [], +# spectrum_type::Symbol=:power, +# ) +# if window_type[1] ∉ (:hann, :hamming, :blackman, :flattopwin, :rect) +# error("Unknown window_type $window_type[1].") +# end +# if window_type[2] ∉ (:periodic, :symmetric) +# error("window_type second parameter must be :periodic or :symmetric.") +# end + +# if window_length == 0 +# window_length = fft_length +# elseif window_length < fft_length +# error("window_length can't be smaller than fft_length.") +# end + +# if overlap_length == 0 +# overlap_length=Int(round(FFTLength * 0.500)) +# elseif overlap_length > window_length +# error("overlap_length can't be greater than window_length.") +# end + +# # if isempty(frequency_range) + +# if spectrum_type ∉ (:power, :magnitude) +# error("spectrum_type parameter must be symbol, :power or :magnitude.") +# end + +# new( +# sr, + +# # fft +# fft_length, +# window_type, +# window_length, +# overlap_length, +# window_norm, + +# # spectrum +# frequency_range, +# lin_frequencies, +# band_edges, +# spectrum_type, + +# # # mel +# # mel_style::Symbol = :htk # :htk, :slaney +# # mel_bands::Int64 = 26 +# # mel_frequencies::Vector{Float64} = [] +# # filterbank_design_domain::Symbol = :linear +# # filterbank_normalization::Symbol = :bandwidth # :bandwidth, :area, :none +# # frequency_scale::Symbol = :mel # TODO :mel, :bark, :erb + +# # # mfcc +# # num_coeffs::Int64 = 13 +# # normalization_type::Symbol = :standard # :standard, :dithered +# # rectification::Symbol = :log # :log, :cubic_root +# # log_energy_source::Symbol = :standard # :standard (after windowing), :mfcc +# # log_energy_pos::Symbol = :append #:append, :replace, :none +# # delta_window_length::Int64 = 9 +# # delta_matrix::Symbol = :standard # :standard, :transposed + +# # # spectral +# # spectral_spectrum::Symbol = :linear # :linear, :mel +# ) +# end +# end \ No newline at end of file diff --git a/src/wavelet/cwt.jl b/src/wavelet/cwt.jl index 2fa8132..3fcc02e 100644 --- a/src/wavelet/cwt.jl +++ b/src/wavelet/cwt.jl @@ -1,489 +1,328 @@ # using SpecialFunctions -# using Statistics +# using Statistics, Roots # using FFTW +# using Parameters +# using Plots """ Continuous 1-D wavelet transform -CWTFILTERBANK methods: - -wt - Continuous wavelet transform -freqz - Wavelet frequency responses -timeSpectrum - Time-averaged wavelet spectrum -scaleSpectrum - Scale-averaged wavelet spectrum -wavelets - Time-domain wavelets -scales - Wavelet scales -waveletsupport - Wavelet time support -qfactor - Wavelet Q-factor -powerbw - 3-dB bandwidths of wavelet bandpass filters -centerFrequencies - Wavelet bandpass center frequencies -centerPeriods - Wavelet bandpass center periods - -CWTFILTERBANK properties: - -SamplingFrequency - Sampling frequency -SignalLength - Signal length -Wavelet - Analysis wavelet -FrequencyLimits - Frequency limits (tuple) -VoicesPerOctave - Voices per octave -Boundary - Reflect or treat data as periodic -da fare: -SamplingPeriod - Sampling period -PeriodLimits - Period limits -TimeBandwidth - Time-bandwidth product -WaveletParameters - Morse wavelet parameters +TODO documentation """ -if (!@isdefined(fbCell)) - struct fbCell - sr::Int64 - length::Int64 - wavelet::Symbol - frqLimits::Tuple{Int64,Int64} - vpo::Int64 # voices per octave - boundary::Symbol - end +################################################################################### +# data structures # +################################################################################### + +@with_kw mutable struct FbSetup + sr::Int64 + length::Int64 + wavelet::Symbol = :morse # :morse, :amor, :bump + gamma::Int64 = 3 + beta::Int64 = 20 + time_bandwidth::Int64 = 3 * 20 + cutoff::Int64 = 50 + vpo::Int64 = 10 # voices per octave + boundary::Symbol = :reflection # :reflection, :periodic + signal_pad::Int64 = 0 + frequency_range::Tuple{Int64, Int64} = (0, 0) + center_freq::Float64 = 0.0 + omega::Vector{Float64} = [] + frequencies::Vector{Float64} = [] + scales::Vector{Float64} = [] + period_range::Tuple{Int64, Int64} = (0, 0) # TODO cwtfilterbank.m line: 1093 + sampling_range::AbstractArray{Float64} = [] # TODO if necessary + psidft::AbstractArray{Float64} = [] + wavelet_center_freqs::AbstractArray{Float64} = [] end -if (!@isdefined(fbParameters)) - struct fbParameters - wavelet::Symbol - waveletParameters::AbstractArray{Any} - length::Int64 - sr::Int64 - samplingPeriod::AbstractArray{Any} - vpo::Int64 - timeBandwidth::Int64 - freqLimits::AbstractArray{Any} - periodLimits::AbstractArray{Any} - boundary::Symbol - end +function morsepeakfreq(ga::Int64, be::Int64) + # peak frequency for 0-th order Morse wavelet is $(\frac{\beta}{\gamma})^{1/\gamma}$ + peakAF = exp(1 / ga * (log(be) - log(ga))) + # obtain the peak frequency in cyclical frequency + peakCF = peakAF / (2 * pi) + + return peakAF, peakCF end -if (!@isdefined(fbData)) - struct fbData{T<:Real,S<:Real} - ga::Int64 - be::Int64 - cutOff::Int64 - normfreqflag::Bool - signalPad::Int64 - waveletCF::Real - omega::AbstractVector{T} - frequencies::AbstractVector{T} - scales::AbstractVector{T} - psiDFT::AbstractMatrix{S} - waveletCenterFrequencies::AbstractVector{T} - nyquistBin::Int64 - sigvar::Real - # npad - # PhiDFT - # PsiHalfPowerBandwidth - # PsiHalfPowerFrequencies - # PlotString - # CurrentClass - end +function morseproperties(ga::Int64, be::Int64) + morse_loga = (ga, be) -> be / ga * (1 + log(ga) - log(be)) + + width = sqrt(ga * be) + skew = (ga - 3) / width + kurt = 3 - skew .^ 2 - (2 / width^2) + + logsigo1 = 2 / ga * log(ga / (2 * be)) + loggamma((2 * be + 1 + 2) / ga) - + loggamma((2 * be + 1) / ga) + logsigo2 = 2 / ga * log(ga / (2 * be)) + 2 * loggamma((2 * be + 2) / ga) - + 2 * loggamma((2 * be + 1) / ga) + + sigmaF = sqrt(exp(logsigo1) - exp(logsigo2)) + ra = 2 * morse_loga(ga, be) - 2 * morse_loga(ga, be - 1) + + morse_loga(ga, 2 * (be - 1)) - morse_loga(ga, 2 * be) + rb = 2 * morse_loga(ga, be) - 2 * morse_loga(ga, be - 1 + ga) + + morse_loga(ga, 2 * (be - 1 + ga)) - morse_loga(ga, 2 * be) + rc = 2 * morse_loga(ga, be) - 2 * morse_loga(ga, be - 1 + ga ./ 2) + + morse_loga(ga, 2 * (be - 1 + ga ./ 2)) - morse_loga(ga, 2 * be) + + logsig2a = ra + 2 / ga * log(be / ga) + 2 * log(be) + + loggamma((2 * (be - 1) + 1) / ga) - loggamma((2 * be + 1) / ga) + logsig2b = rb + 2 / ga * log(be / ga) + 2 * log(ga) + + loggamma((2 * (be - 1 + ga) + 1) / ga) - loggamma((2 * be + 1) / ga) + logsig2c = rc + 2 / ga * log(be / ga) + log(2) + log(be) + log(ga) + + loggamma((2 * (be - 1 + ga ./ 2) + 1) / ga) - loggamma((2 * be + 1) / ga) + + sig2a = exp(logsig2a) + sig2b = exp(logsig2b) + sig2c = exp(logsig2c) + sigmaT = sqrt(sig2a + sig2b - sig2c) + + return width, skew, kurt, sigmaT, sigmaF end -function fzero( - funFcn::T, - x::Tuple{S,S} -) where {T<:Function,S<:Real} # Single-variable nonlinear zero finding - - # initialization - tol = eps(Float64) - - a, b = x - c = NaN - d = NaN - e = NaN - fa = funFcn(a) - fb = funFcn(b) - fc = fb - - # main loop, exit from middle of the loop - while ((fb != 0) && (a != b)) - # insure that b is the best result so far, a is the previous value of b, and c is on the opposite side of the zero from b. - if ((fb > 0) == (fc > 0)) - c = a - fc = fa - d = b - a - e = d - end - if (abs(fc) < abs(fb)) - a = b - b = c - c = a - fa = fb - fb = fc - fc = fa - end +function get_freq_cutoff_morse(cutoff::Int64, cf::Float64, ga::Int64, be::Int64) + anorm = 2 * exp(be / ga * (1 + (log(ga) - log(be)))) + alpha = 2 * (cutoff / 100) - # convergence test and possible exit - m = 0.5 * (c - b) - toler = 2.0 * tol * max(abs(b), 1.0) - if ((abs(m) <= toler) || (fb == 0.0)) - return b - end + psihat = x -> alpha - anorm * x .^ be * exp(-x .^ ga) - # choose bisection or interpolation - if ((abs(e) < toler) || (abs(fa) <= abs(fb))) - # bisection - d = m - e = m - else - # interpolation - s = fb / fa - if (a == c) - # linear interpolation - p = 2.0 * m * s - q = 1.0 - s - else - # inverse quadratic interpolation - q = fa / fc - r = fb / fc - p = s * (2.0 * m * q * (q - r) - (b - a) * (r - 1.0)) - q = (q - 1.0) * (r - 1.0) * (s - 1.0) - end - if (p > 0) - q = -q - else - p = -p - end - # is interpolated point acceptable - if ((2.0 * p < 3.0 * m * q - abs(toler * q)) && (p < abs(0.5 * e * q))) - e = d - d = p / q - else - d = m - e = m - end - end # interpolation - - # next point - a = b - fa = fb - if (abs(d) > toler) - b = b + d - elseif b > c - b = b - toler + omax = ((750) .^ (1 / ga)) + if psihat(cf) >= 0 + if psihat(omax) == psihat(cf) + omegaC = omax else - b = b + toler + omegaC = cf end - fb = funFcn(b) - end # main loop - - return b + else + omegaC = find_zero(psihat, (cf, omax)) + end end -function getFreqFromCutoffMorse( - cutoff::T, - cf::T, - ga::Int64, - be::Int64 -) where {T<:Real} - - anorm = 2 * exp(be / ga * (1 + (log(ga) - log(be)))) +function get_freq_cutoff_amor(cutoff::Int64, cf::Float64) alpha = 2 * cutoff - omax = ((750)^(1 / ga)) - psihat(om) = alpha - anorm * om^be * exp(-om^ga) + psihat = x -> alpha - 2 * exp(-(x - cf) .^ 2 / 2) - if (psihat(cf) >= 0) - if (psihat(omax) == psihat(cf)) - omegac = omax - else - omegac = cf - end - else - omegac = fzero(psihat, (cf, omax)) - end -end # function getFreqFromCutoffMorse - -function cwtfreqlimits( - wavelet::Symbol, - signalLength::Int64, - ga::Int64, - be::Int64, - vpo::Int64, - p::Int64, - cutoff::Int64, - fourierFactor::T, - sigmaT::T, - cf::T -) where {T<:Real} - - t = 1 #seconds - fs = 1 - cutoff = cutoff / 100 - maxscale = signalLength / (sigmaT * p) - - if (wavelet == :morse) - omegac = getFreqFromCutoffMorse(cutoff, cf, ga, be) - elseif (wavelet == :bump) - omegac = getFreqFromCutoffBump(cutoff, cf) - elseif (wavelet == :amor) - omegac = getFreqFromCutoffAmor(cutoff, cf) + omax = ((2 * 750) .^ 0.5 + cf) + + if psihat(cf) > 0 + omegaC = omax else - error("Unknown wavelet ", wavelet, ".") + omegaC = find_zero(psihat, (cf, omax)) end +end - minscale = omegac / pi - - if (maxscale < minscale * 2^(1 / vpo)) - maxscale = minscale * 2^(1 / vpo) - end +function get_freq_cutoff_bump(cutoff::Int64, cf::Float64) + sigma = 0.6 - minperiod = minscale * fourierFactor * t - maxfreq = 1 / (minscale * fourierFactor) * fs + if cutoff < 10 * eps(0.0) + omegaC = cf + sigma - 10 * eps(cf + sigma) + else + alpha = 2 * cutoff - maxperiod = maxscale * fourierFactor * t - minfreq = 1 / (maxscale * fourierFactor) * fs + psihat = x -> 1 / (1 - x^2) + log(alpha) - log(2) - 1 - if ((maxfreq > fs / 2) || (minperiod < 2 * t)) - maxfreq = fs / 2 - minperiod = 2 * t + epsilon = find_zero(psihat, (0 + eps(0.0), 1 - eps(1.0))) + omegaC = sigma * epsilon + cf end +end - return minfreq, maxperiod, maxscale, minscale, maxfreq, minperiod -end # function cwtfreqlimits +function freq2scales( + sr::Int64, frequency_range::Tuple{Int64, Int64}, vpo::Int64, center_freq::Float64) + # convert frequencies in Hz to radians/sample + wrange = frequency_range .* (1 / sr * 2 .* pi) + a0 = 2^(1 / vpo) + s0 = center_freq / wrange[2] + smax = center_freq / wrange[1] + numoctaves = log2(smax / s0) + scales = s0 * a0 .^ (0:(vpo * numoctaves)) +end -function wavCFandSD( - wName::Symbol, - ga::Int64, - be::Int64 -) - cf = 0 - sigmaT = 0 +function cwtfilterbank!(fb_setup::FbSetup) + ########################################################################### + # setup parameters # + ########################################################################### + fb_setup.cutoff = fb_setup.wavelet == :morse ? 50 : 10 - if (wName == :morse) - cf = exp(1 / ga * (log(be) - log(ga))) + fb_setup.signal_pad = fb_setup.boundary == :reflection ? + fb_setup.length <= 1e5 ? + floor(Int, fb_setup.length / 2) : + ceil(Int, log2(fb_setup.length)) : 0 - # da morseproperties - frac(a, b) = a / b - morse_loga(a, b) = frac(b, a) .* (1 + log(a) - log(b)) + if fb_setup.wavelet == :morse + fb_setup.center_freq, _ = morsepeakfreq(fb_setup.gamma, fb_setup.beta) + _, _, _, sigmaT, _ = morseproperties(fb_setup.gamma, fb_setup.beta) - logsigo1 = frac(2, ga) .* log(frac(ga, 2 * be)) + loggamma(frac(2 * be + 1 + 2, ga)) - loggamma(frac(2 * be + 1, ga)) - logsigo2 = frac(2, ga) .* log(frac(ga, 2 * be)) + 2 .* loggamma(frac(2 * be + 2, ga)) - 2 .* loggamma(frac(2 * be + 1, ga)) + omegaC = get_freq_cutoff_morse( + fb_setup.cutoff, fb_setup.center_freq, fb_setup.gamma, fb_setup.beta) - sigo = sqrt(exp(logsigo1) - exp(logsigo2)) - ra = 2 * morse_loga(ga, be) - 2 * morse_loga(ga, be - 1) + morse_loga(ga, 2 * (be - 1)) - morse_loga(ga, 2 * be) - rb = 2 * morse_loga(ga, be) - 2 * morse_loga(ga, be - 1 + ga) + morse_loga(ga, 2 * (be - 1 + ga)) - morse_loga(ga, 2 * be) - rc = 2 * morse_loga(ga, be) - 2 * morse_loga(ga, be - 1 + ga ./ 2) + morse_loga(ga, 2 * (be - 1 + ga ./ 2)) - morse_loga(ga, 2 * be) + elseif fb_setup.wavelet == :amor + fb_setup.center_freq = 6 + sigmaT = sqrt(2) - logsig2a = ra + frac(2, ga) .* log(frac(be, ga)) + 2 * log(be) + loggamma(frac(2 * (be - 1) + 1, ga)) - loggamma(frac(2 * be + 1, ga)) - logsig2b = rb + frac(2, ga) .* log(frac(be, ga)) + 2 * log(ga) + loggamma(frac(2 * (be - 1 + ga) + 1, ga)) - loggamma(frac(2 * be + 1, ga)) - logsig2c = rc + frac(2, ga) .* log(frac(be, ga)) + log(2) + log(be) + log(ga) + loggamma(frac(2 * (be - 1 + ga ./ 2) + 1, ga)) - loggamma(frac(2 * be + 1, ga)) + omegaC = get_freq_cutoff_amor(fb_setup.cutoff, fb_setup.center_freq) - sig2a = exp(logsig2a) - sig2b = exp(logsig2b) - sig2c = exp(logsig2c) - sigt = sqrt(sig2a + sig2b - sig2c) + elseif fb_setup.wavelet == :bump + fb_setup.center_freq = 5 + # measured standard deviation of bump wavelet + sigmaT = 5.847705 - sigmaT = Real(sigt) - elseif (wName == :amor) - cf = 6 - sigmaT = sqrt(2) + omegaC = get_freq_cutoff_bump(fb_setup.cutoff, fb_setup.center_freq) else - cf = 5 - sigmaT = 5.847705 + omegaC = pi end - fourierFactor = (2 * pi) / cf - - return fourierFactor, sigmaT, cf -end # function wavCFandSD - -function wfilterbank( - fbcell::fbCell, - scales::AbstractVector{T}, - omega::AbstractVector{T}, - ga::Int64, - be::Int64) where {T<:Real} #Wavelet Filter Bank - - if (fbcell.wavelet == :morse) - somega = scales * omega' - absomega = abs.(somega) - powscales = absomega .^ ga + maxscale = fb_setup.length / sigmaT / 2 + minscale = omegaC / pi + # if the max scale (min freq) is beyond the max freq, set it one step away + if maxscale < minscale * 2^(1 / fb_setup.vpo) + maxscale = minscale * 2^(1 / fb_setup.vpo) + end - peakAF = exp(1 / ga * (log(be) - log(ga))) - peakCF = peakAF / (2 * pi) + fourier_factor = (2 * pi) / fb_setup.center_freq + t = 1 / fb_setup.sr - factor = exp(-be * log(peakAF) + peakAF^ga) - psidft = 2 * factor * exp.(be * log.(absomega) .- powscales) .* (somega .> 0) + minperiod = minscale * fourier_factor * t + maxfreq = 1 / (minscale * fourier_factor) * fb_setup.sr - f = (peakAF ./ scales) / (2 * pi) - else + maxperiod = maxscale * fourier_factor * t + minfreq = 1 / (maxscale * fourier_factor) * fb_setup.sr + # guard against edge case + if maxfreq > fb_setup.sr / 2 || minperiod < 2 * t + maxfreq = fb_setup.sr / 2 + minperiod = 2 * t end - return psidft, f -end # function wfilterbank - -function cwtfilterbank( - fbcell::fbCell, - ga::Int64, - be::Int64) + if fb_setup.frequency_range[1] < minfreq + fb_setup.frequency_range[1] = minfreq + end - # setup parameters - timeBandwidth = ga * be - p = 2 # Number of standard deviations + ########################################################################### + # construct the frequency grid for the wavelet DFT # + ########################################################################### + n = fb_setup.length + 2 * fb_setup.signal_pad - fb_parameters = fbParameters(fbcell.wavelet, [], fbcell.length, sr, [], fbcell.vpo, timeBandwidth, [], [], fbcell.boundary) + omega = [1:floor(Int, n / 2)...] .* ((2 * pi) / n) + fb_setup.omega = vcat(0.0, omega, -omega[floor(Int, (n - 1) / 2):-1:1]) + fb_setup.frequencies = fb_setup.sr * fb_setup.omega ./ (2 * pi) - if (fbcell.wavelet == :morse) - cutoff = 50 - else - cutoff = 10 - end + fb_setup.scales = freq2scales( + fb_setup.sr, fb_setup.frequency_range, fb_setup.vpo, fb_setup.center_freq) - normfreqflag = false + somega = fb_setup.scales * fb_setup.omega' - if (fbcell.boundary == :reflection) - if fbcell.length <= 1e5 - signalPad = Int64(floor(fbcell.length / 2)) + if fb_setup.wavelet == :morse + absomega = abs.(somega) + if fb_setup.gamma == 3 + powscales = absomega .* absomega .* absomega else - signalPad = Int64(ceil(log2(fbcell.length))) + powscales = absomega .^ fb_setup.gamma end + factor = exp(-fb_setup.beta * log(fb_setup.center_freq) + + fb_setup.center_freq^fb_setup.gamma) + fb_setup.psidft = 2 * factor * exp.(fb_setup.beta .* log.(absomega) - powscales) .* + (somega .> 0) + + elseif fb_setup.wavelet == :amor + fc = 6 + mul = 2 + squareterm = (somega .- fc) .* (somega .- fc) + gaussexp = -squareterm ./ 2 + expnt = gaussexp .* (somega .> 0) + fb_setup.psidft = mul * exp.(expnt) .* (somega .> 0) + else - signalPad = 0 + fc = 5 + sigma = 0.6 + w = (somega .- fc) ./ sigma + absw2 = w .* w + expnt = -1 ./ (1 .- absw2) + daughter = 2 * exp(1) * exp.(expnt) .* (abs.(w) .< 1 .- eps(1.0)) + daughter[isnan.(daughter)] .= 0 + fb_setup.psidft = daughter end - fourierFactor, sigmaT, cf = wavCFandSD(fbcell.wavelet, ga, be) - - # frequency grid - N = fbcell.length + 2 * signalPad - - omega = [1:Int(floor(N / 2))...] - omega = omega .* ((2 * pi) / N) - omega = vcat(0.0, omega, -omega[Int(floor((N - 1) / 2)):-1:1]) - - frequencies = fbcell.sr * omega ./ (2 * pi) + f = (fb_setup.center_freq ./ fb_setup.scales) / (2 * pi) - minFreq, maxPeriod, maxScale, minScale, maxFreq, minPeriod = cwtfreqlimits(fbcell.wavelet, fbcell.length, ga, be, fbcell.vpo, p, cutoff, fourierFactor, sigmaT, cf) - numoctaves = max(log2(maxScale / minScale), 1 / fbcell.vpo) - a0 = 2^(1 / fbcell.vpo) - scales = minScale * a0 .^ (0:numoctaves*fbcell.vpo) - - # compute filter bank - psidft, f = wfilterbank(fbcell, scales, omega, ga, be) - - # nyquistBin - nt = round(Int, size(psidft, 2)) - f = f .* fbcell.sr - nyquistBin = (nt >>> 1) + 1 - - return (; ga, be, cutoff, normfreqflag, signalPad, cf, omega, frequencies, scales, psidft, f, nyquistBin) + fb_setup.wavelet_center_freqs = f .* fb_setup.sr + # nyquist_bin = (size(psidft, 2) >>> 1) + 1 end # function cwtfilterbank -function createCoiIndices( - n::Int64 -) - - indices = vec(zeros(n, 1)) - if (isodd(length(x))) - # odd length case - M = Int64(ceil(n / 2)) - indices[1:M] .= [1:M;] #[] e ; genera una serie di numeri - indices[M+1:N] .= [M-1:-1:1;] - else - # even length case - indices[1:Int64(n / 2)] = [1:Int64(n / 2);] - indices[Int64(n / 2)+1:n] .= [Int64(n / 2):-1:1;] - end - - return indices -end # function createCoiIndices - function wt( - x::Union{AbstractVector{T},AbstractArray{T}}, - fbcell::fbCell, - fbdataT::NamedTuple -) where {T<:Real} - - n = fbcell.length - dataclass = eltype(x) - psihat = fbdataT.psidft - # check whether input is real or complex - isRealX = isreal(x) - - # x = hcat(x...) #trasforma da vettore a matrice ad una riga - sigvar = var(x, corrected=false) # per avere lo stesso risultato di matlab - - xv = x - if (fbdataT.signalPad > 0) - xv = vcat(reverse(xv[1:fbdataT.signalPad]), xv, xv[end:-1:end-fbdataT.signalPad+1]) + x::AbstractVector{Float64}, + fb_setup::FbSetup +) + if (fb_setup.signal_pad > 0) + x = vcat(reverse(x[1:fb_setup.signal_pad]), x, + x[end:-1:(end - fb_setup.signal_pad + 1)]) end # fourier transform of input - xposdft = fft(xv) - xposdft = hcat(xposdft...) + xposdft = fft(x) # obtain the CWT in the Fourier domain - cfsposdft = xposdft .* fbdataT.psidft + cfsposdft = xposdft' .* fb_setup.psidft # invert to obtain wavelet coefficients - cfspos = ifft(cfsposdft, 2) - cfs = cfspos + cfs = ifft(cfsposdft, 2) + # cfs = cfspos - if (fbdataT.signalPad > 0) - cfs = cfs[:, fbdataT.signalPad+1:fbdataT.signalPad+n, :] + if (fb_setup.signal_pad > 0) + cfs[:, fb_setup.signal_pad + 1:fb_setup.signal_pad + fb_setup.length] end +end # function wt - f = fbdataT.f - if (fbcell.wavelet == :morse) - FourierFactor, sigmaPsi = wavCFandSD(fbcell.wavelet, fbdataT.ga, fbdataT.be) - else +function cwt_windowing( + cwt_spectrum::Matrix{Float64}, + window_length::Int64 +) + c_length = size(cwt_spectrum, 1) + n_feats = size(cwt_spectrum, 2) + n_hops = floor(Int, c_length / window_length) + + y = zeros(Float64, n_hops, n_feats) + + for i = 1:n_feats + for j = 1:n_hops + + # y[j,i] = maximum(cwt_spectrum[(j-1)*window_length+1:j*window_length, i]) + y[j,i] = mean(cwt_spectrum[(j-1)*window_length+1:j*window_length, i]) - end - coiScalar = FourierFactor / sigmaPsi - - dt = 1 / fbcell.sr - samples = createCoiIndices(n) - coitmp = coiScalar * dt * samples - coi = 1 ./ coitmp - max_coi = max(fbdataT.f...) - for i in eachindex(coi) - if coi[i] > max_coi - coi[i] = max_coi end end - return sigvar, cfs, f, coi -end # function wt + return y +end # function buffer function cwt( - x::Union{AbstractVector{T},AbstractArray{T}}, - sr::Int64, - wavelet::Symbol=:morse, - sigLen::Int64=1024, - vpo::Int64=10, # VoicesPerOctave - boundary::Symbol=:reflection, - frqLimitLow::Int64=0, - frqLimitHi::Int64=Int(round(sr / 2)), - ga::Int64=3, - be::Int64=20 -) where {T<:Real} - - signalLength = size(x, 1) # lavora solo con file mono - fbcell = fbCell(sr, signalLength, wavelet, (frqLimitLow, frqLimitHi), vpo, boundary) - fbdataT = cwtfilterbank(fbcell, ga, be) - - # sigvar, cfs, freq, coitmp = wt(x, fbcell, fbdataT) - sigvar, cfs, freq, coitmp = wt(x, fbcell, fbdataT) - - # dt = 1/sr - # t = [0:dt:signalLength*dt-dt;] - - # fbdata = fbData(fbdataT..., sigvar) - - return cfs, freq, coitmp #,scalcfs + x::AbstractVector{Float64}, + sr::Int64; + wavelet::Symbol = :morse, + ga::Int64 = 3, + be::Int64 = 20, + frequency_range::Tuple{Int64, Int64} = (0, round(Int, sr / 2)), + vpo::Int64 = 10, # VoicesPerOctave + boundary::Symbol = :reflection +) + # ga, be = gamma, beta: symmetric parameters for morse wavelet + + fb_setup = FbSetup( + sr = sr, + length = size(x, 1), + wavelet = wavelet, + gamma = ga, + beta = be, + time_bandwidth = ga * be, + vpo = vpo, + boundary = boundary, + frequency_range = frequency_range + ) + + cwtfilterbank!(fb_setup) + + return wt(x, fb_setup), fb_setup.wavelet_center_freqs end # function cwt -# # debug -# using PyCall -# librosa = pyimport_conda("librosa") -# sr_src = 8000 -# x, sr = librosa.load("/home/riccardopasini/Documents/Aclai/Julia_additional_files/test.wav", sr=sr_src, mono=true) - -# depth = 5 -# wname = "db2" -# spec, freqs, times = cwt(x, sr) \ No newline at end of file +function cwt(x::AbstractVector{<:AbstractFloat}, sr::Int64; kwargs...) + cwt(Float64.(x), sr; kwargs...) +end \ No newline at end of file diff --git a/test/a911_vs_matlab_test.jl b/test/a911_vs_matlab_test.jl index f23e7ca..9e4f4cf 100644 --- a/test/a911_vs_matlab_test.jl +++ b/test/a911_vs_matlab_test.jl @@ -16,7 +16,7 @@ using JLD2, DataFrames # # audio parameters # sr = 8000 # fft_length = 256 -# frequency_range = Int[0, sr/2] +# frequency_range = (0, floor(Int, sr/2)) # mel_bands = 26 # num_coeffs = 13 @@ -24,10 +24,10 @@ using JLD2, DataFrames #--------------------------------------------------------------------------------------# # audio911 # #--------------------------------------------------------------------------------------# -# setup = signal_setup( +# setup = AudioSetup( # sr=sr, # # fft -# window_type=[:hann, :periodic], +# window_type=(:hann, :periodic), # window_length=fft_length, # overlap_length=Int(round(fft_length * 0.500)), # window_norm=false, @@ -52,11 +52,11 @@ using JLD2, DataFrames # spectral_spectrum=:linear # :linear, :mel # ) -# data = signal_data( +# data = AudioData( # x=x # ) -# takeFFT(data, setup) +# get_fft!(data, setup) # mel_spectrogram(data, setup) # _mfcc(data, setup) # lin_spectrogram(data, setup) diff --git a/test/cwt_features.jl b/test/cwt_features.jl new file mode 100644 index 0000000..a7c6dd9 --- /dev/null +++ b/test/cwt_features.jl @@ -0,0 +1,66 @@ +using Audio911 + +using SpecialFunctions +using Statistics, Roots +using FFTW +using Parameters +using Plots +include("/home/paso/.julia/dev/Audio911.jl/src/wavelet/cwt.jl") + +TESTPATH = joinpath(dirname(pathof(Audio911)), "..", "test") + +sr_setup = 8000 +x, sr = load_audio("$TESTPATH/common_voice_en_23616312.wav", sr=sr_setup) + +window_length = 256 +frequency_range=(80, 3000) +# mel_bands = 26 +# num_coeffs = 13 + +setup = AudioSetup( + sr=sr, + # fft + window_type=(:hann, :periodic), + window_length=window_length, + overlap_length=Int(round(window_length * 0.500)), + window_norm=true, + # spectrum + frequency_range=frequency_range, + spectrum_type=:power, # :power, :magnitude + # mel + mel_style=:htk, # :htk, :slaney + mel_bands=mel_bands, + filterbank_design_domain=:linear, + filterbank_normalization=:bandwidth, # :bandwidth, :area, :none + frequency_scale=:mel, + # mfcc + num_coeffs=num_coeffs, + normalization_type=:dithered, # :standard, :dithered + rectification=:log, + log_energy_source=:standard, # :standard (after windowing), :mfcc + log_energy_pos=:none, #:append, :replace, :none + delta_window_length=9, + delta_matrix=:standard, # :standard, :transposed + # spectral + spectral_spectrum=:mel # :linear, :linear_focused, :mel +) + +data = AudioData( + x=Float64.(x) +) + +get_fft!(data, setup) + +cwt_spectrum, _ = cwt(data.x, setup.sr, frequency_range=(80,3000)) +cwt_spectrum = abs.(cwt_spectrum') + +data.mel_spectrogram = cwt_windowing(cwt_spectrum, 32) +setup.mel_bands = setup.num_coeffs = size(data.mel_spectrogram, 2) + +# mel_spectrogram(data, setup) +_mfcc(data, setup) +# lin_spectrogram(data, setup) +spectral_features(data, setup) +f0(data, setup) + +# setup.frequency_range = (80, 1000) \ No newline at end of file diff --git a/test/featureExtractorDebug.jl b/test/featureExtractorDebug.jl index 05c6122..71ccc72 100644 --- a/test/featureExtractorDebug.jl +++ b/test/featureExtractorDebug.jl @@ -25,14 +25,14 @@ sr_src = 8000 # x, sr = librosa.load("$TESTPATH/common_voice_en_23616312.wav", sr=sr_src, mono=true) x, sr = librosa.load("/home/riccardopasini/Documents/Aclai/Datasets/Common_voice_ds/6/Wavfiles/common_voice_de_19572503.wav", sr=sr_src, mono=true) fft_length = 256 -frequency_range=Int[0, sr/2] +frequency_range=(0, floor(Int, sr/2)) mel_bands = 26 num_coeffs = 13 -setup = signal_setup( +setup = AudioSetup( sr=sr, # fft - window_type=[:hann, :periodic], + window_type=(:hann, :periodic), window_length=fft_length, overlap_length=Int(round(fft_length * 0.500)), window_norm=true, @@ -60,15 +60,15 @@ setup = signal_setup( # convert to Float64 x = Float64.(x) -data = signal_data( +data = AudioData( x=x ) -takeFFT(data, setup) +get_fft!(data, setup) # mel_spectrogram(data, setup) # _mfcc(data, setup) lin_spectrogram(data, setup) # spectral_features(data, setup) # f0(data, setup) -# setup.frequency_range = Int[80, 1000] \ No newline at end of file +# setup.frequency_range = (80, 1000) \ No newline at end of file diff --git a/test/mfcc_comparison.ipynb b/test/mfcc_comparison.ipynb index 416a1d9..642f01d 100644 --- a/test/mfcc_comparison.ipynb +++ b/test/mfcc_comparison.ipynb @@ -110,15 +110,15 @@ } ], "source": [ - "setup = signal_setup(\n", + "setup = AudioSetup(\n", " sr=sr,\n", " # fft\n", - " window_type=[:hann, :periodic],\n", + " window_type=(:hann, :periodic),\n", " window_length=FFTLength,\n", " overlap_length=Int(round(FFTLength * 0.500)),\n", " window_norm=false,\n", " # spectrum\n", - " frequency_range=Int[0, sr / 2],\n", + " frequency_range=(0, floor(Int, sr / 2)),\n", " spectrum_type=:power,\n", " # mel\n", " mel_style=:htk,\n", @@ -128,11 +128,11 @@ " frequency_scale=:mel,\n", ")\n", "\n", - "data = signal_data(\n", + "data = AudioData(\n", " x=x\n", ")\n", "\n", - "takeFFT(data, setup)\n", + "getFFT(data, setup)\n", "mel_spectrogram(data, setup)\n", "\n", "data.mel_spectrogram" @@ -271,15 +271,15 @@ } ], "source": [ - "setup = signal_setup(\n", + "setup = AudioSetup(\n", " sr=sr,\n", " # fft\n", - " window_type=[:hann, :periodic],\n", + " window_type=(:hann, :periodic),\n", " window_length=FFTLength,\n", " overlap_length=Int(round(FFTLength * 0.500)),\n", " window_norm=false,\n", " # spectrum\n", - " frequency_range=Int[0, sr / 2],\n", + " frequency_range=(0, floor(Int, sr / 2)),\n", " spectrum_type=:power,\n", " # mel\n", " mel_style=:htk,\n", @@ -295,11 +295,11 @@ " delta_window_length = 9\n", ")\n", "\n", - "data = signal_data(\n", + "data = AudioData(\n", " x=x\n", ")\n", "\n", - "takeFFT(data, setup)\n", + "getFFT(data, setup)\n", "mel_spectrogram(data, setup)\n", "_mfcc(data, setup)\n", "\n", @@ -442,15 +442,15 @@ } ], "source": [ - "setup = signal_setup(\n", + "setup = AudioSetup(\n", " sr=sr,\n", " # fft\n", - " window_type=[:hann, :periodic],\n", + " window_type=(:hann, :periodic),\n", " window_length=FFTLength,\n", " overlap_length=Int(round(FFTLength * 0.500)),\n", " window_norm=false,\n", " # spectrum\n", - " frequency_range=Int[0, sr / 2],\n", + " frequency_range=(0, floor(Int, sr / 2)),\n", " spectrum_type=:power,\n", " # mel\n", " mel_style=:htk,\n", @@ -466,11 +466,11 @@ " delta_window_length = 9\n", ")\n", "\n", - "data = signal_data(\n", + "data = AudioData(\n", " x=x\n", ")\n", "\n", - "takeFFT(data, setup)\n", + "getFFT(data, setup)\n", "mel_spectrogram(data, setup)\n", "_mfcc(data, setup)\n", "\n", @@ -528,15 +528,15 @@ } ], "source": [ - "setup = signal_setup(\n", + "setup = AudioSetup(\n", " sr=sr,\n", " # fft\n", - " window_type=[:hann, :periodic],\n", + " window_type=(:hann, :periodic),\n", " window_length=FFTLength,\n", " overlap_length=Int(round(FFTLength * 0.500)),\n", " window_norm=false,\n", " # spectrum\n", - " frequency_range=Int[0, sr / 2],\n", + " frequency_range=(0, floor(Int, sr / 2)),\n", " spectrum_type=:power,\n", " # mel\n", " mel_style=:htk,\n", @@ -553,11 +553,11 @@ " delta_matrix = :standard\n", ")\n", "\n", - "data = signal_data(\n", + "data = AudioData(\n", " x=x\n", ")\n", "\n", - "takeFFT(data, setup)\n", + "getFFT(data, setup)\n", "mel_spectrogram(data, setup)\n", "_mfcc(data, setup)\n", "\n", @@ -700,15 +700,15 @@ } ], "source": [ - "setup = signal_setup(\n", + "setup = AudioSetup(\n", " sr=sr,\n", " # fft\n", - " window_type=[:hann, :periodic],\n", + " window_type=(:hann, :periodic),\n", " window_length=FFTLength,\n", " overlap_length=Int(round(FFTLength * 0.500)),\n", " window_norm=false,\n", " # spectrum\n", - " frequency_range=Int[0, sr / 2],\n", + " frequency_range=(0, floor(Int, sr / 2)),\n", " spectrum_type=:power,\n", " # mel\n", " mel_style=:htk,\n", @@ -725,11 +725,11 @@ " delta_matrix = :transposed\n", ")\n", "\n", - "data = signal_data(\n", + "data = AudioData(\n", " x=x\n", ")\n", "\n", - "takeFFT(data, setup)\n", + "getFFT(data, setup)\n", "mel_spectrogram(data, setup)\n", "_mfcc(data, setup)\n", "\n", @@ -787,15 +787,15 @@ } ], "source": [ - "setup = signal_setup(\n", + "setup = AudioSetup(\n", " sr=sr,\n", " # fft\n", - " window_type=[:hann, :periodic],\n", + " window_type=(:hann, :periodic),\n", " window_length=FFTLength,\n", " overlap_length=Int(round(FFTLength * 0.500)),\n", " window_norm=:false,\n", " # spectrum\n", - " frequency_range=Int[0, sr / 2],\n", + " frequency_range=(0, floor(Int, sr / 2)),\n", " spectrum_type=:power,\n", " # mel\n", " mel_style=:htk,\n", @@ -811,11 +811,11 @@ " delta_window_length = 9\n", ")\n", "\n", - "data = signal_data(\n", + "data = AudioData(\n", " x=x\n", ")\n", "\n", - "takeFFT(data, setup)\n", + "getFFT(data, setup)\n", "mel_spectrogram(data, setup)\n", "_mfcc(data, setup)\n", "\n", @@ -957,15 +957,15 @@ } ], "source": [ - "setup = signal_setup(\n", + "setup = AudioSetup(\n", " sr=sr,\n", " # fft\n", - " window_type=[:hann, :periodic],\n", + " window_type=(:hann, :periodic),\n", " window_length=FFTLength,\n", " overlap_length=Int(round(FFTLength * 0.500)),\n", " window_norm=:false,\n", " # spectrum\n", - " frequency_range=Int[0, sr / 2],\n", + " frequency_range=(0, floor(Int, sr / 2)),\n", " spectrum_type=:power,\n", " # mel\n", " mel_style=:htk,\n", @@ -981,11 +981,11 @@ " delta_window_length = 9\n", ")\n", "\n", - "data = signal_data(\n", + "data = AudioData(\n", " x=x\n", ")\n", "\n", - "takeFFT(data, setup)\n", + "getFFT(data, setup)\n", "mel_spectrogram(data, setup)\n", "_mfcc(data, setup)\n", "\n", @@ -1043,25 +1043,25 @@ } ], "source": [ - "setup = signal_setup(\n", + "setup = AudioSetup(\n", " sr=sr,\n", " # fft\n", - " window_type=[:hann, :periodic],\n", + " window_type=(:hann, :periodic),\n", " window_length=FFTLength,\n", " overlap_length=Int(round(FFTLength * 0.500)),\n", " window_norm=false,\n", " # spectrum\n", - " frequency_range=Int[0, sr / 2],\n", + " frequency_range=(0, floor(Int, sr / 2)),\n", " spectrum_type=:power, # :power, :magnitude\n", " # spectral\n", " spectral_spectrum=:linear # :linear, :mel\n", ")\n", "\n", - "data = signal_data(\n", + "data = AudioData(\n", " x=x\n", ")\n", "\n", - "takeFFT(data, setup)\n", + "getFFT(data, setup)\n", "lin_spectrogram(data, setup)\n", "spectral_features(data, setup)\n", "\n", @@ -1191,23 +1191,23 @@ } ], "source": [ - "setup = signal_setup(\n", + "setup = AudioSetup(\n", " sr=sr,\n", " # fft\n", - " window_type=[:hann, :periodic],\n", + " window_type=(:hann, :periodic),\n", " window_length=FFTLength,\n", " overlap_length=Int(round(FFTLength * 0.500)),\n", " window_norm=false,\n", " # spectrum\n", - " frequency_range=Int[0, sr/2],\n", + " frequency_range=(0, floor(Int, sr / 2)),\n", " spectrum_type=:magnitude, # :power, :magnitude\n", ")\n", "\n", - "data = signal_data(\n", + "data = AudioData(\n", " x=x\n", ")\n", "\n", - "takeFFT(data, setup)\n", + "getFFT(data, setup)\n", "lin_spectrogram(data, setup)\n", "spectral_features(data, setup)\n", "\n", @@ -1281,23 +1281,23 @@ } ], "source": [ - "setup = signal_setup(\n", + "setup = AudioSetup(\n", " sr = sr,\n", " # fft\n", - " window_type = [:hann, :periodic],\n", + " window_type = (:hann, :periodic),\n", " window_length = FFTLength,\n", " overlap_length = Int(round(FFTLength * 0.500)),\n", " window_norm = false,\n", " # spectrum\n", - " frequency_range = Int[0, sr / 2],\n", + " frequency_range = (0, floor(Int, sr / 2)),\n", " spectrum_type = :power, # :power, :magnitude\n", ")\n", "\n", - "data = signal_data(\n", + "data = AudioData(\n", " x = x,\n", ")\n", "\n", - "takeFFT(data, setup)\n", + "getFFT(data, setup)\n", "f0(data, setup)\n", "\n", "\n", diff --git a/test/spectral_audioflux.jl b/test/spectral_audioflux.jl index 6f03547..523d1af 100644 --- a/test/spectral_audioflux.jl +++ b/test/spectral_audioflux.jl @@ -14,14 +14,14 @@ x, sr = load_audio("$TESTPATH/common_voice_en_23616312.wav"; sr = sr_resample) fft_length = 256 -frequency_range=Int[0, sr/2] +frequency_range=(0, floor(Int, sr/2)) mel_bands = 26 num_coeffs = 13 setup = FeatureSetup( sr=sr, # fft - window_type=[:hann, :periodic], + window_type=(:hann, :periodic), window_length=fft_length, overlap_length=Int(round(fft_length * 0.500)), window_norm=false, @@ -77,7 +77,7 @@ features = lin_spectrogram(x) data = extractFFT(setup, x) -data = signal_data( +data = AudioData( x = x, )