diff --git a/src/Audio911.jl b/src/Audio911.jl
index ab579e4..e8fb808 100644
--- a/src/Audio911.jl
+++ b/src/Audio911.jl
@@ -65,4 +65,6 @@ export extractfeatures
 # modular
 export get_frames, get_frames!, get_stft, get_stft!
 
+export get_frames2, get_stft2
+
 end # module Audio911
diff --git a/src/fft/fft.jl b/src/fft/fft.jl
index 129cbc8..29375fe 100644
--- a/src/fft/fft.jl
+++ b/src/fft/fft.jl
@@ -176,3 +176,148 @@ function get_stft!(a::AudioObj)
     end
     a.data.stft, a.setup.stft_freq = _get_stft(a.data.frames .* a.setup.window, a.setup)
 end
+
+#------------------------------------------------------------------------------#
+#                                     stft                                     #
+#------------------------------------------------------------------------------#
+"""
+    _get_stft2(x, sr; fft_length, frequency_range, spectrum_type)
+
+Compute the power or magnitude spectrum of every column of `x`.
+
+The FFT is taken along the first dimension after zero-padding `x` to `fft_length`
+rows. Only the bins inside `frequency_range` (Hz) are kept. `spectrum_type` is
+`:power` or `:magnitude`. Returns the tuple `(spectrum, stft_freq)`.
+"""
+function _get_stft2(
+    x::AbstractArray{Float64},
+    sr::Int64;
+    fft_length::Int64,
+    frequency_range::Tuple{Int64, Int64},
+    spectrum_type::Symbol,
+)
+    @assert fft_length >= size(x, 1) "fft_length must be >= window length. Got fft_length = $fft_length, window length = $(size(x,1))."
+
+    # ensure x is of length fft_length
+    # if the FFT length is larger than the window, the audio data is zero-padded to match it.
+    # this zero-padding in the time domain results in an interpolation in the frequency domain,
+    # which can provide a more detailed view of the spectral content of the signal.
+    x = size(x, 1) < fft_length ? vcat(x, zeros(eltype(x), fft_length - size(x, 1), size(x, 2))) : x[1:fft_length, :]
+
+    # get fft
+    Y = fft(x, (1,))
+
+    # post process
+    # trim to desired range
+    bin_low = ceil(Int, frequency_range[1] * fft_length / sr + 1)
+    bin_high = floor(Int, frequency_range[2] * fft_length / sr + 1)
+    bins = collect(bin_low:bin_high)
+    y = Y[bins, :]
+
+    # convert to half-sided power or magnitude spectrum
+    spectrum_funcs = Dict(
+        :power => z -> real.(z .* conj.(z)),
+        :magnitude => z -> abs.(z),
+    )
+    # check if spectrum_type is valid
+    @assert haskey(spectrum_funcs, spectrum_type) "Unknown spectrum_type: $spectrum_type."
+
+    y = spectrum_funcs[spectrum_type](y)
+
+    # trim borders
+    # halve the first bin if it's the lowest bin
+    bin_low == 1 && (y[1, :] *= 0.5)
+    # halve the last bin if it's the Nyquist bin and FFT length is even
+    bin_high == fld(fft_length, 2) + 1 && iseven(fft_length) && (y[end, :] *= 0.5)
+
+    # create frequency vector
+    stft_freq = (sr / fft_length) * (bins .- 1)
+    # shift final bin if fft_length is odd and the final range is full to fs/2.
+    if isodd(fft_length) && bin_high == fld(fft_length, 2) + 1
+        stft_freq[end] = sr * (fft_length - 1) / (2 * fft_length)
+    end
+
+    return y, stft_freq
+end
+
+# Convenience method: read the STFT parameters from the fields of an AudioSetup.
+_get_stft2(x::AbstractArray{Float64}, s::AudioSetup) = _get_stft2(
+    x,
+    s.sr,
+    fft_length = s.fft_length,
+    frequency_range = s.frequency_range,
+    spectrum_type = s.spectrum_type,
+)
+
+"""
+    get_stft2(x, sr, fft_length, frequency_range, spectrum_type)
+
+Compute the spectrum of the columns of `x`. `x` is passed to the FFT as it is: no
+framing or windowing is applied by this method. Returns `(spectrum, stft_freq)`.
+"""
+function get_stft2(
+    x::AbstractArray{<:AbstractFloat},
+    sr::Int64,
+    fft_length::Int64 = sr <= 8000 ? 256 : 512,
+    frequency_range::Tuple{Int64, Int64} = (0, floor(Int, sr / 2)),
+    spectrum_type::Symbol = :power, # :power, :magnitude
+)
+    @assert sr > 0 "Sample rate must be > 0."
+    @assert 0 <= frequency_range[1] < frequency_range[2] <= sr / 2 "Frequency range must be (0, sr/2)."
+
+    # _get_stft2 only accepts Float64 data: convert other float types up front
+    return _get_stft2(
+        eltype(x) == Float64 ? x : Float64.(x),
+        sr,
+        fft_length = fft_length,
+        frequency_range = frequency_range,
+        spectrum_type = spectrum_type,
+    )
+end
+
+"""
+    get_stft2(x::AbstractVector, sr; fft_length, window_type, window_length,
+        overlap_length, frequency_range, spectrum_type)
+
+Split the signal `x` into overlapping windowed frames and compute their spectrum.
+Returns `(spectrum, stft_freq)`.
+"""
+function get_stft2(
+    x::AbstractVector{<:AbstractFloat},
+    sr::Int64;
+    fft_length::Int64 = sr <= 8000 ? 256 : 512,
+    window_type::Tuple{Symbol, Symbol} = (:hann, :periodic),
+    window_length::Int64 = fft_length,
+    overlap_length::Int64 = round(Int, fft_length / 2),
+    frequency_range::Tuple{Int64, Int64} = (0, floor(Int, sr / 2)),
+    spectrum_type::Symbol = :power, # :power, :magnitude
+)
+    @assert sr > 0 "Sample rate must be > 0."
+    @assert 0 <= frequency_range[1] < frequency_range[2] <= sr / 2 "Frequency range must be (0, sr/2)."
+    @assert 0 < overlap_length < window_length "Overlap length must be < window length."
+
+    # _get_frames2 returns frames that are already multiplied by the window
+    frames = _get_frames2(
+        eltype(x) == Float64 ? x : Float64.(x),
+        window_type = window_type,
+        window_length = window_length,
+        overlap_length = overlap_length,
+    )
+
+    return _get_stft2(
+        frames,
+        sr,
+        fft_length = fft_length,
+        frequency_range = frequency_range,
+        spectrum_type = spectrum_type,
+    )
+end
+
+# NOTE(review): this still calls the original `_get_frames` / `_get_stft` helpers, exactly
+# like `get_stft!` does; confirm whether the "2" variants were meant to be used here.
+function get_stft2!(a::AudioObj)
+    if isempty(a.data.frames)
+        a.data.frames, a.setup.window = _get_frames(a.data.x, a.setup)
+    end
+    a.data.stft, a.setup.stft_freq = _get_stft(a.data.frames .* a.setup.window, a.setup)
+end
\ No newline at end of file
diff --git a/src/utils/histogram.jl b/src/utils/histogram.jl
index a7d74ad..f108306 100644
--- a/src/utils/histogram.jl
+++ b/src/utils/histogram.jl
@@ -67,7 +67,7 @@ end
 function get_histcounts(
     x::AbstractVector{Float64};
     nbins::Union{Int64, Nothing} = nothing,
-    # binwidth::Union{Int64, Float64, Nothing} = nothing,
+    binwidth::Union{Int64, Float64, Nothing} = nothing,
     norm::Symbol = :none,
     allow_nan = :false,
 )
diff --git a/src/utils/speech_detector.jl b/src/utils/speech_detector.jl
index 28bebda..0020c01 100644
--- a/src/utils/speech_detector.jl
+++ b/src/utils/speech_detector.jl
@@ -39,7 +39,7 @@ function get_threshs_from_feature(
     hist_bins = max(10, hist_bins)
 
     m_feature = mean(feature)
-    n_feature, edges_feature = histcounts(feature, hist_bins)
+    n_feature, edges_feature = get_histcounts(feature, nbins=hist_bins)
 
     # working with spectral spread
     if type == :specspread
diff --git a/src/windowing/windowing.jl b/src/windowing/windowing.jl
index 6339751..d55d0c2 100644
--- a/src/windowing/windowing.jl
+++ b/src/windowing/windowing.jl
@@ -123,3 +123,40 @@ function get_frames(
 end
 
 get_frames!(a::AudioObj; kwargs...) = a.data.frames, a.setup.window = get_frames(a.data.x; kwargs...)
+
+#------------------------------------------------------------------------------#
+#                                  windowing                                   #
+#------------------------------------------------------------------------------#
+# Split `x` into overlapping frames and multiply every frame by the window.
+function _get_frames2(
+    x::AbstractVector{Float64};
+    window_type::Tuple{Symbol, Symbol},
+    window_length::Int64,
+    overlap_length::Int64,
+)
+    frames = buffer(x, window_length, window_length - overlap_length)
+    window, _ = gencoswin(window_type[1], window_length, window_type[2])
+    return frames .* window
+end
+
+function _get_frames2(x::AbstractVector{Float64}, s::AudioSetup)
+    return _get_frames2(x, window_type = s.window_type, window_length = s.window_length, overlap_length = s.overlap_length)
+end
+
+# Public entry point: validate the arguments, convert `x` to Float64, return windowed frames.
+function get_frames2(
+    x::AbstractVector{<:AbstractFloat},
+    window_type::Tuple{Symbol, Symbol} = (:hann, :periodic),
+    window_length::Int64 = 256,
+    overlap_length::Int64 = 128,
+)
+    @assert 0 < overlap_length < window_length "Overlap length must be < window length."
+    x64 = eltype(x) == Float64 ? x : Float64.(x)
+    return _get_frames2(x64, window_type = window_type, window_length = window_length, overlap_length = overlap_length)
+end
+
+# Store the windowed frames in `a`. `get_frames2` returns only the frame matrix (no window),
+# so `a.setup.window` is left untouched. NOTE(review): confirm that is the intended contract.
+function get_frames2!(a::AudioObj; window_type = (:hann, :periodic), window_length = 256, overlap_length = 128)
+    return a.data.frames = get_frames2(a.data.x, window_type, window_length, overlap_length)
+end
diff --git a/test/features_extraction.jl b/test/features_extraction.jl
index 38c8361..18dad2f 100644
--- a/test/features_extraction.jl
+++ b/test/features_extraction.jl
@@ -201,3 +201,8 @@ audio_6 = audio_obj(x, sr)
 get_frames!(audio_6)
 get_stft!(audio_6)
 display(audio_6.data.stft)
+
+################
+framest = get_frames2(x)
+stft1, stft_freq1 = get_stft2(framest, sr)
+fft22 = get_features(x, sr, :fft)
\ No newline at end of file