Skip to content

Commit

Permalink
fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
PasoStudio73 committed May 14, 2024
1 parent 45f08ed commit ee99852
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 2 deletions.
2 changes: 2 additions & 0 deletions src/Audio911.jl
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,6 @@ export extractfeatures
# modular
export get_frames, get_frames!, get_stft, get_stft!

export get_frames2, get_stft2

end # module Audio911
118 changes: 118 additions & 0 deletions src/fft/fft.jl
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,121 @@ function get_stft!(a::AudioObj)
end
a.data.stft, a.setup.stft_freq = _get_stft(a.data.frames .* a.setup.window, a.setup)
end

#------------------------------------------------------------------------------#
# stft #
#------------------------------------------------------------------------------#
"""
    _get_stft2(x, sr; fft_length, frequency_range, spectrum_type)

Compute a one-sided spectrum along the first dimension of `x`.

# Arguments
- `x`: input data; the FFT is taken along dimension 1 (a vector is promoted to a
  one-column matrix by the padding/trimming step).
- `sr`: sample rate, used to map `frequency_range` to FFT bins.

# Keywords
- `fft_length`: FFT size; must be at least `size(x, 1)`. Shorter input is zero-padded.
- `frequency_range`: `(low, high)` frequencies to keep, in the same unit as `sr`.
- `spectrum_type`: `:power` (squared magnitude) or `:magnitude`.

# Returns
A tuple `(y, stft_freq)`: the selected bins of the spectrum and the frequency
associated with each kept bin.

# Throws
- `AssertionError` if `fft_length < size(x, 1)` or `spectrum_type` is unknown.
"""
function _get_stft2(
    x::AbstractArray{Float64},
    sr::Int64;
    fft_length::Int64,
    frequency_range::Tuple{Int64, Int64},
    spectrum_type::Symbol,
)
    @assert fft_length >= size(x, 1) "fft_length must be >= window length. Got fft_length = $fft_length, window length = $(size(x,1))."
    # validate before doing any work, so a bad option does not cost an FFT
    @assert spectrum_type in (:power, :magnitude) "Unknown spectrum_type: $spectrum_type."

    # ensure the data has exactly fft_length rows:
    # zero-padding in the time domain interpolates the spectrum in the frequency
    # domain, giving a finer-grained view of the spectral content.
    n_rows = size(x, 1)
    padded = if n_rows < fft_length
        vcat(x, zeros(eltype(x), fft_length - n_rows, size(x, 2)))
    else
        x[1:fft_length, :]
    end

    # FFT along the first dimension
    Y = fft(padded, (1,))

    # keep only the bins that fall inside the requested frequency range (1-based bins)
    bin_low = ceil(Int, frequency_range[1] * fft_length / sr + 1)
    bin_high = floor(Int, frequency_range[2] * fft_length / sr + 1)
    bins = bin_low:bin_high
    selected = @view Y[bins, :]

    # convert to power or magnitude spectrum
    y = spectrum_type === :power ? abs2.(selected) : abs.(selected)

    # halve the first bin if it is the lowest (DC) bin
    bin_low == 1 && (y[1, :] .*= 0.5)
    # halve the last bin if it is the Nyquist bin and the FFT length is even
    bin_high == fld(fft_length, 2) + 1 && iseven(fft_length) && (y[end, :] .*= 0.5)

    # frequency associated with every kept bin
    bin_width = sr / fft_length
    stft_freq = [bin_width * (b - 1) for b in bins]
    # pin the final bin when fft_length is odd and the range reaches the last
    # one-sided bin (the original referenced an undefined `fftLength` here)
    if isodd(fft_length) && bin_high == fld(fft_length, 2) + 1
        stft_freq[end] = sr * (fft_length - 1) / (2 * fft_length)
    end

    return y, stft_freq
end

# Convenience method: read sample rate, FFT length, frequency range and spectrum type
# from an `AudioSetup` and forward to the keyword-based `_get_stft2` kernel.
# The original forwarded to the legacy `_get_stft`, so the new implementation was
# never exercised through this method.
function _get_stft2(x::AbstractArray{Float64}, s::AudioSetup)
    return _get_stft2(
        x,
        s.sr;
        fft_length = s.fft_length,
        frequency_range = s.frequency_range,
        spectrum_type = s.spectrum_type,
    )
end

"""
    get_stft2(x, sr, fft_length, frequency_range, spectrum_type)

Compute the spectrum of already-framed data `x` (the FFT is taken along dimension 1).

All arguments are positional. `fft_length` defaults to 256 when `sr <= 8000` and to
512 otherwise, `frequency_range` defaults to `(0, floor(Int, sr / 2))`, and
`spectrum_type` (`:power` or `:magnitude`) defaults to `:power`.

Input of any `AbstractFloat` element type is accepted; it is converted to `Float64`
before being handed to `_get_stft2`, which only accepts `Float64` data.

Returns the tuple `(stft_spec, stft_freq)` produced by `_get_stft2`.

# Throws
- `AssertionError` if `sr <= 0` or `frequency_range` is not within `0 .. sr/2` with
  `low < high`.
"""
function get_stft2(
    x::AbstractArray{<:AbstractFloat},
    sr::Int64,
    fft_length::Int64 = sr <= 8000 ? 256 : 512,
    frequency_range::Tuple{Int64, Int64} = (0, floor(Int, sr / 2)),
    spectrum_type::Symbol = :power, # :power, :magnitude
)
    @assert sr > 0 "Sample rate must be > 0."
    @assert 0 <= frequency_range[1] < frequency_range[2] <= sr / 2 "Frequency range must be (0, sr/2)."

    # the kernel is restricted to Float64; without this conversion a Float32 input
    # passes this method's signature and then fails with a MethodError
    data = eltype(x) == Float64 ? x : Float64.(x)

    stft_spec, stft_freq = _get_stft2(
        data,
        sr;
        fft_length = fft_length,
        frequency_range = frequency_range,
        spectrum_type = spectrum_type,
    )
    return stft_spec, stft_freq
end

"""
    get_stft2(x, sr; fft_length, window_type, window_length, overlap_length,
              frequency_range, spectrum_type)

Frame and window the signal `x`, then compute the spectrum of every frame.

# Keywords
- `fft_length`: FFT size; 256 when `sr <= 8000`, otherwise 512.
- `window_type`: `(shape, mode)` pair forwarded to `_get_frames2`;
  default `(:hann, :periodic)`.
- `window_length`: frame length; defaults to `fft_length`.
- `overlap_length`: overlap between consecutive frames; defaults to
  `round(Int, fft_length / 2)`.
- `frequency_range`: `(low, high)` frequencies to keep; default
  `(0, floor(Int, sr / 2))`.
- `spectrum_type`: `:power` (default) or `:magnitude`.

Returns the tuple `(stft_spec, stft_freq)` produced by `_get_stft2`.

# Throws
- `AssertionError` if `sr <= 0`, if `frequency_range` is not within `0 .. sr/2` with
  `low < high`, or unless `0 < overlap_length < window_length`.
"""
function get_stft2(
    x::AbstractVector{<:AbstractFloat},
    sr::Int64;
    fft_length::Int64 = sr <= 8000 ? 256 : 512,
    window_type::Tuple{Symbol, Symbol} = (:hann, :periodic),
    window_length::Int64 = fft_length,
    overlap_length::Int64 = round(Int, fft_length / 2),
    frequency_range::Tuple{Int64, Int64} = (0, floor(Int, sr / 2)),
    spectrum_type::Symbol = :power, # :power, :magnitude
)
    @assert sr > 0 "Sample rate must be > 0."
    @assert 0 <= frequency_range[1] < frequency_range[2] <= sr / 2 "Frequency range must be (0, sr/2)."
    @assert 0 < overlap_length < window_length "Overlap length must be < window length."

    frames = _get_frames2(
        eltype(x) == Float64 ? x : Float64.(x);
        window_type = window_type,
        window_length = window_length,
        overlap_length = overlap_length,
    )

    # `_get_frames2` already returns the frames multiplied by the window, so they are
    # passed through unchanged. The original multiplied by `window` again, a name
    # that is not defined in this function.
    stft_spec, stft_freq = _get_stft2(
        frames,
        sr;
        fft_length = fft_length,
        frequency_range = frequency_range,
        spectrum_type = spectrum_type,
    )
    return stft_spec, stft_freq
end

# In-place STFT on an `AudioObj`: make sure frames (and the window) exist, then store
# the spectrum and its frequency vector on the object. Returns whatever `_get_stft`
# returns.
# NOTE(review): this still calls the legacy `_get_frames` / `_get_stft` helpers rather
# than the `*2` variants — confirm whether that is intended.
function get_stft2!(a::AudioObj)
    if isempty(a.data.frames)
        framed = _get_frames(a.data.x, a.setup)
        a.data.frames, a.setup.window = framed
    end

    spectrum = _get_stft(a.data.frames .* a.setup.window, a.setup)
    a.data.stft, a.setup.stft_freq = spectrum
    return spectrum
end
2 changes: 1 addition & 1 deletion src/utils/histogram.jl
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ end
function get_histcounts(
x::AbstractVector{Float64};
nbins::Union{Int64, Nothing} = nothing,
# binwidth::Union{Int64, Float64, Nothing} = nothing,
binwidth::Union{Int64, Float64, Nothing} = nothing,
norm::Symbol = :none,
allow_nan = :false,
)
Expand Down
2 changes: 1 addition & 1 deletion src/utils/speech_detector.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ function get_threshs_from_feature(
hist_bins = max(10, hist_bins)

m_feature = mean(feature)
n_feature, edges_feature = histcounts(feature, hist_bins)
n_feature, edges_feature = get_histcounts(feature, nbins=hist_bins)

# working with spectral spread
if type == :specspread
Expand Down
37 changes: 37 additions & 0 deletions src/windowing/windowing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,40 @@ function get_frames(
end

# In-place framing on an `AudioObj`: run `get_frames` on the stored signal, unpack its
# result into `a.data.frames` and `a.setup.window`, and return that result unchanged.
function get_frames!(a::AudioObj; kwargs...)
    framed = get_frames(a.data.x; kwargs...)
    a.data.frames, a.setup.window = framed
    return framed
end

#------------------------------------------------------------------------------#
# windowing #
#------------------------------------------------------------------------------#
# Split `x` into frames of `window_length` samples, advancing by
# `window_length - overlap_length` samples per frame, and return the frames already
# multiplied by the window generated for `window_type`.
function _get_frames2(
    x::AbstractVector{Float64};
    window_type::Tuple{Symbol, Symbol},
    window_length::Int64,
    overlap_length::Int64,
)
    hop_length = window_length - overlap_length
    framed = buffer(x, window_length, hop_length)

    # only the first value returned by `gencoswin` is used
    shape, mode = window_type
    taper, _ = gencoswin(shape, window_length, mode)

    return framed .* taper
end

# Convenience method: read the windowing parameters from an `AudioSetup` and forward
# them to the keyword-based `_get_frames2`.
_get_frames2(x::AbstractVector{Float64}, s::AudioSetup) = _get_frames2(
    x;
    window_type = s.window_type,
    window_length = s.window_length,
    overlap_length = s.overlap_length,
)

"""
    get_frames2(x, window_type, window_length, overlap_length)

Return the windowed frames of signal `x`, as computed by `_get_frames2`.

All arguments are positional. Defaults: `window_type = (:hann, :periodic)`,
`window_length = 256`, `overlap_length = 128`. Input that is not `Float64` is
converted to `Float64` first.

# Throws
- `AssertionError` unless `0 < overlap_length < window_length`.
"""
function get_frames2(
    x::AbstractVector{<:AbstractFloat},
    window_type::Tuple{Symbol, Symbol} = (:hann, :periodic),
    window_length::Int64 = 256,
    overlap_length::Int64 = 128,
)
    @assert 0 < overlap_length < window_length "Overlap length must be < window length."

    # the framing kernel only accepts Float64 samples
    signal = if eltype(x) == Float64
        x
    else
        Float64.(x)
    end

    return _get_frames2(
        signal;
        window_type = window_type,
        window_length = window_length,
        overlap_length = overlap_length,
    )
end

# In-place framing on an `AudioObj` using `get_frames2`.
#
# `get_frames2` returns a single array of already-windowed frames, not a
# `(frames, window)` pair. The original one-liner destructured that array into
# `a.data.frames` and `a.setup.window`, which assigns its first two elements instead
# of the frames. Here only the frames are stored; `a.setup.window` is left untouched.
# NOTE(review): the stored frames are already windowed, so any later step that
# multiplies `a.data.frames` by `a.setup.window` would apply the window twice —
# confirm the intended contract.
# NOTE(review): `get_frames2` takes its options positionally, so non-empty `kwargs`
# will raise a MethodError — confirm whether keyword support is wanted.
function get_frames2!(a::AudioObj; kwargs...)
    a.data.frames = get_frames2(a.data.x; kwargs...)
    return a.data.frames
end
5 changes: 5 additions & 0 deletions test/features_extraction.jl
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,8 @@ audio_6 = audio_obj(x, sr)
# run the in-place pipeline on audio_6: frame the signal, compute the STFT, show it
get_frames!(audio_6)
get_stft!(audio_6)
display(audio_6.data.stft)

################
# exercise the modular `*2` API on the raw signal: default framing, then the STFT of
# the resulting frames (positional `get_stft2` method with default settings)
framest = get_frames2(x)
stft1, stft_freq1 = get_stft2(framest, sr)
# NOTE(review): presumably computed for comparison with the results above — confirm
fft22 = get_features(x, sr, :fft)

0 comments on commit ee99852

Please sign in to comment.