Skip to content

Commit

Permalink
modular architecture started
Browse files Browse the repository at this point in the history
  • Loading branch information
PasoStudio73 committed May 12, 2024
1 parent d780d34 commit c183da7
Show file tree
Hide file tree
Showing 13 changed files with 661 additions and 881 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StatGeochem = "df4de05a-b714-11e8-3c2a-c30fb13e804c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[compat]
Expand Down
4 changes: 4 additions & 0 deletions src/Audio911.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ include("fft/lin.jl")
include("fft/mel.jl")
include("fft/spectral.jl")
# utils
include("utils/histogram.jl")
include("utils/speech_detector.jl")
include("utils/in_out.jl")
include("utils/trimaudio.jl")
Expand All @@ -60,4 +61,7 @@ get_fft!
extractfeatures =
export extractfeatures

# modular
export get_frames, get_frames!, get_stft, get_stft!

end # module Audio911
71 changes: 46 additions & 25 deletions src/audioFeaturesExtractor.jl
Original file line number Diff line number Diff line change
Expand Up @@ -126,22 +126,24 @@ function audio_setup(
sr::Int64;

# fft
fft_length::Int64 = 256,
fft_length::Int64 = sr <= 8000 ? 256 : 512,
stft_freq::AbstractVector{Float64} = Float64[],
window::AbstractVector{Float64} = Float64[],
window_type::Tuple{Symbol, Symbol} = (:hann, :periodic),
window_length::Int64 = 0,
overlap_length::Int64 = 0,
window_length::Int64 = fft_length, # standard setting: round(Int, 0.03 * sr)
overlap_length::Int64 = round(Int, fft_length / 2), # standard setting: round(Int, 0.02 * sr)
window_norm::Bool = false,

# spectrum
frequency_range::Tuple{Int64, Int64} = (0, 0),
spectrum_type::Symbol = :power, # :power, :magnitude
frequency_range::Tuple{Int64, Int64} = (0, floor(Int, sr / 2)),
spectrum_type::Symbol = :power, # :power, :magnitude

# mel
mel_style::Symbol = :htk, # :htk, :slaney, :tuned
mel_style::Symbol = :htk, # :htk, :slaney, :tuned
mel_bands::Int64 = 26,
filterbank_design_domain::Symbol = :linear,
filterbank_normalization::Symbol = :bandwidth, # :bandwidth, :area, :none
frequency_scale::Symbol = :mel, # TODO :mel, :bark, :erb
filterbank_normalization::Symbol = :bandwidth, # :bandwidth, :area, :none
frequency_scale::Symbol = :mel, # TODO :mel, :bark, :erb
st_peak_range::Tuple{Int64, Int64} = (200, 700),

# chroma
Expand All @@ -151,15 +153,15 @@ function audio_setup(

# mfcc
num_coeffs::Int64 = 13,
normalization_type::Symbol = :dithered, # :standard, :dithered
rectification::Symbol = :log, # :log, :cubic_root
log_energy_source::Symbol = :standard, # :standard (after windowing), :mfcc
log_energy_pos::Symbol = :none, #:append, :replace, :none
normalization_type::Symbol = :dithered, # :standard, :dithered
rectification::Symbol = :log, # :log, :cubic_root
log_energy_source::Symbol = :standard, # :standard (after windowing), :mfcc
log_energy_pos::Symbol = :none, #:append, :replace, :none
delta_window_length::Int64 = 9,
delta_matrix::Symbol = :transposed, # :standard, :transposed
delta_matrix::Symbol = :transposed, # :standard, :transposed

# spectral
spectral_spectrum::Symbol = :lin, # :lin, :mel
spectral_spectrum::Symbol = :lin, # :lin, :mel

# f0
f0_method::Symbol = :nfc,
Expand All @@ -170,20 +172,15 @@ function audio_setup(
freq_limits::Tuple{Float64, Float64} = (0.0, 0.0),
transform_type::Symbol = :full,
)

window_length == 0 ? window_length = fft_length : window_length
overlap_length == 0 ? overlap_length = round(Int, fft_length / 2) : overlap_length
# window_length == 0 ? window_length = round(Int, 0.03 * sr) : window_length
# overlap_length == 0 ? overlap_length = round(Int, 0.02 * sr) : overlap_length
frequency_range == (0, 0) ? frequency_range = (0, floor(Int, sr / 2)) : frequency_range

# TODO add warnings and errors

AudioSetup(
sr = sr,

# fft
fft_length = fft_length,
stft_freq = stft_freq,
window = window,
window_type = window_type,
window_length = window_length,
overlap_length = overlap_length,
Expand Down Expand Up @@ -243,7 +240,7 @@ function audio_obj(

if preemphasis !== nothing
zi = 2 * x[1] - x[2]
filt!(x, [1.0, -preemphasis], 1.0, x, [zi])
filt!(x, [1.0, - preemphasis], 1.0, x, [zi])
# # aclai preemphasis
# x = filt(PolynomialRatio([1.0, -preemphasis], [1.0]), x)
end
Expand Down Expand Up @@ -274,7 +271,7 @@ end

function audio_obj(
filepath::String,
sr::Int64,
sr::Int64;
preemphasis = nothing,
kwargs...,
)
Expand Down Expand Up @@ -448,7 +445,7 @@ function get_features(
feat::Symbol = :full,
)
func_call = Dict([
:full => get_full,
:full => get_full,
:fft => get_fft,
:lin => get_lin_spec,
:mel => get_mel_spec,
Expand All @@ -457,7 +454,31 @@ function get_features(
:mfcc_delta => get_mfcc_delta,
:spectral => get_spectrals,
:f0 => get_f0,
:cqt => get_cqt])
:cqt => get_cqt,
:age_set =>
(x) -> begin
get_full(x)
return hcat(
(
# audio_obj.data.mel_spectrogram,
# audio_obj.data.log_mel,
audio_obj.data.mfcc_coeffs,
audio_obj.data.mfcc_delta,
audio_obj.data.mfcc_deltadelta,
# audio_obj.data.spectral_centroid,
# audio_obj.data.spectral_crest,
# audio_obj.data.spectral_decrease,
# audio_obj.data.spectral_entropy,
# audio_obj.data.spectral_flatness,
# audio_obj.data.spectral_flux,
# audio_obj.data.spectral_kurtosis,
# audio_obj.data.spectral_rolloff,
# audio_obj.data.spectral_skewness,
# audio_obj.data.spectral_slope,
# audio_obj.data.spectral_spread,
# audio_obj.data.f0,
)...)
end])

if !isnothing(audio_obj)
if haskey(func_call, feat)
Expand Down
Loading

0 comments on commit c183da7

Please sign in to comment.