From c183da739775d200574551bd8b769601b8454ea3 Mon Sep 17 00:00:00 2001
From: paso <paso.studio73@gmail.com>
Date: Sun, 12 May 2024 22:11:44 +0200
Subject: [PATCH] modular architecture started

---
 Project.toml                  |   1 +
 src/Audio911.jl               |   4 +
 src/audioFeaturesExtractor.jl |  71 +++--
 src/fft/fft.jl                | 240 ++++++++++++-----
 src/fft/lin.jl                |   2 +-
 src/fft/mel.jl                |  11 -
 src/fft/spectral.jl           | 250 ++++++++++--------
 src/signalDataStructure.jl    | 481 ++--------------------------------
 src/utils/histogram.jl        | 107 ++++++++
 src/utils/speech_detector.jl  | 286 +++++---------------
 src/windowing/windowing.jl    |  37 +++
 src/windowing/windows.jl      |   2 +
 test/features_extraction.jl   |  50 +++-
 13 files changed, 661 insertions(+), 881 deletions(-)
 create mode 100644 src/utils/histogram.jl

diff --git a/Project.toml b/Project.toml
index 044b970..b2dba7d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -13,6 +13,7 @@ PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
+StatGeochem = "df4de05a-b714-11e8-3c2a-c30fb13e804c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
 [compat]
diff --git a/src/Audio911.jl b/src/Audio911.jl
index 305f22c..e075911 100644
--- a/src/Audio911.jl
+++ b/src/Audio911.jl
@@ -37,6 +37,7 @@ include("fft/lin.jl")
 include("fft/mel.jl")
 include("fft/spectral.jl")
 # utils
+include("utils/histogram.jl")
 include("utils/speech_detector.jl")
 include("utils/in_out.jl")
 include("utils/trimaudio.jl")
@@ -60,4 +61,7 @@ get_fft!
 extractfeatures = 
 export extractfeatures
 
+# modular
+export get_frames, get_frames!, get_stft, get_stft!
+
 end # module Audio911
diff --git a/src/audioFeaturesExtractor.jl b/src/audioFeaturesExtractor.jl
index 6d08302..e530a3f 100644
--- a/src/audioFeaturesExtractor.jl
+++ b/src/audioFeaturesExtractor.jl
@@ -126,22 +126,24 @@ function audio_setup(
 	sr::Int64;
 
 	# fft
-	fft_length::Int64 = 256,
+	fft_length::Int64 = sr <= 8000 ? 256 : 512,
+	stft_freq::AbstractVector{Float64} = Float64[],
+	window::AbstractVector{Float64} = Float64[],
 	window_type::Tuple{Symbol, Symbol} = (:hann, :periodic),
-	window_length::Int64 = 0,
-	overlap_length::Int64 = 0,
+	window_length::Int64 = fft_length, 					# standard setting: round(Int, 0.03 * sr)
+	overlap_length::Int64 = round(Int, fft_length / 2), # standard setting: round(Int, 0.02 * sr)
 	window_norm::Bool = false,
 
 	# spectrum
-	frequency_range::Tuple{Int64, Int64} = (0, 0),
-	spectrum_type::Symbol = :power, # :power, :magnitude
+	frequency_range::Tuple{Int64, Int64} = (0, floor(Int, sr / 2)),
+	spectrum_type::Symbol = :power, 					# :power, :magnitude
 
 	# mel
-	mel_style::Symbol = :htk, # :htk, :slaney, :tuned
+	mel_style::Symbol = :htk, 							# :htk, :slaney, :tuned
 	mel_bands::Int64 = 26,
 	filterbank_design_domain::Symbol = :linear,
-	filterbank_normalization::Symbol = :bandwidth, # :bandwidth, :area, :none
-	frequency_scale::Symbol = :mel, # TODO :mel, :bark, :erb
+	filterbank_normalization::Symbol = :bandwidth, 		# :bandwidth, :area, :none
+	frequency_scale::Symbol = :mel, 					# TODO :mel, :bark, :erb
 	st_peak_range::Tuple{Int64, Int64} = (200, 700),
 
 	# chroma
@@ -151,15 +153,15 @@ function audio_setup(
 
 	# mfcc
 	num_coeffs::Int64 = 13,
-	normalization_type::Symbol = :dithered, # :standard, :dithered
-	rectification::Symbol = :log, # :log, :cubic_root
-	log_energy_source::Symbol = :standard, # :standard (after windowing), :mfcc
-	log_energy_pos::Symbol = :none, #:append, :replace, :none
+	normalization_type::Symbol = :dithered, 			# :standard, :dithered
+	rectification::Symbol = :log, 						# :log, :cubic_root
+	log_energy_source::Symbol = :standard, 				# :standard (after windowing), :mfcc
+	log_energy_pos::Symbol = :none, 					#:append, :replace, :none
 	delta_window_length::Int64 = 9,
-	delta_matrix::Symbol = :transposed, # :standard, :transposed
+	delta_matrix::Symbol = :transposed, 				# :standard, :transposed
 
 	# spectral
-	spectral_spectrum::Symbol = :lin, # :lin, :mel
+	spectral_spectrum::Symbol = :lin, 					# :lin, :mel
 
 	# f0
 	f0_method::Symbol = :nfc,
@@ -170,13 +172,6 @@ function audio_setup(
 	freq_limits::Tuple{Float64, Float64} = (0.0, 0.0),
 	transform_type::Symbol = :full,
 )
-
-	window_length == 0 ? window_length = fft_length : window_length
-	overlap_length == 0 ? overlap_length = round(Int, fft_length / 2) : overlap_length
-	# window_length == 0 ? window_length = round(Int, 0.03 * sr) : window_length
-	# overlap_length == 0 ? overlap_length = round(Int, 0.02 * sr) : overlap_length
-	frequency_range == (0, 0) ? frequency_range = (0, floor(Int, sr / 2)) : frequency_range
-
 	# TODO metti warning ed errori
 
 	AudioSetup(
@@ -184,6 +179,8 @@ function audio_setup(
 
 		# fft
 		fft_length = fft_length,
+		stft_freq = stft_freq,
+		window = window,
 		window_type = window_type,
 		window_length = window_length,
 		overlap_length = overlap_length,
@@ -243,7 +240,7 @@ function audio_obj(
 
 	if preemphasis !== nothing
 		zi = 2 * x[1] - x[2]
-		filt!(x, [1.0, -preemphasis], 1.0, x, [zi])
+		filt!(x, [1.0, - preemphasis], 1.0, x, [zi])
 		# # aclai preemphasis
 		# x = filt(PolynomialRatio([1.0, -preemphasis], [1.0]), x)
 	end
@@ -274,7 +271,7 @@ end
 
 function audio_obj(
 	filepath::String,
-	sr::Int64,
+	sr::Int64;
 	preemphasis = nothing,
 	kwargs...,
 )
@@ -448,7 +445,7 @@ function get_features(
 	feat::Symbol = :full,
 )
 	func_call = Dict([
-		:full => get_full,
+		:full => get_full, 
 		:fft => get_fft,
 		:lin => get_lin_spec,
 		:mel => get_mel_spec,
@@ -457,7 +454,31 @@ function get_features(
 		:mfcc_delta => get_mfcc_delta,
 		:spectral => get_spectrals,
 		:f0 => get_f0,
-		:cqt => get_cqt])
+		:cqt => get_cqt,
+		:age_set =>
+			(x) -> begin
+				get_full(x)
+				return hcat(
+					(
+						# audio_obj.data.mel_spectrogram,
+						# audio_obj.data.log_mel,
+						audio_obj.data.mfcc_coeffs,
+						audio_obj.data.mfcc_delta,
+						audio_obj.data.mfcc_deltadelta,
+						# audio_obj.data.spectral_centroid,
+						# audio_obj.data.spectral_crest,
+						# audio_obj.data.spectral_decrease,
+						# audio_obj.data.spectral_entropy,
+						# audio_obj.data.spectral_flatness,
+						# audio_obj.data.spectral_flux,
+						# audio_obj.data.spectral_kurtosis,
+						# audio_obj.data.spectral_rolloff,
+						# audio_obj.data.spectral_skewness,
+						# audio_obj.data.spectral_slope,
+						# audio_obj.data.spectral_spread,
+						# audio_obj.data.f0,
+					)...)
+			end])
 
 	if !isnothing(audio_obj)
 		if haskey(func_call, feat)
diff --git a/src/fft/fft.jl b/src/fft/fft.jl
index bfbde53..129cbc8 100644
--- a/src/fft/fft.jl
+++ b/src/fft/fft.jl
@@ -1,70 +1,178 @@
-# reference: ETSI ES 201 108 V1.1.2 (2000-04)
-# https://www.3gpp.org/ftp/tsg_sa/TSG_SA/TSGS_13/Docs/PDF/SP-010566.pdf
+function get_onesided_fft_range(fft_length::Int64)
+	if mod(fft_length, 2) == 0
+		return collect(1:Int(fft_length / 2 + 1))   # EVEN
+	else
+		return collect(1:Int((fft_length + 1) / 2))  # ODD
+	end
+end # get_onesided_fft_range
 
-# use FFTW package
+#------------------------------------------------------------------------------#
+#              fft version 1 as used in audio features extraction              #
+#------------------------------------------------------------------------------#
+function _get_fft(x::AbstractArray{Float64}, setup::AudioSetup)
+	hop_length = setup.window_length - setup.overlap_length
+	if isempty(setup.window)
+		setup.window, _ = gencoswin(setup.window_type[1], setup.window_length, setup.window_type[2])
+	end
 
-# function get_fft_length( # switch case da ricordare!!!
-#     sr::Int64,
-# )
-#     sr <= 4000 && return 128
-#     sr <= 8000 && return 256
-#     sr <= 16000 && return 512
-#     return 1024
-# end # get_fft_length
+	# split in windows
+	y = buffer(x, setup.window_length, setup.window_length - setup.overlap_length)
 
-function get_onesided_fft_range(fft_length::Int64)
-    if mod(fft_length, 2) == 0
-        return collect(1:Int(fft_length / 2 + 1))   # EVEN
-    else
-        return collect(1:Int((fft_length + 1) / 2))  # ODD
-    end
-end # get_onesided_fft_range
+	# apply window and take fft
+	Z = fft(y .* setup.window, (1,))
+
+	# take one side
+	logical_ossb = falses(setup.fft_length)
+	logical_ossb[get_onesided_fft_range(setup.fft_length)] .= true
+	Z = Z[logical_ossb, :]
+
+	# log energy
+	# reference: ETSI ES 201 108 V1.1.2 (2000-04)
+	# https://www.3gpp.org/ftp/tsg_sa/TSG_SA/TSGS_13/Docs/PDF/SP-010566.pdf
+	if setup.log_energy_pos != :none && setup.log_energy_source == :standard
+		log_energy = sum(eachrow(y .^ 2))
+
+		if setup.normalization_type == :standard
+			log_energy[log_energy.==0] .= floatmin(Float64)
+		elseif setup.normalization_type == :dithered
+			log_energy[log_energy.<1e-8] .= 1e-8
+		end
+		log_energy = log.(log_energy)
+	else
+		log_energy = Vector{Float64}()
+	end
+
+	if setup.spectrum_type == :power
+		real(Z .* conj(Z)), log_energy
+	elseif setup.spectrum_type == :magnitude
+		abs.(Z), log_energy
+	else
+		error("Unknown spectrum type: $(setup.spectrum_type)")
+	end
+end
+
+get_fft!(setup::AudioSetup, data::AudioData) = data.fft, data.log_energy = _get_fft(data.x, setup)
+
+#------------------------------------------------------------------------------#
+#                                     stft                                     #
+#------------------------------------------------------------------------------#
+function _get_stft(
+	x::AbstractArray{Float64},
+	sr::Int64;
+	fft_length::Int64,
+	window::AbstractVector{Float64},
+	frequency_range::Tuple{Int64, Int64},
+	spectrum_type::Symbol,
+)
+	@assert fft_length >= size(x, 1) "fft_length must be > window length. Got fft_length = $fft_length, window length = $(size(x,1))."
+
+	# ensure x is of length fft_length
+	# if the FFT window is larger than the window, the audio data will be zero-padded to match the size of the FFT window.
+	# this zero-padding in the time domain results in an interpolation in the frequency domain, 
+	# which can provide a more detailed view of the spectral content of the signal.
+	x = size(x, 1) < fft_length ? vcat(x, zeros(eltype(x), fft_length - size(x, 1), size(x, 2))) : x[1:fft_length, :]
+
+	# get fft
+	Y = fft(x, (1,))
+
+	# post process
+	# trim to desired range
+	bin_low = ceil(Int, frequency_range[1] * fft_length / sr + 1)
+	bin_high = floor(Int, frequency_range[2] * fft_length / sr + 1)
+	bins = collect(bin_low:bin_high)
+	y = Y[bins, :]
+
+	# convert to half-sided power or magnitude spectrum
+	spectrum_funcs = Dict(
+		:power => x -> real.((x .* conj.(x)) / (0.5 * sum(window.^2))),
+		:magnitude => x -> abs.(x) / (0.5 * sum(window)),
+	)
+	# check if spectrum_type is valid
+	@assert haskey(spectrum_funcs, spectrum_type) "Unknown spectrum_type: $spectrum_type."
+
+	y = spectrum_funcs[spectrum_type](y)
+
+	# trim borders
+	# halve the first bin if it's the lowest bin
+	bin_low == 1 && (y[1, :] *= 0.5)
+	# halve the last bin if it's the Nyquist bin and FFT length is even
+	bin_high == fld(fft_length, 2) + 1 && iseven(fft_length) && (y[end, :] *= 0.5)
+
+	# create frequency vector
+	stft_freq = (sr / fft_length) * (bins .- 1)
+	# shift final bin if fftLength is odd and the final range is full to fs/2.
+	if fft_length % 2 != 0 && bin_high == floor(fftLength / 2 + 1)
+		stft_freq[end] = sr * (fft_length - 1) / (2 * fft_length)
+	end
+
+	return y, stft_freq
+end
+
+_get_stft(x::AbstractArray{Float64}, s::AudioSetup) = _get_stft(
+	x,
+	s.sr,
+	fft_length = s.fft_length,
+	window = s.window,
+	frequency_range = s.frequency_range,
+	spectrum_type = s.spectrum_type,
+)
+
+function get_stft(
+	x::AbstractArray{<:AbstractFloat},
+	sr::Int64,
+	window::AbstractVector{Float64}=Float64[];
+	fft_length::Int64 = sr <= 8000 ? 256 : 512,
+	frequency_range::Tuple{Int64, Int64} = (0, floor(Int, sr / 2)),
+	spectrum_type::Symbol = :power, # :power, :magnitude
+)
+	@assert !isempty(window) "Window missing as third parameter."
+	@assert sr > 0 "Sample rate must be > 0."
+	@assert 0 <= frequency_range[1] < frequency_range[2] <= sr / 2 "Frequency range must be (0, sr/2)."
+
+	stft_spec, stft_freq = _get_stft(
+		x,
+		sr,
+		fft_length = fft_length,
+		window = window,
+		frequency_range = frequency_range,
+		spectrum_type = spectrum_type,
+	)
+end
+
+function get_stft(
+	x::AbstractVector{<:AbstractFloat},
+	sr::Int64;
+	fft_length::Int64 = sr <= 8000 ? 256 : 512,
+	window_type::Tuple{Symbol, Symbol} = (:hann, :periodic),
+	window_length::Int64 = fft_length,
+	overlap_length::Int64 = round(Int, fft_length / 2),
+	frequency_range::Tuple{Int64, Int64} = (0, floor(Int, sr / 2)),
+	spectrum_type::Symbol = :power, # :power, :magnitude
+)
+	@assert sr > 0 "Sample rate must be > 0."
+	@assert 0 <= frequency_range[1] < frequency_range[2] <= sr / 2 "Frequency range must be (0, sr/2)."
+	@assert 0 < overlap_length < window_length "Overlap length must be < window length."
+
+	frames, window = _get_frames(
+		eltype(x) == Float64 ? x : Float64.(x),
+		window_type = window_type,
+		window_length = window_length,
+		overlap_length = overlap_length,
+	)
+
+	stft_spec, stft_freq = _get_stft(
+		frames .* window,
+		sr,
+		fft_length = fft_length,
+		window = window,
+		frequency_range = frequency_range,
+		spectrum_type = spectrum_type,
+	)
+end
 
-################################################################################
-#                                    main                                      #
-################################################################################
-function get_fft!(setup::AudioSetup, data::AudioData)
-    # TODO validate FFT length, validate overlap length (audioFeatureExtractor.m line 1324)
-
-    setup.fft_length = setup.window_length # definisce la fft pari alla finestra
-    hop_length = setup.window_length - setup.overlap_length
-    data.fft_window, _ = gencoswin(setup.window_type[1], setup.window_length, setup.window_type[2])
-
-    # split in windows
-    y = buffer(data.x, setup.window_length, hop_length)
-
-    # apply window and take fft
-    Z = fft(y .* data.fft_window, (1,))
-
-    # take one side
-    logical_ossb = falses(setup.fft_length)
-    logical_ossb[get_onesided_fft_range(setup.fft_length)] .= true
-    Z = Z[logical_ossb, :]
-
-    # if setup.spectrum_type == :power
-    #     data.fft = real(Z .* conj(Z))
-    # elseif setup.spectrum_type == :magnitude
-    #     data.fft = sqrt(real(Z .* conj(Z)))
-    # else
-    #     data.fft = abs.(Z)
-    # end
-
-    ######### NON è nel punto corretto: la fft deve essere a numeri complessi, poi convertita in reali nel calcolo degli spettrogrammi!!!
-    ########### DA CAMBIARE ANCHE la struttura dati... FORSE
-    setup.spectrum_type == :power ? data.fft = real(Z .* conj(Z)) : data.fft = abs.(Z)
-
-    # log energy
-    # reference: ETSI ES 201 108 V1.1.2 (2000-04)
-    # https://www.3gpp.org/ftp/tsg_sa/TSG_SA/TSGS_13/Docs/PDF/SP-010566.pdf
-    if setup.log_energy_source == :standard
-        log_energy = sum(eachrow(y .^ 2))
-
-        if setup.normalization_type == :standard
-            log_energy[log_energy.==0] .= floatmin(Float64)
-        elseif setup.normalization_type == :dithered
-            log_energy[log_energy.<1e-8] .= 1e-8
-        end
-        
-        data.log_energy = log.(log_energy)
-    end
-end # get_fft!
\ No newline at end of file
+function get_stft!(a::AudioObj)
+	if isempty(a.data.frames)
+		a.data.frames, a.setup.window = _get_frames(a.data.x, a.setup)
+	end
+	 a.data.stft, a.setup.stft_freq = _get_stft(a.data.frames .* a.setup.window, a.setup)
+end
diff --git a/src/fft/lin.jl b/src/fft/lin.jl
index 845cec7..94cb888 100644
--- a/src/fft/lin.jl
+++ b/src/fft/lin.jl
@@ -58,7 +58,7 @@ function lin_spectrogram!(
     end
 
     # calculate linear spectrum and normalization
-    linear_norm_factor = get_lin_norm_factor(setup.spectrum_type, data.fft_window)
+    linear_norm_factor = get_lin_norm_factor(setup.spectrum_type, setup.window)
     full_spectrum ? data.lin_spectrogram = data.fft * linear_norm_factor : data.lin_spectrogram = data.fft[bins_logical, :] * linear_norm_factor
 
     if adjust_bins
diff --git a/src/fft/mel.jl b/src/fft/mel.jl
index 3fbd7d2..4c2a93a 100644
--- a/src/fft/mel.jl
+++ b/src/fft/mel.jl
@@ -104,17 +104,6 @@ function design_filterbank(data::AudioData, setup::AudioSetup)
 		band_edges = mel2hz(LinRange(melRange[1], melRange[end], setup.mel_bands + 2), setup.mel_style)
 	elseif setup.mel_style == :tuned
 		band_edges = mel2hz(LinRange(melRange[1], melRange[end], setup.mel_bands + 2), :htk)
-		# elseif setup.mel_style == :semitones
-		# 	if isempty(data.lin_spectrogram)
-		# 		lin_spectrogram!(setup, data)
-		# 	end
-
-		# 	_, loudest_index = catch_loudest_index(data.lin_spectrogram, data.lin_frequencies, setup.st_peak_range)
-
-		# 	minsemitone = hz2semitone(setup.st_peak_range[1], data.lin_frequencies[loudest_index])
-		# 	maxsemitone = hz2semitone(setup.st_peak_range[2], data.lin_frequencies[loudest_index])
-
-		# 	band_edges = semitone2hz.(minsemitone .+ collect(0:(setup.mel_bands+1)) / (setup.mel_bands + 1) * (maxsemitone - minsemitone), loudest_index)
 	else
 		error("Unknown mel_style $(setup.mel_style).")
 	end
diff --git a/src/fft/spectral.jl b/src/fft/spectral.jl
index c61918f..7a403ab 100644
--- a/src/fft/spectral.jl
+++ b/src/fft/spectral.jl
@@ -1,150 +1,182 @@
 function get_spectrum(setup::AudioSetup, data::AudioData)
-    setup.spectral_spectrum == :mel && return data.mel_spectrogram', data.mel_frequencies
-    setup.spectral_spectrum == :lin && return data.lin_spectrogram', data.lin_frequencies
-    error("Unknown spectral spectrum")
+	setup.spectral_spectrum == :mel && return data.mel_spectrogram', data.mel_frequencies
+	setup.spectral_spectrum == :lin && return data.lin_spectrogram', data.lin_frequencies
+	error("Unknown spectral spectrum")
 end
 
 function spectral_crest(
-    s::AbstractArray{Float64},
-    data::AudioData,
-    sum_x1::Vector{Float64},
-    arithmetic_mean::Vector{Float64}
+	s::AbstractArray{Float64},
+	data::AudioData,
+	sum_x1::Vector{Float64},
+	arithmetic_mean::Vector{Float64},
 )
-    # # calculate spectral mean
-    # m = sum(real(s), dims=1) ./ size(s, 1)
-    # calculate spectral peak
-    peak = maximum(s, dims=1)
-    # calculate spectral crest
-    data.spectral_crest = vec(peak ./ arithmetic_mean')
+	# # calculate spectral mean
+	# m = sum(real(s), dims=1) ./ size(s, 1)
+	# calculate spectral peak
+	peak = maximum(s, dims = 1)
+	# calculate spectral crest
+	data.spectral_crest = vec(peak ./ arithmetic_mean')
 end
 
 function spectral_decrease(s::AbstractArray{Float64}, data::AudioData)
-    # calculate decrease
-    data.spectral_decrease = vec(real(sum((s[2:end, :] .- s[1, :]') ./ (1:size(s, 1)-1), dims=1) ./ sum(s[2:end, :], dims=1)))
+	# calculate decrease
+	data.spectral_decrease = vec(real(sum((s[2:end, :] .- s[1, :]') ./ (1:size(s, 1)-1), dims = 1) ./ sum(s[2:end, :], dims = 1)))
 end
 
 function spectral_entropy(
-    s::AbstractArray{Float64},
-    data::AudioData,
-    sum_x1::Vector{Float64}
+	s::AbstractArray{Float64},
+	data::AudioData,
+	sum_x1::Vector{Float64},
 )
-    # calculate entropy
-    X = s ./ repeat(sum_x1', size(s, 1), 1)
-    t = replace!(-sum(X .* log2.(X), dims=1), NaN => 0)
-    data.spectral_entropy = vec(t ./ log2(size(s, 1)))
+	# calculate entropy
+	X = s ./ repeat(sum_x1', size(s, 1), 1)
+	t = replace!(-sum(X .* log2.(X), dims = 1), NaN => 0)
+	data.spectral_entropy = vec(t ./ log2(size(s, 1)))
 end
 
 function spectral_flatness(
-    s::AbstractArray{Float64},
-    data::AudioData,
-    sum_x1::Vector{Float64},
-    arithmetic_mean::Vector{Float64}
+	s::AbstractArray{Float64},
+	data::AudioData,
+	sum_x1::Vector{Float64},
+	arithmetic_mean::Vector{Float64},
 )
-    # calculate geometric mean, arithmetic mean, and flatness
-    geometric_mean = exp.(sum(log.(s .+ eps(Float64)), dims=1) / size(s, 1))
-    data.spectral_flatness = vec(geometric_mean ./ arithmetic_mean')
+	# calculate geometric mean, arithmetic mean, and flatness
+	geometric_mean = exp.(sum(log.(s .+ eps(Float64)), dims = 1) / size(s, 1))
+	data.spectral_flatness = vec(geometric_mean ./ arithmetic_mean')
 end
 
 function spectral_flux(s::AbstractArray{Float64}, data::AudioData)
-    initial_condition = s[:, 1]
-    # calculate flux
-    temp = diff(hcat(initial_condition, s), dims=2)
-    fl = []
-    for i in axes(temp, 2)
-        append!(fl, norm(temp[:, i]))
-    end
-    data.spectral_flux = fl
+	initial_condition = s[:, 1]
+	# calculate flux
+	temp = diff(hcat(initial_condition, s), dims = 2)
+	fl = []
+	for i in axes(temp, 2)
+		append!(fl, norm(temp[:, i]))
+	end
+	data.spectral_flux = fl
 end
 
 function spectral_kurtosis(
-    s::AbstractArray{Float64},
-    data::AudioData,
-    # sum_x1::Vector{Float64},
-    # centroid::Vector{Float64},
-    higher_moment_tmp::AbstractArray{Float64},
-    higher_moment_denom::Vector{Float64},
-    higher_momement_num::AbstractArray{Float64}
+	s::AbstractArray{Float64},
+	data::AudioData,
+	# sum_x1::Vector{Float64},
+	# centroid::Vector{Float64},
+	higher_moment_tmp::AbstractArray{Float64},
+	higher_moment_denom::Vector{Float64},
+	higher_momement_num::AbstractArray{Float64},
 )
-    # calculate centroid
-    # centroid = vec(sum(s .* setup.fft_frequencies, dims=1) ./ sum(s, dims=1))
-    # # calculate spread
-    # temp = (setup.fft_frequencies .- centroid') .^ 2
-    # spread = sqrt.(sum((temp) .* s, dims=1) ./ sum(s, dims=1))
-    # # calculate kurtosis
-    # data.spectral_kurtosis = vec(real(sum(((temp .^ 2) .* s), dims=1) ./ ((spread .^ 4) .* sum(s, dims=1))))
-    data.spectral_kurtosis = vec(sum(higher_momement_num .* higher_moment_tmp, dims=1) ./ (higher_moment_denom .* data.spectral_spread)')
+	# calculate centroid
+	# centroid = vec(sum(s .* setup.fft_frequencies, dims=1) ./ sum(s, dims=1))
+	# # calculate spread
+	# temp = (setup.fft_frequencies .- centroid') .^ 2
+	# spread = sqrt.(sum((temp) .* s, dims=1) ./ sum(s, dims=1))
+	# # calculate kurtosis
+	# data.spectral_kurtosis = vec(real(sum(((temp .^ 2) .* s), dims=1) ./ ((spread .^ 4) .* sum(s, dims=1))))
+	data.spectral_kurtosis = vec(sum(higher_momement_num .* higher_moment_tmp, dims = 1) ./ (higher_moment_denom .* data.spectral_spread)')
 end
 
 function spectral_rolloff(s::AbstractArray{Float64}, data::AudioData, fft_frequencies::Vector{Float64})
-    # calculate rolloff point
-    threshold = 0.95
-    c = cumsum(s, dims=1)
-    d = c[end, :] * threshold
-    idx = zeros(size(d))
-    for i in eachindex(d)
-        idx[i] = findfirst(real(c[:, i]) .>= real(d[i]))
-    end
-    data.spectral_rolloff = fft_frequencies[Int.(idx)]
+	# calculate rolloff point
+	threshold = 0.95
+	c = cumsum(s, dims = 1)
+	d = c[end, :] * threshold
+	idx = zeros(size(d))
+	for i in eachindex(d)
+		idx[i] = findfirst(real(c[:, i]) .>= real(d[i]))
+	end
+	data.spectral_rolloff = fft_frequencies[Int.(idx)]
 end
 
 function spectral_skewness(
-    s::AbstractArray{Float64},
-    data::AudioData,
-    sum_x1::Vector{Float64},
-    centroid::Vector{Float64},
-    higher_moment_denom::Vector{Float64},
-    higher_momement_num::AbstractArray{Float64}
+	s::AbstractArray{Float64},
+	data::AudioData,
+	sum_x1::Vector{Float64},
+	centroid::Vector{Float64},
+	higher_moment_denom::Vector{Float64},
+	higher_momement_num::AbstractArray{Float64},
 )
-    # # calculate centroid
-    # centroid = vec(sum(s .* setup.fft_frequencies, dims=1) ./ sum(s, dims=1))
-    # # calculate spread
-    # temp = (setup.fft_frequencies .- centroid')
-    # spread = sqrt.(sum((temp .^ 2) .* s, dims=1) ./ sum(s, dims=1))
-    # # calculate skewness
-    # data.spectral_skewness = vec(real(sum((temp .^ 3) .* s, dims=1) ./ ((spread .^ 3) .* sum(s, dims=1))))
-    data.spectral_skewness = vec(sum(higher_momement_num, dims=1) ./ higher_moment_denom')
+	# # calculate centroid
+	# centroid = vec(sum(s .* setup.fft_frequencies, dims=1) ./ sum(s, dims=1))
+	# # calculate spread
+	# temp = (setup.fft_frequencies .- centroid')
+	# spread = sqrt.(sum((temp .^ 2) .* s, dims=1) ./ sum(s, dims=1))
+	# # calculate skewness
+	# data.spectral_skewness = vec(real(sum((temp .^ 3) .* s, dims=1) ./ ((spread .^ 3) .* sum(s, dims=1))))
+	data.spectral_skewness = vec(sum(higher_momement_num, dims = 1) ./ higher_moment_denom')
 end
 
 function spectral_slope(
-    s::AbstractArray{Float64},
-    data::AudioData,
-    sum_x1::Vector{Float64},
-    arithmetic_mean::Vector{Float64},
-    fft_frequencies::Vector{Float64}
+	s::AbstractArray{Float64},
+	data::AudioData,
+	sum_x1::Vector{Float64},
+	arithmetic_mean::Vector{Float64},
+	fft_frequencies::Vector{Float64},
 )
-    # calculate slope
-    f_minus_mu_f = fft_frequencies .- sum(fft_frequencies, dims=1) ./ size(s, 1)
-    X_minus_mu_X = s .- arithmetic_mean'
-    data.spectral_slope = vec(real(sum(X_minus_mu_X .* f_minus_mu_f, dims=1) ./ sum(f_minus_mu_f .^ 2)))
+	# calculate slope
+	f_minus_mu_f = fft_frequencies .- sum(fft_frequencies, dims = 1) ./ size(s, 1)
+	X_minus_mu_X = s .- arithmetic_mean'
+	data.spectral_slope = vec(real(sum(X_minus_mu_X .* f_minus_mu_f, dims = 1) ./ sum(f_minus_mu_f .^ 2)))
+end
+
+function _get_spread(x::AbstractArray{Float64}, setup::AudioSetup)
+
 end
 
 ################################################################################
 #                                    main                                      #
 ################################################################################
 function get_spectrals!(
-    setup::AudioSetup,
-    data::AudioData
-    )
-    s, freq = get_spectrum(setup, data)
-
-    # common data
-    size_x1 = size(s, 1)
-    sum_x1 = vec(sum(s, dims=1))
-    arithmetic_mean = sum_x1 ./ size_x1
-    data.spectral_centroid = vec(sum(s .* freq, dims=1) ./ sum_x1')
-    data.spectral_centroid = replace!(data.spectral_centroid, NaN => 0)
-    higher_moment_tmp = freq .- data.spectral_centroid'
-    data.spectral_spread = vec(sqrt.(sum((higher_moment_tmp .^ 2) .* s, dims=1) ./ sum_x1'))
-    higher_moment_denom = (data.spectral_spread .^ 3) .* sum_x1
-    higher_momement_num = (higher_moment_tmp .^ 3) .* s
-
-    spectral_crest(s, data, sum_x1, arithmetic_mean)
-    spectral_entropy(s, data, sum_x1)
-    spectral_flatness(s, data, sum_x1, arithmetic_mean)
-    spectral_flux(s, data)
-    spectral_kurtosis(s, data, higher_moment_tmp, higher_moment_denom, higher_momement_num)
-    spectral_rolloff(s, data, freq)
-    spectral_skewness(s, data, sum_x1, data.spectral_centroid, higher_moment_denom, higher_momement_num)
-    spectral_decrease(s, data)
-    spectral_slope(s, data, sum_x1, arithmetic_mean, freq)
+	setup::AudioSetup,
+	data::AudioData,
+)
+	s, freq = get_spectrum(setup, data)
+
+	# common data
+	size_x1 = size(s, 1)
+	sum_x1 = vec(sum(s, dims = 1))
+	arithmetic_mean = sum_x1 ./ size_x1
+	data.spectral_centroid = vec(sum(s .* freq, dims = 1) ./ sum_x1')
+	data.spectral_centroid = replace!(data.spectral_centroid, NaN => 0)
+	higher_moment_tmp = freq .- data.spectral_centroid'
+	data.spectral_spread = vec(sqrt.(sum((higher_moment_tmp .^ 2) .* s, dims = 1) ./ sum_x1'))
+	higher_moment_denom = (data.spectral_spread .^ 3) .* sum_x1
+	higher_momement_num = (higher_moment_tmp .^ 3) .* s
+
+	spectral_crest(s, data, sum_x1, arithmetic_mean)
+	spectral_entropy(s, data, sum_x1)
+	spectral_flatness(s, data, sum_x1, arithmetic_mean)
+	spectral_flux(s, data)
+	spectral_kurtosis(s, data, higher_moment_tmp, higher_moment_denom, higher_momement_num)
+	spectral_rolloff(s, data, freq)
+	spectral_skewness(s, data, sum_x1, data.spectral_centroid, higher_moment_denom, higher_momement_num)
+	spectral_decrease(s, data)
+	spectral_slope(s, data, sum_x1, arithmetic_mean, freq)
+end
+
+# ---------------------------------------------------------------------------------- #
+#                                     utilities                                      #
+# ---------------------------------------------------------------------------------- #
+# Calculate sums
+sum_s = (s) -> sum(s, dims = 1)
+sum_sfreq = (s, sfreq) -> sum(s .* sfreq, dims = 1)
+
+# ---------------------------------------------------------------------------------- #
+#                                  spectral centroid                                 #
+# ---------------------------------------------------------------------------------- #
+function _get_spec_centroid(
+	s::AbstractArray{Float64},
+	sfreq::AbstractVector{Float64},
+)
+	vec(sum_sfreq(s, sfreq) ./ sum_s(s))
+	# replace!(vec(sum(s .* sfreq, dims = 1) ./ sum(x1, dims=1)), NaN => 0)
+end
+# ---------------------------------------------------------------------------------- #
+#                                  spectral spread                                   #
+# ---------------------------------------------------------------------------------- #
+function _get_spec_spread(
+	s::AbstractArray{Float64},
+	sfreq::AbstractVector{Float64},
+)
+	centroid = _get_spec_centroid(s, sfreq)
+	vec(sqrt.(sum_s(s .* (sfreq .- centroid').^2) ./ sum_s(s)))
 end
\ No newline at end of file
diff --git a/src/signalDataStructure.jl b/src/signalDataStructure.jl
index a83c4d0..e85a663 100644
--- a/src/signalDataStructure.jl
+++ b/src/signalDataStructure.jl
@@ -6,11 +6,22 @@
 	AudioSetup stores all datas that has to be shared in Audio911 module
 	AudioData stores all results from signal analysis
 """
+### da implementare forse
+mutable struct Spectrals
+	# spectral
+	spec_source::Symbol
+end
+###
+
 @with_kw mutable struct AudioSetup
 	sr::Int64
 
 	# fft
 	fft_length::Int64
+	stft_freq::AbstractVector{Float64}
+
+	# windowing
+	window::AbstractVector{Float64}
 	window_type::Tuple{Symbol, Symbol}
 	window_length::Int64
 	overlap_length::Int64
@@ -43,6 +54,7 @@
 	delta_matrix::Symbol
 
 	# spectral
+	# spectrals::Spectrals
 	spectral_spectrum::Symbol
 
 	# f0
@@ -57,17 +69,18 @@ end
 
 @with_kw mutable struct AudioData
 	x::AbstractVector{Float64}
+	frames::AbstractArray{Float64} = []
 
 	# fft
 	fft::AbstractArray{Float64} = []
-	fft_window::Vector{Float64} = []
+	stft::AbstractArray{Float64} = []
 
 	# linear spectrum
-	lin_frequencies::Vector{Float64} = []
+	lin_frequencies::AbstractVector{Float64} = []
 	lin_spectrogram::AbstractArray{Float64} = []
 
 	# mel_spectrum
-	mel_frequencies::Vector{Float64} = []
+	mel_frequencies::AbstractVector{Float64} = []
 	mel_spectrogram::AbstractArray{Float64} = []
 
 	# logaritmic mel
@@ -77,23 +90,23 @@ end
 	mfcc_coeffs::AbstractArray{Float64} = []
 	mfcc_delta::AbstractArray{Float64} = []
 	mfcc_deltadelta::AbstractArray{Float64} = []
-	log_energy::Vector{Float64} = []
+	log_energy::AbstractVector{Float64} = []
 
 	# spectral
-	spectral_centroid::Vector{Float64} = []
-	spectral_crest::Vector{Float64} = []
-	spectral_decrease::Vector{Float64} = []
-	spectral_entropy::Vector{Float64} = []
-	spectral_flatness::Vector{Float64} = []
-	spectral_flux::Vector{Float64} = []
-	spectral_kurtosis::Vector{Float64} = []
-	spectral_rolloff::Vector{Float64} = []
-	spectral_skewness::Vector{Float64} = []
-	spectral_slope::Vector{Float64} = []
-	spectral_spread::Vector{Float64} = []
+	spectral_centroid::AbstractVector{Float64} = []
+	spectral_crest::AbstractVector{Float64} = []
+	spectral_decrease::AbstractVector{Float64} = []
+	spectral_entropy::AbstractVector{Float64} = []
+	spectral_flatness::AbstractVector{Float64} = []
+	spectral_flux::AbstractVector{Float64} = []
+	spectral_kurtosis::AbstractVector{Float64} = []
+	spectral_rolloff::AbstractVector{Float64} = []
+	spectral_skewness::AbstractVector{Float64} = []
+	spectral_slope::AbstractVector{Float64} = []
+	spectral_spread::AbstractVector{Float64} = []
 
 	# f0
-	f0::Vector{Float64} = []
+	f0::AbstractVector{Float64} = []
 
 	# constant-q transform
 	cqt_spec::AbstractArray{Float64} = []
@@ -103,439 +116,3 @@ mutable struct AudioObj
 	setup::AudioSetup
 	data::AudioData
 end
-
-# reference:
-# https://www.functionalnoise.com/pages/2023-01-31-julia-class/
-# mutable struct AudioObj
-# 	setup::AudioSetup
-# 	data::AudioData
-
-	# const get_fft::Function
-	# const get_lin_spec::Function
-	# const get_mel_spec::Function
-	# const get_log_mel::Function
-	# const get_mfcc::Function
-	# const get_spectrals::Function
-	# const get_f0::Function
-	# const get_cqt::Function
-	# const get_features::Function
-
-	# function get_fft(self::AudioObj)
-	# 	if isempty(self.data.fft)
-	# 		get_fft!(self.setup, self.data)
-	# 	end
-
-	# 	return self.data.fft'
-	# end
-
-	# function get_lin_spec(self::AudioObj)
-	# 	if isempty(self.data.lin_spectrogram)
-	# 		if isempty(self.data.fft)
-	# 			get_fft!(self.setup, self.data)
-	# 		end
-	# 		lin_spectrogram!(self.setup, self.data)
-	# 	end
-
-	# 	return self.data.lin_spectrogram
-	# end
-
-	# function get_mel_spec(self::AudioObj)
-	# 	if isempty(self.data.mel_spectrogram)
-	# 		if isempty(self.data.fft)
-	# 			get_fft!(self.setup, self.data)
-	# 		end
-	# 		get_mel_spec!(self.setup, self.data)
-	# 	end
-
-	# 	return self.data.mel_spectrogram
-	# end
-
-	# function get_log_mel(self::AudioObj)
-	# 	if isempty(self.data.log_mel)
-	# 		if isempty(self.data.mel_spectrogram)
-	# 			if isempty(self.data.fft)
-	# 				get_fft!(self.setup, self.data)
-	# 			end
-	# 			get_mel_spec!(self.setup, self.data)
-	# 		end
-	# 		get_log_mel!(self.setup, self.data)
-	# 	end
-
-	# 	return self.data.log_mel
-	# end
-
-	# function get_mfcc(self::AudioObj)
-	# 	if isempty(self.data.mfcc_coeffs)
-	# 		if isempty(self.data.mel_spectrogram)
-	# 			if isempty(self.data.fft)
-	# 				get_fft!(self.setup, self.data)
-	# 			end
-	# 			get_mel_spec!(self.setup, self.data)
-	# 		end
-	# 		get_mfcc!(self.setup, self.data)
-	# 		get_mfcc_deltas!(self.setup, self.data)
-	# 	end
-
-	# 	return hcat((self.data.mfcc_coeffs, self.data.mfcc_delta, self.data.mfcc_deltadelta)...)
-	# end
-
-	# function get_spectrals(self::AudioObj)
-	# 	if self.setup.spectral_spectrum == :lin && isempty(self.data.lin_spectrogram)
-	# 		if isempty(self.data.fft)
-	# 			get_fft!(self.setup, self.data)
-	# 		end
-	# 		lin_spectrogram!(self.setup, self.data)
-	# 	elseif self.setup.spectral_spectrum == :mel && isempty(self.data.mel_spectrogram)
-	# 		if isempty(self.data.fft)
-	# 			get_fft!(self.setup, self.data)
-	# 		end
-	# 		mel_spectrogram!(self.setup, self.data)
-	# 	end
-	# 	if isempty(self.data.spectral_centroid)
-	# 		get_spectrals!(self.setup, self.data)
-	# 	end
-
-	# 	return hcat(
-	# 		(
-	# 			self.data.spectral_centroid,
-	# 			self.data.spectral_crest,
-	# 			self.data.spectral_decrease,
-	# 			self.data.spectral_entropy,
-	# 			self.data.spectral_flatness,
-	# 			self.data.spectral_flux,
-	# 			self.data.spectral_kurtosis,
-	# 			self.data.spectral_rolloff,
-	# 			self.data.spectral_skewness,
-	# 			self.data.spectral_slope,
-	# 			self.data.spectral_spread,
-	# 		)...,
-	# 	)
-	# end
-
-	# function get_f0(self::AudioObj)
-	# 	if isempty(self.data.f0)
-	# 		get_f0!(self.setup, self.data)
-	# 	end
-
-	# 	return self.data.f0
-	# end
-
-	# function get_cqt(self::AudioObj)
-	# 	if isempty(self.data.cqt_spec)
-	# 		get_cqt_spec!(self.setup, self.data)
-	# 	end
-
-	# 	return self.data.cqt_spec
-	# end
-
-# 	# --------------------------------------------------------------------------- #
-# 	#                            get features profiles                            #
-# 	# --------------------------------------------------------------------------- #
-# 	function get_features(self::AudioObj, profile::Symbol = :full)
-# 		if profile == :full
-# 			if isempty(self.data.fft)
-# 				get_fft!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.mel_spectrogram)
-# 				get_mel_spec!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.log_mel)
-# 				get_log_mel!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.mfcc_coeffs)
-# 				get_mfcc!(self.setup, self.data)
-# 				get_mfcc_deltas!(self.setup, self.data)
-# 			end
-# 			if self.setup.spectral_spectrum == :lin && isempty(self.data.lin_spectrogram)
-# 				lin_spectrogram!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.spectral_centroid)
-# 				get_spectrals!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.f0)
-# 				get_f0!(self.setup, self.data)
-# 			end
-
-# 			return hcat(
-# 				(
-# 					self.data.mel_spectrogram,
-# 					self.data.log_mel,
-# 					self.data.mfcc_coeffs,
-# 					self.data.mfcc_delta,
-# 					self.data.mfcc_deltadelta,
-# 					self.data.spectral_centroid,
-# 					self.data.spectral_crest,
-# 					self.data.spectral_decrease,
-# 					self.data.spectral_entropy,
-# 					self.data.spectral_flatness,
-# 					self.data.spectral_flux,
-# 					self.data.spectral_kurtosis,
-# 					self.data.spectral_rolloff,
-# 					self.data.spectral_skewness,
-# 					self.data.spectral_slope,
-# 					self.data.spectral_spread,
-# 					self.data.f0,
-# 				)...,
-# 			)
-
-# 			# --------------------------------------------------------------------------- #
-# 			#                           emotion work in progress                          #
-# 			# --------------------------------------------------------------------------- #
-# 		elseif profile == :emotion
-# 			if isempty(self.data.fft)
-# 				get_fft!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.mel_spectrogram)
-# 				get_mel_spec!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.log_mel)
-# 				get_log_mel!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.mfcc_coeffs)
-# 				get_mfcc!(self.setup, self.data)
-# 				get_mfcc_deltas!(self.setup, self.data)
-# 			end
-# 			if self.setup.spectral_spectrum == :lin && isempty(self.data.lin_spectrogram)
-# 				lin_spectrogram!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.spectral_centroid)
-# 				get_spectrals!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.f0)
-# 				get_f0!(self.setup, self.data)
-# 			end
-
-# 			return hcat(
-# 				(
-# 					# self.data.mel_spectrogram,
-# 					self.data.log_mel,
-# 					# self.data.mfcc_coeffs,
-# 					self.data.mfcc_delta,
-# 					self.data.mfcc_deltadelta,
-# 					self.data.spectral_centroid,
-# 					self.data.spectral_crest,
-# 					self.data.spectral_decrease,
-# 					self.data.spectral_entropy,
-# 					self.data.spectral_flatness,
-# 					self.data.spectral_flux,
-# 					self.data.spectral_kurtosis,
-# 					self.data.spectral_rolloff,
-# 					self.data.spectral_skewness,
-# 					self.data.spectral_slope,
-# 					self.data.spectral_spread,
-# 					self.data.f0,
-# 				)...,
-# 			)
-
-# 			# elseif profile == :emotion_set_1
-# 			# 	if isempty(self.data.fft)
-# 			# 		get_fft!(self.setup, self.data)
-# 			# 	end
-# 			# 	if isempty(self.data.mel_spectrogram)
-# 			# 		get_mel_spec!(self.setup, self.data)
-# 			# 	end
-# 			# 	if isempty(self.data.mfcc_coeffs)
-# 			# 		get_mfcc!(self.setup, self.data)
-# 			# 		# get_mfcc_deltas!(self.setup, self.data)
-# 			# 	end
-# 			# 	if self.setup.spectral_spectrum == :lin && isempty(self.data.lin_spectrogram)
-# 			# 		lin_spectrogram!(self.setup, self.data)
-# 			# 	end
-# 			# 	if isempty(self.data.spectral_centroid)
-# 			# 		get_spectrals!(self.setup, self.data)
-# 			# 	end
-# 			# 	# if isempty(self.data.f0)
-# 			# 	#     get_f0!(self.setup, self.data)
-# 			# 	# end
-
-# 			# 	return vcat(
-# 			# 		(
-# 			# 			self.data.mel_spectrogram',
-# 			# 			self.data.mfcc_coeffs',
-# 			# 			# self.data.mfcc_delta',
-# 			# 			# self.data.mfcc_deltadelta',
-# 			# 			self.data.spectral_centroid',
-# 			# 			self.data.spectral_crest',
-# 			# 			self.data.spectral_decrease',
-# 			# 			self.data.spectral_entropy',
-# 			# 			self.data.spectral_flatness',
-# 			# 			self.data.spectral_flux',
-# 			# 			self.data.spectral_kurtosis',
-# 			# 			self.data.spectral_rolloff',
-# 			# 			self.data.spectral_skewness',
-# 			# 			self.data.spectral_slope',
-# 			# 			self.data.spectral_spread',
-# 			# 			# self.data.f0'
-# 			# 		)...,
-# 			# 	)
-
-# 			# elseif profile == :emotion_set_2
-# 			# 	if isempty(self.data.fft)
-# 			# 		get_fft!(self.setup, self.data)
-# 			# 	end
-# 			# 	if isempty(self.data.mfcc_coeffs)
-# 			# 		get_mel_spec!(self.setup, self.data)
-# 			# 		get_mfcc!(self.setup, self.data)
-# 			# 		get_mfcc_deltas!(self.setup, self.data)
-# 			# 	end
-# 			# 	if self.setup.spectral_spectrum == :lin && isempty(self.data.lin_spectrogram)
-# 			# 		lin_spectrogram!(self.setup, self.data)
-# 			# 	end
-# 			# 	if isempty(self.data.spectral_centroid)
-# 			# 		get_spectrals!(self.setup, self.data)
-# 			# 	end
-# 			# 	if isempty(self.data.f0)
-# 			# 		get_f0!(self.setup, self.data)
-# 			# 	end
-
-# 			# 	return vcat(
-# 			# 		(
-# 			# 			# self.data.mel_spectrogram',
-# 			# 			self.data.mfcc_coeffs',
-# 			# 			self.data.mfcc_delta',
-# 			# 			self.data.mfcc_deltadelta',
-# 			# 			self.data.spectral_centroid',
-# 			# 			self.data.spectral_crest',
-# 			# 			self.data.spectral_decrease',
-# 			# 			self.data.spectral_entropy',
-# 			# 			self.data.spectral_flatness',
-# 			# 			self.data.spectral_flux',
-# 			# 			self.data.spectral_kurtosis',
-# 			# 			self.data.spectral_rolloff',
-# 			# 			self.data.spectral_skewness',
-# 			# 			self.data.spectral_slope',
-# 			# 			self.data.spectral_spread',
-# 			# 			self.data.f0',
-# 			# 		)...,
-# 			# 	)
-
-# 			# elseif profile == :emotion_set_3
-# 			if isempty(self.data.fft)
-# 				get_fft!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.mel_spectrogram)
-# 				get_mel_spec!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.mfcc_coeffs)
-# 				get_mfcc!(self.setup, self.data)
-# 				get_mfcc_deltas!(self.setup, self.data)
-# 			end
-# 			if self.setup.spectral_spectrum == :lin && isempty(self.data.lin_spectrogram)
-# 				lin_spectrogram!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.spectral_centroid)
-# 				get_spectrals!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.f0)
-# 				get_f0!(self.setup, self.data)
-# 			end
-
-# 			return vcat(
-# 				(
-# 					self.data.mel_spectrogram',
-# 					self.data.mfcc_coeffs',
-# 					self.data.mfcc_delta',
-# 					self.data.mfcc_deltadelta',
-# 					self.data.spectral_centroid',
-# 					self.data.spectral_crest',
-# 					self.data.spectral_decrease',
-# 					self.data.spectral_entropy',
-# 					self.data.spectral_flatness',
-# 					self.data.spectral_flux',
-# 					self.data.spectral_kurtosis',
-# 					self.data.spectral_rolloff',
-# 					self.data.spectral_skewness',
-# 					self.data.spectral_slope',
-# 					self.data.spectral_spread',
-# 					self.data.f0',
-# 				)...,
-# 			)
-
-# 			# --------------------------------------------------------------------------- #
-# 			#                                 Gio Paglia                                  #
-# 			# --------------------------------------------------------------------------- #
-
-# 		elseif profile == :kdd
-# 			if isempty(self.data.fft)
-# 				get_fft!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.mel_spectrogram)
-# 				get_mel_spec!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.mfcc_coeffs)
-# 				get_mfcc!(self.setup, self.data)
-# 				# get_mfcc_deltas!(self.setup, self.data)
-# 			end
-# 			# if self.setup.spectral_spectrum == :lin && isempty(self.data.lin_spectrogram)
-# 			# 	lin_spectrogram!(self.setup, self.data)
-# 			# end
-# 			# if isempty(self.data.spectral_centroid)
-# 			# 	get_spectrals!(self.setup, self.data)
-# 			# end
-# 			if isempty(self.data.f0)
-# 				self.setup.f0_range = (200, 700)
-# 				get_f0!(self.setup, self.data)
-# 			end
-
-# 			return hcat(
-# 				(
-# 					# self.data.mel_spectrogram,
-# 					self.data.mfcc_coeffs,
-# 					# self.data.mfcc_delta,
-# 					# self.data.mfcc_deltadelta,
-# 					# self.data.spectral_centroid,
-# 					# self.data.spectral_crest,
-# 					# self.data.spectral_decrease,
-# 					# self.data.spectral_entropy,
-# 					# self.data.spectral_flatness,
-# 					# self.data.spectral_flux,
-# 					# self.data.spectral_kurtosis,
-# 					# self.data.spectral_rolloff,
-# 					# self.data.spectral_skewness,
-# 					# self.data.spectral_slope,
-# 					# self.data.spectral_spread,
-# 					self.data.f0,
-# 				)...,
-# 			)
-# 		elseif profile == :kdd2
-# 			if isempty(self.data.fft)
-# 				get_fft!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.mel_spectrogram)
-# 				get_mel_spec!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.log_mel)
-# 				get_log_mel!(self.setup, self.data)
-# 			end
-# 			if isempty(self.data.f0)
-# 				self.setup.f0_range = (200, 700)
-# 				get_f0!(self.setup, self.data)
-# 			end
-
-# 			return hcat(
-# 				(
-# 					self.data.log_mel,
-# 					self.data.f0,
-# 				)...,
-# 			)
-# 		else
-# 			@error("Unknown $profile profile.")
-# 		end
-# 	end
-
-# 	function AudioObj(setup::AudioSetup, data::AudioData)
-# 		obj = new(
-# 			setup, data,
-# 			() -> get_fft(obj),
-# 			() -> get_lin_spec(obj),
-# 			() -> get_mel_spec(obj),
-# 			() -> get_log_mel(obj),
-# 			() -> get_mfcc(obj),
-# 			() -> get_spectrals(obj),
-# 			() -> get_f0(obj),
-# 			() -> get_cqt(obj),
-# 			(x) -> get_features(obj, x),
-# 		)
-# 		#   return obj
-# 	end
-# end
\ No newline at end of file
diff --git a/src/utils/histogram.jl b/src/utils/histogram.jl
new file mode 100644
index 0000000..9cb87c3
--- /dev/null
+++ b/src/utils/histogram.jl
@@ -0,0 +1,107 @@
+function binpicker(
+	xmin::Float64,
+	xmax::Float64,
+	nbins::Int64,
+	raw_bins_width::Float64,
+)
+	xscale = max(abs(xmin), abs(xmax))
+	xrange = xmax - xmin
+	raw_bins_width = max(raw_bins_width, eps(xscale))
+
+	# if the data are not constant, place the bins at "nice" locations
+	if xrange > max(sqrt(eps(Float64)) * xscale, floatmin(Float64))
+		# choose the bin width as a "nice" value.
+		pow_of_ten = 10 .^ floor(log10(raw_bins_width))   # next lower power of 10
+		rel_size = raw_bins_width / pow_of_ten           # guaranteed in [1, 10)
+
+		# automatic rule specified
+		# if isempty(nbins)      nbins viene passato quindi questo codice non serve
+		#     if  rel_size < 1.5
+		#         bin_width = 1*pow_of_ten
+		#     elseif rel_size < 2.5
+		#         bin_width = 2*pow_of_ten
+		#     elseif rel_size < 4
+		#         bin_width = 3*pow_of_ten
+		#     elseif rel_size < 7.5
+		#         bin_width = 5*pow_of_ten
+		#     else
+		#         bin_width = 10*pow_of_ten
+		#     end
+
+		# put the bin edges at multiples of the bin width, covering x.
+		# the actual number of bins used may not be exactly equal to the requested rule.
+		# left_edge = max(min(bin_width*floor(xmin ./ bin_width), xmin),-realmax(class(xmax)))
+		# nbinsActual = max(1, ceil((xmax-left_edge) ./ bin_width))
+		# rightEdge = min(max(left_edge + nbinsActual.*bin_width, xmax),realmax(class(xmax)))
+
+		# number of bins specified
+		# else
+		# temporarily set a raw bin_width to a nice power of 10.
+		# bin_width will be set again to a different value if more than 1 bin.
+		bin_width = pow_of_ten * floor(rel_size)
+		# set the left edge at multiples of the raw bin width.
+		# then adjust bin width such that all bins are of the same size and xmax fall into the rightmost bin.
+		left_edge = max(min(bin_width * floor(xmin ./ bin_width), xmin), -floatmax(Float64))
+		if nbins > 1
+			ll = (xmax - left_edge) / nbins      # bin_width lower bound, xmax
+			# on right edge of last bin
+			ul = (xmax - left_edge) / (nbins - 1)  # bin_width upper bound,
+			# xmax on left edge of last bin
+			p10 = 10^floor(log10(ul - ll))
+			bin_width = p10 * ceil(ll ./ p10)    # bin_width-ll < p10 <= ul-ll
+			# thus, bin_width < ul
+		end
+
+		nbins_actual = nbins
+		right_edge = min(
+			max(left_edge + nbins_actual .* bin_width, xmax), floatmax(Float64))
+		# end
+
+	else # the data are nearly constant
+		# for automatic rules, use a single bin.
+		if isempty(nbins)
+			nbins = 1
+		end
+
+		# there's no way to know what scale the caller has in mind, just create
+		# something simple that covers the data.
+
+		# make the bins cover a unit width, or as small an integer width as
+		# possible without the individual bin width being zero relative to xscale.
+		# put the left edge on an integer or half integer below xmin, with the data in the middle 50% of the bin.
+		# put the right edge similarly above xmax.
+		bin_range = max(1, ceil(nbins * eps(xscale)))
+		left_edge = floor(2 * (xmin - bin_range ./ 4)) / 2
+		right_edge = ceil(2 * (xmax + bin_range ./ 4)) / 2
+
+		bin_width = (right_edge - left_edge) ./ nbins
+		nbins_actual = nbins
+	end
+
+	if !isfinite(bin_width)
+		# if binWidth overflows, don't worry about nice bin edges anymore
+		edges = LinRange(left_edge, right_edge, nbins_actual + 1)
+	else
+		edges = union(
+			left_edge, left_edge .+ (1:(nbins_actual-1)) .* bin_width, right_edge)
+		step = round(minimum(diff(edges)), digits = 8)
+		edges = range(edges[1], edges[end], step = step)
+	end
+
+	return edges
+end
+
+function histcounts(
+	feature::Vector{Float64},
+	hist_bins::Int64,
+)
+	xmin, xmax = extrema(filter(x -> !isnan(x), feature))
+	range_feature = xmax - xmin
+	raw_bins_width = range_feature / hist_bins
+
+	edges = binpicker(xmin, xmax, hist_bins, raw_bins_width)
+
+	n, _ = histcountindices(feature, edges)
+
+	return n, edges
+end
\ No newline at end of file
diff --git a/src/utils/speech_detector.jl b/src/utils/speech_detector.jl
index 2a69312..28bebda 100644
--- a/src/utils/speech_detector.jl
+++ b/src/utils/speech_detector.jl
@@ -1,128 +1,8 @@
-function moving_mean(
-	x::Vector{Float64},
-	w::Int64,
-)
-	# w must be odd!
-	x_length = size(x, 1)
-	m = zeros(x_length)
-	pad = Int(floor(w / 2))
-	for i in eachindex(x)
-		index1 = i - pad < 1 ? 1 : i - pad
-		index2 = i + pad > x_length ? x_length : i + pad
-		m[i] = median(x[index1:index2])
-	end
-	return m
-end
-
-function binpicker(
-	xmin::Float64,
-	xmax::Float64,
-	nbins::Int64,
-	raw_bins_width::Float64,
-)
-	xscale = max(abs(xmin), abs(xmax))
-	xrange = xmax - xmin
-	raw_bins_width = max(raw_bins_width, eps(xscale))
-
-	# if the data are not constant, place the bins at "nice" locations
-	if xrange > max(sqrt(eps(Float64)) * xscale, floatmin(Float64))
-		# choose the bin width as a "nice" value.
-		pow_of_ten = 10 .^ floor(log10(raw_bins_width))   # next lower power of 10
-		rel_size = raw_bins_width / pow_of_ten           # guaranteed in [1, 10)
-
-		# automatic rule specified
-		# if isempty(nbins)      nbins viene passato quindi questo codice non serve
-		#     if  rel_size < 1.5
-		#         bin_width = 1*pow_of_ten
-		#     elseif rel_size < 2.5
-		#         bin_width = 2*pow_of_ten
-		#     elseif rel_size < 4
-		#         bin_width = 3*pow_of_ten
-		#     elseif rel_size < 7.5
-		#         bin_width = 5*pow_of_ten
-		#     else
-		#         bin_width = 10*pow_of_ten
-		#     end
-
-		# put the bin edges at multiples of the bin width, covering x.
-		# the actual number of bins used may not be exactly equal to the requested rule.
-		# left_edge = max(min(bin_width*floor(xmin ./ bin_width), xmin),-realmax(class(xmax)))
-		# nbinsActual = max(1, ceil((xmax-left_edge) ./ bin_width))
-		# rightEdge = min(max(left_edge + nbinsActual.*bin_width, xmax),realmax(class(xmax)))
-
-		# number of bins specified
-		# else
-		# temporarily set a raw bin_width to a nice power of 10.
-		# bin_width will be set again to a different value if more than 1 bin.
-		bin_width = pow_of_ten * floor(rel_size)
-		# set the left edge at multiples of the raw bin width.
-		# then adjust bin width such that all bins are of the same size and xmax fall into the rightmost bin.
-		left_edge = max(min(bin_width * floor(xmin ./ bin_width), xmin), -floatmax(Float64))
-		if nbins > 1
-			ll = (xmax - left_edge) / nbins      # bin_width lower bound, xmax
-			# on right edge of last bin
-			ul = (xmax - left_edge) / (nbins - 1)  # bin_width upper bound,
-			# xmax on left edge of last bin
-			p10 = 10^floor(log10(ul - ll))
-			bin_width = p10 * ceil(ll ./ p10)    # bin_width-ll < p10 <= ul-ll
-			# thus, bin_width < ul
-		end
-
-		nbins_actual = nbins
-		right_edge = min(
-			max(left_edge + nbins_actual .* bin_width, xmax), floatmax(Float64))
-		# end
-
-	else # the data are nearly constant
-		# for automatic rules, use a single bin.
-		if isempty(nbins)
-			nbins = 1
-		end
-
-		# there's no way to know what scale the caller has in mind, just create
-		# something simple that covers the data.
-
-		# make the bins cover a unit width, or as small an integer width as
-		# possible without the individual bin width being zero relative to xscale.
-		# put the left edge on an integer or half integer below xmin, with the data in the middle 50% of the bin.
-		# put the right edge similarly above xmax.
-		bin_range = max(1, ceil(nbins * eps(xscale)))
-		left_edge = floor(2 * (xmin - bin_range ./ 4)) / 2
-		right_edge = ceil(2 * (xmax + bin_range ./ 4)) / 2
-
-		bin_width = (right_edge - left_edge) ./ nbins
-		nbins_actual = nbins
-	end
-
-	if !isfinite(bin_width)
-		# if binWidth overflows, don't worry about nice bin edges anymore
-		edges = LinRange(left_edge, right_edge, nbins_actual + 1)
-	else
-		edges = union(
-			left_edge, left_edge .+ (1:(nbins_actual-1)) .* bin_width, right_edge)
-		step = round(minimum(diff(edges)), digits = 8)
-		edges = range(edges[1], edges[end], step = step)
-	end
-
-	return edges
-end
-
-function histcounts(
-	feature::Vector{Float64},
-	hist_bins::Int64,
-)
-	edgestransposed = false
-
-	xmin = minimum(feature)
-	xmax = maximum(feature)
-	range_feature = xmax - xmin
-	raw_bins_width = range_feature / hist_bins
-
-	edges = binpicker(xmin, xmax, hist_bins, raw_bins_width)
-
-	n, _ = histcountindices(feature, edges)
-
-	return n, edges
+function moving_mean(x::AbstractVector{Float64}, w::Int64)
+	@assert isodd(w) "Window size must be odd"
+	n = length(x)
+	pad = w ÷ 2
+	map(i -> median(@view x[max(1, i - pad):min(n, i + pad)]), 1:n)
 end
 
 function f_peaks(n::Vector{T}) where {T <: AbstractFloat}
@@ -210,103 +90,82 @@ function get_threshs_from_feature(
 	return M1, M2
 end
 
-function spectral_spread(
-	x::Vector{Float64},
-	sr::Int64;
-	fft_length::Int64,
-	window_length::Int64,
-	overlap_length::Int64,
-	window_norm::Bool = true,
-	spectrum_type::Symbol = :magnitude,
-)
-	X = audio_features_obj(
-		x, sr,
-		fft_length = fft_length,
-		window_length = window_length,
-		overlap_length = overlap_length,
-		window_norm = window_norm,
-		spectrum_type = spectrum_type,
-	)
-	s = X.get_lin_spec()
-
-	freq = X.data.lin_frequencies
-
-	sum_x1 = vec(sum(s, dims = 1))
-	spectral_centroid = vec(sum(s .* freq, dims = 1) ./ sum_x1')
-	spectral_centroid = replace!(spectral_centroid, NaN => 0)
-	higher_moment_tmp = freq .- spectral_centroid'
-
-	spectral_spread = vec(sqrt.(sum((higher_moment_tmp .^ 2) .* s, dims = 1) ./ sum_x1'))
-
-	return spectral_spread
-end
-
 function speech_detector(
-	x_in::AbstractVector{Float64},
-	sr::Int64;        #thresholds
+	x::AbstractVector{Float64},
+	sr::Int64;
+	window_type::Tuple{Symbol, Symbol} = (:hann, :periodic),
+	window_length::Int64 = round(Int, 0.03 * sr),
+	overlap_length::Int64 = 0,
+	thresholds::Tuple{Float64, Float64} = (-Inf, -Inf),
+	merge_distance::Int64 = window_length * 5,
 )
-	window, _ = gencoswin(:hann, Int(round(0.03 * sr)), :periodic)
-	frame_length = size(window, 1)
-	merge_distance = frame_length * 5
-
-	## helper detect speech
-	W = 5                              # weight for finding local maxima
-	bins = 15                          # number of bins for histograms
-	spectral_spread_threshold = 0.05     # threshold used for not counting spectral spread when under this energy
-	lower_spread_threshold_factor = 0.8   # factor to lower the spectral spread threshold.
-	smoothing_filter_length = 5          # after getting spectral spread and energy data, these features are smoothed by filters of this length
-
-	#----------------------------------------------------------------------------------#
-	#      step 1: extract short-term spectral spread and energy from whole signal     #
-	#----------------------------------------------------------------------------------#
-	sig_max = maximum(abs.(x_in))
+	# ---------------------------------------------------------------------------------- #
+	#                                     parameters                                     #
+	# ---------------------------------------------------------------------------------- #
+	# options = SdOptions(threshold, merge_distance)
+
+	weight = 5 # weight for finding local maxima
+	bins = 15 # number of bins for histograms
+	spread_threshold = 0.05 # threshold used for not counting spectral spread when under this energy
+	lower_spread_threshold_factor = 0.8 # factor to lower the spectral spread threshold.
+	smoothing_filter_length = 5 # after getting spectral spread and energy data, 
+	# these features are smoothed by filters of this length
+
+	# ---------------------------------------------------------------------------------- #
+	#      step 1: extract short-term spectral spread and energy from whole signal       #
+	# ---------------------------------------------------------------------------------- #
 
-	x = deepcopy(x_in)
 	# normalize
-	if sig_max > 0
-		x = x ./ sig_max
-	end
-
-	# buffer signal
-	frames = buffer(x, frame_length, frame_length)
+	x = normalize_audio(x)
 
-	# determine short term energy
-	energy = vec(window' .^ 2 * frames .^ 2)
-	# filter the short term energy twice
-	filtered_energy = moving_mean(
-		moving_mean(energy, smoothing_filter_length), smoothing_filter_length)
-	# get spectral spread
-	spec_spread = spectral_spread(
+	# get stft
+	frames, window = _get_frames(
 		x,
+		window_type = window_type,
+		window_length = window_length,
+		overlap_length = overlap_length,
+	)
+	s, sfreq = _get_stft(
+		frames .* window,
 		sr,
-		fft_length = 2 * frame_length,
-		window_length = frame_length,
-		overlap_length = 0,
+		fft_length = 2 * window_length,
+		window = window,
+		frequency_range = (0, floor(Int, sr / 2)),
 		spectrum_type = :magnitude,
 	)
-	# normalize the feature
-	spec_spread = spec_spread / (sr / 2)
+
+	# determine short term energy
+	energy = vec(window' .^ 2 * frames .^ 2)
+
+	# filter the short term energy twice
+	filtered_energy = moving_mean(moving_mean(energy, smoothing_filter_length), smoothing_filter_length)
+
+	# get spectral spread and normalize
+	spread = _get_spec_spread(s, sfreq) / (sr / 2)
+
 	# set spectral spread value to 0 for frames with low energy
-	spec_spread[energy.<spectral_spread_threshold] .= 0
+	spread[energy.<spread_threshold] .= 0
+
 	# filter spectral spread twice
-	filtered_spread = moving_mean(
-		moving_mean(spec_spread, smoothing_filter_length), smoothing_filter_length)
+	filtered_spread = moving_mean(moving_mean(spread, smoothing_filter_length), smoothing_filter_length)
 
 	#----------------------------------------------------------------------------------#
 	#                        step 2: determine thresholds                              #
 	#----------------------------------------------------------------------------------#
-
 	# if both energy and spectral spread thresholds are defined
-	# TODO
-	# calculate energy and spectral spread local bin maxima
-	e1_M1, e1_M2 = get_threshs_from_feature(filtered_energy, bins, :energy)
-	ss_M1, ss_M2 = get_threshs_from_feature(filtered_spread, bins, :specspread)
-
-	# calculate energy and spectral spread thresholds
-	ww = 1 / (W + 1)
-	sspread_thresh = ww * (W * ss_M2 + ss_M1[1]) * lower_spread_threshold_factor
-	energy_Thresh = ww * (W * e1_M2 + e1_M1[1])
-
+	if thresholds != (-Inf, -Inf)
+		energy_thresh = thresholds[1]
+		spread_thresh = thresholds[2]
+	else
+		# calculate energy and spectral spread local bin maxima
+		e_m1, e_m2 = get_threshs_from_feature(filtered_energy, bins, :energy)
+		s_m1, s_m2 = get_threshs_from_feature(filtered_spread, bins, :specspread)
+
+		# calculate energy and spectral spread thresholds
+		ww = 1 / (weight + 1)
+		sspread_thresh = ww * (weight * s_m2 + s_m1[1]) * lower_spread_threshold_factor
+		energy_Thresh = ww * (weight * e_m2 + e_m1[1])
+	end
 	#----------------------------------------------------------------------------------#
 	#                       step 3: apply threshold criterion                          #
 	#----------------------------------------------------------------------------------#
@@ -316,10 +175,9 @@ function speech_detector(
 	#       step 4: Merge frames if they are close as described by MergeDistance       #
 	#----------------------------------------------------------------------------------#
 	unbuff_out = speech_mask
-	frame_length_new = frame_length
 
 	# change frames into data points
-	a = repeat(unbuff_out', outer = [frame_length_new, 1])
+	a = repeat(unbuff_out', outer = [window_length, 1])
 	unbuff_out_mask = [a[:]; falses(length(x) - length(a), 1)]
 	difference = diff([unbuff_out_mask; false], dims = 1)
 
@@ -354,17 +212,17 @@ function speech_detector(
 		outidx = reshape([idx_p1[1]; idx_p2[amask]; idx_m2[amask]; idx_m1[end]], :, 2)
 	end
 
-	y = []
+	y = Float64[]
 	for i in eachrow(outidx)
-		y = [y; x_in[i[1]:i[2]]]
+		y = [y; x[i[1]:i[2]]]
 	end
 
-	return Float64.(y), outidx
+	return y, outidx
 end
 
-function speech_detector(x_in::AbstractVector{<:AbstractFloat}, sr::Int64)
-	speech_detector(Float64.(x_in), sr)
+function speech_detector(x::AbstractVector{<:AbstractFloat}, sr::Int64; kwargs...)
+	speech_detector(Float64.(x), sr, kwargs...)
 end
 
 # references
-# https://github.com/linan2/Voice-activity-detection-VAD-paper-and-code
+# https://github.com/linan2/Voice-activity-detection-VAD-paper-and-code
\ No newline at end of file
diff --git a/src/windowing/windowing.jl b/src/windowing/windowing.jl
index e057a39..6339751 100644
--- a/src/windowing/windowing.jl
+++ b/src/windowing/windowing.jl
@@ -86,3 +86,40 @@ function fade(
 
     return x
 end # function fade
+
+#------------------------------------------------------------------------------#
+#                                   windowing                                  #
+#------------------------------------------------------------------------------#
+function _get_frames(
+	x::AbstractVector{Float64};
+	window_type::Tuple{Symbol, Symbol},
+	window_length::Int64,
+	overlap_length::Int64,
+)
+	frames = buffer(x, window_length, window_length - overlap_length)
+	window, _ = gencoswin(window_type[1], window_length, window_type[2])
+
+	return frames, window
+end
+
+function _get_frames(x::AbstractVector{Float64}, s::AudioSetup)
+	_get_frames(x, window_type = s.window_type, window_length = s.window_length, overlap_length = s.overlap_length)
+end
+
+function get_frames(
+	x::AbstractVector{<:AbstractFloat},
+	window_type::Tuple{Symbol, Symbol} = (:hann, :periodic),
+	window_length::Int64 = 256,
+	overlap_length::Int64 = 128,
+)
+	@assert 0 < overlap_length < window_length "Overlap length must be < window length."
+
+	frames, window = _get_frames(
+		eltype(x) == Float64 ? x : Float64.(x),
+		window_type = window_type,
+		window_length = window_length,
+		overlap_length = overlap_length,
+	)
+end
+
+get_frames!(a::AudioObj; kwargs...) = a.data.frames, a.setup.window = get_frames(a.data.x; kwargs...)
diff --git a/src/windowing/windows.jl b/src/windowing/windows.jl
index 64036ef..fc6e355 100644
--- a/src/windowing/windows.jl
+++ b/src/windowing/windows.jl
@@ -23,6 +23,8 @@ function calc_window(
         a3 = 0.083578947
         a4 = 0.006947368
         w = a0 .- a1 * cos.(2 * pi * x) .+ a2 * cos.(4 * pi * x) .- a3 * cos.(6 * pi * x) .+ a4 * cos.(8 * pi * x)
+    else
+        error("Unknown window type: $winType")
     end
 
     return w
diff --git a/test/features_extraction.jl b/test/features_extraction.jl
index b09a1fd..33045a5 100644
--- a/test/features_extraction.jl
+++ b/test/features_extraction.jl
@@ -42,6 +42,7 @@
 """
 
 using Revise
+using Plots
 using Audio911
 
 TESTPATH = joinpath(dirname(pathof(Audio911)), "..", "test")
@@ -52,7 +53,7 @@ wavfile = joinpath(TESTPATH, TESTFILE)
 # -------------------------------------------------------------------------- #
 #                                 parameters                                 #
 # -------------------------------------------------------------------------- #
-sr_src = 8000
+sr_src = 16000
 
 # -------------------------------------------------------------------------- #
 #                                 load audio                                 #
@@ -67,7 +68,7 @@ x, sr = load_audio(wavfile, sr = sr_src)
 # normalize audio signal
 x = normalize_audio(x)
 # speech detector trim
-# x = speech_detector(x, sr) BROKEN!
+x, _ = speech_detector(x, sr)
 
 # -------------------------------------------------------------------------- #
 #     usage example 1: create audio object and call get features on that     #
@@ -156,4 +157,47 @@ bad_1 = get_features(bad_x, sr)
 # -------------------------------------------------------------------------- #
 #                        	   Gio's semitones                               #
 # -------------------------------------------------------------------------- #
-(tune_1, tunefreq_1) = get_features(x, sr, :mel, mel_style=:tuned)
\ No newline at end of file
+(tune_1, tunefreq_1) = get_features(x, sr, :mel, mel_style = :tuned, st_peak_range = (50, 500))
+
+# -------------------------------------------------------------------------- #
+#                           modular implementation                           #
+# -------------------------------------------------------------------------- #
+
+# --------------------------------- stft ----------------------------------- #
+# example 1:
+stft_1, stft_freq_1 = get_stft(x, sr)
+
+# example 2, with args:
+stft_2, stft_freq_2 = get_stft(
+	x,
+	sr,
+	fft_length = 512,
+	spectrum_type = :magnitude,
+	window_type = (:hamming, :symmetric),
+	frequency_range = (100, 3000),
+)
+
+# example 3, modular: signal > windowing > stft
+buffer_3, win_3 = get_frames(x)
+stft_3, stft_freq_3 = get_stft(buffer_3, sr, win_3)
+
+# example 4, utilizing audio_obj
+audio_4 = audio_obj(x, sr)
+get_stft!(audio_4)
+# display(audio_4.data.stft)
+
+# example 5, utilizing audio_obj with args
+audio_5 = audio_obj(x,
+	sr,
+	fft_length = 512,
+	spectrum_type = :magnitude,
+	window_type = (:hamming, :symmetric),
+	frequency_range = (100, 3000))
+get_stft!(audio_5)
+# display(audio_5.data.stft)
+
+# example 6, utilizing audio_obj modular
+audio_6 = audio_obj(x, sr)
+get_frames!(audio_6)
+get_stft!(audio_6)
+display(audio_6.data.stft)