Skip to content

Commit

Permalink
fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
PasoStudio73 committed Jul 29, 2024
1 parent f1c91a9 commit 456ffe0
Show file tree
Hide file tree
Showing 9 changed files with 38 additions and 36 deletions.
8 changes: 4 additions & 4 deletions src/Audio911.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ function __init__()
import librosa as librosa
import soundfile as soundfile
def load_audio(fname, sr):
x, sr_def = librosa.load(fname, sr=sr, mono=True)
def load_audio(file, sr):
x, sr_def = librosa.load(file, sr=sr, mono=True)
return x, sr_def
def save_audio(fname, x, sr):
soundfile.write(fname, x, samplerate=sr, subtype='PCM_16')
def save_audio(file, x, sr):
soundfile.write(file, x, samplerate=sr, subtype='PCM_16')
"""
end

Expand Down
16 changes: 8 additions & 8 deletions src/structs/audio.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,31 +21,31 @@ function Base.display(audio::Audio)
end

function load_audio(;
fname::Union{AbstractString, AbstractVector{Float64}},
file::Union{AbstractString, AbstractVector{Float64}},
sr::Union{Nothing, Int64} = nothing,
norm::Bool = false
)
if fname isa AbstractString
if file isa AbstractString
audio = Audio(
py"load_audio"(fname, sr)...
py"load_audio"(file, sr)...
)
elseif fname isa AbstractVector{Float64} && sr isa Int64
audio = Audio(fname, sr)
elseif file isa AbstractVector{Float64} && sr isa Int64
audio = Audio(file, sr)
else
throw(ArgumentError("Invalid arguments"))
end

# normalize audio
if norm && length(audio.data) != 0
audio.data ./ maximum(abs.(audio.data))
audio.data = audio.data ./ maximum(abs.(audio.data))
end

return audio
end

function save_audio(;
audio::Audio,
fname::AbstractString
file::AbstractString
)
py"save_audio"(fname, audio.data, audio.sr)
py"save_audio"(file, audio.data, audio.sr)
end
8 changes: 4 additions & 4 deletions src/structs/mfcc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,12 @@ function get_mfcc(;
end

function _get_deltas(;
mfcc::AbstractArray{Float64},
source::AbstractArray{Float64},
freq::AbstractVector{Float64},
deltas::Deltas,
)
deltas.delta = audioDelta(
mfcc, deltas.d_length, deltas.d_matrix)
source, deltas.d_length, deltas.d_matrix)
deltas.ddelta = audioDelta(
deltas.delta, deltas.d_length, deltas.d_matrix)

Expand Down Expand Up @@ -220,8 +220,8 @@ function Base.display(deltas::Deltas)
end

function get_deltas(;
mfcc::Mfcc,
source::Mfcc,
kwargs...
)
_get_deltas(mfcc=mfcc.mfcc, freq=mfcc.freq, deltas=Deltas(; sr=mfcc.sr, kwargs...))
_get_deltas(source=source.mfcc, freq=source.freq, deltas=Deltas(; sr=source.sr, kwargs...))
end
2 changes: 2 additions & 0 deletions src/utils/speech_detector.jl
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ function _speech_detector(;
norm=:magnitude
);

# stftspec.spec = (stftspec.spec ./ (0.5 * sum(stftspec.win)))./2

# determine short term energy
energy = vec(stftspec.win' .^ 2 * stftspec.frames .^ 2)

Expand Down
6 changes: 3 additions & 3 deletions src/wavelets/wpdec.jl
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@ end # function orthfilt
function wfilters(
wname::String
)
fname = match(r"(?:[a-zA-Z]+)", wname).match # estrapola le lettere tramite regex da wname, .match riconverte da regex a string
wcode = wname[length(fname)+1:end] # estrapola la parte numerica
i_fam = winfo[fname] # recupera i dati dal dizionario generale
file = match(r"(?:[a-zA-Z]+)", wname).match # estrapola le lettere tramite regex da wname, .match riconverte da regex a string
wcode = wname[length(file)+1:end] # estrapola la parte numerica
i_fam = winfo[file] # recupera i dati dal dizionario generale

F = i_fam.coeff[wcode]
lo_D, hi_D, lo_R, hi_R = orthfilt(F)
Expand Down
26 changes: 13 additions & 13 deletions test/afe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ function audio911_extractor(
stft_norm::Symbol=:power, # :power, :magnitude, :pow2mag
# mel filterbank module
nbands::Int64=26,
scale::Symbol=:mel_htk, # :mel_htk, :mel_slaney, :erb, :bark
scale::Symbol=:bark, # :mel_htk, :mel_slaney, :erb, :bark
melfb_norm::Symbol=:bandwidth, # :bandwidth, :area, :none
freq_range::Union{Tuple{Int64, Int64}, Nothing}=nothing,
# mel spectrogram module
Expand All @@ -31,9 +31,9 @@ function audio911_extractor(
ncoeffs::Int64=13,
rectification::Symbol=:log, # :log, :cubic_root
dither::Bool=true,
# deltas module
d_length = 9,
d_matrix = :transposed, # :standard, :transposed
# # deltas module
# d_length = 9,
# d_matrix = :transposed, # :standard, :transposed
# f0 module
method::Symbol=:nfc,
f0_range::Tuple{Int64, Int64}=(50, 400),
Expand All @@ -42,7 +42,7 @@ function audio911_extractor(
)
# audio module
audio = load_audio(
fname=wavfile,
file=wavfile,
sr=sr,
norm=norm,
);
Expand Down Expand Up @@ -96,12 +96,12 @@ function audio911_extractor(
dither=dither,
);

# deltas module
deltas = get_deltas(
source=mfcc,
d_length=d_length,
d_matrix=d_matrix
);
# # deltas module
# deltas = get_deltas(
# source=mfcc,
# d_length=d_length,
# d_matrix=d_matrix
# );

# f0 module
f0 = get_f0(
Expand All @@ -122,8 +122,8 @@ function audio911_extractor(
return hcat(
melspec.spec',
mfcc.mfcc',
deltas.delta',
deltas.ddelta',
# deltas.delta',
# deltas.ddelta',
f0.f0,
spect.centroid,
spect.crest,
Expand Down
4 changes: 2 additions & 2 deletions test/features_test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ TESTFILE = "common_voice_en_23616312.wav"
wavfile = joinpath(TESTPATH, TESTFILE)

sr = 16000
audio = load_audio(fname=wavfile);
audio = load_audio(fname=wavfile, sr=sr);
audio = load_audio(file=wavfile);
audio = load_audio(file=wavfile, sr=sr);
display(audio)

stftspec = get_stft(audio=audio);
Expand Down
2 changes: 1 addition & 1 deletion test/usage_example.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ wavfile = joinpath(TESTPATH, TESTFILE)
# sample rate suggested for vocal analysis is 8000 Hz
# always good practice to normalize the audio beforehand
audio = load_audio(
fname=wavfile,
file=wavfile,
sr=8000,
norm=true,
);
Expand Down
2 changes: 1 addition & 1 deletion test/wavelet_mfcc_example.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ wavfile = joinpath(TESTPATH, TESTFILE)
# sample rate suggested for vocal analysis is 8000 Hz
# always good practice to normalize the audio beforehand
audio = load_audio(
fname=wavfile,
file=wavfile,
sr=8000,
norm=true,
);
Expand Down

0 comments on commit 456ffe0

Please sign in to comment.