Skip to content

Commit

Permalink
mfcc added
Browse files Browse the repository at this point in the history
  • Loading branch information
PasoStudio73 committed Nov 13, 2023
1 parent ad53437 commit ad2954c
Show file tree
Hide file tree
Showing 4 changed files with 379 additions and 33 deletions.
312 changes: 312 additions & 0 deletions src/mfcc.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,312 @@
using FFTW
using DSP

include("windowing.jl")

mutable struct mfccOptions{T<:Real}
window::Vector{Any}
windowLength::Vector{Any}
overlapLength::Int64
FFTLength::Int64
bandEdges::AbstractVector{T}
numCoeffs::Int64
rectification::Symbol
logEnergy::Symbol
deltaWindowLength::Int64
end

function getDefaultBandEdges()
# default band edges
factor = 133.33333333333333
bE = zeros(1, 42)
for i in 1:13
bE[i] = factor + (factor / 2) * (i - 1)
end
for i in 14:42
bE[i] = bE[i-1] * 1.0711703
end

return vec(bE)
end

function designMelFilterBank(
sr::Int64,
bandEdges::AbstractVector{T},
NFFT::Int64,
sumExponent::Int64,
normalization::Symbol,
designDomain::Symbol,
dataType::DataType,
keepTwoSided::Bool,
FilterBankDesignDomain::Symbol,
melStyle::Symbol
) where {T<:AbstractFloat}

zeroCast = convert(dataType, 0)
oneCast = convert(dataType, 1)
twoCast = convert(dataType, 2)
threeCast = convert(dataType, 3)
NFFTCast = convert(dataType, NFFT)
bandEdgesCast = convert.(dataType, bandEdges)
fsCast = convert(dataType, sr)

# Determine the number of bands
numEdges = convert(dataType, length(bandEdgesCast))
numBands = numEdges - twoCast

# Determine the number of valid bands
validNumEdges = sum((bandEdgesCast .- (fsCast / twoCast)) .< sqrt(eps(dataType)))
validNumBands = validNumEdges - twoCast

# Preallocate the filter bank
filterBank = zeros(dataType, NFFT, Int(numBands))
centerFrequencies = bandEdgesCast[2:end-1]

# Set this flag to true if the number of FFT length is insufficient to
# compute the specified number of mel bands
FFTLengthTooSmall = false

if (designDomain == :bin)

else # designDomain = :Hz
linFq = collect(zeroCast:NFFTCast-oneCast) / NFFTCast * fsCast

# Determine inflection points
@assert(validNumEdges <= numEdges)
p = zeros(dataType, validNumEdges, 1)

# da valutarne il risultato se Float64
for edgeNumber in 1:validNumEdges
for index in eachindex(linFq)
if linFq[index] > bandEdgesCast[edgeNumber]
p[edgeNumber] = index
break
end
end
end

bandEdgesCastMod = bandEdgesCast
FqMod = linFq

if (FilterBankDesignDomain == :mel || FilterBankDesignDomain == :warped)
#da fare!!!
elseif (FilterBankDesignDomain == :bark)
# da fare
end

bw = diff(bandEdgesCastMod)

for k in 1:Int(validNumBands)
for j in Int(p[k]):Int(p[k+1])-1
filterBank[j, k] = (FqMod[j] - bandEdgesCastMod[k]) / bw[k]
end

for j = Int(p[k+1]):Int(p[k+2])-1
filterBank[j, k] = (bandEdgesCastMod[k+2] - FqMod[j]) / bw[k+1]
end
emptyRange1 = p[k] .> p[k+1] - 1
emptyRange2 = p[k+1] .> p[k+2] - 1
if (!FFTLengthTooSmall && (emptyRange1 || emptyRange2))
FFTLengthTooSmall = true
end
end

# Apply normalization
weightPerBand = ones(dataType, 1, Int(numBands))

if (normalization == :area)
# da fare
elseif (normalization == :bandwidth)
filterBandWidth = bandEdgesCast[3:end] .- bandEdgesCast[1:end-2]
weightPerBand = filterBandWidth / 2
end

for i in 1:Int(numBands)
filterBank[:, i] = filterBank[:, i] / weightPerBand[i]
end

# if keepTwoSided
# range = audio.internal.getOnesidedFFTRange(NFFT);
# range = range(2:end);
# filterBank(end:-1:end-numel(range)+1,:) = filterBank(range,:);
# end
end
return filterBank
end # function designMelFilterBank

function createDCTmatrix(
numCoeffs::Int64,
numFilters::Int64,
DT::DataType
)
# create DCT matrix
N = convert(DT, numCoeffs)
K = numFilters
matrix = zeros(DT, Int(N), numFilters)
A = sqrt(1 / K)
B = sqrt(2 / K)
C = 2 * K
piCCast = convert(DT, 2 * pi / C)

matrix[1, :] .= A
for k in 1:K
for n in 2:Int(N)
matrix[n, k] = B * cos(piCCast * (n - 1) * (k - 0.5))
end
end

return matrix
end

function cepstralCoefficients(
S::AbstractMatrix{T},
numCoeffs::Int64,
rectification::Symbol
) where {T<:AbstractFloat}
DT = eltype(S)
# Rectify
if (rectification == :log)
amin = floatmin(DT)
S[S.==0] .= amin
S = log10.(S)
end
# Reshape spectrogram to matrix for vectorized matrix multiplication
L, M = size(S)
N = 1
S = reshape(S, L, M * N)
# Design DCT matrix
DCTmatrix = createDCTmatrix(numCoeffs, L, DT)
# Apply DCT matrix
coeffs = DCTmatrix * S

return permutedims(coeffs, [2 1])

# # Unpack matrix back to 3d array DA EVITARE già da prima
# coeffs = reshape(coeffs,numCoeffs,M,N)
# # Put time along the first dimension
# coeffs = permutedims(coeffs,[2 1 3])
end # function cepstralCoefficients

function audioDelta(
x::AbstractMatrix{T},
windowLength::Int64
) where {T<:AbstractFloat}

DT = eltype(x)
M = convert(DT, floor(windowLength / 2))
b = collect(M:-1:-M) ./ sum((1:M) .^ 2)

return delta = filt(b, 1, x)
end

function mfcc(
x::Union{AbstractVector{T},AbstractArray{T}},
sr::Int64;
numCoeffs::Int64=13,
rectification::Symbol=:log,
logEnergy::Symbol=:append,
overlapLength::Int64=Int(round(0.02 * sr)),
FFTLength::Int64=Int(round(0.02 * sr))
) where {T<:AbstractFloat}

x = convert.(Float64, x) # mantiene compatibilità con Matlab che scricchiola con Float32, da verificare se è il caso.
# costruisco struttura deltaWindowLength
window = []
windowLength = []
bandEdges = getDefaultBandEdges()
deltaWindowLength = 9

options = mfccOptions(
window,
windowLength,
overlapLength,
FFTLength,
bandEdges,
numCoeffs,
rectification,
logEnergy,
deltaWindowLength
)

# win, winLength, overlapLength = iGetWindow(x, sr, options.window, options.windowLength, options.overlapLength)
win, hL = gencoswin(:hamming, Int(round(0.03 * sr)), :periodic)
win = convert.(eltype(x), win)
winLength = length(win)

# Convenience variables
r = size(x, 1)
DT = eltype(x)
hopLength = winLength - overlapLength

# y = windowing(x, FFTLength, :hamming, :periodic, true)
numChan = size(x, 2)
nHops = Integer(floor((r - FFTLength) / hopLength) + 1) # numero delle window necessarie
y = buffer(x, FFTLength, hopLength, nHops, 1)

# log energy
if (options.logEnergy != :ignore)
E = sum(eachrow(y .^ 2)) # somma per righe
E[E.==0] .= floatmin(DT) # il minimo float al posto di zero
logE = log.(E)
end

# Multiply each segment by the window, and take the FFT
wincast = convert.(DT, win[:]) # casta al tipo originale del file audio
Z = abs.(fft(y .* wincast, (1,)))

# Design the mel filter bank
numValidEdges = sum(options.bandEdges .<= convert(DT, sr) / convert(DT, 2))
if (numValidEdges < length(options.bandEdges))
edges = options.bandEdges[1:numValidEdges+1]
edges[end] = convert(DT, sr) / convert(DT, 2)
else
edges = options.bandEdges
end
filterBank = designMelFilterBank(sr, edges, FFTLength, 1, :bandwidth, :Hz, DT, false, :linear, :default)
# Calculate cepstral coefficients
coeffs = cepstralCoefficients(filterBank' * Z, options.numCoeffs, options.rectification)

# fermi a 177

if (options.logEnergy == :append)
# coeffs = (logE',coeffs)
coeffs = hcat(logE, coeffs)
elseif (options.logEnergy == :replace)
# coeffs = [logE.',coeffs(:,2:end)];
# da fare
end

# coeffs = reshape(coeffs, Int(size(coeffs, 1) / numChan), numChan, size(coeffs, 2))
# coeffs = permutedims(coeffs,[1 3 2])

# METTERE IL CASO CHE le delta vengono calcolate solo se necessario
delta = audioDelta(coeffs, options.deltaWindowLength)
deltaDelta = audioDelta(delta, options.deltaWindowLength)
loc = collect(0:(nHops-1)) * hopLength .+ winLength

return coeffs, delta, deltaDelta, loc
end

## Main
# if (!@isdefined(x)) # carica solo se non è definita mono, serve giusto per debug
# # cosi ogni volta che lancio non ricarica tutto
# using PyCall
# librosa = pyimport("librosa")
# sr_src = 8000
# x, sr = librosa.load("/home/riccardopasini/Documents/Aclai/Julia_additional_files/test.wav", sr=sr_src, mono=true)
# end
# numCoeffs = 13
# rectification = :log
# logEnergy = :append
# overlapLength = Int(round(0.02 * sr))
# FFTLength = Int(round(0.03 * sr))

# coeffs, delta, deltaDelta, loc = mfcc(
# x,
# sr,
# numCoeffs=numCoeffs,
# rectification=rectification,
# logEnergy=logEnergy,
# overlapLength=overlapLength,
# FFTLength=FFTLength
# )
34 changes: 34 additions & 0 deletions src/test_wavelet.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
using DSP
using MFCC
# using Plots

include("wpspectrum.jl")
include("cwt.jl")

## main
using PyCall
librosa = pyimport("librosa")
soundfile = pyimport("soundfile")
plt = pyimport("matplotlib.pyplot")
sr_src = 8000
x, sr = librosa.load("/home/riccardopasini/Documents/Julia/test_mono.wav", sr=sr_src, mono=true)

depth = 2
wname = "sym6"
entType=:shannon
entPar=0.0

t = wpdec(x, depth, wname, entType, entPar)

spec, freqs, times = wpspectrum(x, sr, depth, wname)

soundfile.write("test1.wav",spec[1,:],samplerate=sr,subtype="PCM_16")
soundfile.write("test2.wav",spec[2,:],samplerate=sr,subtype="PCM_16")
soundfile.write("test3.wav",spec[3,:],samplerate=sr,subtype="PCM_16")
soundfile.write("test4.wav",spec[4,:],samplerate=sr,subtype="PCM_16")

# heatmap(1:size(spec, 1),
# 1:size(spec, 2), spec,
# c=cgrad([:blue, :white, :red, :yellow]),
# xlabel="x values", ylabel="y values",
# title="My title")
22 changes: 11 additions & 11 deletions src/test_wavelet_mfcc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ using PyCall
librosa = pyimport("librosa")
plt = pyimport("matplotlib.pyplot")
sr_src = 8000
x, sr = librosa.load("/home/paso/Documents/Julia_additional_files/test.wav", sr=sr_src, mono=true)
x, sr = librosa.load("/home/riccardopasini/Documents/Julia/test_mono.wav", sr=sr_src, mono=true)

numcep = 13
lifterexp = -22
Expand All @@ -95,7 +95,7 @@ fbtype = :htkmel
usecmp = false
modelorder = 0

depth = 6
depth = 2
wname = "sym6"
spec, freqs, times = wpspectrum(x, sr, depth, wname)

Expand All @@ -104,14 +104,14 @@ a = wmfcc(spec, sr)

b, z, zz = mfcc(x, sr)

# heatmap(1:size(a, 1),
# 1:size(a, 2), a,
# c=cgrad([:blue, :white, :red, :yellow]),
# xlabel="x values", ylabel="y values",
# title="My title")

heatmap(1:size(b, 1),
1:size(b, 2), b,
heatmap(1:size(a, 1),
1:size(a, 2), a,
c=cgrad([:blue, :white, :red, :yellow]),
xlabel="x values", ylabel="y values",
title="My title")
title="My title")

# heatmap(1:size(b, 1),
# 1:size(b, 2), b,
# c=cgrad([:blue, :white, :red, :yellow]),
# xlabel="x values", ylabel="y values",
# title="My title")
Loading

0 comments on commit ad2954c

Please sign in to comment.