beta-v0.0

aclai-lab · Sep 24, 2023 · 05662ff · 05662ff
1 parent bc14668
commit 05662ff
Show file tree

Hide file tree

Showing 7 changed files with 1,311 additions and 0 deletions.
diff --git a/src/conv.jl b/src/conv.jl
@@ -0,0 +1,226 @@
+FORWARD = -1
+
+for (Tr, Tc) in ((:Float32, :(Complex{Float32})), (:Float64, :(Complex{Float64})))
+    # Note: use $FORWARD and $BACKWARD below because of issue #9775
+    @eval begin
+        function plan_rfft(X::StridedArray{$Tr,N}, region;
+            flags::Integer=ESTIMATE,
+            timelimit::Real=NO_TIMELIMIT,
+            num_threads::Union{Nothing,Integer}=nothing) where {N}
+            if num_threads !== nothing
+                plan = set_num_threads(num_threads) do
+                    plan_rfft(X, region; flags=flags, timelimit=timelimit)
+                end
+                return plan
+            end
+            osize = rfft_output_size(X, region)
+            Y = flags & ESTIMATE != 0 ? FakeArray{$Tc}(osize) : Array{$Tc}(undef, osize)
+            rFFTWPlan{$Tr,$FORWARD,false,N}(X, Y, region, flags, timelimit)
+        end
+
+        plan_rfft(X::StridedArray{$Tr}; kws...) = plan_rfft(X, ntuple(identity, ndims(X)); kws...)
+    end
+end
+
+"""
+    nextfastfft(n)
+
+Return the closest product of 2, 3, 5, and 7 greater than or equal
+to `n`. FFTW contains optimized kernels for these sizes and
+computes Fourier transforms of input that is a product of these
+sizes faster than for input of other sizes.
+"""
+FAST_FFT_SIZES = [2, 3, 5, 7]
+
+nextfastfft(n) = nextprod(FAST_FFT_SIZES, n)
+nextfastfft(ns::Tuple) = nextfastfft.(ns)
+nextfastfft(ns...) = nextfastfft(ns)
+
+"""
+Estimate the number of floating point multiplications per output sample for an
+overlap-save algorithm with fft size `nfft`, and filter size `nb`.
+"""
+os_fft_complexity(nfft, nb) = (nfft * log2(nfft) + nfft) / (nfft - nb + 1)
+
+"""
+Determine the length of FFT that minimizes the number of multiplications per
+output sample for an overlap-save convolution of vectors of size `nb` and `nx`.
+"""
+function optimalfftfiltlength(nb, nx)
+    nfull = nb + nx - 1
+
+    # Step through possible nffts and find the nfft that minimizes complexity
+    # Assumes that complexity is convex
+    first_pow2 = ceil(Int, log2(nb))
+    max_pow2 = ceil(Int, log2(nfull))
+    prev_complexity = os_fft_complexity(2^first_pow2, nb)
+    pow2 = first_pow2 + 1
+    while pow2 <= max_pow2
+        new_complexity = os_fft_complexity(2^pow2, nb)
+        new_complexity > prev_complexity && break
+        prev_complexity = new_complexity
+        pow2 += 1
+    end
+    nfft = pow2 > max_pow2 ? 2^max_pow2 : 2^(pow2 - 1)
+
+    if nfft > nfull
+        # If nfft > nfull, then it's better to find next fast power
+        nfft = nextfastfft(nfull)
+    end
+
+    nfft
+end
+
+"""
+    _zeropad!(padded::AbstractVector,
+              u::AbstractVector,
+              padded_axes = axes(padded),
+              data_dest::Tuple = (1,),
+              data_region = CartesianIndices(u))
+
+Place the portion of `u` specified by `data_region` into `padded`, starting at
+location `data_dest`. Sets the rest of `padded` to zero. This will mutate
+`padded`. `padded_axes` must correspond to the axes of `padded`.
+
+"""
+@inline function _zeropad!(
+    padded::AbstractVector,
+    u::AbstractVector,
+    padded_axes=axes(padded),
+    data_dest::Tuple=(first(padded_axes[1]),),
+    data_region=CartesianIndices(u),
+)
+    datasize = length(data_region)
+    # Use axes to accommodate arrays that do not start at index 1
+    data_first_i = first(data_region)[1]
+    dest_first_i = data_dest[1]
+    copyto!(padded, dest_first_i, u, data_first_i, datasize)
+    padded[first(padded_axes[1]):dest_first_i-1] .= 0
+    padded[dest_first_i+datasize:end] .= 0
+
+    padded
+end
+
+@inline function _zeropad!(
+    padded::AbstractArray,
+    u::AbstractArray,
+    padded_axes=axes(padded),
+    data_dest::Tuple=first.(padded_axes),
+    data_region=CartesianIndices(u),
+)
+    fill!(padded, zero(eltype(padded)))
+    dest_axes = UnitRange.(data_dest, data_dest .+ size(data_region) .- 1)
+    dest_region = CartesianIndices(dest_axes)
+    copyto!(padded, dest_region, u, data_region)
+
+    padded
+end
+
+"""
+    _zeropad(u, padded_size, [data_dest, data_region])
+
+Creates and returns a new base-1 index array of size `padded_size`, with the
+section of `u` specified by `data_region` copied into the region of the new
+ array as specified by `data_dest`. All other values will be initialized to
+ zero.
+
+If either `data_dest` or `data_region` is not specified, then the defaults
+described in [`_zeropad!`](@ref) will be used.
+"""
+function _zeropad(u, padded_size, args...)
+    padded = similar(u, padded_size)
+    _zeropad!(padded, u, axes(padded), args...)
+end
+
+function _zeropad_keep_offset(u, padded_size, u_axes, args...)
+    ax_starts = first.(u_axes)
+    new_axes = UnitRange.(ax_starts, ax_starts .+ padded_size .- 1)
+    _zeropad!(similar(u, new_axes), u, args...)
+end
+
+function _conv_kern_fft!(out,
+    u::AbstractArray{T,N},
+    v::AbstractArray{T,N},
+    su,
+    sv,
+    outsize,
+    nffts) where {T<:Real,N}
+    padded = _zeropad(u, nffts)
+    p = plan_rfft(padded)
+    uf = p * padded
+    _zeropad!(padded, v)
+    vf = p * padded
+    uf .*= vf
+    raw_out = irfft(uf, nffts[1])
+    copyto!(out,
+        CartesianIndices(out),
+        raw_out,
+        CartesianIndices(UnitRange.(1, outsize)))
+end
+function _conv_kern_fft!(out, u, v, su, sv, outsize, nffts)
+    upad = _zeropad(u, nffts)
+    vpad = _zeropad(v, nffts)
+    p! = plan_fft!(upad)
+    p! * upad # Operates in place on upad
+    p! * vpad
+    upad .*= vpad
+    ifft!(upad)
+    copyto!(out,
+        CartesianIndices(out),
+        upad,
+        CartesianIndices(UnitRange.(1, outsize)))
+end
+
+# v should be smaller than u for good performance
+function conv_fft!(out, x, f, sx, sf, outsize)
+    os_nffts = map(optimalfftfiltlength, sf, sx)
+    # if any(os_nffts .< outsize)
+    #     unsafe_conv_kern_os!(out, u, v, su, sv, outsize, os_nffts)
+    # else
+    nffts = nextfastfft(outsize)
+    _conv_kern_fft!(out, u, v, su, sv, outsize, nffts)
+    # end
+end
+
+"""
+Convolution of two arrays. Uses either FFT convolution or overlap-save,
+depending on the size of the input. `u` and `v` can be  N-dimensional arrays,
+with arbitrary indexing offsets, but their axes must be a `UnitRange`.
+"""
+function conv(
+    x::Union{AbstractVector{T},AbstractArray{T}},
+    f::AbstractVector{S},
+    shape::Symbol
+) where {T<:Real,S<:Real} # two dimensional convolution
+
+    sx = size(x)
+    sf = size(f)
+
+    if prod(sx) < prod(sf) # prod = moltiplicazione di tutti gli elementi
+        x, f = f, x
+        sx, sf = sf, sx
+    end
+
+    if (shape == :full)
+        outsize = sx .+ sf .- 1
+    elseif (shape == :valid)
+        outsize = sx .- sf .+ 1
+        ioffset = sz - 1
+    elseif (shape == :same)
+        outsize = sx
+    end
+
+    out = similar(x, outsize)
+    conv_fft!(out, x, f, sx, sf, outsize)
+end
+
+using DSP
+
+x = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]
+f = [5, 6, 7, 8]
+x = Float64.(x)
+f = Float64.(f)
+shape = :full
+
+c = conv(x, f, :full)
+# c = conv(x, f)
diff --git a/src/fft.jl b/src/fft.jl
@@ -0,0 +1,29 @@
+if (!@isdefined(mono)) # carica solo se non è definita mono, serve giusto per debug
+    # cosi ogni volta che lancio non ricarica tutto
+    using PyCall
+    librosa = pyimport("librosa")
+    sr_src = 8000
+    mono, sr = librosa.load("/home/riccardopasini/Documents/Julia/test_mono.wav", sr=sr_src, mono=true)
+    stereo, sr = librosa.load("/home/riccardopasini/Documents/Julia/test_stereo.wav", sr=sr_src, mono=false)
+    stereo = stereo'
+end
+
+include("windowing.jl")
+
+function fft(
+    x::Union{AbstractVector{T},AbstractArray{T}},
+    fftLength::Int64=256,
+    winType::Symbol=:hann,
+    winParam::Symbol=:symmetric,
+    logEnergy::Bool=true
+) where {T<:Real}
+
+    y = windowing(x, fftLength, winType, winParam, logEnergy)
+end
+
+logEnergy=false
+fftLength=256
+winType=:hann
+winParam=:symmetric
+
+y = fft(mono, fftLength, winType, winParam, logEnergy)
diff --git a/src/trim_audio.jl b/src/trim_audio.jl
@@ -0,0 +1,109 @@
+# lavora solo su segnali mono, quindi bisogna caricare i wav sempre con librosa > mono=true
+using Statistics
+
+include("windowing.jl")
+
+function signal_to_rms(
+    y::Union{AbstractVector{T},AbstractArray{T}}
+) where {T<:Real}
+    # calcola power e rms
+    return sqrt.(mean(abs.(y) .^ 2, dims=2))
+end
+
+function signal_to_db(
+    y::Union{AbstractVector{T},AbstractArray{T}}
+) where {T<:Real}
+    magnitude = abs.(y)
+    power = magnitude .^ 2
+
+    ref_value = maximum(magnitude)^2
+    amin = 1e-5^2
+
+    # power to db
+    db_log = 10.0 * log10.(max.(maximum.(power), amin))
+    db_log .-= 10.0 * log10(max(amin, ref_value))
+    return db_log
+end
+
+function trimAudio(
+    x::Union{AbstractVector{T},AbstractArray{T}},
+    fftLength::Int64=1024,
+    threshold::Real=60
+) where {T<:Real}
+
+    # crea la matrice delle finestre: le righe sono le finestre.
+    y = windowing(x, fftLength, :rect, :symmetric, false)
+
+    # converte l'audio in rms e poi in db
+    rms = signal_to_rms(y)
+    db = signal_to_db(rms)
+    framesValidated = db .> -threshold
+
+    signal = zeros(eltype(x), 0)
+    silence = zeros(eltype(x), 0)
+    flag = :start
+    for i in eachindex(framesValidated)
+        # caso 1: partenza segnale
+        if (framesValidated[i] && (flag == :silence || flag == :start))
+            fade_y = fade(y[i, :], fftLength, :in)
+            signal = vcat(signal, y[i, :])
+
+            if (flag == :silence)
+                silence = fade(silence, fftLength, :out)
+            end
+
+            flag = :signal
+        # caso 2: continuazione segnale
+        elseif (framesValidated[i] && flag == :signal)
+            signal = vcat(signal, y[i, :])
+        #caso 3: partenza silenzio
+        elseif (!framesValidated[i] && (flag == :signal || flag == :start))
+            fade_y = fade(y[i, :], fftLength, :in)
+            silence = vcat(silence, y[i, :])
+
+            if (flag == :signal)
+                signal = fade(signal, fftLength, :out)
+            end
+
+            flag = :silence
+            #caso 4: continuazione silenzio
+        else
+            silence = vcat(silence, y[i, :])
+        end
+
+        #chiudo con un fade out
+        if (flag == :silence)
+            silence = fade(silence, fftLength, :out)
+        else
+            signal = fade(signal, fftLength, :out)
+        end
+    end
+
+    return signal, silence
+end
+
+##MAIN
+if (!@isdefined(mono)) # carica solo se non è definita mono, serve giusto per debug
+    # cosi ogni volta che lancio non ricarica tutto
+    using PyCall
+    librosa = pyimport("librosa")
+    sr_src = 8000
+    mono, sr = librosa.load("/home/riccardopasini/Documents/Julia/test_mono.wav", sr=sr_src, mono=true)
+end
+
+x, y = trimAudio(mono, 1024, 15)
+
+soundfile = pyimport("soundfile")
+## Save audiofile
+soundfile.write(
+    "signal.wav",
+    x,
+    samplerate=sr,
+    subtype="PCM_16")
+soundfile.write(
+    "silence.wav",
+    y,
+    samplerate=sr,
+    subtype="PCM_16")
+
+