Skip to content

Commit

Permalink
filt perf optimizations (#516)
Browse files Browse the repository at this point in the history
* create `si` only once, and reuse

* no inbounds faster for N < 24

* @generated `_small_filt_fir!`

- use Base.Cartesian.@ntuple

* inbounds if N > 18

major speedups for N > 18, much slower if inbounds for small N

* reorder store in `_filt_fir` and `_filt_iir`

>20% faster for fir

* maybe prevent undefvarerror

no need to assign values to enum

* small things

- eachindex stuff
- `==` to `===`  nothing
- `npairs = max(nz, np)`

* isone, iszero, etc.

* `filt` for `FIRFilter`: remove redundant definitions

* extract type instability from loop

* remove fft conv type instability

- in `unsafe_conv_kern_os!`
- probably no real impact

* correct `filt` arg types

* nicer loop

* inv plan

* cleanup filt_order

* use `muladd`s in filt.jl

* `_prod_freq`

* more `muladd`s

* `fill!`, `reverse`, don't broadcast scalar /

* micro-optimizations, cosmetics

- reduced exceptions in `@code_llvm` for `N < 18`
- `@noinline mul!`, `broadcast`, only for julia > v1.8
- name `SMALL_FILT_VECT_CUTOFF`, selective bounds check

* Compat noinline

* add test for error

(cherry picked from commit 9de4c0c)
  • Loading branch information
wheeheee authored and martinholters committed Sep 13, 2024
1 parent 26ca32e commit d2e69ee
Show file tree
Hide file tree
Showing 12 changed files with 257 additions and 293 deletions.
1 change: 1 addition & 0 deletions src/DSP.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module DSP
using FFTW
using LinearAlgebra: mul!, rmul!
using IterTools: subsets
using Compat: Compat

export conv, deconv, filt, filt!, xcorr

Expand Down
20 changes: 9 additions & 11 deletions src/Filters/design.jl
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,7 @@ function Chebyshev2(::Type{T}, n::Integer, ripple::Real) where {T<:Real}

ε = 1/sqrt(10^(convert(T, ripple)/10)-1)
p = chebyshev_poles(T, n, ε)
for i = 1:length(p)
p[i] = inv(p[i])
end
map!(inv, p, p)

z = zeros(Complex{T}, n-isodd(n))
k = one(T)
Expand Down Expand Up @@ -155,7 +153,7 @@ function asne(w::Number, k::Real)
# Eq. (50)
k = abs2(k/(1+sqrt(1-abs2(k))))
# Eq. (56)
w = 2*w/((1+k)*(1+sqrt(1-abs2(kold)*w^2)))
w = 2w / ((1 + k) * (1 + sqrt(muladd(-abs2(kold), w^2, 1))))
end
2*asin(w)/π
end
Expand Down Expand Up @@ -355,9 +353,9 @@ function transform_prototype(ftype::Bandpass, proto::ZeroPoleGain{:s})
newz = zeros(TR, 2*nz+np-ncommon)
newp = zeros(TR, 2*np+nz-ncommon)
for (oldc, newc) in ((p, newp), (z, newz))
for i = 1:length(oldc)
for i in eachindex(oldc)
b = oldc[i] * ((ftype.w2 - ftype.w1)/2)
pm = sqrt(b^2 - ftype.w2 * ftype.w1)
pm = sqrt(muladd(-ftype.w2, ftype.w1, b^2))
newc[2i-1] = b + pm
newc[2i] = b - pm
end
Expand All @@ -372,25 +370,25 @@ function transform_prototype(ftype::Bandstop, proto::ZeroPoleGain{:s})
k = proto.k
nz = length(z)
np = length(p)
npairs = nz+np-min(nz, np)
npairs = max(nz, np)
TR = Base.promote_eltype(z, p)
newz = Vector{TR}(undef, 2*npairs)
newp = Vector{TR}(undef, 2*npairs)

num = one(eltype(z))
for i = 1:nz
num *= -z[i]
b = (ftype.w2 - ftype.w1)/2/z[i]
pm = sqrt(b^2 - ftype.w2 * ftype.w1)
b = (ftype.w2 - ftype.w1)/2z[i]
pm = sqrt(muladd(-ftype.w2, ftype.w1, b^2))
newz[2i-1] = b - pm
newz[2i] = b + pm
end

den = one(eltype(p))
for i = 1:np
den *= -p[i]
b = (ftype.w2 - ftype.w1)/2/p[i]
pm = sqrt(b^2 - ftype.w2 * ftype.w1)
b = (ftype.w2 - ftype.w1)/2p[i]
pm = sqrt(muladd(-ftype.w2, ftype.w1, b^2))
newp[2i-1] = b - pm
newp[2i] = b + pm
end
Expand Down
113 changes: 54 additions & 59 deletions src/Filters/filt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


## PolynomialRatio
_zerosi(f::PolynomialRatio{:z,T}, x::AbstractArray{S}) where {T,S} =
_zerosi(f::PolynomialRatio{:z,T}, ::AbstractArray{S}) where {T,S} =
zeros(promote_type(T, S), max(-firstindex(f.a), -firstindex(f.b)))

"""
Expand Down Expand Up @@ -35,7 +35,7 @@ selected based on the data and filter length.
filt(f::PolynomialRatio{:z}, x, si=_zerosi(f, x)) = filt(coefb(f), coefa(f), x, si)

## SecondOrderSections
_zerosi(f::SecondOrderSections{:z,T,G}, x::AbstractArray{S}) where {T,G,S} =
_zerosi(f::SecondOrderSections{:z,T,G}, ::AbstractArray{S}) where {T,G,S} =
zeros(promote_type(T, G, S), 2, length(f.biquads))

# filt! algorithm (no checking, returns si)
Expand All @@ -44,14 +44,14 @@ function _filt!(out::AbstractArray, si::AbstractArray{S,N}, f::SecondOrderSectio
g = f.g
biquads = f.biquads
n = length(biquads)
@inbounds for i = 1:size(x, 1)
@inbounds for i in axes(x, 1)
yi = x[i, col]
for fi = 1:n
biquad = biquads[fi]
xi = yi
yi = si[1, fi] + biquad.b0*xi
si[1, fi] = si[2, fi] + biquad.b1*xi - biquad.a1*yi
si[2, fi] = biquad.b2*xi - biquad.a2*yi
yi = muladd(biquad.b0, xi, si[1, fi])
si[1, fi] = muladd(biquad.a1, -yi, muladd(biquad.b1, xi, si[2, fi]))
si[2, fi] = muladd(biquad.b2, xi, -biquad.a2 * yi)
end
out[i, col] = yi*g
end
Expand Down Expand Up @@ -80,17 +80,17 @@ filt(f::SecondOrderSections{:z,T,G}, x::AbstractArray{S}, si=_zerosi(f, x)) wher
filt!(Array{promote_type(T, G, S)}(undef, size(x)), f, x, si)

## Biquad
_zerosi(f::Biquad{:z,T}, x::AbstractArray{S}) where {T,S} =
_zerosi(::Biquad{:z,T}, ::AbstractArray{S}) where {T,S} =
zeros(promote_type(T, S), 2)

# filt! algorithm (no checking, returns si)
function _filt!(out::AbstractArray, si1::Number, si2::Number, f::Biquad{:z},
x::AbstractArray, col::Int)
@inbounds for i = 1:size(x, 1)
@inbounds for i in axes(x, 1)
xi = x[i, col]
yi = si1 + f.b0*xi
si1 = si2 + f.b1*xi - f.a1*yi
si2 = f.b2*xi - f.a2*yi
yi = muladd(f.b0, xi, si1)
si1 = muladd(f.a1, -yi, muladd(f.b1, xi, si2))
si2 = muladd(f.b2, xi, -f.a2 * yi)
out[i, col] = yi
end
(si1, si2)
Expand All @@ -105,9 +105,8 @@ function filt!(out::AbstractArray, f::Biquad{:z}, x::AbstractArray,
(size(si, 1) != 2 || (N > 1 && Base.trailingsize(si, 2) != ncols)) &&
error("si must have two rows and 1 or nsignals columns")

initial_si = si
for col = 1:ncols
_filt!(out, initial_si[1, N > 1 ? col : 1], initial_si[2, N > 1 ? col : 1], f, x, col)
_filt!(out, si[1, N > 1 ? col : 1], si[2, N > 1 ? col : 1], f, x, col)
end
out
end
Expand Down Expand Up @@ -170,13 +169,13 @@ function filt!(out::AbstractVector, f::DF2TFilter{<:PolynomialRatio,<:Vector}, x
if n == 1
mul!(out, x, b[1])
else
@inbounds for i=1:length(x)
@inbounds for i in eachindex(x, out)
xi = x[i]
val = si[1] + b[1]*xi
val = muladd(b[1], xi, si[1])
for j=2:n-1
si[j-1] = si[j] + b[j]*xi - a[j]*val
si[j-1] = muladd(a[j], -val, muladd(b[j], xi, si[j]))
end
si[n-1] = b[n]*xi - a[n]*val
si[n-1] = muladd(b[n], xi, -a[n] * val)
out[i] = val
end
end
Expand All @@ -200,13 +199,13 @@ DF2TFilter(coef::Biquad{:z,T}, state::Vector{S}=zeros(T, 2)) where {T,S} =
function filt!(out::AbstractVector, f::DF2TFilter{<:Biquad,<:Vector}, x::AbstractVector)
length(x) != length(out) && throw(ArgumentError("out size must match x"))
si = f.state
(si[1], si[2]) =_filt!(out, si[1], si[2], f.coef, x, 1)
(si[1], si[2]) = _filt!(out, si[1], si[2], f.coef, x, 1)
out
end

# Variant that allocates the output
filt(f::DF2TFilter{<:FilterCoefficients{:z},S}, x::AbstractVector) where {S<:Array} =
filt!(Vector{eltype(S)}(undef, length(x)), f, x)
filt(f::DF2TFilter{<:FilterCoefficients{:z},<:Array{T}}, x::AbstractVector) where {T} =
filt!(Vector{T}(undef, length(x)), f, x)

# Fall back to SecondOrderSections
DF2TFilter(coef::FilterCoefficients{:z}) = DF2TFilter(convert(SecondOrderSections, coef))
Expand Down Expand Up @@ -346,7 +345,7 @@ filtfilt(f::PolynomialRatio{:z}, x) = filtfilt(coefb(f), coefa(f), x)
# response to a step function is steady state.
function filt_stepstate(b::Union{AbstractVector{T}, T}, a::Union{AbstractVector{T}, T}) where T<:Number
scale_factor = a[1]
if scale_factor != 1.0
if !isone(scale_factor)
a = a ./ scale_factor
b = b ./ scale_factor
end
Expand All @@ -362,8 +361,8 @@ function filt_stepstate(b::Union{AbstractVector{T}, T}, a::Union{AbstractVector{
as<sz && (a = copyto!(zeros(eltype(a), sz), a))

# construct the companion matrix A and vector B:
A = [-a[2:end] [I; zeros(T, 1, sz-2)]]
B = b[2:end] - a[2:end] * b[1]
A = [-a[2:end] Matrix{T}(I, sz-1, sz-2)]
B = @views @. muladd(a[2:end], -b[1], b[2:end])
# Solve si = A*si + B
# (I - A)*si = B
scale_factor \ (I - A) \ B
Expand All @@ -375,18 +374,18 @@ function filt_stepstate(f::SecondOrderSections{:z,T}) where T
y = one(T)
for i = 1:length(biquads)
biquad = biquads[i]
a1, a2, b0, b1, b2 = biquad.a1, biquad.a2, biquad.b0, biquad.b1, biquad.b2

# At steady state, we have:
# y = s1 + b0*x
# s1 = s2 + b1*x - a1*y
# s2 = b2*x - a2*y
# where x is the input and y is the output. Solving these
# equations yields the following.
si[1, i] = (-(biquad.a1 + biquad.a2)*biquad.b0 + biquad.b1 + biquad.b2)/
(1 + biquad.a1 + biquad.a2)*y
si[2, i] = (biquad.a1*biquad.b2 - biquad.a2*(biquad.b0 + biquad.b1) + biquad.b2)/
(1 + biquad.a1 + biquad.a2)*y
y *= (biquad.b0 + biquad.b1 + biquad.b2)/(1 + biquad.a1 + biquad.a2)
den = (1 + a1 + a2)
si[1, i] = muladd((a1 + a2), -b0, b1 + b2) / den * y
si[2, i] = muladd(a1, b2, muladd(-a2, (b0 + b1), b2)) / den * y
y *= (b0 + b1 + b2) / den
end
si
end
Expand All @@ -397,8 +396,8 @@ end
Apply filter or filter coefficients `h` along the first dimension
of array `x` using a naïve time-domain algorithm
"""
function tdfilt(h::AbstractVector, x::AbstractArray{T}) where T<:Real
filt!(Array{T}(undef, size(x)), h, ones(eltype(h), 1), x)
function tdfilt(h::AbstractVector{H}, x::AbstractArray{T}) where {H,T<:Real}
filt(h, one(H), x)
end

"""
Expand All @@ -407,12 +406,12 @@ end
Like `tdfilt`, but writes the result into array `out`. Output array `out` may
not be an alias of `x`, i.e. filtering may not be done in place.
"""
function tdfilt!(out::AbstractArray, h::AbstractVector, x::AbstractArray)
filt!(out, h, ones(eltype(h), 1), x)
function tdfilt!(out::AbstractArray, h::AbstractVector{H}, x::AbstractArray) where H
filt!(out, h, one(H), x)
end

filt(h::AbstractArray, x::AbstractArray) =
filt!(Array{eltype(x)}(undef, size(x)), h, x)
filt(h::AbstractVector{H}, x::AbstractArray{T}) where {H,T} =
filt!(Array{promote_type(H, T)}(undef, size(x)), h, x)

#
# fftfilt and filt
Expand Down Expand Up @@ -447,48 +446,44 @@ end
# Like fftfilt! but does not check if out and x are the same size
function _fftfilt!(
out::AbstractArray{<:Real},
b::AbstractVector{<:Real},
b::AbstractVector{H},
x::AbstractArray{T},
nfft::Integer
) where T<:Real
) where {T<:Real,H<:Real}
nb = length(b)
nx = size(x, 1)
normfactor = 1/nfft
normfactor = nfft
W = promote_type(H, T)

L = min(nx, nfft - (nb - 1))
tmp1 = Vector{T}(undef, nfft)
tmp2 = Vector{Complex{T}}(undef, nfft >> 1 + 1)
tmp1 = Vector{W}(undef, nfft)
tmp2 = Vector{Complex{W}}(undef, nfft >> 1 + 1)

p1 = plan_rfft(tmp1)
p2 = plan_brfft(tmp2, nfft)

# FFT of filter
filterft = similar(tmp2)
tmp1[1:nb] .= b .* normfactor
tmp1[nb+1:end] .= zero(T)
tmp1[1:nb] .= b ./ normfactor
tmp1[nb+1:end] .= zero(W)
mul!(filterft, p1, tmp1)

# FFT of chunks
for colstart = 0:nx:length(x)-1
off = 1
while off <= nx
npadbefore = max(0, nb - off)
xstart = off - nb + npadbefore + 1
n = min(nfft - npadbefore, nx - xstart + 1)
for colstart = 0:nx:length(x)-1, off = 1:L:nx
npadbefore = max(0, nb - off)
xstart = off - nb + npadbefore + 1
n = min(nfft - npadbefore, nx - xstart + 1)

tmp1[1:npadbefore] .= zero(T)
tmp1[npadbefore+n+1:end] .= zero(T)
tmp1[1:npadbefore] .= zero(W)
tmp1[npadbefore+n+1:end] .= zero(W)

copyto!(tmp1, npadbefore+1, x, colstart+xstart, n)
mul!(tmp2, p1, tmp1)
broadcast!(*, tmp2, tmp2, filterft)
mul!(tmp1, p2, tmp2)
copyto!(tmp1, npadbefore+1, x, colstart+xstart, n)
mul!(tmp2, p1, tmp1)
broadcast!(*, tmp2, tmp2, filterft)
mul!(tmp1, p2, tmp2)

# Copy to output
copyto!(out, colstart+off, tmp1, nb, min(L, nx - off + 1))

off += L
end
# Copy to output
copyto!(out, colstart+off, tmp1, nb, min(L, nx - off + 1))
end

out
Expand Down Expand Up @@ -524,6 +519,6 @@ function filt_choose_alg!(
end
end

function filt_choose_alg!(out::AbstractArray, b::AbstractArray, x::AbstractArray)
function filt_choose_alg!(out::AbstractArray, b::AbstractVector, x::AbstractArray)
tdfilt!(out, b, x)
end
Loading

0 comments on commit d2e69ee

Please sign in to comment.