-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ea0b685
commit 16d15bc
Showing
73 changed files
with
8,700 additions
and
10 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
import numpy as np | ||
import mdct | ||
import io | ||
from tqdm.auto import trange | ||
|
||
|
||
def peripheral_adjustment(mdct_data, round_decimal_threshold=4, zero_threshold=0.001): | ||
# mdct_data = np.round(mdct_data, decimals=round_decimal_threshold) | ||
# ramp = np.arange(512)/512 | ||
if zero_threshold > 0: | ||
mdct_data = np.where(abs(mdct_data) < zero_threshold, 0, mdct_data) | ||
return mdct_data | ||
|
||
|
||
def encode_mdct(raw_wave, sample_rate, npz_file, | ||
cut_threshold=400, | ||
zero_threshold=0.001, | ||
lossy=True): | ||
|
||
def encode_frame(n): | ||
return peripheral_adjustment(data[n], zero_threshold=zero_threshold * max(data[n])) | ||
|
||
data = mdct.mdct(raw_wave) | ||
data = np.float16(data) | ||
data = np.transpose(data) | ||
if lossy: | ||
# data[:, cut_threshold:] = 0 | ||
for i in trange(len(data), desc='MDCT', leave=False): | ||
data[i] = encode_frame(i) | ||
np.savez_compressed(npz_file, data=data, sample_rate=[sample_rate]) | ||
return data | ||
|
||
|
||
def calc_errors(data, lossy): | ||
if lossy: | ||
# return np.count_nonzero((data != 0) & (data != -15.945)) | ||
return np.count_nonzero(data) | ||
else: | ||
compressed_array = io.BytesIO() | ||
np.savez_compressed(compressed_array, data) | ||
return len(compressed_array.getvalue()) | ||
|
||
|
||
def calc_diff(frame1, frame2): | ||
# eps = 1e-7 | ||
# frame1 = np.log(frame1**2 + eps) | ||
# frame2 = np.log(frame2**2 + eps) | ||
diff = frame1 - frame2 | ||
return diff | ||
|
||
|
||
def encode_mdct_diff(raw_wave, sample_rate, npz_file, | ||
zero_threshold_i_frame=0.000, | ||
zero_threshold_p_frame=0.001, | ||
cut_threshold=400, | ||
i_rate=2, | ||
trick=False, | ||
lossy=True): | ||
|
||
def encode_frame_diff(n): | ||
best_p = 0 | ||
base = peripheral_adjustment(ref[n], zero_threshold=zero_threshold_i_frame * max(ref[n])) if lossy else ref[n] | ||
best_diff = base | ||
least_err = calc_errors(best_diff, lossy) | ||
initial_err = least_err | ||
if least_err > 0: | ||
for p in range(1, n): | ||
|
||
if (n - p) % (i_rate * sample_rate // 512) == 0: | ||
break | ||
|
||
diff = calc_diff(ref[n], ref[n - p]) | ||
if lossy: | ||
diff = peripheral_adjustment(diff, zero_threshold=zero_threshold_p_frame) | ||
|
||
# for i in range(512): | ||
# if base[i] == 0 and diff[i] != 0: | ||
# diff[i] = -10 | ||
if trick: | ||
diff = np.where((base == 0) & (diff != 0), -10, diff) | ||
|
||
err = calc_errors(diff, lossy) | ||
|
||
if err < least_err: | ||
least_err = err | ||
best_p = p | ||
best_diff = diff | ||
|
||
if err == 0: | ||
break | ||
|
||
return best_p, best_diff, initial_err - least_err | ||
|
||
data = mdct.mdct(raw_wave) | ||
data = np.float16(data) | ||
data = np.transpose(data) | ||
# if lossy: | ||
# data[:, cut_threshold:] = 0 | ||
# eps = 1e-7 | ||
# signs = np.int8(np.sign(data)) | ||
# data = np.log(data ** 2 + eps) | ||
|
||
ref = data.copy() | ||
relative_indices = np.zeros(len(data), dtype=np.int16) | ||
improves = np.zeros(len(data), dtype=np.int16) | ||
|
||
for i in trange(len(data), desc='MDCT DIFF', leave=False): | ||
relative_indices[i], data[i], improves[i] = encode_frame_diff(i) | ||
|
||
np.savez_compressed(npz_file, data=data, relative=relative_indices, sample_rate=[sample_rate])#, signs=signs) | ||
return data, relative_indices, improves | ||
|
||
|
||
def decode_mdct(encoded_data): | ||
encoded_data = np.transpose(encoded_data) | ||
return mdct.imdct(encoded_data) | ||
|
||
|
||
def decode_mdct_diff(loaded, trick=False): | ||
encoded_data = loaded['data'] | ||
relative_indices = loaded['relative'] | ||
# signs = loaded['signs'] | ||
|
||
for n in trange(1, len(encoded_data), desc='MDCT DIFF Decode', leave=False): | ||
p = relative_indices[n] | ||
if relative_indices[n] > 0: | ||
# encoded_data[n] = signs[n] * np.sqrt(np.exp(encoded_data[n] + prev_frame)) | ||
encoded_data[n] += encoded_data[n-p] | ||
|
||
if trick: | ||
diff = encoded_data[n].copy() | ||
encoded_data[n] = np.where(diff <= -5, 0, encoded_data[n]) | ||
# for i in range(512): | ||
# if diff[i] <= -5: | ||
# encoded_data[n][i] = 0 | ||
|
||
# encoded_data = signs * np.sqrt(np.exp(encoded_data)) | ||
return decode_mdct(encoded_data) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import pandas as pd | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
|
||
df = pd.read_csv('tmp/output.csv') | ||
df = df.drop(columns=['name']) | ||
yvals = df.mean(axis=0).values | ||
y_std = df.std() / np.sqrt(len(df)) * 1.96 | ||
plt.bar(df.columns, yvals, yerr=y_std, width=0.5, capsize=10) | ||
plt.xticks(rotation=90) | ||
plt.tight_layout() | ||
plt.savefig('tmp/bar.png') | ||
plt.close() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
function [Nc, fc, fl, fu, dz] = PQCB (Version) | ||
% Critical band parameters for the FFT model | ||
|
||
% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:32:57 $ | ||
|
||
B = inline ('7 * asinh (f / 650)'); | ||
BI = inline ('650 * sinh (z / 7)'); | ||
|
||
fL = 80; | ||
fU = 18000; | ||
|
||
% Critical bands - set up the tables | ||
if (strcmp (Version, 'Basic')) | ||
dz = 1/4; | ||
elseif (strcmp (Version, 'Advanced')) | ||
dz = 1/2; | ||
else | ||
error ('PQCB: Invalid version'); | ||
end | ||
|
||
zL = B(fL); | ||
zU = B(fU); | ||
Nc = ceil((zU - zL) / dz); | ||
zl = zL + (0:Nc-1) * dz; | ||
zu = min (zL + (1:Nc) * dz, zU); | ||
zc = 0.5 * (zl + zu); | ||
|
||
fl = BI (zl); | ||
fc = BI (zc); | ||
fu = BI (zu); | ||
|
||
if (strcmp (Version, 'Basic')) | ||
fl = [ 80.000, 103.445, 127.023, 150.762, 174.694, ... | ||
198.849, 223.257, 247.950, 272.959, 298.317, ... | ||
324.055, 350.207, 376.805, 403.884, 431.478, ... | ||
459.622, 488.353, 517.707, 547.721, 578.434, ... | ||
609.885, 642.114, 675.161, 709.071, 743.884, ... | ||
779.647, 816.404, 854.203, 893.091, 933.119, ... | ||
974.336, 1016.797, 1060.555, 1105.666, 1152.187, ... | ||
1200.178, 1249.700, 1300.816, 1353.592, 1408.094, ... | ||
1464.392, 1522.559, 1582.668, 1644.795, 1709.021, ... | ||
1775.427, 1844.098, 1915.121, 1988.587, 2064.590, ... | ||
2143.227, 2224.597, 2308.806, 2395.959, 2486.169, ... | ||
2579.551, 2676.223, 2776.309, 2879.937, 2987.238, ... | ||
3098.350, 3213.415, 3332.579, 3455.993, 3583.817, ... | ||
3716.212, 3853.817, 3995.399, 4142.547, 4294.979, ... | ||
4452.890, 4616.482, 4785.962, 4961.548, 5143.463, ... | ||
5331.939, 5527.217, 5729.545, 5939.183, 6156.396, ... | ||
6381.463, 6614.671, 6856.316, 7106.708, 7366.166, ... | ||
7635.020, 7913.614, 8202.302, 8501.454, 8811.450, ... | ||
9132.688, 9465.574, 9810.536, 10168.013, 10538.460, ... | ||
10922.351, 11320.175, 11732.438, 12159.670, 12602.412, ... | ||
13061.229, 13536.710, 14029.458, 14540.103, 15069.295, ... | ||
15617.710, 16186.049, 16775.035, 17385.420 ]; | ||
fc = [ 91.708, 115.216, 138.870, 162.702, 186.742, ... | ||
211.019, 235.566, 260.413, 285.593, 311.136, ... | ||
337.077, 363.448, 390.282, 417.614, 445.479, ... | ||
473.912, 502.950, 532.629, 562.988, 594.065, ... | ||
625.899, 658.533, 692.006, 726.362, 761.644, ... | ||
797.898, 835.170, 873.508, 912.959, 953.576, ... | ||
995.408, 1038.511, 1082.938, 1128.746, 1175.995, ... | ||
1224.744, 1275.055, 1326.992, 1380.623, 1436.014, ... | ||
1493.237, 1552.366, 1613.474, 1676.641, 1741.946, ... | ||
1809.474, 1879.310, 1951.543, 2026.266, 2103.573, ... | ||
2183.564, 2266.340, 2352.008, 2440.675, 2532.456, ... | ||
2627.468, 2725.832, 2827.672, 2933.120, 3042.309, ... | ||
3155.379, 3272.475, 3393.745, 3519.344, 3649.432, ... | ||
3784.176, 3923.748, 4068.324, 4218.090, 4373.237, ... | ||
4533.963, 4700.473, 4872.978, 5051.700, 5236.866, ... | ||
5428.712, 5627.484, 5833.434, 6046.825, 6267.931, ... | ||
6497.031, 6734.420, 6980.399, 7235.284, 7499.397, ... | ||
7773.077, 8056.673, 8350.547, 8655.072, 8970.639, ... | ||
9297.648, 9636.520, 9987.683, 10351.586, 10728.695, ... | ||
11119.490, 11524.470, 11944.149, 12379.066, 12829.775, ... | ||
13294.850, 13780.887, 14282.503, 14802.338, 15341.057, ... | ||
15899.345, 16477.914, 17077.504, 17690.045 ]; | ||
fu = [ 103.445, 127.023, 150.762, 174.694, 198.849, ... | ||
223.257, 247.950, 272.959, 298.317, 324.055, ... | ||
350.207, 376.805, 403.884, 431.478, 459.622, ... | ||
488.353, 517.707, 547.721, 578.434, 609.885, ... | ||
642.114, 675.161, 709.071, 743.884, 779.647, ... | ||
816.404, 854.203, 893.091, 933.113, 974.336, ... | ||
1016.797, 1060.555, 1105.666, 1152.187, 1200.178, ... | ||
1249.700, 1300.816, 1353.592, 1408.094, 1464.392, ... | ||
1522.559, 1582.668, 1644.795, 1709.021, 1775.427, ... | ||
1844.098, 1915.121, 1988.587, 2064.590, 2143.227, ... | ||
2224.597, 2308.806, 2395.959, 2486.169, 2579.551, ... | ||
2676.223, 2776.309, 2879.937, 2987.238, 3098.350, ... | ||
3213.415, 3332.579, 3455.993, 3583.817, 3716.212, ... | ||
3853.348, 3995.399, 4142.547, 4294.979, 4452.890, ... | ||
4643.482, 4785.962, 4961.548, 5143.463, 5331.939, ... | ||
5527.217, 5729.545, 5939.183, 6156.396, 6381.463, ... | ||
6614.671, 6856.316, 7106.708, 7366.166, 7635.020, ... | ||
7913.614, 8202.302, 8501.454, 8811.450, 9132.688, ... | ||
9465.574, 9810.536, 10168.013, 10538.460, 10922.351, ... | ||
11320.175, 11732.438, 12159.670, 12602.412, 13061.229, ... | ||
13536.710, 14029.458, 14540.103, 15069.295, 15617.710, ... | ||
16186.049, 16775.035, 17385.420, 18000.000 ]; | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
function X2 = PQDFTFrame (x) | ||
% Calculate the DFT of a frame of data (NF values), returning the | ||
% squared-magnitude DFT vector (NF/2 + 1 values) | ||
|
||
% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:32:57 $ | ||
|
||
persistent hw | ||
|
||
NF = length(x); % Frame size (samples) | ||
|
||
if (isempty (hw)) | ||
Amax = 32768; | ||
fc = 1019.5; | ||
Fs = 48000; | ||
Lp = 92; | ||
% Set up the window (including all gains) | ||
GL = PQ_GL (NF, Amax, fc/Fs, Lp); | ||
hw = GL * PQHannWin (NF); | ||
end | ||
|
||
% Window the data | ||
xw = hw .* x; | ||
|
||
% DFT (output is real followed by imaginary) | ||
X = PQRFFT (xw, NF, 1); | ||
|
||
% Squared magnitude | ||
X2 = PQRFFTMSq (X, NF); | ||
|
||
%---------------------------------------- | ||
function GL = PQ_GL (NF, Amax, fcN, Lp) | ||
% Scaled Hann window, including loudness scaling | ||
|
||
% Calculate the gain for the Hann Window | ||
% - level Lp (SPL) corresponds to a sine with normalized frequency | ||
% fcN and a peak value of Amax | ||
|
||
W = NF - 1; | ||
gp = PQ_gp (fcN, NF, W); | ||
GL = 10^(Lp / 20) / (gp * Amax/4 * W); | ||
|
||
%---------- | ||
function gp = PQ_gp (fcN, NF, W) | ||
% Calculate the peak factor. The signal is a sinusoid windowed with | ||
% a Hann window. The sinusoid frequency falls between DFT bins. The | ||
% peak of the frequency response (on a continuous frequency scale) falls | ||
% between DFT bins. The largest DFT bin value is the peak factor times | ||
% the peak of the continuous response. | ||
% fcN - Normalized sinusoid frequency (0-1) | ||
% NF - Frame (DFT) length samples | ||
% NW - Window length samples | ||
|
||
% Distance to the nearest DFT bin | ||
df = 1 / NF; | ||
k = floor (fcN / df); | ||
dfN = min ((k+1) * df - fcN, fcN - k * df); | ||
|
||
dfW = dfN * W; | ||
gp = sin(pi * dfW) / (pi * dfW * (1 - dfW^2)); | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
%-------------------- | ||
function [EbN, Es] = PQ_excitCB (X2) | ||
|
||
persistent W2 EIN | ||
|
||
NF = 2048; | ||
Version = 'Basic'; | ||
if (isempty (W2)) | ||
Fs = 48000; | ||
f = linspace (0, Fs/2, NF/2+1); | ||
W2 = PQWOME (f); | ||
[Nc, fc] = PQCB (Version); | ||
EIN = PQIntNoise (fc); | ||
end | ||
|
||
% Allocate storage | ||
XwN2 = zeros (1, NF/2+1); | ||
|
||
% Outer and middle ear filtering | ||
Xw2(1,:) = W2 .* X2(1,1:NF/2+1); | ||
Xw2(2,:) = W2 .* X2(2,1:NF/2+1); | ||
|
||
% Form the difference magnitude signal | ||
for (k = 0:NF/2) | ||
XwN2(k+1) = (Xw2(1,k+1) - 2 * sqrt (Xw2(1,k+1) * Xw2(2,k+1)) ... | ||
+ Xw2(2,k+1)); | ||
end | ||
|
||
% Group into partial critical bands | ||
Eb(1,:) = PQgroupCB (Xw2(1,:), Version); | ||
Eb(2,:) = PQgroupCB (Xw2(2,:), Version); | ||
EbN = PQgroupCB (XwN2, Version); | ||
|
||
% Add the internal noise term => "Pitch patterns" | ||
E(1,:) = Eb(1,:) + EIN; | ||
E(2,:) = Eb(2,:) + EIN; | ||
|
||
% Critical band spreading => "Unsmeared excitation patterns" | ||
Es(1,:) = PQspreadCB (E(1,:), Version); | ||
Es(2,:) = PQspreadCB (E(2,:), Version); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
function [Ehs, Ef] = PQ_timeSpread (Es, Ef) | ||
%% Seperate version added for testing for python comparison - SW. | ||
|
||
persistent Nc a b | ||
|
||
if (isempty (Nc)) | ||
[Nc, fc] = PQCB ('Basic'); | ||
Fs = 48000; | ||
NF = 2048; | ||
Nadv = NF / 2; | ||
Fss = Fs / Nadv; | ||
t100 = 0.030; | ||
tmin = 0.008; | ||
[a, b] = PQtConst (t100, tmin, fc, Fss); | ||
end | ||
|
||
% Allocate storage | ||
Ehs = zeros (1, Nc); | ||
|
||
% Time domain smoothing | ||
for (m = 0:Nc-1) | ||
Ef(m+1) = a(m+1) * Ef(m+1) + b(m+1) * Es(m+1); | ||
Ehs(m+1) = max(Ef(m+1), Es(m+1)); | ||
end |
Oops, something went wrong.