Skip to content

Commit

Permalink
Add Python code
Browse files Browse the repository at this point in the history
  • Loading branch information
matinlotfali committed Jul 8, 2022
1 parent ea0b685 commit 16d15bc
Show file tree
Hide file tree
Showing 73 changed files with 8,700 additions and 10 deletions.
638 changes: 638 additions & 0 deletions Figures/chap2/mp3aac.uxf

Large diffs are not rendered by default.

Binary file removed InformationOnBibliographyStyles.pdf
Binary file not shown.
10 changes: 0 additions & 10 deletions README_FIRST.txt

This file was deleted.

138 changes: 138 additions & 0 deletions python/Encoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import numpy as np
import mdct
import io
from tqdm.auto import trange


def peripheral_adjustment(mdct_data, round_decimal_threshold=4, zero_threshold=0.001):
# mdct_data = np.round(mdct_data, decimals=round_decimal_threshold)
# ramp = np.arange(512)/512
if zero_threshold > 0:
mdct_data = np.where(abs(mdct_data) < zero_threshold, 0, mdct_data)
return mdct_data


def encode_mdct(raw_wave, sample_rate, npz_file,
cut_threshold=400,
zero_threshold=0.001,
lossy=True):

def encode_frame(n):
return peripheral_adjustment(data[n], zero_threshold=zero_threshold * max(data[n]))

data = mdct.mdct(raw_wave)
data = np.float16(data)
data = np.transpose(data)
if lossy:
# data[:, cut_threshold:] = 0
for i in trange(len(data), desc='MDCT', leave=False):
data[i] = encode_frame(i)
np.savez_compressed(npz_file, data=data, sample_rate=[sample_rate])
return data


def calc_errors(data, lossy):
if lossy:
# return np.count_nonzero((data != 0) & (data != -15.945))
return np.count_nonzero(data)
else:
compressed_array = io.BytesIO()
np.savez_compressed(compressed_array, data)
return len(compressed_array.getvalue())


def calc_diff(frame1, frame2):
# eps = 1e-7
# frame1 = np.log(frame1**2 + eps)
# frame2 = np.log(frame2**2 + eps)
diff = frame1 - frame2
return diff


def encode_mdct_diff(raw_wave, sample_rate, npz_file,
zero_threshold_i_frame=0.000,
zero_threshold_p_frame=0.001,
cut_threshold=400,
i_rate=2,
trick=False,
lossy=True):

def encode_frame_diff(n):
best_p = 0
base = peripheral_adjustment(ref[n], zero_threshold=zero_threshold_i_frame * max(ref[n])) if lossy else ref[n]
best_diff = base
least_err = calc_errors(best_diff, lossy)
initial_err = least_err
if least_err > 0:
for p in range(1, n):

if (n - p) % (i_rate * sample_rate // 512) == 0:
break

diff = calc_diff(ref[n], ref[n - p])
if lossy:
diff = peripheral_adjustment(diff, zero_threshold=zero_threshold_p_frame)

# for i in range(512):
# if base[i] == 0 and diff[i] != 0:
# diff[i] = -10
if trick:
diff = np.where((base == 0) & (diff != 0), -10, diff)

err = calc_errors(diff, lossy)

if err < least_err:
least_err = err
best_p = p
best_diff = diff

if err == 0:
break

return best_p, best_diff, initial_err - least_err

data = mdct.mdct(raw_wave)
data = np.float16(data)
data = np.transpose(data)
# if lossy:
# data[:, cut_threshold:] = 0
# eps = 1e-7
# signs = np.int8(np.sign(data))
# data = np.log(data ** 2 + eps)

ref = data.copy()
relative_indices = np.zeros(len(data), dtype=np.int16)
improves = np.zeros(len(data), dtype=np.int16)

for i in trange(len(data), desc='MDCT DIFF', leave=False):
relative_indices[i], data[i], improves[i] = encode_frame_diff(i)

np.savez_compressed(npz_file, data=data, relative=relative_indices, sample_rate=[sample_rate])#, signs=signs)
return data, relative_indices, improves


def decode_mdct(encoded_data):
encoded_data = np.transpose(encoded_data)
return mdct.imdct(encoded_data)


def decode_mdct_diff(loaded, trick=False):
encoded_data = loaded['data']
relative_indices = loaded['relative']
# signs = loaded['signs']

for n in trange(1, len(encoded_data), desc='MDCT DIFF Decode', leave=False):
p = relative_indices[n]
if relative_indices[n] > 0:
# encoded_data[n] = signs[n] * np.sqrt(np.exp(encoded_data[n] + prev_frame))
encoded_data[n] += encoded_data[n-p]

if trick:
diff = encoded_data[n].copy()
encoded_data[n] = np.where(diff <= -5, 0, encoded_data[n])
# for i in range(512):
# if diff[i] <= -5:
# encoded_data[n][i] = 0

# encoded_data = signs * np.sqrt(np.exp(encoded_data))
return decode_mdct(encoded_data)
13 changes: 13 additions & 0 deletions python/GenerateBarChart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df = pd.read_csv('tmp/output.csv')
df = df.drop(columns=['name'])
yvals = df.mean(axis=0).values
y_std = df.std() / np.sqrt(len(df)) * 1.96
plt.bar(df.columns, yvals, yerr=y_std, width=0.5, capsize=10)
plt.xticks(rotation=90)
plt.tight_layout()
plt.savefig('tmp/bar.png')
plt.close()
99 changes: 99 additions & 0 deletions python/PQevalAudioMATLAB/PQevalAudio/CB/PQCB.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
function [Nc, fc, fl, fu, dz] = PQCB (Version)
% Critical band parameters for the FFT model

% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:32:57 $

B = inline ('7 * asinh (f / 650)');
BI = inline ('650 * sinh (z / 7)');

fL = 80;
fU = 18000;

% Critical bands - set up the tables
if (strcmp (Version, 'Basic'))
dz = 1/4;
elseif (strcmp (Version, 'Advanced'))
dz = 1/2;
else
error ('PQCB: Invalid version');
end

zL = B(fL);
zU = B(fU);
Nc = ceil((zU - zL) / dz);
zl = zL + (0:Nc-1) * dz;
zu = min (zL + (1:Nc) * dz, zU);
zc = 0.5 * (zl + zu);

fl = BI (zl);
fc = BI (zc);
fu = BI (zu);

if (strcmp (Version, 'Basic'))
fl = [ 80.000, 103.445, 127.023, 150.762, 174.694, ...
198.849, 223.257, 247.950, 272.959, 298.317, ...
324.055, 350.207, 376.805, 403.884, 431.478, ...
459.622, 488.353, 517.707, 547.721, 578.434, ...
609.885, 642.114, 675.161, 709.071, 743.884, ...
779.647, 816.404, 854.203, 893.091, 933.119, ...
974.336, 1016.797, 1060.555, 1105.666, 1152.187, ...
1200.178, 1249.700, 1300.816, 1353.592, 1408.094, ...
1464.392, 1522.559, 1582.668, 1644.795, 1709.021, ...
1775.427, 1844.098, 1915.121, 1988.587, 2064.590, ...
2143.227, 2224.597, 2308.806, 2395.959, 2486.169, ...
2579.551, 2676.223, 2776.309, 2879.937, 2987.238, ...
3098.350, 3213.415, 3332.579, 3455.993, 3583.817, ...
3716.212, 3853.817, 3995.399, 4142.547, 4294.979, ...
4452.890, 4616.482, 4785.962, 4961.548, 5143.463, ...
5331.939, 5527.217, 5729.545, 5939.183, 6156.396, ...
6381.463, 6614.671, 6856.316, 7106.708, 7366.166, ...
7635.020, 7913.614, 8202.302, 8501.454, 8811.450, ...
9132.688, 9465.574, 9810.536, 10168.013, 10538.460, ...
10922.351, 11320.175, 11732.438, 12159.670, 12602.412, ...
13061.229, 13536.710, 14029.458, 14540.103, 15069.295, ...
15617.710, 16186.049, 16775.035, 17385.420 ];
fc = [ 91.708, 115.216, 138.870, 162.702, 186.742, ...
211.019, 235.566, 260.413, 285.593, 311.136, ...
337.077, 363.448, 390.282, 417.614, 445.479, ...
473.912, 502.950, 532.629, 562.988, 594.065, ...
625.899, 658.533, 692.006, 726.362, 761.644, ...
797.898, 835.170, 873.508, 912.959, 953.576, ...
995.408, 1038.511, 1082.938, 1128.746, 1175.995, ...
1224.744, 1275.055, 1326.992, 1380.623, 1436.014, ...
1493.237, 1552.366, 1613.474, 1676.641, 1741.946, ...
1809.474, 1879.310, 1951.543, 2026.266, 2103.573, ...
2183.564, 2266.340, 2352.008, 2440.675, 2532.456, ...
2627.468, 2725.832, 2827.672, 2933.120, 3042.309, ...
3155.379, 3272.475, 3393.745, 3519.344, 3649.432, ...
3784.176, 3923.748, 4068.324, 4218.090, 4373.237, ...
4533.963, 4700.473, 4872.978, 5051.700, 5236.866, ...
5428.712, 5627.484, 5833.434, 6046.825, 6267.931, ...
6497.031, 6734.420, 6980.399, 7235.284, 7499.397, ...
7773.077, 8056.673, 8350.547, 8655.072, 8970.639, ...
9297.648, 9636.520, 9987.683, 10351.586, 10728.695, ...
11119.490, 11524.470, 11944.149, 12379.066, 12829.775, ...
13294.850, 13780.887, 14282.503, 14802.338, 15341.057, ...
15899.345, 16477.914, 17077.504, 17690.045 ];
fu = [ 103.445, 127.023, 150.762, 174.694, 198.849, ...
223.257, 247.950, 272.959, 298.317, 324.055, ...
350.207, 376.805, 403.884, 431.478, 459.622, ...
488.353, 517.707, 547.721, 578.434, 609.885, ...
642.114, 675.161, 709.071, 743.884, 779.647, ...
816.404, 854.203, 893.091, 933.113, 974.336, ...
1016.797, 1060.555, 1105.666, 1152.187, 1200.178, ...
1249.700, 1300.816, 1353.592, 1408.094, 1464.392, ...
1522.559, 1582.668, 1644.795, 1709.021, 1775.427, ...
1844.098, 1915.121, 1988.587, 2064.590, 2143.227, ...
2224.597, 2308.806, 2395.959, 2486.169, 2579.551, ...
2676.223, 2776.309, 2879.937, 2987.238, 3098.350, ...
3213.415, 3332.579, 3455.993, 3583.817, 3716.212, ...
3853.348, 3995.399, 4142.547, 4294.979, 4452.890, ...
4643.482, 4785.962, 4961.548, 5143.463, 5331.939, ...
5527.217, 5729.545, 5939.183, 6156.396, 6381.463, ...
6614.671, 6856.316, 7106.708, 7366.166, 7635.020, ...
7913.614, 8202.302, 8501.454, 8811.450, 9132.688, ...
9465.574, 9810.536, 10168.013, 10538.460, 10922.351, ...
11320.175, 11732.438, 12159.670, 12602.412, 13061.229, ...
13536.710, 14029.458, 14540.103, 15069.295, 15617.710, ...
16186.049, 16775.035, 17385.420, 18000.000 ];
end
60 changes: 60 additions & 0 deletions python/PQevalAudioMATLAB/PQevalAudio/CB/PQDFTFrame.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
function X2 = PQDFTFrame (x)
% Calculate the DFT of a frame of data (NF values), returning the
% squared-magnitude DFT vector (NF/2 + 1 values)

% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:32:57 $

persistent hw

NF = length(x); % Frame size (samples)

if (isempty (hw))
Amax = 32768;
fc = 1019.5;
Fs = 48000;
Lp = 92;
% Set up the window (including all gains)
GL = PQ_GL (NF, Amax, fc/Fs, Lp);
hw = GL * PQHannWin (NF);
end

% Window the data
xw = hw .* x;

% DFT (output is real followed by imaginary)
X = PQRFFT (xw, NF, 1);

% Squared magnitude
X2 = PQRFFTMSq (X, NF);

%----------------------------------------
function GL = PQ_GL (NF, Amax, fcN, Lp)
% Scaled Hann window, including loudness scaling

% Calculate the gain for the Hann Window
% - level Lp (SPL) corresponds to a sine with normalized frequency
% fcN and a peak value of Amax

W = NF - 1;
gp = PQ_gp (fcN, NF, W);
GL = 10^(Lp / 20) / (gp * Amax/4 * W);

%----------
function gp = PQ_gp (fcN, NF, W)
% Calculate the peak factor. The signal is a sinusoid windowed with
% a Hann window. The sinusoid frequency falls between DFT bins. The
% peak of the frequency response (on a continuous frequency scale) falls
% between DFT bins. The largest DFT bin value is the peak factor times
% the peak of the continuous response.
% fcN - Normalized sinusoid frequency (0-1)
% NF - Frame (DFT) length samples
% NW - Window length samples

% Distance to the nearest DFT bin
df = 1 / NF;
k = floor (fcN / df);
dfN = min ((k+1) * df - fcN, fcN - k * df);

dfW = dfN * W;
gp = sin(pi * dfW) / (pi * dfW * (1 - dfW^2));

40 changes: 40 additions & 0 deletions python/PQevalAudioMATLAB/PQevalAudio/CB/PQ_excitCB.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
%--------------------
function [EbN, Es] = PQ_excitCB (X2)

persistent W2 EIN

NF = 2048;
Version = 'Basic';
if (isempty (W2))
Fs = 48000;
f = linspace (0, Fs/2, NF/2+1);
W2 = PQWOME (f);
[Nc, fc] = PQCB (Version);
EIN = PQIntNoise (fc);
end

% Allocate storage
XwN2 = zeros (1, NF/2+1);

% Outer and middle ear filtering
Xw2(1,:) = W2 .* X2(1,1:NF/2+1);
Xw2(2,:) = W2 .* X2(2,1:NF/2+1);

% Form the difference magnitude signal
for (k = 0:NF/2)
XwN2(k+1) = (Xw2(1,k+1) - 2 * sqrt (Xw2(1,k+1) * Xw2(2,k+1)) ...
+ Xw2(2,k+1));
end

% Group into partial critical bands
Eb(1,:) = PQgroupCB (Xw2(1,:), Version);
Eb(2,:) = PQgroupCB (Xw2(2,:), Version);
EbN = PQgroupCB (XwN2, Version);

% Add the internal noise term => "Pitch patterns"
E(1,:) = Eb(1,:) + EIN;
E(2,:) = Eb(2,:) + EIN;

% Critical band spreading => "Unsmeared excitation patterns"
Es(1,:) = PQspreadCB (E(1,:), Version);
Es(2,:) = PQspreadCB (E(2,:), Version);
24 changes: 24 additions & 0 deletions python/PQevalAudioMATLAB/PQevalAudio/CB/PQ_timeSpread.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
function [Ehs, Ef] = PQ_timeSpread (Es, Ef)
%% Seperate version added for testing for python comparison - SW.

persistent Nc a b

if (isempty (Nc))
[Nc, fc] = PQCB ('Basic');
Fs = 48000;
NF = 2048;
Nadv = NF / 2;
Fss = Fs / Nadv;
t100 = 0.030;
tmin = 0.008;
[a, b] = PQtConst (t100, tmin, fc, Fss);
end

% Allocate storage
Ehs = zeros (1, Nc);

% Time domain smoothing
for (m = 0:Nc-1)
Ef(m+1) = a(m+1) * Ef(m+1) + b(m+1) * Es(m+1);
Ehs(m+1) = max(Ef(m+1), Es(m+1));
end
Loading

0 comments on commit 16d15bc

Please sign in to comment.