From 2862414b60f808b7c89a42071edb09796dd22af7 Mon Sep 17 00:00:00 2001 From: iramirsina Date: Sat, 25 Nov 2017 20:53:22 -0500 Subject: [PATCH] correction for cmvnw function --- docs/source/content/features.rst | 12 ++++----- docs/source/content/features.rst~ | 32 +++++++++++++++++++++++ speechpy/__init__.py | 5 ++-- speechpy/__init__.py~ | 0 speechpy/{main.py => feature.py} | 0 speechpy/functions.py~ | 43 +++++++++++++++++++++++++++++++ tests/test_local.py | 21 +++++++++------ 7 files changed, 96 insertions(+), 17 deletions(-) create mode 100644 docs/source/content/features.rst~ create mode 100755 speechpy/__init__.py~ rename speechpy/{main.py => feature.py} (100%) create mode 100755 speechpy/functions.py~ diff --git a/docs/source/content/features.rst b/docs/source/content/features.rst index a468c08..115a861 100644 --- a/docs/source/content/features.rst +++ b/docs/source/content/features.rst @@ -4,29 +4,29 @@ Features ========= -.. automodule:: speechpy.main -.. currentmodule:: speechpy.main +.. automodule:: speechpy.feature +.. currentmodule:: speechpy.feature :hidden:`MFCC` ~~~~~~~~~~~~~~ -.. autofunction:: speechpy.main.mfcc +.. autofunction:: speechpy.feature.mfcc :hidden:`Mel Frequency Energy` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autofunction:: speechpy.main.mfe +.. autofunction:: speechpy.feature.mfe :hidden:`Log Mel Frequency Energy` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autofunction:: speechpy.main.lmfe +.. autofunction:: speechpy.feature.lmfe :hidden:`Extract Derivative Features` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autofunction:: speechpy.main.extract_derivative_feature +.. autofunction:: speechpy.feature.extract_derivative_feature diff --git a/docs/source/content/features.rst~ b/docs/source/content/features.rst~ new file mode 100644 index 0000000..a468c08 --- /dev/null +++ b/docs/source/content/features.rst~ @@ -0,0 +1,32 @@ +.. role:: hidden + :class: hidden-section + +Features +========= + +.. automodule:: speechpy.main +.. currentmodule:: speechpy.main + + +:hidden:`MFCC` +~~~~~~~~~~~~~~ + +.. autofunction:: speechpy.main.mfcc + + +:hidden:`Mel Frequency Energy` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: speechpy.main.mfe + + +:hidden:`Log Mel Frequency Energy` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: speechpy.main.lmfe + + +:hidden:`Extract Derivative Features` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: speechpy.main.extract_derivative_feature diff --git a/speechpy/__init__.py b/speechpy/__init__.py index a7bf0cb..b65a3c2 100755 --- a/speechpy/__init__.py +++ b/speechpy/__init__.py @@ -1,3 +1,2 @@ -from .main import * -from .processing import * - +import feature +import processing diff --git a/speechpy/__init__.py~ b/speechpy/__init__.py~ new file mode 100755 index 0000000..e69de29 diff --git a/speechpy/main.py b/speechpy/feature.py similarity index 100% rename from speechpy/main.py rename to speechpy/feature.py diff --git a/speechpy/functions.py~ b/speechpy/functions.py~ new file mode 100755 index 0000000..f751d36 --- /dev/null +++ b/speechpy/functions.py~ @@ -0,0 +1,43 @@ +from __future__ import division +import numpy as np +from . import feature +from scipy.fftpack import dct +import math + + +def frequency_to_mel(f): + """converting from frequency to Mel scale. + + :param f: The frequency values(or a single frequency) in Hz. + :returns: The mel scale values(or a single mel). + """ + return 1127 * np.log(1 + f / 700.) + + +def mel_to_frequency(mel): + """converting from Mel scale to frequency. + + :param mel: The mel scale values(or a single mel). + :returns: The frequency values(or a single frequency) in Hz. + """ + return 700 * (np.exp(mel / 1127.0) - 1) + + +def triangle(x, left, middle, right): + out = np.zeros(x.shape) + out[x <= left] = 0 + out[x >= right] = 0 + first_half = np.logical_and(left < x, x <= middle) + out[first_half] = (x[first_half] - left) / (middle - left) + second_half = np.logical_and(middle <= x, x < right) + out[second_half] = (right - x[second_half]) / (right - middle) + return out + + +def zero_handling(x): + """ + This function handle the issue with zero values if the are exposed to become an argument for any lof function. + :param x: The vector. + :return: The vector with zeros substituted with epsilon values. + """ + return np.where(x == 0, np.finfo(float).eps, x) diff --git a/tests/test_local.py b/tests/test_local.py index f0b036f..b43d71e 100755 --- a/tests/test_local.py +++ b/tests/test_local.py @@ -12,24 +12,29 @@ fs, signal = wav.read(file_name) signal = signal[:,0] +# Example of staching frames +frames = speechpy.processing.stack_frames(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, Filter=lambda x: np.ones((x,)), + zero_padding=True) + +# Example of extracting power spectrum +frames = speechpy.processing.power_spectrum(frames, fft_length=512) + ############# Extract MFCC features ############# -mfcc = speechpy.mfcc(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, +mfcc = speechpy.feature.mfcc(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, num_filters=40, fft_length=512, low_frequency=0, high_frequency=None) -mfcc_cmvn = speechpy.cmvnw(mfcc,win_size=301,variance_normalization=True) +mfcc_cmvn = speechpy.processing.cmvnw(mfcc,win_size=301,variance_normalization=True) print('mfcc(mean + variance normalized) feature shape=', mfcc_cmvn.shape) -mfcc_feature_cube = speechpy.extract_derivative_feature(mfcc) +mfcc_feature_cube = speechpy.feature.extract_derivative_feature(mfcc) print('mfcc feature cube shape=', mfcc_feature_cube.shape) ############# Extract logenergy features ############# -logenergy = speechpy.lmfe(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, +logenergy = speechpy.feature.lmfe(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, num_filters=40, fft_length=512, low_frequency=0, high_frequency=None) -logenergy_feature_cube = speechpy.extract_derivative_feature(logenergy) +logenergy_feature_cube = speechpy.feature.extract_derivative_feature(logenergy) print('logenergy features=', logenergy.shape) -# Example of staching frames -signal = speechpy.stack_frames(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, Filter=lambda x: np.ones((x,)), - zero_padding=True) +