Source code for spafe.utils.converters

import numpy as np
from ..utils.exceptions import ParameterError, ErrorMsgs

# Constants of the Slaney-style (non-HTK) mel mapping used by hz2mel / mel2hz.
# The mapping is linear up to BARK_FREQ and logarithmic above it.
F0 = 0  # frequency offset [Hz] subtracted on the linear segment
FSP = 200 / 3  # Hz per mel on the linear segment
BARK_FREQ = 1000  # frequency [Hz] where the logarithmic segment starts
BARK_PT = (BARK_FREQ - F0) / FSP  # mel value that corresponds to BARK_FREQ
LOGSTEP = np.exp(np.log(6.4) / 27.0)  # frequency ratio per mel on the log segment


def hz2erb(f):
    """
    Convert Hz frequencies to the ERB scale.

    Args:
        f (np.array) : input frequencies [Hz].

    Returns:
        (np.array): ERB values, computed as 24.7 * (4.37 * f / 1000 + 1).
    """
    khz = f / 1000
    return 24.7 * (4.37 * khz + 1)
def erb2hz(fe):
    """
    Convert ERB values back to Hz frequencies.

    Args:
        fe (np.array) : input ERB values.

    Returns:
        (np.array) : frequencies in Hz [Hz], computed as
                     (fe / 24.7 - 1) * (1000 / 4.37).
    """
    normalized = (fe / 24.7) - 1
    return normalized * (1000. / 4.37)
def fft2erb(fft, fs, nfft):
    """
    Convert FFT bin numbers to ERB values.

    Args:
        fft (np.array) : fft bin numbers.
        fs       (int) : sample rate/ sampling frequency of the signal.
        nfft     (int) : the FFT size.

    Returns:
        (np.array): ERB values of the bins, i.e. hz2erb applied to
                    fft * fs / (nfft + 1).
    """
    # map the bin numbers to Hz first, then to the ERB scale
    bin_hz = (fft * fs) / (nfft + 1)
    return hz2erb(bin_hz)
def erb2fft(fb, fs, nfft):
    """
    Convert ERB values to FFT bin numbers.

    Args:
        fb (np.array) : input ERB values.
        fs      (int) : sample rate/ sampling frequency of the signal.
        nfft    (int) : the FFT size.

    Returns:
        (np.array) : fft bin numbers (not rounded), computed as
                     (nfft + 1) * erb2hz(fb) / fs.
    """
    # go back to Hz first, then scale to bin numbers
    freqs_hz = erb2hz(fb)
    return (nfft + 1) * freqs_hz / fs
def hz2bark(f):
    """
    Convert Hz frequencies to Bark according to Wang, Sekey & Gersho, 1992.

    Args:
        f (np.array) : input frequencies [Hz].

    Returns:
        (np.array): frequencies in Bark [Bark], computed as
                    6 * arcsinh(f / 600).
    """
    scaled = f / 600.
    return 6. * np.arcsinh(scaled)
def bark2hz(fb):
    """
    Convert Bark frequencies to Hz.

    Args:
        fb (np.array) : input frequencies [Bark].

    Returns:
        (np.array) : frequencies in Hz [Hz], computed as
                     600 * sinh(fb / 6).
    """
    scaled = fb / 6.
    return 600. * np.sinh(scaled)
def fft2hz(fft, fs, nfft):
    """
    Convert FFT bin numbers to Hz frequencies.

    Args:
        fft (np.array) : fft bin numbers.
        fs       (int) : sample rate/ sampling frequency of the signal.
        nfft     (int) : the FFT size.

    Returns:
        (np.array): frequencies in Hz [Hz], computed as
                    fft * fs / (nfft + 1).
    """
    divisor = nfft + 1
    return (fft * fs) / divisor
def hz2fft(fb, fs, nfft):
    """
    Convert Hz frequencies to FFT bin numbers.

    Args:
        fb (np.array) : input frequencies [Hz].
        fs      (int) : sample rate/ sampling frequency of the signal.
        nfft    (int) : the FFT size.

    Returns:
        (np.array) : fft bin numbers (not rounded), computed as
                     (nfft + 1) * fb / fs.
    """
    scaled = (nfft + 1) * fb
    return scaled / fs
def fft2bark(fft, fs, nfft):
    """
    Convert FFT bin numbers to Bark frequencies.

    Args:
        fft (np.array) : fft bin numbers.
        fs       (int) : sample rate/ sampling frequency of the signal.
        nfft     (int) : the FFT size.

    Returns:
        (np.array): frequencies in Bark [Bark], i.e. hz2bark applied to
                    fft * fs / (nfft + 1).
    """
    # map the bin numbers to Hz first, then to the Bark scale
    bin_hz = (fft * fs) / (nfft + 1)
    return hz2bark(bin_hz)
def bark2fft(fb, fs, nfft):
    """
    Convert Bark frequencies to FFT bin numbers.

    Args:
        fb (np.array) : frequencies in Bark [Bark].
        fs      (int) : sample rate/ sampling frequency of the signal.
        nfft    (int) : the FFT size.

    Returns:
        (np.array) : fft bin numbers (not rounded), computed as
                     (nfft + 1) * bark2hz(fb) / fs.
    """
    # go back to Hz first, then scale to bin numbers
    freqs_hz = bark2hz(fb)
    return (nfft + 1) * freqs_hz / fs
def hz2mel(hz, htk=1):
    """
    Convert a value in Hertz to Mels.

    Args:
        hz  : a value in Hz. This can also be a numpy array, conversion
              proceeds element-wise.
        htk : Optional variable, if htk = 1 uses the mel axis defined in the
              HTKBook otherwise use Slaney's formula.

    Returns:
        a value in Mels. With htk = 1 the result has the same shape as the
        input. With Slaney's formula the input is promoted to at least one
        dimension, so a scalar input yields a one-element array.
    """
    if htk == 1:
        return 2595 * np.log10(1 + hz / 700.)

    # Slaney's curve: linear below BARK_FREQ, logarithmic from there on.
    hz = np.array(hz, ndmin=1, dtype=float)
    mel = (hz - F0) / FSP

    # The split is made on the frequency axis, so the threshold has to be
    # BARK_FREQ [Hz]; BARK_PT is the mel value at that frequency and must not
    # be compared against Hz input. This also keeps hz2mel the inverse of
    # mel2hz, which splits at BARK_PT on the mel axis.
    log_region = hz >= BARK_FREQ
    mel[log_region] = BARK_PT + (
        np.log(hz[log_region] / BARK_FREQ) / np.log(LOGSTEP))
    return mel
def mel2hz(mel, htk=1):
    """
    Convert a value in Mels to Hertz.

    Args:
        mel : a value in Mels. This can also be a numpy array, conversion
              proceeds element-wise.
        htk : Optional variable, if htk = 1 uses the mel axis defined in the
              HTKBook otherwise use Slaney's formula.

    Returns:
        a value in Hertz. With htk = 1 the result has the same shape as the
        input. With Slaney's formula the input is promoted to at least one
        dimension, so a scalar input yields a one-element array.
    """
    if htk == 1:
        return 700 * (10**(mel / 2595.0) - 1)

    # Slaney's curve: linear below BARK_PT mels, exponential from there on.
    mel = np.array(mel, ndmin=1, dtype=float)
    hz = F0 + FSP * mel

    # Overwrite only the entries on the logarithmic segment; masking keeps the
    # computation element-wise without a Python-level loop.
    log_region = mel >= BARK_PT
    hz[log_region] = BARK_FREQ * np.exp(
        np.log(LOGSTEP) * (mel[log_region] - BARK_PT))
    return hz
def fft2melmx(nfft,
              fs,
              nfilts=0,
              bwidth=1,
              low_freq=0,
              high_freq=0,
              htk=False,
              constamp=False):
    """
    Generate a matrix of weights to combine FFT bins into Mel bins.

    Args:
        nfft      (int) : the FFT size.
        fs        (int) : sample rate/ sampling frequency of the signal.
        nfilts    (int) : the number of filters in the filterbank. The default
                          0 derives it as ceil(hz2mel(high_freq, htk) / 2).
        bwidth    (int) : the constant width of each band relative to standard
                          Mel. Default is 1.
        low_freq  (int) : lowest band edge of mel filters. Default is 0 Hz.
        high_freq (int) : highest band edge of mel filters. The default 0 is
                          replaced by fs / 2.
        htk      (bool) : passed on to hz2mel / mel2hz: use HTK's version of
                          the mel curve, not Slaney's. Default is False.
        constamp (bool) : if True then make integration windows peak at 1,
                          not sum to 1. Default is False.

    Notes:
        `low_freq` default is 0, but 133.33 is a common standard (to skip low
        frequencies).
        The parameters meant to reproduce the mel matrix of Slaney's toolbox
        are `fft2melmx(nfft=512, fs=8000, nfilts=40, bwidth=1,
        low_freq=133.33, high_freq=6855.5, htk=False)`.

    Raises:
        ParameterError: if `nfilts` or `nfft` is not an integer.

    Returns:
        (np.array) : matrix of weights of shape (nfilts, nfft) to combine FFT
                     bins into Mel bins. Only the first nfft // 2 + 1 columns
                     are filled, the remaining ones stay zero.
    """
    if high_freq == 0:
        high_freq = fs / 2

    if nfilts == 0:
        # hz2mel can return a one-element array; pick the scalar explicitly,
        # since int() on an array with ndim > 0 is deprecated in NumPy.
        nfilts = int(np.ceil(np.ravel(hz2mel(high_freq, htk))[0] / 2))

    # NumPy integer scalars are as valid as builtin ints here.
    if not isinstance(nfilts, (int, np.integer)):
        raise ParameterError(ErrorMsgs["nfilts"])
    if not isinstance(nfft, (int, np.integer)):
        raise ParameterError(ErrorMsgs["nfft"])

    # Number of non-negative frequency bins. Integer division keeps this in
    # step with the slice filled below, also when nfft is odd.
    nbins = nfft // 2 + 1
    wts = np.zeros((nfilts, nfft))
    fftfrqs = (fs / nfft) * np.arange(nbins)

    # filter edges: nfilts + 2 points equally spaced on the mel axis
    min_mel = hz2mel(low_freq, htk)
    max_mel = hz2mel(high_freq, htk)
    dif_mel = max_mel - min_mel
    binfrqs = mel2hz(
        min_mel + np.arange(0, nfilts + 2) * dif_mel / (nfilts + 1), htk)

    for i in range(nfilts):
        # lower edge, center and upper edge of filter i, with the edges moved
        # towards / away from the center according to bwidth
        edges = binfrqs[i:i + 3]
        edges = edges[1] + bwidth * (edges - edges[1])

        # rising and falling slopes; the triangle is the smaller of the two,
        # clipped at zero
        loslope = (fftfrqs - edges[0]) / (edges[1] - edges[0])
        hislope = (edges[2] - fftfrqs) / (edges[2] - edges[1])
        wts[i, :nbins] = np.maximum(0, np.minimum(loslope, hislope))

    if not constamp:
        # scale every filter (row) by 2 / (upper edge - lower edge)
        row_scale = 2 / (binfrqs[2:nfilts + 2] - binfrqs[0:nfilts])
        wts = wts * row_scale[:, np.newaxis]
    return wts
def fft2barkmx(nfft, fs, nfilts=0, bwidth=1, low_freq=0, high_freq=0):
    """
    Generate a matrix of weights to combine FFT bins into Bark bins.

    Args:
        nfft      (int) : the FFT size.
        fs        (int) : sample rate/ sampling frequency of the signal.
        nfilts    (int) : the number of filters in the filterbank. The default
                          0 derives it from the covered Bark range as
                          ceil(hz2bark(high_freq) - hz2bark(low_freq)) + 1.
        bwidth    (int) : the constant width of each band in Bark.
                          Default is 1.
        low_freq  (int) : lowest band edge of the filters. Default is 0 Hz.
        high_freq (int) : highest band edge of the filters. The default 0 is
                          replaced by fs / 2.

    Notes:
        Optional nfilts specifies the number of output bands required (else
        one per bark), and bwidth is the constant width of each band in Bark
        (default 1).

    Raises:
        ParameterError: if `nfilts` or `nfft` is not an integer.

    Returns:
        (np.array) : matrix of weights of shape (nfilts, nfft) to combine FFT
                     bins into Bark bins. Only the first nfft // 2 + 1 columns
                     are filled, the remaining ones stay zero.
    """
    if high_freq == 0:
        high_freq = fs / 2

    min_bark = hz2bark(low_freq)
    nyqbark = hz2bark(high_freq) - min_bark

    if nfilts == 0:
        nfilts = int(np.ceil(nyqbark)) + 1

    # NumPy integer scalars are as valid as builtin ints here.
    if not isinstance(nfilts, (int, np.integer)):
        raise ParameterError(ErrorMsgs["nfilts"])
    if not isinstance(nfft, (int, np.integer)):
        raise ParameterError(ErrorMsgs["nfft"])

    # Number of non-negative frequency bins. Integer division keeps this in
    # step with the slice filled below, also when nfft is odd.
    nbins = nfft // 2 + 1
    wts = np.zeros((nfilts, nfft))

    # Distance between neighbouring filter centers. A single filter has no
    # neighbour: it sits at min_bark and the step is irrelevant, so avoid the
    # division by zero that would turn the whole matrix into NaN.
    step_barks = nyqbark / (nfilts - 1) if nfilts > 1 else 0.0
    binbarks = hz2bark((fs / nfft) * np.arange(nbins))

    for i in range(nfilts):
        f_bark_mid = min_bark + i * step_barks

        # distance of every bin to the two flanks of filter i, in Bark
        lof = binbarks - f_bark_mid - 0.5
        hif = binbarks - f_bark_mid + 0.5
        wts[i, :nbins] = 10**np.minimum(
            0, np.minimum(hif, -2.5 * lof) / bwidth)
    return wts