import numpy as np
from scipy.fftpack import ifft
from spafe.utils import levinsondr
from spafe.utils.filters import rasta_filter
from ..utils.preprocessing import pre_emphasis
from spafe.utils.cepstral import cms, cmvn, lifter_ceps
from spafe.utils.spectral import powspec, audspec, postaud, invpowspec
[docs]def lpcc(sig,
fs=16000,
num_ceps=13,
pre_emph=1,
pre_emph_coeff=0.97,
win_type="hann",
win_len=0.025,
win_hop=0.01,
do_rasta=True,
lifter=1,
normalize=1,
dither=1):
"""
Compute the LINEAR PREDICTIVE CEPSTRAL COEFFICIENTS (LPCC) from an audio signal.
Args:
sig (array) : a mono audio signal (Nx1) from which to compute features.
fs (int) : the sampling frequency of the signal we are working with.
Default is 16000.
num_ceps (float) : number of cepstra to return (order of the model to compute).
Default is 13.
pre_emph (int) : apply pre-emphasis if 1.
Default is 1.
pre_emph_coeff (float) : apply pre-emphasis filter [1 -pre_emph] (0 = none).
Default is 0.97.
win_type (float) : window type to apply for the windowing.
Default is hanning.
win_len (float) : window length in sec.
Default is 0.025.
win_hop (float) : step between successive windows in sec.
Default is 0.01.
do_rasta (int) : if 1 then apply rasta filtering.
Default is 0.
lifter (int) : apply liftering if value > 0.
Default is 22.
normalize (int) : apply normalization if 1.
Default is 0.
dither (int) : 1 = add offset to spectrum as if dither noise.
Default is 0.
Returns:
(array) : 2d array of LPCC features (num_frames x num_ceps)
"""
lpcs = lpc(sig=sig,
fs=fs,
num_ceps=num_ceps,
pre_emph=pre_emph,
pre_emph_coeff=pre_emph_coeff,
win_len=win_len,
win_hop=win_hop,
do_rasta=True,
dither=dither)
lpccs = lpc2cep(lpcs.T)
# liftering
if lifter > 0:
lpccs = lifter_ceps(lpccs, lifter)
# normalization
if normalize:
lpccs = cmvn(cms(lpccs))
lpccs = lpccs.T
return lpccs[:, :]
[docs]def lpc(sig,
fs=16000,
num_ceps=13,
pre_emph=0,
pre_emph_coeff=0.97,
win_type="hann",
win_len=0.025,
win_hop=0.01,
do_rasta=True,
dither=1):
"""
Compute the LINEAR PREDICTIVE COEFFICIENTS (LPC) from an audio signal.
Args:
sig (array) : a mono audio signal (Nx1) from which to compute features.
fs (int) : the sampling frequency of the signal we are working with.
Default is 16000.
num_ceps (int) : number of cepstra to return(order of the model to compute).
Default is 13.
pre_emph (int) : apply pre-emphasis if 1.
Default is 1.
pre_emph_coeff (float) : apply pre-emphasis filter [1 -pre_emph] (0 = none).
Default is 0.97.
win_type (float) : window type to apply for the windowing.
Default is hanning.
win_len (float) : window length in sec.
Default is 0.025.
win_hop (float) : step between successive windows in sec.
Default is 0.01.
do_rasta (int) : if 1 then apply rasta filtering.
Default is 0.
lifter (int) : apply liftering if value > 0.
Default is 22.
normalize (int) : apply normalization if 1.
Default is 0.
dither (int) : 1 = add offset to spectrum as if dither noise.
Default is 0.
Returns:
(array) : 2d array of LPC features (num_frames x num_ceps)
"""
# pre-emphasis
if pre_emph:
sig = pre_emphasis(sig=sig, pre_emph_coeff=pre_emph_coeff)
# compute power spectrum
power_spectrum, _ = powspec(sig=sig,
fs=fs,
win_type=win_type,
win_len=win_len,
win_hop=win_hop,
dither=dither)
# group to critical bands
auditory_spectrum = audspec(power_spectrum, fs)
nbands = auditory_spectrum.shape[0]
if do_rasta:
# put in log domain
log_auditory_spectrum = np.log(auditory_spectrum)
# next do rasta filtering
rasta_filtered_log_auditory_spectrum = rasta_filter(
log_auditory_spectrum)
# do inverse log
auditory_spectrum = np.exp(rasta_filtered_log_auditory_spectrum)
post_processing_spectrum, _ = postaud(auditory_spectrum, fs / 2)
lpcs = do_lpc(x=post_processing_spectrum, model_order=num_ceps)
lpcs = lpcs.T
return lpcs[:, :num_ceps]
[docs]def do_lpc(x, model_order=8):
"""
Compute the autoregressive model from spectral magnitude samples.
Args:
x (array) : array of the audio signal to process.
model_order (int) : order of the model to compute.
Returns:
array of the autoregressive model
"""
nbands, nframes = x.shape
ncorr = 2 * (nbands - 1)
R = np.zeros((ncorr, nframes))
R[0:nbands, :] = x
for i in range(nbands - 1):
R[i + nbands - 1, :] = x[nbands - (i + 1), :]
r = ifft(R.T).real.T
r = r[0:nbands, :]
y = np.ones((nframes, model_order + 1))
e = np.zeros((nframes, 1))
if model_order == 0:
for i in range(nframes):
_, e_tmp, _ = levinsondr.LEVINSON(r[:, i],
model_order,
allow_singularity=True)
e[i, 0] = e_tmp
else:
for i in range(nframes):
y_tmp, e_tmp, _ = levinsondr.LEVINSON(r[:, i],
model_order,
allow_singularity=True)
y[i, 1:model_order + 1] = y_tmp
e[i, 0] = e_tmp
y = y.T / (np.tile(e.T, (model_order + 1, 1)) + 1e-8)
return y
[docs]def lpc2cep(a, nout=0):
"""
convert LPC coefficients directly to cepstral values.
- convert the LPC 'a' coefficients in each column of lpcs into frames of cepstra.
Args:
a (array) : cepstral values.
nout (int) : number of cepstra to produce
Returns:
array of LPC coefficients.
Default size(lpcs, 1)
"""
nin, ncol = a.shape
order = nin - 1
if nout == 0:
nout = order + 1
cep = np.zeros((nout, ncol))
cep[0, :] = -np.log(a[0, :])
norm_a = np.divide(a, np.add(np.tile(a[0, :], (nin, 1)), 1e-8))
for n in range(1, nout):
sum_var = 0
for m in range(1, n):
sum_var = np.add(
sum_var,
np.multiply(np.multiply((n - m), norm_a[m, :]),
cep[(n - m), :]))
cep[n, :] = -np.add(norm_a[n, :], np.divide(sum_var, n))
return cep
[docs]def lpc2spec(lpcs, nout=17, FMout=False):
"""
convert LPC coefficients back into spectra by sampling the z-plane.
Args:
lpcs (array) : array including the LPC coefficients.
nout (int) : number of freq channels, default 17 (i.e. for 8 kHz)
FMout (bool) :
Returns:
list including the features, F and M
"""
rows, cols = lpcs.shape
order = rows - 1
gg = lpcs[0, :]
aa = lpcs / np.tile(gg, (rows, 1))
# Calculate the actual z-plane polyvals: nout points around unit circle
tmp_1 = np.array(np.arange(0, nout), ndmin=2).T
tmp_1 = (-1j * tmp_1 * np.pi) / (nout - 1)
tmp_2 = np.array(np.arange(0, order + 1), ndmin=2)
zz = np.exp(np.matmul(tmp_1, tmp_2))
# Actual polyvals, in power (mag^2)
features = np.tile(gg, (nout, 1)) / np.abs(np.matmul(zz, aa))**2
F = np.zeros((cols, int(np.ceil(rows / 2))))
M = F
if FMout:
for c in range(cols):
aaa = aa[:, c]
rr = np.roots(aaa)
ff_tmp = np.angle(rr)
ff = np.array(ff_tmp, ndmin=2).T
zz = np.exp(
1j *
np.matmul(ff, np.array(np.arange(0, aaa.shape[0]), ndmin=2)))
mags = np.sqrt(gg[c] /
np.abs(np.matmul(zz,
np.array(aaa, ndmin=2).T))**2)
ix = np.argsort(ff_tmp)
dummy = np.sort(ff_tmp)
tmp_F_list = []
tmp_M_list = []
for i in range(ff.shape[0]):
if dummy[i] > 0:
tmp_F_list = np.append(tmp_F_list, dummy[i])
tmp_M_list = np.append(tmp_M_list, mags[ix[i]])
M[c, 0:tmp_M_list.shape[0]] = tmp_M_list
F[c, 0:tmp_F_list.shape[0]] = tmp_F_list
return features, F, M