Source code for spafe.features.lpc

import numpy as np
from scipy.fftpack import ifft
from spafe.utils import levinsondr
from spafe.utils.filters import rasta_filter
from ..utils.preprocessing import pre_emphasis
from spafe.utils.cepstral import cms, cmvn, lifter_ceps
from spafe.utils.spectral import powspec, audspec, postaud, invpowspec


[docs]def lpcc(sig,
         fs=16000,
         num_ceps=13,
         pre_emph=1,
         pre_emph_coeff=0.97,
         win_type="hann",
         win_len=0.025,
         win_hop=0.01,
         do_rasta=True,
         lifter=1,
         normalize=1,
         dither=1):
    """
    Compute the LINEAR PREDICTIVE CEPSTRAL COEFFICIENTS (LPCC) from an audio signal.

    Args:
        sig            (array) : a mono audio signal (Nx1) from which to compute features.
        fs               (int) : the sampling frequency of the signal we are working with.
                                 Default is 16000.
        num_ceps       (float) : number of cepstra to return (order of the model to compute).
                                 Default is 13.
        pre_emph         (int) : apply pre-emphasis if 1.
                                 Default is 1.
        pre_emph_coeff (float) : apply pre-emphasis filter [1 -pre_emph] (0 = none).
                                 Default is 0.97.
        win_type       (float) : window type to apply for the windowing.
                                 Default is hanning.
        win_len        (float) : window length in sec.
                                 Default is 0.025.
        win_hop        (float) : step between successive windows in sec.
                                 Default is 0.01.
        do_rasta         (int) : if 1 then apply rasta filtering.
                                 Default is 0.
        lifter           (int) : apply liftering if value > 0.
                                 Default is 22.
        normalize        (int) : apply normalization if 1.
                                 Default is 0.
        dither           (int) : 1 = add offset to spectrum as if dither noise.
                                 Default is 0.
    Returns:
        (array) : 2d array of LPCC features (num_frames x num_ceps)
    """
    lpcs = lpc(sig=sig,
               fs=fs,
               num_ceps=num_ceps,
               pre_emph=pre_emph,
               pre_emph_coeff=pre_emph_coeff,
               win_len=win_len,
               win_hop=win_hop,
               do_rasta=True,
               dither=dither)
    lpccs = lpc2cep(lpcs.T)

    # liftering
    if lifter > 0:
        lpccs = lifter_ceps(lpccs, lifter)

    # normalization
    if normalize:
        lpccs = cmvn(cms(lpccs))

    lpccs = lpccs.T
    return lpccs[:, :]


[docs]def lpc(sig,
        fs=16000,
        num_ceps=13,
        pre_emph=0,
        pre_emph_coeff=0.97,
        win_type="hann",
        win_len=0.025,
        win_hop=0.01,
        do_rasta=True,
        dither=1):
    """
    Compute the LINEAR PREDICTIVE COEFFICIENTS (LPC) from an audio signal.

    Args:
        sig            (array) : a mono audio signal (Nx1) from which to compute features.
        fs               (int) : the sampling frequency of the signal we are working with.
                                 Default is 16000.
        num_ceps       (int) : number of cepstra to return(order of the model to compute).
                                 Default is 13.
        pre_emph         (int) : apply pre-emphasis if 1.
                                 Default is 1.
        pre_emph_coeff (float) : apply pre-emphasis filter [1 -pre_emph] (0 = none).
                                 Default is 0.97.
        win_type       (float) : window type to apply for the windowing.
                                 Default is hanning.
        win_len        (float) : window length in sec.
                                 Default is 0.025.
        win_hop        (float) : step between successive windows in sec.
                                 Default is 0.01.
        do_rasta         (int) : if 1 then apply rasta filtering.
                                 Default is 0.
        lifter           (int) : apply liftering if value > 0.
                                 Default is 22.
        normalize        (int) : apply normalization if 1.
                                 Default is 0.
        dither           (int) : 1 = add offset to spectrum as if dither noise.
                                 Default is 0.
    Returns:
        (array) : 2d array of LPC features (num_frames x num_ceps)
    """
    # pre-emphasis
    if pre_emph:
        sig = pre_emphasis(sig=sig, pre_emph_coeff=pre_emph_coeff)

    # compute power spectrum
    power_spectrum, _ = powspec(sig=sig,
                                fs=fs,
                                win_type=win_type,
                                win_len=win_len,
                                win_hop=win_hop,
                                dither=dither)

    # group to critical bands
    auditory_spectrum = audspec(power_spectrum, fs)
    nbands = auditory_spectrum.shape[0]

    if do_rasta:
        # put in log domain
        log_auditory_spectrum = np.log(auditory_spectrum)
        # next do rasta filtering
        rasta_filtered_log_auditory_spectrum = rasta_filter(
            log_auditory_spectrum)
        # do inverse log
        auditory_spectrum = np.exp(rasta_filtered_log_auditory_spectrum)

    post_processing_spectrum, _ = postaud(auditory_spectrum, fs / 2)
    lpcs = do_lpc(x=post_processing_spectrum, model_order=num_ceps)
    lpcs = lpcs.T
    return lpcs[:, :num_ceps]


[docs]def do_lpc(x, model_order=8):
    """
    Compute the autoregressive model from spectral magnitude samples.

    Args:
        x         (array) : array of the audio signal to process.
        model_order (int) : order of the model to compute.

    Returns:
        array of the autoregressive model
    """
    nbands, nframes = x.shape
    ncorr = 2 * (nbands - 1)
    R = np.zeros((ncorr, nframes))

    R[0:nbands, :] = x
    for i in range(nbands - 1):
        R[i + nbands - 1, :] = x[nbands - (i + 1), :]

    r = ifft(R.T).real.T
    r = r[0:nbands, :]

    y = np.ones((nframes, model_order + 1))
    e = np.zeros((nframes, 1))

    if model_order == 0:
        for i in range(nframes):
            _, e_tmp, _ = levinsondr.LEVINSON(r[:, i],
                                              model_order,
                                              allow_singularity=True)
            e[i, 0] = e_tmp
    else:
        for i in range(nframes):
            y_tmp, e_tmp, _ = levinsondr.LEVINSON(r[:, i],
                                                  model_order,
                                                  allow_singularity=True)
            y[i, 1:model_order + 1] = y_tmp
            e[i, 0] = e_tmp

    y = y.T / (np.tile(e.T, (model_order + 1, 1)) + 1e-8)

    return y


[docs]def lpc2cep(a, nout=0):
    """
    convert LPC coefficients directly to cepstral values.
     - convert the LPC 'a' coefficients in each column of lpcs into frames of cepstra.

    Args:
        a  (array) : cepstral values.
        nout (int) : number of cepstra to produce

    Returns:
        array of LPC coefficients.
        Default size(lpcs, 1)
    """
    nin, ncol = a.shape

    order = nin - 1

    if nout == 0:
        nout = order + 1

    cep = np.zeros((nout, ncol))
    cep[0, :] = -np.log(a[0, :])

    norm_a = np.divide(a, np.add(np.tile(a[0, :], (nin, 1)), 1e-8))

    for n in range(1, nout):
        sum_var = 0
        for m in range(1, n):
            sum_var = np.add(
                sum_var,
                np.multiply(np.multiply((n - m), norm_a[m, :]),
                            cep[(n - m), :]))

        cep[n, :] = -np.add(norm_a[n, :], np.divide(sum_var, n))

    return cep


[docs]def lpc2spec(lpcs, nout=17, FMout=False):
    """
    convert LPC coefficients back into spectra by sampling the z-plane.

    Args:
        lpcs (array) : array including the LPC coefficients.
        nout   (int) : number of freq channels, default 17 (i.e. for 8 kHz)
        FMout (bool) :

    Returns:
        list including the features, F and M
    """
    rows, cols = lpcs.shape
    order = rows - 1

    gg = lpcs[0, :]
    aa = lpcs / np.tile(gg, (rows, 1))

    # Calculate the actual z-plane polyvals: nout points around unit circle
    tmp_1 = np.array(np.arange(0, nout), ndmin=2).T
    tmp_1 = (-1j * tmp_1 * np.pi) / (nout - 1)
    tmp_2 = np.array(np.arange(0, order + 1), ndmin=2)
    zz = np.exp(np.matmul(tmp_1, tmp_2))

    # Actual polyvals, in power (mag^2)
    features = np.tile(gg, (nout, 1)) / np.abs(np.matmul(zz, aa))**2
    F = np.zeros((cols, int(np.ceil(rows / 2))))
    M = F

    if FMout:
        for c in range(cols):
            aaa = aa[:, c]
            rr = np.roots(aaa)
            ff_tmp = np.angle(rr)
            ff = np.array(ff_tmp, ndmin=2).T
            zz = np.exp(
                1j *
                np.matmul(ff, np.array(np.arange(0, aaa.shape[0]), ndmin=2)))
            mags = np.sqrt(gg[c] /
                           np.abs(np.matmul(zz,
                                            np.array(aaa, ndmin=2).T))**2)

            ix = np.argsort(ff_tmp)
            dummy = np.sort(ff_tmp)
            tmp_F_list = []
            tmp_M_list = []

            for i in range(ff.shape[0]):
                if dummy[i] > 0:
                    tmp_F_list = np.append(tmp_F_list, dummy[i])
                    tmp_M_list = np.append(tmp_M_list, mags[ix[i]])

            M[c, 0:tmp_M_list.shape[0]] = tmp_M_list
            F[c, 0:tmp_F_list.shape[0]] = tmp_F_list

    return features, F, M