Source code for spafe.features.spfeats

# -*- coding: utf-8 -*-
"""
This module is part of the spafe library and has the purpose of of computing the following spectral stats:
    - meanfreq : mean frequency (in kHz)
    - sd       : standard deviation of frequency
    - median   : median frequency (in kHz)
    - Q25      : first quantile (in kHz)
    - Q75      : third quantile (in kHz)
    - IQR      : interquantile range (in kHz)
    - skew     : skewness (see note in specprop description)
    - kurt     : kurtosis (see note in specprop description)
    - sp.ent   : spectral entropy
    - sfm      : spectral flatness
    - mode     : mode frequency
    - centroid : frequency centroid (see specprop)
    - peakf    : peak frequency (frequency with highest energy)
    - meanfun  : average of fundamental frequency measured across acoustic signal
    - minfun   : minimum fundamental frequency measured across acoustic signal
    - maxfun   : maximum fundamental frequency measured across acoustic signal
    - meandom  : average of dominant frequency measured across acoustic signal
    - mindom   : minimum of dominant frequency measured across acoustic signal
    - maxdom   : maximum of dominant frequency measured across acoustic signal
    - dfrange  : range of dominant frequency measured across acoustic signal
    - modindx  : modulation index. Calculated as the accumulated absolute difference
                 between adjacent measurements of fundamental frequencies divided
                 by the frequency range
    - label    : male or female

Todo:
    * For module TODOs
    * You have to also use ``sphinx.ext.todo`` extension

Reference:
    http://ijeee.iust.ac.ir/article-1-1074-en.pdf
"""
import scipy
import numpy as np
from ..utils.spectral import stft, rfft
from ..frequencies.dominant_frequencies import get_dominant_frequencies
from ..frequencies.fundamental_frequencies import FundamentalFrequenciesExtractor


[docs]def compute_fund_freqs(sig, fs):
    """
    compute fundamental frequencies.

    Args:
        centroid (float) : spectral centroid.
        spectrum (array) : spectrum array.

    Returns:
        (float) spectral spread.
    """
    # fundamental frequencies calculations
    fund_freqs_extractor = FundamentalFrequenciesExtractor(debug=False)
    pitches, harmonic_rates, argmins, times = fund_freqs_extractor.main(
        sig=sig, fs=fs)
    return pitches


[docs]def compute_dom_freqs_and_mod_index(sig,
                                    fs,
                                    lower_cutoff=50,
                                    upper_cutoff=3000,
                                    nfft=512,
                                    win_len=0.03,
                                    win_hop=0.015,
                                    win_type='hamming',
                                    debug=False):
    """
    compute dominant frequencies and modulation index.

    Args:
        sig (array) : spectral centroid.
        fs (int) : spectrum array.

    Returns:
        (float) spectral spread.
    """
    # dominant frequencies calculations
    dom_freqs = get_dominant_frequencies(sig=sig,
                                         fs=fs,
                                         lower_cutoff=50,
                                         upper_cutoff=upper_cutoff,
                                         nfft=nfft,
                                         win_len=win_len,
                                         win_hop=win_hop,
                                         win_type=win_type,
                                         debug=debug)

    # modulation index calculation
    changes = np.abs(dom_freqs[:-1] - dom_freqs[1:])
    dfrange = dom_freqs.max() - dom_freqs.min()
    if dom_freqs.min() == dom_freqs.max():
        mod_index = 0
    else:
        mod_index = changes.mean() / dfrange
    return dom_freqs, mod_index


[docs]def spectral_centroid(sig, fs):
    """
    compute spectral centroid.
    """
    # compute magnitude spectrum
    magnitude_spectrum = np.fft.rfft(sig)
    # compute positive frequencies
    freqs = np.abs(np.fft.fftfreq(len(sig), 1.0 / fs)[:len(sig) // 2 + 1])
    # return weighted mean
    sc = np.sum(magnitude_spectrum * freqs) / np.sum(magnitude_spectrum)
    return sc


[docs]def spectral_flatness(sig):
    """
    compute spectral flatness.
    """
    # compute magnitude spectrum
    magnitude_spectrum = np.fft.rfft(sig)
    # select half of the spectrum due to symetrie
    magnitude_spectrum = magnitude_spectrum[:len(sig) // 2 + 1]
    sf = scipy.stats.mstats.gmean(magnitude_spectrum) / np.mean(
        magnitude_spectrum)
    return sf


[docs]def spectral_rolloff(sig, fs, k=0.85):
    # convert to frequency domain
    magnitude_spectrum, _ = stft(sig=sig, fs=fs)
    power_spectrum = np.abs(magnitude_spectrum)**2
    tbins, fbins = np.shape(magnitude_spectrum)

    # when do these blocks begin (time in seconds)?
    tstamps = (np.arange(0, tbins - 1) * (tbins / float(fs)))
    # compute the spectral sum
    spectral_sum = np.sum(power_spectrum, axis=1)

    # find frequency-bin indeces where the cummulative sum of all bins is higher
    # than k-percent of the sum of all bins. Lowest index = Rolloff
    sr = [
        np.where(np.cumsum(power_spectrum[t, :]) >= k * spectral_sum[t])[0][0]
        for t in range(tbins - 1)
    ]
    sr = np.asarray(sr).astype(float)

    # convert frequency-bin index to frequency in Hz
    sr = (sr / fbins) * (fs / 2.0)
    return sr, np.asarray(tstamps)


[docs]def spectral_flux(sig, fs):
    # convert to frequency domain
    magnitude_spectrum, _ = stft(sig=sig, fs=fs)
    tbins, fbins = np.shape(magnitude_spectrum)

    # when do these blocks begin (time in seconds)?
    tstamps = (np.arange(0, tbins - 1) * (tbins / float(fs)))
    sf = np.sqrt(np.sum(np.diff(np.abs(magnitude_spectrum))**2,
                        axis=1)) / fbins

    return sf[1:], np.asarray(tstamps)


[docs]def spectral_spread(centroid, spectrum, fs):
    """
    Compute the spectral spread (basically a variance of the spectrum around the spectral centroid)

    Args:
        centroid (float) : spectral centroid.
        spectrum (array) : spectrum array.

    Returns:
        (float) spectral spread.
    """
    bin_count, numerator, denominator = 0, 0, 0

    for bin_i in spectrum:
        # Compute center frequency
        f = ((fs / 2.0) / len(spectrum)) * bin_count
        numerator = numerator + (((f - centroid)**2) * abs(bin_i))
        denominator = denominator + abs(bin_i)
        bin_count = bin_count + 1

    return np.sqrt((numerator * 1.0) / denominator)


[docs]def zero_crossing_rate(sig, fs, block_length=256):
    # how many blocks have to be processed?
    num_blocks = int(np.ceil(len(sig) / block_length))

    # when do these blocks begin (time in seconds)?
    timestamps = (np.arange(0, num_blocks - 1) * (block_length / float(fs)))
    zcr = []

    for i in range(0, num_blocks - 1):
        start = i * block_length
        stop = np.min([(start + block_length - 1), len(sig)])

        zc = 0.5 * np.mean(np.abs(np.diff(np.sign(sig[start:stop]))))
        zcr.append(zc)

    return np.asarray(zcr), np.asarray(timestamps)


[docs]def root_mean_square(sig, fs, block_length=256):
    # how many blocks have to be processed?
    num_blocks = int(np.ceil(len(sig) / block_length))

    # when do these blocks begin (time in seconds)?
    tstamps = (np.arange(0, num_blocks - 1) * (block_length / float(fs)))

    rms = []

    for i in range(0, num_blocks - 1):

        start = i * block_length
        stop = np.min([(start + block_length - 1), len(sig)])

        # This is wrong but why? rms_seg = np.sqrt(np.mean(sig[start:stop]**2))
        rms_seg = np.sqrt(np.mean(np.power(sig[start:stop], 2)))
        rms.append(rms_seg)
    return np.asarray(rms), np.asarray(tstamps)


[docs]def spectral_bandwidth(sig, fs):
    return []


[docs]def extract_feats(sig, fs, nfft=512):
    """
    Compute the spectral features.

    Args:
        centroid (float) : spectral centroid.
        spectrum (array) : spectrum array.

    Returns:
        (float) spectral spread.
    """
    # init features dictionary
    feats = {}

    # compute the fft
    fourrier_transform = rfft(sig, nfft)

    # compute magnitude spectrum
    magnitude_spectrum = (1/nfft) * np.abs(fourrier_transform)
    power_spectrum = (1/nfft)**2 * magnitude_spectrum**2

    # get all frequncies and  only keep positive frequencies
    frequencies = np.fft.fftfreq(len(power_spectrum), 1 / fs)
    frequencies = frequencies[np.where(frequencies >= 0)] // 2 + 1

    # keep only half of the spectra
    magnitude_spectrum = magnitude_spectrum[:len(frequencies)]
    power_spectrum = power_spectrum[:len(frequencies)]

    # define amplitudes and spectrum
    spectrum = power_spectrum
    amplitudes = power_spectrum
    amp_cumsum = np.cumsum(amplitudes)

    # general stats
    feats["duration"] = len(sig) / float(fs)
    feats["spectrum"] = spectrum

    # spectral stats I
    feats["mean_frequency"] = frequencies.sum()
    feats["peak_frequency"] = frequencies[np.argmax(amplitudes)]
    feats["frequencies_std"] = frequencies.std()
    feats["amplitudes_cum_sum"] = np.cumsum(amplitudes)
    feats["mode_frequency"] = frequencies[amplitudes.argmax()]
    feats["median_frequency"] = np.median(frequencies)
    feats["frequencies_q25"] = frequencies[len(amp_cumsum[amp_cumsum <= 0.25])-1]
    feats["frequencies_q75"] = frequencies[len(amp_cumsum[amp_cumsum <= 0.75])-1]
    feats["iqr"] = feats["frequencies_q75"] - feats["frequencies_q25"]

    # spectral stats II
    feats["freqs_skewness"] = scipy.stats.skew(frequencies)
    feats["freqs_kurtosis"] = scipy.stats.kurtosis(frequencies)
    feats["spectral_entropy"] = scipy.stats.entropy(amplitudes)
    feats["spectral_flatness"] = spectral_flatness(sig)
    feats["spectral_centroid"] = spectral_centroid(sig, fs)
    feats["spectral_bandwidth"] = spectral_bandwidth(sig, fs)
    feats["spectral_spread"] = spectral_spread(feats["spectral_centroid"],
                                               feats["spectrum"], fs)
    feats["spectral_flatness"] = spectral_flatness(sig)
    feats["spectral_rolloff"] = spectral_rolloff(sig, fs)

    # compute energy
    feats["energy"] = magnitude_spectrum

    # compute root-mean-square (RMS).
    feats["rms"] = root_mean_square(sig=sig, fs=fs)

    # compute the zero-crossing rate of an audio time series
    feats["zcr"] = zero_crossing_rate(sig=sig, fs=fs)

    # spectral stats
    feats["spectral_mean"] = np.mean(spectrum)
    feats["spectral_rms"] = np.sqrt(np.mean(spectrum**2))
    feats["spectral_std"] = np.std(spectrum)
    feats["spectral_variance"] = np.var(spectrum)

    # assign fundamental frequencies stats
    fund_freqs = compute_fund_freqs(sig=sig, fs=fs)
    feats["meanfun"] = fund_freqs.mean()
    feats["minfun"] = fund_freqs.min()
    feats["maxfun"] = fund_freqs.max()

    # assign dominant frequencies stats
    dom_freqs, mod_idx = compute_dom_freqs_and_mod_index(sig=sig,
                                                         fs=fs,
                                                         lower_cutoff = 50,
                                                         upper_cutoff = 3000,
                                                         nfft = 512,
                                                         win_len = 0.03,
                                                         win_hop = 0.015,
                                                         win_type = 'hamming',
                                                         debug = False)
    feats["meandom"] = dom_freqs.mean()
    feats["mindom"] = dom_freqs.min()
    feats["maxdom"] = dom_freqs.max()

    # range of dominant frequency measured across acoustic signal
    feats["dfrange"] = feats["maxdom"] - feats["mindom"]

    # modulation index: Calculated as the accumulated absolute difference
    # between adjacent measurements of fundamental frequencies divided by the
    # frequency range
    feats["modindex"] = mod_idx
    return feats