Source code for spafe.frequencies.fundamental_frequencies
"""
Credits to:
Patrice Guyot. (2018, April 19).
Fast Python implementation of the Yin algorithm (Version v1.1.1).
Zenodo. http://doi.org/10.5281/zenodo.1220947
"""
import time
import scipy
import numpy as np
import scipy.io.wavfile
from scipy import signal
import matplotlib.pyplot as plt
[docs]class FundamentalFrequenciesExtractor:
def __init__(self, debug=False):
self.debug = debug
[docs] def differenceFunction(self, x, N, tau_max):
"""
Compute difference function of data x. This corresponds to equation (6) in [1]
Fastest implementation. Use the same approach than differenceFunction_scipy.
This solution is implemented directly with np fft.
Args:
x (array) : audio data
N (int) : length of data
tau_max (int) : integration window size
Returns:
(list) : difference function
"""
x = np.array(x, np.float64)
w = x.size
x_cumsum = np.concatenate((np.array([0]), (x * x).cumsum()))
conv = scipy.signal.fftconvolve(x, x[::-1])
tmp = x_cumsum[w:0:-1] + x_cumsum[w] - x_cumsum[:w] - 2 * conv[w - 1:]
return tmp[:tau_max]
[docs] def cumulativeMeanNormalizedDifferenceFunction(self, df, N):
"""
Compute cumulative mean normalized difference function (CMND).
This corresponds to equation (8) in [1].
Args:
df (list) : Difference function
N (int) : length of data
tau_max (int) : integration window size
Returns:
(list) : cumulative mean normalized difference function
"""
cmndf = df[1:] * range(1, N) / np.cumsum(df[1:]).astype(
float) # scipy method
return np.insert(cmndf, 0, 1)
[docs] def getPitch(self, cmdf, tau_min, tau_max, harmo_th=0.1):
"""
Return fundamental period of a frame based on CMND function.
- cmdf: Cumulative Mean Normalized Difference function
Args:
tau_min (int) : minimum period for speech
tau_max (int) : maximum period for speech
harmo_th (float) : harmonicity threshold to determine if it is
necessary to compute pitch frequency
Returns:
(float) : fundamental period if there is values under threshold, 0 otherwise
"""
tau = tau_min
while tau < tau_max:
if cmdf[tau] < harmo_th:
while tau + 1 < tau_max and cmdf[tau + 1] < cmdf[tau]:
tau += 1
return tau
tau += 1
return 0 # if unvoiced
[docs] def compute_yin(self,
sig,
fs,
dataFileName=None,
w_len=512,
w_step=256,
f0_min=50,
f0_max=3000,
harmo_thresh=0.1):
"""
Compute the Yin Algorithm. Return fundamental frequency and harmonic rate.
Args:
sig (list) : Audio signal (list of float)
fs (int) : sampling rate (= average number of samples pro 1 second)
w_len (int) : size of the analysis window (in #samples)
w_step (int) : size of the lag between two consecutives windows (in #samples)
f0_min (int) : Minimum fundamental frequency that can be detected (in Hertz)
f0_max (int) : Maximum fundamental frequency that can be detected (in Hertz)
harmo_tresh (int) : Threshold of detection. The yalgorithmù return the
first minimum of the CMND fubction below this threshold.
Returns:
(tuple) : tuple include the following
- pitches : list of fundamental frequencies,
- harmonic_rates: list of harmonic rate values for each fundamental
frequency value (= confidence value)
- argmins : minimums of the Cumulative Mean Normalized DifferenceFunction
- times : list of time of each estimation
"""
if self.debug:
print('Yin: compute yin algorithm')
tau_min, tau_max = int(fs / f0_max), int(fs / f0_min)
timeScale = range(0,
len(sig) - w_len,
w_step) # time values for each analysis window
times = [t / float(fs) for t in timeScale]
frames = [sig[t:t + w_len] for t in timeScale]
pitches = [0.0] * len(timeScale)
harmonic_rates = [0.0] * len(timeScale)
argmins = [0.0] * len(timeScale)
for i, frame in enumerate(frames):
# Compute YIN
df = self.differenceFunction(frame, w_len, tau_max)
cmdf = self.cumulativeMeanNormalizedDifferenceFunction(df, tau_max)
p = self.getPitch(cmdf, tau_min, tau_max, harmo_thresh)
# Get results
if np.argmin(cmdf) > tau_min:
argmins[i] = float(fs / np.argmin(cmdf))
# A pitch was found
if p != 0:
pitches[i] = float(fs / p)
harmonic_rates[i] = cmdf[p]
# No pitch, but we compute a value of the harmonic rate
else:
harmonic_rates[i] = min(cmdf)
return pitches, harmonic_rates, argmins, times
[docs] def main(self,
sig,
fs,
w_len=1024,
w_step=256,
f0_min=70,
f0_max=200,
harmo_thresh=0.85,
audioDir="./",
dataFileName=None):
"""
Run the computation of the Yin algorithm on a example file.
Args:
sig (list) : Audio signal (list of float)
fs (int) : sampling rate (= average number of samples pro 1 second)
w_len (int) : size of the analysis window (in #samples)
w_step (int) : size of the lag between two consecutives windows (in #samples)
f0_min (int) : Minimum fundamental frequency that can be detected (in Hertz)
f0_max (int) : Maximum fundamental frequency that can be detected (in Hertz)
harmo_tresh (int) : Threshold of detection. The yalgorithmù return the
first minimum of the CMND fubction below this threshold.
Returns:
(tuple) : tuple include the following
- pitches : list of fundamental frequencies,
- harmonic_rates: list of harmonic rate values for each
fundamental frequency value (= confidence value)
- argmins : minimums of the Cumulative Mean Normalized DifferenceFunction
- times : list of time of each estimation
"""
start = time.time()
duration = len(sig) / float(fs)
pitches, harmonic_rates, argmins, times = self.compute_yin(
sig, fs, dataFileName, w_len, w_step, f0_min, f0_max, harmo_thresh)
if self.debug:
print("Yin computed in: ", time.time() - start)
plt.figure(figsize=(20, 10))
plt.subplots_adjust(left=0.125,
right=0.9,
bottom=0.1,
top=0.9,
wspace=0.2,
hspace=0.99)
# plot audio data
ax1 = plt.subplot(4, 1, 1)
ax1.plot(
[float(x) * duration / len(sig) for x in range(0, len(sig))],
sig)
ax1.set_title('Audio data')
ax1.set_ylabel('Amplitude')
# plot F0
ax2 = plt.subplot(4, 1, 2)
ax2.plot([
float(x) * duration / len(pitches)
for x in range(0, len(pitches))
], pitches)
ax2.set_title('F0')
ax2.set_ylabel('Frequency (Hz)')
# plot Harmonic rate
ax3 = plt.subplot(4, 1, 3, sharex=ax2)
ax3.plot([
float(x) * duration / len(harmonic_rates)
for x in range(0, len(harmonic_rates))
], harmonic_rates, "-x")
ax3.plot([
float(x) * duration / len(harmonic_rates)
for x in range(0, len(harmonic_rates))
], [harmo_thresh] * len(harmonic_rates), 'r', "--")
ax3.set_title('Harmonic rate')
ax3.set_ylabel('Rate')
# plot Index of minimums of CMND
ax4 = plt.subplot(4, 1, 4, sharex=ax2)
ax4.plot([
float(x) * duration / len(argmins)
for x in range(0, len(argmins))
], argmins, "-x")
ax4.set_title('Index of minimums of CMND')
ax4.set_ylabel('Frequency (Hz)')
ax4.set_xlabel('Time (seconds)')
plt.show()
return np.array(pitches), harmonic_rates, argmins, times