`parse_audio_files` takes the parent directory name, the subdirectories within the parent directory, and a file-name pattern (default `*.wav`) as input. It then iterates over all matching files within the subdirectories and calls the second helper function, `extract_feature`, on each one.
`extract_feature` takes a file path as input, reads the file by calling `librosa.load`, and extracts and returns the following features: melspectrogram: a Mel-scaled power spectrogram; mfcc: Mel-frequency cepstral coefficients; chroma_stft: a chromagram computed from a waveform or power spectrogram; spectral_contrast: spectral contrast, using the method defined in [1]; tonnetz: the tonal centroid features (tonnetz), following the method of [2].
def parse_audio_files(parent_dir, sub_dirs, file_ext='*.wav'):
    """Build a feature matrix and label vector from audio files on disk.

    Every file matching ``parent_dir/<sub_dir>/<file_ext>`` is passed to
    ``extract_feature``; the five returned feature vectors are concatenated
    into a single row of the feature matrix.

    The label of a file is the second ``-``-separated field of its base
    name, converted to ``int`` (e.g. ``"7061-6-0-0.wav"`` -> ``6``).

    Args:
        parent_dir: Directory that contains the sub-directories.
        sub_dirs: Iterable of sub-directory names to scan.
        file_ext: Glob pattern for the files to load (default ``'*.wav'``).

    Returns:
        A ``(features, labels)`` tuple. ``features`` is a float array of
        shape ``(n_files, 193)`` and ``labels`` is an integer array of shape
        ``(n_files,)``. Both are empty when no file matches.

    Raises:
        IndexError: If a file name contains no ``-`` separator.
        ValueError: If the label field is not an integer, or if a feature
            row does not have exactly 193 values.
    """
    feature_rows = []
    labels = []
    for sub_dir in sub_dirs:
        pattern = os.path.join(parent_dir, sub_dir, file_ext)
        for fn in glob.glob(pattern):
            mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
            feature_rows.append(
                np.hstack([mfccs, chroma, mel, contrast, tonnetz])
            )
            # Take the label from the file name itself so the result does
            # not depend on how deep parent_dir is or on the path separator.
            labels.append(int(os.path.basename(fn).split('-')[1]))

    # Stack once instead of once per file (avoids quadratic copying). The
    # empty (0, 193) seed keeps the float64 dtype and the fixed width even
    # when no file was found.
    features = np.vstack([np.empty((0, 193))] + feature_rows)
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is equivalent.
    return features, np.array(labels, dtype=int)
def extract_feature(file_name):
    """Compute time-averaged spectral features for one audio file.

    The file is loaded with ``librosa.load`` using its default settings.
    Each feature is computed per frame and then averaged over all frames,
    so every returned value is a 1-D vector that does not depend on the
    clip length.

    Args:
        file_name: Path of the audio file to load.

    Returns:
        A tuple ``(mfccs, chroma, mel, contrast, tonnetz)`` of 1-D arrays:

        * ``mfccs`` - 40 Mel-frequency cepstral coefficients.
        * ``chroma`` - chromagram computed from the STFT magnitude.
        * ``mel`` - Mel-scaled spectrogram.
        * ``contrast`` - spectral contrast computed from the STFT magnitude.
        * ``tonnetz`` - tonal centroid features of the harmonic component.

        NOTE(review): with librosa's default parameters the last four are
        expected to have 12, 128, 7 and 6 values, which together with the
        40 MFCCs gives the 193 columns ``parse_audio_files`` relies on -
        confirm against the installed librosa version.
    """
    X, sample_rate = librosa.load(file_name)
    # Magnitude spectrogram, shared by the chroma and contrast features.
    stft = np.abs(librosa.stft(y=X))
    mfccs = np.mean(
        librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0
    )
    chroma = np.mean(
        librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0
    )
    # The signal must be passed as ``y=``: librosa >= 0.10 made the
    # arguments of melspectrogram keyword-only, so a positional ``X`` raises
    # TypeError there.
    mel = np.mean(
        librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0
    )
    contrast = np.mean(
        librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0
    )
    # Tonnetz is computed on the harmonic part of the signal only.
    harmonic = librosa.effects.harmonic(y=X)
    tonnetz = np.mean(
        librosa.feature.tonnetz(y=harmonic, sr=sample_rate).T, axis=0
    )
    return mfccs, chroma, mel, contrast, tonnetz