In [1]:
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Loading of Audio

### Here librosa.load loads an audio file and decodes it into a 1-dimensional array, the time series x; sr is the sampling rate of x. The default sr is 22050 Hz (about 22 kHz).

In [2]:
# Relative path to the sample clip. Forward slashes are used on purpose: the
# backslash form relied on the invalid escape sequence "\c" and only resolved
# on Windows, whereas forward slashes work on every platform.
audio_path = 'genres/classical/classical.00000.wav'
# librosa.load returns the decoded samples (x) and the sampling rate (sr).
x, sr = librosa.load(audio_path)
print(type(x), type(sr))

<class 'numpy.ndarray'> <class 'int'>


###  We can override the sr by

In [None]:
# Request 44.1 kHz output instead of librosa's default sampling rate.
librosa.load(path=audio_path, sr=44100)

### We can disable resampling (keep the file's native sampling rate) by passing sr=None

In [None]:
#librosa.load(audio_path, sr=None)

## Playing an Audio

In [None]:
# Alias IPython's display module; it provides the in-notebook audio player.
import IPython.display as ipd
# Being the last expression of the cell, the Audio object is rendered as the
# cell output for the clip referenced by audio_path.
ipd.Audio(audio_path)

In [None]:
# IPython.display allow us to play audio on jupyter notebook directly. 
# It has a very simple interface with some basic buttons.

## Display Waveform

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import librosa.display
plt.figure(figsize=(14, 5))
librosa.display.waveplot(x, sr=sr)

In [None]:
# Here librosa.display is used to display the audio files in different formats such as wave plot, spectrogram, or colormap.

In [None]:
# Waveplots let us know the loudness of the audio at a given time. 
# librosa.display.waveplot is used to plot the waveform as amplitude vs. time, where the horizontal axis is 
# time and the vertical axis is amplitude.

## Spectrogram

### A spectrogram shows the different frequencies playing at a particular time along with their amplitudes. Amplitude and frequency are important parameters of a sound and are unique for each audio signal.

In [None]:
# Open the canvas first; the transform below does not touch matplotlib.
plt.figure(figsize=(14, 5))
# Short-time Fourier transform of the signal (complex-valued).
X = librosa.stft(x)
# Keep only the magnitude and express it in decibels for display.
Xdb = librosa.amplitude_to_db(np.abs(X))
librosa.display.specshow(Xdb, x_axis='time', y_axis='hz', sr=sr)
plt.colorbar()

#### Here .stft computes the short-time Fourier transform (STFT) of the signal. The STFT lets us know the amplitude of a given frequency at a given time, so we can determine the amplitude of the various frequencies playing at any moment of an audio signal. .specshow is used to display the spectrogram.

## Feature Extraction

### Zero Crossing Rate

### The zero crossing rate is the rate of sign-changes along a signal, i.e., the rate at which the signal changes from positive to negative or back. This feature has been used heavily in both speech recognition and music information retrieval. It usually has higher values for highly percussive sounds like those in metal and rock.

In [None]:
# Load the clip again (same path as before) for the feature-extraction section.
x, sr = librosa.load(audio_path)

# Plot the signal. librosa 0.10 removed display.waveplot in favour of
# display.waveshow, so use whichever one this installation provides.
draw_waveform = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot
plt.figure(figsize=(14, 5))
draw_waveform(x, sr=sr)

In [None]:
# Zooming in: here we plot only 100 consecutive samples of the waveform (indices 9000 to 9099).

In [None]:
# Zoom in on a short window of samples: [n0, n1) bounds the slice of x drawn.
n0, n1 = 9000, 9100
plt.figure(figsize=(14, 5))
# Only the samples inside the window are plotted; the grid makes it easier
# to see where the curve crosses zero.
plt.plot(x[n0:n1])
plt.grid()

### As we can see there are 10 Zero crossings

In [None]:
# The crossings can also be counted programmatically over the same window:
# librosa.zero_crossings marks each crossing position, and adding up the
# marks gives the total count.
zero_crossings = librosa.zero_crossings(x[n0:n1], pad=False)
print(zero_crossings.sum())

### Spectral Centroid :- 
### It indicates where the "centre of mass" of a sound is located and is calculated as the weighted mean of the frequencies present in the sound. If the frequencies in the music are the same throughout, the spectral centroid stays around a centre; if there are high frequencies at the end of the sound, the centroid shifts towards its end.

In [None]:
# Spectral centroid -- centre of mass -- weighted mean of the frequencies
# present in the sound, computed once per frame.
# Import the submodule explicitly: a plain `import sklearn` does not guarantee
# that `sklearn.preprocessing` is loaded.
import sklearn.preprocessing

# Pass the signal by keyword: librosa >= 0.10 made `y` keyword-only, and the
# keyword form also works on older releases.
spectral_centroids = librosa.feature.spectral_centroid(y=x, sr=sr)[0]

# Computing the time variable for visualisation. sr is passed explicitly;
# otherwise frames_to_time falls back to its own default rate, which is only
# right when the clip happens to be loaded at that rate.
frames = range(len(spectral_centroids))
t = librosa.frames_to_time(frames, sr=sr)


def normalize(x, axis=0):
    """Min-max scale ``x`` along ``axis`` so it can be overlaid on the waveform."""
    return sklearn.preprocessing.minmax_scale(x, axis=axis)


# Plotting the spectral centroid along the waveform. librosa 0.10 removed
# display.waveplot in favour of display.waveshow, so use whichever exists.
draw_waveform = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot
draw_waveform(x, sr=sr, alpha=0.4)
plt.plot(t, normalize(spectral_centroids), color='r')

In [None]:
# Here .spectral_centroid is used to calculate the spectral centroid for each frame. 
# So it’ll return an array with columns equal to a number of frames present in your sample.
# .frames_to_time converts frame to time. time[i] == frame[i]

### Spectral Rolloff

### Spectral rolloff is the frequency below which a specified percentage of the total spectral energy, e.g. 85%, lies.
### It also gives results for each frame.

In [None]:
# Per-frame roll-off frequency. The signal is passed by keyword because
# librosa >= 0.10 made `y` keyword-only (the keyword form works on older
# releases too).
spectral_rolloff = librosa.feature.spectral_rolloff(y=x, sr=sr)[0]

# librosa 0.10 removed display.waveplot in favour of display.waveshow;
# use whichever one this installation provides.
draw_waveform = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot
draw_waveform(x, sr=sr, alpha=0.4)
# `t` and `normalize` are defined in the spectral-centroid cell.
plt.plot(t, normalize(spectral_rolloff), color='r')

In [None]:
# Here .spectral_rolloff is used to calculate rolloff for a given frame.

### MFCC — Mel-Frequency Cepstral Coefficients

### This feature is one of the most important methods for extracting features from an audio signal and is used widely whenever working with audio signals. The mel-frequency cepstral coefficients (MFCCs) of a signal are a small set of features (usually about 10–20) which concisely describe the overall shape of a spectral envelope.

In [None]:
# Pass the signal by keyword: librosa >= 0.10 made `y` keyword-only, and the
# keyword form also works on older releases.
mfccs = librosa.feature.mfcc(y=x, sr=sr)
# Shape is (number of coefficients, number of frames).
print(mfccs.shape)
# Displaying the MFCCs:
librosa.display.specshow(mfccs, sr=sr, x_axis='time')

In [None]:
# Here .mfcc is used to calculate mfccs of a signal.

### By printing the shape of mfccs you get how many mfccs are calculated on how many frames. The first value represents the number of mfccs calculated and another value represents a number of frames available.

# Now we have extracted the features of the music signal. We can use these extracted features to classify music by genre.