In [1]:
from collections import Counter
from itertools import groupby

import librosa
import numpy as np
import pandas as pd
import scipy.ndimage

## Load song

In [11]:
#input parameters=1  : Path of the song
#output = 1 : audio samples as a 1-D array (the sampling rate returned by librosa is discarded)

def loadsong(song_path, sr=22050):
    """Decode an audio file with librosa and return its samples.

    Parameters
    ----------
    song_path : str or path-like
        Location of the song; forwarded unchanged to ``librosa.load``.
    sr : int, optional
        Sampling rate requested from ``librosa.load``. Defaults to 22050,
        which is librosa's own default, so existing one-argument calls
        behave as before.

    Returns
    -------
    The audio time series produced by ``librosa.load``. The sampling rate
    that ``librosa.load`` also returns is intentionally dropped, so callers
    that need it must remember the ``sr`` they passed in.
    """
    samples, _ = librosa.load(song_path, sr=sr)
    return samples

# Feature Extraction Functions

### chroma stft

In [3]:
#input parameters=1  : Song matrix
#output = 1 : chroma stft

def chroma_stft(y, sr=22050):
    """Compute the STFT-based chromagram of an audio signal.

    Parameters
    ----------
    y : array-like
        Audio time series (e.g. the value returned by ``loadsong``).
    sr : int, optional
        Sampling rate of ``y``. Defaults to 22050, the rate ``librosa.load``
        resamples to when no rate is requested.

    Returns
    -------
    Whatever ``librosa.feature.chroma_stft`` returns for the signal.
    """
    # Recent librosa releases accept ``y`` only as a keyword argument, so it
    # must not be passed positionally.
    return librosa.feature.chroma_stft(y=y, sr=sr)

### Notes in a song

#### class notes :

The `notes` class contains all the functions we require for note extraction and manipulation.

The functions are listed below:
1. extract_notes: 
    input arg: song array
    output: matrix of Nx2
    
2. tonic_chord:
    input arg: song array
    output: chord of the song (single value)

In [16]:
# Maps a chroma row index (as a string) to the name of its pitch class.
notes_dict = {'0': 'C', '1': 'C#', '2': 'D','3': 'D#','4': 'E','5': 'F','6': 'F#','7': 'G','8': 'G#','9': 'A','10': 'A#','11': 'B'}


class notes:
    """Note extraction helpers built on a smoothed harmonic chromagram.

    All methods are static: call them as ``notes.extract_notes(y)`` and
    ``notes.tonic_chord(y)``.
    """

    @staticmethod
    def _frame_notes(y):
        """Return the dominant note name for every chroma frame of ``y``.

        The signal is reduced to its harmonic component, converted to a CQT
        chromagram (36 bins per octave), de-noised with a nearest-neighbour
        median filter and finally median-smoothed over 9 frames along time.
        """
        # Filtering the array before handing it to the chroma extractor.
        y_harm = librosa.effects.harmonic(y=y)
        chroma_os_harm = librosa.feature.chroma_cqt(y=y_harm, bins_per_octave=12 * 3)

        # Keep, per cell, the smaller of the raw and the neighbour-aggregated
        # value so the filter can only remove energy, never add it.
        chroma_filter = np.minimum(
            chroma_os_harm,
            librosa.decompose.nn_filter(chroma_os_harm, aggregate=np.median, metric='cosine'),
        )
        chroma_smooth = scipy.ndimage.median_filter(chroma_filter, size=(1, 9))

        # One note per frame: the row with the largest value wins. On a tie
        # ``argmax`` keeps the lowest row index, so a frame never yields more
        # than one note.
        strongest_rows = np.argmax(chroma_smooth, axis=0)
        return [notes_dict[str(int(row))] for row in strongest_rows]

    @staticmethod
    def extract_notes(y):
        """Run-length encode the dominant note of each frame.

        Parameters
        ----------
        y : array-like
            Song array (audio time series).

        Returns
        -------
        list of ``[note, count]`` pairs (an N x 2 structure), one pair per run
        of consecutive frames sharing the same dominant note, in time order.
        An input that produces no frames gives an empty list.
        """
        frame_notes = notes._frame_notes(y)
        return [[note, sum(1 for _ in run)] for note, run in groupby(frame_notes)]

    @staticmethod
    def tonic_chord(y):
        """Return the note that dominates the largest number of frames.

        Parameters
        ----------
        y : array-like
            Song array (audio time series).

        Returns
        -------
        The most frequent dominant note name (a single value), or ``None``
        when the input produces no frames.
        """
        frame_notes = notes._frame_notes(y)
        if not frame_notes:
            return None
        return Counter(frame_notes).most_common(1)[0][0]

In [13]:
# NOTE: absolute, machine-specific path. Point SONG_PATH at a local audio file
# before running this cell on another machine.
SONG_PATH = r"C:\Users\Mahip\Documents\songdata\EDM\girl_like_you.mp3"
y = loadsong(SONG_PATH)

## MFCC

### notes:

MFCC will give a matrix of Nx20. The dataset will contain only one row of the matrix in one cell block,
i.e. the MFCC matrix will be divided into rows and each row will be put in a different column.

In [19]:
#input: song array
#output: MFCC values as a 2-D matrix



def mfcc(y):
    """Compute the MFCCs of an audio signal.

    Parameters
    ----------
    y : array-like
        Song array (audio time series).

    Returns
    -------
    The MFCC matrix returned by ``librosa.feature.mfcc`` (librosa's default
    is 20 coefficients per frame).
    """
    # Power spectrogram -> mel-scaled power spectrogram.
    power_spec = np.abs(librosa.stft(y)) ** 2
    mel_spec = librosa.feature.melspectrogram(S=power_spec)

    # ``librosa.feature.mfcc`` expects a log-power mel spectrogram when it is
    # given ``S``, hence the dB conversion before the final step.
    return librosa.feature.mfcc(S=librosa.power_to_db(mel_spec))

## Beats correlation

In [20]:
#input: song array
#output: correlation values (tempogram) as a 2-D matrix

def beats_corelation(y, sr=22050):
    """Compute the tempogram (onset autocorrelation) of an audio signal.

    Parameters
    ----------
    y : array-like
        Song array (audio time series).
    sr : int, optional
        Sampling rate of ``y``. Defaults to 22050, the rate ``librosa.load``
        resamples to when no rate is requested.

    Returns
    -------
    The matrix returned by ``librosa.feature.tempogram`` for the onset
    strength envelope of ``y``.
    """
    hop_length = 200  # samples per frame

    # ``y`` must be passed by keyword: recent librosa releases reject it as a
    # positional argument.
    onset_env = librosa.onset.onset_strength(
        y=y, sr=sr, hop_length=hop_length, n_fft=2048
    )

    return librosa.feature.tempogram(
        onset_envelope=onset_env, sr=sr, hop_length=hop_length, win_length=400
    )

In [21]:
def tempo(y, sr=22050):
    """Estimate the tempo of an audio signal.

    Parameters
    ----------
    y : array-like
        Song array (audio time series).
    sr : int, optional
        Sampling rate of ``y``. Defaults to 22050, the rate ``librosa.load``
        resamples to when no rate is requested.

    Returns
    -------
    The tempo estimate produced by librosa's tempo estimator.
    """
    # Newer librosa releases expose the estimator as ``librosa.feature.tempo``
    # and deprecate ``librosa.beat.tempo``; use the new location when it
    # exists and fall back to the old one otherwise.
    estimate_tempo = getattr(librosa.feature, "tempo", None) or librosa.beat.tempo
    return estimate_tempo(y=y, sr=sr)

## MFCC corelation

In [23]:
#input: song array
#output: MFCC delta values as a 2-D matrix

def mfcc_corelation(y):
    """Compute delta features of the song's MFCC matrix.

    Parameters
    ----------
    y : array-like
        Song array (audio time series).

    Returns
    -------
    The result of ``librosa.feature.delta`` applied to ``mfcc(y)``.
    """
    # Use a distinct local name: assigning to ``mfcc`` here would shadow the
    # module-level ``mfcc`` function and make the call itself fail.
    coefficients = mfcc(y)
    return librosa.feature.delta(coefficients)