In [56]:
import os
import librosa 
import pandas as pd
import numpy as np
from scipy.stats import mode
from sklearn.preprocessing import MultiLabelBinarizer
import statistics

In [2]:
'''This function loads the files in the input directory to a dataframe and outputs the dataframe to pkl file'''
def loadFiles(inputDirectory, outputDirectory, outputFileName):
    list_of_files = os.listdir(inputDirectory)
    y_list = []
    sr_list = []
    song_list = []
    for song in list_of_files:
        if song in '.DS_Store':
            continue
        else:
            #loads an audio file as a floating point time series and returns audio time series and sampling rate
            y, sr = librosa.load(inputDirectory+'/'+ song)
            #appends the audio time series to a list
            y_list.append(y)
            #appends the sampling rate to a list
            sr_list.append(sr)
            #appends the song name to a list, this is done to create a dataframe without .DS_Store
            song_list.append(song)
    songs_df = pd.DataFrame({'Song Name' : song_list, 'Audio Time Series' : y_list, 'Sampling Rate' : sr_list})
    print(songs_df)
    #check if the output directory already exists, else create the output directory
    if not os.path.exists(outputDirectory):
        os.makedirs(outputDirectory)
    
    #convert the dataframe to pkl file
    songs_df.to_pickle(os.path.join(outputDirectory, outputFileName + 'LoadedData.pkl'))

In [3]:
inputDirectory = str(input("Enter the input directory: "))
outputDirectory = str(input("Enter the output directory: "))
outputFileName = str(input("Enter the output file name: "))
loadFiles(inputDirectory, outputDirectory, outputFileName)

Enter the input directory:  /Users/sangeetha/Downloads/MachineLearning/Project/input/input
Enter the output directory:  /Users/sangeetha/Downloads/MachineLearning/Project/output/output
Enter the output file name:  songs




                    Song Name  \
0           Uyire Uyire 9.mp3   
1          Irava Pagala 2.mp3   
2       Kadhale Kadhale 4.mp3   
3    Poovum Malarnthida 9.mp3   
4          Poongathavae 2.mp3   
..                        ...   
211     Kadhale Kadhale 9.mp3   
212         Uyire Uyire 4.mp3   
213       Irava Pagala 10.mp3   
214     Megam Karukuthu 6.mp3   
215       Poongathavae 11.mp3   

                                     Audio Time Series  Sampling Rate  
0    [0.13423224, 0.4250051, 0.56778175, 0.638403, ...          22050  
1    [0.13576293, 0.41159385, 0.55869347, 0.6245957...          22050  
2    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...          22050  
3    [0.1335479, 0.47566232, 0.61170423, 0.69262034...          22050  
4    [0.13182306, 0.45056605, 0.6442481, 0.72061074...          22050  
..                                                 ...            ...  
211  [0.12405978, 0.3609431, 0.47775462, 0.5381871,...          22050  
212  [0.14254594, 0.4291452

In [4]:
def data_cleaning(inputDirectory, outputDirectory,outputFileName):
    print(outputFileName+'/' + outputFileName +'LoadedData.pkl')
    songs_df = pd.read_pickle(outputDirectory+'/' + outputFileName +'LoadedData.pkl') #handle expection for file not present
    cleaned_dict = {'Song Name': [], 'Audio Time Series': [], 'Sampling Rate': []}
    #midi note of C in first octave is 36
    midiC = librosa.midi_to_hz(36)
    #number of samples between successive frames
    hop_length = 512
    songs = list(songs_df['Song Name'])
    y_list = list(songs_df['Audio Time Series'])
    sr_list = list(songs_df['Sampling Rate'])
    
    pitchDf = pd.read_csv('FileVsScales.csv',delimiter=',')
    print(pitchDf)
    print(songs_df)
    for i in range(len(songs_df)):
        y = y_list[i]
        sr = sr_list[i]

        #check pitch of each input, convert it to C if it's not 
        pitchRow = pitchDf.loc[pitchDf['Filename'] == songs_df.iloc[i,0][:-4]]
        pitch = pitchRow.iloc[:,1]
        if((pitch == 'C').bool()):
            transformedy = y
        elif((pitchRow.iloc[:,1] == 'C#').bool()):
            newy = librosa.effects.pitch_shift(y, sr, n_steps=-1)
        elif((pitchRow.iloc[:,1] == 'D').bool()):
            newy = librosa.effects.pitch_shift(y, sr, n_steps=-2)
        elif((pitchRow.iloc[:,1] == 'D#').bool()):
            newy = librosa.effects.pitch_shift(y, sr, n_steps=-3)
        elif((pitchRow.iloc[:,1] == 'E').bool()):
            newy = librosa.effects.pitch_shift(y, sr, n_steps=-4)
        elif((pitchRow.iloc[:,1] == 'F').bool()):
            newy = librosa.effects.pitch_shift(y, sr, n_steps=-5)
        elif((pitchRow.iloc[:,1] == 'F#').bool()):
            newy = librosa.effects.pitch_shift(y, sr, n_steps=-6)
        elif((pitchRow.iloc[:,1] == 'G').bool()):
            newy = librosa.effects.pitch_shift(y, sr, n_steps=-7)
        elif((pitchRow.iloc[:,1] == 'Ab').bool()):
            newy = librosa.effects.pitch_shift(y, sr, n_steps=+4)
        elif((pitchRow.iloc[:,1] == 'A').bool()):
            newy = librosa.effects.pitch_shift(y, sr, n_steps=+3)
        elif((pitchRow.iloc[:,1] == 'Bb').bool()):
            newy = librosa.effects.pitch_shift(y, sr, n_steps=+2)
        elif((pitchRow.iloc[:,1] == 'B').bool()):
            newy = librosa.effects.pitch_shift(y, sr, n_steps=+1)
        
        #After transposing all the clips to C, copy the pitch-shifted audio time-series to y
        y = newy
        new_y_list = []
        
        #smaller clips are created using the original audio clips
        start = 0
        end = 100000
        
        while(len(y) > end):
            new_y_list.append(y[start:end])
            start = end
            end = end + 100000
        
        subclip_name = []
        
        for j in range(len(new_y_list)):
            ragaName = pitchRow.iloc[:,2]
            subclip_name.append(ragaName)
            
        for k in range(len(subclip_name)):
            cleaned_dict['Song Name'].append(subclip_name[k]) #song name of the clip is the name of the raga
            cleaned_dict['Audio Time Series'].append(new_y_list[k]) #audio time series for respective sub clip is appended
            cleaned_dict['Sampling Rate'].append(sr) #sampling rate of the audio clip is appended
            
    cleaned_dataframe = pd.DataFrame(cleaned_dict)
    
    #save the file in the output directory and save the cleaned dataframe
    if outputDirectory and not os.path.exists(outputDirectory):
        os.makedirs(outputDirectory)
    
    cleaned_dataframe.to_pickle(os.path.join(outputDirectory, outputFileName + 'CleanedData.pkl'))
    
    return cleaned_dataframe

In [5]:
data_cleaning(inputDirectory, outputDirectory, outputFileName)

songs/songsLoadedData.pkl
                 Filename Scale                    Raga
0    Maalai En Vethanai 2    D#  Dheerashankarabharanam
1    Maalai En Vethanai 3     E  Dheerashankarabharanam
2    Maalai En Vethanai 4     F  Dheerashankarabharanam
3    Maalai En Vethanai 5    F#  Dheerashankarabharanam
4    Maalai En Vethanai 6     G  Dheerashankarabharanam
..                    ...   ...                     ...
211     Kadhale Kadhale 8     E            Natabhairavi
212     Kadhale Kadhale 9     F            Natabhairavi
213    Kadhale Kadhale 10    F#            Natabhairavi
214    Kadhale Kadhale 11     G            Natabhairavi
215    Kadhale Kadhale 12    Ab            Natabhairavi

[216 rows x 3 columns]
                    Song Name  \
0           Uyire Uyire 9.mp3   
1          Irava Pagala 2.mp3   
2       Kadhale Kadhale 4.mp3   
3    Poovum Malarnthida 9.mp3   
4          Poongathavae 2.mp3   
..                        ...   
211     Kadhale Kadhale 9.mp3   
212         Uy

Unnamed: 0,Song Name,Audio Time Series,Sampling Rate
0,"152 Harikhamboji Name: Raga, dtype: object","[0.14415567, 0.4537507, 0.60287446, 0.6463532,...",22050
1,"152 Harikhamboji Name: Raga, dtype: object","[-0.00017436781, 0.0012716996, 0.0005056586, -...",22050
2,"152 Harikhamboji Name: Raga, dtype: object","[-0.0041281856, 0.0034600438, 0.0008789872, 0....",22050
3,"152 Harikhamboji Name: Raga, dtype: object","[-0.037834443, -0.034943987, -0.028770365, -0....",22050
4,"152 Harikhamboji Name: Raga, dtype: object","[-0.08829773, -0.08257084, -0.08392762, -0.082...",22050
...,...,...,...
4842,"202 Mayamalavagowlai Name: Raga, dtype: object","[0.035614934, 0.017803857, 0.011888366, 0.0261...",22050
4843,"202 Mayamalavagowlai Name: Raga, dtype: object","[0.029002443, 0.03735133, 0.037939135, 0.03081...",22050
4844,"202 Mayamalavagowlai Name: Raga, dtype: object","[0.18039195, 0.14463452, 0.104139574, 0.050471...",22050
4845,"202 Mayamalavagowlai Name: Raga, dtype: object","[0.026646433, 0.030965528, 0.034119595, 0.0347...",22050


In [6]:
def numbertonote(num):
    if num == 0:
        return 'C'
    if num == 1:
        return 'C#'
    if num == 2:
        return 'D'
    if num == 3:
        return 'D#'
    if num == 4:
        return 'E'
    if num == 5:
        return 'F'
    if num == 6:
        return 'F#'
    if num == 7:
        return 'G'
    if num == 8:
        return 'Ab'
    if num == 9:
        return 'A'
    if num == 10:
        return 'Bb'
    if num == 11:
        return 'B'

In [7]:
def top_7_notes(noteDataFrame):
    freq = list(noteDataFrame.sort_index()[0])
    notes = list(noteDataFrame.sort_values(by=0,ascending=False).rename(columns={0:'Notes'})[:7].index)
    return [freq, notes]

In [8]:
def average_dist(cleaned_dataframe):
    y_list = cleaned_dataframe['Audio Time Series'] 
    sr_list = cleaned_dataframe['Sampling Rate']
    hop_length = 512
    
    freq_all = []
    notes_all = []
    
    #iterate through all clips
    for i in range(len(y_list)): 
        notes = {'C': 0,'C#': 0,'D': 0,'D#': 0,'E': 0,'F': 0,'F#': 0,'G': 0,'Ab': 0,'A': 0,'Bb': 0,'B': 0}
        chromagram = librosa.feature.chroma_stft(y = y_list[i], sr=sr_list[i], hop_length=hop_length)
        
        notecount = 0 #total number of notes that we would be getting from the clips
        counts = [] 
        
        seq_notes = []
        
        #iterate through every 1/43rd of a second 
        for row in chromagram.T:
            seq_notes.append(np.argmax(row)) #index of note with highest frequency at that 1/43rd of a second
        clipped_notes = pd.Series(seq_notes).apply(numbertonote)
        
        values = []
        for i in range(0, len(clipped_notes),43):
            curr = clipped_notes[i:i+43]
            values.append(mode(curr)[0][0])
        notecount = len(values)

        #adds to local dictionary 
        for i in values:   
            if i in notes:
                notes[i]+=1
            else: 
                notes[i] = 1
        
        for note in notes:
            notes[note] = notes[note]/notecount
        
        #gets top 7 notes & their frequencies 
        noteDataFrame = pd.DataFrame.from_dict(notes, orient = 'index')
        frequencies, notes = top_7_notes(noteDataFrame)
        freq_all.append(frequencies)
        notes_all.append(notes)

    return [freq_all, notes_all]

In [9]:
def extract_features(inputDirectory, outputDirectory, outputFileName):
    cleaned_dataframe = pd.read_pickle(outputDirectory+'/' + outputFileName +'CleanedData.pkl')
    freq_and_notes = average_dist(cleaned_dataframe)
    cleaned_dataframe['frequencies'] = freq_and_notes[0]
    cleaned_dataframe['scale'] = freq_and_notes[1]
    
    if outputDirectory and not os.path.exists(outputDirectory):
        os.makedirs(outputDirectory)
        
    print(cleaned_dataframe)
    
    cleaned_dataframe.to_pickle(os.path.join(outputDirectory, outputFileName + 'FeatureData.pkl'))

In [10]:
extract_features(inputDirectory, outputDirectory, outputFileName)

                                              Song Name  \
0         152    Harikhamboji
Name: Raga, dtype: object   
1         152    Harikhamboji
Name: Raga, dtype: object   
2         152    Harikhamboji
Name: Raga, dtype: object   
3         152    Harikhamboji
Name: Raga, dtype: object   
4         152    Harikhamboji
Name: Raga, dtype: object   
...                                                 ...   
4842  202    Mayamalavagowlai
Name: Raga, dtype: object   
4843  202    Mayamalavagowlai
Name: Raga, dtype: object   
4844  202    Mayamalavagowlai
Name: Raga, dtype: object   
4845  202    Mayamalavagowlai
Name: Raga, dtype: object   
4846  202    Mayamalavagowlai
Name: Raga, dtype: object   

                                      Audio Time Series  Sampling Rate  \
0     [0.14415567, 0.4537507, 0.60287446, 0.6463532,...          22050   
1     [-0.00017436781, 0.0012716996, 0.0005056586, -...          22050   
2     [-0.0041281856, 0.0034600438, 0.0008789872, 0....          2205

In [11]:
TRAINCOLS = [
    'multiplied list'
]
LABEL = 'Song Name'

In [12]:
def load_data(inputDirectory, traincols=TRAINCOLS, label=LABEL):
    multiLabel = MultiLabelBinarizer()
    cleaned_dataframe = pd.read_pickle(outputDirectory+ '/' + outputFileName + 'FeatureData.pkl')

    oneHotNotes = pd.DataFrame(multiLabel.fit_transform(cleaned_dataframe['scale']),columns=multiLabel.classes_, index=cleaned_dataframe.index)
    oneHotNotes = oneHotNotes[['A', 'Bb', 'B', 'C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'Ab']]
    cleaned_dataframe = cleaned_dataframe.join(oneHotNotes)

    #multiply frequency of occurences to each note  
    new_cleaned_dataframe = cleaned_dataframe[['A','Bb','B','C','C#','D','D#','E','F','F#','G','Ab']]
    #create rows from list 
    list_of_rows =[] 
      
    # Iterate over each row 
    for index, rows in new_cleaned_dataframe.iterrows(): 
        # Create list for the current row 
        my_list =[rows['A'], rows['Bb'],rows['B'],rows['C'],rows['C#'],rows['D'],rows['D#'],
                  rows['E'],rows['F'],rows['F#'],rows['G'],rows['Ab']]
          
        # append the list to the final list 
        list_of_rows.append(my_list) 
    frequencies = list(cleaned_dataframe['frequencies'])
    cleaned_dataframe['One-Hot Encoding'] = list_of_rows

    #multiply the lists
    multiply_lists = []
    for i in range(len(cleaned_dataframe['frequencies'])): 
        list1 = cleaned_dataframe['frequencies'][i]
        list2 = cleaned_dataframe['One-Hot Encoding'][i]
        
        new_frequencies = []
        
        for num1,num2 in zip(list1, list2):
            new_frequencies.append(num1*num2)
        multiply_lists.append(new_frequencies)
    cleaned_dataframe['multiplied list'] = multiply_lists
    
    X = cleaned_dataframe[traincols]
    y_value = cleaned_dataframe[label]
    
    return X,y_value

In [13]:
X,y_value = load_data(inputDirectory)

In [14]:
X

Unnamed: 0,multiplied list
0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, ..."
1,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, ..."
3,"[0.0, 0.6, 0.0, 0.2, 0.0, 0.2, 0.0, 0.0, 0.0, ..."
4,"[0.0, 0.2, 0.0, 0.6, 0.0, 0.0, 0.0, 0.2, 0.0, ..."
...,...
4842,"[0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, ..."
4843,"[0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4844,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4845,"[0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 0.2, 0.0, 0.0, ..."


In [15]:
XList = []
for i in range (len(X)):
    XList.append(X.iloc[i,0])
X_df = pd.DataFrame(XList)
X_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.6,0.0,0.2,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.2,0.0,0.6,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
4842,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.4
4843,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6
4844,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4845,0.0,0.0,0.0,0.0,0.4,0.0,0.2,0.0,0.0,0.4,0.0,0.0


In [16]:
y_list = []
for i in range(len(y_value)):
    string = y_value[i].to_string()
    y_list.append(string[6:])
y_df = pd.Series(y_list)

In [17]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_df, y_df, test_size=0.25, random_state=45931)

In [18]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X_train,y_train)

KNeighborsClassifier(n_neighbors=10)

In [19]:
from sklearn import svm
clf = svm.SVC()
clf.fit(X_train, y_train)

SVC()

In [20]:
y_knn_pred = knn.predict(X_test)

In [21]:
y_svm_pred = clf.predict(X_test)

In [22]:
y_knn_pred

array([' Natabhairavi', ' Karaharapriya', ' Kalyani', ...,
       ' Natabhairavi', 'Natabhairavi', ' Dheerashankarabharanam'],
      dtype=object)

In [23]:
y_test

494                Natabhairavi
1725              Karaharapriya
2601                    Kalyani
2506                    Kalyani
1410              Karaharapriya
                 ...           
2231     Dheerashankarabharanam
2498                    Kalyani
2066               Harikhamboji
79                 Natabhairavi
1795                    Kalyani
Length: 1212, dtype: object

In [24]:
y_svm_pred

array([' Natabhairavi', ' Dheerashankarabharanam', ' Kalyani', ...,
       ' Natabhairavi', ' Natabhairavi', ' Natabhairavi'], dtype=object)

In [25]:
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_knn_pred))

0.38531353135313534


In [26]:
print(accuracy_score(y_test, y_svm_pred))

0.3952145214521452


In [44]:
def confusion_matrix(y_test,y_pred):
    tp = fp = 0
    for i in range(len(y_test)):
        if(y_test[i] == y_pred[i]):
            tp+=1
        else:
            fp+=1
    return tp,fp

In [45]:
type(y_test)

1212

In [46]:
type(y_knn_pred)

numpy.ndarray

In [47]:
tp_knn,fp_knn = confusion_matrix(y_test.to_numpy(),y_knn_pred)
print(tp_knn)
print(fp_knn)

467
745


In [50]:
tp_svm,fp_svm = confusion_matrix(y_test.to_numpy(),y_svm_pred)
print(tp_svm)
print(fp_svm)

479
733


In [57]:
def evaluation_metrics(tp,fp):
    fn = 0
    tn = 0
    accuracy = tp/(tp+fp)
    precision = tp/(tp+fp)
    recall = tp/(tp+fn)
    f1_score = statistics.harmonic_mean([precision,recall])
    return accuracy, precision, recall, f1_score

In [58]:
accuracy_knn, precision_knn, recall_knn, f1_score_knn = evaluation_metrics(tp_knn, fp_knn)
print(accuracy_knn, precision_knn, recall_knn, f1_score_knn)

0.6146864686468647 0.6146864686468647 1.0 0.7613694430250384


In [59]:
accuracy_svm, precision_svm, recall_svm, f1_score_svm = evaluation_metrics(tp_svm, fp_svm)
print(accuracy_svm, precision_svm, recall_svm, f1_score_svm)

0.6047854785478548 0.6047854785478548 1.0 0.7537275064267352
