In [1]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sp
from scipy.fftpack import fft
from scipy.io import wavfile # get the api
import sunau;
import librosa
import librosa.display
import pandas
import os
np.set_printoptions(suppress=True)

In [2]:
def writeMatrixToFile(self,filename,matrix):
    """Save ``matrix`` in NumPy binary (.npy) format to ``filename + ".txt"``.

    The data is written straight to the target through an open file handle.
    The previous implementation let ``np.save`` create ``filename + ".npy"``
    and renamed it afterwards, which destroyed any unrelated file already
    stored under that ``.npy`` name and failed on Windows whenever the
    ``.txt`` target already existed.

    Parameters
    ----------
    self : unused; kept only so existing call sites keep working.
    filename : str
        Target path without extension; ``".txt"`` is appended.
    matrix : array-like
        Data handed to ``np.save``.
    """
    target = filename + ".txt"
    # np.save does not append ".npy" when it is given a file object.
    with open(target, "wb") as handle:
        np.save(handle, matrix)
    print("Written successfully..")
    
def loadMatrixFromFile(self,filename):
    """Load an array with ``np.load`` from ``filename``.

    ``self`` is unused. Returns ``None`` when ``filename`` is not an
    existing regular file, otherwise whatever ``np.load`` returns.
    """
    if not os.path.isfile(filename):
        return None
    return np.load(filename)

def readCSVFile(file):
    """Read a comma-separated file into a pandas DataFrame.

    The first line is used as the header, ``'?'`` cells are parsed as
    missing values and blanks following a delimiter are skipped.

    The separator is passed by keyword: ``read_csv`` accepts only the file
    argument positionally in current pandas releases, so the former
    positional ``","`` raises ``TypeError`` there.
    """
    return pandas.read_csv(file, sep=",", header=0, na_values='?', skipinitialspace=True)

In [3]:
class PreProcessing:
    """Load one audio file and expose librosa-based feature extractors and plots.

    Each ``get*`` method computes its feature from the loaded samples, stores
    the result on the instance and returns it. Each ``draw*`` method renders
    a matplotlib figure and returns nothing.
    """

    filename = None                  # path given to __init__
    y = None                         # samples from librosa.load, cut to max_sample_vector_size
    sr = None                        # sampling rate reported by librosa.load
    log_enabled = True               # log() is silent when this is False
    centroid = None                  # last result of getCentroid()
    spectro = None                   # magnitude part from getSpectrogram()
    spectro_phase = None             # phase part from getSpectrogram()
    max_sample_vector_size = 660000  # hard cap on the number of samples kept in y
    duration = 30                    # duration argument last passed to librosa.load

    def __init__(self,filename,duration=30):
        """Remember ``filename`` and load up to ``duration`` (passed to librosa.load) of audio."""
        self.filename = filename
        self.reloadAudioFile(duration)

    def reloadAudioFile(self,duration=30):
        """(Re)load ``self.filename`` and truncate the samples to the size cap."""
        # Keep the attribute in sync with what is actually loaded.
        self.duration = duration
        self.y, self.sr = librosa.load(self.filename, duration=duration)
        self.y = self.y[:self.max_sample_vector_size]

    # Short-time Fourier transform
    def getSTFT(self):
        """Return ``librosa.stft`` of the samples (stored as ``self.stft``)."""
        self.stft = librosa.stft(y=self.y)
        return self.stft

    # Spectrogram
    def getSpectrogram(self):
        """Return ``(magnitude, phase)`` of the STFT via ``librosa.magphase``."""
        self.spectro, self.spectro_phase = librosa.magphase(self.getSTFT())
        return self.spectro, self.spectro_phase

    def getCentroid(self):
        """Return the spectral centroid (stored as ``self.centroid``)."""
        self.centroid = librosa.feature.spectral_centroid(y=self.y, sr=self.sr)
        return self.centroid

    def getSpectralRolloff(self):
        """Return the spectral roll-off (stored as ``self.rolloff``)."""
        self.rolloff = librosa.feature.spectral_rolloff(y=self.y, sr=self.sr)
        return self.rolloff

    def getZeroCrossing(self):
        """Return the zero-crossing rate (stored as ``self.zero_crossing_rate``)."""
        self.zero_crossing_rate = librosa.feature.zero_crossing_rate(self.y)
        return self.zero_crossing_rate

    def getSpectralContrast(self):
        """Return the spectral contrast of the STFT magnitude (stored as ``self.contrast``).

        Reference: Jiang, Dan-Ning, Lie Lu, Hong-Jiang Zhang, Jian-Hua Tao and
        Lian-Hong Cai. "Music type classification by spectral contrast feature."
        In Multimedia and Expo, 2002. ICME'02. Proceedings. 2002 IEEE
        International Conference on, vol. 1, pp. 113-116. IEEE, 2002.
        """
        magnitude = np.abs(self.getSTFT())
        self.contrast = librosa.feature.spectral_contrast(S=magnitude, sr=self.sr)
        return self.contrast

    def getMFCC(self):
        """Return 13 MFCCs computed with hop length 512 (stored as ``self.mfcc``)."""
        self.mfcc = librosa.feature.mfcc(y=self.y, sr=self.sr, hop_length=512, n_mfcc=13)
        return self.mfcc

    def getChroma(self):
        """Return the STFT chromagram with hop length 512 (stored as ``self.chroma``)."""
        self.chroma = librosa.feature.chroma_stft(y=self.y, sr=self.sr, hop_length=512)
        return self.chroma

    def getMelSpec(self):
        """Return a 10-band mel spectrogram (stored as ``self.mel``)."""
        self.mel = librosa.feature.melspectrogram(y=self.y, sr=self.sr, n_mels=10)
        return self.mel

    def getRMS(self):
        """Return the RMS energy (stored as ``self.rms``)."""
        self.rms = librosa.feature.rms(y=self.y)
        return self.rms

    def _drawOverSpectrogram(self, feature, label, ylabel=None, legend_loc=None):
        """Plot ``feature`` on a log y-axis above the dB-scaled spectrogram of the clip."""
        S, _ = self.getSpectrogram()
        plt.figure()
        plt.subplot(2, 1, 1)
        plt.semilogy(feature.T, label=label)
        if ylabel is not None:
            plt.ylabel(ylabel)
        plt.xticks([])
        plt.xlim([0, feature.shape[-1]])
        if legend_loc is None:
            plt.legend()
        else:
            plt.legend(loc=legend_loc)
        plt.subplot(2, 1, 2)
        librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max), y_axis='log', x_axis='time')
        plt.title('log Power spectrogram')
        plt.tight_layout()
        plt.show()

    def drawRMS(self):
        """Show the RMS energy curve above the spectrogram."""
        self._drawOverSpectrogram(self.getRMS(), 'RMS Energy', legend_loc='best')

    def drawSpectrogramWithCentroid(self):
        """Show the spectral centroid curve above the spectrogram."""
        self._drawOverSpectrogram(self.getCentroid(), 'Spectral centroid', ylabel='Hz')

    def drawSpectralRolloff(self):
        """Show the roll-off frequency curve above the spectrogram."""
        self._drawOverSpectrogram(self.getSpectralRolloff(), 'Roll-off frequency', ylabel='Hz')

    def drawSpectralContrast(self):
        """Show the dB-scaled spectrogram above the spectral contrast bands."""
        contrast = self.getSpectralContrast()
        # magphase magnitude is the same |STFT| that was recomputed separately
        # before, so one STFT pass is enough here.
        S, _ = self.getSpectrogram()
        plt.figure()
        plt.subplot(2, 1, 1)
        librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max), y_axis='log')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Power spectrogram')
        plt.subplot(2, 1, 2)
        librosa.display.specshow(contrast, x_axis='time')
        plt.colorbar()
        plt.ylabel('Frequency bands')
        plt.title('Spectral contrast')
        plt.tight_layout()
        plt.show()

    def drawMFCC(self):
        """Show the MFCC matrix over time."""
        mfccs = self.getMFCC()
        plt.figure(figsize=(6, 4))
        librosa.display.specshow(mfccs, x_axis='time')
        plt.title('MFCC')
        plt.tight_layout()
        plt.show()

    def log(self,a,b=None):
        """Print ``a`` (and ``b`` when given) if ``log_enabled`` is true."""
        if not self.log_enabled:
            return
        # Identity test: ``b != None`` is evaluated element-wise for numpy
        # arrays and then raises inside ``if``.
        if b is not None:
            print(a, b)
        else:
            print(a)

    def estimate_tempo(self,oenv,sample_rate):
        """Estimate the tempo from the onset-strength envelope ``oenv``.

        The envelope must be passed as ``onset_envelope``: the first
        positional parameter of ``librosa.beat.tempo`` is the raw audio ``y``.
        """
        return librosa.beat.tempo(onset_envelope=oenv, sr=sample_rate)

    def tempogram_analysis(self,signal, sample_rate, max_tempo = 320.0,hop_length = 512):
        """Pick two prominent tempo bins from the time-averaged tempogram.

        Parameters
        ----------
        signal, sample_rate : audio samples and their sampling rate.
        max_tempo : bins before the first one below this BPM are zeroed;
            ``None`` disables the masking.
        hop_length : hop used for the tempogram and for its BPM axis.

        Returns
        -------
        (bpms, strengths) : the BPM values and averaged tempogram values of
            the two selected bins.
        """
        # The same hop length has to drive both calls, otherwise the BPM axis
        # does not describe the tempogram rows.
        tempogram = librosa.feature.tempogram(y=signal, sr=sample_rate, hop_length=hop_length)
        bpms = librosa.tempo_frequencies(tempogram.shape[0], hop_length=hop_length, sr=sample_rate)
        mean_strength = np.mean(tempogram, axis=1)
        # account for max_tempo
        if max_tempo is not None:
            first_allowed = np.argmax(bpms < max_tempo)
            mean_strength[:first_allowed] = 0
        # NOTE(review): the slice keeps the 3rd-largest bin plus one of the
        # two largest (argpartition does not order them) - confirm this is
        # the intended selection before changing it; the stored dataset
        # features depend on it.
        selected = np.argpartition(mean_strength, -3)[-3:-1]
        return bpms[selected], mean_strength[selected]

    def getRhythm_features(self):
        """Return ``[bpm_a, bpm_b, strength_a, strength_b, tempo...]`` as one flat array."""
        oenv = librosa.onset.onset_strength(y=self.y, sr=self.sr)
        tempo = self.estimate_tempo(oenv, self.sr)
        bpms, strengths = self.tempogram_analysis(self.y, self.sr)
        return np.append(np.append(bpms, strengths), tempo)




In [4]:
class ProcessDataset:
    """Build a per-clip feature table for the genre folders under ``dir``.

    One row is appended to ``self.dataframe`` per audio file; the row layout
    is given by ``columns``.
    """

    # Row layout. Only the mean is stored for centroid, roll-off and
    # zero-crossing rate; the remaining descriptors store mean and variance.
    columns = (
        ["id", "type", "y_index", "y",
         "centroid mean", "rolloff mean", "zero mean",
         "rms mean", "rms var", "contrast mean", "contrast var"]
        + ["mfcc%d %s" % (i, stat) for i in range(1, 6) for stat in ("mean", "var")]
        + ["mel%d %s" % (i, stat) for i in range(1, 6) for stat in ("mean", "var")]
        + ["chroma_mean_" + str(i) for i in range(1, 7)]
        + ["chroma_var_" + str(i) for i in range(1, 7)]
        + ["bpm_second", "bm_first", "tempo_strength_second"]
    )
    # One-hot target vector per genre (stored in the "y" column).
    genre_out = {
        "blues":     [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        "classical": [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        "country":   [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        "disco":     [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
        "hiphop":    [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        "jazz":      [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
        "metal":     [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        "pop":       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
        "reggae":    [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
        "rock":      [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
    }
    # Integer class label per genre (stored in the "y_index" column).
    genre_out_index = {"blues": 0, "classical": 1, "country": 2, "disco": 3, "hiphop": 4,
                       "jazz": 5, "metal": 6, "pop": 7, "reggae": 8, "rock": 9}
    dataframe = None      # replaced by a fresh DataFrame in __init__
    mfcc_features = 5     # number of leading MFCC rows summarised per clip
    mel_features = 5      # number of leading mel bands summarised per clip
    chroma_features = 6   # number of leading chroma rows summarised per clip
    dir = "../genres"     # dataset root; one sub-folder per genre
    # Genre name -> sub-folder name (also used as the file-name prefix).
    genre_dir = {"blues": "blues", "classical": "classical", "country": "country",
                 "disco": "disco", "hiphop": "hiphop", "jazz": "jazz", "metal": "metal",
                 "pop": "pop", "reggae": "reggae", "rock": "rock"}

    def __init__(self):
        """Create the empty feature table and report its column count."""
        self.dataframe = pandas.DataFrame(columns=self.columns)
        print("Colums", len(self.columns))

    def extractTimberalFeatures(self,genre,audio_number,filename):
        """Extract the features of one clip and append them as a new row.

        Parameters
        ----------
        genre : key of ``genre_out`` / ``genre_out_index``.
        audio_number : identifier stored in the "id" column.
        filename : path of the audio file to analyse.

        Raises
        ------
        ValueError
            If the number of collected values differs from ``len(columns)``.
        """
        pp = PreProcessing(filename)
        centroid = pp.getCentroid()[0]
        rolloff = pp.getSpectralRolloff()[0]
        zero = pp.getZeroCrossing()[0]
        # NOTE(review): only row 0 of the contrast matrix is summarised -
        # confirm that ignoring the other rows is intended.
        contrast = pp.getSpectralContrast()[0]
        rms = pp.getRMS()[0]
        mfcc = pp.getMFCC()
        mel = pp.getMelSpec()
        chroma = pp.getChroma()
        rhythm = pp.getRhythm_features()

        features = [
            audio_number,
            genre,
            self.genre_out_index[genre],
            self.genre_out[genre],
            centroid.mean(),
            rolloff.mean(),
            zero.mean(),
            rms.mean(), rms.var(),
            contrast.mean(), contrast.var(),
        ]
        # mfcc/mel columns are declared interleaved: "<name> mean", "<name> var".
        for i in range(self.mfcc_features):
            features += [mfcc[i].mean(), mfcc[i].var()]
        for i in range(self.mel_features):
            features += [mel[i].mean(), mel[i].var()]
        # chroma columns are declared as all means followed by all variances,
        # so the values must be emitted in that order (they used to be
        # interleaved and therefore ended up under the wrong column names).
        features.extend(chroma[i].mean() for i in range(self.chroma_features))
        features.extend(chroma[i].var() for i in range(self.chroma_features))
        # First three rhythm values fill the last three declared columns.
        features.extend(rhythm[i] for i in range(3))

        if len(features) != len(self.columns):
            raise ValueError("expected %d feature values, got %d"
                             % (len(self.columns), len(features)))
        # Label rows 0, 1, 2, ... (DataFrame.size is rows*columns and gave 0, 46, 92, ...).
        self.dataframe.loc[len(self.dataframe)] = features

    def saveDataFrame(self):
        """Write the table to ``audiofeatures_numpy_matrix.npy`` and ``audiofeatures.csv``."""
        filename = "audiofeatures_numpy_matrix"
        # DataFrame.as_matrix() no longer exists in pandas >= 1.0; .values is
        # available in every pandas release.
        arr = self.dataframe.values
        np.save(filename, arr)
        self.dataframe.to_csv("audiofeatures.csv", sep=",")
        print("Written successfully..")

    def extractFeatures(self):
        """Process files ``<genre>.00000.wav`` to ``<genre>.00099.wav`` of every genre, then save."""
        print("Extracting Features...")
        for genre in self.genre_dir:
            print("-------------------["+genre+"]-----------------------")
            folder = self.genre_dir[genre]
            for i in range(100):
                audio_number = "%0.5d" % i
                filename = self.dir+"/"+folder+"/"+folder+"."+audio_number+".wav"
                self.extractTimberalFeatures(genre, audio_number, filename)
                # 100 files per genre, so the file index doubles as a percentage.
                if i % 10 == 0:
                    print("Percent completed:", i)
        self.saveDataFrame()
        print("Extraction done")
        

In [None]:
# Run the full extraction over every genre folder, then display the
# resulting feature table as the cell output.
p1 = ProcessDataset()
p1.extractFeatures()
p1.dataframe

Colums 46
Extracting Features...
-------------------[blues]-----------------------
Percent completed: 0
Percent completed: 10
Percent completed: 20
Percent completed: 30
Percent completed: 40
Percent completed: 50


In [None]:
# Dataset root and genre -> sub-folder mapping used by the exploratory cells below.
dir = "../genres"
genre_dir = {
    name: name
    for name in (
        "blues", "classical", "country", "disco", "hiphop",
        "jazz", "metal", "pop", "reggae", "rock",
    )
}

In [None]:
# Load the first disco clip and show the length of the first axis of its
# magnitude spectrogram.
genre = "disco"

audio_number = "{:05d}".format(0)
filename = "{}/{}/{}.{}.wav".format(dir, genre_dir[genre], genre_dir[genre], audio_number)

pp = PreProcessing(filename)
s, p = pp.getSpectrogram()
len(s)

In [None]:
# Load the first disco clip and show the length of the first row of its
# 10-band mel spectrogram.
genre = "disco"
audio_number = "{:05d}".format(0)
filename = "/".join([dir, genre_dir[genre], genre_dir[genre] + "." + audio_number + ".wav"])

pp = PreProcessing(filename)
a = librosa.feature.melspectrogram(y=pp.y, sr=pp.sr, n_mels=10)
len(a[0])

In [None]:
# Smoke test: extract features for disco clips 98, 99 and 0 (in that order)
# into a fresh table and display it.
genre = "disco"
pds = ProcessDataset()

for audio_number in ("{:05d}".format(n) for n in (98, 99, 0)):
    filename = "{}/{}/{}.{}.wav".format(dir, genre_dir[genre], genre_dir[genre], audio_number)
    pds.extractTimberalFeatures(genre, audio_number, filename)

#pds.extractFeatures();
pds.dataframe

In [None]:
def partitionDataFrame(df,ratio,random_state=None):
    """Shuffle ``df`` and write a disjoint train/test split to CSV files.

    The first ``int(ratio * len(df))`` shuffled rows go to ``train.csv`` and
    the remaining rows to ``test.csv`` (both in the current directory).

    Rows are split by position. The former label-based ``.loc[0:limit]``
    slice is inclusive on both ends, so the row at ``limit`` was written to
    both files.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to split; it is not modified.
    ratio : float
        Fraction of rows assigned to the training file.
    random_state : optional
        Forwarded to ``DataFrame.sample``; pass a fixed value for a
        reproducible split. ``None`` keeps the previous unseeded behaviour.
    """
    # One uniform shuffle is sufficient; repeating it adds no randomness.
    shuffled = df.sample(frac=1, random_state=random_state).reset_index(drop=True)
    # len() counts every row, including rows whose "id" is missing.
    limit = int(ratio * len(shuffled))
    train_ds = shuffled.iloc[:limit]
    test_ds = shuffled.iloc[limit:]
    train_ds.to_csv("train.csv", sep=",")
    test_ds.to_csv("test.csv", sep=",")
    print("Partitioning done")

In [None]:
# Reload the saved feature table and write a 90% / 10% train/test split.
df = readCSVFile("audiofeatures.csv")
partitionDataFrame(df, 0.9)
