In [3]:
import numpy as np
import math

import os
import time
import glob
import soundfile
import pandas

import librosa
import soundfile
from scipy.io.wavfile import read
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

from xgboost import XGBClassifier

import keras
from keras.utils import np_utils
from keras.optimizers import SGD
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.optimizers import Adam
from keras.models import Sequential, load_model
from keras.callbacks import EarlyStopping

Using TensorFlow backend.


In [3]:
GENRES = ['Blues', 'Classic', 'Country', 'Disco', 'Hiphop', 'Jazz', 'Metal', 'Pop', 'Reggae', 'Rock'] 
csv_columns=['mfcc1','mfcc2','mfcc3','mfcc4','mfcc5','mfcc6','mfcc7','mfcc8','mfcc9','mfcc10',
             'mfcc11','mfcc12','mfcc13','mfcc14','mfcc15','centroid','flux','zcr','osc1','osc2','osc3',
             'osc4','osc5','osc6','osc7','osc8','osc9','osc10','osc11','osc12','osc13','osc14','osc15',
             'osc16','osc17','osc18','low','omsc1','omsc2','omsc3','omsc4','omsc5','omsc6','omsc7','omsc8',
             'omsc9','omsc10','msfm1','msfm2','msfm3','msfm4','msfm5','msfm6','msfm7','msfm8','msfm9',
             'msfm10','mscm1','mscm2','mscm3','mscm4','mscm5','mscm6','mscm7','mscm8','mscm9','mscm10']

In [10]:
def melfilter (Fs,fftSize,totalfilters):
    # Maximum frequency of filter (avoid aliasing)
    maxF = Fs/2   

    # Maximal Mel-frequency 
    maxMelF = 2595*np.log10(1+maxF/700)   
    
    # Scatter points in Mel-frequency scale
    melpoints = np.arange(0,(totalfilters+2))/(totalfilters+1) * maxMelF
    
    # Convert points in normal frequency scale
    points = 700*(10**(melpoints/2595)-1)
    
    # DTF bins within half fftSize
    DFTbins = np.round(points/maxF*(fftSize/2)) 
    
    # Set the first value to 1
    DFTbins[0] = 1
    
    # Create an empty matrix to store filter
    MelFilter = np.zeros((totalfilters,fftSize))
    
    # Create Triangle filters by each row (for MFCC)
    for n in range (0,totalfilters):
        low = int(DFTbins[n])           # Triangle start
        center = int(DFTbins[n+1])      # Top of the Triangle
        high = int(DFTbins[n+2])        # Triangle end
        
        UpSlope = center-low       # Number of DFT points in lower side of Triangle
        DownSlope = high-center    # Number of DFT points in upper side of Triangle
        
        # Create lower side slope
        MelFilter[n,range(low-1,center)] = np.arange(0,UpSlope+1)/UpSlope       
        
        # Create upper side slope
        MelFilter[n,range(center-1,high)] = np.flipud(np.arange(0,DownSlope+1)/DownSlope)  
        
    return MelFilter

In [11]:
def zerocrossing (xw):
    
    # Size of windowed signal
    wsize = len(xw)
    
    # Slided signal
    xw2 = np.zeros(wsize)
    xw2[1:] = xw[0:-1]
    
    # Compute Zero-crossing Rate
    z = (1/(2*wsize)) * sum(abs(np.sign(xw)-np.sign(xw2)))
    
    return z

In [12]:
def dctmatrix (totalfilters,mfcccoeff):
    
    # Create an matrix (mfcccoeff * totalfilters)
    [cc,rr] = np.meshgrid(range(0,totalfilters), range(0,mfcccoeff))
    
    # Calculate DCT
    c = np.sqrt(2 / totalfilters) * np.cos(math.pi * (2*cc + 1) * rr / (2 * totalfilters))
    c[0,:] = c[0,:] / np.sqrt(2)                     
    
    return c

In [13]:
def centroid (X1,fftSize,Fs):
    
    # Calculate frequency bins
    k = (Fs/fftSize)*np.arange(0,int(fftSize/2))

    # Calculate Spectral Centroid
    c = sum(k*X1)/sum(X1)

    # Normalise by Fs/2
    c = c/(Fs/2)
    
    return c

In [14]:
def rolloff (param,X1):
    
    # Initialize energy and FFT number
    Energy = 0
    Count = 0
    
    # Find Count which has energy below param*TotalEnergy 
    TotalEnergy = sum(X1**2)
    
    # Find Count which has energy below param*TotalEnergy 
    while Energy <= param*TotalEnergy and Count < len(X1):
        Energy = X1[Count]**2 + Energy
        Count += 1
        
    # Adjust the order
    r = Count - 1
    
    # Normalise Spectral Rolloff
    r = r/len(X1)
    
    return r

In [15]:
def osc (Fs,X1,fftSize,alpha):
    
    # Indicate frequency points to create bins
    Subband_points = [0,100,200,400,800,1600,3200,6400,12800,Fs/2]
    
    # FFT bins within half fftSize
    SubFFTbins = np.round(np.divide(Subband_points,Fs/2)*fftSize/2) 

    # Set the first value to 1
    SubFFTbins[0] = 1

    # Create empty matrices for peak, valley and sum for each band
    peak = np.zeros(len(Subband_points)-1)
    valley = np.zeros(len(Subband_points)-1)
    Xsum = np.zeros(len(Subband_points)-1)

    # Take peaks and valleys from all FFT frames
    for b in range (0,len(Subband_points)-1):   
        Xframe = X1[int(SubFFTbins[b]):int(SubFFTbins[b+1])]     # Take out FFT frame
        Xsmall2big = np.sort(Xframe)                             # Sort values from small to big
        Xbig2small = np.flipud(Xsmall2big)                       # Sort values from big to small
        N = int(np.round(alpha*len(Xframe)))                     # Take values up to N in each frame
        peak[b] = math.log10((1/N)*sum(Xbig2small[0:N]))         # Calculate peak from each frame
        valley[b] = math.log10((1/N)*sum(Xsmall2big[0:N]))       # Calculate valley from each frame
        Xsum[b] = sum(Xframe)                                    # Sum of power spectrum from each sub-band
        Xsum.transpose

    # Take difference (ignore the first value)
    sc = peak[:] - valley[:]

    # Cobmine features
    o = np.hstack((valley[:],sc))
    
    return o,Xsum

In [16]:
def datacleaner(RawData,mode,rc):
    
    # Case of vector input data
    if RawData.ndim == 1:
        NaNData = np.zeros(len(RawData))
        InfData = NaNData
        
        if mode == 0:
            for i in range(0,len(RawData)):
                NaNData[i] = math.isnan(RawData[i])
                InfData[i] = math.isinf(RawData[i])
        BrokenData = NaNData + InfData
        ValidDataIdx = np.nonzero(BrokenData == 0)[0]
        ValidData = RawData[ValidDataIdx]
        
        if mode == 1:
            for i in range(0,len(RawData)):
                NaNData[i] = math.isnan(RawData[i])
        BrokenData = NaNData
        ValidDataIdx = np.nonzero(BrokenData == 0)[0]
        ValidData = RawData[ValidDataIdx]
        
        if mode == 2:
            for i in range(0,len(InfData)):
                InfData[i] = math.isnan(RawData[i])
        BrokenData = InfData
        ValidDataIdx = np.nonzero(BrokenData == 0)[0]
        ValidData = RawData[ValidDataIdx]
        
    # Case of matrix input data
    if RawData.ndim ==  2:
        
        if mode == 0:
        # Check data for each row
            for r in range(0,((RawData.shape)[0])):
                for c in range(0,((RawData.shape)[1])):
                    # Replace Inf as NaN
                    if math.isinf(RawData[r,c]) == 1:
                        RawData[r,c] = np.nan
            # Exclude colmun/row including NaN
            if rc == 0: 
                ValidData = RawData[~np.isnan(RawData).any(axis=1)]             # Exclude Row
            if rc == 1:
                ValidData = np.ma.compress_cols(np.ma.masked_invalid(RawData))  # Exclude Column
                
        if mode == 1:
            if rc == 0:
                ValidData = RawData[~np.isnan(RawData).any(axis=1)]             # Exclude Row
            if rc == 1:
                ValidData = np.ma.compress_cols(np.ma.masked_invalid(RawData))  # Exclude Column
    
        if mode == 2:
            if rc == 0:
                ValidData = RawData[~np.isinf(RawData).any(axis=1)]             # Exclude Row
            if rc == 1:
                ValidData = np.ma.compress_cols(np.ma.masked_invalid(RawData))  # Exclude Column
    
    return ValidData

In [17]:
def normalise(Data,type):
    
    # Check dimension
    Dim = Data.ndim
    
    # Create an empty matrix
    NormData = np.zeros((Data.shape[0],Data.shape[1]))
    
    # Case of vector input
    if Dim == 1:
        for n in range(0,Data.shape[0]):
            Data = (Data-min(Data))/(max(Data)-min(Data))
            
    # Case of matrix input
    else:
        
    # Normalise by column
        if type == 0:
            for n in range(0,Data.shape[1]):
                DataColumn = Data[:,n]
                DataColumn = (DataColumn-min(DataColumn))/(max(DataColumn)-min(DataColumn))
                NormData[:,n] = DataColumn
                
    # Normalise by row
        else:
            for m in range(0,Data.shape[0]):
                DataRow = Data[m,:]
                DataRow = (DataRow-min(DataRow))/(max(DataRow)-min(DataRow))
                NormData[m,:] = DataRow
                
    return NormData

In [18]:
def standardise(Data,type):
    
    # Check dimension
    Dim = Data.ndim
    
    # Create an empty matrix
    StdData = np.zeros((Data.shape[0],Data.shape[1]))
    
    # Case of vector input
    if Dim == 1:
        for n in range(0,Data.shape[0]):
            Data = (Data-min(Data))/(max(Data)-min(Data))
            
    # Case of matrix input
    else:
        
    # Standardise by column
        if type == 0:
            for n in range(0,Data.shape[1]):
                DataColumn = Data[:,n]
                DataColumn = (DataColumn-np.mean(DataColumn))/np.std(DataColumn)
                StdData[:,n] = DataColumn
                
    # Standardise by row
        else:
            for m in range(0,Data.shape[0]):
                DataRow = Data[m,:]
                DataRow = (DataRow-np.mean(DataRow))/np.std(DataRow)
                StdData[m,:] = DataRow
                
    return StdData

In [81]:
def extract_cs_features(file_name):
    ## Variable declaration for features

    # Number of mfccs (+1 in order to truncate the first coefficient)
    mfcccoeff = 15
    mfcccoeff = mfcccoeff + 1

    # Number of Octave based sub-bands
    b = 8

    # FFT size
    fftsize = 4096

    # Number of filters for mel-filter
    totalfilters = 40

    # Sampling frequency
    Fs = 44100

    # Length of analysis window
    windowtime = 46.44

    # Samples in one analysis window
    windowsample = math.floor((windowtime/1000) * Fs)

    # Overlap for window
    overlaptime = 0.5 * windowtime

    # Window step
    windowstep = math.floor(Fs*((windowtime-overlaptime)/1000))

    # Melfilter
    MelFilter = melfilter(Fs,fftsize,totalfilters)

    # DCT Matrix
    c = dctmatrix(totalfilters,mfcccoeff)

    # Number of Octave based sub-bands
    b = 9

    # Parameters for Octave based Spectral Contrast
    alpha = 0.2

    # Size of modulation spectrum
    fftsize1 = 512

    # Number of sub-band for modulation spectrum
    J = 8

    # Silence Removal (0:Off,1:On)
    Srem = 1

    # Variable for modulation spectrum
    valley = np.zeros(J)
    contrast = np.zeros(J)
    
    # Load audio data
    # fs, x = read(file_name)
    x, fs = soundfile.read(file_name, dtype='float32')

    # Extract one channel (0:left, 1:right) if audio file is stereo
    if x.ndim == 2:
        x = x[:,0]

    # Normalize audio input
    audio = x/max(abs(x[:]))
    
    AFrameNum = int(np.floor((len(x)-windowstep)/windowstep))
    TFrameNum = 10
    Audiomatrix = np.zeros((windowsample,AFrameNum))
    
    # Silence Removal
    Valid = np.zeros(AFrameNum)
    for i in range(0,AFrameNum):
        StartAnalysis = i*windowstep                       # Start sample of frame
        EndAnalysis = StartAnalysis + windowsample         # End sample of frame
        Audiomatrix[:,i] = audio[StartAnalysis:EndAnalysis]
        Nonzeros = np.size((abs(audio[StartAnalysis:EndAnalysis]) > 0.0001).nonzero())    # Number of zeros in a frame
        if Nonzeros > len(audio[StartAnalysis:EndAnalysis])/2:
             Valid[i] = 1                                  # Valid only frames have samples more than half of frames

        # Extract only valid frames
        ValidFrames = np.flatnonzero(Valid)           # Number of valid frames
        AFrameNum = np.size(ValidFrames)         # New analysis frame number

    # Number of analysis windows in a texture window
    t = np.floor(AFrameNum/TFrameNum)

    # Create an empty matrix to store MFCC
    MFCC = np.zeros((mfcccoeff, AFrameNum))

    # Create an empty matrix to store spectrogram
    Spectrogram = np.zeros((int(fftsize/2), AFrameNum))

    # Create an empty matrix to store Melspectrogram
    Melspectrogram = np.zeros((int(fftsize/2), AFrameNum))

    # Create an empty matrix to store Spectral centroid
    Centroid = np.zeros(AFrameNum)

    # Create an empty matrix to store Spectral Rolloff
    Rolloff = np.zeros(AFrameNum)

    # Create an empty matrix to store Spectral Flux
    Flux = np.zeros(AFrameNum)
    X1Prev = np.zeros(int(fftsize/2))

    # Create an empty matrix to store Zero-Crossing Rate
    ZCR = np.zeros(AFrameNum)

    # Create an empty matrix to store Octave-based Spectral Contrast
    OSC = np.zeros((b*2,AFrameNum))

    # Create an empty vector to store sum of power spectrum in sub-bands
    XSum = np.zeros((b,AFrameNum))

    # Create an empty matrix to store Root Mean Square Energy and Low Energy
    RMSAnalysis = np.zeros(AFrameNum)

    Low = np.zeros(TFrameNum)
    OMSC = np.zeros((2*J,TFrameNum))
    MSFM = np.zeros((J,TFrameNum))
    MSCM = np.zeros((J,TFrameNum))

    # FFT bins equally distributed
    MScalebinstep = (fftsize1/2/J)

    # Create empty matrices to store long term features
    MFCC_Mean = np.zeros((mfcccoeff,TFrameNum))
    Centroid_Mean = np.zeros(TFrameNum)
    Rolloff_Mean = np.zeros(TFrameNum)
    Flux_Mean = np.zeros(TFrameNum)
    ZCR_Mean = np.zeros(TFrameNum)
    OSC_Mean = np.zeros((2*b,TFrameNum))
    
    # FFT to entire audio file
    Spectrum = np.absolute(np.fft.fft(audio[0:fftsize]))
    
    #============================================================================
    # Short Term Features (Analysis Window)
    #============================================================================
    
    
    for n in range(0,AFrameNum):
        
        # Windowing
        xw = Audiomatrix[:,ValidFrames[n]] * np.hamming(windowsample)
        
        # Spectrum
        X = abs(np.fft.fft(xw,n=fftsize))
        
        # Normalise
        X1 = X / math.sqrt(fftsize*windowsample)
        
        # Compute Root Mean Square Energy
        RMSAnalysis[n] = math.sqrt(1/len(xw)*sum(xw**2))
        
        # Compute Zero-Crossing Rate
        ZCR[n] = zerocrossing(xw)
        
        # Trancate half of spectrum
        X1 = X1[0:int(fftsize/2)]
        
        # Compute Spectral Centroid
        Centroid[n] = centroid(X1,fftsize,Fs)
        
        # Compute Spectral Rolloff
        Rolloff[n] = rolloff(0.89,X1)
        
        # Compute Spectral Flux
        Flux[n] = math.sqrt((sum((X1 - X1Prev)**2))/(Fs/2))
        
        # Compute Octave-based Spectral Contrast
        [OSC[:,n],XSum[:,n]] = osc(Fs,X1,fftsize,alpha)
        
        #Store FFT result
        X1Prev = X1
        
        # Apply Mel scale filter
        Melfft = np.matmul(MelFilter,X)
        
        # Log scale
        earMag = np.log10(Melfft**2)
        
        # Apply DCT to cepstrum
        M = c.dot(earMag)
        
        #Store MFCC into matrix
        MFCC[:,n] = M
        
    # Remove useless data
    MFCC = datacleaner(MFCC,0,0)
    RMSAnalysis = datacleaner(RMSAnalysis,0,0)
    Centroid = datacleaner(Centroid,0,0)
    Rolloff = datacleaner(Rolloff,0,0)
    Flux = datacleaner(Flux,0,0)
    ZCR = datacleaner(ZCR,0,0)
    OSC = datacleaner(OSC,0,0)
    
    #============================================================================
    # Long Term Features (Texture Window)
    #============================================================================
    
    # For each texture window
    for l in range(0,TFrameNum):
        StartTexture = int(l*t)                      # Start point of texture window
        EndTexture = int(StartTexture + t)           # End point of texture window

        if EndTexture >= AFrameNum:
            EndTexture = AFrameNum-1           # End analysis window to avoid exceeding Analysis frame length

        # Average of RMS energy in texture window
        RMSAverage = np.mean(RMSAnalysis[StartTexture:EndTexture])

        # Store RMS energy from analysis window into texture window
        LowRMS = (RMSAverage > RMSAnalysis[StartTexture:EndTexture])

        if len(RMSAnalysis[StartTexture:EndTexture]) == 0:
            Low[l] = np.nan
        else:
        # Compute Low Energy
            Low[l] = (sum(LowRMS)/len(RMSAnalysis[StartTexture:EndTexture]))*100

        # Sum of power spectrum in Sub-band across texture window (8*32)
        T = np.arange(StartTexture,EndTexture)
        E = XSum[:,StartTexture:EndTexture]
        Epadded = np.hstack((E,np.zeros((b,fftsize1-len(T)))))   # Zero padding to make 512 length vector

        M = abs(np.fft.fft(Epadded,n=fftsize1,axis=1))           # Apply fft to each row to get modulation spectrum
        M = M[:,0:int(fftsize1/2)]                               # Truncate half

        for jj in range(0,J):
            Mb = M[jj,:]
            Start = int(jj*MScalebinstep)
            End = int(Start+MScalebinstep)
            Mframe = Mb[Start:End]                                         # Take out FFT frame
            peak = max(np.log10(Mframe[:]))                                # Calculate peaks from each frame
            minimum = min(np.log10(Mframe[:]))                             # Calculate valley from each frame
            valley[jj] = min(np.log10(Mframe[1:round(MScalebinstep/2)]))   # Search valley from first half frame
            contrast[jj] = peak - minimum                                  # Calculate contrast

        # Combine features to create Octave-based Modulation Spectral Contrast
        OMSC[:,l] = np.hstack((contrast,valley))

        def geo_mean(iterable):
            a = np.log(iterable)
            return np.exp(a.sum()/len(a))

        for k in range(0,b-1):
            MSFM[k,l] = geo_mean(M[k,:])/np.mean(M[k,:])
            MSCM[k,l] = max(M[k,:])/np.mean(M[k,:])   
            
        # Compute long-term features (From Texture window)
        MFCC_Mean[:,l] = np.mean(MFCC[:,StartTexture:EndTexture],axis=1)
        Centroid_Mean[l] = np.mean(Centroid[StartTexture:EndTexture])
        Rolloff_Mean[l] = np.mean(Rolloff[StartTexture:EndTexture])
        Flux_Mean[l] = np.mean(Flux[StartTexture:EndTexture])
        ZCR_Mean[l] = np.mean(ZCR[StartTexture:EndTexture])
        OSC_Mean[:,l] = np.mean(OSC[:,StartTexture:EndTexture],axis=1) 
        
    MFCC = np.mean(MFCC_Mean,axis=1)
    MFCC = MFCC[1:]
    Centroid = np.mean(Centroid_Mean)
    Rolloff = np.mean(Rolloff_Mean)
    Flux = np.mean(Flux_Mean)
    ZCR = np.mean(ZCR_Mean)
    OSC = np.mean(OSC_Mean,axis=1)
    Low = np.mean(Low,axis=0)
    OMSC = np.mean(OMSC,axis=0)
    MSFM = np.mean(MSFM,axis=0)
    MSCM = np.mean(MSCM,axis=0)

    return MFCC, Centroid, Rolloff, Flux, ZCR, OSC, Low, OMSC, MSFM, MSCM

In [89]:
def read_audio_files(path):
    genres = os.listdir(path)
    list_features = []
    normalization = True
    standardization = False

    
    columns_features = ['MFCC','Centroid','Rolloff','Flux','Zero Crossing Rate','OSC',
                        'Low Energy','OMSC','MSFM','MSCM','Genre']
    print("Extracting CS Features:", path)
    genreId = 0
    labelId = 0
    genres=sorted(os.listdir('genres'))
    for genre in genres:
        print("Processing", GENRES[genreId])
        files = os.listdir(os.path.join(path,genre))
        # Create label vector
        labelId = labelId + 1
        label = labelId*np.ones(len(files))
        label = np.array([label]).T
        fileId = 0
        for file in sorted(files):
            
            # Process one file
            try:
                featurelist = extract_cs_features(os.path.join(path,genre,file))
            except Exception as e:
                print("Error CS-Features Extracton:", file," - ", e)
                
            # Append to list of files processed
            featurefile = []
            for i in featurelist:
                featurefile = np.append(featurefile,i)
            if fileId == 0:
                featuregenre = np.zeros(len(featurefile))
            featuregenre = np.vstack([featuregenre,featurefile])
            fileId = fileId + 1
         # Transpose feature matrix and add label   
        featuregenre = featuregenre[1:]
        featuregenre = np.hstack([featuregenre, label])
        if genreId == 0:
            feature_set = np.zeros(len(featurefile)+1)
        feature_set = np.vstack([feature_set, featuregenre])
        genreId = genreId + 1
    # Extract first row                           
    feature_set = feature_set[1:]
    Data = pandas.DataFrame(feature_set)
    if normalization:
        Content = Data.iloc[:,0:Data.shape[1]-1]
        Content = pandas.DataFrame(normalise(Content.values,0))
    elif standardization:
        Content = Data.iloc[:,0:Data.shape[1]-1]
        Content = pandas.DataFrame(standardise(Content.values,0))
    Label = Data.iloc[:,Data.shape[1]-1]
    # Combine data and label
    Data = pandas.concat([Content, Label], axis=1)
    Data.to_csv("Data.csv", index = False, header = False)
    return Data

In [47]:
def XGB(X,y):
    parameters = {'max_depth': [2,4,6],'n_estimators': [50,100,200]}
    model = XGBClassifier()
    # model = GridSearchCV(model, parameters, verbose=1)
    model.fit(X,y)
    score = cross_val_score(model, X, y, cv=5)
    return np.mean(score)

In [24]:
def MLP(X,y):
    # Split Train and Test Data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
       
    # Define early_stopping_monitor
    early_stopping_monitor = EarlyStopping(patience=2)

    # Set up the model: model
    model = Sequential()

    # Learning Rate
    LearningRate = 0.001

    # Add the first layer
    model.add(Dense(200, activation='relu', input_shape=(X_train.shape[1],)))

    # Add the second layer
    model.add(Dense(150,activation='relu'))

    # Add the second layer
    model.add(Dense(150,activation='relu'))

    # Add the output layer
    model.add(Dense(10))
    
    # Create optimizer
    my_optimizer = SGD(lr=LearningRate)    
    
    # RUn the model
    model.compile(optimizer='adam', loss='mean_squared_error',metrics=['accuracy'])
    model_training = model.fit(X_train,y_train,callbacks=[early_stopping_monitor],
                               nb_epoch=50,shuffle=False)
    
    # Save model
    model.save('MGCmlp.h5')
    
    # Accuracy
    plt.plot(model_training.history['acc'])
    plt.plot(model_training.history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    
    #Loss
    plt.plot(model_training.history['loss'])
    plt.plot(model_training.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    
    # Evaluation with test data
    score = model.evaluate(X_test,y_test)
    
    return score[1]

In [4]:
def SVM(X,y):
    parameters = {'kernel':('linear','rbf'), 'C':[1,10]}
    svc = SVC()
    model = GridSearchCV(svc, parameters)  
    model.fit(X,y)
    score = cross_val_score(model, X, y, cv=5)
    return np.mean[score]        

In [8]:
# Get Data from CSV file
Data = pandas.read_csv('Data.csv', index_col=0)
cols = Data.shape[1] - 2
X=Data.iloc[:,0:cols]
y=Data.iloc[:,-1]

In [11]:
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,57,58,59,60,61,62,63,64,65,66
0,0.563198,0.445048,0.601508,0.455214,0.513780,0.507315,0.674208,0.535061,0.633776,0.423044,...,0.756991,0.484737,0.429256,0.429559,0.407474,0.472099,0.387857,0.361677,0.443168,0.447112
1,0.640304,0.629122,0.571163,0.562890,0.576005,0.508806,0.705688,0.413399,0.673335,0.438794,...,0.842136,0.272671,0.281753,0.366261,0.258230,0.314706,0.256542,0.281448,0.298581,0.464257
2,0.623779,0.329508,0.500627,0.431797,0.551881,0.357594,0.504142,0.458028,0.454041,0.324445,...,0.790457,0.419648,0.470212,0.476546,0.480814,0.446901,0.324030,0.429120,0.536032,0.500114
3,0.753933,0.552371,0.496908,0.516611,0.531668,0.536423,0.589398,0.537669,0.410683,0.358395,...,0.805061,0.377011,0.262232,0.283385,0.288348,0.237240,0.292371,0.324315,0.291555,0.359601
4,0.498057,0.161718,0.356034,0.099462,0.507805,0.291271,0.538568,0.453084,0.440065,0.347695,...,0.839817,0.482779,0.335914,0.394027,0.385937,0.373176,0.241993,0.255361,0.327950,0.373066
5,0.538842,0.307466,0.428749,0.133437,0.476535,0.278166,0.574013,0.416664,0.438688,0.428633,...,0.816025,0.351735,0.317831,0.233227,0.296039,0.162738,0.242017,0.441318,0.261387,0.355904
6,0.658230,0.162207,0.555106,0.343956,0.137804,0.285747,0.533947,0.415703,0.253174,0.387334,...,0.897740,0.351134,0.246131,0.282574,0.348704,0.059944,0.250578,0.254372,0.350169,0.247879
7,0.646568,0.308450,0.570354,0.324268,0.251320,0.191465,0.538774,0.274323,0.313576,0.312609,...,0.795379,0.453491,0.280035,0.420402,0.195650,0.349798,0.317247,0.482368,0.337207,0.318232
8,0.573516,0.486769,0.621768,0.452429,0.522707,0.490828,0.724562,0.398742,0.506279,0.225785,...,0.811460,0.496858,0.353406,0.429678,0.421676,0.394578,0.356767,0.385742,0.434555,0.391196
9,0.545296,0.422534,0.539976,0.214248,0.588825,0.512761,0.643420,0.517208,0.761863,0.517583,...,0.640023,0.466862,0.212534,0.415132,0.426710,0.457398,0.254187,0.392393,0.460150,0.417777


In [49]:
# XGBoosting
print("XGBoosting:")
start=time.time()
XGB_score=XGB(X,y)
end = time.time()
XGB_time = end - start
print("Accuracy: ", round(XGB_score * 100,2),"%")
print("Time: ", round(XGB_time,2), " seconds.")

XGBoosting:
Accuracy:  54.5 %
Time:  17.28  seconds.


In [50]:
# Generate CS features for GTZAN in CSV file
start=time.time()
read_audio_files('genres')
end=time.time()
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)
print("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

00:00:00.00
