In [4]:
# imports

import numpy as np
import scipy.io as cpio
import os
import matplotlib.pyplot as plt
import glob
from sklearn.svm import SVC

# Work on all files: for every pair of syllable files, train a two-class SVM on the
# first half of each file's syllables, test it on the second half, and store the
# test error rate of the pair in ErrMat.


def _load_spects(fname):
    """Return the syllable spectrograms stored in one .mat file as a list of 2-D arrays.

    The arrays are read from loadmat(fname)['syllable_spects'][0][0][1][k][0],
    one entry per syllable k.
    """
    cells = cpio.loadmat(fname)['syllable_spects'][0][0][1]
    return [cells[k][0] for k in range(len(cells))]


def _pad_centered(spect, total_bins):
    """Zero-pad a 2-D spectrogram along axis 1 (time bins) up to total_bins columns.

    The original columns stay centered; when the number of missing columns is
    odd, the extra zero column goes after the data. A new array is returned and
    the input is left untouched.
    """
    rows, numbins = np.shape(spect)
    bins_to_add = total_bins - numbins
    # Floor division keeps the column counts integral on both Python 2 and 3.
    bins_before = bins_to_add // 2
    bins_after = bins_to_add - bins_before
    return np.concatenate(
        (np.zeros((rows, bins_before)), spect, np.zeros((rows, bins_after))),
        axis=1)


os.chdir('/Users/yardenc/Documents/Temp/SyllableSpects')
# Match only the syllable files. Other cells of this notebook save ErrMat.mat and
# filenum.mat into this same directory, and a bare '*.mat' would pick those up on
# a re-run and fail because they have no 'syllable_spects' variable.
matfiles = glob.glob('syllable_spects_*.mat')
numfiles = len(matfiles)
# ErrMat[i, j] is only filled for i < j (each unordered pair is compared once);
# the diagonal and the lower triangle stay at zero.
ErrMat = np.zeros((numfiles,numfiles))
for fnum0 in range(numfiles-1):
    # The first file of the pair is the same for the whole inner loop: load it once.
    fname0 = matfiles[fnum0]
    spects0 = _load_spects(fname0)
    num_syls0 = len(spects0)
    max_bins0 = np.max([np.shape(s)[1] for s in spects0])
    for fnum1 in range(fnum0+1,numfiles):
        fname1 = matfiles[fnum1]
        print('Comparing ' + fname0 + ' with ' + fname1)
        spects1 = _load_spects(fname1)
        num_syls1 = len(spects1)
        print('We have ' + str(num_syls0)  + ' syllables in the first class and ' + str(num_syls1) +
              ' in the second class')

        # Find max number of time bins in both classes and complete spectrograms by
        # zero padding (keeping the spectrogram centered)
        max_bins1 = np.max([np.shape(s)[1] for s in spects1])
        max_bins = np.max([max_bins0, max_bins1])
        # Each padded spectrogram is flattened into one feature vector per syllable.
        X0 = [np.reshape(_pad_centered(s, max_bins), (-1,)) for s in spects0]
        X1 = [np.reshape(_pad_centered(s, max_bins), (-1,)) for s in spects1]
        print('All spectrograms were zero padded to equally have ' + str(max_bins) + ' time bins.')

        # Create training and testing sets by dividing the data into 2 equal sets:
        # the first half of each class (in file order) trains, the rest tests.
        half0 = num_syls0 // 2
        half1 = num_syls1 // 2
        Xtrain = np.concatenate((X0[:half0],X1[:half1]),axis=0)
        Xtest = np.concatenate((X0[half0:],X1[half1:]),axis=0)
        # Class labels: 1 for syllables of the first file, 2 for the second file.
        Ytrain = np.concatenate((np.ones((half0,)),2*np.ones((half1,))),axis=0)
        Ytest = np.concatenate((np.ones((num_syls0-half0,)),
                                2*np.ones((num_syls1-half1,))),axis=0)

        # Train SVM classifier on data
        clf = SVC()
        clf.fit(Xtrain, Ytrain)

        # Show error rate. The labels are 1 and 2, so |prediction - truth| is 1 for
        # a misclassified syllable and 0 otherwise.
        err = np.abs(clf.predict(Xtest)-Ytest)
        # Optional per-syllable error plot, kept disabled:
        #plt.figure()
        #plt.plot(err)
        #plt.xlabel('syllable number')
        #plt.ylabel('Error')
        #plt.show()
        ErrMat[fnum0, fnum1] = np.mean(err)
        print('We get ' + str(np.sum(err)) + ' errors in a test set of ' + str(len(Ytest)) + ' syllables.')
        print('Error rate is: ' + str(np.mean(err)))


Comparing syllable_spects_8.mat with syllable_spects_9.mat
We have 2024 syllables in the first class and 2020 in the second class
All spectrograms were zero padded to equally have 18 time bins.
We get 2.0 errors in a test set of 2022 syllables.
Error rate is: 0.000989119683482
Comparing syllable_spects_8.mat with syllable_spects_208.mat
We have 2024 syllables in the first class and 2024 in the second class
All spectrograms were zero padded to equally have 28 time bins.
We get 0.0 errors in a test set of 2024 syllables.
Error rate is: 0.0
Comparing syllable_spects_8.mat with syllable_spects_308.mat
We have 2024 syllables in the first class and 2013 in the second class
All spectrograms were zero padded to equally have 41 time bins.
We get 3.0 errors in a test set of 2019 syllables.
Error rate is: 0.00148588410104
Comparing syllable_spects_8.mat with syllable_spects_304.mat
We have 2024 syllables in the first class and 2001 in the second class
All spectrograms were zero padded to equally 

In [None]:
# Display the pairwise error-rate matrix as an image. Row i / column j is the
# pair (first-class file i, second-class file j); the axes, title and colour
# scale are labelled so the figure can be read on its own.
plt.figure()
plt.imshow(ErrMat)
cbar = plt.colorbar()
cbar.set_label('Test error rate')
plt.xlabel('File index (second class)')
plt.ylabel('File index (first class)')
plt.title('Pairwise SVM test error rate')
plt.show()
    

In [5]:
# Write the error-rate matrix to ErrMat.mat (current directory) under the
# MATLAB variable name 'ErrMat'.
cpio.savemat('ErrMat.mat', dict(ErrMat=ErrMat))

In [None]:
# Scratch expression: evaluates range(3, 15) so the notebook displays it; the value
# is not assigned to anything.
# NOTE(review): looks like a leftover experiment — confirm it can be removed.
range(3,15)

In [1]:
import numpy as np
import scipy.io as cpio
import os
import matplotlib.pyplot as plt
import glob

In [2]:
# Read ErrMat.mat back from the current directory. loadmat returns a dict keyed by
# variable name, so after this line ErrMat is that dict and the matrix itself is
# stored under ErrMat['ErrMat'].
ErrMat = cpio.loadmat(file_name='ErrMat.mat')

In [12]:
# List the syllable files and pull the numeric id out of each name
# ('syllable_spects_<id>.mat'). Filtering by prefix replaces the previous
# glob('*.mat')[1:], which assumed the one non-syllable .mat file (e.g. ErrMat.mat)
# is always listed first and that no other .mat file is present.
syl_prefix = 'syllable_spects_'
matfiles = glob.glob(syl_prefix + '*.mat')
# Strip the prefix and the 4-character '.mat' extension, leaving only the id.
filenum = [int(k[len(syl_prefix):-4]) for k in matfiles]

In [13]:
# Show the extracted file ids, in the same order as matfiles.
print(str(filenum))

[8, 9, 208, 308, 304, 305, 306, 500, 302, 303, 301, 300, 207, 2, 401, 400, 3, 206, 204, 1, 0, 205, 201, 4, 5, 200, 7, 404, 6, 203]


In [14]:
# Write the list of file ids to filenum.mat (current directory) under the
# MATLAB variable name 'filenum'.
cpio.savemat('filenum.mat', dict(filenum=filenum))