In [1]:
# Input location and file-naming convention

# Folder that is searched for the syllable spectrogram .mat files
path_to_data_files = (
    '/Users/yardenc/Documents/Experiments/Behavior/Data/CanaryData/'
    'Chondroitinase/llb11/Individual syllable spects/'
)
# Leading part of the name shared by every input .mat file
file_prefix = 'syllable_spects_'

# imports

import numpy as np
import scipy.io as cpio
import os
import matplotlib.pyplot as plt
import glob
from sklearn.neighbors import LocalOutlierFactor
from sklearn.ensemble import IsolationForest

# Fix NumPy's global random seed
np.random.seed(42)

# Gather the input files: switch into the data folder, then list every
# .mat file whose name starts with the configured prefix
os.chdir(path_to_data_files)
matfiles = glob.glob('{0}*.mat'.format(file_prefix))
numfiles = len(matfiles)
print(matfiles)

# For every input file: load its spectrograms, zero-pad them to a common
# width, flag outlier syllables with an Isolation Forest, and save the
# per-syllable predictions to 'output_<input file name>'.
for fnum0, fname0 in enumerate(matfiles):

    print('Working on ' + fname0)
    data0 = cpio.loadmat(fname0)

    # Container of this file's spectrograms; entry k is read as
    # spects0[k][0] and is treated below as a 2-D array whose second
    # axis is time bins (presumably frequency x time -- confirm against
    # the code that wrote the .mat files).
    spects0 = data0['syllable_spects'][0][0][1]
    num_syls0 = len(spects0)
    print('We have ' + str(num_syls0) + ' syllables in this class')

    # Find max number of time bins and complete spectrograms by zero padding
    # (keeping the spectrogram centered)
    max_bins = np.max([np.shape(spects0[k][0])[1] for k in range(num_syls0)])
    for sylcnt in range(num_syls0):
        spect = spects0[sylcnt][0]
        numrows, numbins = np.shape(spect)
        bins_to_add = max_bins - numbins
        # Floor division keeps the pad widths integral: np.zeros rejects a
        # float shape, which is what true division yields on Python 3.
        # When the padding is odd, the extra column goes after the data.
        bins_before = bins_to_add // 2
        bins_after = bins_to_add - bins_before
        # Pad with the spectrogram's own row count rather than a fixed 513
        # so files with a different number of rows are handled as well.
        spects0[sylcnt][0] = np.concatenate(
            (np.zeros((numrows, bins_before)),
             spect,
             np.zeros((numrows, bins_after))), axis=1)

    print('All spectrograms were zero padded to equally have ' + str(max_bins) + ' time bins.')

    # Reshape dataset: flatten every padded spectrogram into one feature
    # vector per syllable
    X0 = [np.reshape(spects0[k][0], (-1,)) for k in range(num_syls0)]

    # Train model: Isolation Forest with contamination=0.01 and a fixed
    # random_state. (Alternative tried earlier:
    # LocalOutlierFactor(n_neighbors=20, contamination=0.01) with
    # fit_predict(X0).)
    clf = IsolationForest(contamination=0.01,
                          random_state=42)

    # One label per syllable, in the same order as X0
    y_pred = clf.fit(X0).predict(X0)

    # Save results next to the input file (the working directory is the
    # data folder)
    output_filename = 'output_' + fname0
    cpio.savemat(output_filename, {'output': y_pred})
        

['syllable_spects_17.mat', 'syllable_spects_16.mat', 'syllable_spects_8.mat', 'syllable_spects_14.mat', 'syllable_spects_28.mat', 'syllable_spects_29.mat', 'syllable_spects_15.mat', 'syllable_spects_9.mat', 'syllable_spects_11.mat', 'syllable_spects_10.mat', 'syllable_spects_12.mat', 'syllable_spects_13.mat', 'syllable_spects_2.mat', 'syllable_spects_22.mat', 'syllable_spects_23.mat', 'syllable_spects_3.mat', 'syllable_spects_1.mat', 'syllable_spects_21.mat', 'syllable_spects_20.mat', 'syllable_spects_4.mat', 'syllable_spects_18.mat', 'syllable_spects_24.mat', 'syllable_spects_25.mat', 'syllable_spects_19.mat', 'syllable_spects_5.mat', 'syllable_spects_7.mat', 'syllable_spects_27.mat', 'syllable_spects_26.mat', 'syllable_spects_6.mat']
Working on syllable_spects_17.mat
We have 233 syllables in this class
All spectrograms were zero padded to equally have 135 time bins.
Working on syllable_spects_16.mat
We have 216 syllables in this class
All spectrograms were zero padded to equally have