In [0]:
import os
import numpy as np
import pandas as pd
import scipy.io.wavfile
import scipy.signal
import matplotlib.pyplot as plt
import keras
from random import sample 
%matplotlib inline

Using TensorFlow backend.


In [0]:
from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [0]:
os.chdir('/gdrive/My Drive')

In [0]:
''' SOURCE: https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly '''
class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, list_IDs, labels, batch_size=32, dim=(32,32,32), n_channels=1,
                 n_classes=10, shuffle=True):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size), dtype=int)

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            # Store sample
            #X[i,:,:,0] = np.transpose(np.genfromtxt('processed/%d.csv' % ID, delimiter=','))
            X[i,:,:,0] = np.transpose(np.genfromtxt('processed/%s/%d.csv' % (str(ID)[0],ID), delimiter=','))

            # Store class
            y[i] = self.labels.loc[ID+1, 1]

        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

In [0]:
# memory footprint support libraries/code
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab and isn’t guaranteed
gpu = GPUs[0]
def printm():
 process = psutil.Process(os.getpid())
 print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
 print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
printm()

Gen RAM Free: 8.0 GB  | Proc size: 10.5 GB
GPU RAM Free: 11441MB | Used: 0MB | Util   0% | Total 11441MB


In [0]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization, LSTM
from keras.layers import TimeDistributed, Flatten, Dense, Reshape, Dropout

DATASET_SIZE = 5978
TRAIN_SIZE = int(.8*DATASET_SIZE)


In [0]:
#indices = [int(i.split('.')[0]) for i in os.listdir('processed') if 'label' not in i]
indices = range(1,DATASET_SIZE+1)

In [0]:
labels = pd.read_csv('processed/labels.csv',index_col=0,header=None) # Labels

In [0]:
np.mean(labels[1]==1)

0.5326196052191369

In [0]:
''' 
  Based on the Neural network architecture from: 

    Gianluca Micchi. A neural network for composer classification. 
    International Society for Music Information Retrieval Conference (ISMIR 2018), 2018, Paris, France. <hal-01879276>

  and Keras code from https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
'''



# Parameters
params = {'dim': (883,233),
          'batch_size': 64,
          'n_classes': 10,
          'n_channels': 1,
          'shuffle': True}

labels = pd.read_csv('processed/labels.csv',index_col=0,header=None) # Labels
#labels[1] = 1*(labels[1]==1)

#indices = list(labels.index[labels[1]!=1]-1)

#DATASET_SIZE = len(indices)
#TRAIN_SIZE = int(.7*DATASET_SIZE)


#indices = range(1, DATASET_SIZE+1)
#indices = [int(i.split('.')[0]) for i in os.listdir('processed') if 'label' not in i]
trainIDs = sample(indices, TRAIN_SIZE)
valIDs = [i for i in indices if i not in trainIDs]
testIDs = sample(valIDs, int(.5*len(valIDs)))
valIDs = [i for i in valIDs if i not in testIDs]

# Datasets
partition = {'train': trainIDs, 'validation': valIDs, 'test': testIDs} # IDs

# Generators
training_generator = DataGenerator(partition['train'], labels, **params)
validation_generator = DataGenerator(partition['validation'], labels, **params)
testing_generator = DataGenerator(partition['test'], labels, **params)


In [0]:
# Design model
model = Sequential()

model.add(Conv2D(16, (16, 8), input_shape = (883, 233, 1)))
model.add(MaxPooling2D(pool_size=(4, 2)))
model.add(BatchNormalization())
model.add(Dropout(.3))

model.add(Conv2D(16, (16, 8)))
model.add(MaxPooling2D(pool_size=(4, 2)))
model.add(BatchNormalization())
model.add(Dropout(.3))

model.add(Conv2D(16, (16, 8)))
model.add(MaxPooling2D(pool_size=(4, 2)))
model.add(BatchNormalization())

model.add(Reshape([8,23*16]))

model.add(LSTM(10, return_sequences=True))
model.add(Dropout(.3))
model.add(LSTM(10))

model.add(Dense(10))
model.add(Dropout(.1))
model.add(Dense(10, activation='softmax'))

model.compile(loss='categorical_crossentropy', metrics=['categorical_accuracy'], optimizer='adam')

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [0]:
class_weights = {labels.shape[0]*1.0/v.count() for k,v in labels.groupby([1])}

TypeError: ignored

In [0]:
v.count()

1    43
dtype: int64

In [0]:
# Train model on dataset
model.fit_generator(generator=training_generator, epochs=30,
                    class_weights=
                    validation_data=validation_generator,
                    use_multiprocessing=True,
                    workers=6)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30

Process ForkPoolWorker-319:
Process ForkPoolWorker-324:
Process ForkPoolWorker-323:
Process ForkPoolWorker-321:
Process ForkPoolWorker-316:
Process ForkPoolWorker-320:
Process ForkPoolWorker-313:
Process ForkPoolWorker-322:
Process ForkPoolWorker-317:
Process ForkPoolWorker-318:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Process ForkPoolWorker-314:
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, 

KeyboardInterrupt: ignored

In [0]:
model.evaluate_generator(generator=testing_generator, 
                    use_multiprocessing=True,
                    workers=6)

[1.9089533620410495, 0.5920138888888888]

In [0]:
labels.iloc[np.array(valIDs)-1][1].mean()

NameError: ignored

In [0]:
labels.iloc[np.array(testIDs)-1][1].mean()

0.5585284280936454

In [0]:
os.sytem('progress after 30')

In [0]:
# Train model on dataset
model.fit_generator(generator=training_generator, epochs=150,
                    validation_data=validation_generator,
                    use_multiprocessing=True,
                    workers=6)

In [0]:
testing_generator = DataGenerator(partition['test'], labels, **params)
