# Setup specific to Google Colab

In [None]:
# Upload the generate_data.py file so that it can be imported in this session.

from google.colab import files

# Bind the result to a name rather than leaving the call as the cell's last
# expression: per the Colab docs, files.upload() returns a mapping of
# file name -> file contents, which would otherwise be echoed in full as the
# cell output.
uploaded = files.upload()

In [None]:
# Mount Google Drive so the data stored there is reachable from this runtime.

from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

# Setting up the data

In [0]:
!pip install wikipedia
import numpy as np
from sklearn.model_selection import train_test_split

In [0]:
# Read the saved feature and label arrays from the mounted Google Drive folder.
data_dir = '/content/drive/My Drive/Machine Learning/Language classifier'

features = np.load(data_dir + '/features.npy')
labels = np.load(data_dir + '/labels.npy')

In [0]:
# Hold out 1% of the samples; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.01,
    random_state=42,
)

In [0]:
# The feed-forward model is fed one flat vector per sample, so the second and
# third axes of each array are merged into a single axis.

def _merge_axes_1_and_2(batch):
    """Reshape `batch` to (shape[0], shape[1] * shape[2])."""
    n_samples = batch.shape[0]
    flat_width = batch.shape[1] * batch.shape[2]
    return batch.reshape(n_samples, flat_width)


X_train_flat = _merge_axes_1_and_2(X_train)
X_test_flat = _merge_axes_1_and_2(X_test)


# Building the models



In [0]:
import keras
from keras.layers import Dense, LSTM, SeparableConv1D, Flatten, Conv1D
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint
import time

In [0]:
def _best_val_acc_checkpoint(path):
    """Build a ModelCheckpoint that writes to `path` only when 'val_acc' reaches a new maximum."""
    # NOTE(review): the logged metric name depends on the Keras version
    # ('val_acc' vs 'val_accuracy') - confirm it matches the installed release.
    return ModelCheckpoint(
        path,
        monitor='val_acc',
        verbose=0,
        save_best_only=True,
        mode='max',
    )


# One checkpoint callback (and output file) per architecture.
checkpointFF = _best_val_acc_checkpoint('modelFF.hdf5')
checkpointCNN = _best_val_acc_checkpoint('modelCNN.hdf5')
checkpointCNNsep = _best_val_acc_checkpoint('modelCNNsep.hdf5')
checkpointRNN = _best_val_acc_checkpoint('modelRNN.hdf5')

In [0]:
# Feed-forward classifier: four ReLU hidden layers on the flattened 26*12 input,
# followed by a 3-way softmax output.
modelFF = Sequential([
    Dense(256, activation='relu', input_dim=26 * 12),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    Dense(3, activation='softmax'),
])

modelFF.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the feed-forward model on the flattened data and report the wall-clock time.
# The held-out split doubles as validation data, and checkpointFF is attached.
time0 = time.time()

historyFF = modelFF.fit(
    X_train_flat,
    y_train,
    batch_size=1028,
    epochs=100,
    validation_data=(X_test_flat, y_test),
    callbacks=[checkpointFF],
    verbose=1,
)

elapsed = time.time() - time0
print('Time taken to run 100 epochs: ', elapsed)

In [0]:
# Recurrent classifier: a single 128-unit LSTM over the (26, 12) input,
# followed by a 3-way softmax output.
modelRNN = Sequential([
    LSTM(128, input_shape=(26, 12)),
    Dense(3, activation='softmax'),
])

modelRNN.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the LSTM model on the unflattened data and report the wall-clock time.
# The held-out split doubles as validation data, and checkpointRNN is attached.
time0 = time.time()

historyRNN = modelRNN.fit(
    X_train,
    y_train,
    batch_size=1028,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=[checkpointRNN],
    verbose=1,
)

elapsed = time.time() - time0
print('Time taken to run 100 epochs: ', elapsed)

In [0]:
# Convolutional classifier: five ReLU Conv1D layers (all channels_first),
# flattened into a 3-way softmax output.
cnn_layer_opts = dict(activation='relu', data_format='channels_first')

modelCNN = Sequential([
    Conv1D(128, 2, input_shape=(26, 12), **cnn_layer_opts),
    Conv1D(64, 3, **cnn_layer_opts),
    Conv1D(64, 4, **cnn_layer_opts),
    Conv1D(64, 2, **cnn_layer_opts),
    Conv1D(64, 2, **cnn_layer_opts),
    Flatten(),
    Dense(3, activation='softmax'),
])

modelCNN.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the Conv1D model on the unflattened data and report the wall-clock time.
# The held-out split doubles as validation data, and checkpointCNN is attached.
time0 = time.time()

historyCNN = modelCNN.fit(
    X_train,
    y_train,
    batch_size=1028,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=[checkpointCNN],
    verbose=1,
)

elapsed = time.time() - time0
print('Time taken to run 100 epochs: ', elapsed)

In [0]:
# Same layer layout as the Conv1D model, built from SeparableConv1D layers
# (all channels_first), flattened into a 3-way softmax output.
sep_layer_opts = dict(activation='relu', data_format='channels_first')

modelCNNsep = Sequential([
    SeparableConv1D(128, 2, input_shape=(26, 12), **sep_layer_opts),
    SeparableConv1D(64, 3, **sep_layer_opts),
    SeparableConv1D(64, 4, **sep_layer_opts),
    SeparableConv1D(64, 2, **sep_layer_opts),
    SeparableConv1D(64, 2, **sep_layer_opts),
    Flatten(),
    Dense(3, activation='softmax'),
])

modelCNNsep.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the SeparableConv1D model on the unflattened data and report the wall-clock time.
# The held-out split doubles as validation data, and checkpointCNNsep is attached.
time0 = time.time()

historyCNNsep = modelCNNsep.fit(
    X_train,
    y_train,
    batch_size=1028,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=[checkpointCNNsep],
    verbose=1,
)

elapsed = time.time() - time0
print('Time taken to run 100 epochs: ', elapsed)

In [None]:
import json

# Write each model's training history (the dict of per-epoch metric lists held
# in History.history) to its own JSON file.
#
# default=float: Keras can record metric values as NumPy scalars (e.g.
# float32), which json cannot serialise natively; the fallback converts any
# such value to a plain Python float instead of raising TypeError.
histories_by_file = (
    ('historyFF.json', historyFF),
    ('historyRNN.json', historyRNN),
    ('historyCNN.json', historyCNN),
    ('historyCNNsep.json', historyCNNsep),
)

for history_path, history_obj in histories_by_file:
    with open(history_path, 'w') as f:
        json.dump(history_obj.history, f, default=float)