In [1]:
from nltk import word_tokenize
from nltk.stem.porter import PorterStemmer
from nltk.corpus import stopwords
import re
 
ENStopWords = stopwords.words('english')
 
def tokenize(text):
    min_length = 3
    words = map(lambda word: word.lower(), word_tokenize(text))
    words = [word for word in words if word not in ENStopWords]
    tokens = (list(map(lambda token: PorterStemmer().stem(token), words)))
    p = re.compile('[a-zA-Z]+');
    
    filtered_tokens = list(filter (lambda token: p.match(token) and len(token) >= min_length, tokens))
    return filtered_tokens

In [2]:
from nltk.corpus import stopwords, reuters
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsRestClassifier
import numpy as np
# fix random seed for reproducibility
seed = 42
np.random.seed(seed)
from sklearn.metrics import f1_score, precision_score, recall_score
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation

Using TensorFlow backend.


In [3]:
# List of document ids
documents = reuters.fileids()
 
train_docs_id = list(filter(lambda doc: doc.startswith('train'), documents))
test_docs_id = list(filter(lambda doc: doc.startswith('test'), documents))
 
train_docs = [reuters.raw(doc_id) for doc_id in train_docs_id]
test_docs = [reuters.raw(doc_id) for doc_id in test_docs_id]

In [4]:
# Tokenisation
vectorizer = TfidfVectorizer(stop_words=ENStopWords, tokenizer=tokenize)

In [5]:
# Learn and transform train documents
vectorised_train_documents = vectorizer.fit_transform(train_docs)
vectorised_test_documents = vectorizer.transform(test_docs)

  'stop_words.' % sorted(inconsistent))


In [6]:
vectorised_train_documents

<7769x20682 sparse matrix of type '<class 'numpy.float64'>'
	with 370246 stored elements in Compressed Sparse Row format>

In [7]:
vectorised_test_documents

<3019x20682 sparse matrix of type '<class 'numpy.float64'>'
	with 128981 stored elements in Compressed Sparse Row format>

In [8]:
# Transform multilabel labels
mlb = MultiLabelBinarizer()
train_labels = mlb.fit_transform([reuters.categories(doc_id) for doc_id in train_docs_id])
test_labels = mlb.transform([reuters.categories(doc_id) for doc_id in test_docs_id])

# Modeling

### 1st architecture

The first architecture that will be tested is the simplest one - input layer with reasonable amount of neurons, activation and output layer

In [9]:
model = Sequential()
model.add(Dense(512, input_shape=(20682,)))
model.add(Activation('relu'))
model.add(Dense(90))
model.add(Activation('softmax'))

In [10]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [11]:
history = model.fit(vectorised_train_documents, train_labels,
                    batch_size=457,
                    epochs=20,
                    verbose=1,
                    validation_split=0.1)

Train on 6992 samples, validate on 777 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [12]:
score = model.evaluate(vectorised_test_documents, test_labels, batch_size=457, verbose=1)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Test score: 8.613382604825809
Test accuracy: 0.7442861795425415


In [13]:
predictions = model.predict(vectorised_test_documents)
predictions_fixed = predictions > 0.5

In [14]:
precision = precision_score(test_labels, predictions_fixed, average='micro')
recall = recall_score(test_labels, predictions_fixed, average='micro')
f1 = f1_score(test_labels, predictions_fixed, average='micro')

print('Micro-average quality numbers')
print('Precision: {:.4f}, Recall: {:.4f}, F1-measure: {:.4f}'.format(precision, recall, f1))

Micro-average quality numbers
Precision: 0.9029, Recall: 0.6132, F1-measure: 0.7304


In [15]:
precision = precision_score(test_labels, predictions_fixed, average='macro')
recall = recall_score(test_labels, predictions_fixed, average='macro')
f1 = f1_score(test_labels, predictions_fixed, average='macro')

print('Macro-average quality numbers')
print('Precision: {:.4f}, Recall: {:.4f}, F1-measure: {:.4f}'.format(precision, recall, f1))

Macro-average quality numbers
Precision: 0.2614, Recall: 0.1126, F1-measure: 0.1414


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


### 2nd architecture

The second architecture will be identical to the first one, except one extra middle layer

In [16]:
model = Sequential()
model.add(Dense(512, input_shape=(20682,)))
model.add(Activation('relu'))
model.add(Dense(258))
model.add(Activation('relu'))
model.add(Dense(90))
model.add(Activation('softmax'))

In [17]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [18]:
history = model.fit(vectorised_train_documents, train_labels,
                    batch_size=457,
                    epochs=20,
                    verbose=1,
                    validation_split=0.1)

Train on 6992 samples, validate on 777 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [19]:
score = model.evaluate(vectorised_test_documents, test_labels, batch_size=457, verbose=1)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Test score: 615.5082674737243
Test accuracy: 0.06525339186191559


In [20]:
predictions = model.predict(vectorised_test_documents)
predictions_fixed = predictions > 0.5

In [21]:
precision = precision_score(test_labels, predictions_fixed, average='micro')
recall = recall_score(test_labels, predictions_fixed, average='micro')
f1 = f1_score(test_labels, predictions_fixed, average='micro')

print('Micro-average quality numbers')
print('Precision: {:.4f}, Recall: {:.4f}, F1-measure: {:.4f}'.format(precision, recall, f1))

Micro-average quality numbers
Precision: 0.0802, Recall: 0.0646, F1-measure: 0.0716


In [22]:
precision = precision_score(test_labels, predictions_fixed, average='macro')
recall = recall_score(test_labels, predictions_fixed, average='macro')
f1 = f1_score(test_labels, predictions_fixed, average='macro')

print('Macro-average quality numbers')
print('Precision: {:.4f}, Recall: {:.4f}, F1-measure: {:.4f}'.format(precision, recall, f1))

Macro-average quality numbers
Precision: 0.0283, Recall: 0.0202, F1-measure: 0.0100


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


### 3rd architecture

The second architecture shows much worse results than the first one, even though we saw quite good accuracy at epoch 6. 
This looks like overfitting, thus lets try to add dropout

In [23]:
model = Sequential()
model.add(Dense(512, input_shape=(20682,)))
model.add(Dropout(0.5))
model.add(Activation('relu'))
model.add(Dense(258))
model.add(Dropout(0.5))
model.add(Activation('relu'))
model.add(Dense(90))
model.add(Activation('softmax'))

In [24]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [25]:
history = model.fit(vectorised_train_documents, train_labels,
                    batch_size=457,
                    epochs=20,
                    verbose=1,
                    validation_split=0.1)

Train on 6992 samples, validate on 777 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [26]:
score = model.evaluate(vectorised_test_documents, test_labels, batch_size=457, verbose=1)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Test score: 662.8224997784216
Test accuracy: 0.5611129403114319


In [27]:
predictions = model.predict(vectorised_test_documents)
predictions_fixed = predictions > 0.5

In [28]:
precision = precision_score(test_labels, predictions_fixed, average='micro')
recall = recall_score(test_labels, predictions_fixed, average='micro')
f1 = f1_score(test_labels, predictions_fixed, average='micro')

print('Micro-average quality numbers')
print('Precision: {:.4f}, Recall: {:.4f}, F1-measure: {:.4f}'.format(precision, recall, f1))

Micro-average quality numbers
Precision: 0.5611, Recall: 0.4525, F1-measure: 0.5010


In [29]:
precision = precision_score(test_labels, predictions_fixed, average='macro')
recall = recall_score(test_labels, predictions_fixed, average='macro')
f1 = f1_score(test_labels, predictions_fixed, average='macro')

print('Macro-average quality numbers')
print('Precision: {:.4f}, Recall: {:.4f}, F1-measure: {:.4f}'.format(precision, recall, f1))

Macro-average quality numbers
Precision: 0.0145, Recall: 0.0209, F1-measure: 0.0161


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


### 4th architecture

It seems that the first simple approach works the best. Lets try to expand the network in the other direction.

In [30]:
model = Sequential()
model.add(Dense(2048, input_shape=(20682,)))
model.add(Activation('relu'))
model.add(Dense(90))
model.add(Activation('softmax'))

In [31]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [32]:
history = model.fit(vectorised_train_documents, train_labels,
                    batch_size=457,
                    epochs=20,
                    verbose=1,
                    validation_split=0.1)

Train on 6992 samples, validate on 777 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [33]:
score = model.evaluate(vectorised_test_documents, test_labels, batch_size=457, verbose=1)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Test score: 27.348058948535797
Test accuracy: 0.6840013265609741


In [34]:
predictions = model.predict(vectorised_test_documents)
predictions_fixed = predictions > 0.5

In [35]:
precision = precision_score(test_labels, predictions_fixed, average='micro')
recall = recall_score(test_labels, predictions_fixed, average='micro')
f1 = f1_score(test_labels, predictions_fixed, average='micro')

print('Micro-average quality numbers')
print('Precision: {:.4f}, Recall: {:.4f}, F1-measure: {:.4f}'.format(precision, recall, f1))

Micro-average quality numbers
Precision: 0.8154, Recall: 0.5900, F1-measure: 0.6846


In [36]:
precision = precision_score(test_labels, predictions_fixed, average='macro')
recall = recall_score(test_labels, predictions_fixed, average='macro')
f1 = f1_score(test_labels, predictions_fixed, average='macro')

print('Macro-average quality numbers')
print('Precision: {:.4f}, Recall: {:.4f}, F1-measure: {:.4f}'.format(precision, recall, f1))

Macro-average quality numbers
Precision: 0.2476, Recall: 0.1096, F1-measure: 0.1314


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


It seems that the first simple approach works the best. In order to check other parameters, lets make a grid-search.
# Parameter search

In [37]:
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

In [38]:
def create_model():
    # create model
    model = Sequential()
    model.add(Dense(512, input_shape=(20682,)))
    model.add(Activation('relu'))
    model.add(Dense(90))
    model.add(Activation('softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
    return model

In [39]:
# create model
model = KerasClassifier(build_fn=create_model, verbose=0)

In [41]:
# define the grid search parameters
# first, tune batch size and epochs
batch_size = [20, 100, 500, 1000]
epochs = [10, 25, 50]

param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(vectorised_train_documents, train_labels)



In [42]:
print('Best: %f using %s' % (grid_result.best_score_, grid_result.best_params_))

Best: 0.770756 using {'batch_size': 500, 'epochs': 10}


In [43]:
# now lets tune optimizer

In [44]:
def create_model(optimizer='adam'):
    # create model
    model = Sequential()
    model.add(Dense(512, input_shape=(20682,)))
    model.add(Activation('relu'))
    model.add(Dense(90))
    model.add(Activation('softmax'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [45]:
# create model
model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=500, verbose=0)

In [46]:
# define the grid search parameters
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = dict(optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(vectorised_train_documents, train_labels)



In [47]:
print('Best: %f using %s' % (grid_result.best_score_, grid_result.best_params_))

Best: 0.994920 using {'optimizer': 'Nadam'}


In [48]:
# tune learning rate 

In [49]:
from keras.optimizers import Nadam
def create_model(learn_rate=0.01):
    # create model
    model = Sequential()
    model.add(Dense(512, input_shape=(20682,)))
    model.add(Activation('relu'))
    model.add(Dense(90))
    model.add(Activation('softmax'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer=Nadam(lr=learn_rate), metrics=['accuracy'])
    return model

In [50]:
# create model
model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=500, verbose=0)

In [51]:
# define the grid search parameters
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
param_grid = dict(learn_rate=learn_rate)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(vectorised_train_documents, train_labels)

In [52]:
print('Best: %f using %s' % (grid_result.best_score_, grid_result.best_params_))

Best: 0.995422 using {'learn_rate': 0.01}


In [53]:
# tune neuron activation function

In [54]:
from keras.optimizers import Nadam
def create_model(activation='relu'):
    # create model
    model = Sequential()
    model.add(Dense(512, input_shape=(20682,)))
    model.add(Activation(activation=activation))
    model.add(Dense(90))
    model.add(Activation('softmax'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer=Nadam(lr=0.01), metrics=['accuracy'])
    return model

In [55]:
# create model
model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=500, verbose=0)

In [56]:
# define the grid search parameters
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
param_grid = dict(activation=activation)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(vectorised_train_documents, train_labels)

In [57]:
print('Best: %f using %s' % (grid_result.best_score_, grid_result.best_params_))

Best: 0.995529 using {'activation': 'relu'}


In [58]:
# tune dropout

In [59]:
from keras.optimizers import Nadam
def create_model(dropout_rate=0.0):
    # create model
    model = Sequential()
    model.add(Dense(512, input_shape=(20682,)))
    model.add(Activation('relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(90))
    model.add(Activation('softmax'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer=Nadam(lr=0.01), metrics=['accuracy'])
    return model

In [60]:
# create model
model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=500, verbose=0)

In [61]:
# define the grid search parameters
dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
param_grid = dict(dropout_rate=dropout_rate)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(vectorised_train_documents, train_labels)

In [62]:
print('Best: %f using %s' % (grid_result.best_score_, grid_result.best_params_))

Best: 0.995622 using {'dropout_rate': 0.2}


# Final model performance

In [63]:
# create model
model = Sequential()
model.add(Dense(512, input_shape=(20682,)))
model.add(Activation('relu'))
model.add(Dropout(0.1))
model.add(Dense(90))
model.add(Activation('softmax'))
# Compile model
model.compile(loss='binary_crossentropy', optimizer=Nadam(lr=0.01), metrics=['accuracy'])

In [64]:
history = model.fit(vectorised_train_documents, train_labels,
                    batch_size=500,
                    epochs=10,
                    verbose=1,
                    validation_split=0.1)

Train on 6992 samples, validate on 777 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [65]:
predictions = model.predict(vectorised_test_documents)
predictions_fixed = predictions > 0.5

In [66]:
precision = precision_score(test_labels, predictions_fixed, average='micro')
recall = recall_score(test_labels, predictions_fixed, average='micro')
f1 = f1_score(test_labels, predictions_fixed, average='micro')

print('Micro-average quality numbers')
print('Precision: {:.4f}, Recall: {:.4f}, F1-measure: {:.4f}'.format(precision, recall, f1))

Micro-average quality numbers
Precision: 0.9565, Recall: 0.7166, F1-measure: 0.8194


In [67]:
precision = precision_score(test_labels, predictions_fixed, average='macro')
recall = recall_score(test_labels, predictions_fixed, average='macro')
f1 = f1_score(test_labels, predictions_fixed, average='macro')

print('Macro-average quality numbers')
print('Precision: {:.4f}, Recall: {:.4f}, F1-measure: {:.4f}'.format(precision, recall, f1))

Macro-average quality numbers
Precision: 0.5819, Recall: 0.3037, F1-measure: 0.3693


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [68]:
score = model.evaluate(vectorised_test_documents, test_labels, batch_size=457, verbose=1)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Test score: 0.013759654011600812
Test accuracy: 0.9956459999084473
