# Keras example

Trains and evaluates a simple MLP on the Reuters newswire topic classification task.

In [1]:
import os
import warnings

import numpy as np
import pandas as pd

import keras
from keras.datasets import reuters
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.preprocessing.text import Tokenizer

warnings.filterwarnings("ignore")

# The following import and function call are the only additions to code required
# to automatically log metrics and parameters to MLflow.
import mlflow.keras

In [3]:
# Start a tracking server first, e.g.:
#   mlflow server --backend-store-uri mlruns/ --default-artifact-root mlruns/ --host 0.0.0.0
#
# The server address is taken from the MLFLOW_TRACKING_URI environment variable
# when it is set (and non-empty); otherwise the original development host is used,
# so the notebook no longer has to be edited to run against another server.
remote_server_uri = os.environ.get("MLFLOW_TRACKING_URI") or "http://DESKTOP-BPV4P4B:5000"
mlflow.set_tracking_uri(remote_server_uri)

# Runs from this notebook are grouped under this experiment name.
exp_name = "reuters_keras0"
mlflow.set_experiment(exp_name)

INFO: 'reuters_keras0' does not exist. Creating a new experiment


In [4]:
# Turn on MLflow autologging for Keras. Per the note in the imports cell, this call
# together with the mlflow.keras import is what records metrics and parameters
# automatically.
mlflow.keras.autolog()

In [14]:
# Hyperparameters reused by the later cells:
#   max_words  - passed as num_words when loading and vectorizing the data
#   batch_size - used for both training and evaluation
#   epochs     - number of training epochs
max_words, batch_size, epochs = 1000, 64, 7

# Record the same values on the MLflow run.
mlflow.log_params(dict(max_words=max_words, batch_size=batch_size, epochs=epochs))

In [15]:
print('Loading data...')

# Load the Reuters data limited to max_words word indices, holding out 20% for testing.
train_data, test_data = reuters.load_data(num_words=max_words, test_split=0.2)
x_train, y_train = train_data
x_test, y_test = test_data

print(f"Train Shape - {x_train.shape}")
print(f"Test Shape - {x_test.shape}")

# Labels are integer class ids, so the class count is the largest id plus one.
num_classes = y_train.max() + 1
print(num_classes, 'classes')

Loading data...
Train Shape - (8982,)
Test Shape - (2246,)
46 classes


In [16]:
print('Vectorizing sequence data...')

# Convert both splits from word-index sequences into binary matrices using the
# same tokenizer settings; the original sequence arrays are replaced in place.
tokenizer = Tokenizer(num_words=max_words)
x_train, x_test = (
    tokenizer.sequences_to_matrix(sequences, mode='binary')
    for sequences in (x_train, x_test)
)

print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

Vectorizing sequence data...
x_train shape: (8982, 1000)
x_test shape: (2246, 1000)


In [17]:
# Convert the integer class labels into binary class matrices with num_classes
# columns, as needed for the categorical_crossentropy loss.
# NOTE: y_train / y_test are overwritten, so this cell should be run only once
# per kernel session.
print('Convert class vector to binary class matrix '
      '(for use with categorical_crossentropy)')
print('y_train shape (before):', y_train.shape)
print('y_test shape (before):', y_test.shape)
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)

Convert class vector to binary class matrix (for use with categorical_crossentropy)
y_train shape (brfore): (8982,)
y_test shape (before): (2246,)
y_train shape: (8982, 46)
y_test shape: (2246, 46)


In [18]:
print('Building model...')

# Training configuration; the loss name is also recorded on the MLflow run.
loss = 'categorical_crossentropy'
optimizer = 'adam'

# MLP: one 512-unit ReLU hidden layer with 50% dropout, followed by a softmax
# output layer with one unit per class.
model = Sequential([
    Dense(512, input_shape=(max_words,)),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])

mlflow.log_params({"loss_type": loss})

model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

Building model...


In [19]:
# Train the model, holding out 10% of the training data for validation.
# The returned object is kept so its per-epoch history can be inspected later.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
    verbose=1,
)

Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


In [20]:
# What is missing in the log?
# Show the per-epoch values Keras recorded during fit() as a table (one row per
# epoch). pandas is imported in the imports cell at the top of the notebook.
epoch_data = pd.DataFrame(history.history)
epoch_data

Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,1.547703,0.654955,1.153225,0.750834
1,0.885829,0.797105,0.971569,0.783092
2,0.646252,0.845849,0.889596,0.794216
3,0.48669,0.885067,0.857558,0.806452
4,0.389071,0.905604,0.8721,0.807564
5,0.329375,0.916863,0.873904,0.810901
6,0.274686,0.930595,0.889596,0.813126


In [21]:
# Evaluate on the test split. `score` holds the loss first and then the accuracy
# (the single metric requested in compile), which is how it is reported below.
score = model.evaluate(x_test, y_test,
                       batch_size=batch_size, verbose=1)
test_loss, test_accuracy = score
print('Test score:', test_loss)
print('Test accuracy:', test_accuracy)

# Log the test results on the MLflow run under correctly spelled metric names.
mlflow.log_metrics({"test_accuracy": test_accuracy, "test_loss": test_loss})

 1/36 [..............................] - ETA: 0s - loss: 0.9324 - accuracy: 0.7969

  tensor_proto.tensor_content = nparray.tostring()


Test score: 0.882178008556366
Test accuracy: 0.7965271472930908


In [22]:
# Log the trained Keras model to MLflow under the name "model", then end the
# MLflow run. Order matters: the model must be logged before the run is ended.
mlflow.keras.log_model(model, "model")
mlflow.end_run()