In [1]:
### Install dependencies ###
!pip install keras numpy sklearn > /dev/null 2>&1

In [2]:
### Imports ###

# Keras
import keras.utils
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
# Processing
import numpy as numpy
# Used to encode labels like the monster classes to one-hot vectors
from sklearn.preprocessing import LabelEncoder
# Static typing ftw
from typing import List, cast

Using TensorFlow backend.


In [3]:
### Data processing ###

def labels_to_numbers(labels: List[str]) -> List[int]:
    """
    Encode string labels as integer class ids.

    A fresh LabelEncoder is fitted on exactly the labels passed in and is
    then applied to those same labels, so the label -> number mapping is
    derived from this one call only. For example
      ['Ghoul', 'Goblin', 'Ghoul', 'Ghoul', 'Ghost', ...
    becomes
      [1, 2, 1, 1, 0, ...
    """
    encoder = LabelEncoder()
    encoder.fit(labels)
    return encoder.transform(labels)


def load_features(file_name: str) -> List:
    """
    Read the input features for the model from a csv file in 'data/'.

    The features are the columns
      bone_length  rotting_flesh  hair_length  has_soul  color
    of a comma separated file with one header line. Shown aligned for
    readability, the data looks like this:

    id  bone_length  rotting_flesh  hair_length  has_soul  color    type
    0     0.354512       0.350839     0.465761  0.781142  clear   Ghoul
    1     0.575560       0.425868     0.531401  0.439899  green  Goblin
    2     0.467875       0.354330     0.811616  0.791225  black   Ghoul

    Returns an array with one row per record: the four numeric columns
    followed by the color converted to a number via labels_to_numbers.

    NOTE(review): the color encoding is fitted on the colors of the file
    being loaded, so the same color could receive a different number in
    another file if the set of colors differs - confirm train/test agree.
    """
    csv_path = 'data/' + file_name
    # Both reads skip the single header line and split on commas.
    shared_options = dict(skip_header=1, delimiter=",")
    measurements = numpy.genfromtxt(csv_path,
                                    usecols=(1, 2, 3, 4),
                                    **shared_options)
    color_names = numpy.genfromtxt(csv_path,
                                   usecols=5,
                                   dtype=str,
                                   **shared_options)
    color_codes = labels_to_numbers(color_names)
    # Append each record's color code to the end of its numeric columns.
    rows = [numpy.append(numeric_row, code)
            for numeric_row, code in zip(measurements, color_codes)]
    return numpy.asarray(rows)

def load_targets(file_name: str) -> List:
    """
    Read the monster type of every record ('Ghost', 'Goblin', 'Ghoul') from
    a csv file in 'data/' and return the types as one-hot vectors.

    The type is taken from column 6 of the comma separated file (the single
    header line is skipped), converted to numbers with labels_to_numbers and
    then expanded with keras.utils.to_categorical, so that
      ['Ghoul', 'Goblin', 'Ghoul', 'Ghoul', 'Ghost', .....
    becomes
      [[ 0.,  1.,  0.],
       [ 0.,  0.,  1.],
       [ 0.,  1.,  0.],
    """
    csv_path = 'data/' + file_name
    type_names = numpy.genfromtxt(csv_path,
                                  skip_header=1,
                                  delimiter=",",
                                  usecols=6,
                                  dtype=str)
    type_codes = labels_to_numbers(type_names)
    return keras.utils.to_categorical(type_codes)

In [4]:
### Load the data ###

# We have 371 records for training but no test data.
# So the first 300 records are used for training and the last 71 for testing.
SLICE_AT = 300

features = load_features('train.csv')
targets = load_targets('train.csv')

# Split features and targets at the same index so the rows stay aligned.
x_train, x_test = features[:SLICE_AT], features[SLICE_AT:]
y_train, y_test = targets[:SLICE_AT], targets[SLICE_AT:]

# Features to run predictions on; the predictions themselves are still open.
x_for_prediction = load_features('test.csv')
y_for_prediction = 'TODO'

In [6]:
### Build the models ###

FEATURES = ['bone_length', 'rotting_flesh', 'hair_length', 'has_soul', 'color']
INPUT_DIM = len(FEATURES)
TARGET_VARIABLE = ['type']
VALID_TYPES = ['Ghost', 'Goblin', 'Ghoul']
OUTPUT_DIM = len(VALID_TYPES)

# Training settings, named so they are easy to find and tune.
EPOCHS = 20
BATCH_SIZE = 128

# Basic model inspired from:
# https://keras.io/getting-started/sequential-model-guide/
# section: "Multilayer Perceptron (MLP) for multi-class softmax classification"
#
# Things to experiment with:
# * dropout
# * learning algorithm and its parameters
# * activation
# * learning rate
#
# One hidden layer of 10 relu units, then a softmax over the monster types.
model = Sequential([
    Dense(10, activation='relu', input_dim=INPUT_DIM),
    Dense(OUTPUT_DIM, activation='softmax'),
])

sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE)

# Evaluate on the held-out records; with metrics=['accuracy'] the recorded
# output shows two numbers, presumably [loss, accuracy].
score = model.evaluate(x_test, y_test, batch_size=BATCH_SIZE)
print("Score is " + str(score))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Score is [1.09611976146698, 0.38028168678283691]
