In [135]:
import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.utils import to_categorical

In [136]:
## Load the voting-records CSV into a DataFrame.
## NOTE(review): read_csv treats the first line of the file as a header row by
## default. The resulting column names look like data values (the first column
## ends up named 'republican'), which suggests the file has no header and its
## first record is being consumed as column names -- confirm, and if so read it
## with header=None and adjust the cells that rely on the current column names.
votes_csv_path = 'data/house-votes-84.data.txt'
data = pd.read_csv(votes_csv_path)

In [137]:
## Sanity check: display the (rows, columns) dimensions of the loaded frame.
data.shape

(434, 17)

In [138]:
## Convert the party labels (first column) from strings to integer codes.
party_to_int = {'democrat': 0, 'republican': 1}
label_column = data.columns[0]
labels = data.iloc[:, 0].map(party_to_int)

## Series.map leaves NaN for any value missing from the dict (for example when
## this cell is re-run after the label column was already removed), so fail
## loudly here instead of feeding NaN into the one-hot encoding below.
unmapped = labels.isnull()
if unmapped.any():
    raise ValueError(
        "Unexpected party label(s) in column %r: %r"
        % (label_column, sorted(set(data.loc[unmapped].iloc[:, 0]), key=str))
    )

## One-hot encode; num_classes is pinned so the result always has one column
## per known party, even if a class happens to be absent from the data.
labels = keras.utils.to_categorical(labels, num_classes=len(party_to_int))

## And drop the label column we don't need anymore. It is selected by position
## (not by a hard-coded name) and a new frame is returned instead of mutating
## the existing one in place.
data = data.drop(label_column, axis=1)


In [139]:
## Now encode the votes themselves as numbers: 'y' -> 1, '?' -> 0, 'n' -> -1
vote_to_int = {'y': 1, '?': 0, 'n': -1}


def encode_vote(vote):
    """Return the integer code for one vote; raises KeyError for unknown values."""
    return vote_to_int[vote]


## Series.map accepts a dict directly, but DataFrame.applymap only accepts a
## callable, so the dict lookup is wrapped in a small function and applied to
## every cell of the frame.
data = data.applymap(encode_vote)




In [140]:
## Hold out 15% of the rows as a test set. A fixed random_state makes the
## shuffle deterministic, so the same rows land in train/test on every
## Restart & Run All.
SPLIT_SEED = 42
train, test, train_labels, test_labels = train_test_split(
    data,
    labels,
    train_size=0.85,
    test_size=0.15,
    random_state=SPLIT_SEED,
)

In [141]:
## Small feed-forward classifier, built layer by layer:
## 16 input features -> 7 hidden units (ReLU) -> 2 outputs (softmax).
model = Sequential()
model.add(Dense(7, input_shape=(16,)))
model.add(Activation('relu'))
model.add(Dense(2))
model.add(Activation('softmax'))

In [142]:
## The network ends in a 2-unit softmax and is trained on one-hot labels, so the
## matching loss is categorical cross-entropy. (For exactly two classes it gives
## the same value as binary cross-entropy averaged over both outputs, but it
## states the intent correctly and stays valid if more classes are added.)
model.compile(
    optimizer='sgd',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [143]:
## Train for 10 epochs with one weight update per sample (batch_size=1);
## 15% of the training rows are set aside for validation. verbose=2 prints one
## summary line per epoch.
model.fit(
    train,
    train_labels,
    batch_size=1,
    epochs=10,
    verbose=2,
    validation_split=0.15,
)

Train on 312 samples, validate on 56 samples
Epoch 1/10
 - 0s - loss: 0.3024 - acc: 0.8654 - val_loss: 0.2310 - val_acc: 0.9107
Epoch 2/10
 - 0s - loss: 0.1541 - acc: 0.9455 - val_loss: 0.1877 - val_acc: 0.9286
Epoch 3/10
 - 0s - loss: 0.1206 - acc: 0.9583 - val_loss: 0.1560 - val_acc: 0.9286
Epoch 4/10
 - 0s - loss: 0.0991 - acc: 0.9583 - val_loss: 0.1400 - val_acc: 0.9464
Epoch 5/10
 - 0s - loss: 0.0874 - acc: 0.9647 - val_loss: 0.1389 - val_acc: 0.9286
Epoch 6/10
 - 0s - loss: 0.0777 - acc: 0.9679 - val_loss: 0.1348 - val_acc: 0.9286
Epoch 7/10
 - 0s - loss: 0.0701 - acc: 0.9744 - val_loss: 0.1345 - val_acc: 0.9286
Epoch 8/10
 - 0s - loss: 0.0643 - acc: 0.9744 - val_loss: 0.1321 - val_acc: 0.9464
Epoch 9/10
 - 0s - loss: 0.0592 - acc: 0.9840 - val_loss: 0.1334 - val_acc: 0.9464
Epoch 10/10
 - 0s - loss: 0.0547 - acc: 0.9808 - val_loss: 0.1333 - val_acc: 0.9464


<keras.callbacks.History at 0x1c1a1d6f60>

In [145]:
## Score the held-out rows in a single batch. Index 1 of the result is the
## accuracy metric requested at compile time (index 0 is presumably the loss).
test_score = model.test_on_batch(test, test_labels)
test_accuracy = test_score[1]
print("The accuracy on the test set is:", test_accuracy)

The accuracy on the test set is: 0.9696969985961914
