In [54]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

import keras
from keras.utils import np_utils
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder 
from sklearn.cross_validation import train_test_split

In [55]:
# Read train.csv and test.csv from the current working directory.
train = pd.read_csv(filepath_or_buffer='train.csv')
test = pd.read_csv(filepath_or_buffer='test.csv')

In [56]:
def prep_data(data, train=True, scaler=None):
    """Convert a raw DataFrame into standardised arrays ready for the network.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an 'id' column; when ``train`` is true it must also
        contain a 'target' column. The frame itself is not modified.
    train : bool, default True
        True  -> rows are shuffled, features are scaled, and the target is
                 label-encoded and one-hot encoded.
        False -> only the 'id' column is dropped and the features are scaled;
                 row order is preserved.
    scaler : object with ``fit`` / ``transform``, optional
        Scaler to use for standardisation.
        * ``train=True``: the scaler is fitted on the training features
          (in place), so the caller can keep it and reuse it later.
        * ``train=False``: the scaler is assumed to be already fitted and is
          only used to transform, so test rows are scaled with the same
          statistics as the training rows.
        When omitted, a fresh ``StandardScaler`` is fitted on ``data`` itself,
        which is the historical behaviour of this function.

    Returns
    -------
    list [X, y] when ``train`` is true, where X is the scaled feature matrix
    and y the one-hot target matrix; otherwise just X.
    """
    if train:
        #shuffle rows so later splits are not affected by the file order
        data = data.reindex(np.random.permutation(data.index))
        y = data['target']
        X = data.drop(['id', 'target'], axis=1)
    else:
        X = data.drop(['id'], axis=1)

    #convert type
    X = X.astype('float')

    #standardize feature set
    if scaler is None:
        # No scaler supplied: fit one on whatever data we were given.
        scaler = StandardScaler()
        scaler.fit(X)
    elif train:
        # Caller-supplied scaler learns its statistics from the training set.
        scaler.fit(X)
    # For train=False with a supplied scaler we deliberately do NOT refit.
    X = scaler.transform(X)

    if not train:
        return X

    #encode classes as ints
    encoder = LabelEncoder()
    encoder.fit(y)
    y = encoder.transform(y).astype(np.int32)

    #convert target vector to categorical matrix
    y = np_utils.to_categorical(y)

    return [X, y]

In [57]:
# Hand prep_data a copy rather than the `train` frame itself.
training_data = train.copy()
X, y = prep_data(data=training_data, train=True)

In [58]:
# Input width = columns of the feature matrix; class count = columns of the
# one-hot target matrix. Both are read as globals by the model builders.
dims, nb_classes = X.shape[1], y.shape[1]

print(dims, 'dimensions')
print(nb_classes, 'classes')

93 dimensions
9 classes


In [59]:
# Callbacks shared by the training cell.
# Checkpoint file: with save_best_only=True only the best checkpoint is kept
# (the monitored quantity is left at the callback's default).
fBestModel = 'best_model.h5'
best_model = ModelCheckpoint(fBestModel, save_best_only=True, verbose=0)
# Stop training once val_loss has gone 4 epochs without improving.
early_stop = EarlyStopping(monitor='val_loss', patience=4, verbose=1)

In [60]:
def baseline_model():
    """Build and compile the small baseline network.

    Architecture: Dense(100) on the `dims`-wide input (no activation is
    specified for it), then Dense(nb_classes) followed by a softmax.
    Reads the notebook globals `dims` and `nb_classes`.

    Returns the compiled Sequential model (SGD, categorical cross-entropy).
    """
    net = Sequential()
    stack = (
        Dense(100, input_shape=(dims,)),
        Dense(nb_classes),
        Activation('softmax'),
    )
    for layer in stack:
        net.add(layer)

    net.compile(loss='categorical_crossentropy', optimizer='sgd')
    return net

In [65]:
def model():
    """Build and compile the main network.

    Architecture: Dense(1000) -> Dense(1000, relu) -> Dense(100, relu) ->
    Dense(nb_classes) -> softmax, with Dropout(0.05) after each of the first
    three Dense layers. Reads the notebook globals `dims` and `nb_classes`.

    Returns the compiled Sequential model (SGD, categorical cross-entropy).
    """
    # NOTE(review): the first Dense(1000) layer is given no activation, unlike
    # the two hidden layers that follow it -- confirm this is intentional.
    stack = (
        Dense(1000, input_shape=(dims,)),
        Dropout(0.05),
        Dense(1000, activation='relu'),
        Dropout(0.05),
        Dense(100, activation='relu'),
        Dropout(0.05),
        Dense(nb_classes),
        Activation('softmax'),
    )

    net = Sequential()
    for layer in stack:
        net.add(layer)

    net.compile(loss='categorical_crossentropy', optimizer='sgd')
    return net

## Validating the Model on a Hold-out Split

In [66]:
# Single hold-out split (not k-fold): 15% of the prepared rows are set aside
# for the evaluation cell; random_state fixes which rows those are.
x_train, x_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.15,
    random_state=42,
)

In [67]:
print('Building model...')
# NOTE: this rebinds `model` from the builder function to the built network,
# so this cell cannot be re-run without first re-running the cell that
# defines model().
model = model()

print('Training cross-validated model...')
# A further 15% of x_train is held back for validation; the callbacks
# checkpoint the model and stop training early.
model.fit(
    x_train, y_train,
    epochs=100,
    batch_size=2000,
    validation_split=0.15,
    callbacks=[best_model, early_stop],
    verbose=True,
)
print('CV Training Complete.')

Building model...
Training cross-validated model...
Train on 44706 samples, validate on 7890 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/10

In [68]:
print('Evaluating model...')
# Score the hold-out rows; kept as the cell's last expression so the notebook
# displays the returned value.
model.evaluate(
    x_test,
    y_test,
    batch_size=100,
)

Evaluating model...

0.590264311460451

## Test Predictions

In [69]:
# Hand prep_data a copy rather than the `test` frame itself; train=False
# selects the branch that only drops 'id' and scales the features.
test_data = test.copy()
X_test = prep_data(data=test_data, train=False)

In [70]:
# Predicted class index per test row. Taking the argmax of the softmax output
# is what Sequential.predict_classes did, but it does not depend on that
# method, which later Keras releases removed.
predictions = np.argmax(model.predict(X_test), axis=-1)



In [71]:
# One-hot encode the predicted class indices. The class count is passed
# explicitly so the matrix always has nb_classes columns; otherwise the width
# is inferred from the largest predicted index and a class that is never
# predicted would silently drop a column.
results = np_utils.to_categorical(predictions, nb_classes)

In [72]:
# Wrap the one-hot prediction matrix in a DataFrame (one row per test row).
df = pd.DataFrame(data=results)

In [73]:
# Use the ids that actually appear in the test file instead of assuming they
# are 1..N. Test rows are never shuffled by prep_data, so predictions are
# still in the same order as `test`.
rng = test['id'].tolist()
df['id'] = rng
df = df.set_index('id')

In [74]:
# Column headers Class_1 .. Class_<nb_classes>, one per one-hot column.
# Derived from nb_classes so the header count follows the model's class count
# rather than a hard-coded 9.
cols = ["Class_" + str(class_no) for class_no in range(1, nb_classes + 1)]

df.columns = cols

In [75]:
# Write the predictions (indexed by id) to a CSV in the working directory.
df.to_csv(path_or_buf='results_7_19_2017_06_00_00.csv')