In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
%matplotlib inline
import tensorflow as tf



In [22]:
csv_data = pd.read_csv("train.csv")

In [23]:
# extract species for numeric encoding 
species = sorted(csv_data.species.unique())
species_dict = {specie:index for index, specie in enumerate(species)}

In [27]:
all_data = csv_data.replace({'species':species_dict})

***
#### Build a simple Neural Network using shape, margin, texture features to get a sense of performance
**Split training set and testing set**

In [37]:
train_X, test_X, train_y, test_y = train_test_split(all_data.drop(['species'],axis=1), all_data['species'],
                                                    test_size=330, stratify=all_data['species'], random_state=916)

In [40]:
train_X.head()

Unnamed: 0,id,margin1,margin2,margin3,margin4,margin5,margin6,margin7,margin8,margin9,...,texture55,texture56,texture57,texture58,texture59,texture60,texture61,texture62,texture63,texture64
490,797,0.076172,0.12305,0.021484,0.003906,0.0,0.1582,0.0,0.0,0.007812,...,0.004883,0.0,0.039062,0.008789,0.029297,0.0,0.0,0.013672,0.0,0.074219
172,304,0.001953,0.0,0.017578,0.011719,0.025391,0.0,0.017578,0.0,0.003906,...,0.03125,0.0,0.009766,0.0,0.020508,0.0,0.0,0.0,0.032227,0.025391
652,1031,0.033203,0.10156,0.011719,0.003906,0.001953,0.087891,0.027344,0.0,0.009766,...,0.18945,0.0,0.03418,0.0,0.018555,0.0,0.0,0.0,0.0,0.003906
848,1358,0.007812,0.001953,0.013672,0.015625,0.009766,0.005859,0.021484,0.0,0.0,...,0.042969,0.0,0.056641,0.003906,0.015625,0.0,0.0,0.0,0.0,0.014648
705,1131,0.060547,0.125,0.007812,0.001953,0.0,0.15625,0.001953,0.0,0.005859,...,0.10156,0.0,0.027344,0.0,0.0,0.0,0.0,0.0,0.0,0.003906


**Preprocess data**
 - center data ??
 - one-hot-encode y
 - change data type

In [41]:
num_labels = len(species)

In [78]:
train_y_encode = np.arange(num_labels)==np.array(train_y)[:,None]
test_y_encode = np.arange(num_labels)==np.array(test_y)[:,None]

In [79]:
train_y_encode = train_y_encode.astype(np.float32)
test_y_encode = test_y_encode.astype(np.float32)

In [81]:
train_X_numpy = train_X.drop(['id'],axis=1).as_matrix().astype(np.float32)
test_X_numpy = test_X.drop(['id'],axis=1).as_matrix().astype(np.float32)

In [83]:
print "training set size:\t", train_X_numpy.shape, '\t', train_y_encode.shape
print "testing set size:\t", test_X_numpy.shape, '\t', test_y_encode.shape

training set size:	(660, 192) 	(660, 99)
testing set size:	(330, 192) 	(330, 99)


**A Simple Neural Network/Multi-Layer Preceptron Model**

In [108]:
batch_size = 64
num_features = train_X_numpy.shape[1]
hidden_layer_size = 2048

In [85]:
def initialize(scope, shape, wt_initializer, center=True, scale=True):
    with tf.variable_scope(scope, reuse=None) as sp:
        wt = tf.get_variable("weights", shape, initializer=wt_initializer)
        bi = tf.get_variable("biases", shape[-1], initializer=tf.constant_initializer(1.))
        if center:
            beta = tf.get_variable("beta", shape[-1], initializer=tf.constant_initializer(0.0))
        if scale:
            gamma = tf.get_variable("gamma", shape[-1], initializer=tf.constant_initializer(1.0))
        moving_avg = tf.get_variable("moving_mean", shape[-1], initializer=tf.constant_initializer(0.0), \
                                     trainable=False)
        moving_var = tf.get_variable("moving_variance", shape[-1], initializer=tf.constant_initializer(1.0), \
                                     trainable=False)
        sp.reuse_variables()

In [109]:
init_lr = 0.001
graph = tf.Graph()
with graph.as_default():
    # prepare input 
    train_X_tf = tf.placeholder(tf.float32, shape=[batch_size, num_features])
    train_y_tf = tf.placeholder(tf.float32, shape=[batch_size, num_labels])
    valid_X_tf, valid_y_tf = tf.constant(test_X_numpy), tf.constant(test_y_encode)
    
    # initialize multi-layer parameters
    layers = [{'scope':'hidden_layer', 'shape':[num_features, hidden_layer_size], 
               'initializer':tf.contrib.layers.variance_scaling_initializer()},
              {'scope':'output_layer', 'shape':[hidden_layer_size, num_labels],
               'initializer':tf.contrib.layers.variance_scaling_initializer()}]
    for layer in layers:
        initialize(layer['scope'], layer['shape'], layer['initializer'])
    
    # build model
    def model(X, layer_scopes, is_training, keep_prob, decay=0.9):
        for scope in layer_scopes:
            with tf.variable_scope(scope, reuse=True):
                wt = tf.get_variable("weights")
                bi = tf.get_variable("biases")
            X = tf.matmul(X, wt) + bi
            # Batch Normalizaion
            X = tf.contrib.layers.batch_norm(X, decay=decay, center=True, scale=True, is_training=is_training,
                                            updates_collections=None, scope=scope, reuse=True)
            # ReLu Activation
            X = tf.nn.relu(X)
            # Dropout for non-output layers
            if scope!=layer_scopes[-1]:
                X = tf.nn.dropout(X, keep_prob)
        return X
    
    # setup a few parameters
    layer_scopes = [l['scope'] for l in layers]
    keep_prob = tf.placeholder(tf.float32)
    
    # compute log loss logloss = - 1/N*Sum(yij*log(pij)) -> yij is 1 if observation i is in class j otherwise 0
    train_logits = model(train_X_tf, layer_scopes, True, keep_prob)
    train_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(train_logits, train_y_tf))
    valid_logits = model(valid_X_tf, layer_scopes, False, keep_prob)
    valid_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(valid_logits, valid_y_tf))
    
    # setup optimizer
    global_step = tf.Variable(0)
    learning_rate = 0.001
    #tf.train.exponential_decay(init_lr, global_step, decay_steps=5000, decay_rate=0.5, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(train_loss, global_step=global_step)

In [110]:
num_steps = 10000
with tf.Session(graph=graph) as sess:
    tf.initialize_all_variables().run()
    print("Initialized")
    for step in range(num_steps):
        offset = (step * batch_size) % (train_y_encode.shape[0] - batch_size)
        batch_X = train_X_numpy[offset: (offset+batch_size), :]
        batch_y = train_y_encode[offset: (offset+batch_size), :]
        feed_dict = {train_X_tf: batch_X, train_y_tf: batch_y, keep_prob: 0.8}
        _, tloss = sess.run([optimizer, train_loss], feed_dict=feed_dict)
        if step%100==0:
            vloss = sess.run(valid_loss, feed_dict={keep_prob: 1.0})
            print('Epoch: %d:\tTrain Loss: %.6f\tValid Loss: %.6f' \
                %(step, tloss, vloss))
    print("Finished training")
    vloss = sess.run(valid_loss, feed_dict={keep_prob: 1.0})
    print("Final valid loss: %.6f" %(vloss))

Initialized
Epoch: 0:	Train Loss: 4.800885	Valid Loss: 5.460688
Epoch: 100:	Train Loss: 0.602887	Valid Loss: 0.812202
Epoch: 200:	Train Loss: 0.410338	Valid Loss: 0.600066
Epoch: 300:	Train Loss: 0.351053	Valid Loss: 0.506743
Epoch: 400:	Train Loss: 0.360860	Valid Loss: 0.465197
Epoch: 500:	Train Loss: 0.206955	Valid Loss: 0.439483
Epoch: 600:	Train Loss: 0.234630	Valid Loss: 0.412454
Epoch: 700:	Train Loss: 0.227252	Valid Loss: 0.394866
Epoch: 800:	Train Loss: 0.216372	Valid Loss: 0.378752
Epoch: 900:	Train Loss: 0.184070	Valid Loss: 0.368429
Epoch: 1000:	Train Loss: 0.256197	Valid Loss: 0.357826
Epoch: 1100:	Train Loss: 0.237517	Valid Loss: 0.349896
Epoch: 1200:	Train Loss: 0.051091	Valid Loss: 0.344473
Epoch: 1300:	Train Loss: 0.059759	Valid Loss: 0.334552
Epoch: 1400:	Train Loss: 0.351193	Valid Loss: 0.329385
Epoch: 1500:	Train Loss: 0.179939	Valid Loss: 0.322167
Epoch: 1600:	Train Loss: 0.106806	Valid Loss: 0.315126
Epoch: 1700:	Train Loss: 0.044698	Valid Loss: 0.307684
Epoch: 180