In [37]:
import numpy as np
import pandas as pd
import tensorflow as tf
from  sklearn.model_selection import train_test_split

In [146]:
train_df = pd.read_csv("./datasets/training.csv")
test_df = pd.read_csv("./datasets/test.csv")

In [199]:
train_df = train_df.dropna(axis=0)

In [216]:
X_train_df = train_df["Image"].apply(lambda im: np.fromstring(im, sep=' '))
y_train_df = train_df.drop(["Image"], axis=1)

In [217]:
X_train_df = np.array([i for i in X_train_df])

In [None]:
test_df = test_df["Image"].apply(lambda im: np.fromstring(im, sep=' '))
test_df = np.array([i for i in test_df])

In [219]:
X_train = X_train_df
y_train = y_train_df.values

In [223]:
X_train, X_test ,y_train, y_test = train_test_split(X_train, y_train, test_size=0.1, 
                                                   random_state=42)

array([[68.5886094 , 32.99919317, 29.45230185, ..., 76.93577565,
        53.66044602, 77.89258628],
       [68.80073392, 34.85971108, 28.76514575, ..., 81.09763816,
        51.8791103 , 81.54359092],
       [66.02475   , 40.47675   , 31.50075   , ..., 73.995     ,
        46.584     , 91.089     ],
       ...,
       [67.24939806, 35.8583301 , 29.76093204, ..., 66.26516505,
        47.25530097, 84.17615534],
       [69.7650219 , 38.73740146, 28.71170803, ..., 82.63427737,
        47.55013139, 90.80934307],
       [69.62626415, 32.21859623, 28.74656604, ..., 67.43184906,
        51.00769811, 82.4069434 ]])

In [254]:
# Training Parameters
learning_rate = 0.001
num_steps = 500
batch_size = 128

# Network Parameters
num_input = 96 * 96 # MNIST data input (img shape: 28*28)
num_classes = 30 # MNIST total classes (0-9 digits)
dropout = 0.25 # Dropout, probability to drop a unit


In [255]:
def conv_net2(X,n_classes, dropout, reuse, is_training):
    with tf.variable_scope("ConvNet", reuse=reuse):
#         X = tf.cast(X, tf.float32)
        x = tf.reshape(X, shape=[-1, 96, 96, 1])
        conv1 = tf.layers.conv2d(x, 1, 5, activation=tf.nn.relu)
        conv1 = tf.layers.max_pooling2d(conv1, 2, 2)
        
        fc1 = tf.contrib.layers.flatten(conv1)
        fc1 = tf.layers.dense(fc1, 100)
        fc1 = tf.layers.dropout(fc1, rate=dropout, training=is_training)
        
        out = tf.layers.dense(fc1, 30)
    
    return out

In [256]:
def model_fn(features, labels, mode):
    logits_train = conv_net2(features, num_classes, dropout, reuse=False, is_training=True)
    logits_test = conv_net2(features, num_classes, dropout, reuse=True, is_training=False)
    
#     pred_classes = tf.argmax(logits_test, axis=1)
    pred_classes = logits_test
#     pred_proba = tf.nn.softmax(logits_test)
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)
    
    loss_op = tf.reduce_mean(tf.pow(logits_train - labels, 2)) / (2 * batch_size)
                                                                           
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op, global_step=tf.train.get_global_step())
    
    acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)
    
    estim_specs = tf.estimator.EstimatorSpec(mode=mode,predictions=pred_classes, 
                                        loss=loss_op, train_op=train_op, 
                                        eval_metric_ops={"accuracy":acc_op})
    return estim_specs

In [257]:
model = tf.estimator.Estimator(model_fn)
input_fn = tf.estimator.inputs.numpy_input_fn(x=X_train,
                                              y=y_train, 
                                              batch_size=batch_size, 
                                             num_epochs=None, shuffle=True)
model.train(input_fn, steps=num_steps)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/j0/mlln6qvj2kl42rr7mg80lsxc0000gn/T/tmpughpwr7r', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1c4b275a20>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /var/folders/j0

<tensorflow.python.estimator.estimator.Estimator at 0x1c4b275898>

In [259]:
input_fn = tf.estimator.inputs.numpy_input_fn(x=X_test, y=y_test, 
                                              batch_size=batch_size, shuffle=False)
model.evaluate(input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-05-30-13:25:32
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/j0/mlln6qvj2kl42rr7mg80lsxc0000gn/T/tmpughpwr7r/model.ckpt-500
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-05-30-13:25:32
INFO:tensorflow:Saving dict for global step 500: accuracy = 0.0, global_step = 500, loss = 1.983207


{'accuracy': 0.0, 'global_step': 500, 'loss': 1.983207}

In [274]:
input_fn = tf.estimator.inputs.numpy_input_fn(x=test_df, shuffle=False)
y_pred = np.array( list(model.predict(input_fn)))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/j0/mlln6qvj2kl42rr7mg80lsxc0000gn/T/tmpughpwr7r/model.ckpt-500
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [333]:
idtable = pd.read_csv("./datasets/IdLookupTable.csv")

In [334]:
cols = train_df.columns[:-1]

In [335]:
values = []

In [336]:
for index, row in idtable.iterrows():
     values.append((
        row['RowId'],
        y_pred[row.ImageId - 1][np.where(cols == row.FeatureName)[0][0]],
        ))

In [337]:
submission = pd.DataFrame(values, columns=('RowId', 'Location'))
submission.to_csv('./submission.csv', index=False)