In [474]:
from sklearn.model_selection import train_test_split
import numpy as np
from matplotlib import pyplot as plt
 
import tensorflow as tf
import pandas as pd
 
from tensorflow.python.platform import tf_logging as logging
 
logging.set_verbosity(logging.INFO)
logging.log(logging.INFO, "Tensorflow version " + tf.__version__)

INFO:tensorflow:Tensorflow version 1.10.0


In [475]:
train_labels = pd.read_csv('./dengue_labels_train.csv')
train_labels = train_labels[0: 1450]

In [476]:
data = train_labels['total_cases'].values
data = data.astype(np.float32)

In [477]:
SEQLEN = 10  # unrolled sequence length
BATCHSIZE = 32
 
X = data
Y = np.roll(data, -1)
 
# X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

X_train, Y_train = X, Y
 
X_train = np.reshape(X_train, [-1, SEQLEN])
Y_train = np.reshape(Y_train, [-1, SEQLEN])
 
# X_test = np.reshape(X_test, [-1, SEQLEN])
# Y_test = np.reshape(Y_test, [-1, SEQLEN])

In [478]:
def train_dataset():
    dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
    dataset = dataset.repeat()
    dataset = dataset.shuffle(DATA_SEQ_LEN * 4 // SEQLEN)
    dataset = dataset.batch(BATCHSIZE)
    samples, labels = dataset.make_one_shot_iterator().get_next()
    return samples, labels
 
# def eval_dataset():
#     evaldataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test))
#     evaldataset = evaldataset.repeat(1)
#     evaldataset = evaldataset.batch(BATCHSIZE)
 
#     samples, labels = evaldataset.make_one_shot_iterator().get_next()
#     return samples, labels

In [479]:
RNN_CELLSIZE = 80
N_LAYERS = 2
DROPOUT_PKEEP = 0.7
 
def model_rnn_fn(features, labels, mode):
    
    X = tf.expand_dims(features, axis=2)
 
    batchsize = tf.shape(X)[0]
    seqlen = tf.shape(X)[1]
 
    cells = [tf.nn.rnn_cell.GRUCell(RNN_CELLSIZE) for _ in range(N_LAYERS)]
 
    cells[:-1] = [tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=DROPOUT_PKEEP) for cell in cells[:-1]]
    
     # a stacked RNN cell still works like an RNN cell
    cell = tf.nn.rnn_cell.MultiRNNCell(cells, state_is_tuple=False)
 
    # X[BATCHSIZE, SEQLEN, 1], Hin[BATCHSIZE, RNN_CELLSIZE*N_LAYERS]
    Yn, H = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
 
    Yn = tf.reshape(Yn, [batchsize * seqlen, RNN_CELLSIZE])
    Yr = tf.layers.dense(Yn, 1)  # Yr [BATCHSIZE*SEQLEN, 1]
    Yr = tf.reshape(Yr, [batchsize, seqlen, 1])  # Yr [BATCHSIZE, SEQLEN, 1]
 
    Yout = Yr[:, -1, :]  # Last output Yout [BATCHSIZE, 1]
 
    loss = train_op = None
    if mode != tf.estimator.ModeKeys.PREDICT:
        labels = tf.expand_dims(labels, axis=2)
        loss = tf.losses.mean_squared_error(Yr, labels)  # la  bels[BATCHSIZE, SEQLEN, 1]
        lr = 0.001
        optimizer = tf.train.AdamOptimizer(learning_rate=lr)
 
        train_op = tf.contrib.training.create_train_op(loss, optimizer)
 
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions={"Yout": Yout},
        loss=loss,
        train_op=train_op
    )

In [480]:
training_config = tf.estimator.RunConfig(model_dir="./outputdir")
estimator = tf.estimator.Estimator(model_fn=model_rnn_fn, config=training_config)

INFO:tensorflow:Using config: {'_model_dir': './outputdir', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x00000281C6B8FB00>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [481]:
estimator.train(input_fn=train_dataset,steps=2000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./outputdir\model.ckpt-16000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 16000 into ./outputdir\model.ckpt.
INFO:tensorflow:loss = 20.322433, step = 16001
INFO:tensorflow:global_step/sec: 49.5088
INFO:tensorflow:loss = 282.3104, step = 16101 (2.022 sec)
INFO:tensorflow:global_step/sec: 65.8485
INFO:tensorflow:loss = 24.790604, step = 16201 (1.518 sec)
INFO:tensorflow:global_step/sec: 67.2429
INFO:tensorflow:loss = 39.5908, step = 16301 (1.490 sec)
INFO:tensorflow:global_step/sec: 66.8833
INFO:tensorflow:loss = 19.566792, step = 16401 (1.493 sec)
INFO:tensorflow:global_step/sec: 67.4695
INFO:tensorflow:loss = 27.938442, step = 16501 (1.481 sec)
INFO:tensorflow:global_step/sec: 67.2881
INFO:tensorflow:loss = 59.508305,

<tensorflow.python.estimator.estimator.Estimator at 0x281c6b8fcc0>

In [482]:
# results = estimator.predict(eval_dataset)
 
# Yout_ = [result["Yout"] for result in results]
 
# actual = Y_test[:, -1]

# fig, ax = plt.subplots(figsize=(20,8))
 
# colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
# plt.plot(actual, label="Actual Values", color='green')
# plt.plot(Yout_, label="Predicted Values", color='red', )
 
# plt.show()

In [483]:
submission_labels = pd.read_csv('./submissions/submission_benchmark.csv', index_col=[0,1,2])
submission_labels_sj = submission_labels.loc['sj']
submission_labels_iq = submission_labels.loc['iq'][0:150]

X_sj = submission_labels_sj['total_cases'].values
X_sj = X_sj.astype(np.float32)

X_iq = submission_labels_iq['total_cases'].values
X_iq = X_iq.astype(np.float32)


In [484]:
Y_sj = np.roll(X_sj, -1)
Y_iq = np.roll(X_iq, -1)
 
X_sj = np.reshape(X_sj, [-1, SEQLEN])
Y_sj = np.reshape(Y_sj, [-1, SEQLEN])
 
X_iq = np.reshape(X_iq, [-1, SEQLEN])
Y_iq = np.reshape(Y_iq, [-1, SEQLEN])


In [485]:
def eval_set_sj():
    evaldataset = tf.data.Dataset.from_tensor_slices((X_sj, Y_sj))
    evaldataset = evaldataset.repeat(1)
    evaldataset = evaldataset.batch(BATCHSIZE)
 
    samples, labels = evaldataset.make_one_shot_iterator().get_next()
    return samples, labels

In [486]:
def eval_set_iq():
    evaldataset = tf.data.Dataset.from_tensor_slices((X_iq, Y_iq))
    evaldataset = evaldataset.repeat(1)
    evaldataset = evaldataset.batch(BATCHSIZE)
 
    samples, labels = evaldataset.make_one_shot_iterator().get_next()
    return samples, labels

In [487]:
results = estimator.predict(eval_set_sj)
 
Yout_ = [result["Yout"][0] for result in results]

Yout_

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./outputdir\model.ckpt-18000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


[52.546043,
 55.826393,
 21.199722,
 23.028265,
 11.916157,
 32.752266,
 43.33929,
 36.225014,
 29.88677,
 31.938469,
 54.170578,
 67.551926,
 35.823956,
 20.245424,
 15.052139,
 20.770754,
 53.831898,
 31.051811,
 29.116327,
 15.796934,
 51.75196,
 46.60069,
 33.745052,
 29.852087,
 16.467482,
 25.795034]

In [488]:
for i in range(26):
    Y_sj[i][9] = Yout_[i]

In [489]:
Y_sj

array([[25.      , 33.      , 27.      , 29.      , 30.      , 32.      ,
        40.      , 42.      , 39.      , 52.546043],
       [37.      , 44.      , 41.      , 51.      , 52.      , 53.      ,
        58.      , 71.      , 63.      , 55.826393],
       [44.      , 56.      , 40.      , 39.      , 35.      , 32.      ,
        40.      , 36.      , 33.      , 21.199722],
       [20.      , 20.      , 24.      , 22.      , 22.      , 21.      ,
        22.      , 23.      , 25.      , 23.028265],
       [20.      , 19.      , 20.      , 21.      , 22.      , 19.      ,
        20.      , 20.      , 21.      , 11.916157],
       [24.      , 24.      , 22.      , 26.      , 25.      , 40.      ,
        35.      , 34.      , 37.      , 32.752266],
       [44.      , 46.      , 47.      , 51.      , 53.      , 45.      ,
        45.      , 47.      , 54.      , 43.33929 ],
       [53.      , 49.      , 45.      , 54.      , 55.      , 59.      ,
        54.      , 43.      , 36.    

In [490]:
temp_sj = Y_sj.reshape(260)

final_sj = np.roll(temp_sj, 1)
df_sj=pd.DataFrame(final_sj, columns=['total_cases'])


In [491]:
results = estimator.predict(eval_set_iq)
 
Yout_ = [result["Yout"][0] for result in results]

Yout_

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./outputdir\model.ckpt-18000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


[5.7207255,
 8.92329,
 12.722458,
 17.503004,
 14.98735,
 4.4938846,
 10.603901,
 18.833677,
 16.043793,
 7.2846193,
 5.764648,
 4.300509,
 23.23917,
 11.738901,
 6.4512973]

In [494]:
for i in range(15):
    Y_iq[i][9] = Yout_[i]
Y_iq

array([[ 5.       ,  8.       ,  2.       ,  2.       ,  5.       ,
         3.       ,  5.       ,  4.       ,  6.       ,  5.7207255],
       [ 5.       ,  5.       ,  5.       ,  5.       ,  5.       ,
         6.       ,  9.       , 12.       ,  7.       ,  8.92329  ],
       [ 9.       ,  9.       , 11.       ,  9.       ,  5.       ,
         8.       ,  6.       ,  8.       ,  7.       , 12.722458 ],
       [ 6.       ,  8.       ,  8.       ,  9.       ,  8.       ,
         8.       ,  8.       ,  8.       , 10.       , 17.503004 ],
       [ 8.       ,  8.       , 10.       , 10.       ,  5.       ,
         8.       ,  9.       ,  7.       ,  6.       , 14.98735  ],
       [ 5.       ,  7.       ,  7.       ,  3.       ,  4.       ,
         5.       ,  4.       ,  4.       ,  4.       ,  4.4938846],
       [ 3.       ,  5.       ,  6.       ,  6.       ,  9.       ,
         6.       ,  8.       , 13.       ,  8.       , 10.603901 ],
       [ 8.       ,  9.       ,  8.      

In [495]:
temp_iq = Y_iq.reshape(150)

final_iq = np.roll(temp_iq, 1)

df_iq_temp = pd.DataFrame(final_iq, columns=['total_cases'])

In [496]:
remainder_iq = submission_labels.loc['iq'][150:156]
remainder_iq.shape

(6, 1)

In [497]:
df_iq = np.concatenate([df_iq_temp, remainder_iq])
df_iq.shape

(156, 1)

In [498]:
final_submission = np.concatenate([df_sj, df_iq])
final_submission = final_submission.astype(np.int64)

In [499]:
submission = pd.read_csv("./submissions/submission_format.csv",
                         index_col=[0, 1, 2])

submission.total_cases = final_submission
submission.to_csv("./benchmark.csv")