In [81]:
import sys
import os
import dataloader
import time

import numpy as np

from keras.models import Sequential
from keras.layers.core import TimeDistributedDense, Activation, Dropout
from keras.layers.recurrent import SimpleRNN, LSTM, JZS1, GRU
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import Adam, SGD

from datetime import datetime

import theano
import theano.tensor as T

def save_list(l, filepath):
    """Write every item of ``l`` to ``filepath``, one item per line.

    Each item is rendered with ``%s`` formatting. An existing file at
    ``filepath`` is overwritten; an empty ``l`` produces an empty file.

    :param l: iterable of items to write
    :param filepath: path of the text file to create or overwrite
    """
    # The context manager closes the file even if a write raises part-way.
    with open(filepath, 'w') as f:
        for item in l:
            # Wrap in a 1-tuple so that tuple items are printed as a whole
            # instead of being unpacked as multiple format arguments.
            f.write("%s\n" % (item,))


In [82]:
def floatX(a):
    """Return ``a`` as a NumPy array whose dtype is ``theano.config.floatX``."""
    target_dtype = theano.config.floatX
    return np.asarray(a, dtype=target_dtype)

In [83]:
# Per-run output directory named '<YYYYMMDD>out_<counter>/' (relative path);
# it is created if it does not exist yet.
counter = 10000
out_dir = datetime.now().strftime('%Y%m%d') + 'out_' + str(counter) + '/'
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

# Unseeded RandomState; in the visible cells it is only referenced by the
# commented-out random hyper-parameter choices at the end of this section.
rng = np.random.RandomState()

# load data
sequence_length = 500
crop_end = 2
n_train = 2400
n_test = 100
# Total number of samples requested: 500 * (2400 + 100) = 1250000.
n_data = sequence_length * (n_train + n_test)
input_length = 21   # used as input_dim of the first model layer
percept_length = 18  # the model emits 2 * percept_length outputs (see loss_fn)

print 'load data ({0})...'.format(n_data)
# NOTE(review): get_data/make_batches live in the project-local dataloader
# module; the meaning of the (0, n_data + 1) range and of crop_end is not
# visible here -- confirm against dataloader.
data = dataloader.get_data(0, n_data + 1)
inputs, percepts = dataloader.make_batches(data, sequence_length, crop_end=crop_end)
# The first n_train entries are used for training ...
x_train = inputs[:n_train]
y_train = percepts[:n_train]

# ... and everything after them for testing (presumably n_test entries;
# depends on how many batches make_batches returns).
x_test = inputs[n_train:]
y_test = percepts[n_train:]

# Hyper-parameters recorded with the results of this run.
n_hidden = 180
dropout = False
early_stopping_patience = 5
n_additional = 180

# Random hyper-parameter choices (disabled):
# n_hidden = rng.choice([20, 40, 80, 160, 320])
# dropout = rng.choice([True, False], p=[0.2, 0.8])
# early_stopping_patience = rng.choice([10, 20, 50])
#
# n_additional = rng.choice([0, 40, 60], p=[0.6, 0.2, 0.2])

print 'build model...'
# Disabled recurrent architecture (LSTM -> optional second LSTM -> GRU),
# kept for reference; the dense model further down is the one that is built.
# model = Sequential()
# model.add(LSTM(input_dim=input_length,
#                output_dim=n_hidden,
#                activation='tanh',
#                inner_activation='hard_sigmoid',
#                init='glorot_uniform',
#                inner_init='orthogonal',
#                forget_bias_init='one',
#                return_sequences=True))


# if n_additional > 0:
#     model.add(LSTM(output_dim=n_additional,
#                    activation='tanh',
#                    inner_activation='hard_sigmoid',
#                    init='glorot_uniform',
#                    inner_init='orthogonal',
#                    forget_bias_init='one',
#                    return_sequences=True))


# model.add(GRU(output_dim=2 * percept_length,
#                activation='sigmoid',
#                inner_activation='hard_sigmoid',
#                init='glorot_uniform',
#                inner_init='orthogonal',
#                # forget_bias_init='one',
#                return_sequences=True))


# Active model: TimeDistributedDense(100) -> tanh ->
# TimeDistributedDense(2 * percept_length) -> sigmoid.
model = Sequential()
model.add(TimeDistributedDense(100,
                               input_dim=input_length))
model.add(Activation('tanh'))
# 2 * percept_length outputs: loss_fn reads the first percept_length as means
# and the remaining percept_length as sigmas.
model.add(TimeDistributedDense(2*percept_length))
# The sigmoid keeps every output (means and sigmas alike) inside (0, 1).
model.add(Activation('sigmoid'))


# Gaussian density: 1/(sqrt(2pi) sigma)  exp(-(x - mu)^2 / (2 sigma^2))
#     loss_value = T.exp(-((y_true - y_pred[:, :percept_length]) ** 2) / (2 * y_pred[:, percept_length:] ** 2)) / y_pred[:, percept_length:]
# todo: algebraic simplification
# def loss_fn(y_true, y_pred):
#     sigmas_pred = y_pred[:, :percept_length]
#     mus_pred = y_pred[:, percept_length:]
#     return -T.mean(T.log(T.exp(-((y_true - mus_pred) ** 2) / (2 * sigmas_pred ** 2) ) / sigmas_pred))

def loss_fn(y_true, y_pred):
    """Gaussian negative log-likelihood (constant term dropped), averaged over all elements.

    ``y_pred`` is split along axis 1: the first ``percept_length`` columns
    are taken as predicted means, the remaining columns as predicted sigmas.
    """
    mu = y_pred[:, :percept_length]
    sigma = y_pred[:, percept_length:]
    squared_error = (y_true - mu) ** 2
    # (x - mu)^2 / (2 sigma^2) + log(sigma), element-wise.
    per_element_nll = squared_error / (2 * sigma ** 2) + T.log(sigma)
    return T.mean(per_element_nll)


# Time model compilation separately from training; both durations are
# written to the results file at the end.
tic = time.time()
# The lambda only forwards to loss_fn. Optimizer: SGD with lr=0.001 and
# clipnorm=1.0.
model.compile(loss=lambda y_true, y_pred: loss_fn(y_true, y_pred), optimizer=SGD(lr=0.001, clipnorm=1.0))
compile_time = time.time() - tic
print 'Compile time: {0} sec'.format(compile_time)

# Both callbacks monitor 'val_loss': early stopping with the configured
# patience, and a checkpoint file in out_dir written with save_best_only=True.
early_stopping = EarlyStopping(monitor='val_loss', patience=early_stopping_patience)
model_checkpoint = ModelCheckpoint(out_dir + 'model_checkpoint.h5',
                                   monitor='val_loss',
                                   save_best_only=True)

print 'start training...'

tic = time.time()
# batch_size=1, no shuffling, 10% of the training data held out for
# validation (the captured log reports 2160 train / 240 validation samples).
model.fit(x_train, y_train,
          batch_size=1,
          nb_epoch=400,
          validation_split=0.1,
          callbacks=[early_stopping, model_checkpoint],
          shuffle=False)
training_duration = time.time() - tic

# NOTE(review): this evaluates the in-memory model as left by fit(); the
# checkpoint written by ModelCheckpoint is not reloaded first -- confirm that
# this is intended.
score = model.evaluate(x_test, y_test, batch_size=4)

# One value per line: hyper-parameters, timings and the test score.
# NOTE(review): n_hidden, dropout and n_additional are recorded here, but the
# active dense model above does not reference them.
save_list([n_hidden, dropout, early_stopping_patience, n_additional, compile_time, training_duration, score],
          out_dir + '_choice_and_result.dat')

# Persist the model description returned by to_json() next to the results.
json_string = model.to_json()
save_list([json_string],
          out_dir + 'json.dat')


load data (1250000)...
build model...
Compile time: 3.14011502266 sec
start training...
Train on 2160 samples, validate on 240 samples
Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epo

In [67]:
# Debug variant of the loss for hand-checking values.
# NOTE: this cell rebinds the module-level names percept_length and loss_fn,
# shadowing the definitions used by the training cell (percept_length = 18 and
# the scalar-valued loss_fn); re-run the training cell's definitions afterwards.
percept_length = 2
def loss_fn(y_true, y_pred):
    """Return the intermediate terms of the Gaussian loss, one value per row.

    The result is a 3-tuple of symbolic expressions:
    the predicted means, the row-wise mean of the quadratic term
    ``(y_true - mu)^2 / (2 sigma^2)``, and the row-wise mean of the quadratic
    term plus ``log(sigma)``.
    """
    mu = y_pred[:, :percept_length]
    sigma = y_pred[:, percept_length:]
    quadratic = ((y_true - mu) ** 2) / (2 * sigma ** 2)
    with_log_sigma = quadratic + T.log(sigma)
    return (mu,
            T.mean(quadratic, axis=1),
            T.mean(with_log_sigma, axis=1))


# Symbolic matrix inputs used to evaluate the debug loss_fn on fixed values.
y_true = T.matrix('y_true')
y_pred = T.matrix('y_pred')

# Compile and print each expression returned by loss_fn in turn.
# on_unused_input='ignore' is needed because the first expression (the means)
# does not depend on y_true.
# Each y_pred row is [mu_1, mu_2, sigma_1, sigma_2]; the first two rows use
# very small sigmas (1e-6 and 1e-3) to show how the terms behave there.
for fn in loss_fn(y_true, y_pred):
    fn_ = theano.function([y_true, y_pred], fn, on_unused_input='ignore')
    print fn_(floatX([[1.0, 1.0],
                      [1.0, 1.0],
                      [1.0, 1.0]]),
              floatX([[1.0, 1.0, 0.000001, 0.000001],
                      [0.5, 0.5, 0.001, 0.001],
                      [1.1, 0.9, 2.0, 0.5]]))
    print '---'

# sigma = T.scalar('sigma')
# print (T.log(sigma)).eval({sigma: 0.5})


[[ 1.          1.        ]
 [ 0.5         0.5       ]
 [ 1.10000002  0.89999998]]
---
[  0.00000000e+00   1.24999984e+05   1.06250048e-02]
---
[ -1.38155107e+01   1.24993078e+05   1.06250048e-02]
---


In [84]:
def a(x, y):
    """Return ``x + y``."""
    total = x + y
    return total

In [86]:
# Lambda that fixes the first argument of a to 3: b(y) == a(3, y).
b = lambda y: a(3, y)

In [87]:
# Evaluates a(3, 5); the captured output below shows 8.
b(5)

8