In [None]:
# Mount Google Drive and make the project folder the working directory, so
# the relative paths used by the following cells ('../data', 'MODEL', 'RET',
# and the local `src` / `utils` imports) resolve against it.
from google.colab import drive
drive.mount("/content/drive/", force_remount=True)
import os
# Use an absolute path anchored at the mount point: a relative chdir only
# works while the current directory is still /content, so re-running this
# cell after a first successful run would raise FileNotFoundError.
os.chdir('/content/drive/My Drive/TESI/Autoencoder')

Mounted at /content/drive/


In [None]:
import pickle as pickle

from src import TaxiBJ
import numpy as np

# Seed NumPy's global random generator so NumPy-based randomness is repeatable.
np.random.seed(1337)  # for reproducibility

# PARAMETERS
DATAPATH = '../data'  # dataset root, relative to the working directory
CACHEDATA = True  # cache data or NOT
nb_epoch = 50  # number of epochs at training stage
# nb_epoch_cont =  100 # number of epoch at training (cont) stage
batch_size = 16  # batch size
T = 48  # number of time intervals in one day
lr = 0.00015  # learning rate

len_closeness = 3  # length of closeness dependent sequence
len_period = 1  # length of period dependent sequence
len_trend = 1  # length of trend dependent sequence
nb_flow = 2  # there are two types of flows: inflow and outflow
# divide data into two subsets: Train & Test, of which the test set is the
# last 4 weeks
days_test = 7*4
len_test = T*days_test  # test length expressed in time intervals
len_val = 2*len_test  # validation length is twice the test length
map_height, map_width = 32, 32  # grid size

path_log = 'log_BJ'
muilt_step = False  # when True, the loading cell creates containers for multi-step RMSE

path_cache = os.path.join(DATAPATH, 'CACHE', 'Autoencoder')  # cache path
path_result = 'RET'
path_model = 'MODEL'
# os.makedirs(..., exist_ok=True) is a no-op for existing directories and,
# unlike os.mkdir, also creates missing parents. path_cache is nested
# (<DATAPATH>/CACHE/Autoencoder), so os.mkdir would raise FileNotFoundError
# whenever the intermediate CACHE directory has not been created yet.
os.makedirs(path_result, exist_ok=True)
os.makedirs(path_model, exist_ok=True)
if CACHEDATA:
    os.makedirs(path_cache, exist_ok=True)

In [None]:
from utils import cache, read_cache
import time
import os

# Load the TaxiBJ splits, going through the on-disk cache when it is enabled.

# Containers that are only created when multi-step mode is switched on.
if muilt_step:
    dic_rmse = {}
    list_muilt_rmse = []

print("loading data...")
ts = time.time()

# The cache file name encodes the closeness / period / trend sequence lengths,
# so different settings never share a cache file.
fname = os.path.join(
    path_cache,
    'TaxiBJ_C{}_P{}_T{}.h5'.format(len_closeness, len_period, len_trend),
)

if CACHEDATA and os.path.exists(fname):
    # Cache hit: read every split back from the cache file.
    (X_train_all, Y_train_all, X_train, Y_train,
     X_val, Y_val, X_test, Y_test, mmn, external_dim,
     timestamp_train_all, timestamp_train,
     timestamp_val, timestamp_test) = read_cache(fname)
    print("load %s successfully" % fname)
else:
    # Cache miss (or caching disabled): build the splits from the raw data.
    (X_train_all, Y_train_all, X_train, Y_train,
     X_val, Y_val, X_test, Y_test, mmn, external_dim,
     timestamp_train_all, timestamp_train,
     timestamp_val, timestamp_test) = TaxiBJ.load_data(
        T=T,
        nb_flow=nb_flow,
        len_closeness=len_closeness,
        len_period=len_period,
        len_trend=len_trend,
        len_test=len_test,
        len_val=len_val,
        preprocess_name='preprocessing_bj.pkl',
        meta_data=True,
        meteorol_data=True,
        holiday_data=True,
        datapath=DATAPATH,
    )
    if CACHEDATA:
        # NOTE(review): mmn is not handed to cache(), yet read_cache() returns
        # it; presumably it is restored from elsewhere - confirm in utils.
        cache(
            fname,
            X_train_all, Y_train_all, X_train, Y_train,
            X_val, Y_val, X_test, Y_test,
            external_dim,
            timestamp_train_all, timestamp_train, timestamp_val, timestamp_test,
        )
i = 0
print(external_dim)
# Every T-th timestamp, cut to its first 8 characters, is shown as one test day.
print("\n days (test): ", [stamp[:8] for stamp in timestamp_test[0::T]])
print("\nelapsed time (loading data): %.3f seconds\n" % (time.time() - ts))


loading data...


  num = int(f['num'].value)
  X_train_all.append(f['X_train_all_%i' % i].value)
  X_train.append(f['X_train_%i' % i].value)


load ../data/CACHE/Autoencoder/TaxiBJ_C3_P1_T1.h5 successfully
28

 days (test):  [b'20160309', b'20160310', b'20160311', b'20160312', b'20160313', b'20160314', b'20160315', b'20160316', b'20160317', b'20160318', b'20160319', b'20160320', b'20160321', b'20160322', b'20160325', b'20160326', b'20160327', b'20160328', b'20160329', b'20160401', b'20160402', b'20160403', b'20160404', b'20160405', b'20160406', b'20160407', b'20160408', b'20160409']

elapsed time (loading data): 52.965 seconds



  X_val.append(f['X_val_%i' % i].value)
  X_test.append(f['X_test_%i' % i].value)
  Y_train_all = f['Y_train_all'].value
  Y_train = f['Y_train'].value
  Y_val = f['Y_val'].value
  Y_test = f['Y_test'].value
  external_dim = f['external_dim'].value
  timestamp_train_all = f['T_train_all'].value
  timestamp_train = f['T_train'].value
  timestamp_val = f['T_val'].value
  timestamp_test = f['T_test'].value


In [8]:
from keras.callbacks import EarlyStopping, ModelCheckpoint
from utils import build_model

# Build the network and the callbacks used during training.
model_name = 'model2'

model = build_model(len_closeness, len_period, len_trend, model=model_name,
                    external_dim=external_dim, lr=lr, save_model_pic=None)
# model.summary()

# Tag shared by every artefact of this run (weights, best checkpoint, history).
hyperparams_name = '{}.TaxiBJ.c{}.p{}.t{}.lr{}'.format(
    model_name, len_closeness, len_period, len_trend, lr)
# Store the best checkpoint under path_model (the directory created by the
# configuration cell) instead of a hard-coded 'MODEL' literal, so the two
# cannot drift apart if the setting is changed.
fname_param = os.path.join(path_model, '{}.best.h5'.format(hyperparams_name))

# Stop once val_rmse has not improved for 10 consecutive epochs.
early_stopping = EarlyStopping(monitor='val_rmse', patience=10, mode='min')
# Write the weights to fname_param only when val_rmse reaches a new minimum.
model_checkpoint = ModelCheckpoint(
    fname_param, monitor='val_rmse', verbose=0, save_best_only=True, mode='min')

In [None]:
# train model
print("training model...")
ts = time.time()
history = model.fit(X_train, Y_train,
                    epochs=nb_epoch,
                    batch_size=batch_size,
                    validation_data=(X_val, Y_val),
                    callbacks=[early_stopping, model_checkpoint],
                    verbose=2)

# Save the weights as they are when fit() returns; the best-validation
# checkpoint is written separately by the model_checkpoint callback.
model.save_weights(os.path.join(
    path_model, '{}.h5'.format(hyperparams_name)), overwrite=True)

# Persist the per-epoch metrics. The context manager guarantees the file is
# flushed and closed; passing a bare open(...) to pickle.dump leaves the
# handle open until it happens to be garbage collected.
history_path = os.path.join(
    path_result, '{}.history.pkl'.format(hyperparams_name))
with open(history_path, 'wb') as history_file:
    pickle.dump(history.history, history_file)
print("\nelapsed time (training): %.3f seconds\n" % (time.time() - ts))

training model...
Epoch 1/50
770/770 - 121s - loss: 1.1884 - rmse: 1.0882 - val_loss: 0.4387 - val_rmse: 0.6102
Epoch 2/50
770/770 - 122s - loss: 0.8517 - rmse: 0.9216 - val_loss: 0.3930 - val_rmse: 0.5924
Epoch 3/50
770/770 - 120s - loss: 0.4163 - rmse: 0.6334 - val_loss: 0.2161 - val_rmse: 0.4648
Epoch 4/50
770/770 - 120s - loss: 0.2232 - rmse: 0.4713 - val_loss: 0.1728 - val_rmse: 0.4156
Epoch 5/50
770/770 - 121s - loss: 0.1455 - rmse: 0.3802 - val_loss: 0.1081 - val_rmse: 0.3287
Epoch 6/50
770/770 - 120s - loss: 0.0881 - rmse: 0.2953 - val_loss: 0.0592 - val_rmse: 0.2433
Epoch 7/50
770/770 - 120s - loss: 0.0482 - rmse: 0.2177 - val_loss: 0.0301 - val_rmse: 0.1733
Epoch 8/50
770/770 - 121s - loss: 0.0231 - rmse: 0.1497 - val_loss: 0.0149 - val_rmse: 0.1219
Epoch 9/50
770/770 - 121s - loss: 0.0093 - rmse: 0.0938 - val_loss: 0.0041 - val_rmse: 0.0640
Epoch 10/50
770/770 - 121s - loss: 0.0034 - rmse: 0.0562 - val_loss: 0.0020 - val_rmse: 0.0438
Epoch 11/50
770/770 - 121s - loss: 0.0016

In [None]:
# Reload the checkpoint stored at fname_param before scoring the test set.
model.load_weights(fname_param)

# batch_size equals the length of Y_test's first axis, so the whole test set
# is evaluated as one batch.
score = model.evaluate(
    X_test,
    Y_test,
    batch_size=Y_test.shape[0],
    verbose=0,
)

# score[0] is printed as the test score and score[1] as the normalised RMSE.
# The last value rescales that RMSE by half the (max - min) range held by mmn;
# presumably this undoes a min-max scaling to [-1, 1] - confirm against mmn.
print(
    'Test score: %.6f rmse (norm): %.6f rmse (real): %.6f' % (
        score[0],
        score[1],
        score[1] * (mmn._max - mmn._min) / 2.,
    )
)

Test score: 0.000648 rmse (norm): 0.025462 rmse (real): 16.448581
