In [1]:
from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from keras.layers import RepeatVector, Dense, Activation, Lambda
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import load_model, Model
import keras.backend as K
import numpy as np

from data_util import parse_bj_aq_data, generate_model_data_v1
from model import softmax

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Data preprocessing

In [2]:
# Each hourly air-quality observation is represented by a feature vector of length 210.
Feature_length = 210
# Encoder input window: the previous 5 days of hourly air-quality data.
Tx = 5 * 24
# Decoder output window: the following 2 days of hourly air-quality data to predict.
Ty = 2 * 24

In [3]:
# Load and parse the Beijing air-quality data via the project helper.
# NOTE(review): the structure of the four returned objects is defined in
# data_util.parse_bj_aq_data and is not visible here -- confirm before relying on it.
(
    bj_aq_data,
    stations,
    bj_aq_stations,
    bj_aq_stations_merged,
) = parse_bj_aq_data()

NaN in PM2.5 is 23459, 6.508615 %
NaN in PM10 is 96175, 26.683406 %
NaN in NO2 is 21720, 6.026135 %
NaN in CO is 46144, 12.802486 %
NaN in O3 is 23732, 6.584358 %
NaN in SO2 is 21664, 6.010598 %
There are 35 air quality stations in Beijing

The stations in Beijing are:
 {'miyun_aq', 'qianmen_aq', 'yongledian_aq', 'fangshan_aq', 'yungang_aq', 'huairou_aq', 'pingchang_aq', 'nongzhanguan_aq', 'yufa_aq', 'fengtaihuayuan_aq', 'guanyuan_aq', 'yongdingmennei_aq', 'tongzhou_aq', 'xizhimenbei_aq', 'shunyi_aq', 'badaling_aq', 'liulihe_aq', 'miyunshuiku_aq', 'aotizhongxin_aq', 'yizhuang_aq', 'wanliu_aq', 'daxing_aq', 'dongsihuan_aq', 'mentougou_aq', 'wanshouxigong_aq', 'tiantan_aq', 'dongsi_aq', 'gucheng_aq', 'pinggu_aq', 'dingling_aq', 'nansanhuan_aq', 'zhiwuyuan_aq', 'yanqin_aq', 'beibuxinqu_aq', 'donggaocun_aq'}


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  bj_aq_station.drop("utc_time", axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  bj_aq_station.drop("stationId", axis=1, inplace=True)


In [4]:
# Build the (input, target) arrays from the merged station data.
# NOTE(review): the meaning of the second positional argument (1) is defined by
# data_util.generate_model_data_v1 -- presumably a step/stride; confirm.
X_batches, Y_batches = generate_model_data_v1(bj_aq_stations_merged, 1)
# Number of training examples (size of the first axis of X_batches).
m = len(X_batches)

In [5]:
# Sanity check: show the input and target array shapes, one per line.
print(X_batches.shape, Y_batches.shape, sep="\n")

(10130, 120, 210)
(10130, 48, 210)


## Attention model

<table>
<td> 
<img src="images/attn_model.png" style="width:500px;height:500px;"> <br>
</td> 
<td> 
<img src="images/attn_mechanism.png" style="width:500px;height:500px;"> <br>
</td> 
</table>
<caption><center> **Figure 1**: Neural machine translation with attention</center></caption>

In [6]:
# Defined shared layers as global variables
repeator = RepeatVector(Tx)
concatenator = Concatenate(axis=-1)
densor1 = Dense(10, activation = "tanh")
densor2 = Dense(1, activation = "relu")
activator = Activation(softmax, name='attention_weights')
dotor = Dot(axes = 1)

In [7]:
def one_step_attention(a, s_prev):
    """
    Compute the attention context vector for a single decoder step.

    The previous decoder state is compared against every encoder hidden state
    using the shared module-level layers (repeator, concatenator, densor1,
    densor2, activator, dotor); the resulting weights are used to combine the
    encoder states into one context vector.

    Arguments:
    a -- hidden states of the pre-attention Bi-LSTM, Keras tensor of shape (m, Tx, 2*n_a)
    s_prev -- previous hidden state of the post-attention LSTM, Keras tensor of shape (m, n_s)

    Returns:
    context -- context vector, input of the next post-attention LSTM cell
    """
    # Tile s_prev to (m, Tx, n_s) so it can be paired with every encoder state.
    s_tiled = repeator(s_prev)

    # Score each time step: concatenate on the last axis, then pass the result
    # through the two small dense layers to obtain the "energies".
    energies = densor2(densor1(concatenator([a, s_tiled])))

    # Convert energies into attention weights "alphas".
    # NOTE(review): `softmax` comes from the local `model` module; presumably it
    # normalises over the Tx axis -- confirm.
    alphas = activator(energies)

    # Attention-weighted combination of the encoder states.
    return dotor([alphas, a])

In [8]:
# Hidden-state size of each direction of the pre-attention Bi-LSTM.
n_a = 32
# Hidden-state size of the post-attention (decoder) LSTM.
n_s = 64
# Decoder layers are shared across all Ty output steps, so they are created once here.
# return_state=True makes the LSTM also return its hidden and cell states.
post_activation_LSTM_cell = LSTM(units=n_s, return_state=True)
# Maps the decoder hidden state to one predicted feature vector per step.
output_layer = Dense(units=Feature_length, activation="relu")

In [9]:
# GRADED FUNCTION: model

def model(Tx, Ty, n_a, n_s, Feature_length):
    """
    Build the attention-based encoder-decoder network.

    A bidirectional LSTM encodes the input sequence; for each of the Ty output
    steps an attention context is computed from the encoder states and fed to
    the shared post-attention LSTM cell, whose hidden state is projected by the
    shared output layer.

    Relies on the module-level shared layers: one_step_attention (and the
    layers it uses), post_activation_LSTM_cell and output_layer. Note that the
    shared `repeator` was built with the module-level Tx, so the Tx passed here
    has to match it.

    Arguments:
    Tx -- length of the input sequence
    Ty -- length of the output sequence
    n_a -- hidden state size of the Bi-LSTM
    n_s -- hidden state size of the post-attention LSTM
    Feature_length -- number of features per time step of the input

    Returns:
    model -- Keras model instance taking [X, s0, c0] and producing a list of Ty outputs
    """
    # Model inputs: the feature sequence of shape (Tx, Feature_length) and the
    # decoder's initial hidden / cell states, each of shape (n_s,).
    X = Input(shape=(Tx, Feature_length))
    s0 = Input(shape=(n_s,), name='s0')
    c0 = Input(shape=(n_s,), name='c0')

    # Pre-attention encoder; return_sequences=True keeps one state per time step.
    encoder_states = Bidirectional(LSTM(n_a, return_sequences=True))(X)

    # Decoder state carried from one output step to the next.
    hidden, cell = s0, c0
    outputs = []

    for _step in range(Ty):
        # Context vector for this step, conditioned on the current decoder state.
        context = one_step_attention(encoder_states, hidden)

        # Advance the shared decoder cell; it returns (output, hidden state, cell state).
        hidden, _, cell = post_activation_LSTM_cell(context, initial_state=[hidden, cell])

        # Project the decoder hidden state to this step's prediction.
        outputs.append(output_layer(hidden))

    # Three inputs, one output tensor per decoder step.
    return Model(inputs=[X, s0, c0], outputs=outputs)

In [10]:
# Instantiate the network.
# NOTE(review): this assignment rebinds the name `model` from the builder
# function to the built Keras Model, so re-running this cell without first
# re-running the cell that defines `model` will fail. Consider renaming one of them.
model = model(Tx=Tx, Ty=Ty, n_a=n_a, n_s=n_s, Feature_length=Feature_length)

In [11]:
# Adam optimizer with learning-rate decay; mean squared error is used as the
# loss for the model's outputs.
opt = Adam(
    lr=0.005,
    beta_1=0.9,
    beta_2=0.999,
    decay=0.01
)
model.compile(loss='mean_squared_error', optimizer=opt)

In [12]:
# Zero initial hidden and cell states for the decoder, one row per example.
s0 = np.zeros(shape=(m, n_s))
c0 = np.zeros(shape=(m, n_s))
# The model emits one output per decoder step, so the targets are split into a
# list along the time axis: swapping the first two axes and iterating yields
# one array per step.
outputs = list(np.swapaxes(Y_batches, 0, 1))

In [13]:
# Train the model.
# NOTE(review): the recorded run of this cell aborted in epoch 1 with a GPU
# out-of-memory InternalError. A smaller batch_size or freeing GPU memory held
# by other processes may be needed -- confirm on the target machine.
model.fit(
    x=[X_batches, s0, c0],
    y=outputs,
    batch_size=100,
    epochs=30
)

Epoch 1/30


InternalError: CUB segmented reduce errorout of memory
	 [[Node: bidirectional_1/Sum = Sum[T=DT_FLOAT, Tidx=DT_INT32, keep_dims=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](bidirectional_1/zeros_like, bidirectional_1/Sum/reduction_indices)]]
	 [[Node: loss/dense_3_loss_39/Mean_3/_2421 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_78213_loss/dense_3_loss_39/Mean_3", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'bidirectional_1/Sum', defined at:
  File "/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/cvdev/tf/lib/python3.5/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/cvdev/tf/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/cvdev/tf/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/home/cvdev/tf/lib/python3.5/site-packages/tornado/platform/asyncio.py", line 112, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.5/asyncio/base_events.py", line 345, in run_forever
    self._run_once()
  File "/usr/lib/python3.5/asyncio/base_events.py", line 1312, in _run_once
    handle._run()
  File "/usr/lib/python3.5/asyncio/events.py", line 125, in _run
    self._callback(*self._args)
  File "/home/cvdev/tf/lib/python3.5/site-packages/tornado/platform/asyncio.py", line 102, in _handle_events
    handler_func(fileobj, events)
  File "/home/cvdev/tf/lib/python3.5/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/cvdev/tf/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/cvdev/tf/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/cvdev/tf/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/cvdev/tf/lib/python3.5/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/cvdev/tf/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/cvdev/tf/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/cvdev/tf/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/cvdev/tf/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/cvdev/tf/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/cvdev/tf/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/cvdev/tf/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/cvdev/tf/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/cvdev/tf/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-10-7b6861811982>", line 1, in <module>
    model = model(Tx, Ty, n_a, n_s, Feature_length)
  File "<ipython-input-9-c46ce619c319>", line 27, in model
    a = Bidirectional(LSTM(n_a, return_sequences=True))(X)
  File "/home/cvdev/tf/lib/python3.5/site-packages/keras/layers/wrappers.py", line 324, in __call__
    return super(Bidirectional, self).__call__(inputs, **kwargs)
  File "/home/cvdev/tf/lib/python3.5/site-packages/keras/engine/topology.py", line 619, in __call__
    output = self.call(inputs, **kwargs)
  File "/home/cvdev/tf/lib/python3.5/site-packages/keras/layers/wrappers.py", line 384, in call
    y = self.forward_layer.call(inputs, **kwargs)
  File "/home/cvdev/tf/lib/python3.5/site-packages/keras/layers/recurrent.py", line 2151, in call
    initial_state=initial_state)
  File "/home/cvdev/tf/lib/python3.5/site-packages/keras/layers/recurrent.py", line 559, in call
    initial_state = self.get_initial_state(inputs)
  File "/home/cvdev/tf/lib/python3.5/site-packages/keras/layers/recurrent.py", line 486, in get_initial_state
    initial_state = K.sum(initial_state, axis=(1, 2))  # (samples,)
  File "/home/cvdev/tf/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py", line 1265, in sum
    return tf.reduce_sum(x, axis, keepdims)
  File "/home/cvdev/tf/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py", line 1307, in reduce_sum
    name=name)
  File "/home/cvdev/tf/lib/python3.5/site-packages/tensorflow/python/ops/gen_math_ops.py", line 4682, in _sum
    keep_dims=keep_dims, name=name)
  File "/home/cvdev/tf/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/cvdev/tf/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/home/cvdev/tf/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InternalError (see above for traceback): CUB segmented reduce errorout of memory
	 [[Node: bidirectional_1/Sum = Sum[T=DT_FLOAT, Tidx=DT_INT32, keep_dims=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](bidirectional_1/zeros_like, bidirectional_1/Sum/reduction_indices)]]
	 [[Node: loss/dense_3_loss_39/Mean_3/_2421 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_78213_loss/dense_3_loss_39/Mean_3", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
