```
产生一个2d array，类似于
     X           y
1 0 0 ... 0      1
0 0 0 ... 0      0
0 0 0 ... 0      0
1 0 0 ... 0      1
1 0 0 ... 0      1
预测的目标，就是每个序列seq（比如第1行的X,1 0 0 ... 0，如果第一个是1，则最终预测结果是1）。
如果在一个监督学习中，这个太简单了，但是如果是序列，就不同了。

第一行对应的输入/输出序列是
(1, 1), (0, 1), (0, 1) ... (0, 1)
```

In [1]:
import numpy as np
from numpy.random import choice

SAMPLES = 200
MAX_SEQ_LEN = 20

X_train_raw = np.zeros((SAMPLES, MAX_SEQ_LEN))
_one_idx = choice(SAMPLES, SAMPLES // 2, replace=False)
X_train_raw[_one_idx, 0] = 1
y_train_raw = X_train_raw[:, 0]

In [2]:
def prepare_sequences(x_train, y_train, window_length):
    windows = []
    windows_y = []
    for i, sequence in enumerate(x_train):
        len_seq = len(sequence)
        for window_start in range(0, len_seq - window_length + 1):
            window_end = window_start + window_length
            window = sequence[window_start:window_end]
            windows.append(window)
            windows_y.append(y_train[i])
    return np.array(windows), np.array(windows_y)

In [22]:
from sklearn.model_selection import train_test_split

WINDOW_LENGTH = 10
X_all, y_all = prepare_sequences(X_train_raw, y_train_raw, WINDOW_LENGTH)

X_all = X_all.reshape(X_all.shape[0], X_all.shape[1], 1)
X_train, X_val, y_train, y_val = train_test_split(X_all, y_all, test_size = 0.1)

X_train.shape, X_val.shape, y_train.shape, y_val.shape

((1980, 10, 1), (220, 10, 1), (1980,), (220,))

# Stateless无状态LSTM，无法收敛

In [4]:
from keras.layers import Dense, LSTM
from keras.models import Model, Input, Sequential

LSTM_UNITS = 8

model = Sequential()
model.add(LSTM(LSTM_UNITS, input_shape=X_train.shape[1:], return_sequences=False, stateful=False))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()

Using TensorFlow backend.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 8)                 320       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 9         
Total params: 329
Trainable params: 329
Non-trainable params: 0
_________________________________________________________________


In [5]:
batch_size = 11 # = MAX_SEQ_LEN - WINDOW_LENGTH + 1
# 因为是stateless，所以凑11个为一个batch无意义，因为模型内在认为是零散的。
# 我们这里仍然这样去设置，以期待可能的最佳结果

model.fit(X_train, y_train, batch_size=batch_size, epochs=15,
          validation_data=(X_val, y_val), shuffle=False)
score, acc = model.evaluate(X_val, y_val, batch_size=batch_size, verbose=0)

Train on 1980 samples, validate on 220 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
 286/1980 [===>..........................] - ETA: 8s - loss: 0.6569 - acc: 0.5804

KeyboardInterrupt: 

# Stateful有状态

In [91]:
SAMPLES = 100
MAX_SEQ_LEN = 20

def get_data():
    data = np.zeros((SAMPLES, MAX_SEQ_LEN))
    _one_idx = choice(SAMPLES, SAMPLES // 2, replace=False)
    data[_one_idx, 0] = 1

#     X_train.shape, X_val.shape, y_train.shape, y_val.shape
    return train_test_split(data, data[:,0], test_size = 0.1)

X_train, X_val, y_train, y_val = get_data()

X_train, X_val = X_train.reshape((1800, 1, 1)), X_val.reshape((200, 1, 1))

X_train.shape, X_val.shape, y_train.shape, y_val.shape

## 写法1：手动控制reset_states，自定义epochs，validation

In [69]:
LSTM_UNITS = 8

model = Sequential()
model.add(LSTM(LSTM_UNITS, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_9 (LSTM)                (1, 8)                    320       
_________________________________________________________________
dense_9 (Dense)              (1, 1)                    9         
Total params: 329
Trainable params: 329
Non-trainable params: 0
_________________________________________________________________


In [63]:
for epoch in range(8):
    # train
    train_loss, train_acc = [], []
    for i in range(X_train.shape[0]):
        for j in range(X_train.shape[1]):
            loss, acc = model.train_on_batch(
                np.expand_dims(np.expand_dims(np.expand_dims(X_train[i, j], axis=0), axis=0), axis=0),
                np.expand_dims(y_train[i], axis=0))
            
            train_loss.append(loss)
            train_acc.append(acc)
            
        model.reset_states()
        
    print('train loss {:.2f}, train acc {:.2f}'.format(np.mean(train_loss), np.mean(train_acc)))
    
#     # val
    val_loss, val_acc = [], []
    for i in range(X_val.shape[0]):
        for j in range(X_val.shape[1]):
            loss, acc = model.test_on_batch(
                np.expand_dims(np.expand_dims(np.expand_dims(X_val[i, j], axis=0), axis=0), axis=0),
                np.expand_dims(y_val[i], axis=0))
            
            val_loss.append(loss)
            val_acc.append(acc)
            
        model.reset_states()

    print('val loss {:.2f}, val acc {:.2f}'.format(np.mean(val_loss), np.mean(val_acc)))

train loss 0.70, train acc 0.47
val loss 0.69, val acc 0.50
train loss 0.52, train acc 0.73
val loss 0.11, val acc 1.00
train loss 0.05, train acc 1.00
val loss 0.02, val acc 1.00
train loss 0.01, train acc 1.00
val loss 0.01, val acc 1.00


KeyboardInterrupt: 

## 写法2，在1的基础上增加batch_size

输入输出都没变，只不过之前手动控制一个batch的for j循环，去掉了。

不过一个batch对应的y也要批量扩展

In [None]:
LSTM_UNITS = 4

batch_size = X_train.shape[1]
timesteps = 1
data_dims = 1

model = Sequential()
model.add(LSTM(LSTM_UNITS, batch_input_shape=(batch_size, timesteps, data_dims), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# model.fit(x, y, epochs=8, val,verbose,callback=[model-reset])

for epoch in range(8):
    # train
    train_loss, train_acc = [], []
    for i in range(X_train.shape[0]):
        # X_train[i].shape = (20,)
        loss, acc = model.train_on_batch(
            np.expand_dims(np.expand_dims(X_train[i], axis=1), axis=2),
            np.array([y_train[i]] * batch_size)
        )
        
        train_loss.append(loss)
        train_acc.append(acc)
        
        model.reset_states()
        
    print('train loss {:.2f}, train acc {:.2f}'.format(np.mean(train_loss), np.mean(train_acc)))
    
#     # val
    val_loss, val_acc = [], []
    for i in range(X_val.shape[0]):
        loss, acc = model.test_on_batch(
            np.expand_dims(np.expand_dims(X_val[i], axis=1), axis=2),
            np.array([y_val[i]] * batch_size)
        )
        
        val_loss.append(loss)
        val_acc.append(acc)
        
        model.reset_states()

    print('val loss {:.2f}, val acc {:.2f}'.format(np.mean(val_loss), np.mean(val_acc)))

## 写法3，Keras-impl，batch_size=1

In [112]:
X_train, X_val, y_train, y_val = get_data()

X_train, X_val = X_train.reshape((1800, 1, 1)), X_val.reshape((200, 1, 1))

X_train.shape, X_val.shape, y_train.shape, y_val.shape

((1800, 1, 1), (200, 1, 1), (90,), (10,))

In [113]:
y_train_new = np.zeros((1800, ))

In [114]:
for i in range(y_train.shape[0]):
    for j in range(20):
        y_train_new[j] = y_train[i]

In [108]:
# 用法3
from keras.callbacks import Callback

LSTM_UNITS = 4

batch_size = 1
timesteps = 1
data_dims = 1

model = Sequential()
model.add(LSTM(LSTM_UNITS, batch_input_shape=(batch_size, timesteps, data_dims), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.summary()

class StatusResetCallback(Callback):
    def __init__(self):
        self.counter = 0

    def on_batch_begin(self, batch, logs={}):
        if self.counter % 20 == 0:
            self.model.reset_states()
        self.counter += 1
        
model.fit(X_train, y_train_new, epochs=8, batch_size=1, verbose=1, shuffle=False,
#           validation_data=(X_val, y_val),
          callbacks=[StatusResetCallback()])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_24 (LSTM)               (1, 4)                    96        
_________________________________________________________________
dense_24 (Dense)             (1, 1)                    5         
Total params: 101
Trainable params: 101
Non-trainable params: 0
_________________________________________________________________
Epoch 1/8
  16/1800 [..............................] - ETA: 598s - loss: 0.6865 - acc: 0.9375  

  % delta_t_median)


Epoch 2/8

KeyboardInterrupt: 

## 写法4，Keras-impl，设置batch_size

注意shuffle=False

StatusResetCallback简化了，每个batch reset一下就好

In [116]:
X_train.shape, y_train_new.shape

((1800, 1, 1), (1800,))

In [166]:
# 用法3-more batch_size
from keras.callbacks import Callback

LSTM_UNITS = 4

batch_size = 20# 对1来说，就是20个，对10来说，就是11个，对n来说，就是20-n+1个
timesteps = 1
data_dims = 1

model = Sequential()
model.add(LSTM(LSTM_UNITS, batch_input_shape=(batch_size, timesteps, data_dims), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.summary()

class StatusResetCallback(Callback):
    def on_batch_begin(self, batch, logs={}):
        self.model.reset_states()

model.fit(X_train, y_train_new, epochs=8, batch_size=batch_size, verbose=1, shuffle=False,
#           validation_data=(X_val, y_val),
          callbacks=[StatusResetCallback()])

Epoch 1/8
 240/1800 [===>..........................] - ETA: 36s - loss: 0.6896 - acc: 0.9958

  % delta_t_median)


Epoch 2/8

KeyboardInterrupt: 

## 写法5，4的基础上设置timestemps，不过没有尝试成功

TODO 可以ref http://machinelearningmastery.com/use-timesteps-lstm-networks-time-series-forecasting/

In [187]:
from keras.callbacks import Callback

LSTM_UNITS = 4

timesteps = 2
batch_size = 20 - timesteps + 1 # 19
data_dims = 1

In [188]:
data = np.zeros((SAMPLES, MAX_SEQ_LEN))
_one_idx = choice(SAMPLES, SAMPLES // 2, replace=False)
data[_one_idx, 0] = 1
data.shape

(100, 20)

In [190]:
train_len = SAMPLES * batch_size
X_train_4 = np.zeros((train_len, timesteps))
y_train_4 = np.zeros((train_len, 1))

_len = 0
for i in range(SAMPLES):
    for j in range(MAX_SEQ_LEN - 1):
        X_train_4[_len] = (data[i, j], data[i, j+1])
        y_train_4[_len] = data[i, 0]
        _len += 1

X_train_4 = np.expand_dims(X_train_4, axis=2)

In [198]:
model = Sequential()
model.add(LSTM(LSTM_UNITS, batch_input_shape=(batch_size, timesteps, data_dims), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

class StatusResetCallback(Callback):
    def on_batch_begin(self, batch, logs={}):
        self.model.reset_states()

model.fit(X_train_4, y_train_4, epochs=8, batch_size=batch_size, verbose=1, shuffle=False,
#           validation_data=(X_val, y_val),
          callbacks=[StatusResetCallback()])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_38 (LSTM)               (19, 4)                   96        
_________________________________________________________________
dense_38 (Dense)             (19, 1)                   5         
Total params: 101
Trainable params: 101
Non-trainable params: 0
_________________________________________________________________
Epoch 1/8
 190/1900 [==>...........................] - ETA: 38s - loss: 0.6940 - acc: 0.6000

  % delta_t_median)


Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x13a03ba90>