# 最后一层

回归问题：不需要特殊定义的最后一层  
二分类问题：`model.add(Activation('sigmoid'))`  
多分类问题：`model.add(Activation('softmax'))`  
多分类问题输出的是每个类别的概率，可以使用softmax把概率向量转成类别index.  

# model.summary()

打印完整的结构

# 常用的调参方式

## LSTM的memory cell

```python
model.add(LSTM(n_cells))
```

## batch_size

```python
model.fit(X, y, epochs, batch_size=n_batch)
```

## dropout

```python
model.add(LSTM(..., dropout=0.4))
```

## L1L2正则化

```python
model.add(LSTM(..., kernel_regularizer=L1L2(0.01, 0.01)))
```

# 用指定参数初始化模型

In [4]:
import tensorflow as tf
import numpy as np

class ExampleRandomNormal(tf.keras.initializers.Initializer):

    def __init__(self, weights):
        self.weights = weights

    def __call__(self, shape, dtype=None):
        return self.weights

    def get_config(self):  # To support serialization
        return {'weights': self.weights}

# y = W * x + b，定义W=10，b=0
layer = tf.keras.layers.Dense(1, kernel_initializer=ExampleRandomNormal([[10]]))
model = tf.keras.Sequential([layer])
model.predict([1])

array([[10.]], dtype=float32)

# 输出中间层结果

In [11]:
import tensorflow as tf
import numpy as np

class ExampleRandomNormal(tf.keras.initializers.Initializer):

    def __init__(self, weights):
        self.weights = weights

    def __call__(self, shape, dtype=None):
        return self.weights

    def get_config(self):  # To support serialization
        return {'weights': self.weights}

# y = 10 * x
layer0 = tf.keras.layers.Dense(1, kernel_initializer=ExampleRandomNormal([[10]]), input_shape=[None, 1])
# y = 5 * x
layer1 = tf.keras.layers.Dense(1, kernel_initializer=ExampleRandomNormal([[5]]))
model = tf.keras.Sequential([layer0, layer1])
model.summary()
layer = tf.keras.backend.function(inputs=[model.layers[0].input], outputs=[model.layers[0].output])
# layer(1)  # 不知道为什么会挂掉

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, None, 1)           2         
_________________________________________________________________
dense_16 (Dense)             (None, None, 1)           2         
Total params: 4
Trainable params: 4
Non-trainable params: 0
_________________________________________________________________


# 序列模型

In [None]:
import tensorflow as tf

model = tf.keras.Sequential(
    [
        # Embedding是指把编码转成指定维度的向量，使具有相近语义的单词其向量也是相近的
        tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
        # Flatten也可以用GlobalAveragePooling1D代替，后者速度更快
        tf.keras.layers.Flatten().
        tf.keras.layers.Dense(6, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ]
)

## One-to-One Model

![](assets/2.png)  
这样的模型是poor use of the LSTM，因为它put all if the pressure on the internal state of memory，因此not capable of learning accross input or output time steps.

In [None]:
import tensorflow as tf

ANY_VALUE = 5

model = tf.keras.Sequential(
    [
        tf.keras.layers.LSTM(ANY_VALUE, input_shape=(1,ANY_VALUE)),
        tf.keras.layers.Dense(1)
    ]
)

model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(ANY_VALUE, input_shape=(1,ANY_VALUE)))
model.add(tf.keras.layers.Dense(1))

## One-to-Many Model

![](assets/3.png)  
X可以是一张图像，那么这个模型可以用于给图像生成标题

In [None]:
import tensorflow as tf

ANY_VALUE = 5

model = tf.keras.Sequential(
    [
        # 这类模型通常用于对一个图像做分析，因此输入是一个图像，用卷积提取特征
        tf.keras.layers.Conv2D(filters=ANY_VALUE,kernel_size=(ANY_VALUE, ANY_VALUE)),
        tf.keras.layers.LSTM(ANY_VALUE),
        # 所有时间步的输出层Dense共享参数，因此使用TimeDistributed
        tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1))
    ]
)

model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv2D(filters=ANY_VALUE,kernel_size=(ANY_VALUE, ANY_VALUE)))
model.add(tf.keras.layers.LSTM(ANY_VALUE))
model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1)))

In [None]:
import tensorflow as tf

models = tf.keras.models.Sequential([
    tf.keras.layers.Conv1D(filter=32, kernel_size=5, strides=5, padding='causal', activation='relu', input_shape=['None', 1])
    tf.keras.layers.LSTM(32, return_sequences = True),
    tf.keras.layers.LSTM(32, return_sequences = True),
    tf.keras.layers.Dense(1),
    tf.keras.layers.Lambda(lambda x:x*20)
])

## Many-to-One Model

![](assets/4.png)

In [None]:
# 单层Many-to-One
import tensorflow as tf

ANY_VALUE = 5
STEPS = 8

model = tf.keras.Sequential(
    [
        # Many-to-One和One-to-One的主要区别在于input_shape的第一个参数是1还是steps数
        tf.keras.layers.LSTM(ANY_VALUE, input_shape=(STEPS,ANY_VALUE)),
        tf.keras.layers.Dense(1)
    ]
)

model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(ANY_VALUE, input_shape=(STEPS,ANY_VALUE)))
model.add(tf.keras.layers.Dense(1))

In [None]:
# 多层Many-to-One
import tensorflow as tf

ANY_VALUE = 5
STEPS = 8

model = tf.keras.Sequential(
    [
        # Many-to-One和One-to-One的主要区别在于input_shape的第一个参数是1还是steps数
        tf.keras.layers.LSTM(ANY_VALUE, input_shape=(STEPS,ANY_VALUE), return_sequences=True),
        tf.keras.layers.LSTM(ANY_VALUE),
        tf.keras.layers.Dense(1)
    ]
)

model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(ANY_VALUE, input_shape=(STEPS,ANY_VALUE), return_sequences=True))
model.add(tf.keras.layers.LSTM(ANY_VALUE))
model.add(tf.keras.layers.Dense(1))

## Many-to-Many

![](assets/5.png)  
x的step数和y的step数可以不相同。  
Many-to-Many Model又称为Seq2Seq Model，或Encoder-Decoder Model。  
Input Sequence ---(Encoder)--->固定长度的向量---(Decoder)--->Output Sequence  
Encoder常常被用来做Sequence Embedding.  

Many-to-Many模型可以用于statistical(统计的) machine translation.

In [None]:
# x steps = y steps

import tensorflow as tf

ANY_VALUE = 5
STEPS = 8

model = tf.keras.Sequential(
    [
        # Many-to-One和One-to-One的主要区别在于input_shape的第一个参数是1还是steps数
        tf.keras.layers.LSTM(ANY_VALUE, input_shape=(STEPS,ANY_VALUE), return_sequences=True),
        tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1))
    ]
)

model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(ANY_VALUE, input_shape=(STEPS,ANY_VALUE), return_sequences=True))
model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1)))

In [None]:
# x steps <> y steps

import tensorflow as tf

ANY_VALUE = 5
IN_STEPS = 8
OUT_STEPS = 10

model = tf.keras.Sequential(
    [
        # Encoder，第一个参数memory cells决定了Encoder后的固定长度向量的长度。  
        tf.keras.layers.LSTM(ANY_VALUE, input_shape=(IN_STEPS,ANY_VALUE)),
        # the input time steps are mapped to a fixed sized internal representations
        # Encoder产生的output是2D的，为[samples, vector_size]，
        # Decoder需要的input是3D的，为[samples, steps, vector_size]
        # 中间需要一个适配层，让所有step都使用相同的vector
        tf.keras.layers.RepeatVector(OUT_STEPS),
        # Decoder
        tf.keras.layers.LSTM(ANY_VALUE, return_sequences=True),
        # 所有step的output层的参数共享
        tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1))
    ]
)

model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(ANY_VALUE, input_shape=(IN_STEPS,ANY_VALUE)))
model.add(tf.keras.layers.RepeatVector(OUT_STEPS))
model.add(tf.keras.layers.LSTM(ANY_VALUE, return_sequences=True))
model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1)))

## Cardinality from Time Steps (not Features!)

![](assets/6.png)  
例如复数的实部和虚数，可以是两个特征，也可以是两个序列

## CNN-LSTM 模型

CNN可以特征提取，LSTM可以序列预测  
CNN-LSTM模型用于visual time series prediction问题，或者基于图像序列生成文本描述。  
CNN-LSTM模型的输入数据是5D的，维度依次为：samples, timesteps, width, height, channels  
Flatten()将三维的图像数据拉成一个很长的向量，期望这个向量比原始像素更compressed或salient（显著）。  

In [4]:
# 方法一
import tensorflow as tf
ANY_VALUE = 5

cnn = tf.keras.Sequential()
cnn.add(tf.keras.layers.Conv2D(filters=ANY_VALUE,kernel_size=(ANY_VALUE, ANY_VALUE)))
cnn.add(tf.keras.layers.MaxPooling2D(ANY_VALUE))
cnn.add(tf.keras.layers.Flatten())

model = tf.keras.Sequential()
model.add(tf.keras.layers.TimeDistributed(cnn))
model.add(tf.keras.layers.LSTM(ANY_VALUE))
model.add(tf.keras.layers.Dense(1))

In [5]:
# 方法二
# 推荐使用这种方法，因为这种方法model.summary会包含全部信息
import tensorflow as tf
ANY_VALUE = 5

model = tf.keras.Sequential()
model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(filters=ANY_VALUE,kernel_size=(ANY_VALUE, ANY_VALUE))))
model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.MaxPooling2D(ANY_VALUE)))
model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Flatten()))
model.add(tf.keras.layers.LSTM(ANY_VALUE))
model.add(tf.keras.layers.Dense(1))

# Embedding的可视化

In [6]:
e = model.layers[0]   # embedding层
weights = e.get_weights()[0]
weights.shape()  # (10000, 16), 10000代表单词表的大小，16代表每个单词的维度
# 单词序号转换为单词
reverse_word_index = dict([(value, key) for (key, value) in word_index.items])

# meta.csv: 序号 --> 单词  
# vecs.csv：序号 --> embedding
import io
out_v = io.open('vecs.tsv', 'w', encoding='utf-8')
out_m = io.open('meta.tsv', 'w', encoding='utf-8')
for word_num in range(1, vocab_size): # 0代表OOV
    word = reverse_word_index[word_num]
    embeddings = weights[word_num]
    out_m.write(word + '\n')
    out_v.write('\t'.join(str(x) for x in embeddings)+'\n')
out_m.close()
out_v.close()
# https://projector.tensorflow.org

IndexError: list index out of range

# 一维CNN做NLP

In [7]:
import tensorflow as tf

vocab_size = 10000
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, 64),
    tf.keras.layers.Conv1D(128, 5, activation='relu'),
    tf.keras.layers.GlobalMaxPooling1D(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# 语言生成模型

例如句子为：  
[1, 2, 3, 4, 5, 6, 7]  
[1, 3, 5, 7, 9]  
[2, 4, 6, 8]  
第一步：得到input_sequence为：  
[1, 2]  
[1, 2, 3]  
[1, 2, 3, 4]  
[1, 2, 3, 4, 5]  
[1, 2, 3, 4, 5, 6]  
[1, 2, 3, 4, 5, 6, 7]  
[1, 3]  
[1, 3, 5]  
[1, 3, 5, 7]  
[1, 3, 5, 7, 9]  
[2, 4]  
[2, 4, 6]  
[2, 4, 6, 8]  
第二步：以前面补0的方式全部对齐到同样的长度（长度为7）  
第三步：每一行的前6个为输入，最后一个为输出

In [8]:
data = '.......'  # 训练数据
corpus = data.lower().split('\n')
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1 # +1代表OOV

input_sequences = []
for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequence.append(n_gram_sequence)
        
max_sequence_len = max([len(x) for x in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences,
                                         maxlen = max_sequences_len,
                                         padding='pre'))
xs = input_sequences[:, :-1]
labels = input_sequences[:, -1]
ys = tf.keras.utils.to_categorical(labels, num_classes = totoal_words)

NameError: name 'Tokenizer' is not defined

# 另一种写法

In [9]:
import tensorflow as tf

dataset = tf.data.Dataset.range(10)
for val in dataset:
    print (val.numpy())
dataset = dataset.window(5, shift=1, drop_remainder=True)
for window in dataset:
    print (window.numpy())
dataset = dataset.map(lambda window:window[:-1], window[-1:])
dataset = dataset.shuffle(buffer_size=10)
dataset = dataset.batch(2).prefetch(1)

0
1
2
3
4
5
6
7
8
9


AttributeError: '_VariantDataset' object has no attribute 'numpy'

# Lambda层

In [None]:
import tensorflow as tf

# 这句话放在keras的第一层，相当于数据的预处理
# input_shape=[None]代表输入为任意数据
tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1), input_shape=[None])

# 这句话放在最后一层，相当于输出数据的后期处理
tf.keras.layers.Lambda(lambda x: x*1000)

# early stopping

In [None]:
from tf.keras.callbacks import EarlyStopping

es = EarlyStopping(monitor='val_loss', patience=100)
model.fit(..., callbacks=[es])

# 模型存储

In [None]:
# 同时存储模型结构和参数

# pip install h5py
model.save('xxx.h5')

from tf.keras.models import load_model

model = load_model('xxx.h5')

In [None]:
# 只存储模型结构

architecture = model.to_json()  # 也可以to_yaml
with open('xxx.json', 'wt') as json_file:
    json_file.write(architecture)