本笔记参考了下面的书籍、文献、博客或者官方说明：
* TensorFlow2官方文档：https://tensorflow.google.cn/
* 简单粗暴TensorFlow 2：https://github.com/snowkylin/tensorflow-handbook
* TensorFlow 2.0 学习笔记：https://zhuanlan.zhihu.com/p/74441082

未注明出处的代码示例，`大概`就是我自己编的，`大概`的意思就是也有极小的概率是忘记注明了。。。

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets  as tfds
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras import layers
from tensorflow.keras import preprocessing as prep
from matplotlib import pyplot as plt

### toy dataset

In [3]:
def process_toy(x, y):
    """Convert one (image, label) pair to the dtypes used for training.

    The image is cast to float32 and divided by 255.0; the label is cast
    to int64. Returns the converted ``(image, label)`` tuple.
    """
    image = tf.cast(x, tf.float32)
    label = tf.cast(y, tf.int64)
    return image / 255.0, label
def toy_dataset(n):
    """Build a small, endlessly repeating MNIST dataset from ``n`` random samples.

    ``n`` distinct examples are drawn without replacement from the first
    (training) tuple returned by ``keras.datasets.mnist.load_data()``,
    mapped through ``process_toy``, repeated indefinitely, shuffled with a
    buffer of 64 and grouped into batches of 32.
    """
    (images, labels), _ = keras.datasets.mnist.load_data()
    # Pick n distinct row indices out of all available samples.
    picked = np.random.choice(images.shape[0], n, replace=False)
    images = tf.expand_dims(images[picked], axis=3)  # append a channel axis
    labels = labels[picked]
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    dataset = dataset.map(process_toy)
    dataset = dataset.repeat()
    dataset = dataset.shuffle(64)
    return dataset.batch(32)

## 函数式API

In [3]:
inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(filters=8, kernel_size=[4,4], activation='relu')(inputs)
x = layers.Flatten()(x)
x = layers.Dense(32, activation='relu')(x)
outputs = layers.Dense(10, activation='softmax')(x)
model = keras.Model(inputs=inputs, outputs=outputs, name='KerasFunctionAPIModel')

In [4]:
model.summary()

Model: "KerasFunctionAPIModel"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 25, 25, 8)         136       
_________________________________________________________________
flatten (Flatten)            (None, 5000)              0         
_________________________________________________________________
dense (Dense)                (None, 32)                160032    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                330       
Total params: 160,498
Trainable params: 160,498
Non-trainable params: 0
_________________________________________________________________


## Sequential

In [5]:
model = keras.Sequential()
model.add(layers.Conv2D(filters=8, kernel_size=[4,4], activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

In [6]:
train_data = toy_dataset(100)
test_data = toy_dataset(100)
model.compile(optimizer=tf.optimizers.Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
#model.fit(train_data, epochs=2, steps_per_epoch=3, validation_data=test_data, validation_steps=3)

## subclass

为了把Layer在此一并讲清楚，先定制一个Linear层：

In [73]:
class MyDense(keras.layers.Layer):
    """Minimal fully connected layer computing ``inputs @ w + b`` (no activation).

    Args:
        units: Number of output features (second dimension of ``w``).
        name: Layer name passed to the Keras base class.
        **kwargs: Any further keyword arguments accepted by
            ``keras.layers.Layer`` (e.g. ``dtype``).
    """

    def __init__(self, units, name="MyDense", **kwargs):
        super().__init__(name=name, **kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the kernel ``w`` and bias ``b`` from the last input dimension."""
        # `name` is passed by keyword: the position of `name` in `add_weight`
        # differs between Keras versions, the keyword form works in all of them.
        self.w = self.add_weight(
            name="w",
            shape=[int(input_shape[-1]), self.units],
            initializer=tf.initializers.glorot_normal(),
            trainable=True,
            regularizer=keras.regularizers.l1(0.001),
        )
        # Extra L1 penalty registered once at build time.
        # NOTE(review): the recorded notebook output of this layer's `.losses`
        # shows a single entry, so this build-time tensor loss may not persist
        # across calls -- confirm; wrap it in a zero-argument callable or move
        # it into `call` if it is meant to be tracked.
        self.add_loss(tf.reduce_sum(tf.math.abs(self.w)) * 0.0001)
        self.b = self.add_weight(
            name="b",
            shape=[self.units],
            initializer=tf.initializers.glorot_uniform(),
            trainable=True,
        )
        # Deliberately neither sets `self.built` nor calls `super().build()`:
        # the notebook uses this to show the layer is still reported as built
        # after its first call.

    @tf.function
    def call(self, inputs):
        """Return the affine transform ``inputs @ w + b``."""
        return tf.add(tf.matmul(inputs, self.w), self.b)

In [74]:
class MyModel(tf.keras.Model):
    """Subclassed classifier: conv -> flatten -> MyDense -> dense -> dropout -> softmax.

    Sub-layers are stored as attributes (``conv``, ``flatten``, ``mydense``,
    ``dense``, ``dropout``, ``y``) and created in that order.
    """

    def __init__(self, name="MyModel", **kwargs):
        super().__init__(name=name, **kwargs)
        self.conv = layers.Conv2D(
            filters=8, kernel_size=3, activation='relu', name='conv'
        )
        self.flatten = layers.Flatten(name='flatten')
        self.mydense = MyDense(64, "mydense")
        # Both kernel and bias of this layer carry an L2 penalty of 0.01.
        self.dense = layers.Dense(
            32,
            activation='relu',
            use_bias=True,
            bias_initializer=tf.initializers.glorot_uniform(),
            kernel_regularizer=keras.regularizers.l2(0.01),
            bias_regularizer=keras.regularizers.l2(0.01),
            name="dense",
        )
        self.dropout = layers.Dropout(0.5, name='dropout')
        self.y = layers.Dense(10, activation='softmax', name='y')

    @tf.function
    def call(self, inputs, training=False):
        """Run the forward pass; ``training`` is forwarded to the dropout layer only."""
        hidden = self.conv(inputs)
        for layer in (self.flatten, self.mydense, self.dense):
            hidden = layer(hidden)
        hidden = self.dropout(hidden, training=training)
        return self.y(hidden)

In [75]:
model = MyModel("HHH")
train_data = toy_dataset(100)
a = next(iter(train_data))[0]
model(a).shape



TensorShape([32, 10])

可以看到，各个层的正则化损失都收集在此，model会收集各个层的losses，keras.Layer.add_loss()会将损失加入到自身的losses List中

In [82]:
model.losses

[<tf.Tensor: shape=(), dtype=float32, numpy=5.43597>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.41570833>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.009020054>]

In [77]:
model.dense.losses

[<tf.Tensor: shape=(), dtype=float32, numpy=0.41570833>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.009020054>]

In [78]:
model.dense.trainable_variables[0].shape, model.mydense.trainable_variables[1].shape

(TensorShape([64, 32]), TensorShape([64]))

In [79]:
tf.reduce_sum(tf.math.square(model.dense.trainable_variables[0])) * 0.01

<tf.Tensor: shape=(), dtype=float32, numpy=0.41570833>

在build方法中的`self.built=True`注释掉了，但是依然会有`model.layers[2].built == True`，说明`__call__`方法在调用`build`之后会自动把层标记为已构建（相当于调用了`super().build()`）

In [80]:
model.get_layer('mydense').built

True

In [81]:
tf.saved_model.save(model, "test/minimodel")

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: test/minimodel/assets


In [12]:
model.summary()

Model: "HHH"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv (Conv2D)                multiple                  80        
_________________________________________________________________
flatten (Flatten)            multiple                  0         
_________________________________________________________________
mydense (MyDense)            multiple                  346176    
_________________________________________________________________
dense (Dense)                multiple                  2080      
_________________________________________________________________
dropout (Dropout)            multiple                  0         
_________________________________________________________________
y (Dense)                    multiple                  330       
Total params: 348,666
Trainable params: 348,666
Non-trainable params: 0
_________________________________________________________

In [13]:
model.losses

[<tf.Tensor: shape=(), dtype=float32, numpy=5.4346266>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.41355148>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.008863682>]

In [14]:
model.get_layer('mydense').losses

[<tf.Tensor: shape=(), dtype=float32, numpy=5.4346266>]

In [15]:
model.get_layer('mydense').get_weights()[0].shape

(5408, 64)

**关于定制的层**  
* `__init__`：可以执行与输入无关的初始化；
* `build`：按照输入张量的shape初始化权重，也可以进行其他的初始化；
* `call`：进行正向计算。


* 第一次调用`__call__`时会首先调用`build`，建立权重；之后调用`call`进行运算；  
* `call`不会自动调用`build`，因此在手动调用`call`之前必须保证权重张量已经存在了；
* 用`build`而不是`__init__`初始化权重的好处是：可以不必过早地指定输入数据的维度，而是在需要计算的时候指定输入数据，再根据输入数据确定权重的shape，初始化权重。也就是可以直到调用`model.fit`方法进行训练时，才根据输入数据的shape初始化权重。

**关于Layer的help信息如下，help(tf.keras.layers.Layer)：**
* `__init__()`: Save configuration in member variables
* `build()`: Called once from `__call__`, when we know the shapes of inputs and `dtype`. Should have the calls to `add_weight()`, and then call the super's `build()` (which sets `self.built = True`, which is nice in case the user wants to call `build()` manually before the first `__call__`).
 * `call()`: Called in `__call__` after making sure `build()` has been called once. Should actually perform the logic of applying the layer to the input tensors (which should be passed in as the first argument).

`model.add`
* 参数activation设置层的激活函数。此函数由内置函数的名称指定，或指定为可调用对象。默认情况下不会应用任何激活函数。
* kernel_initializer,bias_initializer 创建层权重的初始化方案，此参数是一个名称或可调用对象；kernel默认为"Glorot uniform"初始化器，bias默认为全0初始化（'zeros'）。
* kernel_regularizer和bias_regularizer：应用层权重的正则化方案，例如L1和L2正则化。默认情况下，系统不会应用正则化函数。



`model.compile`
* 参数optimizer：从tf.keras.optimizers模块（别名tf.optimizers）向其传递优化器的实例，tf.optimizers.Adam、tf.optimizers.RMSprop 或 tf.optimizers.SGD等。
* loss：优化期间最小化的函数，由名称或者tf.keras.losses传递可调用对象。常见选择包括均方误差('mse')，'categorical_crossentropy'和'binary_crossentropy'。
*  metrics用于监控训练，它们是tf.keras.metrics模块中的字符串或可调用对象。

```
 # Configure a model for mean-squared error regression.
model.compile(optimizer=tf.optimizers.Adam(0.01),
              loss='mse',       # mean squared error
              metrics=['mae'])  # mean absolute error

 # Configure a model for categorical classification.
model.compile(optimizer=tf.optimizers.RMSprop(0.01),
              loss=tf.keras.losses.categorical_crossentropy,
              metrics=[tf.keras.metrics.categorical_accuracy])
```

`model.fit`
* epochs：以周期为单位进行训练，一个周期是对整个输入数据进行一次迭代（以较小的批次完成迭代）
* batch_size：当传递Numpy数组时，模型将分成较小的批次，并在训练期间迭代这些批次。此整数指定每个批次的样本数量。请注意，如果样本总数不能被批次大小整除，则最后一个批次可能更小。  
如果是tf.data.Dataset对象，则可以不指定这个参数，因为Dataset可以设定batch大小。
* validation_data：指定验证数据集，若指定该参数则会在每一个epoch完成后，输出当前在验证集上的效果。
* steps_per_epoch：每一个周期训练的批次数，默认会把全部样本训练一遍。如果是tf.data则默认会迭代耗尽
* validation_split=0.2: 分出最后0.2的样本作为验证集，每个epoch结束，都会在该验证集上计算误差。仅支持输入数据为numpy array时，其他像dataset，generator，dataset-iterator等都不支持该参数。
* shuffle=True：每个epoch之前shuffle训练数据
* validation_steps：当指定validation_data，且validation_data是dataset或dataset_iterator时，执行validation的batch数。

## layers

#### layers.Dense

```python
tf.keras.layers.Dense(units, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None)
```
* units: 输出神经元个数
* activation：激活函数
* use_bias: Boolean, whether the layer uses a bias vector
* kernel_initializer: Initializer for the `kernel` weights matrix.
* bias_initializer: Initializer for the bias vector.
* kernel_regularizer: Regularizer function applied to the `kernel` weights matrix.
* bias_regularizer: Regularizer function applied to the bias vector.
* `Dense` implements the operation: `output = activation(dot(input, kernel) + bias)`；因此如果是输入没有经过Flatten，也就是多个维度，那么输出本层相当于进行了一次$1\times1$的卷积；实际上kernel就是一个两个维度的矩阵，第一个维度由输入决定，第二个维度units决定；输入和输出的差别就是最后一个维度不同：$input.shape[-1] \to units$
* input一行是一个样本，那么kernel的行数就是输入神经元的个数，kernel的列数就是输出神经元的个数。
* Input shape: (batch_size, input_dim)
* Output shape: (batch_size, units)
* `__call__(self, inputs, *args, **kwargs)`方法：会调用`call`方法，返回输出张量
* 如果本层在第一层，可以通过参数`input_dim=512`指定输入的长度

#### layers.Concatenate

In [16]:
conc = layers.Concatenate(axis=1)

In [17]:
x = np.arange(20).reshape(2, 2, 5)
y = np.arange(20, 30).reshape(2, 1, 5)

In [18]:
x

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9]],

       [[10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19]]])

In [19]:
y

array([[[20, 21, 22, 23, 24]],

       [[25, 26, 27, 28, 29]]])

In [20]:
conc([x,y])

<tf.Tensor: shape=(2, 3, 5), dtype=int64, numpy=
array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [20, 21, 22, 23, 24]],

       [[10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [25, 26, 27, 28, 29]]])>

In [21]:
layers.concatenate([x,y], axis=1)

<tf.Tensor: shape=(2, 3, 5), dtype=int64, numpy=
array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [20, 21, 22, 23, 24]],

       [[10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [25, 26, 27, 28, 29]]])>

#### layers.Embedding

```python
tf.keras.layers.Embedding(input_dim, output_dim, embeddings_initializer='uniform',input_length=None)
```
* `input_dim`是单词表的长度+1，`output_dim`是嵌入向量的长度，`input_length`：输入序列的固定长度（用于推断后续层的输出shape），并不会截断输入（见下面的例子：`input_length=8`而输入长度为10，输出仍然是10个时间步）
* 输入是一个batch的数据，其中每个样本的每个单词是其在单词表中的下标；输出增加了一个维度，就是单词用嵌入向量表示了
* `.get_weights()`返回该层的参数，`shape=[input_dim, output_dim]`，每一行代表了一个单词的嵌入向量（跳字模型的中心词向量）
* input_length: Length of input sequences, when it is constant. This argument is required if you are going to connect `Flatten`, then `Dense` layers upstream (without it, the shape of the dense outputs cannot be computed).

In [4]:
embed = layers.Embedding(10, 3, input_length=8)
x = np.random.randint(8, size=[20, 10])
embed(x).shape

TensorShape([20, 10, 3])

In [8]:
embed.weights

[<tf.Variable 'embedding/embeddings:0' shape=(10, 3) dtype=float32, numpy=
 array([[-0.04522704, -0.01553626, -0.0178723 ],
        [-0.03180017, -0.01977986, -0.02764804],
        [ 0.00945731,  0.02925307, -0.00442268],
        [-0.04835451,  0.01134862,  0.03678198],
        [ 0.03260419,  0.02927487, -0.03428855],
        [ 0.03009279,  0.04345794,  0.04137078],
        [ 0.01548987, -0.00048443,  0.02370845],
        [-0.03320906,  0.00618073, -0.04780353],
        [-0.03499379, -0.01048565,  0.00601443],
        [ 0.02077663,  0.0185114 , -0.00240123]], dtype=float32)>]

#### layers.SimpleRNN

```python
layers.SimpleRNN(units, return_sequences)
```
* 零时间步的初始状态为全0向量，之后的每一步输出是下一个时间步的状态；
* rnn层的输入shape是`[batch_size, timesteps, input_features]`，其中每一个时间步的输入shape是`[batch_size, input_features]`
* 输出的shape是`[batch_size, output_features]`，`output_features`就是参数中的units
* 如果参数设置为`return_sequences=True`，则输出的shape为`[batch_size, timesteps, output_features]`，也就是每个时间步都输出，如果作为中间层，则应该设置为`return_sequences=True`
* `self.build(input_shape=[])`，初始化权重，第一个权重的第一个维度是`input_shape[-1]`，第二个维度是输出的维度units

In [6]:
rnn = layers.SimpleRNN(5, return_sequences=True)
x = tf.random.normal([6, 8, 3])
rnn(x).shape

TensorShape([6, 8, 5])

In [7]:
for i in rnn.trainable_variables:
    print(i.shape)

(3, 5)
(5, 5)
(5,)


In [8]:
x[0,1]

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([0.97329193, 1.1908174 , 0.6114931 ], dtype=float32)>

In [9]:
# 下面是第一个样本，第二个时间步的计算过程
a = np.dot(x[0,1], rnn.get_weights()[0])
b = np.dot(rnn(x)[0,0], rnn.get_weights()[1])
r = tf.nn.tanh(a+b+rnn.get_weights()[2])
r

<tf.Tensor: shape=(5,), dtype=float32, numpy=
array([-0.24215701, -0.94435865,  0.07067382, -0.9792455 , -0.9854835 ],
      dtype=float32)>

In [10]:
rnn(x)[0,1]

<tf.Tensor: shape=(5,), dtype=float32, numpy=
array([-0.24215698, -0.94435865,  0.07067382, -0.9792455 , -0.9854835 ],
      dtype=float32)>

In [30]:
rnn.get_weights()[1].shape

(5, 5)

#### layers.GRU

#### layers.LSTM

In [225]:
x = tf.random.normal([3, 4, 5])  # 3，4，5分别是批次大小，序列长度，嵌入向量长度
lstm = layers.LSTM(6, return_sequences=True)

In [226]:
lstm(x).shape

TensorShape([3, 4, 6])

In [227]:
len(lstm.trainable_variables)

3

In [228]:
lstm.trainable_variables[0].shape

TensorShape([5, 24])

In [229]:
lstm.trainable_variables[1].shape

TensorShape([6, 24])

In [230]:
lstm.trainable_variables[2].shape

TensorShape([24])

In [231]:
lstm(x)[0,0] # 第一个样本的第一个单词输出，也是第二个单词的隐藏状态

<tf.Tensor: shape=(6,), dtype=float32, numpy=
array([-0.04134212,  0.02166263,  0.0871288 ,  0.01203906,  0.11119874,
        0.0071309 ], dtype=float32)>

stateful=True: 上一批次的第i个样本的输出状态（包括输出和“传送带”），作为下一个批次第i个样本的起始状态（包括隐藏状态和“传送带”）。这也导致了当设置stateful=True时，batch size必须是固定的，如果需要改变batch size的大小，可以考虑checkpoint保存权重，重新建立模型，再加载权重。


If a RNN is stateful, it needs to know its batch size. Specify the batch size of your input tensors:
- If using a Sequential model, specify the batch size by passing a `batch_input_shape` argument to your first layer.
- If using the functional API, specify the batch size by passing a `batch_shape` argument to your Input layer.
- 下面的测试说明，如果是用subclass则不需要任何改变

model.reset_states() 或者lstm.reset_states() 可以将状态设置为全0.

In [9]:
class TestStateful(keras.Model):
    """Embedding followed by a stateful LSTM, used to inspect state carry-over between calls."""

    def __init__(self, name='TestStateful', **kwargs):
        super().__init__(name=name, **kwargs)
        self.embed = layers.Embedding(input_dim=10, output_dim=3)
        # For comparison with a stateless layer, swap in:
        #   layers.LSTM(4, return_sequences=True, stateful=False)
        self.lstm = layers.LSTM(units=6, return_sequences=True, stateful=True)

    def call(self, inputs, training=False):
        """Embed the token ids and return the LSTM output for every time step."""
        # `training` is accepted but not forwarded to any sub-layer.
        return self.lstm(self.embed(inputs))

In [10]:
model = TestStateful()
model.lstm.build(tf.TensorShape([20, None, 3]))  # 如果stateful==False, build用的shape只会用最后最后的 3

In [11]:
# model.lstm.states  #隐藏状态和”传送带“初始化为0

In [12]:
x = np.random.randint(8, size=[20, 10])
model(x).shape

TensorShape([20, 10, 6])

In [13]:
model.lstm.trainable_variables[0].shape

TensorShape([3, 24])

In [14]:
model.lstm.trainable_variables[1].shape

TensorShape([6, 24])

In [15]:
model.lstm.trainable_variables[2].shape

TensorShape([24])

In [16]:
tf.concat(model.lstm.weights[:2], axis=0).shape  # 实际上这就是4个权重变量，axis=0就是把输入和隐藏状态的concate

TensorShape([9, 24])

In [17]:
len(model.lstm.states)

2

In [18]:
np.all(model(x)[:,-1,:] == model.lstm.states[0])  # states的第一个值就是隐藏状态，第二个值应该是“传送带”

True

In [21]:
model.lstm.states[1].shape

TensorShape([20, 6])

下面的例子说明每次调用都会改变传送带的值

In [22]:
model.lstm.states[1][1]

<tf.Tensor: shape=(6,), dtype=float32, numpy=
array([-0.00891904,  0.01448323,  0.00551689,  0.00209127,  0.00680712,
        0.01038946], dtype=float32)>

In [23]:
y = np.random.randint(8, size=[20, 10])
model(y).shape

TensorShape([20, 10, 6])

In [24]:
model.lstm.states[1][1]

<tf.Tensor: shape=(6,), dtype=float32, numpy=
array([-0.00859633,  0.01416175,  0.00490054, -0.00143596,  0.00529386,
        0.00665215], dtype=float32)>

In [25]:
model.lstm(model.embed(x)).shape

TensorShape([20, 10, 6])

可以看到有两个状态，应该一个是输出或者叫最后的隐藏状态，另一个是“传送带”

In [34]:
lstm = layers.LSTM(4, return_state=True, return_sequences=True, stateful=True) # return_state=False时，只返回输出
x = tf.random.normal([20, 10, 3])

In [35]:
lstm(x)[1].shape

TensorShape([20, 4])

可以看到，当`return_state=True`时，单独使用的lstm layer返回的是一个列表，第一个值是返回的序列，第二和第三个分别是隐藏状态和"传送带"（这里同时设置了`stateful=True`，所以它们也保存在`lstm.states`中）

In [38]:
tf.reduce_all(lstm(x)[1] == lstm.states[0])  # 类似np.all，可以指定axis

<tf.Tensor: shape=(), dtype=bool, numpy=True>

In [438]:
tf.reduce_all(lstm(x)[2] == lstm.states[1])

<tf.Tensor: shape=(), dtype=bool, numpy=True>

#### layers.Bidirectional

双向RNN利用了RNN的顺序敏感性：它包含两个普通RNN，每个RNN分别沿一个方向对输入序列进行处理（时间正序和时间逆序），然后将它们的表示合并到一起（concat）。通过沿这两个方向处理序列，双向RNN能捕捉到可能被单向RNN忽略的模式。

In [None]:
# layers.Bidirectional(layer, merge_mode='concat', backward_layer=None)  # `layer` (an RNN layer instance) is required; calling it with no arguments raises TypeError

In [49]:
bid = layers.Bidirectional(layers.LSTM(10, return_sequences=True), input_shape=(5, 10))

In [55]:
x = np.random.normal(size=[6, 5, 10])

In [80]:
tf.reduce_all(bid.forward_layer(x)==bid(x)[:, :, :10])

<tf.Tensor: shape=(), dtype=bool, numpy=True>

In [81]:
tf.reduce_all(bid.backward_layer(x)[:, ::-1, :] == bid(x)[:, :, 10:])

<tf.Tensor: shape=(), dtype=bool, numpy=True>

#### layers.Conv1D

```python
layers.Conv1D(filters, kernel_size, strides=1, padding='valid', data_format='channels_last', dilation_rate=1, activation=None)
```
* 一维卷积神经网络用于文本和序列
* 输入的形状是(batch_size, timesteps, features)，在时间轴上做卷积；

#### layers.Conv2D

In [82]:
tmp = tf.random.normal([8, 10, 10, 3])
test = layers.Conv2D(filters=16, kernel_size=[4,4], strides=2, padding='same')

In [83]:
test(tmp).shape

TensorShape([8, 5, 5, 16])

In [84]:
test.trainable_variables[0].shape

TensorShape([4, 4, 3, 16])

In [85]:
test.trainable_variables[1].shape

TensorShape([16])

#### layers.SeparableConv2D

* 深度可分离卷积层：每个输入通道分别进行卷积操作（depthwise），卷积结果concatenate到一起形成多个通道，再用pointwise（$1\times 1$）卷积，将各个通道混合。
* 这么做相当于是把空间特征学习和通道特征学习分开，如果你假设输入中的空间位置高度相关，但不同的通道之间相互独立，那么这么做是很有意义的。

In [86]:
tmp = tf.random.normal([8, 10, 10, 3])
test = layers.SeparableConv2D(filters=16, kernel_size=4, strides=2, padding='same')
test(tmp).shape

TensorShape([8, 5, 5, 16])

In [87]:
test.trainable_variables[0].shape  # 每层各自做卷积

TensorShape([4, 4, 3, 1])

In [606]:
test.trainable_variables[1].shape  # 1x1 pointwise

TensorShape([1, 1, 3, 16])

In [609]:
test.trainable_variables[2].shape  # bias

TensorShape([16])

与上面的Conv2D相比参数减少了

#### layers.MaxPooling2D

```python
layers.MaxPooling2D(pool_size=2, strides=None, padding='valid', data_format='channels_last')
```

In [140]:
pool = layers.MaxPool2D(2)
pool(tf.random.normal([2, 3,4, 2]))

<tf.Tensor: shape=(2, 1, 2, 2), dtype=float32, numpy=
array([[[[ 0.64007056,  2.1382844 ],
         [ 0.6870066 ,  1.5542078 ]]],


       [[[ 0.7684488 ,  0.85859704],
         [ 1.5620023 , -0.00679288]]]], dtype=float32)>

In [141]:
pool.trainable_variables

[]

#### layers.GlobalAveragePooling1D

```python
layers.GlobalAveragePooling1D(data_format='channels_last')
```
* 默认输入为`[batch,timesteps,features]`，在`timesteps`维度上做池化；形象点就是在这一批次的词中，在每一个词向量的同一维度上做池化

#### layers.Conv2DTranspose

原理参考[这篇文献: A guide to convolution arithmetic for deep learning](https://arxiv.org/pdf/1603.07285.pdf)，在第20页，4.1节

In [89]:
tmp = tf.random.normal([1, 4, 4, 1])
test = layers.Conv2DTranspose(2, 3, strides=1, padding='same')
test(tmp).shape

TensorShape([1, 4, 4, 2])

In [92]:
#help(layers.Conv2DTranspose)

In [90]:
test.variables[0].shape  # 就是想说明实际上参数还是kernel

TensorShape([3, 3, 2, 1])

In [93]:
tmp = tf.random.normal([8,5,5,16])
test = layers.Conv2DTranspose(filters=3, kernel_size=4, strides=2, padding='same')

In [94]:
test(tmp).shape

TensorShape([8, 10, 10, 3])

In [101]:
test.trainable_variables[0].shape

TensorShape([4, 4, 3, 16])

#### layers.Dropout

In [104]:
drop = keras.layers.Dropout(0.3)

当training=True时，输入的元素以0.3的概率设置为0，然后所有元素除以0.7，training=False不改变输入；这个层并没有可训练的参数，因此与trainable无关。

In [105]:
x = tf.random.normal([4, 5])
x

<tf.Tensor: shape=(4, 5), dtype=float32, numpy=
array([[ 0.95118123, -0.03397252,  1.512402  ,  0.54591715,  0.07380949],
       [ 0.7781919 ,  0.7562036 ,  0.5761015 ,  1.5349052 , -1.6187378 ],
       [ 0.70513034, -0.54004735, -0.51421523,  1.808198  , -0.47636107],
       [ 0.03281746,  1.4903724 ,  1.0035995 , -0.6940724 ,  0.77543175]],
      dtype=float32)>

In [106]:
drop(x, training=True)

<tf.Tensor: shape=(4, 5), dtype=float32, numpy=
array([[ 1.3588303 , -0.        ,  0.        ,  0.77988166,  0.10544213],
       [ 1.1117028 ,  0.        ,  0.82300216,  2.1927218 , -2.3124826 ],
       [ 1.0073291 , -0.        , -0.7345932 ,  2.5831401 , -0.        ],
       [ 0.        ,  0.        ,  1.4337137 , -0.9915321 ,  0.        ]],
      dtype=float32)>

In [109]:
drop(x, training=False)/0.7

<tf.Tensor: shape=(4, 5), dtype=float32, numpy=
array([[ 1.3588303 , -0.04853217,  2.1605744 ,  0.77988166,  0.10544213],
       [ 1.1117028 ,  1.0802909 ,  0.82300216,  2.1927218 , -2.3124826 ],
       [ 1.0073291 , -0.77149624, -0.7345932 ,  2.58314   , -0.6805158 ],
       [ 0.04688208,  2.1291034 ,  1.4337137 , -0.991532  ,  1.1077597 ]],
      dtype=float32)>

# keras.preprocessing

## image

### PIL

In [638]:
from PIL import Image
from PIL import ImageDraw

PIL和keras对于图片的坐标系统都是左上角为（0，0），但是keras的坐标点是（y_height, x_width)，而PIL的坐标点是（x_width, y_height)

In [644]:
kimg = tf.keras.preprocessing.image.load_img('data/thelight.jpg')
kimg.size

(3359, 2304)

In [645]:
kimg.width, kimg.height

(3359, 2304)

In [646]:
tf.keras.preprocessing.image.img_to_array(kimg)[100, 3000]

array([234., 180.,  45.], dtype=float32)

In [648]:
pimg = Image.open("data/thelight.jpg")
pimg.size

(3359, 2304)

In [649]:
pimg.width,pimg.height

(3359, 2304)

In [650]:
pimg.getpixel((3000, 100))

(234, 180, 45)

In [651]:
np.array(pimg)[100, 3000]

array([234, 180,  45], dtype=uint8)

In [652]:
newimg = Image.new ("RGB", (300, 300), (255, 0, 0))
draw = ImageDraw.Draw(newimg)

In [653]:
draw.chord((10, 50, 40, 100), 0, 360, fill='green')
draw.chord((150,150, 200,200), 0, 360)
draw.rectangle((150, 150, 200, 200))
draw.text((150, 150), "HelloWorld", fill='blue')
#newimg

### ImageDataGenerator

In [136]:
fnames = ['/Users/user/.keras/datasets/flower_photos/roses', '/Users/user/.keras/datasets/flower_photos/sunflowers']
imgs = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255, 
                                          rotation_range=40, #随机旋转角度
                                          width_shift_range=0.2, height_shift_range=0.2, # 水平或垂直方向平移的距离（相对于总宽度或总高度的比例）
                                          shear_range=0.2, # 随机错切变换的角度
                                          zoom_range=0.2, # 图像随机缩放的范围
                                          horizontal_flip=True, #随机将一半的图片水平翻转
                                          fill_mode='nearest')
imgs_generator = imgs.flow_from_directory('/Users/user/.keras/datasets/flower_photos', target_size=(150, 150), 
                                          batch_size=20, class_mode='categorical')

FileNotFoundError: [Errno 2] No such file or directory: '/Users/user/.keras/datasets/flower_photos'

In [18]:
imgs_generator.class_indices

{'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflowers': 3, 'tulips': 4}

In [19]:
a = imgs_generator.next()
#plt.imshow(a[0][0])

In [7]:
d = keras.preprocessing.image.ImageDataGenerator(validation_split=0.25)
d_train = d.flow_from_directory('data/flowers', subset='training', shuffle=True)
d_train.filenames

Found 6 images belonging to 2 classes.


['daisy/2019064575_7656b9340f_m.jpg',
 'daisy/3415180846_d7b5cced14_m.jpg',
 'daisy/4144275653_7c02d47d9b.jpg',
 'sunflowers/8481979626_98c9f88848_n.jpg',
 'sunflowers/9555824387_32b151e9b0_m.jpg',
 'sunflowers/9555827829_74e6f60f1d_m.jpg']

## text

```python
tokenizer = keras.preprocessing.text.Tokenizer(num_words=None, filters='!"#$%&()*+,-./:;<=>?@[\]^_`{|}~ ', split=' ')
```
* `num_words`：只有最常出现的`num_words`个词会被保留
* `filters`：会被过滤掉的，实际上可以认为被替换成`split`指定的分割字符串
* `split`：指定分割文本用的字符串，默认是空格

In [3]:
tokenizer = prep.text.Tokenizer(5)

In [4]:
x = "if you want to sound like a native speaker , you must be willing to practice saying the want to to sound native"

In [5]:
tokenizer.fit_on_texts([x, ])  # 参数字符串组成的list，得到按词频排序的单词表

In [86]:
tokenizer.fit_on_texts(['you you you you you']) # 接着训练

In [88]:
tokenizer.index_word  # 下标-单词 组成的字典，包括所有单词，词频大的下标小；.word_index与之相反

{1: 'you',
 2: 'to',
 3: 'want',
 4: 'sound',
 5: 'native',
 6: 'if',
 7: 'like',
 8: 'a',
 9: 'speaker',
 10: 'must',
 11: 'be',
 12: 'willing',
 13: 'practice',
 14: 'saying',
 15: 'the'}

In [91]:
tokenizer.word_counts # [(单词，频率), (...), ...]，是一个OrderedDict，按训练样本中单词出现的顺序排序

OrderedDict([('if', 1),
             ('you', 7),
             ('want', 2),
             ('to', 4),
             ('sound', 2),
             ('like', 1),
             ('a', 1),
             ('native', 2),
             ('speaker', 1),
             ('must', 1),
             ('be', 1),
             ('willing', 1),
             ('practice', 1),
             ('saying', 1),
             ('the', 1)])

In [92]:
tokenizer.texts_to_sequences(["you are want to a", "a to want are you"])

[[1, 3, 2], [2, 3, 1]]

In [93]:
tokenizer.sequences_to_texts([[1,3,2, 0, 0, 0]])

['you want to']

In [103]:
tokenizer.texts_to_matrix(['you are want to you '], mode='count')

array([[0., 2., 1., 1., 0.]])

In [95]:
prep.text.text_to_word_sequence('you are my best friend you')

['you', 'are', 'my', 'best', 'friend', 'you']

## sequence

```python
keras.preprocessing.sequence.TimeseriesGenerator(data, targets, length, sampling_rate=1, stride=1, start_index=0, end_index=None, shuffle=False, reverse=False, batch_size=128)
```
* `data`：是可索引的生成器（例如元组，列表或numpy数组），第0个轴是时间维度
* `targets`：对应的data时间步的目标值，第0个维度与data的时间维度长度相同
* `length`：每个样本要考虑多少个时间步，或者说当sampling_rate=1时，生成的结果中一个targets值对应多少个data值
* `sampling_rate`：时间步的采样周期，例如当`length=10, sampling_rate=2`时，每2个时间步取一次，结果就是每个目标值只对应5个时间步，但是跨越了10个时间步
* `stride`：目标值的采样周期
* `start_index,end_index`：data和targets的下标在`[start_index, end_index]`之间的时间步才会被用到
* `shuffle`：是否打乱样本
* `reverse`：是否按时间步的倒序输出
* `batch_size`：每个批次的样本数

In [156]:
x = np.array([chr(i) for i in range(97, 117)])

In [157]:
data = np.array([[i] for i in range(50)])
targets = np.random.normal(size=[50, 3])

In [158]:
data_gen = prep.sequence.TimeseriesGenerator(data, targets, length=10, sampling_rate=2, batch_size=2)

In [159]:
data_gen[0]

(array([[[0.],
         [2.],
         [4.],
         [6.],
         [8.]],
 
        [[1.],
         [3.],
         [5.],
         [7.],
         [9.]]]), array([[-0.9120223 , -0.44903765, -0.45272137],
        [-1.5934117 ,  0.01738436,  0.91008626]]))

In [160]:
targets[10]

array([-0.9120223 , -0.44903765, -0.45272137])

## pad_sequences

```python
prep.sequence.pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre', truncating='pre', value=0.0)
```
* sequences：列表的列表，每一个元素是一个序列
* maxlen：默认是所有序列中最长的长度
* value：浮点数，用来补齐的数

In [104]:
x = [[1,2,3], [3,], [4,5]]
prep.sequence.pad_sequences(x, maxlen=2)

array([[2, 3],
       [0, 3],
       [4, 5]], dtype=int32)

In [105]:
# tf.random.categorical expects unnormalized log-probabilities (logits), not
# probabilities, so take the log of the softmax output before sampling.
probs = tf.nn.softmax(tf.random.normal([5]))
x = tf.random.categorical(tf.math.log(probs)[tf.newaxis, :], 20)
x

<tf.Tensor: shape=(1, 20), dtype=int64, numpy=array([[1, 1, 3, 2, 3, 2, 4, 2, 0, 3, 1, 2, 4, 1, 3, 2, 0, 3, 2, 4]])>

# tensorflow_datasets

In [3]:
# tfds.list_builders() # 可用数据集

In [106]:
(raw_train, raw_validation, raw_test), metadata = tfds.load(
    'cats_vs_dogs',
    split=['train[:80%]', 'train[80%:90%]', 'train[90%:]'],
    with_info=True,
    as_supervised=True
)
# as_supervised=True 返回数据+标签

```
tfds.deprecated.text.Tokenizer(alphanum_only=True, reserved_tokens=None)
```

In [107]:
x = "one one two two th,ree one"

In [124]:
tokenizer = tfds.deprecated.text.Tokenizer(reserved_tokens=['th,ree',])
tokenizer.tokenize(x)

['one', 'one', 'two', 'two', 'th,ree', 'one']

In [125]:
tokenizer = tfds.deprecated.text.Tokenizer(alphanum_only=False)
tokenizer.tokenize(x)

['one', ' ', 'one', ' ', 'two', ' ', 'two', ' ', 'th', ',', 'ree', ' ', 'one']

In [126]:
tokenizer = tfds.deprecated.text.Tokenizer()
tokenizer.tokenize(x)

['one', 'one', 'two', 'two', 'th', 'ree', 'one']

In [127]:
r = tfds.deprecated.text.TokenTextEncoder(tokenizer.tokenize(x))

In [128]:
r.tokens

['one', 'one', 'two', 'two', 'th', 'ree', 'one']

In [129]:
r.tokenizer.tokenize(x)

['one', 'one', 'two', 'two', 'th', 'ree', 'one']

In [130]:
encoder = tfds.deprecated.text.SubwordTextEncoder.build_from_corpus(['one two aa aaa ', 'you are one two aa aa bbb,bb'], target_vocab_size=2**15)

In [131]:
encoder.subwords

['aa_', 'two_', 'one_', 'you_', 'bbb', 'bb', 'are_', 'aaa_']

In [132]:
encoder.encode('aa aaa aa T t')

[1, 8, 1, 93, 41, 125]

In [133]:
encoder.encode('aaa aaa %% two ') # 为啥第二个aaa_被分割成字母了

[8, 106, 106, 106, 41, 46, 46, 41, 2]

In [134]:
#encoder.decode([3, 1, 0, 3]) # ValueError，0只能在最后
encoder.decode([3, 1, 3, 44, 0])

'one aa one #'

In [135]:
encoder.encode('bb bbbbb')

[6, 41, 5, 6]