## 6-2、训练模型的三种方法
模型的训练主要有内置fit方法，内置train_on_batch方法，自定义训练循环
注：fit_generator方法在tf.keras中不推荐使用，其功能已经被fit包含

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import *

#打印时间分割线
@tf.function
def printbar():
    today_ts = tf.timestamp()%(24*60*60)

    hour = tf.cast(today_ts//3600+8,tf.int32)%tf.constant(24)
    minite = tf.cast((today_ts%3600)//60,tf.int32)
    second = tf.cast(tf.floor(today_ts%60),tf.int32)
    
    def timeformat(m):
        if tf.strings.length(tf.strings.format("{}",m))==1:
            return(tf.strings.format("0{}",m))
        else:
            return(tf.strings.format("{}",m))
    
    timestring = tf.strings.join([timeformat(hour),timeformat(minite),
                timeformat(second)],separator = ":")
    tf.print("=========="*8+timestring)

In [5]:
MAX_LEN = 300
BATCH_SIZE = 32
(x_train,y_train),(x_test,y_test) = datasets.reuters.load_data()
x_train = preprocessing.sequence.pad_sequences(x_train,maxlen=MAX_LEN)
x_test = preprocessing.sequence.pad_sequences(x_test,maxlen= MAX_LEN)

MAX_WORDS = x_train.max()+1
CAT_NUM = y_train.max()+1

# ds_train = tf.data.Dataset.from_tensor_slices((x_train,y_train)).shuffle(buffer_size==1000).batch(BATCH_SIZECH_SIZE).prefetch(tf.data.experimental.AUTOTUNE).cache()
ds_train = tf.data.Dataset.from_tensor_slices((x_train,y_train)) \
          .shuffle(buffer_size = 1000).batch(BATCH_SIZE) \
          .prefetch(tf.data.experimental.AUTOTUNE).cache()
ds_test = tf.data.Dataset.from_tensor_slices((x_test,y_test)) \
          .shuffle(buffer_size = 1000).batch(BATCH_SIZE) \
          .prefetch(tf.data.experimental.AUTOTUNE).cache()

### 一、内置fit方法
该方法功能非常强大，支持对numpy array，tf.array.Dataset以及Python generator数据进行训练。
并且可以通过设置回调函数实现对训练过程的复杂控制逻辑

In [6]:
tf.keras.backend.clear_session()
def create_model():
    
    model = models.Sequential()
    model.add(layers.Embedding(MAX_WORDS,7,input_length=MAX_LEN))
    model.add(layers.Conv1D(filters = 64,kernel_size = 5,activation = "relu"))
    model.add(layers.MaxPool1D(2))
    model.add(layers.Conv1D(filters = 32,kernel_size = 3,activation = "relu"))
    model.add(layers.MaxPool1D(2))
    model.add(layers.Flatten())
    model.add(layers.Dense(CAT_NUM,activation = "softmax"))
    return(model)

def compile_model(model):
    model.compile(optimizer=optimizers.Nadam(),
                loss=losses.SparseCategoricalCrossentropy(),
                metrics=[metrics.SparseCategoricalAccuracy(),metrics.SparseTopKCategoricalAccuracy(5)]) 
    return(model)
 
model = create_model()
model.summary()
model = compile_model(model)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 300, 7)            216874    
_________________________________________________________________
conv1d (Conv1D)              (None, 296, 64)           2304      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 148, 64)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 146, 32)           6176      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 73, 32)            0         
_________________________________________________________________
flatten (Flatten)            (None, 2336)              0         
_________________________________________________________________
dense (Dense)                (None, 46)                1

In [7]:
history = model.fit(ds_train,validation_data = ds_test,epochs=10)

Train for 281 steps, validate for 71 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [11]:
def train_model(model,ds_train,ds_valid,epoches):
    for epoch in tf.range(1,epoches+1):
        model.reset_metrics()
        
        if epoch == 5:
            model.optimizer.lr.assign(model.optimizer.lr/2.0)
        for x,y in ds_train:
            train_result = model.train_on_batch(x,y)
        for x,y in ds_valid:
            valid_result = model.test_on_batch(x,y,reset_metrics=False)
        if epoch%1==0:
            printbar()
            tf.print("epoch = ",epoch)
            print("train:",dict(zip(model.metrics_names,train_result)))
            print("valid:",dict(zip(model.metrics_names,valid_result)))
            print("")

In [12]:
train_model(model,ds_train,ds_test,10)

epoch =  1
train: {'loss': 0.036054526, 'sparse_categorical_accuracy': 1.0, 'sparse_top_k_categorical_accuracy': 1.0}
valid: {'loss': 2.563372, 'sparse_categorical_accuracy': 0.63579696, 'sparse_top_k_categorical_accuracy': 0.8103295}

epoch =  2
train: {'loss': 0.026406916, 'sparse_categorical_accuracy': 1.0, 'sparse_top_k_categorical_accuracy': 1.0}
valid: {'loss': 2.6081796, 'sparse_categorical_accuracy': 0.6326803, 'sparse_top_k_categorical_accuracy': 0.8116652}

epoch =  3
train: {'loss': 0.02145644, 'sparse_categorical_accuracy': 1.0, 'sparse_top_k_categorical_accuracy': 1.0}
valid: {'loss': 2.591668, 'sparse_categorical_accuracy': 0.6353517, 'sparse_top_k_categorical_accuracy': 0.8134461}

epoch =  4
train: {'loss': 0.02030645, 'sparse_categorical_accuracy': 1.0, 'sparse_top_k_categorical_accuracy': 1.0}
valid: {'loss': 2.5907695, 'sparse_categorical_accuracy': 0.6313446, 'sparse_top_k_categorical_accuracy': 0.81478184}

epoch =  5
train: {'loss': 0.008047451, 'sparse_categorica

In [None]:
optimizer = optimizer.Adam()
loss_func = losses.SparseCategoricalCrossentropy()

train_loss = metrics.Mean(name='train_loss')
train_metric = metrics.SparseCategoricalAccuracy(name="train_accuarcy")

valid_loss = metrics.Mean(name='valid_loss')
valid_metric = metrics.SparseCategoricalAccuracy(name='valid_accuracy')

@tf.function
def train_step(model,features,labels):
    with tf.GradientTape() as tape:
        predictions = model(features,training=True)
        loss = loss_func(labels,predictions)
    gradients = tape.gradient(loss,model.trainable_variables)
    optimizer.apply_gradients(zip(gradients,model.trainable_variables))
    
    train_loss.update_state(loss)
    train_metric.update_state(labels,predictions)
