# 6-2,训练模型的3种方法

模型的训练主要有内置fit方法、内置tran_on_batch方法、自定义训练循环。

注：fit_generator方法在tf.keras中不推荐使用，其功能已经被fit包含。

In [1]:
import numpy as np 
import pandas as pd 
import tensorflow as tf
from tensorflow.keras import * 

#打印时间分割线
@tf.function
def printbar():
    today_ts = tf.timestamp()%(24*60*60)

    hour = tf.cast(today_ts//3600+8,tf.int32)%tf.constant(24)
    minite = tf.cast((today_ts%3600)//60,tf.int32)
    second = tf.cast(tf.floor(today_ts%60),tf.int32)
    
    def timeformat(m):
        if tf.strings.length(tf.strings.format("{}",m))==1:
            return(tf.strings.format("0{}",m))
        else:
            return(tf.strings.format("{}",m))
    
    timestring = tf.strings.join([timeformat(hour),timeformat(minite),
                timeformat(second)],separator = ":")
    tf.print("=========="*8+timestring)

In [3]:
MAX_LEN = 300
BATCH_SIZE = 32
(x_train,y_train),(x_test,y_test) = datasets.reuters.load_data()
x_train = preprocessing.sequence.pad_sequences(x_train,maxlen=MAX_LEN)
x_test = preprocessing.sequence.pad_sequences(x_test,maxlen=MAX_LEN)

MAX_WORDS = x_train.max()+1
CAT_NUM = y_train.max()+1

ds_train = tf.data.Dataset.from_tensor_slices((x_train,y_train)) \
          .shuffle(buffer_size = 1000).batch(BATCH_SIZE) \
          .prefetch(tf.data.experimental.AUTOTUNE).cache()
   
ds_test = tf.data.Dataset.from_tensor_slices((x_test,y_test)) \
          .shuffle(buffer_size = 1000).batch(BATCH_SIZE) \
          .prefetch(tf.data.experimental.AUTOTUNE).cache()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz


In [4]:
for item in ds_test.take(2):
    print(item)

(<tf.Tensor: shape=(32, 300), dtype=int32, numpy=
array([[   0,    0,    0, ..., 1325,   17,   12],
       [   0,    0,    0, ...,   67,   17,   12],
       [ 126,  174,  247, ...,   16,   17,   12],
       ...,
       [   0,    0,    0, ...,  272,   17,   12],
       [   0,    0,    0, ...,  280,   17,   12],
       [   0,    0,    0, ..., 1097,   17,   12]], dtype=int32)>, <tf.Tensor: shape=(32,), dtype=int64, numpy=
array([ 3,  3, 11,  4,  2, 19, 35, 19,  4, 19,  3,  3, 20,  4, 19,  4,  4,
       19,  3,  3,  3,  3,  4, 11,  3,  1,  3,  1, 19,  3,  4,  1])>)
(<tf.Tensor: shape=(32, 300), dtype=int32, numpy=
array([[   0,    0,    0, ...,   96,   17,   12],
       [   0,    0,    0, ..., 2278,   17,   12],
       [   0,    0,    0, ...,    8,   17,   12],
       ...,
       [   0,    0,    0, ...,  152,   17,   12],
       [   4, 2397,   51, ...,    8,   17,   12],
       [  95,   27,  661, ...,  252,   17,   12]], dtype=int32)>, <tf.Tensor: shape=(32,), dtype=int64, numpy=
array([ 3

## 一，内置方法

该方法功能非常强大, 支持对numpy array, tf.data.Dataset以及 Python generator数据进行训练。

并且可以通过设置回调函数实现对训练过程的复杂控制逻辑。

In [5]:
tf.keras.backend.clear_session()
def create_model():
    
    model = models.Sequential()
    model.add(layers.Embedding(MAX_WORDS,7,input_length=MAX_LEN))
    model.add(layers.Conv1D(filters = 64,kernel_size = 5,activation = "relu"))
    model.add(layers.MaxPool1D(2))
    model.add(layers.Conv1D(filters = 32,kernel_size = 3,activation = "relu"))
    model.add(layers.MaxPool1D(2))
    model.add(layers.Flatten())
    model.add(layers.Dense(CAT_NUM,activation = "softmax"))
    return(model)

def compile_model(model):
    model.compile(optimizer=optimizers.Nadam(),
                loss=losses.SparseCategoricalCrossentropy(),
                metrics=[metrics.SparseCategoricalAccuracy(),metrics.SparseTopKCategoricalAccuracy(5)]) 
    return(model)
 
model = create_model()
model.summary()
model = compile_model(model)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 300, 7)            216874    
_________________________________________________________________
conv1d (Conv1D)              (None, 296, 64)           2304      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 148, 64)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 146, 32)           6176      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 73, 32)            0         
_________________________________________________________________
flatten (Flatten)            (None, 2336)              0         
_________________________________________________________________
dense (Dense)                (None, 46)                1

In [6]:
history = model.fit(ds_train,validation_data = ds_test,epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [7]:
history

<tensorflow.python.keras.callbacks.History at 0x7f0e1873fc90>

In [9]:
history.history

{'loss': [2.030376672744751,
  1.509628176689148,
  1.2274523973464966,
  0.9560253024101257,
  0.7123274207115173,
  0.5339815020561218,
  0.4191669821739197,
  0.3471987247467041,
  0.3004436492919922,
  0.2680912911891937],
 'sparse_categorical_accuracy': [0.45301714539527893,
  0.6151190996170044,
  0.6840347647666931,
  0.7506123185157776,
  0.8170785903930664,
  0.8659541010856628,
  0.8960142731666565,
  0.9156090021133423,
  0.9255176782608032,
  0.9331997036933899],
 'sparse_top_k_categorical_accuracy': [0.7420396208763123,
  0.7920284867286682,
  0.8425740599632263,
  0.9015809297561646,
  0.9422177672386169,
  0.9668225049972534,
  0.9791805744171143,
  0.9859719276428223,
  0.9899799823760986,
  0.9925406575202942],
 'val_loss': [1.7188937664031982,
  1.5630298852920532,
  1.5711621046066284,
  1.7296655178070068,
  1.9535658359527588,
  2.1979970932006836,
  2.459477424621582,
  2.6805038452148438,
  2.867391347885132,
  3.0320279598236084],
 'val_sparse_categorical_accura

## 二，内置train_on_batch方法

该内置方法相比较fit方法更加灵活，可以不通过回调函数而直接在批次层次上更加精细地控制训练的过程。

In [10]:
tf.keras.backend.clear_session()

def create_model():
    model = models.Sequential()

    model.add(layers.Embedding(MAX_WORDS,7,input_length=MAX_LEN))
    model.add(layers.Conv1D(filters = 64,kernel_size = 5,activation = "relu"))
    model.add(layers.MaxPool1D(2))
    model.add(layers.Conv1D(filters = 32,kernel_size = 3,activation = "relu"))
    model.add(layers.MaxPool1D(2))
    model.add(layers.Flatten())
    model.add(layers.Dense(CAT_NUM,activation = "softmax"))
    return(model)

def compile_model(model):
    model.compile(optimizer=optimizers.Nadam(),
                loss=losses.SparseCategoricalCrossentropy(),
                metrics=[metrics.SparseCategoricalAccuracy(),metrics.SparseTopKCategoricalAccuracy(5)]) 
    return(model)
 
model = create_model()
model.summary()
model = compile_model(model)


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 300, 7)            216874    
_________________________________________________________________
conv1d (Conv1D)              (None, 296, 64)           2304      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 148, 64)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 146, 32)           6176      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 73, 32)            0         
_________________________________________________________________
flatten (Flatten)            (None, 2336)              0         
_________________________________________________________________
dense (Dense)                (None, 46)                1

In [11]:
def train_model(model,ds_train,ds_valid,epoches):

    for epoch in tf.range(1,epoches+1):
        model.reset_metrics()
        
        # 在后期降低学习率
        if epoch == 5:
            model.optimizer.lr.assign(model.optimizer.lr/2.0)
            tf.print("Lowering optimizer Learning Rate...\n\n")
        
        for x, y in ds_train:
            train_result = model.train_on_batch(x, y)

        for x, y in ds_valid:
            valid_result = model.test_on_batch(x, y,reset_metrics=False)
            
        if epoch%1 ==0:
            printbar()
            tf.print("epoch = ",epoch)
            print("train:",dict(zip(model.metrics_names,train_result)))
            print("valid:",dict(zip(model.metrics_names,valid_result)))
            print("")

In [12]:
train_model(model,ds_train,ds_test,10)

epoch =  1
train: {'loss': 2.1657514572143555, 'sparse_categorical_accuracy': 0.5454545617103577, 'sparse_top_k_categorical_accuracy': 0.7272727489471436}
valid: {'loss': 1.706920862197876, 'sparse_categorical_accuracy': 0.5529831051826477, 'sparse_top_k_categorical_accuracy': 0.7569011449813843}

epoch =  2
train: {'loss': 1.8134582042694092, 'sparse_categorical_accuracy': 0.5454545617103577, 'sparse_top_k_categorical_accuracy': 0.7272727489471436}
valid: {'loss': 1.550385594367981, 'sparse_categorical_accuracy': 0.6032947301864624, 'sparse_top_k_categorical_accuracy': 0.7796081900596619}

epoch =  3
train: {'loss': 1.4261715412139893, 'sparse_categorical_accuracy': 0.5, 'sparse_top_k_categorical_accuracy': 0.8181818127632141}
valid: {'loss': 1.5552479028701782, 'sparse_categorical_accuracy': 0.6402493119239807, 'sparse_top_k_categorical_accuracy': 0.8040961623191833}

epoch =  4
train: {'loss': 1.0415945053100586, 'sparse_categorical_accuracy': 0.6818181872367859, 'sparse_top_k_categ

## 三，自定义训练循环

自定义训练循环无需编译模型，直接利用优化器根据损失函数反向传播迭代参数，拥有最高的灵活性。

In [13]:
tf.keras.backend.clear_session()

def create_model():
    
    model = models.Sequential()

    model.add(layers.Embedding(MAX_WORDS,7,input_length=MAX_LEN))
    model.add(layers.Conv1D(filters = 64,kernel_size = 5,activation = "relu"))
    model.add(layers.MaxPool1D(2))
    model.add(layers.Conv1D(filters = 32,kernel_size = 3,activation = "relu"))
    model.add(layers.MaxPool1D(2))
    model.add(layers.Flatten())
    model.add(layers.Dense(CAT_NUM,activation = "softmax"))
    return(model)

model = create_model()
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 300, 7)            216874    
_________________________________________________________________
conv1d (Conv1D)              (None, 296, 64)           2304      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 148, 64)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 146, 32)           6176      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 73, 32)            0         
_________________________________________________________________
flatten (Flatten)            (None, 2336)              0         
_________________________________________________________________
dense (Dense)                (None, 46)                1

In [14]:
optimizer = optimizers.Nadam()
loss_func = losses.SparseCategoricalCrossentropy()

train_loss = metrics.Mean(name='train_loss')
train_metric = metrics.SparseCategoricalAccuracy(name='train_accuracy')

valid_loss = metrics.Mean(name='valid_loss')
valid_metric = metrics.SparseCategoricalAccuracy(name='valid_accuracy')

@tf.function
def train_step(model, features, labels):
    with tf.GradientTape() as tape:
        predictions = model(features,training = True)
        loss = loss_func(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss.update_state(loss)
    train_metric.update_state(labels, predictions)
    

@tf.function
def valid_step(model, features, labels):
    predictions = model(features)
    batch_loss = loss_func(labels, predictions)
    valid_loss.update_state(batch_loss)
    valid_metric.update_state(labels, predictions)
    

def train_model(model,ds_train,ds_valid,epochs):
    for epoch in tf.range(1,epochs+1):
        
        for features, labels in ds_train:
            train_step(model,features,labels)

        for features, labels in ds_valid:
            valid_step(model,features,labels)

        logs = 'Epoch={},Loss:{},Accuracy:{},Valid Loss:{},Valid Accuracy:{}'
        
        if epoch%1 ==0:
            printbar()
            tf.print(tf.strings.format(logs,
            (epoch,train_loss.result(),train_metric.result(),valid_loss.result(),valid_metric.result())))
            tf.print("")
            
        train_loss.reset_states()
        valid_loss.reset_states()
        train_metric.reset_states()
        valid_metric.reset_states()

train_model(model,ds_train,ds_test,10)


Epoch=1,Loss:2.01347947,Accuracy:0.46938321,Valid Loss:1.67509925,Valid Accuracy:0.564114

Epoch=2,Loss:1.48007202,Accuracy:0.61445111,Valid Loss:1.54901266,Valid Accuracy:0.606411397

Epoch=3,Loss:1.19723749,Accuracy:0.687931418,Valid Loss:1.57189929,Valid Accuracy:0.630454123

Epoch=4,Loss:0.936013877,Accuracy:0.754620373,Valid Loss:1.741889,Valid Accuracy:0.626892269

Epoch=5,Loss:0.709292531,Accuracy:0.815185905,Valid Loss:1.98559523,Valid Accuracy:0.625111282

Epoch=6,Loss:0.53942579,Accuracy:0.863950133,Valid Loss:2.23997688,Valid Accuracy:0.61843276

Epoch=7,Loss:0.428467482,Accuracy:0.89723891,Valid Loss:2.49434376,Valid Accuracy:0.616206586

Epoch=8,Loss:0.35636577,Accuracy:0.915943,Valid Loss:2.75245285,Valid Accuracy:0.614425659

Epoch=9,Loss:0.307280302,Accuracy:0.925963044,Valid Loss:2.96957374,Valid Accuracy:0.610418499

Epoch=10,Loss:0.271974206,Accuracy:0.934090376,Valid Loss:3.17662549,Valid Accuracy:0.605520904

