In [1]:
import warnings
# Suppress every Python warning for the rest of the session (applies to all categories and modules).
warnings.filterwarnings("ignore")

In [2]:
import os
# Configure TensorFlow's native log level BEFORE anything imports TensorFlow
# (both `kashgari` and `tensorflow` below load it); setting it afterwards is
# too late to silence the start-up messages.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import pandas as pd
import numpy as np
import seaborn as sns
import gc
import math
import kashgari
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_squared_error,accuracy_score,f1_score
from IPython.core.display import display, HTML
# Widen the notebook container to 90% of the browser window.
display(HTML("<style>.container { width:90% !important; }</style>"))

from keras_bert import bert
from tensorflow.python.ops.math_ops import erf, sqrt
def gelu(x):
    """Erf-based GELU activation: 0.5 * x * (1 + erf(x / sqrt(2)))."""
    scaled = x / sqrt(2.0)
    gate = 1.0 + erf(scaled)
    return 0.5 * x * gate
# Monkey-patch keras_bert's `bert` module so it uses the erf-based gelu defined above.
bert.gelu = gelu

W1125 20:44:28.791555   356 macros.py:34] CUDA GPU available, you can set `kashgari.config.use_cudnn_cell = True` to use CuDNNCell. This will speed up the training, but will make model incompatible with CPU device.
W1125 20:44:31.004682   356 lazy_loader.py:50] 
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



# 1.0 Load Data

In [3]:
# Labelled questions are read from a GBK-encoded CSV, the answer table from a UTF-8 CSV.
dataset = pd.read_csv('../input/samples.csv',sep=',',encoding='gbk')
answer = pd.read_csv('../input/answer.csv',sep=',',encoding='utf-8')
# Rename the two sample columns to the names used by the rest of the notebook.
dataset.columns = ['question','label']
# Cell output: (rows, columns) of both tables.
dataset.shape,answer.shape

((456, 2), (74, 2))

In [4]:
# Preview the first five labelled questions.
dataset.head(5)

Unnamed: 0,question,label
0,你好,1
1,您好,1
2,早上好,1
3,上午好,1
4,晚上好,1


In [5]:
# Collapse the answer table to one row per `question` value (groupby "first" aggregate);
# `question` becomes the index, so answers can be looked up with answer.loc[<question id>, 'answer'].
answer = answer.groupby('question').agg('first')

In [6]:
# Preview the first five answers, now indexed by question id.
answer.head(5)

Unnamed: 0_level_0,answer
question,Unnamed: 1_level_1
1,您好！
2,好的。
3,我叫云天明。
4,25岁。
5,我现在住中大宿舍。


# 2.0 Create Dataset

In [7]:
# Character-level inputs: every question is cut to `max_length` characters
# and split into a list of single characters.
max_length = 30
x_train = [list(question[:max_length]) for question in dataset['question']]
y_train = list(dataset['label'].astype(int))
# The "test" inputs are the very same list object as the training inputs.
x_test = x_train

In [8]:
 def get_stratifiedkfold_ids(x,y,n_folds=4,random_state=42,shuttle=True):
    """Return the train/validation index pairs of a stratified K-fold split.

    Parameters
    ----------
    x, y : sequences of equal length; `y` holds the class labels used for
        stratification.
    n_folds : number of folds.
    random_state : seed for the shuffling (used only when `shuttle` is true).
    shuttle : whether to shuffle before splitting. The misspelt name is kept
        so that existing keyword callers continue to work.

    Returns
    -------
    list of ``[train_indices, valid_indices]`` pairs, one per fold.
    """
    # Previously both settings were hard-coded and the parameters ignored.
    # The seed is passed only when shuffling, because newer scikit-learn
    # versions reject a random_state combined with shuffle=False.
    kfold = StratifiedKFold(
        n_splits=n_folds,
        random_state=random_state if shuttle else None,
        shuffle=shuttle,
    )
    return [[train_in, test_in] for train_in, test_in in kfold.split(x, y)]
# Build the 4-fold index pairs once so every later step uses the same split.
# NOTE(review): the label column is passed as both x and y; presumably only y drives the stratification — confirm.
fold_ids = get_stratifiedkfold_ids(dataset['label'],dataset['label'],n_folds=4)

# 3.0 Model Training

In [11]:
from kashgari.embeddings import BERTEmbedding
from kashgari.tasks.classification import BiLSTM_Model,BiGRU_Model
from keras.callbacks import EarlyStopping
def train_model(x_train,y_train,x_valid,y_valid,seed = 1,epochs = 10,max_length=30,
                bert_path='./bert/chinese_L-12_H-768_A-12/',learning_rate=1e-4,batch_size=32):
    """Build and fit a BiGRU classifier on top of a BERT embedding.

    Parameters
    ----------
    x_train, y_train : training samples and their labels.
    x_valid, y_valid : validation samples and labels; the validation loss
        drives early stopping.
    seed : seed applied to NumPy and TensorFlow before the model is built.
    epochs : maximum number of training epochs.
    max_length : sequence length handed to the BERT embedding.
    bert_path : directory of the pre-trained BERT checkpoint.
    learning_rate : learning rate of the Adam optimizer.
    batch_size : mini-batch size used for fitting.

    Returns
    -------
    The fitted kashgari model.
    """
    # `seed` used to be accepted but ignored; seed the random number
    # generators so that runs are repeatable.
    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)

    embed = BERTEmbedding(bert_path,task=kashgari.CLASSIFICATION,sequence_length=max_length)
    model = BiGRU_Model(embed)
    model.build_model(x_train, y_train, x_valid, y_valid)

    # Switch every layer whose name contains 'Encoder-12' to trainable.
    # NOTE(review): presumably the remaining BERT layers stay frozen — confirm.
    for layer in model.tf_model.layers:
        if 'Encoder-12' in layer.name:
            layer.trainable = True

    opt = tf.keras.optimizers.Adam(learning_rate)
    model.compile_model(optimizer=opt)
    # Use the tf.keras callback so it matches the tf.keras optimizer/model.
    # With patience=0 training stops right after the first epoch that fails to
    # improve val_loss, so restore the weights of the best epoch instead of
    # returning the degraded ones.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=0, restore_best_weights=True)
    model.fit(x_train, y_train, x_valid, y_valid, batch_size=batch_size,epochs=epochs,callbacks=[early_stopping],fit_kwargs = {'verbose':1})

    return model

Using TensorFlow backend.


In [12]:
def kfold_training(x_train,y_train,x_test,fold_ids,epochs=10,max_length=30,
                   model_path_prefix='../model/bigru_bert_model_versionE_len30'):
    """Train one model per fold and collect out-of-fold and test predictions.

    Parameters
    ----------
    x_train, y_train : full training samples and labels, indexed by the
        positions stored in `fold_ids`.
    x_test : samples every fold model predicts on.
    fold_ids : iterable of ``(train_indices, valid_indices)`` pairs.
    epochs, max_length : forwarded to `train_model`.
    model_path_prefix : each fold model is saved to
        ``model_path_prefix + str(fold_index) + '/'``.

    Returns
    -------
    valid_pred : array with, for every training sample, the argmax of the
        probabilities predicted by the model that held it out.
    test_pred : array of shape (len(x_test), number of distinct labels) with
        the class probabilities averaged over all folds.
    models : list of the trained fold models.
    """
    fold_ids = list(fold_ids)
    # Average over the real number of folds; the former hard-coded divisor of
    # 5 left the averaged probabilities summing to 0.8 with 4 folds.
    n_folds = len(fold_ids)
    models = []
    valid_pred = np.zeros(len(x_train))
    test_pred = np.zeros([len(x_test),len(set(y_train))])
    for fold_index, (trainid, validid) in enumerate(fold_ids):
        print('---------------------------------------')
        print('training at ', fold_index, ' fold ... ')
        train_x = [x_train[i] for i in trainid]
        train_y = [y_train[i] for i in trainid]
        valid_x = [x_train[i] for i in validid]
        valid_y = [y_train[i] for i in validid]
        model = train_model(train_x,train_y,valid_x,valid_y,seed = 1,epochs = epochs,max_length=max_length)
        model.save(model_path_prefix + str(fold_index) + '/')
        # predict() is unpacked as (labels, probabilities); only the
        # probabilities are used below.
        _, valid_prob = model.predict(valid_x)
        # NOTE(review): this stores the argmax column index, which presumably
        # is the model's internal class index rather than the original label —
        # confirm before comparing valid_pred with y_train.
        valid_pred[validid] = valid_prob.argmax(axis=-1)
        _, test_prob = model.predict(x_test)
        test_pred += test_prob / n_folds
        models.append(model)
    return valid_pred,test_pred,models

In [17]:
# Train one model per fold (up to 10 epochs each); returns the out-of-fold predictions,
# the accumulated test probabilities and the list of fold models.
valid_pred,test_pred,models = kfold_training(x_train,y_train,x_test,fold_ids,epochs=10,max_length=30)

---------------------------------------
training at  0  fold ... 


W1028 21:42:40.979765  6448 bert_embedding.py:126] seq_len: 30


Model: "model_29"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Input-Token (InputLayer)        [(None, 30)]         0                                            
__________________________________________________________________________________________________
Input-Segment (InputLayer)      [(None, 30)]         0                                            
__________________________________________________________________________________________________
Embedding-Token (TokenEmbedding [(None, 30, 768), (2 16226304    Input-Token[0][0]                
__________________________________________________________________________________________________
Embedding-Segment (Embedding)   (None, 30, 768)      1536        Input-Segment[0][0]              
___________________________________________________________________________________________

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
---------------------------------------
training at  1  fold ... 


W1028 21:46:00.686187  6448 bert_embedding.py:126] seq_len: 30


Model: "model_34"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Input-Token (InputLayer)        [(None, 30)]         0                                            
__________________________________________________________________________________________________
Input-Segment (InputLayer)      [(None, 30)]         0                                            
__________________________________________________________________________________________________
Embedding-Token (TokenEmbedding [(None, 30, 768), (2 16226304    Input-Token[0][0]                
__________________________________________________________________________________________________
Embedding-Segment (Embedding)   (None, 30, 768)      1536        Input-Segment[0][0]              
___________________________________________________________________________________________

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
---------------------------------------
training at  2  fold ... 


W1028 21:49:41.340808  6448 bert_embedding.py:126] seq_len: 30


Model: "model_39"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Input-Token (InputLayer)        [(None, 30)]         0                                            
__________________________________________________________________________________________________
Input-Segment (InputLayer)      [(None, 30)]         0                                            
__________________________________________________________________________________________________
Embedding-Token (TokenEmbedding [(None, 30, 768), (2 16226304    Input-Token[0][0]                
__________________________________________________________________________________________________
Embedding-Segment (Embedding)   (None, 30, 768)      1536        Input-Segment[0][0]              
___________________________________________________________________________________________

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
---------------------------------------
training at  3  fold ... 


W1028 21:53:51.830136  6448 bert_embedding.py:126] seq_len: 30


Model: "model_44"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Input-Token (InputLayer)        [(None, 30)]         0                                            
__________________________________________________________________________________________________
Input-Segment (InputLayer)      [(None, 30)]         0                                            
__________________________________________________________________________________________________
Embedding-Token (TokenEmbedding [(None, 30, 768), (2 16226304    Input-Token[0][0]                
__________________________________________________________________________________________________
Embedding-Segment (Embedding)   (None, 30, 768)      1536        Input-Segment[0][0]              
___________________________________________________________________________________________

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10


In [None]:
# Classify one query with the first fold model and print the matching answer.
sentence = '你咳嗽厉害么'
sentence = list(sentence)  # the model is fed a list of single characters
question_ind,prob_all = models[0].predict([sentence])
# Cast to int, as the other inference cells do, so the label can be used as a
# key into the integer-indexed `answer` table.
question_ind = int(question_ind[0])
question_prob = prob_all.max()
print('question = ', question_ind, ', prob = ', question_prob)
reply = answer.loc[question_ind,'answer']
print(reply)

In [None]:
from kashgari.utils import load_model
# Reload a saved model from disk, including its weights.
# NOTE(review): this path differs from the per-fold directories written by kfold_training
# ('../model/bigru_bert_model_versionE_len30<fold>/') — confirm which saved model is intended.
new_model = load_model('../model/bigru_bert_model_versionE_0', load_weights=True)
# new_model.tf_model.load_weights('saved-model-05-0.96.hdf5')

In [None]:
# Inspect the answer row stored under question id 36.
answer.loc[36,:]

In [None]:
# Run one query through the reloaded model; the input is a list of single characters.
sentence = list('你咳嗽厉害么')
question_ind,prob_all = new_model.predict([sentence])
# Keep the first (only) predicted label as an int, and the highest probability.
question_ind = int(question_ind[0])
question_prob = prob_all.max()
print('question = ', question_ind, ', prob = ', question_prob)

In [None]:
# Look up the answer text for the predicted question id and print it.
reply = answer.loc[question_ind,'answer']
print(reply)

In [None]:
import pandas as pd
from kashgari.utils import load_model
# Load the model stored under ./chat_model/ together with its weights.
new_model = load_model('./chat_model/', load_weights=True)

In [None]:
from kashgari.utils import convert_to_saved_model

# Export the loaded model as version 1 under chat_model_serving/gru_bert/.
convert_to_saved_model(model = new_model, version = 1, model_path='chat_model_serving/gru_bert/')

In [None]:
# Reload the answer table and key it by question id (groupby "first" aggregate per question).
answer = (
    pd.read_csv('../input/answer.csv', sep=',', encoding='utf-8')
    .groupby('question')
    .agg('first')
)

In [None]:
# Classify one query with the loaded model, then print the predicted id,
# its probability and the matching answer.
sentence = list('你有发烧吗')
question_ind,prob_all = new_model.predict([sentence])
question_ind = int(question_ind[0])
question_prob = prob_all.max()
print('question = ', question_ind, ', prob = ', question_prob)
reply = answer.loc[question_ind,'answer']
print(reply)