In [1]:
from extract_feature import BertVector
import gensim
import numpy as np
import random
from sklearn.externals import joblib
import os
import csv
import matplotlib.pyplot as plt 
from keras.models import load_model

# Shared BERT encoder; bert_rep_sentencevector() calls bv.encode() on it.
bv = BertVector()
# Per-token embedding width and number of token positions per sentence; the
# CNN/LSTM input shapes further down are built from these two values.
hidden_size = 768
seq_len = 128

# Directory that holds "data.csv" (produced by extract_trainset.ipynb). The
# train/dev/test CSV files and the cached .npy arrays are written here too.
data_path = "../../mid_data/training_data/mda_data"
# Directory where the trained models are saved.
model_path = "../../model/bert_model"

# exist_ok=True avoids the race in "check that it is missing, then create".
os.makedirs(model_path, exist_ok=True)

Using TensorFlow backend.


INFO:tensorflow:Using config: {'_model_dir': '../tmp', '_num_worker_replicas': 1, '_global_id_in_cluster': 0, '_experimental_distribute': None, '_evaluation_master': '', '_task_id': 0, '_session_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
  allow_growth: true
}
graph_options {
  optimizer_options {
    global_jit_level: ON_1
  }
}
, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001D72EBAB748>, '_device_fn': None, '_protocol': None, '_train_distribute': None, '_task_type': 'worker', '_save_checkpoints_steps': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_master': '', '_keep_checkpoint_every_n_hours': 10000, '_is_chief': True, '_log_step_count_steps': 100, '_eval_distribute': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_keep_checkpoint_max': 5, '_service': None}
INFO:tensorflow:Could not find trained model in model_dir: ../tmp, running initialization to predict.
INFO:tensorflow:Calling model_fn.
INFO:t

# Building Training set, Test set
####  根据extract_trainset.ipynb 中生成的data.csv，得到 train.csv,dev.csv,test.csv
- 从原csv数据中抽取训练模型时需要的句子内容，具体标签。
- 按照标签分为不同的list，-1为消极，0为中性，1为积极。统计各个标签的数据量。
- 由于数据的不均衡，按照最少标签数据量向其他两个list中随机取相同数量的数据。
- 按比例将标签均衡的数据集分为train set 和dev set。
- 把剩下所有其他数据归入test set。
- 返回三个csv 文件 后续使用

In [2]:
def build_trainset_testset(scale, data_dir=None, seed=None):
    """Split ``data.csv`` into balanced ``train.csv``/``dev.csv`` and ``test.csv``.

    Column 4 of every input row is the label ('1' positive, '0' neutral,
    '-1' negative) and column 5 is the sentence. Rows with fewer than six
    columns or with any other label are ignored.

    The three classes are down-sampled to the size of the smallest class.
    ``scale`` of each down-sampled class goes to the train set and the rest of
    it to the dev set. Every labelled row that was not picked during
    down-sampling goes to the test set. Membership is decided by value, so a
    duplicated ``[label, sentence]`` row never ends up on both sides of a split.

    Args:
        scale: fraction (0..1) of the balanced data used for training.
        data_dir: directory containing ``data.csv`` and receiving the three
            output files. Defaults to the notebook-level ``data_path``.
        seed: optional seed for a private random generator, making the split
            reproducible. With ``None`` the global ``random`` state is used.

    Raises:
        ValueError: if ``scale`` is outside ``[0, 1]``.
    """
    if not 0 <= scale <= 1:
        raise ValueError("scale must be between 0 and 1, got {!r}".format(scale))
    if data_dir is None:
        data_dir = data_path
    rng = random if seed is None else random.Random(seed)

    def held_out(pool, taken):
        # Set lookup keeps this linear; list membership made it quadratic.
        taken_keys = {tuple(item) for item in taken}
        return [item for item in pool if tuple(item) not in taken_keys]

    def write_rows(file_name, rows):
        with open(os.path.join(data_dir, file_name), mode="w",
                  encoding="utf-8-sig", newline="") as f_out:
            csv.writer(f_out, dialect='excel').writerows(rows)

    by_label = {'1': [], '0': [], '-1': []}
    data_all = []
    with open(os.path.join(data_dir, "data.csv"), 'r',
              encoding='utf-8-sig', newline='') as f_in:
        for line in csv.reader(f_in):
            if len(line) < 6 or line[4] not in by_label:
                continue
            item = [line[4], line[5]]
            by_label[line[4]].append(item)
            data_all.append(item)

    data_positive = by_label['1']
    data_neutral = by_label['0']
    data_negative = by_label['-1']
    print("len_positive:"+str(len(data_positive))+" len_neutral:"+str(len(data_neutral))+" len_negative:"+str(len(data_negative)) )
    data_min_num = min(len(data_positive), len(data_neutral), len(data_negative))

    # Down-sample every class to the size of the smallest one.
    data_positive = rng.sample(data_positive, data_min_num)
    data_negative = rng.sample(data_negative, data_min_num)
    data_neutral = rng.sample(data_neutral, data_min_num)
    data_test = held_out(data_all, data_positive + data_neutral + data_negative)

    train_num = int(scale * data_min_num)
    data_positive_train = rng.sample(data_positive, train_num)
    data_negative_train = rng.sample(data_negative, train_num)
    data_neutral_train = rng.sample(data_neutral, train_num)
    data_positive_test = held_out(data_positive, data_positive_train)
    data_negative_test = held_out(data_negative, data_negative_train)
    data_neutral_test = held_out(data_neutral, data_neutral_train)

    print("len_positive_test:"+str(len(data_positive_test))+" len_neutral_test:"+str(len(data_neutral_test))+" len_negative_test:"+str(len(data_negative_test)) )
    data_train = data_positive_train + data_negative_train + data_neutral_train
    data_dev = data_positive_test + data_negative_test + data_neutral_test
    print(len(data_train))
    print(len(data_dev))
    print(len(data_test))

    write_rows("train.csv", data_train)
    write_rows("dev.csv", data_dev)
    write_rows("test.csv", data_test)


# 80% of the balanced data becomes the train set; the rest of it is the dev set.
build_trainset_testset(scale=0.8)

len_positive:18464 len_neutral:20097 len_negative:4504
len_positive_test:898 len_neutral_test:890 len_negative_test:889
10809
2677
29400


# Building train.npy,dev.npy,test.npy

#### 从train.csv,dev.csv,test.csv 中读取句子和相应标签并构建数组，np.shape=（句子数，128，768）
- bert 对文本进行embedding的过程比较缓慢，因此在转换完毕后，为了方便后续使用，会将转换好的numpy 保存。
- 读入保存的numpy文件。每一条句子的embedding是句子长度（seq_len=128）*768（bert中文版每一个字的编码长度）。
- 机器学习用到的句子编码需要在bert编码基础上进行平均改动。机器学习每个句子的编码长度是1*768。

In [2]:
def bert_rep_sentencevector(sentence):
    """Turn one sentence into its BERT embedding.

    All space characters are removed from the sentence, the result is sent to
    the shared encoder ``bv`` as a one-element batch, and the encoder output
    is returned as a NumPy array.
    """
    compact = "".join(sentence.split(" "))
    return np.array(bv.encode([compact]))
    
def build_traindata(data_dir=None, encode=None):
    """Build feature/label arrays from ``train.csv``, ``dev.csv`` and ``test.csv``.

    Each CSV row is ``[label, sentence]``. The sentence is encoded and the
    label is one-hot encoded as '-1' -> [1,0,0], '0' -> [0,1,0],
    '1' -> [0,0,1].

    A row with an unknown label (or fewer than two columns) is reported and
    skipped entirely. It used to be added to the features but not to the
    labels, which left every later row paired with the wrong label.

    Args:
        data_dir: directory containing the three CSV files. Defaults to the
            notebook-level ``data_path``.
        encode: callable mapping a sentence to its embedding. Defaults to
            ``bert_rep_sentencevector``.

    Returns:
        ``(X_train, Y_train, X_dev, Y_dev, X_test, Y_test)`` as NumPy arrays.
    """
    if data_dir is None:
        data_dir = data_path
    if encode is None:
        encode = bert_rep_sentencevector

    one_hot = {'-1': [1, 0, 0], '0': [0, 1, 0], '1': [0, 0, 1]}

    def load_split(split_name):
        # Read one CSV split and return its (features, labels) arrays.
        vectors = []
        labels = []
        account = 0
        print("------start building %s dataset------" % split_name)
        csv_path = os.path.join(data_dir, split_name + ".csv")
        with open(csv_path, mode='r', encoding='utf-8-sig', newline='') as f_in:
            for line in csv.reader(f_in):
                if len(line) >= 2 and line[0] in one_hot:
                    vectors.append(encode(line[1]))
                    labels.append(one_hot[line[0]])
                else:
                    # Skip the whole row so features and labels stay aligned.
                    print("出错")
                    print(line[0] if line else line)
                account += 1
                print("\r %d" % (account), end=" ")
        print("\n------end building %s dataset------" % split_name)
        return np.array(vectors), np.array(labels)

    X_train, Y_train = load_split("train")
    X_dev, Y_dev = load_split("dev")
    X_test, Y_test = load_split("test")
    return X_train, Y_train, X_dev, Y_dev, X_test, Y_test



In [None]:
# BERT encoding is slow, so encode every split once and cache the arrays on
# disk; the next cell reloads them with np.load.
X_train_2, Y_train_2, X_dev_2, Y_dev_2, X_test_2, Y_test_2 = build_traindata()

# Each sentence is encoded as a one-element batch, so the stacked arrays have
# an extra length-1 axis in position 1. Squeeze only that axis: a bare
# np.squeeze would also remove the sample axis of a split with one sentence.
X_train_2 = np.squeeze(X_train_2, axis=1)
X_dev_2 = np.squeeze(X_dev_2, axis=1)
X_test_2 = np.squeeze(X_test_2, axis=1)

np.save(data_path+"/trainX_vec_2", X_train_2)
np.save(data_path+"/devX_vec_2", X_dev_2)
np.save(data_path+"/testX_vec_2", X_test_2)
np.save(data_path+"/trainY_vec_2", Y_train_2)
np.save(data_path+"/devY_vec_2", Y_dev_2)
np.save(data_path+"/testY_vec_2", Y_test_2)

print(X_train_2.shape, Y_train_2.shape)
print(X_dev_2.shape, Y_dev_2.shape)
print(X_test_2.shape, Y_test_2.shape)

In [3]:
def Label_Judge(Y):
    """Convert a one-hot label row back to its sentiment value.

    Position 0 maps to -1, position 1 to 0 and position 2 to 1. The entries of
    ``Y`` must sum to 1 (checked with ``assert``). ``None`` is returned when
    none of the first three entries equals 1.
    """
    assert np.sum(Y)==1
    for position, sentiment in enumerate((-1, 0, 1)):
        if Y[position] == 1:
            return sentiment
    return None
       
# Reload the cached sentence encodings and one-hot labels from disk.
X_train_2 = np.load(data_path+"/trainX_vec_2.npy")
X_dev_2 = np.load(data_path+"/devX_vec_2.npy")
X_test_2 = np.load(data_path+"/testX_vec_2.npy")
Y_train_2 = np.load(data_path+"/trainY_vec_2.npy")
Y_dev_2 = np.load(data_path+"/devY_vec_2.npy")
Y_test_2 = np.load(data_path+"/testY_vec_2.npy")

# Average over axis 1 to get one vector per sentence for the classical
# machine-learning models.
X_train, X_dev, X_test = (
    encoded.mean(axis=1) for encoded in (X_train_2, X_dev_2, X_test_2)
)

# Turn the one-hot rows back into scalar labels (-1 / 0 / 1).
Y_train = np.zeros(len(Y_train_2))
for row, one_hot in enumerate(Y_train_2):
    Y_train[row] = Label_Judge(one_hot)

Y_dev = np.zeros(len(Y_dev_2))
for row, one_hot in enumerate(Y_dev_2):
    Y_dev[row] = Label_Judge(one_hot)

Y_test = np.zeros(len(Y_test_2))
for row, one_hot in enumerate(Y_test_2):
    Y_test[row] = Label_Judge(one_hot)

for features, targets in (
    (X_train_2, Y_train_2),
    (X_dev_2, Y_dev_2),
    (X_test_2, Y_test_2),
    (X_train, Y_train),
    (X_dev, Y_dev),
    (X_test, Y_test),
):
    print(features.shape, targets.shape)

(4113, 128, 768) (4113, 3)
(1013, 128, 768) (1013, 3)
(9074, 128, 768) (9074, 3)
(4113, 768) (4113,)
(1013, 768) (1013,)
(9074, 768) (9074,)


# SVM

In [4]:
def train_svm(X_train, Y_train):
    """Fit a linear-kernel SVC (probability estimates on) and save it.

    Apart from the kernel and ``probability=True`` the classifier uses its
    default settings. The fitted model is dumped to ``sentiment_svm_model.m``
    inside ``model_path``.
    """
    from sklearn.svm import SVC
    classifier = SVC(kernel='linear',probability=True)
    classifier.fit(X_train, Y_train)
    target = os.path.join(model_path,'sentiment_svm_model.m')
    joblib.dump(classifier, target)

def evaluate_svm(model_filepath, X_test, Y_test):
    """Load the saved SVM and report its accuracy on ``(X_test, Y_test)``.

    Args:
        model_filepath: path of the model file written by ``train_svm``.
        X_test: one feature row per sample.
        Y_test: expected label per sample; compared after casting to int.

    Returns:
        The fraction of correctly predicted samples (also printed).

    Raises:
        ValueError: if ``X_test`` is empty or its length differs from
            ``Y_test``.
    """
    # NOTE(review): joblib.load unpickles the file - only load trusted models.
    model = joblib.load(model_filepath)
    X_test = np.asarray(X_test)
    Y_test = np.asarray(Y_test)
    if len(X_test) == 0:
        raise ValueError("X_test is empty; accuracy is undefined")
    if len(X_test) != len(Y_test):
        raise ValueError("X_test and Y_test must have the same length")
    # One batched predict call replaces one call per row; each row is
    # flattened exactly as the per-row reshape(1, -1) did.
    Y_predict = np.asarray(model.predict(X_test.reshape(len(X_test), -1)))
    right = int(np.sum(Y_predict.astype(int) == Y_test.astype(int)))
    score = right / len(Y_predict)
    print('model accuray is :{0}'.format(score))
    return score


def predict_svm(model_filepath):
    """Print the saved SVM's class probabilities for two sample sentences.

    Each sentence is encoded, reshaped to (1, 128, 768) and averaged over
    axis 1 before being passed to ``predict_proba``.
    """
    classifier = joblib.load(model_filepath)
    samples = [
        ('sentence1', '在经营中努力为客户提供快捷优质的信息、仓储、物流、类金融等服务，利用自身资源积极拓展新的客户，同时维护与上游客户良好的关系，总体保持持续稳定的发展。'),
        ('sentence2', '(3)  应收账款期末较期初减少 59,289,691.24 元，减少 35.01%，主要系本公司之子公司西藏泰达厚生医药有限公司本期销售收入下降以及整体出售原子公司四川禾正制药有限责任公司导致应收账款减少。'),
    ]
    # Encode both sentences before printing anything.
    pooled = [
        np.array(bert_rep_sentencevector(text)).reshape(1,128,768).mean(axis=1)
        for _, text in samples
    ]
    for (tag, _), vector in zip(samples, pooled):
        print(tag, classifier.predict_proba(vector))

In [5]:
# Train the SVM, score it on the dev and test splits, then show two sample
# predictions.
train_svm(X_train, Y_train)
model_filepath_svm = os.path.join(model_path, 'sentiment_svm_model.m')
for split_X, split_Y in ((X_train, Y_train), (X_dev, Y_dev), (X_test, Y_test)):
    print(split_X.shape, split_Y.shape)
for split_X, split_Y in ((X_dev, Y_dev), (X_test, Y_test)):
    evaluate_svm(model_filepath_svm, split_X, split_Y)
predict_svm(model_filepath_svm)

(4113, 768) (4113,)
(1013, 768) (1013,)
(9074, 768) (9074,)
model accuray is :0.8262586377097729
model accuray is :0.7956799647344059
sentence1 [[1.78321454e-04 1.53821599e-02 9.84439519e-01]]
sentence2 [[9.97290486e-01 2.22656378e-03 4.82950567e-04]]


# Bayes

In [6]:

def train_bayes(X_train, Y_train):
    """Fit a Gaussian naive Bayes classifier and save it.

    The fitted model is dumped to ``sentiment_bayes_model.m`` inside
    ``model_path``.
    """
    from sklearn.naive_bayes import GaussianNB
    classifier = GaussianNB()
    classifier.fit(X_train, Y_train)
    target = os.path.join(model_path,'sentiment_bayes_model.m')
    joblib.dump(classifier, target)


def evaluate_bayes(model_filepath, X_test, Y_test):
    """Load the saved naive Bayes model and report its accuracy.

    Args:
        model_filepath: path of the model file written by ``train_bayes``.
        X_test: one feature row per sample.
        Y_test: expected label per sample; compared after casting to int.

    Returns:
        The fraction of correctly predicted samples (also printed).

    Raises:
        ValueError: if ``X_test`` is empty or its length differs from
            ``Y_test``.
    """
    # NOTE(review): joblib.load unpickles the file - only load trusted models.
    model = joblib.load(model_filepath)
    X_test = np.asarray(X_test)
    Y_test = np.asarray(Y_test)
    if len(X_test) == 0:
        raise ValueError("X_test is empty; accuracy is undefined")
    if len(X_test) != len(Y_test):
        raise ValueError("X_test and Y_test must have the same length")
    # One batched predict call replaces one call per row; each row is
    # flattened exactly as the per-row reshape(1, -1) did.
    Y_predict = np.asarray(model.predict(X_test.reshape(len(X_test), -1)))
    right = int(np.sum(Y_predict.astype(int) == Y_test.astype(int)))
    score = right / len(Y_predict)
    print('model accuray is :{0}'.format(score))
    return score


def predict_bayes(model_filepath):
    """Print the saved naive Bayes model's class probabilities for two samples.

    Each sentence is encoded, reshaped to (1, 128, 768) and averaged over
    axis 1 before being passed to ``predict_proba``.
    """
    classifier = joblib.load(model_filepath)
    samples = [
        ('sentence1', '在经营中努力为客户提供快捷优质的信息、仓储、物流、类金融等服务，利用自身资源积极拓展新的客户，同时维护与上游客户良好的关系，总体保持持续稳定的发展。'),
        ('sentence2', '(3)  应收账款期末较期初减少 59,289,691.24 元，减少 35.01%，主要系本公司之子公司西藏泰达厚生医药有限公司本期销售收入下降以及整体出售原子公司四川禾正制药有限责任公司导致应收账款减少。'),
    ]
    # Encode both sentences before printing anything.
    pooled = [
        np.array(bert_rep_sentencevector(text)).reshape(1,128,768).mean(axis=1)
        for _, text in samples
    ]
    for (tag, _), vector in zip(samples, pooled):
        print(tag, classifier.predict_proba(vector))


In [7]:
# Train the naive Bayes model, score it on the dev and test splits, then show
# two sample predictions.
model_filepath_bayes = os.path.join(model_path, 'sentiment_bayes_model.m')
for split_X, split_Y in ((X_train, Y_train), (X_dev, Y_dev), (X_test, Y_test)):
    print(split_X.shape, split_Y.shape)
train_bayes(X_train, Y_train)
for split_X, split_Y in ((X_dev, Y_dev), (X_test, Y_test)):
    evaluate_bayes(model_filepath_bayes, split_X, split_Y)
predict_bayes(model_filepath_bayes)

(29434, 768) (29434,)
model accuray is :0.666270299653462


0.666270299653462

# KNN

In [8]:

def train_knn(X_train, Y_train, X_test, Y_test, n_neighbors=14):
    """Sweep k for a KNN classifier, then fit and save the final model.

    For k = 1, 6, 11, ..., 96 a classifier is fitted on the training data and
    its accuracy on ``(X_test, Y_test)`` is printed. The printed label says
    "precision_score", but the number is plain accuracy. A classifier with
    ``n_neighbors`` neighbours is then fitted and dumped to
    ``sentiment_knn_model.m`` inside ``model_path``.

    Args:
        X_train, Y_train: training features and labels.
        X_test, Y_test: held-out features and labels used for the sweep.
        n_neighbors: k of the saved model. Defaults to 14, the value that was
            previously hard-coded.
    """
    from sklearn.neighbors import KNeighborsClassifier

    expected = np.asarray(Y_test).astype(int)
    for x in range(1, 101, 5):
        model = KNeighborsClassifier(n_neighbors=x)
        model.fit(X_train, Y_train)

        preds = np.asarray(model.predict(X_test)).astype(int)
        num = int(np.sum(preds == expected[:len(preds)]))
        print('K= ' + str(x) + ', precision_score:' + str(float(num) / len(preds)))

    # Fit the model that is actually kept.
    model = KNeighborsClassifier(n_neighbors=n_neighbors)
    model.fit(X_train, Y_train)
    joblib.dump(model, os.path.join(model_path,'sentiment_knn_model.m'))

    
def evaluate_knn(model_filepath, X_test, Y_test):
    """Load the saved KNN model and report its accuracy.

    Predictions and labels are compared after casting to int, the same way
    the other ``evaluate_*`` functions in this notebook do it.

    Args:
        model_filepath: path of the model file written by ``train_knn``.
        X_test: one feature row per sample.
        Y_test: expected label per sample.

    Returns:
        The fraction of correctly predicted samples (also printed).

    Raises:
        ValueError: if ``X_test`` is empty or its length differs from
            ``Y_test``.
    """
    # NOTE(review): joblib.load unpickles the file - only load trusted models.
    model = joblib.load(model_filepath)
    X_test = np.asarray(X_test)
    Y_test = np.asarray(Y_test)
    if len(X_test) == 0:
        raise ValueError("X_test is empty; accuracy is undefined")
    if len(X_test) != len(Y_test):
        raise ValueError("X_test and Y_test must have the same length")
    # One batched predict call replaces one (slow) neighbour search per row.
    Y_predict = np.asarray(model.predict(X_test.reshape(len(X_test), -1)))
    right = int(np.sum(Y_predict.astype(int) == Y_test.astype(int)))
    score = right / len(Y_predict)
    print('model accuray is :{0}'.format(score))
    return score


def predict_knn(model_filepath):
    """Print the saved KNN model's class probabilities for two sample sentences.

    Each sentence is encoded, reshaped to (1, 128, 768) and averaged over
    axis 1 before being passed to ``predict_proba``.
    """
    classifier = joblib.load(model_filepath)
    samples = [
        ('sentence1', '在经营中努力为客户提供快捷优质的信息、仓储、物流、类金融等服务，利用自身资源积极拓展新的客户，同时维护与上游客户良好的关系，总体保持持续稳定的发展。'),
        ('sentence2', '(3)  应收账款期末较期初减少 59,289,691.24 元，减少 35.01%，主要系本公司之子公司西藏泰达厚生医药有限公司本期销售收入下降以及整体出售原子公司四川禾正制药有限责任公司导致应收账款减少。'),
    ]
    # Encode both sentences before printing anything.
    pooled = [
        np.array(bert_rep_sentencevector(text)).reshape(1,128,768).mean(axis=1)
        for _, text in samples
    ]
    for (tag, _), vector in zip(samples, pooled):
        print(tag, classifier.predict_proba(vector))

In [9]:
# Sweep k on the dev split and save the KNN model, then score it on the dev
# and test splits and show two sample predictions.
model_filepath_knn = os.path.join(model_path, 'sentiment_knn_model.m')
for split_X, split_Y in ((X_train, Y_train), (X_dev, Y_dev), (X_test, Y_test)):
    print(split_X.shape, split_Y.shape)
train_knn(X_train, Y_train, X_dev, Y_dev)
for split_X, split_Y in ((X_dev, Y_dev), (X_test, Y_test)):
    evaluate_knn(model_filepath_knn, split_X, split_Y)
predict_knn(model_filepath_knn)

(29434, 768) (29434,)
model accuray is :0.6992593599238975
sentence1 [[0.05882353 0.05882353 0.88235294]]
sentence2 [[0.58823529 0.17647059 0.23529412]]


# Decision Tree

In [11]:
def train_decisiontree(X_train, Y_train):
    """Fit a decision-tree classifier with default settings and save it.

    The fitted model is dumped to ``sentiment_decisiontree_model.m`` inside
    ``model_path``.
    """
    from sklearn import tree
    classifier = tree.DecisionTreeClassifier()
    classifier.fit(X_train, Y_train)
    target = os.path.join(model_path,'sentiment_decisiontree_model.m')
    joblib.dump(classifier, target)

def evaluate_decisiontree(model_filepath, X_test, Y_test):
    """Load the saved decision tree and report its accuracy.

    Args:
        model_filepath: path of the model file written by
            ``train_decisiontree``.
        X_test: one feature row per sample.
        Y_test: expected label per sample; compared after casting to int.

    Returns:
        The fraction of correctly predicted samples (also printed).

    Raises:
        ValueError: if ``X_test`` is empty or its length differs from
            ``Y_test``.
    """
    # NOTE(review): joblib.load unpickles the file - only load trusted models.
    model = joblib.load(model_filepath)
    X_test = np.asarray(X_test)
    Y_test = np.asarray(Y_test)
    if len(X_test) == 0:
        raise ValueError("X_test is empty; accuracy is undefined")
    if len(X_test) != len(Y_test):
        raise ValueError("X_test and Y_test must have the same length")
    # One batched predict call replaces one call per row; each row is
    # flattened exactly as the per-row reshape(1, -1) did.
    Y_predict = np.asarray(model.predict(X_test.reshape(len(X_test), -1)))
    right = int(np.sum(Y_predict.astype(int) == Y_test.astype(int)))
    score = right / len(Y_predict)
    print('model accuray is :{0}'.format(score))
    return score

def predict_decisiontree(model_filepath):
    """Print the saved decision tree's class probabilities for two samples.

    Each sentence is encoded, reshaped to (1, 128, 768) and averaged over
    axis 1 before being passed to ``predict_proba``.
    """
    classifier = joblib.load(model_filepath)
    samples = [
        ('sentence1', '在经营中努力为客户提供快捷优质的信息、仓储、物流、类金融等服务，利用自身资源积极拓展新的客户，同时维护与上游客户良好的关系，总体保持持续稳定的发展。'),
        ('sentence2', '(3)  应收账款期末较期初减少 59,289,691.24 元，减少 35.01%，主要系本公司之子公司西藏泰达厚生医药有限公司本期销售收入下降以及整体出售原子公司四川禾正制药有限责任公司导致应收账款减少。'),
    ]
    # Encode both sentences before printing anything.
    pooled = [
        np.array(bert_rep_sentencevector(text)).reshape(1,128,768).mean(axis=1)
        for _, text in samples
    ]
    for (tag, _), vector in zip(samples, pooled):
        print(tag, classifier.predict_proba(vector))

In [12]:
# Train the decision tree, score it on the dev and test splits, then show two
# sample predictions.
model_filepath_tree = os.path.join(model_path, 'sentiment_decisiontree_model.m')
train_decisiontree(X_train, Y_train)
for split_X, split_Y in ((X_train, Y_train), (X_dev, Y_dev), (X_test, Y_test)):
    print(split_X.shape, split_Y.shape)
for split_X, split_Y in ((X_dev, Y_dev), (X_test, Y_test)):
    evaluate_decisiontree(model_filepath_tree, split_X, split_Y)
predict_decisiontree(model_filepath_tree)

(29434, 768) (29434,)
model accuray is :0.6105524223686892
sentence1 [[0. 0. 1.]]
sentence2 [[1. 0. 0.]]


# CNN

In [6]:
def train_cnn(X_train, Y_train, X_test, Y_test):
    """Train a CNN with six Conv1D layers on the token-level encodings.

    The input shape is ``(seq_len, hidden_size)`` and the labels are one-hot
    rows over three mutually exclusive classes. The output layer therefore
    uses softmax with categorical cross-entropy, the same pairing as
    ``train_lstm``. The earlier sigmoid + binary_crossentropy setup made Keras
    report per-element binary accuracy, which overstates how often the
    predicted class is right.

    The trained model is saved to ``sentiment_cnn_model.h5`` inside
    ``model_path``.

    Args:
        X_train, Y_train: training encodings and one-hot labels.
        X_test, Y_test: validation data passed to ``fit``.

    Returns:
        The Keras ``History`` object returned by ``model.fit``.
    """
    from keras.models import Sequential
    from keras.layers import Dense, Dropout
    from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D

    # Three stacks of two convolutions with shrinking filter counts.
    model = Sequential()
    model.add(Conv1D(128, 3, activation='relu', input_shape=(seq_len, hidden_size)))
    model.add(Conv1D(128, 3, activation='relu'))
    model.add(MaxPooling1D(3))
    model.add(Conv1D(64, 3, activation='relu'))
    model.add(Conv1D(64, 3, activation='relu'))
    model.add(MaxPooling1D(3))

    model.add(Conv1D(32, 3, activation='relu'))
    model.add(Conv1D(32, 3, activation='relu'))
    model.add(GlobalAveragePooling1D())
    model.add(Dropout(0.5))
    # One probability per class; the three outputs sum to 1.
    model.add(Dense(3, activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    model.summary()
    history = model.fit(X_train, Y_train, batch_size=100, epochs=5, shuffle=True,
                        validation_data=(X_test, Y_test))
    model.save(os.path.join(model_path,'sentiment_cnn_model.h5'))

    return history

def evaluate_cnn(X_test,Y_test,model_filepath):
    """Load the saved CNN and print its accuracy on ``(X_test, Y_test)``.

    Nothing is returned; the accuracy is only printed.
    """
    network = load_model(model_filepath)
    metrics = network.evaluate(X_test,Y_test)
    loss, accuracy = metrics
    print('model accuracy is :{0}'.format(accuracy))
    
def predict_cnn(model_filepath):
    """Print the saved CNN's prediction for one built-in sample sentence.

    The sentence encoding is wrapped in a list, turned into an array, and
    axis 1 is squeezed away before the array is passed to ``predict``.
    """
    network = load_model(model_filepath)
    sentence = '在经营中努力为客户提供快捷优质的信息、仓储、物流、类金融等服务，利用自身资源积极拓展新的客户，同时维护与上游客户良好的关系，总体保持持续稳定的发展。'
    stacked = np.array([bert_rep_sentencevector(sentence)])
    sentence_vector = np.squeeze(stacked, axis=1)
    print('test after load: ', network.predict(sentence_vector))

In [7]:
# Train the CNN with the dev split as validation data, score it on the test
# split, then show one sample prediction.
model_filepath = os.path.join(model_path, 'sentiment_cnn_model.h5')
for split_X, split_Y in ((X_train_2, Y_train_2), (X_dev_2, Y_dev_2), (X_test_2, Y_test_2)):
    print(split_X.shape, split_Y.shape)
train_cnn(X_train_2, Y_train_2, X_dev_2, Y_dev_2)
evaluate_cnn(X_test_2, Y_test_2, model_filepath)
predict_cnn(model_filepath)

(4113, 128, 768) (4113, 3)
(1013, 128, 768) (1013, 3)
(9074, 128, 768) (9074, 3)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 126, 128)          295040    
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 124, 128)          49280     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 41, 128)           0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 39, 64)            24640     
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 37, 64)            12352     
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 12, 64)            0         
_____________________________________________________________

# LSTM

In [23]:
def train_lstm(X_train, Y_train, X_test, Y_test):
    """Train a four-layer stacked LSTM on the token-level encodings.

    The input shape is ``(seq_len, hidden_size)``. The first three LSTM layers
    return full sequences and the last one returns a single vector, which
    feeds a three-way softmax. The trained model is saved to
    ``sentiment_lstm_model.h5`` inside ``model_path``.

    Args:
        X_train, Y_train: training encodings and one-hot labels.
        X_test, Y_test: validation data passed to ``fit``.

    Returns:
        The Keras ``History`` object returned by ``model.fit``.
    """
    from keras.models import Sequential
    from keras.layers import LSTM, Dense

    data_dim = hidden_size
    timesteps = seq_len

    model = Sequential()
    # Returns a sequence of vectors of dimension 64.
    model.add(LSTM(64, return_sequences=True,
                   input_shape=(timesteps, data_dim)))
    # Return sequences of vectors of dimension 32.
    model.add(LSTM(32, return_sequences=True))
    model.add(LSTM(32, return_sequences=True))
    # Returns a single vector of dimension 32.
    model.add(LSTM(32))
    model.add(Dense(3, activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    history = model.fit(X_train, Y_train, batch_size=100, epochs=7, shuffle=True,
                        validation_data=(X_test, Y_test))
    model.summary()
    model.save(os.path.join(model_path,'sentiment_lstm_model.h5'))

    return history
def evaluate_lstm(X_test,Y_test,model_filepath):
    """Load the saved LSTM and print its accuracy on ``(X_test, Y_test)``.

    Nothing is returned; the accuracy is only printed.
    """
    network = load_model(model_filepath)
    metrics = network.evaluate(X_test,Y_test)
    loss, accuracy = metrics
    print('model accuracy is :{0}'.format(accuracy))
    

def predict_lstm(model_filepath):
    """Print the saved LSTM's prediction for one built-in sample sentence.

    The sentence encoding is wrapped in a list, turned into an array, and
    axis 1 is squeezed away before the array is passed to ``predict``.

    Args:
        model_filepath: path of the ``.h5`` file written by ``train_lstm``.
    """
    model = load_model(model_filepath)
    # The literal used to open with ' and close with ", which is a SyntaxError.
    sentence = '在经营中努力为客户提供快捷优质的信息、仓储、物流、类金融等服务，利用自身资源积极拓展新的客户，同时维护与上游客户良好的关系，总体保持持续稳定的发展。'
    sentence_vector = np.squeeze(np.array([bert_rep_sentencevector(sentence)]),axis=1)
    print('test after load: ', model.predict(sentence_vector))


In [24]:
# Train the LSTM with the dev split as validation data, score it on the test
# split, then show one sample prediction.
model_filepath_lstm = os.path.join(model_path, 'sentiment_lstm_model.h5')
for split_X, split_Y in ((X_train_2, Y_train_2), (X_dev_2, Y_dev_2), (X_test_2, Y_test_2)):
    print(split_X.shape, split_Y.shape)
history_lstm = train_lstm(X_train_2, Y_train_2, X_dev_2, Y_dev_2)
evaluate_lstm(X_test_2, Y_test_2, model_filepath_lstm)
predict_lstm(model_filepath_lstm)

(10809, 128, 768) (10809, 3)
(2687, 128, 768) (2687, 3)
Train on 10809 samples, validate on 2687 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_9 (LSTM)                (None, 128, 64)           213248    
_________________________________________________________________
lstm_10 (LSTM)               (None, 128, 32)           12416     
_________________________________________________________________
lstm_11 (LSTM)               (None, 128, 32)           8320      
_________________________________________________________________
lstm_12 (LSTM)               (None, 32)                8320      
_________________________________________________________________
dense_6 (Dense)              (None, 3)                 99        
Total params: 242,403
Trainable params: 242,403
Non-trainable params: 0
___________________________