In [None]:
import os
from google.colab import drive
# Mount Google Drive into the Colab VM so the dataset files become visible
# under /content/drive.
drive.mount('/content/drive')

# Dataset directory on the mounted drive (Colab-specific absolute path).
path = "/content/drive/My Drive/关系抽取/"

# Make the dataset directory the working directory: later cells open the data
# files by bare file name (e.g. 'sent_train.txt').
os.chdir(path)
os.listdir(path)

Mounted at /content/drive


['sent_relation_dev.txt',
 'sent_dev.txt',
 'sent_test.txt',
 'sent_relation_test.txt',
 'bag_relation_train.txt',
 'sent_relation_train.txt',
 'bag_relation_test.txt',
 'relation2id.txt',
 'text.txt',
 'sent_train.txt',
 'bag_relation_dev.txt',
 'readme',
 'hit_stopwords.txt',
 'w2v',
 'text1.txt']

In [None]:
import pandas as pd

In [None]:
import jieba

In [None]:
from collections import Counter
from gensim.models import Word2Vec
import numpy as np
from sklearn.model_selection import StratifiedKFold
import keras.backend as K
import tensorflow as tf
import pandas as pd
import gc

In [None]:
def load_data(path):
    """Read a tab-separated sentence file into a DataFrame.

    The file has no header row; its four columns are named
    ``sent_id``, ``e1``, ``e2`` and ``text``. Every value in the ``e1``,
    ``e2`` and ``text`` columns is converted with ``str()`` and lower-cased.

    :param path: anything ``pandas.read_csv`` accepts (file path or buffer)
    :return: the resulting DataFrame
    """
    def as_lower_text(value):
        return str(value).lower()

    frame = pd.read_csv(path, delimiter='\t', header=None)
    frame.columns = ['sent_id', 'e1', 'e2', 'text']
    # Every column except the sentence id is normalised to lower-case text.
    for name in ('e1', 'e2', 'text'):
        frame[name] = frame[name].map(as_lower_text)
    return frame

In [None]:
sent_train=load_data('sent_train.txt')
sent_test=load_data('sent_test.txt')
sent_dev=load_data('sent_dev.txt')


In [None]:
def load_relation(path, bag=False):
    """Read a tab-separated, header-less relation (label) file.

    :param path: anything ``pandas.read_csv`` accepts (file path or buffer)
    :param bag: when true the file is expected to have the five bag-level
        columns ``bag_id, e1, e2, sent_ids, label``; otherwise the two
        sentence-level columns ``sent_id, label``
    :return: DataFrame with the column names described above
    """
    bag_columns = ['bag_id', 'e1', 'e2', 'sent_ids', 'label']
    sentence_columns = ['sent_id', 'label']

    table = pd.read_csv(path, delimiter='\t', header=None)
    table.columns = bag_columns if bag else sentence_columns
    return table

In [None]:
train_label=load_relation('sent_relation_train.txt', bag=False)
#sent_relation_test=load_relation('sent_relation_test.txt', bag=False)
dev_label=load_relation('sent_relation_dev.txt', bag=False)

In [None]:
sent_train = sent_train.merge(train_label, on='sent_id', how='left')
sent_dev = sent_dev.merge(dev_label, on='sent_id', how='left')

In [None]:
# A label cell may hold several space-separated relation ids; keep only the
# first one and store it as an int.
for labelled_frame in (sent_train, sent_dev):
    labelled_frame['label'] = labelled_frame['label'].map(
        lambda raw_label: int(str(raw_label).split(' ')[0])
    )

# Tokenise on single spaces (the corpus is already word-segmented).
# The train/dev text is lower-cased again here; the test text is split as-is.
for lowered_frame in (sent_train, sent_dev):
    lowered_frame['text_seg'] = lowered_frame['text'].map(
        lambda sentence: str(sentence).lower().split(' ')
    )
sent_test['text_seg'] = sent_test['text'].map(lambda sentence: str(sentence).split(' '))

In [None]:
sent_train.text_seg

0         [韩国, 梦想, 演唱会, 第十届, 2004, 年, :, mc, :, 金泰熙, ，, ...
1         [林散之, 先生, 等, 当代, 名家, 对, 辛文山, 先生, 的, 书法, 均, 有, ...
2         [吉莱斯, ·, 勒华, 人物, 生平, :, 吉莱斯, ·, 勒华, 1958, 年, 1...
3         [（, 原, 重庆, 警备区, 顾问, ）, :, 张铭, （, 1920, —, ), ，...
4         [中国, 工艺美术, 大师, 张育贤, 先生, ，, 景德镇市, 美术家, 协会主席, 、,...
                                ...                        
281236    [李彪, 敏感, 地, 看出, 了, 周武, 、, 李英姿, 、, 周双, 和, 刘大壮, ...
281237    [高则, 让, 夏侯杰, 信, 他, 赵子龙, 确是, 细作, ，, 但, 夏侯杰, 称失,...
281238    [回到, 真, 定县, 的, 子龙到, 李全, 的, 坟, 前, 进行, 祭拜, ，, 却,...
281239    [可是, ，, 他, 却, 没有, 想到, ，, 暗中, 观察, 的, 高则, ，, 竟然,...
281240    [他, 坦然, 承认, 夏侯杰, 正是, 死, 在, 自己, 手里, 的, ，, 并, 问轻...
Name: text_seg, Length: 281241, dtype: object

## Build vocab

In [None]:
def load_stop_words(stop_word_path):
    '''
    Load a stop-word list from a UTF-8 text file with one entry per line.

    :param stop_word_path: path of the stop-word file
    :return: list with one whitespace-stripped string per line of the file
             (blank lines are kept as empty strings)
    '''
    # The context manager guarantees the handle is closed, even on error.
    with open(stop_word_path, 'r', encoding='utf-8') as file:
        return [stop_word.strip() for stop_word in file]

stop_words=load_stop_words('hit_stopwords.txt')
print('stop words size {}'.format(len(stop_words)))

stop words size 767


In [None]:
def filter_stopwords(words):
    '''
    Drop stop words from a tokenised sentence.

    :param words: list of tokens [word1, word2, ...]
    :return: new list holding, in their original order, the tokens that do
             not occur in the module-level ``stop_words`` collection
    '''
    kept = []
    for token in words:
        if token in stop_words:
            continue
        kept.append(token)
    return kept

In [None]:
def remove_stopwords(sent):
    """Remove stop words from every token list in ``sent['text_seg']``.

    The column is replaced as a whole instead of being written cell by cell
    through ``sent.text_seg[i] = ...``: that chained assignment raises
    SettingWithCopyWarning, silently does nothing under pandas copy-on-write,
    and only works when the index is exactly 0..n-1.

    :param sent: DataFrame with a ``text_seg`` column of token lists
    :return: the same DataFrame, with ``text_seg`` filtered by
             ``filter_stopwords``
    """
    sent['text_seg'] = sent['text_seg'].map(filter_stopwords)
    return sent

In [None]:
from collections import Counter
# Flatten every training sentence into one long token list.
all_text_list = [token for tokens in sent_train['text_seg'].values for token in tokens]
# Token -> number of occurrences in the training set.
text_dict = Counter(all_text_list)
# Keep only the tokens that occur at least 5 times.
new_text_dict = {token: count for token, count in text_dict.items() if count >= 5}

In [None]:
def get_entity_idx(data):
    """Add the token position of each entity to ``data``.

    For every row, ``e1_idx`` / ``e2_idx`` receive the index of the first
    token in ``text_seg`` equal to ``e1`` / ``e2``. ``list.index`` raises
    ``ValueError`` when the entity is not one of the tokens.

    :param data: DataFrame with columns ``e1``, ``e2`` and ``text_seg``
    :return: the same DataFrame with the two new columns
    """
    for entity_col in ('e1', 'e2'):
        data[entity_col + '_idx'] = data[[entity_col, 'text_seg']].apply(
            lambda row: row['text_seg'].index(row[entity_col]),
            axis=1,
        )
    return data

In [None]:
# 获取实体在序列中的位置，目前只标记位于第一个的位置，多次出现的暂无处理方法
sent_train = get_entity_idx(sent_train)
sent_dev = get_entity_idx(sent_dev)
sent_test = get_entity_idx(sent_test)

In [None]:
# 获取句子中其他词与实体之间的距离，实际中可能用位置向量较多，但实际意义相同
def get_pos_distance(data):
    """Add per-token signed offsets to each entity.

    For every row, ``e1_distance`` / ``e2_distance`` become a list with one
    entry per token of ``text_seg``: the token position minus ``e1_idx`` /
    ``e2_idx`` (negative before the entity, 0 at the entity, positive after).

    :param data: DataFrame with columns ``e1_idx``, ``e2_idx``, ``text_seg``
    :return: the same DataFrame with the two new columns
    """
    for entity in ('e1', 'e2'):
        idx_col = entity + '_idx'
        data[entity + '_distance'] = data[[idx_col, 'text_seg']].apply(
            lambda row: [position - row[idx_col] for position in range(len(row['text_seg']))],
            axis=1,
        )
    return data

In [None]:
# 获取句子中其他词与实体之间的距离，实际中可能用位置向量较多，但实际意义相同
sent_train = get_pos_distance(sent_train)
sent_dev = get_pos_distance(sent_dev)
sent_test = get_pos_distance(sent_test)

In [None]:
!ls

bag_relation_dev.txt	relation2id.txt		 sent_test.txt
bag_relation_test.txt	sent_dev.txt		 sent_train.txt
bag_relation_train.txt	sent_relation_dev.txt	 text1.txt
hit_stopwords.txt	sent_relation_test.txt	 text.txt
readme			sent_relation_train.txt  w2v


In [None]:
import os
def get_w2v():
    """Train a Word2Vec model and save it to ``./w2v/w2v_model.w2v``.

    The training sentences are (a) the lower-cased, space-split ``text``
    column of the train/dev/test sentence files and (b) the first 1,000,000
    lines of ``text.txt`` segmented with jieba. Every token seen in (a) is
    first registered with ``jieba.add_word``. All files are read relative to
    the current working directory, and the ``./w2v/`` directory must already
    exist (the ``mkdir`` below is commented out).
    """
    #os.mkdir('./w2v/')
    column_names = ['sent_id', 'e1', 'e2', 'text']

    # Data used for training / prediction.
    frames = []
    for file_name in ('sent_train.txt', 'sent_dev.txt', 'sent_test.txt'):
        frame = pd.read_csv(file_name, delimiter='\t', header=None)
        frame.columns = column_names
        frames.append(frame)

    # Extra raw corpus, used only to train the word vectors.
    with open('text.txt', 'r', encoding='utf8') as corpus_file:
        corpus_lines = [line.strip('\n') for line in corpus_file.readlines()]
    corpus_lines = corpus_lines[:1000000]

    labelled_text = pd.concat([frame['text'] for frame in frames])
    labelled_tokens = [str(sentence).lower().split(' ') for sentence in labelled_text]

    # Register the dataset's tokens with jieba before segmenting the corpus.
    vocabulary = Counter(token for tokens in labelled_tokens for token in tokens)
    for token in vocabulary.keys():
        jieba.add_word(token)
    corpus_tokens = [jieba.lcut(line) for line in corpus_lines]

    training_sentences = labelled_tokens + corpus_tokens
    model = Word2Vec(size=128)
    model.build_vocab(training_sentences)
    model.train(training_sentences, total_examples=model.corpus_count, epochs=5)
    model.save('./w2v/w2v_model.w2v')

In [None]:
get_w2v()

Building prefix dict from the default dictionary ...
Dumping model to file cache /tmp/jieba.cache
Loading model cost 0.896 seconds.
Prefix dict has been built successfully.


In [None]:
def get_word_index(text, index_word):
    """Convert tokenised sentences to lists of integer ids.

    :param text: iterable of sentences, each an iterable of tokens
    :param index_word: mapping from token to id (despite its name it is
        indexed by token); a lookup that raises ``KeyError`` yields id 1
    :return: list with one list of ids per sentence
    """
    def to_id(token):
        try:
            return index_word[token]
        except KeyError:
            return 1

    return [[to_id(token) for token in sentence] for sentence in text]

In [None]:
w2v_model = Word2Vec.load('./w2v/w2v_model.w2v')

In [None]:
# Keep only the frequent training tokens that have a trained vector; ids 0
# and 1 are reserved for padding (PAD) and unknown words (UNK).
# Lookups go through `w2v_model.wv`: membership tests and indexing on the
# Word2Vec model object itself are deprecated and were removed in gensim 4.
word_vectors = w2v_model.wv
word_in_w2v = [key for key in new_text_dict.keys() if key in word_vectors]

# NOTE: the names are historical -- `word_index` maps id -> word and
# `index_word` maps word -> id. They are kept because later cells use them.
word_index = dict()
word_index[0] = 'PAD'
word_index[1] = 'UNK'
for i, word in enumerate(word_in_w2v):
    word_index[i + 2] = word
index_word = {word_index[key]: key for key in word_index.keys()}

# Embedding matrix: row i holds the vector of word id i. The PAD and UNK rows
# stay all-zero. The width follows the trained model instead of a literal 128.
word_matrix = np.zeros((len(word_index), word_vectors.vector_size))
for key in word_index:
    if word_index[key] not in ['UNK', 'PAD']:
        word_matrix[key] = word_vectors[word_index[key]]

# Convert every token to its vocabulary id (1 for out-of-vocabulary tokens).
sent_train['word_index'] = get_word_index(sent_train['text_seg'], index_word)
sent_dev['word_index'] = get_word_index(sent_dev['text_seg'], index_word)
sent_test['word_index'] = get_word_index(sent_test['text_seg'], index_word)

  after removing the cwd from sys.path.


In [None]:
def change_entity_idx(x):
    """Overwrite the word id of every entity mention with 1.

    :param x: row-like object with keys ``e1``, ``e2``, ``text_seg`` (tokens)
        and ``word_index`` (ids aligned with the tokens)
    :return: ``x['word_index']``, modified in place so that every position
        whose token equals ``e1`` or ``e2`` holds 1
    """
    ids = x['word_index']
    head, tail = x['e1'], x['e2']
    for position, token in enumerate(x['text_seg']):
        if token == head or token == tail:
            ids[position] = 1
    return ids

In [None]:
def modify_pos_idx(x):
    """Clamp relative-position values into the range used for embeddings.

    Each value ``v`` is mapped as follows: negative -> 1, greater than 99 ->
    99, anything else unchanged. Note that every negative offset collapses to
    the same value (1) as an offset of +1.

    :param x: iterable of integer offsets
    :return: new list with the clamped values
    """
    return [1 if offset < 0 else min(offset, 99) for offset in x]

In [None]:
for frame in (sent_train, sent_dev, sent_test):
    # Convert tokens to vocabulary ids.
    frame['word_index'] = get_word_index(frame['text_seg'], index_word)
    # Replace every entity mention by id 1 so the model cannot simply
    # memorise the entities themselves.
    frame['word_index'] = frame[['e1', 'e2', 'text_seg', 'word_index']].apply(change_entity_idx, axis=1)
    # Clamp the relative-position values into the range supported downstream.
    for distance_col in ('e1_distance', 'e2_distance'):
        frame[distance_col] = frame[distance_col].map(modify_pos_idx)

In [None]:

def get_sent_padding(data, max_len=50):
    """Truncate or zero-pad the per-sentence sequences to a fixed length.

    The ``word_index``, ``e1_distance`` and ``e2_distance`` columns must hold
    Python lists. Each list is cut to its first ``max_len`` items and, if
    shorter, right-padded with 0 until it has exactly ``max_len`` items.

    :param data: DataFrame holding the three list columns; modified in place
    :param max_len: target sequence length (default 50, the original
        hard-coded value)
    :return: the same DataFrame
    """
    def fit_to_length(sequence):
        clipped = sequence[:max_len]
        return clipped + [0] * (max_len - len(clipped))

    for column in ('word_index', 'e1_distance', 'e2_distance'):
        data[column] = data[column].map(fit_to_length)
    return data

In [None]:
sent_train.head(1)

Unnamed: 0,sent_id,e1,e2,text,label,text_seg,e1_idx,e2_idx,e1_distance,e2_distance,word_index
0,TRAIN_SENT_ID_000001,金泰熙,金东,韩国 梦想 演唱会 第十届 2004 年 : mc : 金泰熙 ， 金东 万,0,"[韩国, 梦想, 演唱会, 第十届, 2004, 年, :, mc, :, 金泰熙, ，, ...",9,11,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 2, 3]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1]","[2, 3, 4, 5, 6, 7, 8, 9, 8, 1, 11, 1, 12]"


In [None]:
sent_train.head(1)

Unnamed: 0,sent_id,e1,e2,text,label,text_seg,e1_idx,e2_idx,e1_distance,e2_distance,word_index
0,TRAIN_SENT_ID_000001,金泰熙,金东,韩国 梦想 演唱会 第十届 2004 年 : mc : 金泰熙 ， 金东 万,0,"[韩国, 梦想, 演唱会, 第十届, 2004, 年, :, mc, :, 金泰熙, ，, ...",9,11,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 2, 3]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1]","[2, 3, 4, 5, 6, 7, 8, 9, 8, 1, 11, 1, 12]"


In [None]:
# 对于相同实体对的句子，进行集包处理

def get_group_data(data):
    """Group sentence-level features into bags keyed by entity pair.

    Rows sharing the same ``(e1, e2)`` pair are collected together: for each
    pair the values of ``word_index``, ``e1_distance`` and ``e2_distance`` are
    gathered into one array per column.

    :param data: DataFrame with columns ``e1``, ``e2``, ``word_index``,
        ``e1_distance`` and ``e2_distance``
    :return: DataFrame with columns ``e1``, ``e2``, ``word_idx``,
        ``e1_distance`` and ``e2_distance`` (one row per entity pair)
    """
    pair_keys = ['e1', 'e2']

    def collect(source_col, target_col):
        grouped = data.groupby(pair_keys).apply(lambda bag: bag[source_col].values).reset_index()
        grouped.columns = pair_keys + [target_col]
        return grouped

    bags = collect('word_index', 'word_idx')
    for distance_col in ('e1_distance', 'e2_distance'):
        bags = bags.merge(collect(distance_col, distance_col), on=pair_keys, how='left')
    return bags

In [None]:
def get_group_padding(data):
    """Bring every bag to exactly 50 sentences and store it as an array.

    For each of the ``word_idx``, ``e1_distance`` and ``e2_distance`` columns
    the bag (an array of sentences) is converted to a list, extended with
    all-zero sentences of length 50 until it holds 50 entries, cut to its
    first 50 entries, and finally turned into a NumPy array.

    :param data: DataFrame as produced by ``get_group_data``; modified in place
    :return: the same DataFrame
    """
    def to_fixed_bag(bag):
        sentences = bag.tolist() + [[0] * 50] * (50 - len(bag))
        return np.array(sentences[:50])

    for column in ('word_idx', 'e1_distance', 'e2_distance'):
        data[column] = data[column].map(to_fixed_bag)
    return data

In [None]:
# Truncate / zero-pad every sentence-level sequence to exactly 50 entries.
sent_train, sent_dev, sent_test = [
    get_sent_padding(frame) for frame in (sent_train, sent_dev, sent_test)
]

'''
# 对于相同实体对的句子，进行集包处理
train_idx_grp = get_group_data(sent_train)
dev_idx_grp = get_group_data(sent_dev)
test_idx_grp = get_group_data(sent_test)
# 对于包中长度没有达到要求的，补上句子
train_idx_grp = get_group_padding(train_idx_grp)
dev_idx_grp = get_group_padding(dev_idx_grp)
test_idx_grp = get_group_padding(test_idx_grp)'''

'\n# 对于相同实体对的句子，进行集包处理\ntrain_idx_grp = get_group_data(sent_train)\ndev_idx_grp = get_group_data(sent_dev)\ntest_idx_grp = get_group_data(sent_test)\n# 对于包中长度没有达到要求的，补上句子\ntrain_idx_grp = get_group_padding(train_idx_grp)\ndev_idx_grp = get_group_padding(dev_idx_grp)\ntest_idx_grp = get_group_padding(test_idx_grp)'

In [None]:
'''
# 为集包之后的训练集补上标签
train_label_grp = sent_train.groupby(['e1', 'e2']).apply(lambda x: list(set(x['label'].values))[0]).reset_index()
train_label_grp.columns = ['e1', 'e2', 'label']
train_idx_grp = train_idx_grp.merge(train_label_grp, on=['e1', 'e2'], how='left')
# 为集包之后的验证集补上标签
dev_label_grp = sent_dev.groupby(['e1', 'e2']).apply(lambda x: list(set(x['label'].values))[0]).reset_index()
dev_label_grp.columns = ['e1', 'e2', 'label']
dev_idx_grp = dev_idx_grp.merge(dev_label_grp, on=['e1', 'e2'], how='left')'''

"\n# 为集包之后的训练集补上标签\ntrain_label_grp = sent_train.groupby(['e1', 'e2']).apply(lambda x: list(set(x['label'].values))[0]).reset_index()\ntrain_label_grp.columns = ['e1', 'e2', 'label']\ntrain_idx_grp = train_idx_grp.merge(train_label_grp, on=['e1', 'e2'], how='left')\n# 为集包之后的验证集补上标签\ndev_label_grp = sent_dev.groupby(['e1', 'e2']).apply(lambda x: list(set(x['label'].values))[0]).reset_index()\ndev_label_grp.columns = ['e1', 'e2', 'label']\ndev_idx_grp = dev_idx_grp.merge(dev_label_grp, on=['e1', 'e2'], how='left')"

In [None]:
#0 的标签过多
'''train_idx_grp_part=train_idx_grp.iloc[3300:]'''

'train_idx_grp_part=train_idx_grp.iloc[3300:]'

In [None]:
sent_train.head(2)

Unnamed: 0,sent_id,e1,e2,text,label,text_seg,e1_idx,e2_idx,e1_distance,e2_distance,word_index
0,TRAIN_SENT_ID_000001,金泰熙,金东,韩国 梦想 演唱会 第十届 2004 年 : mc : 金泰熙 ， 金东 万,0,"[韩国, 梦想, 演唱会, 第十届, 2004, 年, :, mc, :, 金泰熙, ，, ...",9,11,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 2, 3, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, ...","[2, 3, 4, 5, 6, 7, 8, 9, 8, 1, 11, 1, 12, 0, 0..."
1,TRAIN_SENT_ID_000002,辛文山,林散之,林散之 先生 等 当代 名家 对 辛文山 先生 的 书法 均 有 精辟 的 点评 ， 对 书...,0,"[林散之, 先生, 等, 当代, 名家, 对, 辛文山, 先生, 的, 书法, 均, 有, ...",6,0,"[1, 1, 1, 1, 1, 1, 0, 1, 2, 3, 4, 5, 6, 7, 8, ...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[1, 14, 15, 16, 17, 18, 1, 14, 19, 20, 21, 22,..."


In [None]:
# Stack the per-sentence lists into dense arrays (one row per sentence).
part_train = np.array(sent_train['word_index'].tolist())
part_train_pos1 = np.array(sent_train['e1_distance'].tolist())
part_train_pos2 = np.array(sent_train['e2_distance'].tolist())
part_label = sent_train['label'].values

part_dev = np.array(sent_dev['word_index'].tolist())
part_dev_pos1 = np.array(sent_dev['e1_distance'].tolist())
part_dev_pos2 = np.array(sent_dev['e2_distance'].tolist())
part_dev_label = sent_dev['label'].values

# The test split has no labels.
part_test = np.array(sent_test['word_index'].tolist())
part_test_pos1 = np.array(sent_test['e1_distance'].tolist())
part_test_pos2 = np.array(sent_test['e2_distance'].tolist())


In [None]:
part_train_pos1.shape

(281241, 50)

## Model

In [None]:
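# The LSTM input is the concatenation of the entity-1 position embedding, the entity-2 position embedding and the word embedding. The LSTM output holds the hidden states h of the last layer.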

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
torch.manual_seed(1)  

class BiLSTM_ATT(nn.Module):
    """Bidirectional LSTM with attention for sentence-level relation classification.

    Inputs are word ids plus two relative-position id sequences; their
    embeddings are concatenated, run through a single-layer BiLSTM, pooled
    with an attention vector and scored against one embedding per relation.

    ``config`` keys read: ``BATCH``, ``EMBEDDING_SIZE``, ``EMBEDDING_DIM``,
    ``HIDDEN_DIM``, ``TAG_SIZE``, ``POS_SIZE``, ``POS_DIM``, ``pretrained``.

    Limitations inherent to this architecture:
    * ``att_weight`` and ``relation_bias`` carry a leading batch dimension, so
      ``forward`` only accepts batches of exactly ``config['BATCH']`` rows.
    * The LSTM initial state is re-drawn from a normal distribution on every
      forward pass, so outputs are not deterministic, even in eval mode.
    * ``forward`` returns softmax probabilities, not raw scores.
    """

    def __init__(self,config,embedding_pre):
        """
        :param config: dict of hyper-parameters (see class docstring)
        :param embedding_pre: array-like (vocab, dim) of pretrained word
            vectors; only used when ``config['pretrained']`` is true
        """
        super().__init__()
        self.batch = config['BATCH']

        self.embedding_size = config['EMBEDDING_SIZE']
        self.embedding_dim = config['EMBEDDING_DIM']

        self.hidden_dim = config['HIDDEN_DIM']
        self.tag_size = config['TAG_SIZE']

        self.pos_size = config['POS_SIZE']
        self.pos_dim = config['POS_DIM']

        self.pretrained = config['pretrained']
        if self.pretrained:
            # freeze=False: the pretrained vectors are fine-tuned during training.
            self.word_embeds = nn.Embedding.from_pretrained(torch.FloatTensor(embedding_pre),freeze=False)
        else:
            self.word_embeds = nn.Embedding(self.embedding_size,self.embedding_dim)

        self.pos1_embeds = nn.Embedding(self.pos_size,self.pos_dim)
        self.pos2_embeds = nn.Embedding(self.pos_size,self.pos_dim)
        self.relation_embeds = nn.Embedding(self.tag_size,self.hidden_dim)

        # Each direction gets hidden_dim // 2 units so the concatenated
        # bidirectional output has hidden_dim features.
        self.lstm = nn.LSTM(input_size=self.embedding_dim+self.pos_dim*2,hidden_size=self.hidden_dim//2,num_layers=1, bidirectional=True)
        self.hidden2tag = nn.Linear(self.hidden_dim,self.tag_size)

        self.dropout_emb=nn.Dropout(p=0.5)
        self.dropout_lstm=nn.Dropout(p=0.5)
        self.dropout_att=nn.Dropout(p=0.5)

        self.hidden = self.init_hidden()

        self.att_weight = nn.Parameter(torch.randn(self.batch,1,self.hidden_dim))
        self.relation_bias = nn.Parameter(torch.randn(self.batch,self.tag_size,1))

    def init_hidden(self, device=None):
        """Return one random state tensor of shape (2, batch, hidden_dim // 2)."""
        return torch.randn(2, self.batch, self.hidden_dim // 2, device=device)

    def init_hidden_lstm(self, device=None):
        """Return a random (h0, c0) pair for the BiLSTM, created on ``device``."""
        return (torch.randn(2, self.batch, self.hidden_dim // 2, device=device),
                torch.randn(2, self.batch, self.hidden_dim // 2, device=device))

    def attention(self,H):
        """Attention pooling over the sequence.

        :param H: tensor of shape (batch, hidden_dim, seq_len)
        :return: tensor of shape (batch, hidden_dim, 1)
        """
        # torch.tanh replaces the deprecated torch.nn.functional.tanh.
        M = torch.tanh(H)
        a = F.softmax(torch.bmm(self.att_weight,M),2)
        a = torch.transpose(a,1,2)
        return torch.bmm(H,a)

    def forward(self,sentence,pos1,pos2):
        """Score every relation for every sentence in the batch.

        :param sentence: LongTensor (batch, seq_len) of word ids
        :param pos1: LongTensor (batch, seq_len) of entity-1 position ids
        :param pos2: LongTensor (batch, seq_len) of entity-2 position ids
        :return: tensor (batch, tag_size) of probabilities (each row sums to 1)
        """
        # Create auxiliary tensors on the same device as the inputs so the
        # model also works after being moved off the CPU.
        device = sentence.device

        self.hidden = self.init_hidden_lstm(device)

        embeds = torch.cat((self.word_embeds(sentence),self.pos1_embeds(pos1),self.pos2_embeds(pos2)),2)

        # nn.LSTM defaults to sequence-first input: (seq_len, batch, features).
        embeds = torch.transpose(embeds,0,1)

        lstm_out, self.hidden = self.lstm(embeds, self.hidden)

        # (seq_len, batch, hidden) -> (batch, hidden, seq_len) for attention.
        lstm_out = torch.transpose(lstm_out,0,1)
        lstm_out = torch.transpose(lstm_out,1,2)

        lstm_out = self.dropout_lstm(lstm_out)
        att_out = torch.tanh(self.attention(lstm_out))
        #att_out = self.dropout_att(att_out)

        # One row of relation ids 0..tag_size-1 per batch element.
        relation = torch.arange(self.tag_size, dtype=torch.long, device=device).repeat(self.batch, 1)

        relation = self.relation_embeds(relation)

        res = torch.add(torch.bmm(relation,att_out),self.relation_bias)

        res = F.softmax(res,1)

        return res.view(self.batch,-1)

In [None]:
word_matrix.shape[0]

88851

In [None]:
import numpy as np
import sys
import codecs
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as D
from torch.autograd import Variable        

# --- Hyper-parameters -------------------------------------------------------
EMBEDDING_SIZE = word_matrix.shape[0]  # vocabulary size (rows of the embedding matrix)
EMBEDDING_DIM = 128                    # width of the word vectors

POS_SIZE = 100  # number of position ids; may raise an index error on other datasets
POS_DIM = 128

HIDDEN_DIM = 200

TAG_SIZE = 35   # number of relation classes -- presumably matches relation2id.txt; TODO confirm

BATCH = 128
EPOCHS = 10

config = {
    'EMBEDDING_SIZE': EMBEDDING_SIZE,
    'EMBEDDING_DIM': EMBEDDING_DIM,
    'POS_SIZE': POS_SIZE,
    'POS_DIM': POS_DIM,
    'HIDDEN_DIM': HIDDEN_DIM,
    'TAG_SIZE': TAG_SIZE,
    'BATCH': BATCH,
    'pretrained': True,
}

learning_rate = 0.0005


embedding_pre = word_matrix

model = BiLSTM_ATT(config,embedding_pre)
#model = torch.load('model/model_epoch20.pkl')
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
# `size_average=True` is deprecated; `reduction='mean'` is the equivalent.
# NOTE(review): CrossEntropyLoss applies log-softmax itself, while the model
# already returns softmax probabilities -- confirm this is intended.
criterion = nn.CrossEntropyLoss(reduction='mean')



In [None]:
# Drop the trailing samples that do not fill a whole batch, so that every
# batch handed to the model has exactly BATCH rows.
n_train = len(part_train) - len(part_train) % BATCH

train = torch.LongTensor(part_train[:n_train])
position1 = torch.LongTensor(part_train_pos1[:n_train])
position2 = torch.LongTensor(part_train_pos2[:n_train])
labels = torch.LongTensor(part_label[:n_train])

train_datasets = D.TensorDataset(train, position1, position2, labels)
train_dataloader = D.DataLoader(train_datasets, batch_size=BATCH, shuffle=True, num_workers=2)

In [None]:
# Same truncation to whole batches for the dev split. Note that the `test*`
# names below hold the *dev* data.
n_dev = len(part_dev) - len(part_dev) % BATCH

test = torch.LongTensor(part_dev[:n_dev])
position1_t = torch.LongTensor(part_dev_pos1[:n_dev])
position2_t = torch.LongTensor(part_dev_pos2[:n_dev])
labels_t = torch.LongTensor(part_dev_label[:n_dev])

test_datasets = D.TensorDataset(test, position1_t, position2_t, labels_t)
test_dataloader = D.DataLoader(test_datasets, batch_size=BATCH, shuffle=True, num_workers=2)

In [None]:
for epoch in range(EPOCHS):
    print("epoch:",epoch)
    acc=0
    total=0

    # ---- training pass ----
    model.train()  # re-enable dropout after the previous epoch's evaluation
    for sentence,pos1,pos2,tag in train_dataloader:
        y = model(sentence,pos1,pos2)
        loss = criterion(y, tag)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        y = np.argmax(y.detach().numpy(),axis=1)
        acc += int((y == tag.numpy()).sum())
        total += len(y)

    print ("train:",100*float(acc)/total,"%")

    # ---- evaluation pass on the dev split ----
    acc_t=0
    total_t=0
    # One counter per class. These lists were sized 34 while TAG_SIZE is 35,
    # which raises IndexError as soon as class 34 is predicted or observed.
    count_predict = [0]*TAG_SIZE
    count_total = [0]*TAG_SIZE
    count_right = [0]*TAG_SIZE
    model.eval()  # disable dropout while evaluating
    with torch.no_grad():  # no gradients are needed for evaluation
        for sentence,pos1,pos2,tag in test_dataloader:
            y = model(sentence,pos1,pos2)
            y = np.argmax(y.numpy(),axis=1)
            for y1,y2 in zip(y.tolist(),tag.tolist()):
                count_predict[y1]+=1
                count_total[y2]+=1
                if y1==y2:
                    count_right[y1]+=1
                    acc_t+=1
                total_t+=1

    if total_t:
        print ("dev:",100*float(acc_t)/total_t,"%")

epoch: 0




train: 86.95273384160218 %
epoch: 1
train: 87.20449761037779 %
epoch: 2
