## Import the using package

In [9]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import requests
import numpy as np
import sklearn
import monpa
import random
import pickle
import torch.nn.init as weight_init

In [10]:
def weights_init(m):
    if isinstance(m, nn.GRU):
        nn.init.orthogonal_(m.all_weights)
        nn.init.orthogonal_(m.all_bias)  

## The Architecture of the Designed Model using only Bi-GRU 
<img src="https://i.imgur.com/wOizgXZ.png" width = "300" height = "200" alt="design_model" align=center />

In [11]:
class Bi_GRU_Model (nn.Module):
    def __init__(self, total_tag, embeding_size, gru_hidden,gru_layer, output_size):
        super(Bi_GRU_Model, self).__init__()
        self.total_tag = total_tag
        self.embeding_size = embeding_size
        self.gru_hidden = gru_hidden
        self.gru_layer = gru_layer
        self.output_size = output_size
        self.Embeding = nn.Embedding(self.total_tag, self.embeding_size)
        self.GRU = nn.GRU(self.embeding_size , self.gru_hidden, self.gru_layer, bidirectional= True)
        weight_init.orthogonal_(self.GRU.weight_ih_l0)
        weight_init.orthogonal_(self.GRU.weight_hh_l0)
        # use zero init for GRU layer0 bias
        #self.GRU.bias_ih_l0.zero_()
        #self.GRU.bias_hh_l0.zero_()
        
        # batch size is  set to 1 , output_size is set to 2
        self.linear = nn.Linear( self.gru_hidden*2 , self.output_size )
        
    def forward(self, input_data):  
        embed_res = self.Embeding (input_data)
        #print ('Embedding :::: ' , embed_res.size())
        # let the input dimension be the (L , N , M) and the get the output with the dimension (L , N , H)
        gru_res, _ = self.GRU(embed_res.unsqueeze(1))
        #print ("GRU :::: ", gru_res.size())
        linear_res = self.linear(gru_res[-1])
        final_res = F.log_softmax(linear_res)
        #print ('final res :::: ' , final_res)
        return final_res



In [12]:
model_v1 = Bi_GRU_Model(total_tag=16, embeding_size=8, gru_hidden= 6 , gru_layer=4, output_size=7 )
#model_v1.GRU.apply(weights_init)
test_vec = torch.LongTensor([1,2,3,4,5,6,7])
model_v1(test_vec)
print ('V1_Model_with_only_gru ::: \n', model_v1)

V1_Model_with_only_gru ::: 
 Bi_GRU_Model(
  (Embeding): Embedding(16, 8)
  (GRU): GRU(8, 6, num_layers=4, bidirectional=True)
  (linear): Linear(in_features=12, out_features=7, bias=True)
)




## Data Pre-processing

* 將positive_data和negative_data載入存入List
* 將monpa的tags載入dictionary
* 將sentence 的 words 轉成詞性


In [None]:
training_data_sentence_list = list()
training_data_tag_list = list()
training_target_sentence_list = list()
dict_idx2tag = dict()
dict_tag2idx = dict()
'''
with open ('../CJX_Train_Test_Data/try_train_p_v2.txt', 'r', encoding = 'utf-8') as rf:
    for pos_s in rf.readlines():
        training_data_sentence_list.append(pos_s.strip('\n').strip(',').strip('，').strip('《').strip('》').strip('【').strip('】').strip('、').strip('、').strip('-'))
        training_target_sentence_list.append(1)
with open ('../CJX_Train_Test_Data/try_train_n.txt', 'r', encoding = 'utf-8') as rf:
    for pos_s in rf.readlines():
        training_data_sentence_list.append(pos_s.strip('\n').strip(',').strip('，').strip('《').strip('》').strip('【').strip('】').strip('、').strip('-'))
        training_target_sentence_list.append(0)
tmp_cnt = 0
# Change the words to tags, because we need to use the sequential tags for training.
for sentence in training_data_sentence_list:
    tmp_cnt += 1
    #print (tmp_cnt)
    tmp = monpa.pseg(sentence)
    tmp_list = list()
    for item in tmp:
        tmp_list.append(item[1])
    training_data_tag_list.append(tmp_list)
    
print (len(training_data_tag_list))
'''

# Read both the postive data and negative data
with open ('../CJX_Train_Test_Data/positive_data.txt', 'r', encoding = 'utf-8') as rf:
    for pos_s in rf.readlines():
        training_data_sentence_list.append(pos_s.strip('\n').strip(',').strip('，').strip('《').strip('》').strip('【').strip('】').strip('、').strip('、').strip('-'))
        training_target_sentence_list.append(1)
with open ('../CJX_Train_Test_Data/positive_data.txt', 'r', encoding = 'utf-8') as rf:
    for pos_s in rf.readlines():
        training_data_sentence_list.append(pos_s.strip('\n').strip(',').strip('，').strip('《').strip('》').strip('【').strip('】').strip('、').strip('-'))
        training_target_sentence_list.append(0)
        
print ('training data size : ',len(training_data_sentence_list))
print ('training data target size : ',len(training_target_sentence_list))
tmp_cnt = 0
# Change the words to tags, because we need to use the sequential tags for training.
for sentence in training_data_sentence_list:
    tmp_cnt += 1
    #print (tmp_cnt)
    tmp = monpa.pseg(sentence)
    tmp_list = list()
    for item in tmp:
        tmp_list.append(item[1])
    training_data_tag_list.append(tmp_list)
    
print (len(training_data_tag_list))

# Read the tags of the monpa
with open ('./monpa_tag.txt' , 'r' , encoding = 'utf-8') as rf:
    cnt = 0
    for tag in rf.readlines():
        dict_idx2tag[cnt] = tag.strip('\n')
        dict_tag2idx[tag.strip('\n')] = cnt
        cnt += 1  

print ('The dictionary of idxs 2 tags : ', dict_idx2tag)
print ('The dictionary of tags 2 idxs : ' , dict_tag2idx)

shuffle_zip = list(zip(training_data_tag_list, training_target_sentence_list))
random.shuffle(shuffle_zip)
training_data_tag_list[:], training_target_sentence_list[:] = zip(*shuffle_zip)
print (training_data_tag_list, training_target_sentence_list)



training data size :  7070
training data target size :  7070


## Save the training data list

In [None]:
f = open ('../pickle/training_tags.pkl', 'wb')
pickle.dump(training_data_tag_list, f)
f.close()
f = open ('../pickle/target.pkl', 'wb')
pickle.dump(training_target_sentence_list, f)
f.close()

In [272]:
f = open ('../pickle/training_tags.pkl', 'rb')
a = pickle.load( f)
f.close()
f = open ('../pickle/target.pkl', 'rb')
b = pickle.load(f)
f.close()

In [14]:
dict_idx2tag = dict()
dict_tag2idx = dict()
with open ('./monpa_tag.txt' , 'r' , encoding = 'utf-8') as rf:
    cnt = 0
    for tag in rf.readlines():
        dict_idx2tag[cnt] = tag.strip('\n')
        dict_tag2idx[tag.strip('\n')] = cnt
        cnt += 1  
#training_data_tag_list = a
#training_target_sentence_list = b

## Hyperparamters_v1

In [16]:
epoch_v1 = 100
learning_rate_v1 = 0.01
input_size_v1 = len(dict_idx2tag.keys())
gru_hidden_size_v1 = 10
embedding_size_v1 = 32
output_size_v1 = 2
gru_layer_v1 = 3


## 定義訓練function

In [17]:
def train_v1():
    model_v1 = Bi_GRU_Model(total_tag=input_size_v1, 
                            embeding_size = embedding_size_v1,
                            gru_hidden=gru_hidden_size_v1,
                            output_size=output_size_v1,
                            gru_layer = gru_layer_v1)
    #model_v1.apply(weights_init)
    optimizer_v1 = torch.optim.SGD(model_v1.parameters(), lr=learning_rate_v1)
    loss_function_v1 = nn.NLLLoss()
    
    for epoch in range(epoch_v1):
        sub_epoch = 0
        total_loss = 0
        #print (len(training_data_tag_list))
        for seq_tag_idx in range(len(training_data_tag_list)):
            sub_epoch += 1
            input_data = torch.LongTensor([dict_tag2idx[t] for t in training_data_tag_list[seq_tag_idx]])
            target_data = torch.LongTensor([training_target_sentence_list[seq_tag_idx]])
            out = model_v1(input_data)
            loss = loss_function_v1(out, target_data)
            total_loss += loss.item()
            optimizer_v1.zero_grad()
            loss.backward()
            optimizer_v1.step()
            '''
            if sub_epoch % 100 == 0:
                print ('Epoch '+  str(epoch) + ' '+ str(sub_epoch)+'/' + str(len(training_data_tag_list)) + '  Loss : ' + str(loss))
            '''
        print ('Epoch ' + str(epoch) + ' Loss : ' + str (total_loss))
        torch.save(model_v1, '../pickle/model_v1.pt')
    
            
            
            
            
            
            

In [18]:
train_v1()



Epoch 0 Loss : 15.12510621547699


  "type " + obj.__name__ + ". It won't be checked "


Epoch 1 Loss : 14.442784428596497
Epoch 2 Loss : 13.761734426021576
Epoch 3 Loss : 13.033600449562073
Epoch 4 Loss : 12.225331544876099
Epoch 5 Loss : 11.320636063814163
Epoch 6 Loss : 10.324897915124893
Epoch 7 Loss : 9.267999976873398
Epoch 8 Loss : 8.198874711990356
Epoch 9 Loss : 7.170851528644562
Epoch 10 Loss : 6.22552365064621
Epoch 11 Loss : 5.384606748819351
Epoch 12 Loss : 4.65191262960434
Epoch 13 Loss : 4.020511031150818
Epoch 14 Loss : 3.479481816291809
Epoch 15 Loss : 3.0177440345287323
Epoch 16 Loss : 2.6252336502075195
Epoch 17 Loss : 2.2928282618522644
Epoch 18 Loss : 2.012095034122467
Epoch 19 Loss : 1.7752603888511658
Epoch 20 Loss : 1.575318992137909
Epoch 21 Loss : 1.4061315655708313
Epoch 22 Loss : 1.2624595165252686
Epoch 23 Loss : 1.1399115324020386
Epoch 24 Loss : 1.034857988357544
Epoch 25 Loss : 0.944320559501648
Epoch 26 Loss : 0.8658663034439087
Epoch 27 Loss : 0.7975102663040161
Epoch 28 Loss : 0.7376328706741333
Epoch 29 Loss : 0.6849079132080078
Epoch 30

KeyboardInterrupt: 

In [266]:
def predict():
    model_v1 = Bi_GRU_Model(total_tag=input_size_v1, 
                            embeding_size = embedding_size_v1,
                            gru_hidden=gru_hidden_size_v1,
                            output_size=output_size_v1,
                            gru_layer = gru_layer_v1)
    model_v1 = torch.load('../pickle/model_v1.pt')
    
    
    correct = 0
    for seq_tag_idx in range(len(training_data_tag_list)):
        
        input_data = torch.LongTensor([dict_tag2idx[t] for t in training_data_tag_list[seq_tag_idx]])
        target_data = torch.LongTensor([training_target_sentence_list[seq_tag_idx]])
        out = model_v1(input_data)
        out = out.squeeze(0)
        if out[0].item() > out[1].item() and target_data[0].item() == 0:
            correct += 1
        elif out[0].item() < out[1].item() and target_data[0].item() == 1:
            correct += 1
        else : 
            continue
    print (correct / len (training_data_tag_list))
        


In [267]:
predict()

1.0


