## Import the using package

In [35]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import requests
import numpy as np
import sklearn
import monpa
import random
import pickle

## The Architecture of the Designed Model using GRU and Convolution 1D

 <img src="https://i.imgur.com/JuOTx1S.png" width = "300" height = "200" alt="design_model" align=center />

## Design Model
---
#### Pytorch的Embedding:
* 常設定參數：
    * num_embeddings  : 字典的大小
    * embedding_dim : 詞向量維度
    
* 輸入向量格式：(＊)  <br>
* 輸出向量格式 : (＊ , H) <br>
＊ : 長整數向量 , e.g. [40] 也就是該詞的index<br>
 H : 詞向量維度<br>
 
---

#### Pytorch的GRU:
* 常設定參數：
    * input_size  : 輸入向量的維度
    * hidden_size : 隱藏層維度
    * num_layers : 層數
    * bidirectional : 是否雙向
    
* 輸入向量格式：(L , N , M)  <br>
* 輸出向量格式 : (L , N , H) <br>

L : 輸入的長度（Sequential length）<br>
N : batch size
M : input size
H : hidden size

---
#### Pytorch的Conv1d:
   * 常設定參數：
        * in_channels : 輸入資料的維度
        * out_channels : filter的數量
        * kernal_size : filter的size
   * 輸入向量格式：(N , Cin , L)
   * 輸出向量格式 : (N , Cout , Lout)
   
N : batch size 。<br>
Cin : 資料的維度數<br>
Cout : filter數量<br>
L : 輸入的長度（Sequential length）<br>
Lout : 輸出的長度 (會根據filter size和stride的不同而有所不同)<br>



In [None]:
class GRU_with_Conv1D_Model (nn.Module):
    def __init__(self, total_tag, embeding_size, gru_hidden,gru_layer, filter_num_1, filter_size_1, filter_num_2, filter_size_2, output_size, max_word):
        super(GRU_with_Conv1D_Model, self).__init__()
        self.total_tag = total_tag
        self.max_word = max_word
        self.embeding_size = embeding_size
        self.gru_hidden = gru_hidden
        self.gru_layer = gru_layer
        self.filter_num_1 = filter_num_1
        self.filter_size_1 = filter_size_1
        self.filter_num_2 = filter_num_2
        self.filter_size_2 = filter_size_2
        self.output_size = output_size
        self.Embeding = nn.Embedding(self.total_tag, self.embeding_size)
        self.GRU = nn.GRU(self.embeding_size , self.gru_hidden, self.gru_layer, bidirectional = True)
        self.Conv1d_layer1 = nn.Sequential(nn.Conv1d(in_channels=self.gru_hidden*2, 
                                                            out_channels=self.filter_num_1,
                                                            kernel_size=self.filter_size_1),
                                                         #nn.BatchNorm1d (self.filter_size_1),
                                                         nn.ReLU())
        
        self.Conv1d_layer2 = nn.Sequential(nn.Conv1d(in_channels=self.filter_num_1, 
                                                            out_channels=self.filter_num_2,
                                                            kernel_size=self.filter_size_2),
                                                         #nn.BatchNorm1d (self.filter_size_2),
                                                         nn.ReLU())
        # batch size is  set to 1 , output_size is set to 2
        L = (self.max_word- self.filter_size_1+ 1)- self.filter_size_2+ 1
        self.linear = nn.Linear( self.filter_num_2 * L, self.output_size)
        
    def forward(self, input_data):  
        embed_res = self.Embeding (input_data)
        if len(input_data) < self.max_word:
            for c in range (self.max_word - len(input_data)):
                embed_res = torch.cat((embed_res, torch.zeros(1, self.embeding_size)),dim = 0 )
        #print (' Embedding :::: ' , embed_res.size())
        # let the input dimension be the (L , N , M) and the get the output with the dimension (L , N , H)
        gru_res, _ = self.GRU(embed_res.unsqueeze(1))
        #print (' GRU :::: ' , gru_res.size())
        # let the input dimension be the (N , Cin , L) and then the output with the dimension (N, Cout , Lout)
        conv_res1 = self.Conv1d_layer1 (gru_res.permute(1,2,0))
        #print (' conv_res1 :::: ' , conv_res1.size())
        conv_res2 = self.Conv1d_layer2 (conv_res1)
        #print ('conv_res2 :::: ' , conv_res2.size())
        linear_res = self.linear(conv_res2.view(-1))
        final_res = F.softmax(linear_res)
        return final_res
            
            
    

In [None]:
# Testing the code
model_v3 = GRU_with_Conv1D_Model(total_tag= 16, embeding_size=8, gru_hidden= 6 , gru_layer=1, filter_num_1=5, filter_size_1=2, filter_num_2 = 5, filter_size_2 = 2,output_size = 7, max_word = 10)
test_vec = torch.LongTensor([1,2,3,4,5,6,7])
model_v3 (test_vec)
print ('V3_Model_with_conv_and_gru ::: \n', model_v3 )

## Data Pre-processing

* 將positive_data和negative_data載入存入List
* 將monpa的tags載入dictionary
* 將sentence 的 words 轉成詞性


In [None]:
'''training_data_sentence_list = list()
training_data_tag_list = list()
training_target_sentence_list = list()
dict_idx2tag = dict()
dict_tag2idx = dict()

# Read both the postive data and negative data
with open ('../CJX_Train_Test_Data/positive_data.txt', 'r', encoding = 'utf-8') as rf:
    for pos_s in rf.readlines():
        training_data_sentence_list.append(pos_s.strip('\n').strip(',').strip('，').strip('《').strip('》').strip('【').strip('】').strip('、').strip('、').strip('-'))
        training_target_sentence_list.append(1)
with open ('../CJX_Train_Test_Data/positive_data.txt', 'r', encoding = 'utf-8') as rf:
    for pos_s in rf.readlines():
        training_data_sentence_list.append(pos_s.strip('\n').strip(',').strip('，').strip('《').strip('》').strip('【').strip('】').strip('、').strip('-'))
        training_target_sentence_list.append(0)
        
print ('training data size : ',len(training_data_sentence_list))
print ('training data target size : ',len(training_target_sentence_list))
tmp_cnt = 0
# Change the words to tags, because we need to use the sequential tags for training.
for sentence in training_data_sentence_list:
    tmp_cnt += 1
    #print (tmp_cnt)
    tmp = monpa.pseg(sentence)
    tmp_list = list()
    for item in tmp:
        tmp_list.append(item[1])
    training_data_tag_list.append(tmp_list)
    
print (len(training_data_tag_list))

# Read the tags of the monpa
with open ('./monpa_tag.txt' , 'r' , encoding = 'utf-8') as rf:
    cnt = 0
    for tag in rf.readlines():
        dict_idx2tag[cnt] = tag.strip('\n')
        dict_tag2idx[tag.strip('\n')] = cnt
        cnt += 1  

print ('The dictionary of idxs 2 tags : ', dict_idx2tag)
print ('The dictionary of tags 2 idxs : ' , dict_tag2idx)

shuffle_zip = list(zip(training_data_tag_list, training_target_sentence_list))
random.shuffle(shuffle_zip)
training_data_tag_list[:], training_target_sentence_list[:] = zip(*shuffle_zip)
print (training_data_tag_list, training_target_sentence_list)
'''


## Save the training data list

In [None]:
'''f = open ('../pickle/training_tags.pkl', 'wb')
pickle.dump(training_data_tag_list, f)
f.close()
f = open ('../pickle/target.pkl', 'wb')
pickle.dump(training_target_sentence_list, f)
f.close()'''

In [11]:
f = open ('../pickle/training_tags.pkl', 'rb')
a = pickle.load( f)
f.close()
f = open ('../pickle/target.pkl', 'rb')
b = pickle.load(f)
f.close()

In [12]:
dict_idx2tag = dict()
dict_tag2idx = dict()
with open ('./monpa_tag.txt' , 'r' , encoding = 'utf-8') as rf:
    cnt = 0
    for tag in rf.readlines():
        dict_idx2tag[cnt] = tag.strip('\n')
        dict_tag2idx[tag.strip('\n')] = cnt
        cnt += 1  
training_data_tag_list = a
training_target_sentence_list = b

## Hyperparamters_v1

In [57]:
epoch_v3 = 10
learning_rate_v3 = 0.000001
input_size_v3 = len(dict_idx2tag.keys())
gru_hidden_size_v3 = 8
embedding_size_v3 = 32
output_size_v3 = 2
gru_layer_v3 = 1
filter_num_1_v3 = 3
filter_size_1_v3 = 3
filter_num_2_v3 = 3
filter_size_2_v3 = 3
max_word_v3 = 20


## 定義訓練function

In [58]:
def train_v3():
    model_v3 = GRU_with_Conv1D_Model(total_tag= input_size_v3, 
                                    embeding_size= embedding_size_v3, 
                                    filter_num_1= filter_num_1_v3, 
                                    filter_size_1= filter_size_1_v3, 
                                    filter_num_2= filter_num_2_v3, 
                                    filter_size_2=filter_size_2_v3 ,
                                    output_size = output_size_v3, 
                                    max_word= max_word_v3,
                                    gru_hidden = gru_hidden_size_v3,
                                    gru_layer = gru_layer_v3)
    optimizer_v3 = torch.optim.SGD(model_v3.parameters(), lr=learning_rate_v3)
    loss_function_v3 = nn.CrossEntropyLoss()
    
    for epoch in range(epoch_v3):
        sub_epoch = 0
        total_loss = 0
        for seq_tag_idx in range(len(training_data_tag_list)):
            if len(training_data_tag_list[seq_tag_idx]) > max_word_v3:
                continue
            sub_epoch += 1
            input_data = torch.LongTensor([dict_tag2idx[t] for t in training_data_tag_list[seq_tag_idx]])
            target_data = torch.LongTensor([training_target_sentence_list[seq_tag_idx]])
            out = model_v3(input_data)
            out = out.unsqueeze(0)
            loss = loss_function_v3(out, target_data)
            total_loss += loss.item()
            optimizer_v3.zero_grad()
            loss.backward()
            optimizer_v3.step()
            '''
            if sub_epoch % 100 == 0:
                print ('Epoch '+  str(epoch) + ' '+ str(sub_epoch)+'/' + str(len(training_data_tag_list)) + '  Loss : ' + str(loss))
            '''
            
        print ('Epoch ' + str(epoch) + ' Loss : ' + str (total_loss))    
        torch.save(model_v3, '../pickle/model_v3.pt')

In [59]:
train_v3()



Epoch 0 Loss : 4866.513285040855


  "type " + obj.__name__ + ". It won't be checked "


Epoch 1 Loss : 4866.500924646854
Epoch 2 Loss : 4866.488595545292
Epoch 3 Loss : 4866.476292550564
Epoch 4 Loss : 4866.464019477367
Epoch 5 Loss : 4866.451784431934
Epoch 6 Loss : 4866.439587235451
Epoch 7 Loss : 4866.427417695522
Epoch 8 Loss : 4866.4152772426605
Epoch 9 Loss : 4866.403172314167
