## Import the required packages

In [27]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import requests
import numpy as np
import sklearn

## load the jieba tags
* load "jieba_tag.txt"
* map each tag to its index (its line number in the file) in a dictionary

In [28]:
# Build the tag -> index lookup table: every line of the tag file holds one
# tag, and the tag's zero-based line number becomes its index.
with open('./jieba_tag.txt', 'r', encoding='utf-8') as tag_file:
    tag_lines = tag_file.readlines()

# If a tag appears on several lines, the last occurrence decides its index.
jieba_tag_dict = {line.strip('\n'): idx for idx, line in enumerate(tag_lines)}
# Total number of lines read from the tag file.
cnt = len(tag_lines)

## The Architecture of the Designed Model using GRU and Convolution 1D

 <img src="https://i.imgur.com/JuOTx1S.png" width = "300" height = "200" alt="design_model" align=center />

## Design Model
---
#### Pytorch的Embedding:
* 常設定參數：
    * num_embeddings  : 字典的大小
    * embedding_dim : 詞向量維度
    
* 輸入向量格式：(＊)  <br>
* 輸出向量格式 : (＊ , H) <br>
＊ : 長整數向量 , e.g. [40] 也就是該詞的index<br>
 H : 詞向量維度<br>
 
---

#### Pytorch的GRU:
* 常設定參數：
    * input_size  : 輸入向量的維度
    * hidden_size : 隱藏層維度
    * num_layers : 層數
    * bidirectional : 是否雙向
    
* 輸入向量格式：(L , N , M)  <br>
* 輸出向量格式 : (L , N , H) <br>

L : 輸入的長度（Sequential length）<br>
N : batch size<br>
M : input size<br>
H : hidden size<br>

---
#### Pytorch的Conv1d:
   * 常設定參數：
        * in_channels : 輸入資料的維度
        * out_channels : filter的數量
        * kernel_size : filter的size
   * 輸入向量格式：(N , Cin , L)
   * 輸出向量格式 : (N , Cout , Lout)
   
N : batch size 。<br>
Cin : 資料的維度數<br>
Cout : filter數量<br>
L : 輸入的長度（Sequential length）<br>
Lout : 輸出的長度 (會根據filter size和stride的不同而有所不同)<br>



In [47]:
class GRU_with_Conv1D_Model (nn.Module):
    """Embedding -> bidirectional GRU -> two Conv1d+ReLU stages -> linear -> sigmoid.

    The model handles one sequence at a time (batch size 1). Inputs shorter
    than ``max_word`` are zero-padded after the embedding lookup. The final
    linear layer is sized from ``max_word``, so inputs longer than
    ``max_word`` are not supported.

    Args:
        total_tag: number of entries in the embedding table.
        embeding_size: width of each embedding vector (also the GRU input size).
        gru_hidden: GRU hidden size per direction; the GRU is bidirectional,
            so the first convolution receives ``2 * gru_hidden`` channels.
        gru_layer: number of stacked GRU layers.
        filter_num_1: number of filters (out_channels) of the first Conv1d.
        filter_size_1: kernel size of the first Conv1d.
        filter_num_2: number of filters (out_channels) of the second Conv1d.
        filter_size_2: kernel size of the second Conv1d.
        output_size: number of values produced by the final linear layer.
        max_word: sequence length the network is sized for.
        verbose: when True (the default) the intermediate tensor sizes are
            printed on every forward pass; pass False to silence them.
    """

    def __init__(self, total_tag, embeding_size, gru_hidden, gru_layer, filter_num_1, filter_size_1, filter_num_2, filter_size_2, output_size, max_word, verbose=True):
        super().__init__()
        self.total_tag = total_tag
        self.max_word = max_word
        self.embeding_size = embeding_size
        self.gru_hidden = gru_hidden
        self.gru_layer = gru_layer
        self.filter_num_1 = filter_num_1
        self.filter_size_1 = filter_size_1
        self.filter_num_2 = filter_num_2
        self.filter_size_2 = filter_size_2
        self.output_size = output_size
        self.verbose = verbose

        self.Embeding = nn.Embedding(self.total_tag, self.embeding_size)
        self.GRU = nn.GRU(self.embeding_size, self.gru_hidden, self.gru_layer, bidirectional=True)
        # The bidirectional GRU concatenates both directions, hence 2 * gru_hidden channels.
        self.Conv1d_layer1 = nn.Sequential(
            nn.Conv1d(in_channels=self.gru_hidden * 2,
                      out_channels=self.filter_num_1,
                      kernel_size=self.filter_size_1),
            nn.ReLU())
        self.Conv1d_layer2 = nn.Sequential(
            nn.Conv1d(in_channels=self.filter_num_1,
                      out_channels=self.filter_num_2,
                      kernel_size=self.filter_size_2),
            nn.ReLU())

        # Each stride-1, unpadded convolution shortens the sequence by (kernel_size - 1).
        # The batch size is fixed to 1, so the flattened conv output has
        # filter_num_2 * conv_out_len features.
        conv_out_len = (self.max_word - self.filter_size_1 + 1) - self.filter_size_2 + 1
        self.linear = nn.Linear(self.filter_num_2 * conv_out_len, self.output_size)

    def _trace(self, label, value):
        """Print a debugging line when verbose output is enabled."""
        if self.verbose:
            print(label, value)

    def forward(self, input_data):
        """Score a single sequence of tag indices.

        Args:
            input_data: 1-D LongTensor of tag indices with at most
                ``max_word`` elements.

        Returns:
            1-D tensor with ``output_size`` sigmoid activations.
        """
        embed_res = self.Embeding(input_data)
        pad_len = self.max_word - len(input_data)
        if pad_len > 0:
            # Append all missing zero rows in one call. F.pad keeps the dtype and
            # device of embed_res, unlike concatenating freshly created CPU zeros.
            embed_res = F.pad(embed_res, (0, 0, 0, pad_len))
        self._trace(' Embedding :::: ', embed_res.size())
        # GRU expects (L, N, M) and returns (L, N, H); insert the batch axis N = 1.
        gru_res, _ = self.GRU(embed_res.unsqueeze(1))
        self._trace(' GRU :::: ', gru_res.size())
        # Conv1d expects (N, Cin, L) and returns (N, Cout, Lout).
        conv_res1 = self.Conv1d_layer1(gru_res.permute(1, 2, 0))
        self._trace(' conv_res1 :::: ', conv_res1.size())
        conv_res2 = self.Conv1d_layer2(conv_res1)
        self._trace('conv_res2 :::: ', conv_res2.size())
        # Flatten everything (batch size is 1) before the linear layer.
        linear_res = self.linear(conv_res2.view(-1))
        # torch.sigmoid replaces the deprecated F.sigmoid.
        final_res = torch.sigmoid(linear_res)
        return final_res
            
            
    

In [48]:
# Smoke test: build a small GRU + Conv1D model and push one 7-tag sequence
# through it (shorter than max_word, so the padding branch is exercised),
# then show the layer summary.
model_v3 = GRU_with_Conv1D_Model(
    total_tag=16,
    embeding_size=8,
    gru_hidden=6,
    gru_layer=1,
    filter_num_1=5,
    filter_size_1=2,
    filter_num_2=5,
    filter_size_2=2,
    output_size=7,
    max_word=10,
)
test_vec = torch.LongTensor(list(range(1, 8)))
model_v3(test_vec)
print('V3_Model_with_conv_and_gru ::: \n', model_v3)

 Embedding ::::  torch.Size([10, 8])
 GRU ::::  torch.Size([10, 1, 12])
 conv_res1 ::::  torch.Size([1, 5, 9])
conv_res2 ::::  torch.Size([1, 5, 8])
V3_Model_with_conv_and_gru ::: 
 GRU_with_Conv1D_Model(
  (Embeding): Embedding(16, 8)
  (GRU): GRU(8, 6, bidirectional=True)
  (Conv1d_layer1): Sequential(
    (0): Conv1d(12, 5, kernel_size=(2,), stride=(1,))
    (1): ReLU()
  )
  (Conv1d_layer2): Sequential(
    (0): Conv1d(5, 5, kernel_size=(2,), stride=(1,))
    (1): ReLU()
  )
  (linear): Linear(in_features=40, out_features=7, bias=True)
)




## The Architecture of the Designed Model using only Bi-GRU 
<img src="https://i.imgur.com/wOizgXZ.png" width = "300" height = "200" alt="design_model" align=center />

In [49]:
class Bi_GRU_Model (nn.Module):
    """Embedding -> bidirectional GRU -> linear -> sigmoid.

    The model handles one sequence at a time (batch size 1) and classifies it
    from the GRU output at the last time step.

    Args:
        total_tag: number of entries in the embedding table.
        embeding_size: width of each embedding vector (also the GRU input size).
        gru_hidden: GRU hidden size per direction; the GRU is bidirectional,
            so the linear layer receives ``2 * gru_hidden`` features.
        gru_layer: number of stacked GRU layers.
        output_size: number of values produced by the final linear layer.
        verbose: when True (the default) the intermediate tensor sizes and the
            final result are printed on every forward pass; pass False to
            silence them.
    """

    def __init__(self, total_tag, embeding_size, gru_hidden, gru_layer, output_size, verbose=True):
        super().__init__()
        self.total_tag = total_tag
        self.embeding_size = embeding_size
        self.gru_hidden = gru_hidden
        self.gru_layer = gru_layer
        self.output_size = output_size
        self.verbose = verbose

        self.Embeding = nn.Embedding(self.total_tag, self.embeding_size)
        self.GRU = nn.GRU(self.embeding_size, self.gru_hidden, self.gru_layer, bidirectional=True)

        # The batch size is fixed to 1; both GRU directions are concatenated,
        # so the classifier input has 2 * gru_hidden features.
        self.linear = nn.Linear(self.gru_hidden * 2, self.output_size)

    def _trace(self, label, value):
        """Print a debugging line when verbose output is enabled."""
        if self.verbose:
            print(label, value)

    def forward(self, input_data):
        """Score a single sequence of tag indices.

        Args:
            input_data: 1-D LongTensor of tag indices.

        Returns:
            Tensor of shape ``(1, output_size)`` with sigmoid activations.
        """
        embed_res = self.Embeding(input_data)
        self._trace('Embedding :::: ', embed_res.size())
        # GRU expects (L, N, M) and returns (L, N, H); insert the batch axis N = 1.
        gru_res, _ = self.GRU(embed_res.unsqueeze(1))
        self._trace("GRU :::: ", gru_res.size())
        # NOTE(review): gru_res[-1] is the output at the last time step. For a
        # bidirectional GRU the reverse-direction half at that step has only
        # processed the final token; if a whole-sequence summary from both
        # directions is intended, the final hidden state may be the better
        # choice - confirm against the intended architecture.
        linear_res = self.linear(gru_res[-1])
        # torch.sigmoid replaces the deprecated F.sigmoid.
        final_res = torch.sigmoid(linear_res)
        self._trace('final res :::: ', final_res)
        return final_res



In [50]:
# Smoke test: build a small Bi-GRU model, run one 7-tag sequence through it,
# then show the layer summary.
model_v1 = Bi_GRU_Model(
    total_tag=16,
    embeding_size=8,
    gru_hidden=6,
    gru_layer=1,
    output_size=7,
)
test_vec = torch.LongTensor(list(range(1, 8)))
model_v1(test_vec)
print('V1_Model_with_only_gru ::: \n', model_v1)

Embedding ::::  torch.Size([7, 8])
GRU ::::  torch.Size([7, 1, 12])
final res ::::  tensor([[0.3744, 0.5347, 0.4801, 0.5206, 0.5743, 0.5495, 0.5178]],
       grad_fn=<SigmoidBackward>)
V1_Model_with_only_gru ::: 
 Bi_GRU_Model(
  (Embeding): Embedding(16, 8)
  (GRU): GRU(8, 6, bidirectional=True)
  (linear): Linear(in_features=12, out_features=7, bias=True)
)




## The Architecture of the Designed Model using only Convolution 1D
<img src="https://i.imgur.com/zjsZb0l.png" width = "300" height = "200" alt="design_model" align=center />

In [54]:
class Convolution_1D_Model (nn.Module):
    """Embedding -> two Conv1d+ReLU stages -> linear -> sigmoid.

    The model handles one sequence at a time (batch size 1). Inputs shorter
    than ``max_word`` are zero-padded after the embedding lookup. The final
    linear layer is sized from ``max_word``, so inputs longer than
    ``max_word`` are not supported.

    Args:
        total_tag: number of entries in the embedding table.
        embeding_size: width of each embedding vector (also the number of
            input channels of the first Conv1d).
        filter_num_1: number of filters (out_channels) of the first Conv1d.
        filter_size_1: kernel size of the first Conv1d.
        filter_num_2: number of filters (out_channels) of the second Conv1d.
        filter_size_2: kernel size of the second Conv1d.
        output_size: number of values produced by the final linear layer.
        max_word: sequence length the network is sized for.
        verbose: when True (the default) the intermediate tensor sizes and the
            final result are printed on every forward pass; pass False to
            silence them.
    """

    def __init__(self, total_tag, embeding_size, filter_num_1, filter_size_1, filter_num_2, filter_size_2, output_size, max_word, verbose=True):
        super().__init__()
        self.total_tag = total_tag
        self.max_word = max_word
        self.embeding_size = embeding_size
        self.filter_num_1 = filter_num_1
        self.filter_size_1 = filter_size_1
        self.filter_num_2 = filter_num_2
        self.filter_size_2 = filter_size_2
        self.output_size = output_size
        self.verbose = verbose

        self.Embeding = nn.Embedding(self.total_tag, self.embeding_size)
        self.Conv1d_layer1 = nn.Sequential(
            nn.Conv1d(in_channels=self.embeding_size,
                      out_channels=self.filter_num_1,
                      kernel_size=self.filter_size_1),
            nn.ReLU())
        self.Conv1d_layer2 = nn.Sequential(
            nn.Conv1d(in_channels=self.filter_num_1,
                      out_channels=self.filter_num_2,
                      kernel_size=self.filter_size_2),
            nn.ReLU())

        # Each stride-1, unpadded convolution shortens the sequence by (kernel_size - 1).
        # The batch size is fixed to 1, so the flattened conv output has
        # filter_num_2 * conv_out_len features.
        conv_out_len = (self.max_word - self.filter_size_1 + 1) - self.filter_size_2 + 1
        self.linear = nn.Linear(self.filter_num_2 * conv_out_len, self.output_size)

    def _trace(self, label, value):
        """Print a debugging line when verbose output is enabled."""
        if self.verbose:
            print(label, value)

    def forward(self, input_data):
        """Score a single sequence of tag indices.

        Args:
            input_data: 1-D LongTensor of tag indices with at most
                ``max_word`` elements.

        Returns:
            1-D tensor with ``output_size`` sigmoid activations.
        """
        embed_res = self.Embeding(input_data)
        pad_len = self.max_word - len(input_data)
        if pad_len > 0:
            # Append all missing zero rows in one call. F.pad keeps the dtype and
            # device of embed_res, unlike concatenating freshly created CPU zeros.
            embed_res = F.pad(embed_res, (0, 0, 0, pad_len))
        # Insert the batch axis N = 1 -> (N, L, E).
        embed_res = embed_res.unsqueeze(0)
        self._trace(' Embedding :::: ', embed_res.size())
        # Conv1d expects (N, Cin, L) and returns (N, Cout, Lout).
        conv_res1 = self.Conv1d_layer1(embed_res.permute(0, 2, 1))
        self._trace(' conv_res1 :::: ', conv_res1.size())
        conv_res2 = self.Conv1d_layer2(conv_res1)
        self._trace('conv_res2 :::: ', conv_res2.size())
        # Flatten everything (batch size is 1) before the linear layer.
        linear_res = self.linear(conv_res2.view(-1))
        # torch.sigmoid replaces the deprecated F.sigmoid.
        final_res = torch.sigmoid(linear_res)
        self._trace("final_res :::: ", final_res)
        return final_res

In [53]:
# Smoke test: build a small Conv1D-only model and push one 7-tag sequence
# through it (shorter than max_word, so the padding branch is exercised),
# then show the layer summary.
model_v2 = Convolution_1D_Model(
    total_tag=16,
    embeding_size=8,
    filter_num_1=5,
    filter_size_1=2,
    filter_num_2=5,
    filter_size_2=2,
    output_size=7,
    max_word=10,
)
test_vec = torch.LongTensor(list(range(1, 8)))
model_v2(test_vec)
print('V2_Model_with_only_convolution1D ::: \n', model_v2)

 Embedding ::::  torch.Size([1, 10, 8])
 conv_res1 ::::  torch.Size([1, 5, 9])
conv_res2 ::::  torch.Size([1, 5, 8])
final_res ::::  tensor([0.5161, 0.4780, 0.5138, 0.5408, 0.5429, 0.4728, 0.5267],
       grad_fn=<SigmoidBackward>)
V2_Model_with_only_convolution1D ::: 
 Convolution_1D_Model(
  (Embeding): Embedding(16, 8)
  (Conv1d_layer1): Sequential(
    (0): Conv1d(8, 5, kernel_size=(2,), stride=(1,))
    (1): ReLU()
  )
  (Conv1d_layer2): Sequential(
    (0): Conv1d(5, 5, kernel_size=(2,), stride=(1,))
    (1): ReLU()
  )
  (linear): Linear(in_features=40, out_features=7, bias=True)
)




In [None]:
def train_v1 ():
    