<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [214]:
import numpy as np
from dataset import sequence

(x_train, t_train), (x_test, t_test) = sequence.load_data('date.txt')
char_to_id, id_to_char = sequence.get_vocab()

In [215]:
vocab_size = len(char_to_id)
wordvec_size = 512   # Embedding Layer 차원 수

In [216]:
x_sample = x_train[:10]
x_sample.shape

(10, 29)

In [217]:
from common.time_layers import TimeEmbedding
from common.time_layers import TimeAffine

V = len(char_to_id)
D = 512
d = 64

W_embed = np.random.rand(V, D)   # Embedding Layer 파라미터 
W_key = np.random.rand(D, d)     # key의 Weight
b_key = np.zeros_like(d)         # key의 bias



embed_layer = TimeEmbedding(W_embed)
key_layer = TimeAffine(W_key, b_key)

embed_out = embed_layer.forward(x_sample)
print('embed_out:', embed_out.shape)
key_out = key_layer.forward(embed_out)
print('key_out:', key_out.shape)

embed_out: (10, 29, 512)
key_out: (10, 29, 64)


In [218]:
# Time Key Layer = embedding layer + key layer
from common.time_layers import TimeEmbedding
from common.time_layers import TimeAffine


class TimeKeyLayer:
    def __init__(self, V, D, d):
        """
        Args:
            V: Vocabulary size
            D: Embedding layer(word2vec) 차원 수
            d: Key 차원 수
        """
        # 파라미터 정의
        nr = np.random.rand
        W_embed = nr(V, D)
        W_key = nr(D, d)
        b_key = np.zeros_like(d)
        
        # 계층 정의
        embed_layer = TimeEmbedding(W_embed)
        key_layer = TimeAffine(W_key, b_key)
        self.layers = [embed_layer, key_layer]
        # 파라미터 취합
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads
            
        
    def forward(self, xs):
        for layer in self.layers:
            xs = layer.forward(xs)
        return xs
    
    
    def backward(self, dout):
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout

In [219]:
# Time Query layer
from common.time_layers import TimeAffine

class TimeQueryLayer:
    def __init__(self, V, d):
        """
        Args:
            V: Vocabulary size
            D: Embedding layer(word2vec) 차원 수
            d: Query 차원 수
        """
        self.V = V
        # 파라미터 정의
        nr = np.random.rand
        W_query = nr(V, d)
        b_query = np.zeros_like(d)
        # 계층 정의
        self.layers = TimeAffine(W_query, b_query)
        # 파라미터 취합
        self.params, self.grads = self.layers.params, self.layers.grads
        
    def forward(self, xs):
        # 레이블인코딩 -> 원핫인코딩
        if xs.ndim == 2:
            xs = np.eye(self.V)[xs]   
        xs = self.layers.forward(xs)
        return xs
    
    def backward(self, dout):
        dout = self.layers.backward(dout)
        return dout

In [221]:
# Key, Query를 입력으로 받는 Compatibility Function 수행
from common.functions import softmax

class CompatibilityFunction:
    def __init__(self, V, D, d):
        """
        Args:
            V: Vocabulary size
            D: Embedding layer(word2vec) 차원 수
            d: Query 차원 수
        """
        # Embedding Layer + Key Layer
        self.time_key_layer = TimeKeyLayer(V, D, d)
        # Query Layer
        self.time_query_layer = TimeQueryLayer(V, d)
        # 파라미터 취합
        self.key_params, self.query_params = [], []
        self.key_grads, self.query_grads = [], []
        
        self.key_params += self.time_key_layer.params
        self.key_grads += self.time_key_layer.grads
        
        self.query_params += self.time_query_layer.params
        self.query_grads += self.time_query_layer.grads
        
    
    def forward(self, xs):
        out_query = self.time_query_layer.forward(xs)
        out_query_T = np.transpose(out_query, (0, 2, 1))
        out_key = self.time_key_layer.forward(xs)
        out_key_T = np.transpose(out_key, (0, 2, 1))
        
        out = np.matmul(out_query, out_key_T) / np.sqrt(d)
        score = softmax(out)
        
        self.out_key, self.out_query_T = out_key, out_query_T
        self.out, self.score = out, score
        return score
        
        
    def backward(self, dscore):
        # Softmax 역전파
        dx = self.score * dscore
        sumdx = np.sum(dx, axis=2, keepdims=True)
        dx -= self.score * sumdx
        # Scaling factor(나누기) 역전파
        dx *= -(self.out ** 2)
        # 행렬 곱 역전파
        dquery = np.matmul(dx, self.out_key)
        dkey = np.matmul(self.out_query_T, dx)  # 얘는 Embedding Layer로 전달
        
        # Key, Query Layer로 각각 역전파
        dkey = self.time_key_layer.backward(dkey)
        dquery = self.time_query_layer.backward(dquery)
        
        return dkey, dquery

In [222]:
V = len(char_to_id)
D = 512
d = 64
xs = x_train[:10]

# 순전파
comp_func = CompatibilityFunction(V, D, d)
out = comp_func.forward(xs)
print(out.shape)
# 역전파
dscore = np.empty_like(out)

dkey, dquery = comp_func.backward(dscore)

(10, 29, 29)


ValueError: could not broadcast input array from shape (64) into shape ()