In [12]:
## import
import torch
import torch.nn as nn

In [5]:
## 入力データの受け取り

In [6]:
with open('./train.txt','r',encoding='utf-8') as f:
        text = f.read()

print("テキストの文字数 :", len(text))
print("最初の30文字 : ",text[:30])

テキストの文字数 : 3660
最初の30文字 :  GMOグローバルサイン・ホールディングスCTO室のZulfa


In [10]:
## 重複なしソート入力文字
chars = sorted(list(set(text)))
## 種類数
char_size = len(chars)
## 文字→数値、数値→文字の辞書
char2int = { ch : i for i, ch in enumerate(chars) }
int2char = { i : ch for i, ch in enumerate(chars) }
## エンコード、デコードできる関数
encode = lambda a: [char2int[b] for b in a ]
decode = lambda a: ''.join([int2char[b] for b in a ])
train_data = torch.tensor(encode(text), dtype=torch.long)

print("学習データで使っている文字数　：　", char_size)
print("トークン化した学習データ　：　", train_data[:30])

学習データで使っている文字数　：　 381
トークン化した学習データ　：　 tensor([ 24,  28,  30, 134, 162, 165, 148, 161, 135, 130, 163, 164, 153, 165,
        161, 146, 129, 163, 134, 138,  21,  35,  30, 230, 107,  40,  61,  53,
         48,  43])


In [14]:
## 1文字を何次元のベクトル表現にするか
vector_size = 3
embeddings = nn.Embedding(char_size, vector_size)
encoded_words = torch.tensor(encode("前回の記事"))
embeddings_words  = embeddings(encoded_words)
print("埋め込みベクトルの次元数 : ",vector_size)
print("ベクトル表現 : ",embeddings_words)

埋め込みベクトルの次元数 :  3
ベクトル表現 :  tensor([[ 1.3751, -0.1629, -1.1079],
        [-0.2423, -1.2842,  0.6985],
        [ 0.0325,  2.1288, -0.2214],
        [-0.3532, -0.9633,  0.5138],
        [ 0.8173,  0.3703,  0.0062]], grad_fn=<EmbeddingBackward0>)


In [15]:
class SelfAttention_Head(nn.Module):

    def __init__(self, n_mbed, head_size, block_size):
        super().__init__()
        self.key = nn.Linear(n_mbed, head_size, bias=False)
        self.query = nn.Linear(n_mbed, head_size, bias=False)
        self.value = nn.Linear(n_mbed, head_size, bias=False)
        self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))

    def forward(self, x):
        B, T, C = x.shape

        k = self.key(x)
        q = self.query(x)
        v = self.value(x)

        wei = q @ k.transpose(-2,-1)* C ** -0.5
        wei = wei.masked_fill(self.tril[:T, :T] == 0, float('-inf'))
        wei = F.softmax(wei, dim=-1)

        out = wei @ v
        return out

In [16]:
class SelfAttention_MultiHeads(nn.Module):

    def __init__(self, n_mbed, num_heads, head_size, block_size):
        super().__init__()
        self.heads = nn.ModuleList((SelfAttention_Head(n_mbed, head_size, block_size) for _ in range(num_heads)))

    def forward(self, x):
        return torch.cat([h(x) for h in self.heads], dim = -1)

class FeedForward(nn.Module):

    def __init__(self, n_mbed):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(n_mbed, n_mbed), nn.ReLU())

    def forward(self, x):
        return self.net(x)

In [17]:
class Model(nn.Module):
    def __init__(self, n_mbed, char_size, block_size):
        super().__init__()
        self.token_embedding = nn.Embedding(char_size, n_mbed)
        self.position_embedding = nn.Embedding(block_size, n_mbed)
        self.selfattention_multiheads = SelfAttention_MultiHeads(n_mbed, 4, n_mbed//4, block_size)
        self.feedforward = FeedForward(n_mbed)
        self.linear = nn.Linear(n_mbed , char_size)

    def forward(self, idx, targets=None):
        B, T= idx.shape
        token_mbed = self.token_embedding(idx)
        position_mbed = self.position_embedding(torch.arange(T))
        x = token_mbed + position_mbed
        x = self.selfattention_multiheads(x)
        x = self.feedforward(x)
        logits = self.linear(x)

        loss = None
        if targets is not None:
            B, T, C =logits.shape
            logits = logits.view(B*T,C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)

        return logits, loss

In [19]:
number_of_heads = 4 # 同時に実行されるself-attentionの数
block_size = 8 # トークンの埋め込むベクトルの次元数
n_mbed = number_of_heads * block_size
char_size = len(train_data)

model = Model(n_mbed, char_size, block_size)

In [20]:
logits, loss = model(x,y)
idx = torch.zeros((1,1), dtype = torch.long)
for _ in range(50):
    idx_pred = idx[:, -block_size:]
    logits , loss = model(idx_pred)
    logits = logits[:,-1,:]
    probs = F.softmax(logits, dim=1)
    idx_next_pred = torch.multinomial(probs, num_samples=1)
    idx = torch.cat((idx, idx_next_pred),dim = 1)

predict = decode(idx[0].tolist())
print("予測結果 : ", predict)

NameError: name 'x' is not defined