In [1]:
import math

import torch
import torch.nn as nn

import torchtext
from torchtext import datasets
from torchtext import data
from torchtext.legacy import data

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
batch_size = 30
max_length = 256

TEXT = data.Field(
    lower=True, include_lengths=False, batch_first=True
)
LABEL = data.Field(sequential=False)
train_txt = datasets.IMDB(split='train')
test_txt = datasets.IMDB(split='test')

TEXT.build_vocab(
    train_txt,
    vectors=torchtext.vocab.GloVe(name="6B", dim=50, max_vectors=50_000),
    max_size=50_000,
)

LABEL.build_vocab(train_txt)

train_iter, test_iter = data.BucketIterator.splits((train_txt, test_txt),
                                                   batch_size=batch_size)

# メモリ解放
gc.collect()
print('メモリ解放')

AttributeError: 

In [None]:
class PositionalEncoding(nn.Module):
    """
    https://pytorch.org/tutorials/beginner/transformer_tutorial.html
    """

    def __init__(self, d_model, vocab_size=5000, dropout=0.1):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(vocab_size, d_model)
        position = torch.arange(0, vocab_size, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float()
            * (-math.log(10000.0) / d_model)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer("pe", pe)

    def forward(self, x):
        x = x + self.pe[:, : x.size(1), :]
        return self.dropout(x)

In [None]:
class Net(nn.Module):
    """
    Text classifier based on a pytorch TransformerEncoder.
    """

    def __init__(
        self,
        embeddings,
        nhead=8,
        dim_feedforward=2048,
        num_layers=6,
        dropout=0.1,
        activation="relu",
        classifier_dropout=0.1,
    ):

        super().__init__()

        vocab_size, d_model = embeddings.size()
        assert d_model % nhead == 0, "nheads must divide evenly into d_model"

        self.emb = nn.Embedding.from_pretrained(embeddings, freeze=False)

        self.pos_encoder = PositionalEncoding(
            d_model=d_model,
            dropout=dropout,
            vocab_size=vocab_size,
        )

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
        )
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer,
            num_layers=num_layers,
        )
        self.classifier = nn.Linear(d_model, 2)
        self.d_model = d_model

    def forward(self, x):
        print(x.shape)
        x = self.emb(x) * math.sqrt(self.d_model)
        print(x.shape)
        x = self.pos_encoder(x)
        print(x.shape)
        x = self.transformer_encoder(x)
        print(x.shape)
        x = x.mean(dim=1)
        print(x.shape)
        x = self.classifier(x)
        print(x.shape)

        return x

In [None]:
epochs = 1
model = Net(
    TEXT.vocab.vectors,
    nhead=5,  # the number of heads in the multiheadattention models
    dim_feedforward=50,  # the dimension of the feedforward network model in nn.TransformerEncoder
    num_layers=6,
    dropout=0.0,
    classifier_dropout=0.0,
).to(device)

criterion = nn.CrossEntropyLoss()

lr = 1e-4
optimizer = torch.optim.Adam(
    (p for p in model.parameters() if p.requires_grad), lr=lr
)

torch.manual_seed(0)

print("starting")
for epoch in range(epochs):
    print(f"{epoch=}")
    epoch_loss = 0
    epoch_correct = 0
    epoch_count = 0
    for idx, batch in enumerate(iter(train_iter)):
        predictions = model(batch.text.to(device))
        labels = batch.label.to(device) - 1

        loss = criterion(predictions, labels)

        correct = predictions.argmax(axis=1) == labels
        acc = correct.sum().item() / correct.size(0)

        epoch_correct += correct.sum().item()
        epoch_count += correct.size(0)

        epoch_loss += loss.item()

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)

        optimizer.step()

    with torch.no_grad():
        test_epoch_loss = 0
        test_epoch_correct = 0
        test_epoch_count = 0

        for idx, batch in enumerate(iter(test_iter)):
            predictions = model(batch.text.to(device))
            labels = batch.label.to(device) - 1
            test_loss = criterion(predictions, labels)

            correct = predictions.argmax(axis=1) == labels
            acc = correct.sum().item() / correct.size(0)

            test_epoch_correct += correct.sum().item()
            test_epoch_count += correct.size(0)
            test_epoch_loss += loss.item()

    print(f"{epoch_loss=}")
    print(f"epoch accuracy: {epoch_correct / epoch_count}")
    print(f"{test_epoch_loss=}")
    print(f"test epoch accuracy: {test_epoch_correct / test_epoch_count}")

In [3]:
train_txt

<torchtext.data.datasets_utils._RawTextIterableDataset at 0x10c94ca60>

In [8]:
t=torch.randn(5,4)
g=torch.randn(5,4,3)
print(t)
print(g)

tensor([[ 0.2058, -0.0742,  1.3366,  1.3647],
        [-1.2786, -0.7082, -2.2737,  0.5185],
        [ 0.1826,  0.4152,  1.0471, -1.6233],
        [ 0.4471, -1.0526,  0.0310,  0.0494],
        [-1.5965,  0.2187,  0.9989,  0.3079]])
tensor([[[-1.5523, -1.2434, -0.9419],
         [-0.2383,  1.5912, -1.6502],
         [-0.2902, -0.4473,  0.2467],
         [-2.4460,  0.0074, -1.9800]],

        [[-1.2749, -0.5333, -0.5976],
         [ 0.8122, -0.1148, -0.5398],
         [ 1.6435,  0.2978, -1.7259],
         [ 0.5027,  2.0871,  0.2007]],

        [[-0.3738,  0.3048,  0.3817],
         [ 0.6630,  0.1353, -0.2309],
         [-0.7171, -0.7788, -0.5427],
         [-0.6018, -0.9397,  1.2979]],

        [[-1.5348,  2.4105, -0.9706],
         [-0.7284, -1.3929, -0.1321],
         [-0.8072, -1.1986, -1.5206],
         [-0.5665,  0.4873, -1.8529]],

        [[-0.9457,  0.9937, -1.2628],
         [ 2.2014,  0.3812, -0.0614],
         [ 0.1611,  1.6365,  0.0673],
         [ 0.8607, -0.9564,  0.1719]]])

In [7]:
print(t.mean())
print(t.mean(dim=0))
print(t.mean(dim=1))

tensor(-0.0642)
tensor([-0.0958, -0.9968, -0.1704,  1.0062])
tensor([-0.0338,  0.4460,  0.5026, -0.1416, -1.0940])
