In [339]:
import torch
from torchtext.datasets import AG_NEWS

# Load both AG_NEWS splits, using the local "test" directory as the dataset root.
agnews_dataset_train, agnews_dataset_test = (
    AG_NEWS(root="test", split=split_name) for split_name in ("train", "test")
)

In [340]:
# One-shot iterators over the two splits (each can be consumed only once).
train_iter, test_iter = iter(agnews_dataset_train), iter(agnews_dataset_test)

In [341]:
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

# Tokenizer used for both vocabulary construction and per-sample encoding.
tokenizer = get_tokenizer('basic_english')
train_iter = AG_NEWS(root = "test", split='train')  # re-create the training split here: the earlier `train_iter` is a one-shot iterator

def yield_tokens(data_iter):
    """Lazily yield the token list of every ``(label, text)`` pair in ``data_iter``.

    The label is ignored; the text is split with the notebook-level ``tokenizer``.
    """
    yield from (tokenizer(text) for _label, text in data_iter)

# Walk the whole training split once and build the token -> integer-index
# vocabulary (indices, not vectors; the embedding layers map indices to vectors).
vocab = build_vocab_from_iterator(yield_tokens(train_iter), specials=["<unk>"])
# Any token that is not in the vocabulary is mapped to the index of "<unk>".
vocab.set_default_index(vocab["<unk>"])

In [342]:
from torch.utils.data import Dataset
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

class AgNewsDataset(Dataset):
    """Map-style AG_NEWS dataset that returns fixed-length token-id tensors.

    Args:
        mode: ``"test"`` loads the test split; any other value loads the
            training split.
        max_len: number of token ids every sample is padded / truncated to
            (stored as ``self.maxLen``).

    Each item is ``(text, label)`` where ``text`` is an ``int64`` tensor of
    shape ``(max_len,)`` and ``label`` is the 0-based class index.
    """

    def __init__(self, mode="train", max_len=80):
        split = "test" if mode == "test" else "train"
        # Read the split a single time and keep labels and texts aligned.
        samples = list(AG_NEWS(root="test", split=split))
        self.labels = [label for label, _ in samples]
        self.texts = [text for _, text in samples]
        self.maxLen = max_len

    def __len__(self):
        return len(self.labels)

    @staticmethod
    def _pad_or_truncate(token_ids, max_len):
        """Return ``token_ids`` right-padded with zeros or cut to ``max_len`` entries."""
        if len(token_ids) <= max_len:
            padding = torch.zeros(max_len - len(token_ids), dtype=torch.int64)
            return torch.cat((token_ids, padding))
        return token_ids[:max_len]

    def __getitem__(self, index):
        # Work on fresh local values; the stored labels / texts are never modified.
        # Raw AG_NEWS labels start at 1, so shift them to start at 0.
        label = torch.tensor(int(self.labels[index]) - 1)
        # An explicit dtype keeps an empty token list from becoming a float tensor.
        token_ids = torch.tensor(
            vocab(tokenizer(self.texts[index])), dtype=torch.int64
        )
        # Pad / truncate to the configured length.
        # NOTE(review): the padding value 0 is also the index of "<unk>" in
        # `vocab`, so the model cannot tell padding from unknown tokens.
        text = self._pad_or_truncate(token_ids, self.maxLen)
        return text, label


In [343]:
# Materialise both splits as map-style datasets (the default mode is "train").
train, test = AgNewsDataset(), AgNewsDataset(mode="test")

In [344]:
train[7]
# Square-bracket indexing dispatches to __getitem__; there is no need to call train.__getitem__(7) directly.

(tensor([ 1355,  1236,   517, 13945,    38,  1416,    13,  2199,     1,   172,
            14,  2199,     1,   172,    15,   832,   124,  5951,   113,     5,
          2539,     7,  1232,     3,     8,    23,   571,    11,  2444,  1687,
           439,    69,    85,     3,     2,   100,    26,    60,     3,  7126,
             2,   347,    21,  2566,    29,     5, 26470,  3676,     1,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0]),
 tensor(2))

In [345]:
import torch
from torch import nn
from torch.nn import functional as F
# Model / training hyper-parameters shared by the cells below.
num_hiddens, emb_dim = 128, 256
batch_size = num_step = output_dim = 4
epoch = 5
# A fresh AG_NEWS iterable is created here because the earlier iterators
# have already been consumed.
num_class = len({label for label, _ in AG_NEWS(root="test", split="test")})
vocab_size = len(vocab)

In [346]:
num_class  # display the number of distinct labels counted from the test split

4

In [347]:
# Shuffle only the training batches. Evaluation accuracy does not depend on
# batch order, so the test loader iterates in a fixed, reproducible order.
train_dataloader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)

In [348]:
# Peek at a single batch to confirm the shape of the token tensor.
first_batch = next(iter(train_dataloader), None)
if first_batch is not None:
    text, label = first_batch
    print(text.shape)

torch.Size([4, 80])


In [349]:
import torch
import torch.nn as nn
class AgNewsClassification(nn.Module):
    """Embedding -> single-layer ``nn.RNN`` -> mean over time -> linear classifier.

    Args:
        num_class: number of output classes (defaults to the notebook-level
            ``num_class``).

    ``forward`` takes token ids of shape ``(batch, seq_len)`` and returns
    logits of shape ``(batch, num_class)``.
    """

    def __init__(self, num_class = num_class) :
        super(AgNewsClassification,self).__init__()
        self.embedding = nn.Embedding(vocab_size,emb_dim)
        # The data loader yields (batch, seq_len) token ids, so the embedded
        # input is (batch, seq_len, emb_dim). nn.RNN defaults to
        # batch_first=False and would then run the recurrence across the
        # samples of a batch instead of across time steps.
        self.rnn = nn.RNN(emb_dim, num_hiddens, batch_first=True)
        # self.lstm = nn.LSTM(emb_dim, num_hiddens, batch_first=True)
        # self.gru = nn.GRU(emb_dim, num_hiddens, batch_first=True)
        self.fc = nn.Linear(num_hiddens,num_class)
        self.init_weights()

    def init_weights(self):
        """Uniformly initialise the embedding and classifier weights; zero the classifier bias."""
        initrange = 0.5
        self.embedding.weight.data.uniform_(-initrange, initrange)
        self.fc.weight.data.uniform_(-initrange, initrange)
        self.fc.bias.data.zero_()

    def forward(self, text):
        embedded = self.embedding(text)      # (batch, seq_len, emb_dim)
        output, _ = self.rnn(embedded)       # (batch, seq_len, num_hiddens)
        out = torch.mean(output, dim=1)      # average the hidden states over time
        logits = self.fc(out)
        return logits

In [350]:
model = AgNewsClassification()
# Smoke test: add a leading batch axis to one encoded sample and run a forward pass.
model(train[0][0][None, :])

tensor([[-0.1697,  0.7265,  0.0540,  0.6336]], grad_fn=<AddmmBackward0>)

In [351]:
import torch
import torch.nn as nn
class rnn_try(nn.Module):
    """Hand-written tanh RNN over already-embedded inputs.

    Args:
        emb_dim: size of each input feature vector.
        num_hiddens: size of the hidden state (and of each per-step output).
    """

    def __init__(self, emb_dim = 128,num_hiddens = 256) :
        super(rnn_try,self).__init__()
        # NOTE(review): this embedding is not used by forward(), which expects
        # embedded input; it is kept so the module's parameter set is unchanged.
        self.embedding = nn.Embedding(vocab_size,emb_dim)
        self.xh = nn.Linear(emb_dim,num_hiddens)      # input  -> hidden
        self.hh = nn.Linear(num_hiddens,num_hiddens)  # hidden -> hidden
        self.hq = nn.Linear(num_hiddens,num_hiddens)  # hidden -> output
        self.act = nn.Tanh()

    def forward(self, x: torch.Tensor, h: "torch.Tensor | None" = None):
        """Run the recurrence over every time step.

        Args:
            x: embedded input of shape ``(batch, seq_length, emb_dim)``.
            h: initial hidden state of shape ``(batch, num_hiddens)``;
                defaults to zeros.

        Returns:
            A list with one ``(batch, num_hiddens)`` output tensor per time step.
        """
        if h is None:
            h = x.new_zeros(x.shape[0], self.hh.in_features)
        out = []
        for x_t in x.unbind(dim=1):  # iterate over the time axis
            h = self.act(self.xh(x_t) + self.hh(h))
            out.append(self.hq(h))
        return out

In [352]:
import torch
import torch.nn as nn
# Version with a classification head attached.
class rnn_try(nn.Module):
    """Hand-written tanh RNN with its own embedding and a classification head.

    Only the last time step is classified.

    Args:
        emb_dim: embedding size.
        num_hiddens: hidden-state size.
    """

    def __init__(self, emb_dim = 128,num_hiddens = 256) :
        super(rnn_try,self).__init__()
        # nn.RNN itself has no embedding layer either; a model that consumes
        # token ids has to embed them first, so the layer lives here.
        self.embedding = nn.Embedding(vocab_size,emb_dim)
        self.xh = nn.Linear(emb_dim,num_hiddens)      # input  -> hidden
        self.hh = nn.Linear(num_hiddens,num_hiddens)  # hidden -> hidden
        self.hq = nn.Linear(num_hiddens,num_hiddens)  # hidden -> output
        self.act = nn.Tanh()
        self.classifier = nn.Linear(num_hiddens,num_class)

    def forward(self, x: torch.Tensor, h: "torch.Tensor | None" = None):
        """Classify a batch of token-id sequences.

        Args:
            x: token ids of shape ``(batch, seq_length)``; embedded internally.
            h: initial hidden state of shape ``(batch, num_hiddens)``;
                defaults to zeros.

        Returns:
            Logits computed from the output of the final time step.
        """
        x = self.embedding(x)  # (batch, seq_length, emb_dim)
        if h is None:
            h = x.new_zeros(x.shape[0], self.hh.in_features)
        for x_t in x.unbind(dim=1):  # iterate over the time axis
            h = self.act(self.xh(x_t) + self.hh(h))
        # Only the last step is used, so the output projection runs once
        # instead of once per time step.
        return self.classifier(self.hq(h))

In [353]:
import torch
import torch.nn as nn
class lstm_try(nn.Module):
    """Hand-written LSTM over already-embedded inputs.

    Args:
        emb_dim: size of each input feature vector.
        num_hiddens: size of the hidden and cell states.
    """

    def __init__(self, emb_dim = 128,num_hiddens = 256) :
        super(lstm_try,self).__init__()
        # Input projections for the forget / input / output gates and the candidate cell.
        self.xf = nn.Linear(emb_dim,num_hiddens)
        self.xi = nn.Linear(emb_dim,num_hiddens)
        self.xo = nn.Linear(emb_dim,num_hiddens)
        self.xc = nn.Linear(emb_dim,num_hiddens)
        # Matching hidden-state projections.
        self.hi = nn.Linear(num_hiddens,num_hiddens)
        self.hf = nn.Linear(num_hiddens,num_hiddens)
        self.ho = nn.Linear(num_hiddens,num_hiddens)
        self.hc = nn.Linear(num_hiddens,num_hiddens)
        self.hq = nn.Linear(num_hiddens,num_hiddens)  # hidden -> output
        self.act2 = nn.Tanh()
        self.act1 = nn.Sigmoid()

    def forward(self, x: torch.Tensor, h: "torch.Tensor | None" = None,
                c: "torch.Tensor | None" = None):
        """Run the LSTM recurrence over every time step.

        Args:
            x: embedded input of shape ``(batch, seq_length, emb_dim)``.
            h: initial hidden state ``(batch, num_hiddens)``; defaults to zeros.
            c: initial cell state ``(batch, num_hiddens)``; defaults to zeros.

        Returns:
            A list with one ``(batch, num_hiddens)`` output tensor per time step.
        """
        batch, hidden = x.shape[0], self.hq.in_features
        if h is None:
            h = x.new_zeros(batch, hidden)
        if c is None:
            c = x.new_zeros(batch, hidden)
        out = []
        for x_t in x.unbind(dim=1):  # iterate over the time axis
            Ft = self.act1(self.xf(x_t) + self.hf(h))   # forget gate
            It = self.act1(self.xi(x_t) + self.hi(h))   # input gate
            Ot = self.act1(self.xo(x_t) + self.ho(h))   # output gate
            Ct1 = self.act2(self.xc(x_t) + self.hc(h))  # candidate cell state
            c = Ft * c + Ct1 * It
            h = Ot * self.act2(c)
            out.append(self.hq(h))
        return out

In [354]:
import torch
import torch.nn as nn
class lstm_try(nn.Module):
    """Hand-written LSTM with its own embedding and a classification head.

    Only the last time step is classified.

    Args:
        emb_dim: embedding size.
        num_hiddens: size of the hidden and cell states.
    """

    def __init__(self, emb_dim = 128,num_hiddens = 256) :
        super(lstm_try,self).__init__()
        self.embedded = nn.Embedding(vocab_size,emb_dim)
        # Input projections for the forget / input / output gates and the candidate cell.
        self.xf = nn.Linear(emb_dim,num_hiddens)
        self.xi = nn.Linear(emb_dim,num_hiddens)
        self.xo = nn.Linear(emb_dim,num_hiddens)
        self.xc = nn.Linear(emb_dim,num_hiddens)
        # Matching hidden-state projections.
        self.hi = nn.Linear(num_hiddens,num_hiddens)
        self.hf = nn.Linear(num_hiddens,num_hiddens)
        self.ho = nn.Linear(num_hiddens,num_hiddens)
        self.hc = nn.Linear(num_hiddens,num_hiddens)
        self.hq = nn.Linear(num_hiddens,num_hiddens)  # hidden -> output
        self.act2 = nn.Tanh()
        self.act1 = nn.Sigmoid()
        self.classifier = nn.Linear(num_hiddens,num_class)

    def forward(self, x: torch.Tensor, h: "torch.Tensor | None" = None,
                c: "torch.Tensor | None" = None):
        """Classify a batch of token-id sequences.

        Args:
            x: token ids of shape ``(batch, seq_length)``; embedded internally.
            h: initial hidden state ``(batch, num_hiddens)``; defaults to zeros.
            c: initial cell state ``(batch, num_hiddens)``; defaults to zeros.

        Returns:
            Logits computed from the output of the final time step.
        """
        x = self.embedded(x)  # (batch, seq_length, emb_dim)
        batch, hidden = x.shape[0], self.hq.in_features
        if h is None:
            h = x.new_zeros(batch, hidden)
        if c is None:
            c = x.new_zeros(batch, hidden)
        for x_t in x.unbind(dim=1):  # iterate over the time axis
            Ft = self.act1(self.xf(x_t) + self.hf(h))   # forget gate
            It = self.act1(self.xi(x_t) + self.hi(h))   # input gate
            Ot = self.act1(self.xo(x_t) + self.ho(h))   # output gate
            Ct1 = self.act2(self.xc(x_t) + self.hc(h))  # candidate cell state
            c = Ft * c + Ct1 * It
            h = Ot * self.act2(c)
        # Only the last step is used, so the output projection runs once
        # instead of once per time step.
        return self.classifier(self.hq(h))

In [355]:
# Instantiate the hand-written LSTM classifier and display its sub-modules.
model = lstm_try()
model

lstm_try(
  (embedded): Embedding(95811, 128)
  (xf): Linear(in_features=128, out_features=256, bias=True)
  (xi): Linear(in_features=128, out_features=256, bias=True)
  (xo): Linear(in_features=128, out_features=256, bias=True)
  (xc): Linear(in_features=128, out_features=256, bias=True)
  (hi): Linear(in_features=256, out_features=256, bias=True)
  (hf): Linear(in_features=256, out_features=256, bias=True)
  (ho): Linear(in_features=256, out_features=256, bias=True)
  (hc): Linear(in_features=256, out_features=256, bias=True)
  (hq): Linear(in_features=256, out_features=256, bias=True)
  (act2): Tanh()
  (act1): Sigmoid()
  (classifier): Linear(in_features=256, out_features=4, bias=True)
)

In [356]:
# model = lstm_try()
# x = torch.randn((32,20,128))
# h = torch.randn((32,256))
# c = torch.randn((32,256))
# model(x,h,c)[0].shape

In [357]:
import torch
import torch.nn as nn
class gru_try(nn.Module):
    """Hand-written GRU over already-embedded inputs.

    Args:
        emb_dim: size of each input feature vector.
        num_hiddens: size of the hidden state.
    """

    def __init__(self, emb_dim = 128,num_hiddens = 256) :
        super(gru_try,self).__init__()
        # Input projections for the update gate, reset gate and candidate state.
        self.xz = nn.Linear(emb_dim,num_hiddens)
        self.xr = nn.Linear(emb_dim,num_hiddens)
        self.xh = nn.Linear(emb_dim,num_hiddens)
        # Matching hidden-state projections.
        self.hz = nn.Linear(num_hiddens,num_hiddens)
        self.hr = nn.Linear(num_hiddens,num_hiddens)
        self.hh = nn.Linear(num_hiddens,num_hiddens)
        self.hq = nn.Linear(num_hiddens,num_hiddens)  # hidden -> output
        self.act2 = nn.Tanh()
        self.act1 = nn.Sigmoid()

    def forward(self, x: torch.Tensor, h: "torch.Tensor | None" = None):
        """Run the GRU recurrence over every time step.

        Args:
            x: embedded input of shape ``(batch, seq_length, emb_dim)``.
            h: initial hidden state ``(batch, num_hiddens)``; defaults to zeros.

        Returns:
            A list with one ``(batch, num_hiddens)`` output tensor per time step.
        """
        if h is None:
            h = x.new_zeros(x.shape[0], self.hq.in_features)
        out = []
        for x_t in x.unbind(dim=1):  # iterate over the time axis
            Zt = self.act1(self.xz(x_t) + self.hz(h))        # update gate
            Rt = self.act1(self.xr(x_t) + self.hr(h))        # reset gate
            Ht1 = self.act2(self.xh(x_t) + self.hh(Rt * h))  # candidate state
            h = Zt * h + (1 - Zt) * Ht1
            out.append(self.hq(h))
        return out

In [358]:
import torch
import torch.nn as nn
class gru_try(nn.Module):
    """Hand-written GRU with its own embedding and a classification head.

    Only the last time step is classified.

    Args:
        emb_dim: embedding size.
        num_hiddens: size of the hidden state.
    """

    def __init__(self, emb_dim = 128,num_hiddens = 256) :
        super(gru_try,self).__init__()
        self.embedded = nn.Embedding(vocab_size,emb_dim)
        # Input projections for the update gate, reset gate and candidate state.
        self.xz = nn.Linear(emb_dim,num_hiddens)
        self.xr = nn.Linear(emb_dim,num_hiddens)
        self.xh = nn.Linear(emb_dim,num_hiddens)
        # Matching hidden-state projections.
        self.hz = nn.Linear(num_hiddens,num_hiddens)
        self.hr = nn.Linear(num_hiddens,num_hiddens)
        self.hh = nn.Linear(num_hiddens,num_hiddens)
        self.hq = nn.Linear(num_hiddens,num_hiddens)  # hidden -> output
        self.act2 = nn.Tanh()
        self.act1 = nn.Sigmoid()
        # Spelled `classifier` (was `calssifier`) to match rnn_try / lstm_try.
        self.classifier = nn.Linear(num_hiddens,num_class)

    def forward(self, x: torch.Tensor, h: "torch.Tensor | None" = None):
        """Classify a batch of token-id sequences.

        Args:
            x: token ids of shape ``(batch, seq_length)``; embedded internally.
            h: initial hidden state ``(batch, num_hiddens)``; defaults to zeros.

        Returns:
            Logits computed from the output of the final time step.
        """
        x = self.embedded(x)  # (batch, seq_length, emb_dim)
        if h is None:
            h = x.new_zeros(x.shape[0], self.hq.in_features)
        for x_t in x.unbind(dim=1):  # iterate over the time axis
            Zt = self.act1(self.xz(x_t) + self.hz(h))        # update gate
            Rt = self.act1(self.xr(x_t) + self.hr(h))        # reset gate
            Ht1 = self.act2(self.xh(x_t) + self.hh(Rt * h))  # candidate state
            h = Zt * h + (1 - Zt) * Ht1
        # Only the last step is used, so the output projection runs once
        # instead of once per time step.
        return self.classifier(self.hq(h))

In [359]:
# model = gru_try()
# x = torch.randn((32,20))
# h = torch.randn((32,256))
# c = torch.randn((32,256))
# model(x,h)[0].shape

In [360]:
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
# NOTE: the optimizer holds references to the parameters of the `model` object
# that exists when this cell runs; rebinding `model` afterwards does not update it.
optimizer = optim.SGD(params=model.parameters(), lr=5, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1.0, gamma=0.1)

In [361]:
# Build the model that is actually trained, then (re)create the optimizer and
# scheduler from ITS parameters. An optimizer built from an earlier `model`
# object would keep updating that old object and leave this one untrained.
model = lstm_try()
optimizer = optim.SGD(model.parameters(), lr=5, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)

In [362]:
import time

def train(dataloader):
    """Run one training epoch of the notebook-level ``model`` over ``dataloader``.

    Uses the notebook globals ``model``, ``optimizer`` and ``criterion``, and
    reads the global ``epoch`` for the progress log. Running accuracy is
    printed every ``log_interval`` batches and then reset.

    Args:
        dataloader: iterable yielding ``(text, label)`` batches.
    """
    model.train()
    total_acc, total_count = 0, 0
    log_interval = 500
    # Every hand-written recurrent model in this notebook has an `hq` layer
    # whose input width equals the hidden size.
    hidden_size = model.hq.in_features

    for idx, (text, label) in enumerate(dataloader):
        # Size the initial states from the actual batch (no hard-coded 4 x 256)
        # and start from zeros so training and evaluation use the same state.
        batch = text.size(0)
        h = torch.zeros(batch, hidden_size)
        c = torch.zeros(batch, hidden_size)
        optimizer.zero_grad()
        predicted_label = model(text,h,c)
        # predicted_label = model(text,h)  # call form for rnn_try / gru_try
        loss = criterion(predicted_label, label)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
        optimizer.step()
        total_acc += (predicted_label.argmax(1) == label).sum().item()
        total_count += label.size(0)
        if idx % log_interval == 0 and idx > 0:
            print(
                "| epoch {:3d} | {:5d}/{:5d} batches "
                "| accuracy {:8.3f}".format(
                    epoch, idx, len(dataloader), total_acc / total_count
                )
            )
            total_acc, total_count = 0, 0


def evaluate(dataloader):
    """Return the accuracy of the notebook-level ``model`` over ``dataloader``.

    Args:
        dataloader: iterable yielding ``(text, label)`` batches.

    Returns:
        Fraction of correctly classified samples.
    """
    model.eval()
    total_acc, total_count = 0, 0
    # Every hand-written recurrent model in this notebook has an `hq` layer
    # whose input width equals the hidden size.
    hidden_size = model.hq.in_features

    with torch.no_grad():
        for text, label in dataloader:
            # Zero initial states sized from the batch: a random initial state
            # would make the reported accuracy differ from run to run.
            batch = text.size(0)
            h = torch.zeros(batch, hidden_size)
            c = torch.zeros(batch, hidden_size)
            predicted_label = model(text,h,c)
            # predicted_label = model(text,h)  # call form for rnn_try / gru_try
            total_acc += (predicted_label.argmax(1) == label).sum().item()
            total_count += label.size(0)
    return total_acc / total_count

In [363]:

# Best validation accuracy seen so far. It must be defined before the first
# comparison below, otherwise the first epoch raises NameError.
total_accu = None

for epoch in range(5):
    epoch_start_time = time.time()
    train(train_dataloader)
    accu_val = evaluate(test_dataloader)
    # Decay the learning rate only when accuracy got worse; otherwise record
    # the new reference accuracy.
    if total_accu is not None and total_accu > accu_val:
        scheduler.step()
    else:
        total_accu = accu_val
    print("-" * 59)
    print(
        "| end of epoch {:3d} | time: {:5.2f}s | "
        "valid accuracy {:8.3f} ".format(
            epoch, time.time() - epoch_start_time, accu_val
        )
    )
    print("-" * 59)