## 1、代码部署

In [25]:
import os
from argparse import Namespace
from collections import Counter
import json
import re
import string

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm


class Vocabulary(object):
    """Class to process text and extract vocabulary for mapping"""

    def __init__(self, token_to_idx=None, mask_token="<MASK>", add_unk=True, unk_token="<UNK>"):
        """
        Args:
            token_to_idx (dict): a pre-existing map of tokens to indices
            mask_token (str): the MASK token to add into the Vocabulary; indicates
                a position that will not be used in updating the model's parameters
            add_unk (bool): a flag that indicates whether to add the UNK token
            unk_token (str): the UNK token to add into the Vocabulary

        """

        if token_to_idx is None:
            token_to_idx = {}
        self._token_to_idx = token_to_idx

        self._idx_to_token = {idx: token
                              for token, idx in self._token_to_idx.items()}

        self._add_unk = add_unk
        self._unk_token = unk_token
        self._mask_token = mask_token

        self.mask_index = self.add_token(self._mask_token)
        self.unk_index = -1
        if add_unk:
            self.unk_index = self.add_token(unk_token)

    def to_serializable(self):
        """ returns a dictionary that can be serialized """
        return {'token_to_idx': self._token_to_idx,
                'add_unk': self._add_unk,
                'unk_token': self._unk_token,
                'mask_token': self._mask_token}

    @classmethod
    def from_serializable(cls, contents):
        """ instantiates the Vocabulary from a serialized dictionary """
        return cls(**contents)

    def add_token(self, token):
        """Update mapping dicts based on the token.

        Args:
            token (str): the item to add into the Vocabulary
        Returns:
            index (int): the integer corresponding to the token
        """
        if token in self._token_to_idx:
            index = self._token_to_idx[token]
        else:
            index = len(self._token_to_idx)
            self._token_to_idx[token] = index
            self._idx_to_token[index] = token
        return index

    def add_many(self, tokens):
        """Add a list of tokens into the Vocabulary

        Args:
            tokens (list): a list of string tokens
        Returns:
            indices (list): a list of indices corresponding to the tokens
        """
        return [self.add_token(token) for token in tokens]

    def lookup_token(self, token):
        """Retrieve the index associated with the token
          or the UNK index if token isn't present.

        Args:
            token (str): the token to look up
        Returns:
            index (int): the index corresponding to the token
        Notes:
            `unk_index` needs to be >=0 (having been added into the Vocabulary)
              for the UNK functionality
        """
        if self.unk_index >= 0:
            return self._token_to_idx.get(token, self.unk_index)
        else:
            return self._token_to_idx[token]

    def lookup_index(self, index):
        """Return the token associated with the index

        Args:
            index (int): the index to look up
        Returns:
            token (str): the token corresponding to the index
        Raises:
            KeyError: if the index is not in the Vocabulary
        """
        if index not in self._idx_to_token:
            raise KeyError("the index (%d) is not in the Vocabulary" % index)
        return self._idx_to_token[index]

    def __str__(self):
        return "<Vocabulary(size=%d)>" % len(self)

    def __len__(self):
        return len(self._token_to_idx)


class CBOWVectorizer(object):
    """ The Vectorizer which coordinates the Vocabularies and puts them to use"""

    def __init__(self, cbow_vocab):
        """
        Args:
            cbow_vocab (Vocabulary): maps words to integers
        """
        self.cbow_vocab = cbow_vocab

    def vectorize(self, context):
        """
        Args:
            context (str): the string of words separated by a space
        """
        indices = [self.cbow_vocab.lookup_token(token) for token in context.split(' ')]
        return np.array(indices)

    @classmethod
    def from_dataframe(cls, cbow_df):
        """Instantiate the vectorizer from the dataset dataframe

        Args:
            cbow_df (pandas.DataFrame): the target dataset
        Returns:
            an instance of the CBOWVectorizer
        """
        cbow_vocab = Vocabulary()

        # 添加您希望确保存在的词
        required_tokens = ['monster', 'frankenstein', 'science', 'sickness', 'lonely', 'happy']
        for token in required_tokens:
            cbow_vocab.add_token(token)

        for index, row in cbow_df.iterrows():
            for token in row.surname.split(' '):
                cbow_vocab.add_token(token)
            cbow_vocab.add_token(row.nationality)

        return cls(cbow_vocab)

    @classmethod
    def from_serializable(cls, contents):
        cbow_vocab = \
            Vocabulary.from_serializable(contents['cbow_vocab'])
        return cls(cbow_vocab=cbow_vocab)

    def to_serializable(self):
        return {'cbow_vocab': self.cbow_vocab.to_serializable()}


class CBOWDataset(Dataset):
    def __init__(self, cbow_df, vectorizer):
        """
        Args:
            cbow_df (pandas.DataFrame): the dataset
            vectorizer (CBOWVectorizer): vectorizer instatiated from dataset
        """
        self.cbow_df = cbow_df
        self._vectorizer = vectorizer

        measure_len = lambda surname: len(surname.split(" "))
        self._max_seq_length = max(map(measure_len, cbow_df.surname))

        self.train_df = self.cbow_df[self.cbow_df.split == 'train']
        self.train_size = len(self.train_df)

        self.val_df = self.cbow_df[self.cbow_df.split == 'val']
        self.validation_size = len(self.val_df)

        self.test_df = self.cbow_df[self.cbow_df.split == 'test']
        self.test_size = len(self.test_df)

        self._lookup_dict = {'train': (self.train_df, self.train_size),
                             'val': (self.val_df, self.validation_size),
                             'test': (self.test_df, self.test_size)}

        self.set_split('train')

    @classmethod
    def load_dataset_and_make_vectorizer(cls, cbow_csv):
        """Load dataset and make a new vectorizer from scratch

        Args:
            cbow_csv (str): location of the dataset
        Returns:
            an instance of CBOWDataset
        """
        cbow_df = pd.read_csv(cbow_csv)
        train_cbow_df = cbow_df[cbow_df.split == 'train']
        return cls(cbow_df, CBOWVectorizer.from_dataframe(train_cbow_df))

    @classmethod
    def load_dataset_and_load_vectorizer(cls, cbow_csv, vectorizer_filepath):
        """Load dataset and the corresponding vectorizer.
        Used in the case in the vectorizer has been cached for re-use

        Args:
            cbow_csv (str): location of the dataset
            vectorizer_filepath (str): location of the saved vectorizer
        Returns:
            an instance of CBOWDataset
        """
        cbow_df = pd.read_csv(cbow_csv)
        vectorizer = cls.load_vectorizer_only(vectorizer_filepath)
        return cls(cbow_df, vectorizer)

    @staticmethod
    def load_vectorizer_only(vectorizer_filepath):
        """a static method for loading the vectorizer from file

        Args:
            vectorizer_filepath (str): the location of the serialized vectorizer
        Returns:
            an instance of CBOWVectorizer
        """
        with open(vectorizer_filepath) as fp:
            return CBOWVectorizer.from_serializable(json.load(fp))

    def save_vectorizer(self, vectorizer_filepath):
        """saves the vectorizer to disk using json

        Args:
            vectorizer_filepath (str): the location to save the vectorizer
        """
        with open(vectorizer_filepath, "w") as fp:
            json.dump(self._vectorizer.to_serializable(), fp)

    def get_vectorizer(self):
        """ returns the vectorizer """
        return self._vectorizer

    def set_split(self, split="train"):
        """ selects the splits in the dataset using a column in the dataframe """
        self._target_split = split
        self._target_df, self._target_size = self._lookup_dict[split]

    def __len__(self):
        return self._target_size

    def __getitem__(self, index):
        """the primary entry point method for PyTorch datasets

        Args:
            index (int): the index to the data point
        Returns:
            a dictionary holding the data point's features (x_data) and label (y_target)
        """
        row = self._target_df.iloc[index]

        context_vector = \
            self._vectorizer.vectorize(row.surname)
        target_index = self._vectorizer.cbow_vocab.lookup_token(row.nationality)

        return {'x_data': context_vector,
                'y_target': target_index}

    def get_num_batches(self, batch_size):
        """Given a batch size, return the number of batches in the dataset

        Args:
            batch_size (int)
        Returns:
            number of batches in the dataset
        """
        return len(self) // batch_size


def generate_batches(dataset, batch_size, shuffle=True,
                     drop_last=True, device="cpu"):
    """
    A generator function which wraps the PyTorch DataLoader. It will
      ensure each tensor is on the write device location.
    """
    dataloader = DataLoader(dataset=dataset, batch_size=batch_size,
                            shuffle=shuffle, drop_last=drop_last)

    for data_dict in dataloader:
        out_data_dict = {}
        for name, tensor in data_dict.items():
            out_data_dict[name] = data_dict[name].to(device)
        yield out_data_dict


class CBOWClassifier(nn.Module):  # Simplified cbow Model
    def __init__(self, vocabulary_size, embedding_size, padding_idx=0):
        """
        Args:
            vocabulary_size (int): number of vocabulary items, controls the
                number of embeddings and prediction vector size
            embedding_size (int): size of the embeddings
            padding_idx (int): default 0; Embedding will not use this index
        """
        super(CBOWClassifier, self).__init__()

        self.embedding = nn.Embedding(num_embeddings=vocabulary_size,
                                      embedding_dim=embedding_size,
                                      padding_idx=padding_idx)
        self.fc1 = nn.Linear(in_features=embedding_size,
                             out_features=vocabulary_size)

    def forward(self, x_in, apply_softmax=False):
        """The forward pass of the classifier

        Args:
            x_in (torch.Tensor): an input data tensor.
                x_in.shape should be (batch, input_dim)
            apply_softmax (bool): a flag for the softmax activation
                should be false if used with the Cross Entropy losses
        Returns:
            the resulting tensor. tensor.shape should be (batch, output_dim)
        """
        # 嵌入层
        x_embedded = self.embedding(x_in)
        # 嵌入后求和
        x_embedded_sum = F.dropout(x_embedded.sum(dim=1), 0.3)
        # 全连接层
        y_out = self.fc1(x_embedded_sum)

        # 调试信息
        print(f"输入序列形状：{x_in.shape} -> 嵌入后：{x_embedded.shape}")
        print(f"嵌入求和后：{x_embedded_sum.shape} -> 全连接后：{y_out.shape}")

        if apply_softmax:
            y_out = F.softmax(y_out, dim=1)

        return y_out


def make_train_state(args):
    return {'stop_early': False,
            'early_stopping_step': 0,
            'early_stopping_best_val': 1e8,
            'learning_rate': args.learning_rate,
            'epoch_index': 0,
            'train_loss': [],
            'train_acc': [],
            'val_loss': [],
            'val_acc': [],
            'test_loss': -1,
            'test_acc': -1,
            'model_filename': args.model_state_file}


def update_train_state(args, model, train_state):
    """Handle the training state updates.

    Components:
     - Early Stopping: Prevent overfitting.
     - Model Checkpoint: Model is saved if the model is better

    :param args: main arguments
    :param model: model to train
    :param train_state: a dictionary representing the training state values
    :returns:
        a new train_state
    """

    # Save one model at least
    if train_state['epoch_index'] == 0:
        torch.save(model.state_dict(), train_state['model_filename'])
        train_state['stop_early'] = False

    # Save model if performance improved
    elif train_state['epoch_index'] >= 1:
        loss_tm1, loss_t = train_state['val_loss'][-2:]

        # If loss worsened
        if loss_t >= train_state['early_stopping_best_val']:
            # Update step
            train_state['early_stopping_step'] += 1
        # Loss decreased
        else:
            # Save the best model
            if loss_t < train_state['early_stopping_best_val']:
                torch.save(model.state_dict(), train_state['model_filename'])

            # Reset early stopping step
            train_state['early_stopping_step'] = 0

        # Stop early ?
        train_state['stop_early'] = \
            train_state['early_stopping_step'] >= args.early_stopping_criteria

    return train_state


def compute_accuracy(y_pred, y_target):
    _, y_pred_indices = y_pred.max(dim=1)
    n_correct = torch.eq(y_pred_indices, y_target).sum().item()
    return n_correct / len(y_pred_indices) * 100


def set_seed_everywhere(seed, cuda):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if cuda:
        torch.cuda.manual_seed_all(seed)


def handle_dirs(dirpath):
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)


args = Namespace(
    # Data and Path information
    cbow_csv="surnames_with_splits.csv",  # 修改为正确的文件名
    vectorizer_file="vectorizer.json",
    model_state_file="model.pth",
    save_dir="model_storage/ch5/cbow",
    # Model hyper parameters
    embedding_size=50,
    # Training hyper parameters
    seed=1337,
    num_epochs=5,
    learning_rate=0.0001,
    batch_size=32,
    early_stopping_criteria=5,
    # Runtime options
    cuda=True,
    catch_keyboard_interrupt=True,
    reload_from_files=False,
    expand_filepaths_to_save_dir=True
)

if args.expand_filepaths_to_save_dir:
    args.vectorizer_file = os.path.join(args.save_dir,
                                        args.vectorizer_file)

    args.model_state_file = os.path.join(args.save_dir,
                                         args.model_state_file)

    print("Expanded filepaths: ")
    print("\t{}".format(args.vectorizer_file))
    print("\t{}".format(args.model_state_file))

# Check CUDA
if not torch.cuda.is_available():
    args.cuda = False

args.device = torch.device("cuda" if args.cuda else "cpu")

print("Using CUDA: {}".format(args.cuda))

# Set seed for reproducibility
set_seed_everywhere(args.seed, args.cuda)

# handle dirs
handle_dirs(args.save_dir)

if args.reload_from_files:
    print("Loading dataset and loading vectorizer")
    dataset = CBOWDataset.load_dataset_and_load_vectorizer(args.cbow_csv,
                                                           args.vectorizer_file)
else:
    print("Loading dataset and creating vectorizer")
    dataset = CBOWDataset.load_dataset_and_make_vectorizer(args.cbow_csv)
    dataset.save_vectorizer(args.vectorizer_file)

vectorizer = dataset.get_vectorizer()

# 数据预处理验证
print("Vocabulary特殊标记索引：", 
      f"<MASK>:{vectorizer.cbow_vocab.mask_index}",
      f"<UNK>:{vectorizer.cbow_vocab.unk_index}")

# 打印样本向量化结果
sample_str = "Zhang"
vec = vectorizer.vectorize(sample_str)
print(f"样本'{sample_str}'的向量化结果:\n{vec}\n有效长度:{len(sample_str.split())}")

classifier = CBOWClassifier(vocabulary_size=len(vectorizer.cbow_vocab),
                            embedding_size=args.embedding_size)

classifier = classifier.to(args.device)

loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(classifier.parameters(), lr=args.learning_rate)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                 mode='min', factor=0.5,
                                                 patience=1)
train_state = make_train_state(args)

epoch_bar = tqdm(desc='training routine',
                          total=args.num_epochs,
                          position=0)

dataset.set_split('train')
train_bar = tqdm(desc='split=train',
                          total=dataset.get_num_batches(args.batch_size),
                          position=1,
                          leave=True)
dataset.set_split('val')
val_bar = tqdm(desc='split=val',
                        total=dataset.get_num_batches(args.batch_size),
                        position=1,
                        leave=True)

try:
    for epoch_index in range(args.num_epochs):
        print(f"\nEpoch [{epoch_index + 1}/{args.num_epochs}]")
        print("-" * 50)
        train_state['epoch_index'] = epoch_index

        # Iterate over training dataset

        # setup: batch generator, set loss and acc to 0, set train mode on

        dataset.set_split('train')
        batch_generator = generate_batches(dataset,
                                           batch_size=args.batch_size,
                                           device=args.device)
        running_loss = 0.0
        running_acc = 0.0
        classifier.train()

        for batch_index, batch_dict in enumerate(batch_generator):

            # the training routine is these 5 steps:

            # --------------------------------------
            # step 1. zero the gradients
            optimizer.zero_grad()

            # step 2. compute the output
            y_pred = classifier(x_in=batch_dict['x_data'])

            # step 3. compute the loss
            loss = loss_func(y_pred, batch_dict['y_target'])
            loss_t = loss.item()
            running_loss += (loss_t - running_loss) / (batch_index + 1)

            # step 4. use loss to produce gradients
            loss.backward()

            # step 5. use optimizer to take gradient step
            optimizer.step()
            # -----------------------------------------
            # compute the accuracy
            acc_t = compute_accuracy(y_pred, batch_dict['y_target'])
            running_acc += (acc_t - running_acc) / (batch_index + 1)

            if batch_index % 500 == 0:
                train_bar.set_postfix(loss=running_loss, acc=0.25, epoch=epoch_index)
            train_bar.update()

        train_state['train_loss'].append(running_loss)
        train_state['train_acc'].append(running_acc)

        # Iterate over val dataset

        # setup: batch generator, set loss and acc to 0; set eval mode on
        dataset.set_split('val')
        batch_generator = generate_batches(dataset,
                                           batch_size=args.batch_size,
                                           device=args.device)
        running_loss = 0.
        running_acc = 0.
        classifier.eval()

        for batch_index, batch_dict in enumerate(batch_generator):
            # compute the output
            y_pred = classifier(x_in=batch_dict['x_data'])

            # step 3. compute the loss
            loss = loss_func(y_pred, batch_dict['y_target'])
            loss_t = loss.item()
            running_loss += (loss_t - running_loss) / (batch_index + 1)

            # compute the accuracy
            acc_t = compute_accuracy(y_pred, batch_dict['y_target'])
            running_acc += (acc_t - running_acc) / (batch_index + 1)
            if batch_index % 500 == 0:
                val_bar.set_postfix(loss=running_loss, acc=0.25, epoch=epoch_index)
            val_bar.update()

        train_state['val_loss'].append(running_loss)
        train_state['val_acc'].append(running_acc)

        train_state = update_train_state(args=args, model=classifier,
                                         train_state=train_state)

        scheduler.step(train_state['val_loss'][-1])

        if train_state['stop_early']:
            break

        train_bar.n = 0
        val_bar.n = 0
        epoch_bar.update()
except KeyboardInterrupt:
    print("Exiting loop")

# compute the loss & accuracy on the test set using the best available model

classifier.load_state_dict(torch.load(train_state['model_filename']))
classifier = classifier.to(args.device)
loss_func = nn.CrossEntropyLoss()

dataset.set_split('test')
batch_generator = generate_batches(dataset,
                                   batch_size=args.batch_size,
                                   device=args.device)
running_loss = 0.
running_acc = 0.
classifier.eval()

for batch_index, batch_dict in enumerate(batch_generator):
    # compute the output
    y_pred = classifier(x_in=batch_dict['x_data'])

    # compute the loss
    loss = loss_func(y_pred, batch_dict['y_target'])
    loss_t = loss.item()
    running_loss += (loss_t - running_loss) / (batch_index + 1)

    # compute the accuracy
    acc_t = compute_accuracy(y_pred, batch_dict['y_target'])
    running_acc += (acc_t - running_acc) / (batch_index + 1)

train_state['test_loss'] = running_loss
train_state['test_acc'] = running_acc

print("Test loss: {};".format(train_state['test_loss']))
print("Test Accuracy: {}".format(train_state['test_acc']))


def pretty_print(results):
    """
    Pretty print embedding results.
    """
    for item in results:
        print("...[%.2f] - %s" % (item[1], item[0]))


def get_closest(target_word, word_to_idx, embeddings, n=5):
    """
    Get the n closest words to your word.
    """
    if target_word.lower() not in word_to_idx:
        print(f"Word '{target_word}' not found in vocabulary.")
        return []

    word_embedding = embeddings[word_to_idx[target_word.lower()]]
    distances = []
    for word, index in word_to_idx.items():
        if word == "<MASK>" or word == target_word:
            continue
        distances.append((word, torch.dist(word_embedding, embeddings[index])))

    results = sorted(distances, key=lambda x: x[1])[1:n + 2]
    return results


word = 'monster'
embeddings = classifier.embedding.weight.data
word_to_idx = vectorizer.cbow_vocab._token_to_idx

# 检查目标词是否在词汇表中
if word.lower() in word_to_idx:
    pretty_print(get_closest(word, word_to_idx, embeddings, n=5))
else:
    print(f"The word '{word}' is not in the vocabulary.")

target_words = ['frankenstein', 'monster', 'science', 'sickness', 'lonely', 'happy']

for target_word in target_words:
    print(f"======={target_word}=======")
    if target_word.lower() not in word_to_idx:
        print(f"The word '{target_word}' is not in the vocabulary.")
        continue
    pretty_print(get_closest(target_word, word_to_idx, embeddings, n=5))

Expanded filepaths: 
	model_storage/ch5/cbow\vectorizer.json
	model_storage/ch5/cbow\model.pth
Using CUDA: False
Loading dataset and creating vectorizer
Vocabulary特殊标记索引： <MASK>:0 <UNK>:1
样本'Zhang'的向量化结果:
[148]
有效长度:1


training routine: 100%|██████████████████████████████████████████████████████████████████| 5/5 [00:34<00:00,  6.91s/it]

split=train:   0%|                                               | 0/240 [00:34<?, ?it/s, acc=0.25, epoch=4, loss=8.77][A

split=val:   0%|                                                  | 0/51 [00:34<?, ?it/s, acc=0.25, epoch=4, loss=8.05][A

split=train:   0%|                                               | 0/240 [00:00<?, ?it/s, acc=0.25, epoch=0, loss=8.75][A
split=train:   7%|██▌                                  | 17/240 [00:00<00:01, 161.23it/s, acc=0.25, epoch=0, loss=8.75][A


Epoch [1/5]
--------------------------------------------------
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：t




输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])


split=train:  15%|█████▍                               | 35/240 [00:00<00:01, 168.05it/s, acc=0.25, epoch=0, loss=8.75][A
split=train:  22%|████████▏                            | 53/240 [00:00<00:01, 170.19it/s, acc=0.25, epoch=0, loss=8.75][A

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Si


split=train:  30%|██████████▉                          | 71/240 [00:00<00:01, 168.95it/s, acc=0.25, epoch=0, loss=8.75][A
split=train:  37%|█████████████▌                       | 88/240 [00:00<00:00, 164.89it/s, acc=0.25, epoch=0, loss=8.75][A

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Si


split=train:  44%|███████████████▊                    | 105/240 [00:00<00:00, 164.52it/s, acc=0.25, epoch=0, loss=8.75][A
split=train:  51%|██████████████████▍                 | 123/240 [00:00<00:00, 168.73it/s, acc=0.25, epoch=0, loss=8.75][A

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Si


split=train:  59%|█████████████████████▎              | 142/240 [00:00<00:00, 172.87it/s, acc=0.25, epoch=0, loss=8.75][A
split=train:  67%|████████████████████████▏           | 161/240 [00:00<00:00, 175.96it/s, acc=0.25, epoch=0, loss=8.75][A

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Si


split=train:  75%|██████████████████████████▊         | 179/240 [00:01<00:00, 173.83it/s, acc=0.25, epoch=0, loss=8.75][A
split=train:  82%|█████████████████████████████▌      | 197/240 [00:01<00:00, 168.87it/s, acc=0.25, epoch=0, loss=8.75][A

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Si


split=train:  89%|████████████████████████████████    | 214/240 [00:01<00:00, 166.82it/s, acc=0.25, epoch=0, loss=8.75][A
split=train:  97%|██████████████████████████████████▉ | 233/240 [00:01<00:00, 170.77it/s, acc=0.25, epoch=0, loss=8.75][A
split=val:   0%|                                                  | 0/51 [00:01<?, ?it/s, acc=0.25, epoch=0, loss=8.67][A
split=val:   2%|▊                                         | 1/51 [00:01<01:10,  1.42s/it, acc=0.25, epoch=0, loss=8.67][A

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Si


training routine:  20%|█████████████▏                                                    | 1/5 [00:01<00:06,  1.57s/it][A
split=train:   0%|                                         | 0/240 [00:01<00:01, 170.77it/s, acc=0.25, epoch=1, loss=9][A

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Si


training routine:  40%|██████████████████████████▍                                       | 2/5 [00:03<00:04,  1.53s/it][A
split=train:   0%|                                      | 0/240 [00:03<00:01, 170.77it/s, acc=0.25, epoch=2, loss=9.04][A

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Si


split=val:   0%|                                          | 0/51 [00:04<00:01, 33.58it/s, acc=0.25, epoch=2, loss=8.55][A

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Si

training routine:  60%|███████████████████████████████████████▌                          | 3/5 [00:04<00:03,  1.55s/it]
split=train:   0%|                                      | 0/240 [00:04<00:01, 170.77it/s, acc=0.25, epoch=3, loss=8.67][A


Epoch [4/5]
--------------------------------------------------
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：t


split=val:   0%|                                          | 0/51 [00:06<00:01, 33.58it/s, acc=0.25, epoch=3, loss=8.56][A

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Si

training routine:  80%|████████████████████████████████████████████████████▊             | 4/5 [00:06<00:01,  1.64s/it]
split=train:   0%|                                      | 0/240 [00:06<00:01, 170.77it/s, acc=0.25, epoch=4, loss=8.77][A

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Si


split=val:   0%|                                          | 0/51 [00:08<00:01, 33.58it/s, acc=0.25, epoch=4, loss=8.05][A

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Si

training routine: 100%|██████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00,  1.75s/it]

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Size([32, 50]) -> 全连接后：torch.Size([32, 6466])
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
嵌入求和后：torch.Si


split=val:   0%|                                          | 0/51 [00:12<00:01, 33.58it/s, acc=0.25, epoch=4, loss=8.05][A
split=train:   0%|                                      | 0/240 [00:12<00:01, 170.77it/s, acc=0.25, epoch=4, loss=8.77][A

## 数据预处理验证

In [27]:
# 数据预处理验证
# 在 SurnameVectorizer 初始化后添加
print("Char Vocab特殊标记索引：", 
      f"<MASK>:{vectorizer.cbow_vocab.mask_index}",
      f"<UNK>:{vectorizer.cbow_vocab.unk_index}")

# 打印样本向量化结果
sample_str = "Zhang"
vec = vectorizer.vectorize(sample_str)
print(f"样本'{sample_str}'的向量化结果:\n{vec}\n有效长度:{len(sample_str.split())}")

Char Vocab特殊标记索引： <MASK>:0 <UNK>:1
样本'Zhang'的向量化结果:
[148]
有效长度:1


## 模型结构验证

In [16]:
# 打印模型各层参数维度
print("\n模型结构：")
for name, param in classifier.named_parameters():
    print(f"{name.ljust(20)} | 维度：{tuple(param.size())}")


模型结构：
embedding.weight     | 维度：(6466, 50)
fc1.weight           | 维度：(6466, 50)
fc1.bias             | 维度：(6466,)


## RNN序列处理验证

In [33]:
# 初始化模型、优化器等
model = RNNClassifier(vocabulary_size=1000, embedding_size=128, hidden_size=256)
optimizer = torch.optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()

# 创建一些虚拟数据
inputs = torch.randint(0, 1000, (32, 50))  # 32个样本，每个样本50个时间步
targets = torch.randint(0, 1000, (32,))

# 前向传播
outputs = model(inputs)
# 打印调试信息
print("Inputs shape:", inputs.shape)
print("Outputs shape:", outputs.shape)

# 计算损失
loss = criterion(outputs, targets)

# 反向传播和优化
optimizer.zero_grad()
loss.backward()
optimizer.step()
class RNNClassifier(nn.Module):
    def __init__(self, vocabulary_size, embedding_size, hidden_size, padding_idx=0):
        super(RNNClassifier, self).__init__()
        self.embedding = nn.Embedding(num_embeddings=vocabulary_size,
                                      embedding_dim=embedding_size,
                                      padding_idx=padding_idx)
        self.rnn = nn.RNN(input_size=embedding_size,
                          hidden_size=hidden_size,
                          batch_first=True)
        self.fc = nn.Linear(hidden_size, vocabulary_size)

    def forward(self, x_in, x_lengths=None, apply_softmax=False):
        # 嵌入层
        x_embedded = self.embedding(x_in)
        # RNN层
        y_out, _ = self.rnn(x_embedded)
        # 聚合（取最后一个有效时间步的输出）
        if x_lengths is not None:
            # 动态索引最后一个有效时间步
            indices = x_lengths.view(-1, 1).expand(-1, y_out.size(2)) - 1
            y_out = y_out.gather(1, indices.unsqueeze(1)).squeeze(1)
        else:
            y_out = y_out[:, -1, :]
        # 全连接层
        y_out = self.fc(y_out)

        # 调试信息
        print(f"输入序列形状：{x_in.shape} -> 嵌入后：{x_embedded.shape}")
        print(f"RNN输出形状：{y_out.shape}（在聚合前为：{y_out.shape if x_lengths is None else f'{y_out.shape} -> 动态索引'}）")

        if apply_softmax:
            y_out = F.softmax(y_out, dim=1)
        return y_out

输入序列形状：torch.Size([32, 50]) -> 嵌入后：torch.Size([32, 50, 128])
RNN输出形状：torch.Size([32, 1000])（在聚合前为：torch.Size([32, 1000])）
Inputs shape: torch.Size([32, 50])
Outputs shape: torch.Size([32, 1000])


## 模型性能评估

In [38]:
# 初始化模型、优化器和损失函数
model = RNNClassifier(vocabulary_size=len(vectorizer.cbow_vocab),
                      embedding_size=args.embedding_size,
                      hidden_size=128)  # RNN隐藏层大小
optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
criterion = nn.CrossEntropyLoss()

# 训练模型
train_state = make_train_state(args)

# 训练循环
try:
    for epoch_index in range(args.num_epochs):
        print(f"\nEpoch [{epoch_index + 1}/{args.num_epochs}]")
        print("-" * 50)
        train_state['epoch_index'] = epoch_index

        # 训练集
        dataset.set_split('train')
        batch_generator = generate_batches(dataset,
                                          batch_size=args.batch_size,
                                          device=args.device)
        running_loss = 0.0
        running_acc = 0.0
        model.train()

        for batch_index, batch_dict in enumerate(batch_generator):
            # 训练步骤
            optimizer.zero_grad()
            y_pred = model(batch_dict['x_data'])
            loss = criterion(y_pred, batch_dict['y_target'])
            loss.backward()
            optimizer.step()

            # 更新训练状态
            running_loss += (loss.item() - running_loss) / (batch_index + 1)
            acc_t = compute_accuracy(y_pred, batch_dict['y_target'])
            running_acc += (acc_t - running_acc) / (batch_index + 1)

            train_bar.set_postfix(loss=running_loss, acc=running_acc, epoch=epoch_index)
            train_bar.update()

        train_state['train_loss'].append(running_loss)
        train_state['train_acc'].append(running_acc)

        # 验证集
        dataset.set_split('val')
        batch_generator = generate_batches(dataset,
                                          batch_size=args.batch_size,
                                          device=args.device)
        running_loss = 0.0
        running_acc = 0.0
        model.eval()

        for batch_index, batch_dict in enumerate(batch_generator):
            y_pred = model(batch_dict['x_data'])
            loss = criterion(y_pred, batch_dict['y_target'])
            running_loss += (loss.item() - running_loss) / (batch_index + 1)
            acc_t = compute_accuracy(y_pred, batch_dict['y_target'])
            running_acc += (acc_t - running_acc) / (batch_index + 1)

            val_bar.set_postfix(loss=running_loss, acc=running_acc, epoch=epoch_index)
            val_bar.update()

        train_state['val_loss'].append(running_loss)
        train_state['val_acc'].append(running_acc)

        train_state = update_train_state(args=args, model=model, train_state=train_state)
        scheduler.step(train_state['val_loss'][-1])

        if train_state['stop_early']:
            break

except KeyboardInterrupt:
    print("Exiting loop")


split=train:   0%|                                          | 0/240 [06:56<00:01, 170.77it/s, acc=0, epoch=0, loss=8.8][A
split=train:   0%|▏                                        | 1/240 [06:56<00:01, 170.77it/s, acc=0, epoch=0, loss=8.81][A
split=train:   1%|▎                                        | 2/240 [06:56<00:01, 170.77it/s, acc=0, epoch=0, loss=8.81][A
split=train:   1%|▌                                        | 3/240 [06:56<00:01, 170.77it/s, acc=0, epoch=0, loss=8.82][A
split=train:   2%|▋                                        | 4/240 [06:56<00:01, 170.77it/s, acc=0, epoch=0, loss=8.81][A
split=train:   2%|▊                                        | 5/240 [06:56<00:01, 170.77it/s, acc=0, epoch=0, loss=8.82][A
split=train:   2%|█                                        | 6/240 [06:56<00:01, 170.77it/s, acc=0, epoch=0, loss=8.81][A
split=train:   3%|█▏                                       | 7/240 [06:56<00:01, 170.77it/s, acc=0, epoch=0, loss=8.81][A
split=train:   


Epoch [1/5]
--------------------------------------------------
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Si


split=train:   6%|██▍                                      | 14/240 [06:57<00:01, 170.77it/s, acc=0, epoch=0, loss=8.8][A
split=train:   6%|██▌                                      | 15/240 [06:57<00:01, 170.77it/s, acc=0, epoch=0, loss=8.8][A
split=train:   7%|██▋                                      | 16/240 [06:57<00:01, 170.77it/s, acc=0, epoch=0, loss=8.8][A
split=train:   7%|██▉                                      | 17/240 [06:57<00:01, 170.77it/s, acc=0, epoch=0, loss=8.8][A
split=train:   8%|███                                      | 18/240 [06:57<00:01, 170.77it/s, acc=0, epoch=0, loss=8.8][A
split=train:   8%|███▏                                     | 19/240 [06:57<00:01, 170.77it/s, acc=0, epoch=0, loss=8.8][A
split=train:   8%|███▍                                     | 20/240 [06:57<00:01, 170.77it/s, acc=0, epoch=0, loss=8.8][A
split=train:   9%|███▌                                     | 21/240 [06:57<00:01, 170.77it/s, acc=0, epoch=0, loss=8.8][A
split=train:   

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train:  12%|████▌                               | 30/240 [06:57<00:01, 170.77it/s, acc=0.101, epoch=0, loss=8.79][A
split=train:  13%|████▌                              | 31/240 [06:57<00:01, 170.77it/s, acc=0.0977, epoch=0, loss=8.79][A
split=train:  13%|████▋                              | 32/240 [06:57<00:01, 170.77it/s, acc=0.0947, epoch=0, loss=8.79][A
split=train:  14%|████▊                              | 33/240 [06:57<00:01, 170.77it/s, acc=0.0919, epoch=0, loss=8.79][A
split=train:  14%|████▉                              | 34/240 [06:57<00:01, 170.77it/s, acc=0.0893, epoch=0, loss=8.79][A
split=train:  15%|█████                              | 35/240 [06:57<00:01, 170.77it/s, acc=0.0868, epoch=0, loss=8.79][A
split=train:  15%|█████▎                             | 36/240 [06:57<00:01, 170.77it/s, acc=0.0845, epoch=0, loss=8.79][A
split=train:  15%|█████▍                             | 37/240 [06:57<00:01, 170.77it/s, acc=0.0822, epoch=0, loss=8.79][A
split=train:  1

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train:  19%|██████▌                            | 45/240 [06:57<00:01, 170.77it/s, acc=0.0679, epoch=0, loss=8.78][A
split=train:  19%|██████▋                            | 46/240 [06:57<00:01, 170.77it/s, acc=0.0665, epoch=0, loss=8.78][A
split=train:  20%|██████▊                            | 47/240 [06:57<00:01, 170.77it/s, acc=0.0651, epoch=0, loss=8.78][A
split=train:  20%|███████                            | 48/240 [06:57<00:01, 170.77it/s, acc=0.0638, epoch=0, loss=8.78][A
split=train:  20%|███████▏                           | 49/240 [06:57<00:01, 170.77it/s, acc=0.0625, epoch=0, loss=8.78][A
split=train:  21%|███████▎                           | 50/240 [06:57<00:01, 170.77it/s, acc=0.0613, epoch=0, loss=8.78][A
split=train:  21%|███████▍                           | 51/240 [06:57<00:01, 170.77it/s, acc=0.0601, epoch=0, loss=8.78][A
split=train:  22%|███████▊                            | 52/240 [06:57<00:01, 170.77it/s, acc=0.059, epoch=0, loss=8.78][A
split=train:  2

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train:  26%|█████████                          | 62/240 [06:57<00:01, 170.77it/s, acc=0.0496, epoch=0, loss=8.77][A
split=train:  26%|█████████▏                         | 63/240 [06:57<00:01, 170.77it/s, acc=0.0488, epoch=0, loss=8.77][A
split=train:  27%|█████████▎                         | 64/240 [06:57<00:01, 170.77it/s, acc=0.0481, epoch=0, loss=8.77][A
split=train:  27%|█████████▍                         | 65/240 [06:57<00:01, 170.77it/s, acc=0.0473, epoch=0, loss=8.77][A
split=train:  28%|█████████▋                         | 66/240 [06:57<00:01, 170.77it/s, acc=0.0466, epoch=0, loss=8.77][A
split=train:  28%|██████████                          | 67/240 [06:57<00:01, 170.77it/s, acc=0.046, epoch=0, loss=8.77][A
split=train:  28%|█████████▉                         | 68/240 [06:57<00:01, 170.77it/s, acc=0.0453, epoch=0, loss=8.77][A
split=train:  29%|██████████                         | 69/240 [06:57<00:01, 170.77it/s, acc=0.0446, epoch=0, loss=8.77][A
split=train:  2

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train:  33%|███████████▌                       | 79/240 [06:57<00:00, 170.77it/s, acc=0.0391, epoch=0, loss=8.76][A
split=train:  33%|███████████▋                       | 80/240 [06:57<00:00, 170.77it/s, acc=0.0386, epoch=0, loss=8.76][A
split=train:  34%|███████████▊                       | 81/240 [06:57<00:00, 170.77it/s, acc=0.0381, epoch=0, loss=8.76][A
split=train:  34%|███████████▉                       | 82/240 [06:57<00:00, 170.77it/s, acc=0.0377, epoch=0, loss=8.76][A
split=train:  35%|████████████                       | 83/240 [06:57<00:00, 170.77it/s, acc=0.0372, epoch=0, loss=8.76][A
split=train:  35%|████████████▎                      | 84/240 [06:57<00:00, 170.77it/s, acc=0.0368, epoch=0, loss=8.76][A
split=train:  35%|████████████▍                      | 85/240 [06:57<00:00, 170.77it/s, acc=0.0727, epoch=0, loss=8.75][A
split=train:  36%|████████████▌                      | 86/240 [06:57<00:00, 170.77it/s, acc=0.0718, epoch=0, loss=8.75][A
split=train:  3

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train:  40%|██████████████▏                    | 97/240 [06:58<00:00, 170.77it/s, acc=0.0957, epoch=0, loss=8.75][A
split=train:  41%|██████████████▎                    | 98/240 [06:58<00:00, 170.77it/s, acc=0.0947, epoch=0, loss=8.74][A
split=train:  41%|██████████████▍                    | 99/240 [06:58<00:00, 170.77it/s, acc=0.0938, epoch=0, loss=8.74][A
split=train:  42%|██████████████▏                   | 100/240 [06:58<00:00, 170.77it/s, acc=0.0928, epoch=0, loss=8.74][A
split=train:  42%|██████████████▎                   | 101/240 [06:58<00:00, 170.77it/s, acc=0.0919, epoch=0, loss=8.74][A
split=train:  42%|██████████████▉                    | 102/240 [06:58<00:00, 170.77it/s, acc=0.091, epoch=0, loss=8.74][A
split=train:  43%|██████████████▌                   | 103/240 [06:58<00:00, 170.77it/s, acc=0.0901, epoch=0, loss=8.74][A
split=train:  43%|██████████████▋                   | 104/240 [06:58<00:00, 170.77it/s, acc=0.0893, epoch=0, loss=8.74][A
split=train:  4

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train:  48%|████████████████▍                 | 116/240 [06:58<00:00, 170.77it/s, acc=0.0801, epoch=0, loss=8.73][A
split=train:  49%|████████████████▌                 | 117/240 [06:58<00:00, 170.77it/s, acc=0.0794, epoch=0, loss=8.73][A
split=train:  49%|████████████████▋                 | 118/240 [06:58<00:00, 170.77it/s, acc=0.0788, epoch=0, loss=8.73][A
split=train:  50%|████████████████▊                 | 119/240 [06:58<00:00, 170.77it/s, acc=0.0781, epoch=0, loss=8.73][A
split=train:  50%|█████████████████                 | 120/240 [06:58<00:00, 170.77it/s, acc=0.0775, epoch=0, loss=8.73][A
split=train:  50%|█████████████████▏                | 121/240 [06:58<00:00, 170.77it/s, acc=0.0768, epoch=0, loss=8.73][A
split=train:  51%|█████████████████▎                | 122/240 [06:58<00:00, 170.77it/s, acc=0.0762, epoch=0, loss=8.73][A
split=train:  51%|█████████████████▉                 | 123/240 [06:58<00:00, 170.77it/s, acc=0.101, epoch=0, loss=8.73][A
split=train:  5

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32

split=train:  55%|███████████████████▉                | 133/240 [06:58<00:00, 170.77it/s, acc=0.14, epoch=0, loss=8.72][A
split=train:  56%|███████████████████▌               | 134/240 [06:58<00:00, 170.77it/s, acc=0.139, epoch=0, loss=8.72][A
split=train:  56%|███████████████████▋               | 135/240 [06:58<00:00, 170.77it/s, acc=0.138, epoch=0, loss=8.72][A
split=train:  57%|███████████████████▊               | 136/240 [06:58<00:00, 170.77it/s, acc=0.137, epoch=0, loss=8.72][A
split=train:  57%|███████████████████▉               | 137/240 [06:58<00:00, 170.77it/s, acc=0.136, epoch=0, loss=8.72][A
split=train:  57%|████████████████████▏              | 138/240 [06:58<00:00, 170.77it/s, acc=0.135, epoch=0, loss=8.72][A
split=train:  58%|████████████████████▎              | 139/240 [06:58<00:00, 170.77it/s, acc=0.134, epoch=0, loss=8.72][A
split=train:  58%|████████████████████▍              | 140/240 [06:58<00:00, 170.77it/s, acc=0.133, epoch=0, loss=8.72][A
split=train:  59

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32

split=train:  63%|██████████████████████             | 151/240 [06:58<00:00, 170.77it/s, acc=0.144, epoch=0, loss=8.71][A
split=train:  63%|██████████████████████▏            | 152/240 [06:58<00:00, 170.77it/s, acc=0.163, epoch=0, loss=8.71][A
split=train:  64%|██████████████████████▎            | 153/240 [06:58<00:00, 170.77it/s, acc=0.162, epoch=0, loss=8.71][A
split=train:  64%|███████████████████████             | 154/240 [06:58<00:00, 170.77it/s, acc=0.161, epoch=0, loss=8.7][A
split=train:  65%|███████████████████████▉             | 155/240 [06:58<00:00, 170.77it/s, acc=0.18, epoch=0, loss=8.7][A
split=train:  65%|███████████████████████▍            | 156/240 [06:58<00:00, 170.77it/s, acc=0.179, epoch=0, loss=8.7][A
split=train:  65%|███████████████████████▌            | 157/240 [06:58<00:00, 170.77it/s, acc=0.198, epoch=0, loss=8.7][A
split=train:  66%|███████████████████████▋            | 158/240 [06:58<00:00, 170.77it/s, acc=0.197, epoch=0, loss=8.7][A
split=train:  66

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train:  71%|████████████████████████▊          | 170/240 [06:58<00:00, 170.77it/s, acc=0.311, epoch=0, loss=8.69][A
split=train:  71%|████████████████████████▉          | 171/240 [06:58<00:00, 170.77it/s, acc=0.345, epoch=0, loss=8.69][A
split=train:  72%|█████████████████████████          | 172/240 [06:58<00:00, 170.77it/s, acc=0.361, epoch=0, loss=8.69][A
split=train:  72%|█████████████████████████▏         | 173/240 [06:58<00:00, 170.77it/s, acc=0.377, epoch=0, loss=8.69][A
split=train:  72%|█████████████████████████▍         | 174/240 [06:58<00:00, 170.77it/s, acc=0.375, epoch=0, loss=8.69][A
split=train:  73%|█████████████████████████▌         | 175/240 [06:58<00:00, 170.77it/s, acc=0.373, epoch=0, loss=8.69][A
split=train:  73%|█████████████████████████▋         | 176/240 [06:58<00:00, 170.77it/s, acc=0.371, epoch=0, loss=8.69][A
split=train:  74%|█████████████████████████▊         | 177/240 [06:58<00:00, 170.77it/s, acc=0.369, epoch=0, loss=8.69][A
split=train:  7

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train:  79%|███████████████████████████▌       | 189/240 [06:59<00:00, 170.77it/s, acc=0.411, epoch=0, loss=8.68][A
split=train:  79%|███████████████████████████▋       | 190/240 [06:59<00:00, 170.77it/s, acc=0.409, epoch=0, loss=8.68][A
split=train:  80%|███████████████████████████▊       | 191/240 [06:59<00:00, 170.77it/s, acc=0.407, epoch=0, loss=8.67][A
split=train:  80%|████████████████████████████       | 192/240 [06:59<00:00, 170.77it/s, acc=0.421, epoch=0, loss=8.67][A
split=train:  80%|████████████████████████████▏      | 193/240 [06:59<00:00, 170.77it/s, acc=0.419, epoch=0, loss=8.67][A
split=train:  81%|████████████████████████████▎      | 194/240 [06:59<00:00, 170.77it/s, acc=0.417, epoch=0, loss=8.67][A
split=train:  81%|████████████████████████████▍      | 195/240 [06:59<00:00, 170.77it/s, acc=0.415, epoch=0, loss=8.67][A
split=train:  82%|████████████████████████████▌      | 196/240 [06:59<00:00, 170.77it/s, acc=0.428, epoch=0, loss=8.67][A
split=train:  8

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train:  85%|█████████████████████████████▊     | 204/240 [06:59<00:00, 170.77it/s, acc=0.503, epoch=0, loss=8.66][A
split=train:  85%|█████████████████████████████▉     | 205/240 [06:59<00:00, 170.77it/s, acc=0.516, epoch=0, loss=8.66][A
split=train:  86%|██████████████████████████████     | 206/240 [06:59<00:00, 170.77it/s, acc=0.543, epoch=0, loss=8.66][A
split=train:  86%|██████████████████████████████▏    | 207/240 [06:59<00:00, 170.77it/s, acc=0.541, epoch=0, loss=8.66][A
split=train:  87%|██████████████████████████████▎    | 208/240 [06:59<00:00, 170.77it/s, acc=0.553, epoch=0, loss=8.66][A
split=train:  87%|██████████████████████████████▍    | 209/240 [06:59<00:00, 170.77it/s, acc=0.595, epoch=0, loss=8.66][A
split=train:  88%|██████████████████████████████▋    | 210/240 [06:59<00:00, 170.77it/s, acc=0.607, epoch=0, loss=8.66][A
split=train:  88%|██████████████████████████████▊    | 211/240 [06:59<00:00, 170.77it/s, acc=0.619, epoch=0, loss=8.66][A
split=train:  8

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train:  92%|█████████████████████████████████▏  | 221/240 [06:59<00:00, 170.77it/s, acc=0.76, epoch=0, loss=8.65][A
split=train:  92%|████████████████████████████████▍  | 222/240 [06:59<00:00, 170.77it/s, acc=0.757, epoch=0, loss=8.65][A
split=train:  93%|████████████████████████████████▌  | 223/240 [06:59<00:00, 170.77it/s, acc=0.781, epoch=0, loss=8.65][A
split=train:  93%|████████████████████████████████▋  | 224/240 [06:59<00:00, 170.77it/s, acc=0.792, epoch=0, loss=8.65][A
split=train:  94%|████████████████████████████████▊  | 225/240 [06:59<00:00, 170.77it/s, acc=0.816, epoch=0, loss=8.65][A
split=train:  94%|████████████████████████████████▉  | 226/240 [06:59<00:00, 170.77it/s, acc=0.826, epoch=0, loss=8.65][A
split=train:  95%|█████████████████████████████████  | 227/240 [06:59<00:00, 170.77it/s, acc=0.822, epoch=0, loss=8.64][A
split=train:  95%|█████████████████████████████████▎ | 228/240 [06:59<00:00, 170.77it/s, acc=0.846, epoch=0, loss=8.64][A
split=train:  9

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 100%|████████████████████████████████████▊| 239/240 [06:59<00:09,  9.74s/it, acc=1.15, epoch=0, loss=8.63][A
split=val:   0%|                                             | 0/51 [06:59<00:01, 33.58it/s, acc=0, epoch=0, loss=8.54][A
split=val:   2%|▉                                            | 1/51 [06:59<00:01, 33.58it/s, acc=0, epoch=0, loss=8.54][A
split=val:   4%|█▋                                        | 2/51 [06:59<00:01, 33.58it/s, acc=2.08, epoch=0, loss=8.52][A
split=val:   6%|██▍                                       | 3/51 [06:59<00:01, 33.58it/s, acc=2.34, epoch=0, loss=8.52][A
split=val:   8%|███▎                                       | 4/51 [06:59<00:01, 33.58it/s, acc=2.5, epoch=0, loss=8.51][A
split=val:  10%|████                                      | 5/51 [06:59<00:01, 33.58it/s, acc=3.12, epoch=0, loss=8.49][A
split=val:  12%|█████                                      | 6/51 [06:59<00:01, 33.58it/s, acc=2.68, epoch=0, loss=8.5][A
split=val:  14%

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=val:  53%|██████████████████████▏                   | 27/51 [06:59<00:00, 33.58it/s, acc=1.9, epoch=0, loss=8.49][A
split=val:  55%|██████████████████████▌                  | 28/51 [06:59<00:00, 33.58it/s, acc=1.83, epoch=0, loss=8.49][A
split=val:  57%|███████████████████████▎                 | 29/51 [06:59<00:00, 33.58it/s, acc=1.98, epoch=0, loss=8.49][A
split=val:  59%|████████████████████████                 | 30/51 [06:59<00:00, 33.58it/s, acc=2.22, epoch=0, loss=8.49][A
split=val:  61%|████████████████████████▉                | 31/51 [06:59<00:00, 33.58it/s, acc=2.25, epoch=0, loss=8.49][A
split=val:  63%|█████████████████████████▋               | 32/51 [06:59<00:00, 33.58it/s, acc=2.18, epoch=0, loss=8.49][A
split=val:  65%|██████████████████████████▌              | 33/51 [06:59<00:00, 33.58it/s, acc=2.11, epoch=0, loss=8.49][A
split=val:  67%|███████████████████████████▎             | 34/51 [06:59<00:00, 33.58it/s, acc=2.14, epoch=0, loss=8.49][A
split=val:  69%

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 243it [07:00,  7.91s/it, acc=7.81, epoch=1, loss=8.39][A
split=train: 244it [07:00,  7.91s/it, acc=8.75, epoch=1, loss=8.37][A
split=train: 245it [07:00,  7.91s/it, acc=7.29, epoch=1, loss=8.38][A
split=train: 246it [07:00,  7.91s/it, acc=8.04, epoch=1, loss=8.38][A
split=train: 247it [07:00,  7.91s/it, acc=8.2, epoch=1, loss=8.37] [A
split=train: 248it [07:00,  7.91s/it, acc=9.03, epoch=1, loss=8.36][A
split=train: 249it [07:00,  7.91s/it, acc=9.06, epoch=1, loss=8.37][A
split=train: 250it [07:00,  7.91s/it, acc=10.8, epoch=1, loss=8.36][A
split=train: 251it [07:00,  7.91s/it, acc=10.9, epoch=1, loss=8.36][A
split=train: 252it [07:00,  7.91s/it, acc=11.3, epoch=1, loss=8.37][A
split=train: 253it [07:00,  7.91s/it, acc=11.4, epoch=1, loss=8.37][A
split=train: 254it [07:00,  5.28s/it, acc=11.4, epoch=1, loss=8.37][A
split=train: 254it [07:00,  5.28s/it, acc=11, epoch=1, loss=8.37]  [A
split=train: 255it [07:00,  5.28s/it, acc=10.9, epoch=1, loss=8.37][A
split

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 260it [07:00,  5.28s/it, acc=11.8, epoch=1, loss=8.37][A
split=train: 261it [07:00,  5.28s/it, acc=11.8, epoch=1, loss=8.37][A
split=train: 262it [07:00,  5.28s/it, acc=11.5, epoch=1, loss=8.37][A
split=train: 263it [07:00,  5.28s/it, acc=11.6, epoch=1, loss=8.37][A
split=train: 264it [07:00,  5.28s/it, acc=11.2, epoch=1, loss=8.36][A
split=train: 265it [07:00,  5.28s/it, acc=11.5, epoch=1, loss=8.36][A
split=train: 266it [07:00,  3.67s/it, acc=11.5, epoch=1, loss=8.36][A
split=train: 266it [07:00,  3.67s/it, acc=11.3, epoch=1, loss=8.36][A
split=train: 267it [07:00,  3.67s/it, acc=10.9, epoch=1, loss=8.36][A
split=train: 268it [07:00,  3.67s/it, acc=11, epoch=1, loss=8.36]  [A
split=train: 269it [07:00,  3.67s/it, acc=11.2, epoch=1, loss=8.36][A
split=train: 270it [07:00,  3.67s/it, acc=11.6, epoch=1, loss=8.36][A
split=train: 271it [07:00,  3.67s/it, acc=11.4, epoch=1, loss=8.36][A
split=train: 272it [07:00,  3.67s/it, acc=11.5, epoch=1, loss=8.36][A
split

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 279it [07:00,  2.62s/it, acc=11.2, epoch=1, loss=8.35][A
split=train: 280it [07:00,  2.62s/it, acc=11.4, epoch=1, loss=8.35][A
split=train: 281it [07:00,  2.62s/it, acc=11.3, epoch=1, loss=8.35][A
split=train: 282it [07:00,  2.62s/it, acc=11.3, epoch=1, loss=8.35][A
split=train: 283it [07:00,  2.62s/it, acc=11.2, epoch=1, loss=8.35][A
split=train: 284it [07:00,  2.62s/it, acc=11.3, epoch=1, loss=8.35][A
split=train: 285it [07:00,  2.62s/it, acc=11.3, epoch=1, loss=8.35][A
split=train: 286it [07:00,  2.62s/it, acc=11.2, epoch=1, loss=8.34][A
split=train: 287it [07:00,  1.92s/it, acc=11.2, epoch=1, loss=8.34][A
split=train: 287it [07:00,  1.92s/it, acc=11.2, epoch=1, loss=8.34][A
split=train: 288it [07:00,  1.92s/it, acc=11.2, epoch=1, loss=8.34][A
split=train: 289it [07:00,  1.92s/it, acc=11.2, epoch=1, loss=8.34][A
split=train: 290it [07:00,  1.92s/it, acc=11.3, epoch=1, loss=8.34][A
split=train: 291it [07:00,  1.92s/it, acc=11.3, epoch=1, loss=8.34][A
split

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 297it [07:00,  1.38s/it, acc=11.7, epoch=1, loss=8.33][A
split=train: 298it [07:00,  1.38s/it, acc=11.7, epoch=1, loss=8.33][A
split=train: 299it [07:00,  1.38s/it, acc=11.7, epoch=1, loss=8.33][A
split=train: 300it [07:00,  1.38s/it, acc=11.9, epoch=1, loss=8.33][A
split=train: 301it [07:00,  1.38s/it, acc=12.1, epoch=1, loss=8.32][A
split=train: 302it [07:00,  1.38s/it, acc=12.3, epoch=1, loss=8.32][A
split=train: 303it [07:00,  1.38s/it, acc=12.4, epoch=1, loss=8.32][A
split=train: 304it [07:00,  1.38s/it, acc=12.3, epoch=1, loss=8.32][A
split=train: 305it [07:00,  1.38s/it, acc=12.5, epoch=1, loss=8.32][A
split=train: 306it [07:00,  1.38s/it, acc=12.6, epoch=1, loss=8.32][A
split=train: 307it [07:00,  1.01it/s, acc=12.6, epoch=1, loss=8.32][A
split=train: 307it [07:00,  1.01it/s, acc=12.6, epoch=1, loss=8.32][A
split=train: 308it [07:00,  1.01it/s, acc=12.6, epoch=1, loss=8.31][A
split=train: 309it [07:00,  1.01it/s, acc=12.6, epoch=1, loss=8.31][A
split

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 315it [07:00,  1.01it/s, acc=12.8, epoch=1, loss=8.3][A
split=train: 316it [07:00,  1.01it/s, acc=12.7, epoch=1, loss=8.3][A
split=train: 317it [07:00,  1.41it/s, acc=12.7, epoch=1, loss=8.3][A
split=train: 317it [07:00,  1.41it/s, acc=12.7, epoch=1, loss=8.3][A
split=train: 318it [07:00,  1.41it/s, acc=12.7, epoch=1, loss=8.3][A
split=train: 319it [07:00,  1.41it/s, acc=12.8, epoch=1, loss=8.29][A
split=train: 320it [07:01,  1.41it/s, acc=13, epoch=1, loss=8.29]  [A
split=train: 321it [07:01,  1.41it/s, acc=13.1, epoch=1, loss=8.29][A
split=train: 322it [07:01,  1.41it/s, acc=13.1, epoch=1, loss=8.29][A
split=train: 323it [07:01,  1.41it/s, acc=13.1, epoch=1, loss=8.29][A
split=train: 324it [07:01,  1.41it/s, acc=13.1, epoch=1, loss=8.29][A
split=train: 325it [07:01,  1.41it/s, acc=13.1, epoch=1, loss=8.28][A
split=train: 326it [07:01,  1.41it/s, acc=13.1, epoch=1, loss=8.28][A
split=train: 327it [07:01,  1.98it/s, acc=13.1, epoch=1, loss=8.28][A
split=trai

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 332it [07:01,  1.98it/s, acc=13.2, epoch=1, loss=8.28][A
split=train: 333it [07:01,  1.98it/s, acc=13.3, epoch=1, loss=8.27][A
split=train: 334it [07:01,  1.98it/s, acc=13.6, epoch=1, loss=8.27][A
split=train: 335it [07:01,  1.98it/s, acc=13.6, epoch=1, loss=8.27][A
split=train: 336it [07:01,  1.98it/s, acc=13.7, epoch=1, loss=8.27][A
split=train: 337it [07:01,  2.78it/s, acc=13.7, epoch=1, loss=8.27][A
split=train: 337it [07:01,  2.78it/s, acc=13.7, epoch=1, loss=8.27][A
split=train: 338it [07:01,  2.78it/s, acc=13.7, epoch=1, loss=8.27][A
split=train: 339it [07:01,  2.78it/s, acc=13.8, epoch=1, loss=8.27][A
split=train: 340it [07:01,  2.78it/s, acc=13.8, epoch=1, loss=8.26][A
split=train: 341it [07:01,  2.78it/s, acc=13.7, epoch=1, loss=8.26][A
split=train: 342it [07:01,  2.78it/s, acc=13.7, epoch=1, loss=8.26][A
split=train: 343it [07:01,  2.78it/s, acc=13.8, epoch=1, loss=8.26][A
split=train: 344it [07:01,  2.78it/s, acc=13.7, epoch=1, loss=8.26][A
split

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 350it [07:01,  3.89it/s, acc=13.9, epoch=1, loss=8.25][A
split=train: 351it [07:01,  3.89it/s, acc=14, epoch=1, loss=8.25]  [A
split=train: 352it [07:01,  3.89it/s, acc=14, epoch=1, loss=8.25][A
split=train: 353it [07:01,  3.89it/s, acc=14, epoch=1, loss=8.24][A
split=train: 354it [07:01,  3.89it/s, acc=14.1, epoch=1, loss=8.24][A
split=train: 355it [07:01,  3.89it/s, acc=14.1, epoch=1, loss=8.24][A
split=train: 356it [07:01,  3.89it/s, acc=14.1, epoch=1, loss=8.24][A
split=train: 357it [07:01,  5.44it/s, acc=14.1, epoch=1, loss=8.24][A
split=train: 357it [07:01,  5.44it/s, acc=14, epoch=1, loss=8.24]  [A
split=train: 358it [07:01,  5.44it/s, acc=14, epoch=1, loss=8.24][A
split=train: 359it [07:01,  5.44it/s, acc=14, epoch=1, loss=8.24][A
split=train: 360it [07:01,  5.44it/s, acc=14, epoch=1, loss=8.24][A
split=train: 361it [07:01,  5.44it/s, acc=14, epoch=1, loss=8.23][A
split=train: 362it [07:01,  5.44it/s, acc=14, epoch=1, loss=8.23][A
split=train: 363it 

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 369it [07:01,  7.36it/s, acc=14.4, epoch=1, loss=8.22][A
split=train: 370it [07:01,  7.36it/s, acc=14.5, epoch=1, loss=8.22][A
split=train: 371it [07:01,  7.36it/s, acc=14.5, epoch=1, loss=8.22][A
split=train: 372it [07:01,  7.36it/s, acc=14.6, epoch=1, loss=8.22][A
split=train: 373it [07:01,  7.36it/s, acc=14.7, epoch=1, loss=8.21][A
split=train: 374it [07:01,  7.36it/s, acc=14.7, epoch=1, loss=8.21][A
split=train: 375it [07:01,  9.93it/s, acc=14.7, epoch=1, loss=8.21][A
split=train: 375it [07:01,  9.93it/s, acc=14.8, epoch=1, loss=8.21][A
split=train: 376it [07:01,  9.93it/s, acc=14.7, epoch=1, loss=8.21][A
split=train: 377it [07:01,  9.93it/s, acc=14.7, epoch=1, loss=8.21][A
split=train: 378it [07:01,  9.93it/s, acc=14.7, epoch=1, loss=8.21][A
split=train: 379it [07:01,  9.93it/s, acc=14.7, epoch=1, loss=8.21][A
split=train: 380it [07:01,  9.93it/s, acc=14.7, epoch=1, loss=8.2] [A
split=train: 381it [07:01,  9.93it/s, acc=14.7, epoch=1, loss=8.2][A
split=

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 388it [07:01, 13.35it/s, acc=14.9, epoch=1, loss=8.19][A
split=train: 389it [07:01, 13.35it/s, acc=15, epoch=1, loss=8.19]  [A
split=train: 390it [07:01, 13.35it/s, acc=14.9, epoch=1, loss=8.19][A
split=train: 391it [07:01, 13.35it/s, acc=14.9, epoch=1, loss=8.18][A
split=train: 392it [07:01, 13.35it/s, acc=14.9, epoch=1, loss=8.18][A
split=train: 393it [07:01, 17.72it/s, acc=14.9, epoch=1, loss=8.18][A
split=train: 393it [07:01, 17.72it/s, acc=15, epoch=1, loss=8.18]  [A
split=train: 394it [07:01, 17.72it/s, acc=14.9, epoch=1, loss=8.18][A
split=train: 395it [07:01, 17.72it/s, acc=14.9, epoch=1, loss=8.18][A
split=train: 396it [07:01, 17.72it/s, acc=15, epoch=1, loss=8.17]  [A
split=train: 397it [07:01, 17.72it/s, acc=15, epoch=1, loss=8.17][A
split=train: 398it [07:01, 17.72it/s, acc=15, epoch=1, loss=8.17][A
split=train: 399it [07:01, 17.72it/s, acc=15.1, epoch=1, loss=8.17][A
split=train: 400it [07:01, 17.72it/s, acc=15.1, epoch=1, loss=8.17][A
split=tra

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32

split=train: 405it [07:01, 23.13it/s, acc=15.3, epoch=1, loss=8.16][A
split=train: 406it [07:01, 23.13it/s, acc=15.4, epoch=1, loss=8.16][A
split=train: 407it [07:01, 23.13it/s, acc=15.4, epoch=1, loss=8.15][A
split=train: 408it [07:02, 23.13it/s, acc=15.4, epoch=1, loss=8.15][A
split=train: 409it [07:02, 23.13it/s, acc=15.4, epoch=1, loss=8.15][A
split=train: 410it [07:02, 23.13it/s, acc=15.4, epoch=1, loss=8.15][A
split=train: 411it [07:02, 23.13it/s, acc=15.5, epoch=1, loss=8.15][A
split=train: 412it [07:02, 30.42it/s, acc=15.5, epoch=1, loss=8.15][A
split=train: 412it [07:02, 30.42it/s, acc=15.5, epoch=1, loss=8.14][A
split=train: 413it [07:02, 30.42it/s, acc=15.4, epoch=1, loss=8.14][A
split=train: 414it [07:02, 30.42it/s, acc=15.5, epoch=1, loss=8.14][A
split=train: 415it [07:02, 30.42it/s, acc=15.5, epoch=1, loss=8.14][A
split=train: 416it [07:02, 30.42it/s, acc=15.5, epoch=1, loss=8.14][A
split=train: 417it [07:02, 30.42it/s, acc=15.5, epoch=1, loss=8.14][A
split=

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32

split=train: 424it [07:02, 38.47it/s, acc=15.6, epoch=1, loss=8.12][A
split=train: 425it [07:02, 38.47it/s, acc=15.6, epoch=1, loss=8.12][A
split=train: 426it [07:02, 38.47it/s, acc=15.6, epoch=1, loss=8.12][A
split=train: 427it [07:02, 38.47it/s, acc=15.6, epoch=1, loss=8.12][A
split=train: 428it [07:02, 38.47it/s, acc=15.6, epoch=1, loss=8.12][A
split=train: 429it [07:02, 38.47it/s, acc=15.7, epoch=1, loss=8.11][A
split=train: 430it [07:02, 38.47it/s, acc=15.7, epoch=1, loss=8.11][A
split=train: 431it [07:02, 38.47it/s, acc=15.7, epoch=1, loss=8.11][A
split=train: 432it [07:02, 46.07it/s, acc=15.7, epoch=1, loss=8.11][A
split=train: 432it [07:02, 46.07it/s, acc=15.7, epoch=1, loss=8.11][A
split=train: 433it [07:02, 46.07it/s, acc=15.7, epoch=1, loss=8.11][A
split=train: 434it [07:02, 46.07it/s, acc=15.8, epoch=1, loss=8.11][A
split=train: 435it [07:02, 46.07it/s, acc=15.8, epoch=1, loss=8.1] [A
split=train: 436it [07:02, 46.07it/s, acc=15.8, epoch=1, loss=8.1][A
split=t

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 443it [07:02, 54.39it/s, acc=16, epoch=1, loss=8.09][A
split=train: 444it [07:02, 54.39it/s, acc=16.1, epoch=1, loss=8.08][A
split=train: 445it [07:02, 54.39it/s, acc=16, epoch=1, loss=8.08]  [A
split=train: 446it [07:02, 54.39it/s, acc=16.1, epoch=1, loss=8.08][A
split=train: 447it [07:02, 54.39it/s, acc=16.1, epoch=1, loss=8.08][A
split=train: 448it [07:02, 54.39it/s, acc=16.1, epoch=1, loss=8.08][A
split=train: 449it [07:02, 54.39it/s, acc=16.1, epoch=1, loss=8.08][A
split=train: 450it [07:02, 54.39it/s, acc=16.1, epoch=1, loss=8.07][A
split=train: 451it [07:02, 54.39it/s, acc=16.2, epoch=1, loss=8.07][A
split=train: 452it [07:02, 62.45it/s, acc=16.2, epoch=1, loss=8.07][A
split=train: 452it [07:02, 62.45it/s, acc=16.1, epoch=1, loss=8.07][A
split=train: 453it [07:02, 62.45it/s, acc=16.1, epoch=1, loss=8.07][A
split=train: 454it [07:02, 62.45it/s, acc=16.1, epoch=1, loss=8.07][A
split=train: 455it [07:02, 62.45it/s, acc=16.1, epoch=1, loss=8.06][A
split=t

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 462it [07:02, 69.86it/s, acc=16.2, epoch=1, loss=8.05][A
split=train: 463it [07:02, 69.86it/s, acc=16.2, epoch=1, loss=8.05][A
split=train: 464it [07:02, 69.86it/s, acc=16.2, epoch=1, loss=8.05][A
split=train: 465it [07:02, 69.86it/s, acc=16.2, epoch=1, loss=8.04][A
split=train: 466it [07:02, 69.86it/s, acc=16.2, epoch=1, loss=8.04][A
split=train: 467it [07:02, 69.86it/s, acc=16.3, epoch=1, loss=8.04][A
split=train: 468it [07:02, 69.86it/s, acc=16.3, epoch=1, loss=8.04][A
split=train: 469it [07:02, 69.86it/s, acc=16.2, epoch=1, loss=8.03][A
split=train: 470it [07:02, 69.86it/s, acc=16.3, epoch=1, loss=8.03][A
split=train: 471it [07:02, 69.86it/s, acc=16.4, epoch=1, loss=8.03][A
split=train: 472it [07:02, 75.40it/s, acc=16.4, epoch=1, loss=8.03][A
split=train: 472it [07:02, 75.40it/s, acc=16.4, epoch=1, loss=8.03][A
split=train: 473it [07:02, 75.40it/s, acc=16.4, epoch=1, loss=8.03][A
split=train: 474it [07:02, 75.40it/s, acc=16.4, epoch=1, loss=8.02][A
split

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=val: 54it [07:02,  9.06s/it, acc=4.69, epoch=1, loss=7.64][A
split=val: 55it [07:02,  9.06s/it, acc=4.38, epoch=1, loss=7.65][A
split=val: 56it [07:02,  9.06s/it, acc=4.69, epoch=1, loss=7.64][A
split=val: 57it [07:02,  9.06s/it, acc=4.91, epoch=1, loss=7.65][A
split=val: 58it [07:02,  9.06s/it, acc=5.08, epoch=1, loss=7.66][A
split=val: 59it [07:02,  9.06s/it, acc=5.21, epoch=1, loss=7.66][A
split=val: 60it [07:02,  9.06s/it, acc=5.62, epoch=1, loss=7.66][A
split=val: 61it [07:02,  9.06s/it, acc=5.4, epoch=1, loss=7.67] [A
split=val: 62it [07:02,  9.06s/it, acc=4.95, epoch=1, loss=7.67][A
split=val: 63it [07:02,  9.06s/it, acc=5.53, epoch=1, loss=7.67][A
split=val: 64it [07:02,  9.06s/it, acc=5.36, epoch=1, loss=7.66][A
split=val: 65it [07:02,  9.06s/it, acc=5.42, epoch=1, loss=7.65][A
split=val: 66it [07:02,  9.06s/it, acc=5.27, epoch=1, loss=7.66][A
split=val: 67it [07:02,  9.06s/it, acc=5.33, epoch=1, loss=7.65][A
split=val: 68it [07:02,  5.34s/it, acc=5.33, ep

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32

split=val: 86it [07:03,  3.29s/it, acc=5.99, epoch=1, loss=7.64][A
split=val: 87it [07:03,  3.29s/it, acc=6.08, epoch=1, loss=7.64][A
split=val: 88it [07:03,  3.29s/it, acc=6.25, epoch=1, loss=7.64][A
split=val: 89it [07:03,  3.29s/it, acc=6.33, epoch=1, loss=7.65][A
split=val: 90it [07:03,  3.29s/it, acc=6.48, epoch=1, loss=7.64][A
split=val: 91it [07:03,  3.29s/it, acc=6.4, epoch=1, loss=7.64] [A
split=val: 92it [07:03,  3.29s/it, acc=6.4, epoch=1, loss=7.64][A
split=val: 93it [07:03,  3.29s/it, acc=6.4, epoch=1, loss=7.64][A
split=val: 94it [07:03,  3.29s/it, acc=6.61, epoch=1, loss=7.64][A
split=val: 95it [07:03,  3.29s/it, acc=6.6, epoch=1, loss=7.64] [A
split=val: 96it [07:03,  3.29s/it, acc=6.59, epoch=1, loss=7.64][A
split=val: 97it [07:03,  3.29s/it, acc=6.65, epoch=1, loss=7.64][A
split=val: 98it [07:03,  3.29s/it, acc=6.58, epoch=1, loss=7.63][A
split=val: 99it [07:03,  3.29s/it, acc=6.57, epoch=1, loss=7.64][A
split=val: 100it [07:03,  2.22s/it, acc=6.57, epoc

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 484it [07:03, 44.09it/s, acc=26.2, epoch=2, loss=7.48][A
split=train: 485it [07:03, 44.09it/s, acc=26, epoch=2, loss=7.45]  [A
split=train: 486it [07:03, 44.09it/s, acc=25, epoch=2, loss=7.46][A
split=train: 487it [07:03, 44.09it/s, acc=24.2, epoch=2, loss=7.47][A
split=train: 488it [07:03, 44.09it/s, acc=23.6, epoch=2, loss=7.46][A
split=train: 489it [07:03, 44.09it/s, acc=22.8, epoch=2, loss=7.46][A
split=train: 490it [07:03, 45.35it/s, acc=22.8, epoch=2, loss=7.46][A
split=train: 490it [07:03, 45.35it/s, acc=23.6, epoch=2, loss=7.46][A
split=train: 491it [07:03, 45.35it/s, acc=23.7, epoch=2, loss=7.45][A
split=train: 492it [07:03, 45.35it/s, acc=23.3, epoch=2, loss=7.45][A
split=train: 493it [07:03, 45.35it/s, acc=24.1, epoch=2, loss=7.45][A
split=train: 494it [07:03, 45.35it/s, acc=24.4, epoch=2, loss=7.44][A
split=train: 495it [07:03, 45.35it/s, acc=23.6, epoch=2, loss=7.44][A
split=train: 496it [07:03, 45.35it/s, acc=23.3, epoch=2, loss=7.44][A
split=t

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 499it [07:03, 52.05it/s, acc=23, epoch=2, loss=7.43]  [A
split=train: 500it [07:03, 52.05it/s, acc=22.3, epoch=2, loss=7.42][A
split=train: 501it [07:03, 52.05it/s, acc=22.3, epoch=2, loss=7.41][A
split=train: 502it [07:03, 52.05it/s, acc=22.4, epoch=2, loss=7.41][A
split=train: 503it [07:03, 52.05it/s, acc=22.3, epoch=2, loss=7.41][A
split=train: 504it [07:03, 52.05it/s, acc=22.1, epoch=2, loss=7.4] [A
split=train: 505it [07:03, 52.05it/s, acc=21.9, epoch=2, loss=7.4][A
split=train: 506it [07:03, 52.05it/s, acc=21.2, epoch=2, loss=7.4][A
split=train: 507it [07:03, 56.87it/s, acc=21.2, epoch=2, loss=7.4][A
split=train: 507it [07:03, 56.87it/s, acc=21, epoch=2, loss=7.39] [A
split=train: 508it [07:03, 56.87it/s, acc=21.2, epoch=2, loss=7.39][A
split=train: 509it [07:03, 56.87it/s, acc=21.6, epoch=2, loss=7.38][A
split=train: 510it [07:03, 56.87it/s, acc=21.9, epoch=2, loss=7.38][A
split=train: 511it [07:03, 56.87it/s, acc=22, epoch=2, loss=7.38]  [A
split=tra

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 516it [07:03, 63.07it/s, acc=22.3, epoch=2, loss=7.36][A
split=train: 517it [07:03, 63.07it/s, acc=22.3, epoch=2, loss=7.36][A
split=train: 518it [07:03, 63.07it/s, acc=22.1, epoch=2, loss=7.36][A
split=train: 519it [07:03, 63.07it/s, acc=22.2, epoch=2, loss=7.36][A
split=train: 520it [07:03, 63.07it/s, acc=22, epoch=2, loss=7.36]  [A
split=train: 521it [07:03, 63.07it/s, acc=21.9, epoch=2, loss=7.35][A
split=train: 522it [07:03, 63.07it/s, acc=21.9, epoch=2, loss=7.35][A
split=train: 523it [07:03, 63.07it/s, acc=21.7, epoch=2, loss=7.35][A
split=train: 524it [07:03, 63.07it/s, acc=21.5, epoch=2, loss=7.35][A
split=train: 525it [07:03, 68.95it/s, acc=21.5, epoch=2, loss=7.35][A
split=train: 525it [07:03, 68.95it/s, acc=21.7, epoch=2, loss=7.34][A
split=train: 526it [07:03, 68.95it/s, acc=21.4, epoch=2, loss=7.34][A
split=train: 527it [07:03, 68.95it/s, acc=21.4, epoch=2, loss=7.34][A
split=train: 528it [07:03, 68.95it/s, acc=21.4, epoch=2, loss=7.34][A
split

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 535it [07:03, 73.74it/s, acc=21.8, epoch=2, loss=7.32][A
split=train: 536it [07:03, 73.74it/s, acc=22.1, epoch=2, loss=7.31][A
split=train: 537it [07:03, 73.74it/s, acc=22.3, epoch=2, loss=7.3] [A
split=train: 538it [07:03, 73.74it/s, acc=22.4, epoch=2, loss=7.3][A
split=train: 539it [07:03, 73.74it/s, acc=22.4, epoch=2, loss=7.3][A
split=train: 540it [07:03, 73.74it/s, acc=22.4, epoch=2, loss=7.29][A
split=train: 541it [07:03, 73.74it/s, acc=22.5, epoch=2, loss=7.29][A
split=train: 542it [07:03, 73.74it/s, acc=22.6, epoch=2, loss=7.29][A
split=train: 543it [07:03, 77.09it/s, acc=22.6, epoch=2, loss=7.29][A
split=train: 543it [07:03, 77.09it/s, acc=22.7, epoch=2, loss=7.29][A
split=train: 544it [07:03, 77.09it/s, acc=22.5, epoch=2, loss=7.29][A
split=train: 545it [07:03, 77.09it/s, acc=22.4, epoch=2, loss=7.28][A
split=train: 546it [07:03, 77.09it/s, acc=22.4, epoch=2, loss=7.28][A
split=train: 547it [07:03, 77.09it/s, acc=22.2, epoch=2, loss=7.28][A
split=t

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 552it [07:04, 77.54it/s, acc=21.8, epoch=2, loss=7.27][A
split=train: 553it [07:04, 77.54it/s, acc=21.9, epoch=2, loss=7.26][A
split=train: 554it [07:04, 77.54it/s, acc=22, epoch=2, loss=7.26]  [A
split=train: 555it [07:04, 77.54it/s, acc=22.3, epoch=2, loss=7.25][A
split=train: 556it [07:04, 77.54it/s, acc=22.3, epoch=2, loss=7.25][A
split=train: 557it [07:04, 77.54it/s, acc=22.2, epoch=2, loss=7.24][A
split=train: 558it [07:04, 77.54it/s, acc=22.3, epoch=2, loss=7.24][A
split=train: 559it [07:04, 77.54it/s, acc=22.3, epoch=2, loss=7.24][A
split=train: 560it [07:04, 77.54it/s, acc=22.5, epoch=2, loss=7.23][A
split=train: 561it [07:04, 77.54it/s, acc=22.4, epoch=2, loss=7.23][A
split=train: 562it [07:04, 81.43it/s, acc=22.4, epoch=2, loss=7.23][A
split=train: 562it [07:04, 81.43it/s, acc=22.3, epoch=2, loss=7.23][A
split=train: 563it [07:04, 81.43it/s, acc=22.4, epoch=2, loss=7.22][A
split=train: 564it [07:04, 81.43it/s, acc=22.6, epoch=2, loss=7.22][A
split

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 571it [07:04, 81.43it/s, acc=22.5, epoch=2, loss=7.19][A
split=train: 572it [07:04, 84.87it/s, acc=22.5, epoch=2, loss=7.19][A
split=train: 572it [07:04, 84.87it/s, acc=22.6, epoch=2, loss=7.19][A
split=train: 573it [07:04, 84.87it/s, acc=22.6, epoch=2, loss=7.19][A
split=train: 574it [07:04, 84.87it/s, acc=22.6, epoch=2, loss=7.18][A
split=train: 575it [07:04, 84.87it/s, acc=22.7, epoch=2, loss=7.18][A
split=train: 576it [07:04, 84.87it/s, acc=22.7, epoch=2, loss=7.18][A
split=train: 577it [07:04, 84.87it/s, acc=22.5, epoch=2, loss=7.18][A
split=train: 578it [07:04, 84.87it/s, acc=22.6, epoch=2, loss=7.17][A
split=train: 579it [07:04, 84.87it/s, acc=22.7, epoch=2, loss=7.17][A
split=train: 580it [07:04, 84.87it/s, acc=22.7, epoch=2, loss=7.17][A
split=train: 581it [07:04, 84.87it/s, acc=22.7, epoch=2, loss=7.16][A
split=train: 582it [07:04, 87.94it/s, acc=22.7, epoch=2, loss=7.16][A
split=train: 582it [07:04, 87.94it/s, acc=22.6, epoch=2, loss=7.16][A
split

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 590it [07:04, 87.94it/s, acc=22.8, epoch=2, loss=7.13][A
split=train: 591it [07:04, 87.94it/s, acc=22.9, epoch=2, loss=7.12][A
split=train: 592it [07:04, 90.52it/s, acc=22.9, epoch=2, loss=7.12][A
split=train: 592it [07:04, 90.52it/s, acc=22.8, epoch=2, loss=7.12][A
split=train: 593it [07:04, 90.52it/s, acc=22.7, epoch=2, loss=7.12][A
split=train: 594it [07:04, 90.52it/s, acc=22.7, epoch=2, loss=7.12][A
split=train: 595it [07:04, 90.52it/s, acc=22.8, epoch=2, loss=7.11][A
split=train: 596it [07:04, 90.52it/s, acc=22.7, epoch=2, loss=7.11][A
split=train: 597it [07:04, 90.52it/s, acc=22.8, epoch=2, loss=7.11][A
split=train: 598it [07:04, 90.52it/s, acc=22.8, epoch=2, loss=7.11][A
split=train: 599it [07:04, 90.52it/s, acc=22.8, epoch=2, loss=7.1] [A
split=train: 600it [07:04, 90.52it/s, acc=22.9, epoch=2, loss=7.1][A
split=train: 601it [07:04, 90.52it/s, acc=22.8, epoch=2, loss=7.1][A
split=train: 602it [07:04, 91.98it/s, acc=22.8, epoch=2, loss=7.1][A
split=tr

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 610it [07:04, 91.98it/s, acc=22.8, epoch=2, loss=7.06][A
split=train: 611it [07:04, 91.98it/s, acc=22.8, epoch=2, loss=7.06][A
split=train: 612it [07:04, 91.96it/s, acc=22.8, epoch=2, loss=7.06][A
split=train: 612it [07:04, 91.96it/s, acc=22.8, epoch=2, loss=7.06][A
split=train: 613it [07:04, 91.96it/s, acc=22.7, epoch=2, loss=7.05][A
split=train: 614it [07:04, 91.96it/s, acc=22.8, epoch=2, loss=7.05][A
split=train: 615it [07:04, 91.96it/s, acc=22.8, epoch=2, loss=7.04][A
split=train: 616it [07:04, 91.96it/s, acc=22.8, epoch=2, loss=7.04][A
split=train: 617it [07:04, 91.96it/s, acc=22.7, epoch=2, loss=7.04][A
split=train: 618it [07:04, 91.96it/s, acc=22.7, epoch=2, loss=7.03][A
split=train: 619it [07:04, 91.96it/s, acc=22.7, epoch=2, loss=7.03][A
split=train: 620it [07:04, 91.96it/s, acc=22.7, epoch=2, loss=7.03][A
split=train: 621it [07:04, 91.96it/s, acc=22.8, epoch=2, loss=7.02][A
split=train: 622it [07:04, 92.08it/s, acc=22.8, epoch=2, loss=7.02][A
split

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 628it [07:04, 92.08it/s, acc=22.8, epoch=2, loss=6.99][A
split=train: 629it [07:04, 92.08it/s, acc=22.8, epoch=2, loss=6.99][A
split=train: 630it [07:04, 92.08it/s, acc=22.8, epoch=2, loss=6.98][A
split=train: 631it [07:04, 92.08it/s, acc=22.9, epoch=2, loss=6.98][A
split=train: 632it [07:04, 90.62it/s, acc=22.9, epoch=2, loss=6.98][A
split=train: 632it [07:04, 90.62it/s, acc=23, epoch=2, loss=6.98]  [A
split=train: 633it [07:04, 90.62it/s, acc=23, epoch=2, loss=6.97][A
split=train: 634it [07:04, 90.62it/s, acc=23, epoch=2, loss=6.97][A
split=train: 635it [07:04, 90.62it/s, acc=22.9, epoch=2, loss=6.97][A
split=train: 636it [07:04, 90.62it/s, acc=22.9, epoch=2, loss=6.96][A
split=train: 637it [07:04, 90.62it/s, acc=22.8, epoch=2, loss=6.96][A
split=train: 638it [07:04, 90.62it/s, acc=22.7, epoch=2, loss=6.96][A
split=train: 639it [07:04, 90.62it/s, acc=22.6, epoch=2, loss=6.95][A
split=train: 640it [07:05, 90.62it/s, acc=22.6, epoch=2, loss=6.95][A
split=tra

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32

[A
split=train: 648it [07:05, 90.84it/s, acc=22.7, epoch=2, loss=6.92][A
split=train: 649it [07:05, 90.84it/s, acc=22.7, epoch=2, loss=6.91][A
split=train: 650it [07:05, 90.84it/s, acc=22.8, epoch=2, loss=6.91][A
split=train: 651it [07:05, 90.84it/s, acc=22.8, epoch=2, loss=6.91][A
split=train: 652it [07:05, 92.57it/s, acc=22.8, epoch=2, loss=6.91][A
split=train: 652it [07:05, 92.57it/s, acc=22.8, epoch=2, loss=6.9] [A
split=train: 653it [07:05, 92.57it/s, acc=22.8, epoch=2, loss=6.9][A
split=train: 654it [07:05, 92.57it/s, acc=22.8, epoch=2, loss=6.89][A
split=train: 655it [07:05, 92.57it/s, acc=22.8, epoch=2, loss=6.89][A
split=train: 656it [07:05, 92.57it/s, acc=22.9, epoch=2, loss=6.88][A
split=train: 657it [07:05, 92.57it/s, acc=22.9, epoch=2, loss=6.88][A
split=train: 658it [07:05, 92.57it/s, acc=22.9, epoch=2, loss=6.87][A
split=train: 659it [07:05, 92.57it/s, acc=22.9, epoch=2, loss=6.87][A
split=train: 660it [07:05, 92.57it/s, acc=22.9, epoch=2, loss=6.87][A
spl

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 667it [07:05, 92.91it/s, acc=22.9, epoch=2, loss=6.84][A
split=train: 668it [07:05, 92.91it/s, acc=22.9, epoch=2, loss=6.84][A
split=train: 669it [07:05, 92.91it/s, acc=22.9, epoch=2, loss=6.83][A
split=train: 670it [07:05, 92.91it/s, acc=22.8, epoch=2, loss=6.83][A
split=train: 671it [07:05, 92.91it/s, acc=22.9, epoch=2, loss=6.82][A
split=train: 672it [07:05, 92.19it/s, acc=22.9, epoch=2, loss=6.82][A
split=train: 672it [07:05, 92.19it/s, acc=22.8, epoch=2, loss=6.82][A
split=train: 673it [07:05, 92.19it/s, acc=22.8, epoch=2, loss=6.82][A
split=train: 674it [07:05, 92.19it/s, acc=22.7, epoch=2, loss=6.81][A
split=train: 675it [07:05, 92.19it/s, acc=22.8, epoch=2, loss=6.81][A
split=train: 676it [07:05, 92.19it/s, acc=22.8, epoch=2, loss=6.81][A
split=train: 677it [07:05, 92.19it/s, acc=22.8, epoch=2, loss=6.8] [A
split=train: 678it [07:05, 92.19it/s, acc=22.9, epoch=2, loss=6.8][A
split=train: 679it [07:05, 92.19it/s, acc=22.9, epoch=2, loss=6.79][A
split=

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 685it [07:05, 90.74it/s, acc=23, epoch=2, loss=6.77][A
split=train: 686it [07:05, 90.74it/s, acc=22.9, epoch=2, loss=6.77][A
split=train: 687it [07:05, 90.74it/s, acc=22.9, epoch=2, loss=6.76][A
split=train: 688it [07:05, 90.74it/s, acc=22.9, epoch=2, loss=6.76][A
split=train: 689it [07:05, 90.74it/s, acc=22.9, epoch=2, loss=6.76][A
split=train: 690it [07:05, 90.74it/s, acc=22.9, epoch=2, loss=6.75][A
split=train: 691it [07:05, 90.74it/s, acc=22.9, epoch=2, loss=6.75][A
split=train: 692it [07:05, 90.66it/s, acc=22.9, epoch=2, loss=6.75][A
split=train: 692it [07:05, 90.66it/s, acc=22.9, epoch=2, loss=6.74][A
split=train: 693it [07:05, 90.66it/s, acc=23, epoch=2, loss=6.74]  [A
split=train: 694it [07:05, 90.66it/s, acc=23, epoch=2, loss=6.74][A
split=train: 695it [07:05, 90.66it/s, acc=23, epoch=2, loss=6.73][A
split=train: 696it [07:05, 90.66it/s, acc=23, epoch=2, loss=6.73][A
split=train: 697it [07:05, 90.66it/s, acc=23, epoch=2, loss=6.72][A
split=train: 69

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32

split=train: 703it [07:05, 91.71it/s, acc=23.1, epoch=2, loss=6.7] [A
split=train: 704it [07:05, 91.71it/s, acc=23.1, epoch=2, loss=6.7][A
split=train: 705it [07:05, 91.71it/s, acc=23, epoch=2, loss=6.69] [A
split=train: 706it [07:05, 91.71it/s, acc=23, epoch=2, loss=6.69][A
split=train: 707it [07:05, 91.71it/s, acc=23, epoch=2, loss=6.69][A
split=train: 708it [07:05, 91.71it/s, acc=23, epoch=2, loss=6.68][A
split=train: 709it [07:05, 91.71it/s, acc=23, epoch=2, loss=6.68][A
split=train: 710it [07:05, 91.71it/s, acc=23, epoch=2, loss=6.67][A
split=train: 711it [07:05, 91.71it/s, acc=23, epoch=2, loss=6.67][A
split=train: 712it [07:05, 91.77it/s, acc=23, epoch=2, loss=6.67][A
split=train: 712it [07:05, 91.77it/s, acc=23, epoch=2, loss=6.67][A
split=train: 713it [07:05, 91.77it/s, acc=23, epoch=2, loss=6.66][A
split=train: 714it [07:05, 91.77it/s, acc=23, epoch=2, loss=6.66][A
split=train: 715it [07:05, 91.77it/s, acc=23, epoch=2, loss=6.65][A
split=train: 716it [07:05, 91.

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=val: 108it [07:05,  2.22s/it, acc=7.14, epoch=2, loss=5.9] [A
split=val: 109it [07:05,  2.22s/it, acc=8.59, epoch=2, loss=5.85][A
split=val: 110it [07:05,  2.22s/it, acc=8.68, epoch=2, loss=5.84][A
split=val: 111it [07:05,  2.22s/it, acc=9.06, epoch=2, loss=5.83][A
split=val: 112it [07:05,  2.22s/it, acc=8.81, epoch=2, loss=5.85][A
split=val: 113it [07:05,  2.22s/it, acc=8.85, epoch=2, loss=5.86][A
split=val: 114it [07:05,  2.22s/it, acc=8.41, epoch=2, loss=5.88][A
split=val: 115it [07:05,  1.57s/it, acc=8.41, epoch=2, loss=5.88][A
split=val: 115it [07:05,  1.57s/it, acc=7.81, epoch=2, loss=5.89][A
split=val: 116it [07:05,  1.57s/it, acc=7.71, epoch=2, loss=5.89][A
split=val: 117it [07:05,  1.57s/it, acc=7.62, epoch=2, loss=5.88][A
split=val: 118it [07:05,  1.57s/it, acc=7.35, epoch=2, loss=5.89][A
split=val: 119it [07:05,  1.57s/it, acc=6.94, epoch=2, loss=5.89][A
split=val: 120it [07:05,  1.57s/it, acc=7.24, epoch=2, loss=5.88][A
split=val: 121it [07:05,  1.57s/i

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=val: 141it [07:06,  1.06s/it, acc=6.87, epoch=2, loss=5.9][A
split=val: 142it [07:06,  1.06s/it, acc=7.09, epoch=2, loss=5.89][A
split=val: 143it [07:06,  1.06s/it, acc=7.07, epoch=2, loss=5.89][A
split=val: 144it [07:06,  1.06s/it, acc=7.27, epoch=2, loss=5.89][A
split=val: 145it [07:06,  1.06s/it, acc=7.53, epoch=2, loss=5.88][A
split=val: 146it [07:06,  1.06s/it, acc=7.43, epoch=2, loss=5.88][A
split=val: 147it [07:06,  1.06s/it, acc=7.4, epoch=2, loss=5.88] [A
split=val: 148it [07:06,  1.41it/s, acc=7.4, epoch=2, loss=5.88][A
split=val: 148it [07:06,  1.41it/s, acc=7.31, epoch=2, loss=5.89][A
split=val: 149it [07:06,  1.41it/s, acc=7.23, epoch=2, loss=5.89][A
split=val: 150it [07:06,  1.41it/s, acc=7.21, epoch=2, loss=5.89][A
split=val: 151it [07:06,  1.41it/s, acc=7.25, epoch=2, loss=5.89][A
split=val: 152it [07:06,  1.41it/s, acc=7.23, epoch=2, loss=5.89][A
split=train: 720it [07:06, 91.77it/s, acc=21.9, epoch=3, loss=5.52][A
split=train: 721it [07:06, 91.77i

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 730it [07:06, 48.89it/s, acc=23, epoch=3, loss=5.6]   [A
split=train: 731it [07:06, 55.67it/s, acc=23, epoch=3, loss=5.6][A
split=train: 731it [07:06, 55.67it/s, acc=22.4, epoch=3, loss=5.6][A
split=train: 732it [07:06, 55.67it/s, acc=23.1, epoch=3, loss=5.58][A
split=train: 733it [07:06, 55.67it/s, acc=22.5, epoch=3, loss=5.59][A
split=train: 734it [07:06, 55.67it/s, acc=24, epoch=3, loss=5.58]  [A
split=train: 735it [07:06, 55.67it/s, acc=24, epoch=3, loss=5.57][A
split=train: 736it [07:06, 55.67it/s, acc=24.4, epoch=3, loss=5.55][A
split=train: 737it [07:06, 55.67it/s, acc=24.3, epoch=3, loss=5.56][A
split=train: 738it [07:06, 55.67it/s, acc=25.2, epoch=3, loss=5.55][A
split=train: 739it [07:06, 55.67it/s, acc=25.2, epoch=3, loss=5.55][A
split=train: 740it [07:06, 61.81it/s, acc=25.2, epoch=3, loss=5.55][A
split=train: 740it [07:06, 61.81it/s, acc=25.1, epoch=3, loss=5.55][A
split=train: 741it [07:06, 61.81it/s, acc=24.6, epoch=3, loss=5.55][A
split=train

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 748it [07:06, 61.81it/s, acc=25.8, epoch=3, loss=5.48][A
split=train: 749it [07:06, 67.24it/s, acc=25.8, epoch=3, loss=5.48][A
split=train: 749it [07:06, 67.24it/s, acc=25.7, epoch=3, loss=5.47][A
split=train: 750it [07:06, 67.24it/s, acc=25.6, epoch=3, loss=5.46][A
split=train: 751it [07:06, 67.24it/s, acc=25.4, epoch=3, loss=5.46][A
split=train: 752it [07:06, 67.24it/s, acc=25.3, epoch=3, loss=5.45][A
split=train: 753it [07:06, 67.24it/s, acc=24.8, epoch=3, loss=5.45][A
split=train: 754it [07:06, 67.24it/s, acc=25.1, epoch=3, loss=5.45][A
split=train: 755it [07:06, 67.24it/s, acc=25.1, epoch=3, loss=5.45][A
split=train: 756it [07:06, 67.24it/s, acc=25, epoch=3, loss=5.45]  [A
split=train: 757it [07:06, 67.24it/s, acc=24.7, epoch=3, loss=5.43][A
split=train: 758it [07:06, 72.32it/s, acc=24.7, epoch=3, loss=5.43][A
split=train: 758it [07:06, 72.32it/s, acc=24.7, epoch=3, loss=5.43][A
split=train: 759it [07:06, 72.32it/s, acc=24.6, epoch=3, loss=5.42][A
split

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 766it [07:06, 72.32it/s, acc=24.3, epoch=3, loss=5.39][A
split=train: 767it [07:06, 75.22it/s, acc=24.3, epoch=3, loss=5.39][A
split=train: 767it [07:06, 75.22it/s, acc=24.3, epoch=3, loss=5.39][A
split=train: 768it [07:06, 75.22it/s, acc=24.3, epoch=3, loss=5.38][A
split=train: 769it [07:06, 75.22it/s, acc=24.3, epoch=3, loss=5.38][A
split=train: 770it [07:06, 75.22it/s, acc=24.1, epoch=3, loss=5.37][A
split=train: 771it [07:06, 75.22it/s, acc=23.9, epoch=3, loss=5.37][A
split=train: 772it [07:06, 75.22it/s, acc=24.1, epoch=3, loss=5.36][A
split=train: 773it [07:06, 75.22it/s, acc=24, epoch=3, loss=5.35]  [A
split=train: 774it [07:06, 75.22it/s, acc=24.2, epoch=3, loss=5.35][A
split=train: 775it [07:06, 75.22it/s, acc=24.2, epoch=3, loss=5.35][A
split=train: 776it [07:06, 78.32it/s, acc=24.2, epoch=3, loss=5.35][A
split=train: 776it [07:06, 78.32it/s, acc=24.2, epoch=3, loss=5.35][A
split=train: 777it [07:06, 78.32it/s, acc=24.2, epoch=3, loss=5.35][A
split

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 785it [07:06, 81.08it/s, acc=24.8, epoch=3, loss=5.3] [A
split=train: 786it [07:06, 81.08it/s, acc=24.7, epoch=3, loss=5.29][A
split=train: 787it [07:06, 81.08it/s, acc=24.6, epoch=3, loss=5.3] [A
split=train: 788it [07:06, 81.08it/s, acc=24.6, epoch=3, loss=5.29][A
split=train: 789it [07:06, 81.08it/s, acc=24.7, epoch=3, loss=5.28][A
split=train: 790it [07:07, 81.08it/s, acc=24.6, epoch=3, loss=5.28][A
split=train: 791it [07:07, 81.08it/s, acc=24.6, epoch=3, loss=5.27][A
split=train: 792it [07:07, 81.08it/s, acc=24.5, epoch=3, loss=5.27][A
split=train: 793it [07:07, 81.08it/s, acc=24.4, epoch=3, loss=5.27][A
split=train: 794it [07:07, 82.68it/s, acc=24.4, epoch=3, loss=5.27][A
split=train: 794it [07:07, 82.68it/s, acc=24.3, epoch=3, loss=5.27][A
split=train: 795it [07:07, 82.68it/s, acc=24.4, epoch=3, loss=5.26][A
split=train: 796it [07:07, 82.68it/s, acc=24.5, epoch=3, loss=5.26][A
split=train: 797it [07:07, 82.68it/s, acc=24.6, epoch=3, loss=5.25][A
split

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 803it [07:07, 84.08it/s, acc=24.8, epoch=3, loss=5.22][A
split=train: 803it [07:07, 84.08it/s, acc=24.9, epoch=3, loss=5.22][A
split=train: 804it [07:07, 84.08it/s, acc=24.9, epoch=3, loss=5.21][A
split=train: 805it [07:07, 84.08it/s, acc=24.8, epoch=3, loss=5.21][A
split=train: 806it [07:07, 84.08it/s, acc=24.7, epoch=3, loss=5.2] [A
split=train: 807it [07:07, 84.08it/s, acc=24.8, epoch=3, loss=5.19][A
split=train: 808it [07:07, 84.08it/s, acc=24.8, epoch=3, loss=5.19][A
split=train: 809it [07:07, 84.08it/s, acc=24.8, epoch=3, loss=5.18][A
split=train: 810it [07:07, 84.08it/s, acc=24.8, epoch=3, loss=5.18][A
split=train: 811it [07:07, 84.08it/s, acc=24.7, epoch=3, loss=5.18][A
split=train: 812it [07:07, 82.50it/s, acc=24.7, epoch=3, loss=5.18][A
split=train: 812it [07:07, 82.50it/s, acc=24.8, epoch=3, loss=5.17][A
split=train: 813it [07:07, 82.50it/s, acc=24.7, epoch=3, loss=5.17][A
split=train: 814it [07:07, 82.50it/s, acc=24.5, epoch=3, loss=5.16][A
split

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 820it [07:07, 82.50it/s, acc=24.7, epoch=3, loss=5.13][A
split=train: 821it [07:07, 83.65it/s, acc=24.7, epoch=3, loss=5.13][A
split=train: 821it [07:07, 83.65it/s, acc=24.8, epoch=3, loss=5.13][A
split=train: 822it [07:07, 83.65it/s, acc=24.8, epoch=3, loss=5.12][A
split=train: 823it [07:07, 83.65it/s, acc=24.7, epoch=3, loss=5.12][A
split=train: 824it [07:07, 83.65it/s, acc=24.7, epoch=3, loss=5.11][A
split=train: 825it [07:07, 83.65it/s, acc=24.7, epoch=3, loss=5.11][A
split=train: 826it [07:07, 83.65it/s, acc=24.8, epoch=3, loss=5.1] [A
split=train: 827it [07:07, 83.65it/s, acc=24.7, epoch=3, loss=5.09][A
split=train: 828it [07:07, 83.65it/s, acc=24.7, epoch=3, loss=5.09][A
split=train: 829it [07:07, 83.65it/s, acc=24.7, epoch=3, loss=5.09][A
split=train: 830it [07:07, 84.10it/s, acc=24.7, epoch=3, loss=5.09][A
split=train: 830it [07:07, 84.10it/s, acc=24.7, epoch=3, loss=5.08][A
split=train: 831it [07:07, 84.10it/s, acc=24.8, epoch=3, loss=5.08][A
split

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 837it [07:07, 84.10it/s, acc=24.7, epoch=3, loss=5.05][A
split=train: 838it [07:07, 84.10it/s, acc=24.6, epoch=3, loss=5.04][A
split=train: 839it [07:07, 84.41it/s, acc=24.6, epoch=3, loss=5.04][A
split=train: 839it [07:07, 84.41it/s, acc=24.6, epoch=3, loss=5.04][A
split=train: 840it [07:07, 84.41it/s, acc=24.6, epoch=3, loss=5.03][A
split=train: 841it [07:07, 84.41it/s, acc=24.6, epoch=3, loss=5.03][A
split=train: 842it [07:07, 84.41it/s, acc=24.6, epoch=3, loss=5.02][A
split=train: 843it [07:07, 84.41it/s, acc=24.6, epoch=3, loss=5.01][A
split=train: 844it [07:07, 84.41it/s, acc=24.6, epoch=3, loss=5.01][A
split=train: 845it [07:07, 84.41it/s, acc=24.6, epoch=3, loss=5.01][A
split=train: 846it [07:07, 84.41it/s, acc=24.5, epoch=3, loss=5]   [A
split=train: 847it [07:07, 84.41it/s, acc=24.5, epoch=3, loss=5][A
split=train: 848it [07:07, 83.60it/s, acc=24.5, epoch=3, loss=5][A
split=train: 848it [07:07, 83.60it/s, acc=24.5, epoch=3, loss=5][A
split=train: 8

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 854it [07:07, 83.60it/s, acc=24.5, epoch=3, loss=4.96][A
split=train: 855it [07:07, 83.60it/s, acc=24.5, epoch=3, loss=4.96][A
split=train: 856it [07:07, 83.60it/s, acc=24.6, epoch=3, loss=4.95][A
split=train: 857it [07:07, 80.70it/s, acc=24.6, epoch=3, loss=4.95][A
split=train: 857it [07:07, 80.70it/s, acc=24.5, epoch=3, loss=4.95][A
split=train: 858it [07:07, 80.70it/s, acc=24.6, epoch=3, loss=4.93][A
split=train: 859it [07:07, 80.70it/s, acc=24.7, epoch=3, loss=4.93][A
split=train: 860it [07:07, 80.70it/s, acc=24.7, epoch=3, loss=4.93][A
split=train: 861it [07:07, 80.70it/s, acc=24.8, epoch=3, loss=4.92][A
split=train: 862it [07:07, 80.70it/s, acc=24.8, epoch=3, loss=4.92][A
split=train: 863it [07:07, 80.70it/s, acc=24.8, epoch=3, loss=4.91][A
split=train: 864it [07:07, 80.70it/s, acc=24.9, epoch=3, loss=4.91][A
split=train: 865it [07:07, 80.70it/s, acc=24.9, epoch=3, loss=4.9] [A
split=train: 866it [07:07, 79.70it/s, acc=24.9, epoch=3, loss=4.9][A
split=

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 870it [07:07, 79.70it/s, acc=24.9, epoch=3, loss=4.88][A
split=train: 871it [07:08, 79.70it/s, acc=24.9, epoch=3, loss=4.87][A
split=train: 872it [07:08, 79.70it/s, acc=24.9, epoch=3, loss=4.87][A
split=train: 873it [07:08, 79.70it/s, acc=24.9, epoch=3, loss=4.86][A
split=train: 874it [07:08, 79.70it/s, acc=25, epoch=3, loss=4.86]  [A
split=train: 875it [07:08, 76.44it/s, acc=25, epoch=3, loss=4.86][A
split=train: 875it [07:08, 76.44it/s, acc=25, epoch=3, loss=4.85][A
split=train: 876it [07:08, 76.44it/s, acc=25.1, epoch=3, loss=4.84][A
split=train: 877it [07:08, 76.44it/s, acc=25, epoch=3, loss=4.84]  [A
split=train: 878it [07:08, 76.44it/s, acc=25, epoch=3, loss=4.83][A
split=train: 879it [07:08, 76.44it/s, acc=25, epoch=3, loss=4.83][A
split=train: 880it [07:08, 76.44it/s, acc=25, epoch=3, loss=4.83][A
split=train: 881it [07:08, 76.44it/s, acc=24.9, epoch=3, loss=4.82][A
split=train: 882it [07:08, 76.44it/s, acc=25, epoch=3, loss=4.82]  [A
split=train: 88

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 883it [07:08, 72.42it/s, acc=25.1, epoch=3, loss=4.81][A
split=train: 884it [07:08, 72.42it/s, acc=25.1, epoch=3, loss=4.81][A
split=train: 885it [07:08, 72.42it/s, acc=25, epoch=3, loss=4.8]   [A
split=train: 886it [07:08, 72.42it/s, acc=25, epoch=3, loss=4.8][A
split=train: 887it [07:08, 72.42it/s, acc=25, epoch=3, loss=4.79][A
split=train: 888it [07:08, 72.42it/s, acc=25, epoch=3, loss=4.79][A
split=train: 889it [07:08, 72.42it/s, acc=25, epoch=3, loss=4.78][A
split=train: 890it [07:08, 72.42it/s, acc=25, epoch=3, loss=4.78][A
split=train: 891it [07:08, 73.29it/s, acc=25, epoch=3, loss=4.78][A
split=train: 891it [07:08, 73.29it/s, acc=24.9, epoch=3, loss=4.77][A
split=train: 892it [07:08, 73.29it/s, acc=24.9, epoch=3, loss=4.77][A
split=train: 893it [07:08, 73.29it/s, acc=24.8, epoch=3, loss=4.76][A
split=train: 894it [07:08, 73.29it/s, acc=24.8, epoch=3, loss=4.76][A
split=train: 895it [07:08, 73.29it/s, acc=24.8, epoch=3, loss=4.76][A
split=train: 896it

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32

split=train: 899it [07:08, 74.97it/s, acc=24.8, epoch=3, loss=4.74][A
split=train: 900it [07:08, 74.97it/s, acc=24.8, epoch=3, loss=4.73][A
split=train: 901it [07:08, 74.97it/s, acc=24.8, epoch=3, loss=4.73][A
split=train: 902it [07:08, 74.97it/s, acc=24.9, epoch=3, loss=4.72][A
split=train: 903it [07:08, 74.97it/s, acc=24.8, epoch=3, loss=4.72][A
split=train: 904it [07:08, 74.97it/s, acc=24.8, epoch=3, loss=4.71][A
split=train: 905it [07:08, 74.97it/s, acc=24.9, epoch=3, loss=4.71][A
split=train: 906it [07:08, 74.97it/s, acc=24.9, epoch=3, loss=4.7] [A
split=train: 907it [07:08, 76.28it/s, acc=24.9, epoch=3, loss=4.7][A
split=train: 907it [07:08, 76.28it/s, acc=24.9, epoch=3, loss=4.7][A
split=train: 908it [07:08, 76.28it/s, acc=24.9, epoch=3, loss=4.69][A
split=train: 909it [07:08, 76.28it/s, acc=24.9, epoch=3, loss=4.69][A
split=train: 910it [07:08, 76.28it/s, acc=24.8, epoch=3, loss=4.68][A
split=train: 911it [07:08, 76.28it/s, acc=24.8, epoch=3, loss=4.68][A
split=tr

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 916it [07:08, 77.84it/s, acc=25, epoch=3, loss=4.66]  [A
split=train: 917it [07:08, 77.84it/s, acc=25, epoch=3, loss=4.66][A
split=train: 918it [07:08, 77.84it/s, acc=25, epoch=3, loss=4.65][A
split=train: 919it [07:08, 77.84it/s, acc=25, epoch=3, loss=4.65][A
split=train: 920it [07:08, 77.84it/s, acc=25, epoch=3, loss=4.64][A
split=train: 921it [07:08, 77.84it/s, acc=25, epoch=3, loss=4.64][A
split=train: 922it [07:08, 77.84it/s, acc=25, epoch=3, loss=4.63][A
split=train: 923it [07:08, 77.84it/s, acc=25.1, epoch=3, loss=4.63][A
split=train: 924it [07:08, 77.84it/s, acc=25.1, epoch=3, loss=4.62][A
split=train: 925it [07:08, 78.96it/s, acc=25.1, epoch=3, loss=4.62][A
split=train: 925it [07:08, 78.96it/s, acc=25, epoch=3, loss=4.62]  [A
split=train: 926it [07:08, 78.96it/s, acc=25, epoch=3, loss=4.61][A
split=train: 927it [07:08, 78.96it/s, acc=25, epoch=3, loss=4.61][A
split=train: 928it [07:08, 78.96it/s, acc=25.1, epoch=3, loss=4.61][A
split=train: 929it [0

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 933it [07:08, 78.96it/s, acc=25.2, epoch=3, loss=4.58][A
split=train: 934it [07:08, 79.38it/s, acc=25.2, epoch=3, loss=4.58][A
split=train: 934it [07:08, 79.38it/s, acc=25.2, epoch=3, loss=4.58][A
split=train: 935it [07:08, 79.38it/s, acc=25.2, epoch=3, loss=4.58][A
split=train: 936it [07:08, 79.38it/s, acc=25.2, epoch=3, loss=4.57][A
split=train: 937it [07:08, 79.38it/s, acc=25.2, epoch=3, loss=4.57][A
split=train: 938it [07:08, 79.38it/s, acc=25.2, epoch=3, loss=4.57][A
split=train: 939it [07:08, 79.38it/s, acc=25.2, epoch=3, loss=4.56][A
split=train: 940it [07:08, 79.38it/s, acc=25.1, epoch=3, loss=4.56][A
split=train: 941it [07:08, 79.38it/s, acc=25.1, epoch=3, loss=4.55][A
split=train: 942it [07:08, 79.38it/s, acc=25.2, epoch=3, loss=4.55][A
split=train: 943it [07:08, 79.78it/s, acc=25.2, epoch=3, loss=4.55][A
split=train: 943it [07:08, 79.78it/s, acc=25.2, epoch=3, loss=4.54][A
split=train: 944it [07:08, 79.78it/s, acc=25.2, epoch=3, loss=4.54][A
split

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 951it [07:09, 79.78it/s, acc=25.2, epoch=3, loss=4.51][A
split=train: 952it [07:09, 80.96it/s, acc=25.2, epoch=3, loss=4.51][A
split=train: 952it [07:09, 80.96it/s, acc=25.2, epoch=3, loss=4.5] [A
split=train: 953it [07:09, 80.96it/s, acc=25.3, epoch=3, loss=4.5][A
split=train: 954it [07:09, 80.96it/s, acc=25.2, epoch=3, loss=4.49][A
split=train: 955it [07:09, 80.96it/s, acc=25.3, epoch=3, loss=4.49][A
split=train: 956it [07:09, 80.96it/s, acc=25.3, epoch=3, loss=4.48][A
split=train: 957it [07:09, 80.96it/s, acc=25.3, epoch=3, loss=4.48][A
split=train: 958it [07:09, 80.96it/s, acc=25.3, epoch=3, loss=4.48][A
split=train: 959it [07:09, 80.96it/s, acc=25.2, epoch=3, loss=4.47][A
split=val: 153it [07:09,  1.41it/s, acc=3.12, epoch=3, loss=3.69][A
split=val: 154it [07:09,  1.41it/s, acc=1.56, epoch=3, loss=3.79][A
split=val: 155it [07:09,  1.41it/s, acc=2.08, epoch=3, loss=3.73][A
split=val: 156it [07:09,  1.41it/s, acc=3.91, epoch=3, loss=3.74][A
split=val: 157

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=val: 166it [07:09,  1.75it/s, acc=4.91, epoch=3, loss=3.68][A
split=val: 167it [07:09,  1.75it/s, acc=5, epoch=3, loss=3.69]   [A
split=val: 168it [07:09,  1.75it/s, acc=5.27, epoch=3, loss=3.68][A
split=val: 169it [07:09,  1.75it/s, acc=5.33, epoch=3, loss=3.67][A
split=val: 170it [07:09,  1.75it/s, acc=5.56, epoch=3, loss=3.66][A
split=val: 171it [07:09,  1.75it/s, acc=5.59, epoch=3, loss=3.68][A
split=val: 172it [07:09,  1.75it/s, acc=5.78, epoch=3, loss=3.68][A
split=val: 173it [07:09,  1.75it/s, acc=5.65, epoch=3, loss=3.68][A
split=val: 174it [07:09,  1.75it/s, acc=5.68, epoch=3, loss=3.69][A
split=val: 175it [07:09,  1.75it/s, acc=5.57, epoch=3, loss=3.69][A
split=val: 176it [07:09,  2.43it/s, acc=5.57, epoch=3, loss=3.69][A
split=val: 176it [07:09,  2.43it/s, acc=5.34, epoch=3, loss=3.71][A
split=val: 177it [07:09,  2.43it/s, acc=5.38, epoch=3, loss=3.7] [A
split=val: 178it [07:09,  2.43it/s, acc=5.17, epoch=3, loss=3.7][A
split=val: 179it [07:09,  2.43it/s

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=val: 196it [07:09,  3.47it/s, acc=5.68, epoch=3, loss=3.68][A
split=val: 197it [07:09,  3.47it/s, acc=5.62, epoch=3, loss=3.68][A
split=val: 198it [07:09,  3.47it/s, acc=5.57, epoch=3, loss=3.68][A
split=val: 199it [07:09,  3.47it/s, acc=5.72, epoch=3, loss=3.68][A
split=val: 200it [07:09,  3.47it/s, acc=5.73, epoch=3, loss=3.68][A
split=val: 201it [07:09,  3.47it/s, acc=5.8, epoch=3, loss=3.68] [A
split=val: 202it [07:09,  3.47it/s, acc=5.81, epoch=3, loss=3.68][A
split=val: 203it [07:09,  3.47it/s, acc=5.88, epoch=3, loss=3.67][A
split=train: 960it [07:09, 80.96it/s, acc=28.1, epoch=4, loss=3.44][A
split=train: 961it [07:09, 40.34it/s, acc=28.1, epoch=4, loss=3.44][A
split=train: 961it [07:09, 40.34it/s, acc=29.7, epoch=4, loss=3.23][A
split=train: 962it [07:09, 40.34it/s, acc=32.3, epoch=4, loss=3.2] [A
split=train: 963it [07:09, 40.34it/s, acc=25.8, epoch=4, loss=3.33][A
split=train: 964it [07:09, 40.34it/s, acc=25, epoch=4, loss=3.37]  [A
split=train: 965it [0

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）

Epoch [5/5]
-----------------------------------


split=train: 970it [07:09, 46.53it/s, acc=27, epoch=4, loss=3.34]  [A
split=train: 971it [07:09, 46.53it/s, acc=26.8, epoch=4, loss=3.32][A
split=train: 972it [07:09, 46.53it/s, acc=27.4, epoch=4, loss=3.32][A
split=train: 973it [07:09, 46.53it/s, acc=27.7, epoch=4, loss=3.32][A
split=train: 974it [07:09, 46.53it/s, acc=27.1, epoch=4, loss=3.34][A
split=train: 975it [07:09, 46.53it/s, acc=27.3, epoch=4, loss=3.33][A
split=train: 976it [07:09, 49.66it/s, acc=27.3, epoch=4, loss=3.33][A
split=train: 976it [07:09, 49.66it/s, acc=28.3, epoch=4, loss=3.34][A
split=train: 977it [07:09, 49.66it/s, acc=28.1, epoch=4, loss=3.31][A
split=train: 978it [07:09, 49.66it/s, acc=28.5, epoch=4, loss=3.31][A
split=train: 979it [07:09, 49.66it/s, acc=28.8, epoch=4, loss=3.29][A
split=train: 980it [07:09, 49.66it/s, acc=28.6, epoch=4, loss=3.29][A
split=train: 981it [07:09, 49.66it/s, acc=28.3, epoch=4, loss=3.28][A
split=train: 982it [07:09, 49.66it/s, acc=28.9, epoch=4, loss=3.28][A
split

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 985it [07:09, 56.66it/s, acc=29, epoch=4, loss=3.28][A
split=train: 986it [07:09, 56.66it/s, acc=29.1, epoch=4, loss=3.29][A
split=train: 987it [07:09, 56.66it/s, acc=28.5, epoch=4, loss=3.3] [A
split=train: 988it [07:09, 56.66it/s, acc=28.4, epoch=4, loss=3.3][A
split=train: 989it [07:09, 56.66it/s, acc=28.6, epoch=4, loss=3.29][A
split=train: 990it [07:09, 56.66it/s, acc=28.7, epoch=4, loss=3.28][A
split=train: 991it [07:09, 56.66it/s, acc=28.9, epoch=4, loss=3.28][A
split=train: 992it [07:09, 56.66it/s, acc=28.8, epoch=4, loss=3.27][A
split=train: 993it [07:09, 61.75it/s, acc=28.8, epoch=4, loss=3.27][A
split=train: 993it [07:09, 61.75it/s, acc=28.8, epoch=4, loss=3.27][A
split=train: 994it [07:09, 61.75it/s, acc=28.4, epoch=4, loss=3.28][A
split=train: 995it [07:09, 61.75it/s, acc=28.1, epoch=4, loss=3.27][A
split=train: 996it [07:09, 61.75it/s, acc=27.8, epoch=4, loss=3.28][A
split=train: 997it [07:10, 61.75it/s, acc=27.8, epoch=4, loss=3.28][A
split=tr

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 1002it [07:10, 65.79it/s, acc=27.5, epoch=4, loss=3.28][A
split=train: 1003it [07:10, 65.79it/s, acc=27.6, epoch=4, loss=3.27][A
split=train: 1004it [07:10, 65.79it/s, acc=27.8, epoch=4, loss=3.27][A
split=train: 1005it [07:10, 65.79it/s, acc=27.6, epoch=4, loss=3.27][A
split=train: 1006it [07:10, 65.79it/s, acc=27.7, epoch=4, loss=3.27][A
split=train: 1007it [07:10, 65.79it/s, acc=27.8, epoch=4, loss=3.27][A
split=train: 1008it [07:10, 65.79it/s, acc=27.7, epoch=4, loss=3.26][A
split=train: 1009it [07:10, 65.79it/s, acc=27.8, epoch=4, loss=3.26][A
split=train: 1010it [07:10, 70.33it/s, acc=27.8, epoch=4, loss=3.26][A
split=train: 1010it [07:10, 70.33it/s, acc=27.8, epoch=4, loss=3.26][A
split=train: 1011it [07:10, 70.33it/s, acc=27.8, epoch=4, loss=3.26][A
split=train: 1012it [07:10, 70.33it/s, acc=28, epoch=4, loss=3.26]  [A
split=train: 1013it [07:10, 70.33it/s, acc=28.2, epoch=4, loss=3.25][A
split=train: 1014it [07:10, 70.33it/s, acc=28.2, epoch=4, loss=

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 1018it [07:10, 72.36it/s, acc=27.9, epoch=4, loss=3.25][A
split=train: 1019it [07:10, 72.36it/s, acc=28.1, epoch=4, loss=3.24][A
split=train: 1020it [07:10, 72.36it/s, acc=28.2, epoch=4, loss=3.24][A
split=train: 1021it [07:10, 72.36it/s, acc=28.2, epoch=4, loss=3.24][A
split=train: 1022it [07:10, 72.36it/s, acc=28.2, epoch=4, loss=3.24][A
split=train: 1023it [07:10, 72.36it/s, acc=28.2, epoch=4, loss=3.23][A
split=train: 1024it [07:10, 72.36it/s, acc=28.2, epoch=4, loss=3.23][A
split=train: 1025it [07:10, 72.36it/s, acc=28.4, epoch=4, loss=3.22][A
split=train: 1026it [07:10, 72.36it/s, acc=28.4, epoch=4, loss=3.22][A
split=train: 1027it [07:10, 74.70it/s, acc=28.4, epoch=4, loss=3.22][A
split=train: 1027it [07:10, 74.70it/s, acc=28.3, epoch=4, loss=3.22][A
split=train: 1028it [07:10, 74.70it/s, acc=28.3, epoch=4, loss=3.22][A
split=train: 1029it [07:10, 74.70it/s, acc=28.2, epoch=4, loss=3.22][A
split=train: 1030it [07:10, 74.70it/s, acc=28.3, epoch=4, loss=

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 1035it [07:10, 75.73it/s, acc=28.2, epoch=4, loss=3.21][A
split=train: 1036it [07:10, 75.73it/s, acc=28.4, epoch=4, loss=3.21][A
split=train: 1037it [07:10, 75.73it/s, acc=28.4, epoch=4, loss=3.21][A
split=train: 1038it [07:10, 75.73it/s, acc=28.6, epoch=4, loss=3.2] [A
split=train: 1039it [07:10, 75.73it/s, acc=28.6, epoch=4, loss=3.2][A
split=train: 1040it [07:10, 75.73it/s, acc=28.7, epoch=4, loss=3.2][A
split=train: 1041it [07:10, 75.73it/s, acc=28.7, epoch=4, loss=3.19][A
split=train: 1042it [07:10, 75.73it/s, acc=28.7, epoch=4, loss=3.19][A
split=train: 1043it [07:10, 75.73it/s, acc=28.7, epoch=4, loss=3.19][A
split=train: 1044it [07:10, 77.68it/s, acc=28.7, epoch=4, loss=3.19][A
split=train: 1044it [07:10, 77.68it/s, acc=28.6, epoch=4, loss=3.19][A
split=train: 1045it [07:10, 77.68it/s, acc=28.5, epoch=4, loss=3.19][A
split=train: 1046it [07:10, 77.68it/s, acc=28.4, epoch=4, loss=3.19][A
split=train: 1047it [07:10, 77.68it/s, acc=28.3, epoch=4, loss=3.

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 1052it [07:10, 77.68it/s, acc=28, epoch=4, loss=3.18]  [A
split=train: 1053it [07:10, 78.99it/s, acc=28, epoch=4, loss=3.18][A
split=train: 1053it [07:10, 78.99it/s, acc=28, epoch=4, loss=3.18][A
split=train: 1054it [07:10, 78.99it/s, acc=27.9, epoch=4, loss=3.17][A
split=train: 1055it [07:10, 78.99it/s, acc=28, epoch=4, loss=3.17]  [A
split=train: 1056it [07:10, 78.99it/s, acc=27.8, epoch=4, loss=3.17][A
split=train: 1057it [07:10, 78.99it/s, acc=27.9, epoch=4, loss=3.16][A
split=train: 1058it [07:10, 78.99it/s, acc=27.9, epoch=4, loss=3.16][A
split=train: 1059it [07:10, 78.99it/s, acc=27.7, epoch=4, loss=3.16][A
split=train: 1060it [07:10, 78.99it/s, acc=27.8, epoch=4, loss=3.16][A
split=train: 1061it [07:10, 78.99it/s, acc=27.7, epoch=4, loss=3.16][A
split=train: 1062it [07:10, 79.69it/s, acc=27.7, epoch=4, loss=3.16][A
split=train: 1062it [07:10, 79.69it/s, acc=27.8, epoch=4, loss=3.15][A
split=train: 1063it [07:10, 79.69it/s, acc=27.8, epoch=4, loss=3.16

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 1068it [07:10, 79.69it/s, acc=27.8, epoch=4, loss=3.14][A
split=train: 1069it [07:10, 79.69it/s, acc=27.8, epoch=4, loss=3.14][A
split=train: 1070it [07:10, 79.69it/s, acc=27.9, epoch=4, loss=3.13][A
split=train: 1071it [07:10, 79.27it/s, acc=27.9, epoch=4, loss=3.13][A
split=train: 1071it [07:10, 79.27it/s, acc=28, epoch=4, loss=3.13]  [A
split=train: 1072it [07:10, 79.27it/s, acc=28.1, epoch=4, loss=3.13][A
split=train: 1073it [07:10, 79.27it/s, acc=27.9, epoch=4, loss=3.13][A
split=train: 1074it [07:10, 79.27it/s, acc=27.9, epoch=4, loss=3.12][A
split=train: 1075it [07:11, 79.27it/s, acc=27.9, epoch=4, loss=3.12][A
split=train: 1076it [07:11, 79.27it/s, acc=27.9, epoch=4, loss=3.11][A
split=train: 1077it [07:11, 79.27it/s, acc=27.9, epoch=4, loss=3.11][A
split=train: 1078it [07:11, 79.27it/s, acc=27.9, epoch=4, loss=3.11][A
split=train: 1079it [07:11, 72.57it/s, acc=27.9, epoch=4, loss=3.11][A
split=train: 1079it [07:11, 72.57it/s, acc=27.9, epoch=4, loss=

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 1082it [07:11, 72.57it/s, acc=27.9, epoch=4, loss=3.1][A
split=train: 1083it [07:11, 72.57it/s, acc=27.9, epoch=4, loss=3.1][A
split=train: 1084it [07:11, 72.57it/s, acc=28, epoch=4, loss=3.1]  [A
split=train: 1085it [07:11, 72.57it/s, acc=28.1, epoch=4, loss=3.09][A
split=train: 1086it [07:11, 72.57it/s, acc=28.1, epoch=4, loss=3.09][A
split=train: 1087it [07:11, 70.23it/s, acc=28.1, epoch=4, loss=3.09][A
split=train: 1087it [07:11, 70.23it/s, acc=28.1, epoch=4, loss=3.09][A
split=train: 1088it [07:11, 70.23it/s, acc=28, epoch=4, loss=3.09]  [A
split=train: 1089it [07:11, 70.23it/s, acc=27.8, epoch=4, loss=3.09][A
split=train: 1090it [07:11, 70.23it/s, acc=27.8, epoch=4, loss=3.09][A
split=train: 1091it [07:11, 70.23it/s, acc=27.8, epoch=4, loss=3.09][A
split=train: 1092it [07:11, 70.23it/s, acc=27.9, epoch=4, loss=3.09][A
split=train: 1093it [07:11, 70.23it/s, acc=27.9, epoch=4, loss=3.08][A
split=train: 1094it [07:11, 70.23it/s, acc=27.9, epoch=4, loss=3.0

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 1096it [07:11, 71.47it/s, acc=28, epoch=4, loss=3.08]  [A
split=train: 1097it [07:11, 71.47it/s, acc=27.9, epoch=4, loss=3.07][A
split=train: 1098it [07:11, 71.47it/s, acc=27.9, epoch=4, loss=3.07][A
split=train: 1099it [07:11, 71.47it/s, acc=27.9, epoch=4, loss=3.07][A
split=train: 1100it [07:11, 71.47it/s, acc=27.8, epoch=4, loss=3.07][A
split=train: 1101it [07:11, 71.47it/s, acc=27.7, epoch=4, loss=3.07][A
split=train: 1102it [07:11, 71.47it/s, acc=27.7, epoch=4, loss=3.07][A
split=train: 1103it [07:11, 73.23it/s, acc=27.7, epoch=4, loss=3.07][A
split=train: 1103it [07:11, 73.23it/s, acc=27.7, epoch=4, loss=3.07][A
split=train: 1104it [07:11, 73.23it/s, acc=27.7, epoch=4, loss=3.07][A
split=train: 1105it [07:11, 73.23it/s, acc=27.7, epoch=4, loss=3.06][A
split=train: 1106it [07:11, 73.23it/s, acc=27.6, epoch=4, loss=3.06][A
split=train: 1107it [07:11, 73.23it/s, acc=27.6, epoch=4, loss=3.06][A
split=train: 1108it [07:11, 73.23it/s, acc=27.5, epoch=4, loss=

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 1113it [07:11, 74.86it/s, acc=27.6, epoch=4, loss=3.05][A
split=train: 1114it [07:11, 74.86it/s, acc=27.7, epoch=4, loss=3.04][A
split=train: 1115it [07:11, 74.86it/s, acc=27.7, epoch=4, loss=3.04][A
split=train: 1116it [07:11, 74.86it/s, acc=27.7, epoch=4, loss=3.04][A
split=train: 1117it [07:11, 74.86it/s, acc=27.7, epoch=4, loss=3.04][A
split=train: 1118it [07:11, 74.86it/s, acc=27.7, epoch=4, loss=3.03][A
split=train: 1119it [07:11, 75.69it/s, acc=27.7, epoch=4, loss=3.03][A
split=train: 1119it [07:11, 75.69it/s, acc=27.7, epoch=4, loss=3.03][A
split=train: 1120it [07:11, 75.69it/s, acc=27.8, epoch=4, loss=3.03][A
split=train: 1121it [07:11, 75.69it/s, acc=27.8, epoch=4, loss=3.03][A
split=train: 1122it [07:11, 75.69it/s, acc=27.7, epoch=4, loss=3.03][A
split=train: 1123it [07:11, 75.69it/s, acc=27.8, epoch=4, loss=3.02][A
split=train: 1124it [07:11, 75.69it/s, acc=27.7, epoch=4, loss=3.02][A
split=train: 1125it [07:11, 75.69it/s, acc=27.7, epoch=4, loss=

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 1129it [07:11, 75.48it/s, acc=27.8, epoch=4, loss=3.01][A
split=train: 1130it [07:11, 75.48it/s, acc=27.8, epoch=4, loss=3.01][A
split=train: 1131it [07:11, 75.48it/s, acc=27.8, epoch=4, loss=3.01][A
split=train: 1132it [07:11, 75.48it/s, acc=27.9, epoch=4, loss=3.01][A
split=train: 1133it [07:11, 75.48it/s, acc=27.8, epoch=4, loss=3.01][A
split=train: 1134it [07:11, 75.48it/s, acc=27.8, epoch=4, loss=3.01][A
split=train: 1135it [07:11, 76.54it/s, acc=27.8, epoch=4, loss=3.01][A
split=train: 1135it [07:11, 76.54it/s, acc=27.8, epoch=4, loss=3.01][A
split=train: 1136it [07:11, 76.54it/s, acc=27.7, epoch=4, loss=3]   [A
split=train: 1137it [07:11, 76.54it/s, acc=27.7, epoch=4, loss=3][A
split=train: 1138it [07:11, 76.54it/s, acc=27.7, epoch=4, loss=3][A
split=train: 1139it [07:11, 76.54it/s, acc=27.7, epoch=4, loss=3][A
split=train: 1140it [07:11, 76.54it/s, acc=27.6, epoch=4, loss=3][A
split=train: 1141it [07:11, 76.54it/s, acc=27.6, epoch=4, loss=3][A
split=

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 1146it [07:11, 77.93it/s, acc=27.6, epoch=4, loss=2.99][A
split=train: 1147it [07:11, 77.93it/s, acc=27.6, epoch=4, loss=2.98][A
split=train: 1148it [07:11, 77.93it/s, acc=27.6, epoch=4, loss=2.98][A
split=train: 1149it [07:11, 77.93it/s, acc=27.6, epoch=4, loss=2.98][A
split=train: 1150it [07:11, 77.93it/s, acc=27.6, epoch=4, loss=2.98][A
split=train: 1151it [07:12, 77.93it/s, acc=27.6, epoch=4, loss=2.97][A
split=train: 1152it [07:12, 77.93it/s, acc=27.6, epoch=4, loss=2.97][A
split=train: 1153it [07:12, 78.64it/s, acc=27.6, epoch=4, loss=2.97][A
split=train: 1153it [07:12, 78.64it/s, acc=27.7, epoch=4, loss=2.97][A
split=train: 1154it [07:12, 78.64it/s, acc=27.7, epoch=4, loss=2.96][A
split=train: 1155it [07:12, 78.64it/s, acc=27.7, epoch=4, loss=2.96][A
split=train: 1156it [07:12, 78.64it/s, acc=27.6, epoch=4, loss=2.96][A
split=train: 1157it [07:12, 78.64it/s, acc=27.7, epoch=4, loss=2.96][A
split=train: 1158it [07:12, 78.64it/s, acc=27.6, epoch=4, loss=

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32

split=train: 1163it [07:12, 79.83it/s, acc=27.6, epoch=4, loss=2.95][A
split=train: 1164it [07:12, 79.83it/s, acc=27.6, epoch=4, loss=2.95][A
split=train: 1165it [07:12, 79.83it/s, acc=27.6, epoch=4, loss=2.94][A
split=train: 1166it [07:12, 79.83it/s, acc=27.6, epoch=4, loss=2.94][A
split=train: 1167it [07:12, 79.83it/s, acc=27.6, epoch=4, loss=2.94][A
split=train: 1168it [07:12, 79.83it/s, acc=27.6, epoch=4, loss=2.94][A
split=train: 1169it [07:12, 79.83it/s, acc=27.6, epoch=4, loss=2.94][A
split=train: 1170it [07:12, 79.73it/s, acc=27.6, epoch=4, loss=2.94][A
split=train: 1170it [07:12, 79.73it/s, acc=27.7, epoch=4, loss=2.94][A
split=train: 1171it [07:12, 79.73it/s, acc=27.6, epoch=4, loss=2.93][A
split=train: 1172it [07:12, 79.73it/s, acc=27.6, epoch=4, loss=2.93][A
split=train: 1173it [07:12, 79.73it/s, acc=27.7, epoch=4, loss=2.93][A
split=train: 1174it [07:12, 79.73it/s, acc=27.7, epoch=4, loss=2.93][A
split=train: 1175it [07:12, 79.73it/s, acc=27.7, epoch=4, loss=2

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 1181it [07:12, 82.51it/s, acc=27.8, epoch=4, loss=2.91][A
split=train: 1182it [07:12, 82.51it/s, acc=27.7, epoch=4, loss=2.91][A
split=train: 1183it [07:12, 82.51it/s, acc=27.7, epoch=4, loss=2.91][A
split=train: 1184it [07:12, 82.51it/s, acc=27.7, epoch=4, loss=2.91][A
split=train: 1185it [07:12, 82.51it/s, acc=27.7, epoch=4, loss=2.91][A
split=train: 1186it [07:12, 82.51it/s, acc=27.7, epoch=4, loss=2.91][A
split=train: 1187it [07:12, 82.51it/s, acc=27.7, epoch=4, loss=2.9] [A
split=train: 1188it [07:12, 81.70it/s, acc=27.7, epoch=4, loss=2.9][A
split=train: 1188it [07:12, 81.70it/s, acc=27.7, epoch=4, loss=2.91][A
split=train: 1189it [07:12, 81.70it/s, acc=27.6, epoch=4, loss=2.9] [A
split=train: 1190it [07:12, 81.70it/s, acc=27.6, epoch=4, loss=2.9][A
split=train: 1191it [07:12, 81.70it/s, acc=27.6, epoch=4, loss=2.9][A
split=train: 1192it [07:12, 81.70it/s, acc=27.6, epoch=4, loss=2.9][A
split=train: 1193it [07:12, 81.70it/s, acc=27.6, epoch=4, loss=2.9]

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32

split=train: 1198it [07:12, 82.51it/s, acc=27.7, epoch=4, loss=2.89][A
split=train: 1199it [07:12, 82.51it/s, acc=27.8, epoch=4, loss=2.89][A
split=val: 204it [07:12,  3.47it/s, acc=6.25, epoch=4, loss=2.93][A
split=val: 205it [07:12,  3.69it/s, acc=6.25, epoch=4, loss=2.93][A
split=val: 205it [07:12,  3.69it/s, acc=3.12, epoch=4, loss=2.83][A
split=val: 206it [07:12,  3.69it/s, acc=3.12, epoch=4, loss=2.67][A
split=val: 207it [07:12,  3.69it/s, acc=3.91, epoch=4, loss=2.62][A
split=val: 208it [07:12,  3.69it/s, acc=3.12, epoch=4, loss=2.57][A
split=val: 209it [07:12,  3.69it/s, acc=2.6, epoch=4, loss=2.55] [A
split=val: 210it [07:12,  3.69it/s, acc=3.12, epoch=4, loss=2.56][A
split=val: 211it [07:12,  3.69it/s, acc=2.73, epoch=4, loss=2.62][A
split=val: 212it [07:12,  3.69it/s, acc=2.78, epoch=4, loss=2.63][A
split=val: 213it [07:12,  3.69it/s, acc=2.5, epoch=4, loss=2.65] [A
split=val: 214it [07:12,  3.69it/s, acc=2.56, epoch=4, loss=2.65][A
split=val: 215it [07:12,  3.

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=val: 235it [07:12,  5.48it/s, acc=3.61, epoch=4, loss=2.63][A
split=val: 236it [07:12,  5.48it/s, acc=3.6, epoch=4, loss=2.63] [A
split=val: 237it [07:12,  5.48it/s, acc=3.58, epoch=4, loss=2.63][A
split=val: 238it [07:12,  5.48it/s, acc=3.57, epoch=4, loss=2.63][A
split=val: 239it [07:12,  7.96it/s, acc=3.57, epoch=4, loss=2.63][A
split=val: 239it [07:12,  7.96it/s, acc=3.56, epoch=4, loss=2.63][A
split=val: 240it [07:12,  7.96it/s, acc=3.55, epoch=4, loss=2.63][A
split=val: 241it [07:12,  7.96it/s, acc=3.54, epoch=4, loss=2.63][A
split=val: 242it [07:12,  7.96it/s, acc=3.53, epoch=4, loss=2.62][A
split=val: 243it [07:12,  7.96it/s, acc=3.44, epoch=4, loss=2.62][A
split=val: 244it [07:12,  7.96it/s, acc=3.43, epoch=4, loss=2.62][A
split=val: 245it [07:12,  7.96it/s, acc=3.42, epoch=4, loss=2.62][A
split=val: 246it [07:12,  7.96it/s, acc=3.49, epoch=4, loss=2.62][A
split=val: 247it [07:12,  7.96it/s, acc=3.55, epoch=4, loss=2.63][A
split=val: 248it [07:12,  7.96it/

输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32, 1, 50])
RNN输出形状：torch.Size([32, 6466])（在聚合前为：torch.Size([32, 6466])）
输入序列形状：torch.Size([32, 1]) -> 嵌入后：torch.Size([32


split=train: 1200it [07:22, 82.51it/s, acc=27.8, epoch=4, loss=2.89][A
split=val: 255it [07:32,  7.96it/s, acc=3.62, epoch=4, loss=2.65][A

## 推理能力验证

In [43]:
# 推理函数
def predict_nationality(surname, model, vectorizer, device, topk=5):
    """Predict the nationality of a surname

    Args:
        surname (str): the surname to predict
        model (RNNClassifier): the trained model
        vectorizer (CBOWVectorizer): the vectorizer used for preprocessing
        device (torch.device): the device on which the model is running
        topk (int, optional): the number of top predictions to return. Defaults to 5.

    Returns:
        list: a list of tuples containing the predicted nationalities and their probabilities
    """
    vectorized_surname = vectorizer.vectorize(surname)
    vectorized_surname = torch.tensor(vectorized_surname).unsqueeze(0).to(device)

    model.eval()
    with torch.no_grad():
        output = model(vectorized_surname)
    
    probabilities = F.softmax(output, dim=1).squeeze()
    probabilities, indices = torch.topk(probabilities, topk)

    results = []
    for prob, idx in zip(probabilities, indices):
        nationality = vectorizer.cbow_vocab.lookup_index(idx.item())
        results.append((nationality, prob.item()))
    
    return results

# 预测指定姓氏的国籍
surnames_to_predict = ['McMahan', 'Nakamoto', 'Wan', 'Cho']

# 定义 device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 加载最佳模型
model_path = train_state['model_filename']
model.load_state_dict(torch.load(model_path))
model.to(device)
model.eval()

for surname in surnames_to_predict:
    predictions = predict_nationality(surname, model, vectorizer, device)
    print(f"Surname: {surname}")
    for nationality, prob in predictions:
        print(f"  {nationality}: {prob:.4f}")
    print()

输入序列形状：torch.Size([1, 1]) -> 嵌入后：torch.Size([1, 1, 50])
RNN输出形状：torch.Size([1, 6466])（在聚合前为：torch.Size([1, 6466])）
Surname: McMahan
  Arabic: 0.1678
  Russian: 0.1552
  English: 0.1479
  Japanese: 0.0286
  German: 0.0282

输入序列形状：torch.Size([1, 1]) -> 嵌入后：torch.Size([1, 1, 50])
RNN输出形状：torch.Size([1, 6466])（在聚合前为：torch.Size([1, 6466])）
Surname: Nakamoto
  Arabic: 0.1678
  Russian: 0.1552
  English: 0.1479
  Japanese: 0.0286
  German: 0.0282

输入序列形状：torch.Size([1, 1]) -> 嵌入后：torch.Size([1, 1, 50])
RNN输出形状：torch.Size([1, 6466])（在聚合前为：torch.Size([1, 6466])）
Surname: Wan
  English: 0.1977
  Russian: 0.1475
  Arabic: 0.0990
  Italian: 0.0385
  Japanese: 0.0319

输入序列形状：torch.Size([1, 1]) -> 嵌入后：torch.Size([1, 1, 50])
RNN输出形状：torch.Size([1, 6466])（在聚合前为：torch.Size([1, 6466])）
Surname: Cho
  English: 0.1285
  Russian: 0.0986
  Arabic: 0.0436
  Japanese: 0.0367
  Italian: 0.0264

