使用我爱志方小姐大佬的baseline：[https://aistudio.baidu.com/aistudio/projectdetail/2085423](https://aistudio.baidu.com/aistudio/projectdetail/2085423)

In [1]:

!pip install --upgrade paddlenlp -i https://pypi.org/simple

Requirement already up-to-date: paddlenlp in /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages (2.0.5)


In [2]:
import collections
import json
import math
import os
import random
import time
from functools import partial

import numpy as np
import paddle
from paddle.io import BatchSampler
from paddle.io import DataLoader
from paddle.metric import Accuracy
from paddle.nn import CrossEntropyLoss
from paddle.optimizer import AdamW
from paddlenlp.data import Dict
from paddlenlp.data import Pad
from paddlenlp.data import Stack
from paddlenlp.datasets import load_dataset
from paddlenlp.transformers import BertTokenizer
from paddlenlp.transformers import BertForSequenceClassification
from paddlenlp.transformers import ErnieTokenizer
from paddlenlp.transformers import ErnieForSequenceClassification
from paddlenlp.transformers import ErnieGramTokenizer
from paddlenlp.transformers import ErnieGramForSequenceClassification
from paddlenlp.transformers import RobertaTokenizer
from paddlenlp.transformers import RobertaForSequenceClassification
from paddlenlp.transformers import LinearDecayWithWarmup

from config import Config


In [3]:
# Registry of supported backbones: maps a lower-case model-type key to the
# (sequence-classification model class, tokenizer class) pair that do_train and
# do_predict unpack via MODEL_CLASSES[args.model_type].
MODEL_CLASSES = {
    'bert': (BertForSequenceClassification, BertTokenizer),
    'ernie': (ErnieForSequenceClassification, ErnieTokenizer),
    'ernie_gram': (ErnieGramForSequenceClassification, ErnieGramTokenizer),
    'roberta': (RobertaForSequenceClassification, RobertaTokenizer)
}

In [4]:
# Number of target classes, shared through module state: do_train overwrites it
# with len(train_ds.label_list) and do_predict reads it when rebuilding the model.
# The value 2 is only the default used if do_train has not run in this kernel.
# (The identifier is spelled "lable" consistently throughout the notebook.)
lable_list_length = 2

In [None]:
def set_seed(args):
    """Seed the Python, NumPy and Paddle random generators from ``args.seed``.

    Args:
        args: configuration object exposing an integer ``seed`` attribute.
    """
    # Same order as before: stdlib ``random`` first, then NumPy, then Paddle.
    for seed_fn in (random.seed, np.random.seed, paddle.seed):
        seed_fn(args.seed)

In [None]:
def convert_example(example, tokenizer, max_seq_length=None):
    """Convert a DuReader-yesno example into the features the model needs.

    Args:
        example: mapping with the keys ``'question'``, ``'answer'``,
            ``'labels'`` and ``'id'``.
        tokenizer: callable invoked as
            ``tokenizer(text=..., text_pair=..., max_seq_len=...)``; its return
            value must support item assignment.
        max_seq_length: maximum sequence length forwarded to the tokenizer.
            When ``None`` (the default) the value is read from the
            module-level ``args.max_seq_length``, which keeps existing callers
            working; pass it explicitly to avoid depending on that global.

    Returns:
        The tokenizer output with the example's ``'labels'`` and ``'id'``
        values added under the same keys.

    Raises:
        NameError: if ``max_seq_length`` is ``None`` and no module-level
            ``args`` has been defined yet.
    """
    if max_seq_length is None:
        # Backward-compatible fallback: the notebook defines the global `args`
        # in a later cell, before any training or prediction call.
        max_seq_length = args.max_seq_length

    feature = tokenizer(
        text=example['question'],
        text_pair=example['answer'],
        max_seq_len=max_seq_length
    )
    feature['labels'] = example['labels']
    feature['id'] = example['id']

    return feature

In [None]:
@paddle.no_grad()
def evaluate(model, metric, data_loader):
    """Score ``model`` on every batch of ``data_loader`` using ``metric``.

    The model is put in evaluation mode for the pass and switched back to
    training mode before returning, so this can be called from inside the
    training loop.

    Args:
        model: callable as ``model(input_ids, segment_ids)`` returning logits.
        metric: metric object providing ``reset``, ``compute``, ``update`` and
            ``accumulate``; it is reset at the start of the pass.
        data_loader: iterable yielding ``(input_ids, segment_ids, labels)``
            batches.

    Returns:
        Whatever ``metric.accumulate()`` reports after all batches have been
        consumed.
    """
    model.eval()
    metric.reset()
    for input_ids, segment_ids, labels in data_loader:
        logits = model(input_ids, segment_ids)
        correct = metric.compute(logits, labels)
        metric.update(correct)

    # Read the aggregate once, after the loop: it avoids recomputing it for
    # every batch and keeps `accu` defined when the loader yields no batches
    # (previously an UnboundLocalError). What an empty pass reports is up to
    # the metric implementation -- TODO confirm for paddle.metric.Accuracy.
    accu = metric.accumulate()

    model.train()  # Switch the model to training mode after evaluation

    return accu

In [None]:
@paddle.no_grad()
def predict(model, data_loader):
    """Predict a label for every example served by ``data_loader``.

    Args:
        model: callable as ``model(input_ids, segment_ids)`` returning logits.
        data_loader: iterable yielding ``(input_ids, segment_ids, ids)``
            batches; ``data_loader.dataset.label_list`` translates a predicted
            class index into its label.

    Returns:
        dict mapping ``str(example id)`` to the predicted label.
    """
    model.eval()
    id_to_label = {}
    for batch in data_loader:
        input_ids, segment_ids, qas_id = batch
        logits = model(input_ids, segment_ids)
        example_ids = qas_id.numpy()
        # The class with the highest logit along axis 1 is the prediction.
        class_indices = paddle.argmax(logits, axis=1).numpy()
        for position, class_index in enumerate(class_indices):
            id_to_label[str(example_ids[position])] = \
                data_loader.dataset.label_list[class_index]
    # Leave the model in training mode, as the original implementation does.
    model.train()
    return id_to_label

In [None]:
def do_train(args):
    """Fine-tune the configured model on the ``dureader_yesno`` train split.

    The dev split is evaluated every ``args.save_steps`` steps and at the last
    step; whenever the dev accuracy beats the best value seen so far, the
    model and tokenizer are saved to ``<args.output_dir>/bets_model``.

    Side effects:
        * selects the Paddle device from ``args.device``;
        * lower-cases ``args.model_type`` in place;
        * seeds the random generators through ``set_seed(args)``;
        * overwrites the module-level ``lable_list_length`` with the number of
          labels of the training set.

    Args:
        args: configuration object. Attributes read here: ``device``,
            ``model_type``, ``model_name_or_path``, ``batch_size``,
            ``max_steps``, ``num_train_epochs``, ``learning_rate``,
            ``warmup_proportion``, ``adam_epsilon``, ``weight_decay``,
            ``output_dir``, ``logging_steps`` and ``save_steps`` (plus
            ``seed`` through ``set_seed``).

    Note:
        ``convert_example`` is bound with the tokenizer only, so the maximum
        sequence length it uses comes from the module-level ``args``, not from
        this function's ``args`` parameter.
    """

    global lable_list_length
    
    paddle.set_device(args.device)

    args.model_type = args.model_type.lower()
    model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path)

    set_seed(args)

    train_ds, dev_ds = load_dataset('dureader_yesno', splits=['train', 'dev'])

    trans_func = partial(convert_example, tokenizer=tokenizer)

    # Collate function: pads the two token sequences of each sample and stacks
    # the labels as int64. The Dict is bound as a default argument so it is
    # built once, when the lambda is created.
    train_batchify_fn = lambda samples, fn=Dict({
        'input_ids': Pad(axis=0, pad_val=tokenizer.pad_token_id),
        'token_type_ids': Pad(axis=0, pad_val=tokenizer.pad_token_type_id),
        'labels': Stack(dtype="int64")
    }): fn(samples)

    train_ds = train_ds.map(trans_func, lazy=True)
    train_batch_sampler = BatchSampler(
        dataset=train_ds, 
        batch_size=args.batch_size, 
        shuffle=True
    )
    train_data_loader = DataLoader(
        dataset=train_ds,
        batch_sampler=train_batch_sampler,
        collate_fn=train_batchify_fn,
        return_list=True
    )

    # The dev loader reuses the training collate function and is not shuffled.
    dev_ds = dev_ds.map(trans_func, lazy=True)
    dev_batch_sampler = BatchSampler(
        dataset=dev_ds, 
        batch_size=args.batch_size, 
        shuffle=False
    )
    dev_data_loader = DataLoader(
        dataset=dev_ds,
        batch_sampler=dev_batch_sampler,
        collate_fn=train_batchify_fn,
        return_list=True
    )

    # Published through the module-level variable so do_predict can rebuild a
    # model with the same number of classes.
    lable_list_length = len(train_ds.label_list)
    model = model_class.from_pretrained(
        args.model_name_or_path, num_classes=lable_list_length)

    # A positive args.max_steps overrides the epoch-based step budget; the
    # epoch count is then derived from the resulting number of steps.
    num_training_steps = args.max_steps if args.max_steps > 0 else len(
        train_data_loader) * args.num_train_epochs
    num_train_epochs = math.ceil(num_training_steps / len(train_data_loader))

    num_batchs = len(train_data_loader)

    lr_scheduler = LinearDecayWithWarmup(
        learning_rate=args.learning_rate, 
        total_steps=num_training_steps,
        warmup=args.warmup_proportion
    )
    # Generate parameter names needed to perform weight decay.
    # All bias and LayerNorm parameters are excluded.
    decay_params = [
        p.name for n, p in model.named_parameters()
        if not any(nd in n for nd in ["bias", "norm"])
    ]
    optimizer = paddle.optimizer.AdamW(
        learning_rate=lr_scheduler,
        epsilon=args.adam_epsilon,
        parameters=model.parameters(),
        weight_decay=args.weight_decay,
        apply_decay_param_fun=lambda x: x in decay_params
    )

    criterion = CrossEntropyLoss()
    metric = Accuracy()

    # Best dev accuracy so far; a checkpoint is written only when it is beaten.
    best_val_acc = 0.0

    output_dir = os.path.join(args.output_dir, "bets_model")
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    global_step = 0
    tic_train = time.time()
    for epoch in range(num_train_epochs):
        for step, batch in enumerate(train_data_loader):
            
            global_step += 1

            input_ids, segment_ids, label = batch

            logits = model(input_ids=input_ids, token_type_ids=segment_ids)
            loss = criterion(logits, label)

            # Speed is logging_steps divided by the wall-clock time elapsed
            # since the previous log line (tic_train is reset right after).
            if global_step % args.logging_steps == 0:
                print(
                    "global step %d, epoch: %d, batch: %d/%d, loss: %f, speed: %.2f step/s"
                    % (global_step, epoch + 1, step + 1, num_batchs, loss,
                       args.logging_steps / (time.time() - tic_train)))
                
                tic_train = time.time()

            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.clear_grad()

            # Evaluate on the dev set every save_steps steps and at the very
            # last training step.
            if global_step % args.save_steps == 0 or global_step == num_training_steps:
                acc = evaluate(model, metric, dev_data_loader)
                print(f'global step {global_step}, val acc is {acc:.5f}!')
                
                if acc > best_val_acc:
                    best_val_acc = acc

                    print(f'save model at global step {global_step}, best val acc is {best_val_acc:.5f}!')

                    model.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)

                # Step budget reached: this leaves the inner batch loop only.
                if global_step == num_training_steps:
                    break


In [None]:
def do_predict(args):
    """Predict labels for the ``dureader_yesno`` test split and write them out.

    Loads the tokenizer and model saved under ``<args.output_dir>/bets_model``,
    runs ``predict`` over the test split and writes the resulting
    ``{id: label}`` mapping to ``dureader_yesno.json`` in the current working
    directory.

    The number of classes is taken from the test dataset's ``label_list``, so
    this works in a fresh kernel where ``do_train`` has not run; the
    module-level ``lable_list_length`` is used only if the dataset exposes no
    label list.

    Args:
        args: configuration object. Attributes read here: ``device``,
            ``model_type``, ``output_dir`` and ``batch_size``.
    """
    # Select the device explicitly, as do_train does.
    paddle.set_device(args.device)

    output_dir = os.path.join(args.output_dir, "bets_model")

    # Same normalisation as do_train, without mutating `args`.
    model_type = args.model_type.lower()
    model_class, tokenizer_class = MODEL_CLASSES[model_type]
    tokenizer = tokenizer_class.from_pretrained(output_dir)

    test_ds = load_dataset('dureader_yesno', splits='test')

    # Size the classifier head from the dataset rather than from hidden kernel
    # state: the global only holds the right value after do_train has run.
    label_list = getattr(test_ds, 'label_list', None)
    num_classes = len(label_list) if label_list else lable_list_length

    model = model_class.from_pretrained(output_dir, num_classes=num_classes)

    trans_func = partial(convert_example, tokenizer=tokenizer)

    # Collate function: pads the two token sequences and stacks the example ids
    # that predict() uses as result keys.
    test_batchify_fn = lambda samples, fn=Dict({
        'input_ids': Pad(axis=0, pad_val=tokenizer.pad_token_id),
        'token_type_ids': Pad(axis=0, pad_val=tokenizer.pad_token_type_id),
        'id': Stack()
    }): fn(samples)

    test_ds = test_ds.map(trans_func, lazy=True)
    test_batch_sampler = BatchSampler(
        dataset=test_ds,
        batch_size=args.batch_size,
        shuffle=False
    )
    test_data_loader = DataLoader(
        dataset=test_ds,
        batch_sampler=test_batch_sampler,
        collate_fn=test_batchify_fn,
        return_list=True
    )

    predictions = predict(model, test_data_loader)
    # ensure_ascii=False can emit non-ASCII characters, so fix the encoding
    # instead of relying on the platform default.
    with open('dureader_yesno.json', "w", encoding="utf-8") as writer:
        writer.write(
            json.dumps(
                predictions, ensure_ascii=False, indent=4) + "\n")


In [None]:
# Run configuration shared by do_train and do_predict. It is also read as a
# module-level global by convert_example (max_seq_length), so this cell must be
# executed before either of them is called.
# Attributes the code reads but that are not set here (seed, max_steps,
# adam_epsilon) presumably come from Config's defaults -- verify in config.py.
args = Config(model_type='ernie_gram', 
              model_name_or_path='ernie-gram-zh', 
              output_dir='./checkpoints/dureader-yesno/',
              
              max_seq_length=384,
              batch_size=16, 
              learning_rate=5e-5,
              num_train_epochs=3,
              logging_steps=10,
              save_steps=200,
              warmup_proportion=0.1,
              weight_decay=0.01,
              device='gpu')


In [None]:
# Fine-tune on the train split; the checkpoint with the best dev accuracy is
# saved under <output_dir>/bets_model.
do_train(args)


[2021-07-16 09:43:49,519] [    INFO] - Found /home/aistudio/.paddlenlp/models/ernie-gram-zh/vocab.txt
[2021-07-16 09:43:54,269] [    INFO] - Already cached /home/aistudio/.paddlenlp/models/ernie-gram-zh/ernie_gram_zh.pdparams


global step 10, epoch: 1, batch: 10/4712, loss: 1.087154, speed: 4.79 step/s
global step 20, epoch: 1, batch: 20/4712, loss: 1.045500, speed: 5.05 step/s
global step 30, epoch: 1, batch: 30/4712, loss: 0.965376, speed: 4.46 step/s


at global step 13000, best val acc is 0.87166!

In [None]:
# Predict the test split with the saved checkpoint and write the results to
# dureader_yesno.json.
do_predict(args)


test acc: 87.64193

请点击[此处](https://ai.baidu.com/docs#/AIStudio_Project_Notebook/a38e5576)查看本环境基本用法.  <br>
Please click [here](https://ai.baidu.com/docs#/AIStudio_Project_Notebook/a38e5576) for more detailed instructions.