In [1]:
!pip install evaluate seqeval

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting fsspec>=2021.05.0 (from fsspec[http]>=2021.05.0->evaluate)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (193 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: seqeval
  Building wheel for seqeval (setup.py) ... [?25l[?25hdone
  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-a

In [2]:
%%writefile ner_ddp.py

from transformers import AutoModelForTokenClassification, AutoTokenizer,DataCollatorForTokenClassification,TrainingArguments, Trainer
from datasets import load_dataset
import os
import torch
import evaluate
import numpy as np
import torch.distributed as dist
import torch.multiprocessing as mp

# Switched to a smaller model; otherwise the GPU runs out of memory.
# MODEL_NAME = 'google-bert/bert-base-chinese'
MODEL_NAME = 'cycloneboy/chinese_mobilebert_base_f2'
# DATASET_NAME = 'nlhappy/CLUE-NER'
DATASET_NAME = 'doushabao4766/msra_ner_k_V3'
DATASET_SHARDS = 12
OUTPUT_DIR = 'ner_train'

# Entity types; the leading 'O' entry is kept as-is and gets no B-/I- prefix.
entites = ['O', 'PER', 'ORG', 'LOC']
# Tag list: 'O' followed by a B-/I- pair for each remaining entity type, in order.
tags = ['O'] + [
    prefix + entity.upper()
    for entity in entites[1:]
    for prefix in ('B-', 'I-')
]
print(tags)
# Lookup tables between tag strings and their integer ids (positions in `tags`).
id2lbl = dict(enumerate(tags))
lbl2id = {tag: idx for idx, tag in id2lbl.items()}

def _decode_tag_sequences(pred_ids, label_ids, tag_names, ignore_index=-100):
    """Convert aligned id sequences to tag-name sequences, dropping ignored positions.

    Args:
        pred_ids: iterable of per-sequence predicted tag ids.
        label_ids: iterable of per-sequence reference tag ids, aligned
            position-by-position with ``pred_ids``.
        tag_names: sequence mapping a tag id to its tag string.
        ignore_index: label value marking positions that are dropped from
            both outputs.

    Returns:
        ``(pred_tags, gold_tags)``: two lists of lists of tag strings; the
        inner lists at the same index always have the same length.
    """
    pred_tags, gold_tags = [], []
    for pred_row, label_row in zip(pred_ids, label_ids):
        # Filter once on the ORIGINAL label row so predictions and references
        # keep exactly the same positions, wherever the ignored labels sit.
        kept = [(p, l) for p, l in zip(pred_row, label_row) if l != ignore_index]
        pred_tags.append([tag_names[p] for p, _ in kept])
        gold_tags.append([tag_names[l] for _, l in kept])
    return pred_tags, gold_tags


def train(rank, world_size):
    """Fine-tune the token-classification model inside one spawned worker.

    Args:
        rank: index of this worker; used as the process-group rank, as the
            device the model is moved to, and as ``local_rank`` for the
            TrainingArguments.
        world_size: total number of workers in the process group.
    """
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12345'
    # NOTE(review): mp.spawn does not export RANK / WORLD_SIZE / LOCAL_RANK.
    # Confirm that the installed Trainer really runs in distributed mode when
    # it is only given `local_rank` and an already-initialised process group.
    dist.init_process_group("nccl", rank=rank, world_size=world_size)

    try:
        model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME,
                                                            num_labels=len(tags),
                                                            id2label=id2lbl,
                                                            label2id=lbl2id)
        model = model.to(rank)
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        ds = load_dataset(DATASET_NAME)
        # Keep a single shard (the middle one) of each split to shorten the run.
        ds['train'] = ds['train'].shard(num_shards=DATASET_SHARDS, index=DATASET_SHARDS >> 1)
        ds['test'] = ds['test'].shard(num_shards=DATASET_SHARDS, index=DATASET_SHARDS >> 1)
        print(ds)

        def mapper(item):
            """Tokenize a batch of pre-split examples and attach their tag ids."""
            encoded = tokenizer(item['tokens'],
                                truncation=True,
                                padding=True,
                                max_length=512,
                                add_special_tokens=False,
                                is_split_into_words=True,
                                # return_offsets_mapping=True,
                                return_tensors="pt")
            len_limit = encoded["input_ids"].shape[1]
            # NOTE(review): labels are only cut to the padded width, which
            # assumes every entry of item['tokens'] becomes exactly one
            # sub-token - confirm for this tokenizer/dataset.
            encoded['labels'] = [arr[:len_limit] for arr in item['ner_tags']]
            return encoded

        ds2 = ds.map(mapper, batched=True)
        ds2.set_format('torch', columns=['input_ids', 'token_type_ids', 'attention_mask', 'labels'])

        args = TrainingArguments(
            output_dir=OUTPUT_DIR,  # working directory for training (tensorboard, checkpoints, logs)
            num_train_epochs = 3,    # number of training epochs
            save_safetensors=False,  # False so the saved weights can be loaded with torch.load
            per_device_train_batch_size=32,  # training batch size per device
            per_device_eval_batch_size=32,
            report_to='tensorboard',  # where training logs are reported
            eval_strategy="epoch",
            logging_steps=50,
            fp16=True,
            local_rank=rank,
            lr_scheduler_type='linear',
            warmup_steps=50,
            ddp_find_unused_parameters=False
        )

        data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer,padding=True)

        # Load the metric once instead of on every evaluation pass.
        seqeval = evaluate.load('seqeval')

        def compute_metrics(result):
            """Score an evaluation pass; ``result`` unpacks to (predictions, labels)."""
            logits, label_ids = result
            pred_ids = np.argmax(logits, axis=2)
            pred_tags, gold_tags = _decode_tag_sequences(pred_ids, label_ids, tags)
            return seqeval.compute(predictions=pred_tags, references=gold_tags)

        trainer = Trainer(model, args, train_dataset=ds2['train'], eval_dataset=ds2['test'],
                          data_collator=data_collator,compute_metrics=compute_metrics)

        trainer.train()
    finally:
        # Always release the process group, even when setup or training fails.
        dist.destroy_process_group()


def main():
    """Spawn one training worker per visible CUDA device.

    Raises:
        RuntimeError: if no CUDA device is visible. Spawning zero workers
            would otherwise return without training anything.
    """
    world_size = torch.cuda.device_count()
    print('world_size', world_size)
    if world_size < 1:
        raise RuntimeError(
            'No CUDA GPU is visible; this script trains with the NCCL backend '
            'and needs at least one GPU.'
        )
    mp.spawn(train, args=(world_size,), nprocs=world_size, join=True)


if __name__ == "__main__":
    main()

Writing ner_ddp.py


In [3]:
# Run the script written by the %%writefile cell above as a separate process.
!python ner_ddp.py

2025-06-12 15:45:41.244796: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749743141.466328      54 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749743141.530130      54 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC']
world_size 2
2025-06-12 15:46:03.933960: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-06-12 15:46:03.934083: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register 

In [4]:
from transformers import pipeline

# Build an inference pipeline from a saved checkpoint directory.
pipe = pipeline('token-classification',  'ner_train/checkpoint-177')


def predict(text):
    """Print every entity the pipeline tags in ``text``, one per line.

    Prints nothing when the pipeline returns no entities.
    """
    for entity in pipe(text):
        print(entity)


predict('陆逊火烧连营，关羽败走麦城')
predict('双方确定了今后发展中美关系的指导方针。')
predict('加利福尼亚大学在哪')
predict('澳大利亚和日本进行了友好交流。')

2025-06-12 15:49:00.678772: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749743340.701677      20 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749743340.708678      20 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'entity': 'B-PER', 'score': 0.53758633, 'index': 1, 'word': '陆', 'start': 0, 'end': 1}
{'entity': 'I-PER', 'score': 0.8065418, 'index': 2, 'word': '逊', 'start': 1, 'end': 2}
{'entity': 'B-PER', 'score': 0.5385872, 'index': 8, 'word': '关', 'start': 7, 'end': 8}
{'entity': 'I-PER', 'score': 0.9143277, 'index': 9, 'word': '羽', 'start': 8, 'end': 9}
{'entity': 'B-ORG', 'score': 0.3699788, 'index': 12, 'word': '麦', 'start': 11, 'end': 12}
{'entity': 'I-PER', 'score': 0.5531025, 'index': 13, 'word': '城', 'start': 12, 'end': 13}
None
{'entity': 'B-LOC', 'score': 0.9483459, 'index': 10, 'word': '中', 'start': 9, 'end': 10}
{'entity': 'B-LOC', 'score': 0.92489845, 'index': 11, 'word': '美', 'start': 10, 'end': 11}
None
{'entity': 'B-ORG', 'score': 0.5045434, 'index': 1, 'word': '加', 'start': 0, 'end': 1}
{'entity': 'I-ORG', 'score': 0.64310086, 'index': 2, 'word': '利', 'start': 1, 'end': 2}
{'entity': 'I-ORG', 'score': 0.730761, 'index': 3, 'word': '福', 'start': 2, 'end': 3}
{'entity': 'I-ORG', 