In [3]:
!pip install transformers
!pip install transformers[torch]

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting torch (from transformers[torch])
  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/2a/b7/a3cf5fd40334b9785cc83ee0c96b50603026eb3aa70210a33729018e7029/torch-2.3.0-cp311-cp311-win_amd64.whl (159.8 MB)
Collecting accelerate>=0.21.0 (from transformers[torch])
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/e9/bb/1edd2c836071e91d2bd331b9542bbd592e23d1474645b9c6fd56232caace/accelerate-0.30.1-py3-none-any.whl (302 kB)
     ---------------------------------------- 0.0/302.6 kB ? eta -:--:--
     ------- ------------------------------- 61.4/302.6 kB 1.7 MB/s eta 0:00:01
     --------------- ---------------------- 122.9/302.6 kB 1.2 MB/s eta 0:00:01
     ---------------------------- --------- 225.3/302.6 kB 1.7 MB/s eta 0:00:01
     -------------------------------------  297.0/302.6 kB 1.7 MB/s eta 0:00:01
     -------------------------------------

In [4]:
import os

# Point the Hugging Face cache at a single root directory; the hub cache is a
# sub-folder of that root.
# NOTE(review): these variables are presumably read when the Hugging Face
# libraries are first imported, so this cell should run before any
# `transformers` import — confirm against the huggingface_hub docs.
HF_ROOT = 'D:/hf'
os.environ.update({
    'HF_HOME': HF_ROOT,
    'HF_HUB_CACHE': f'{HF_ROOT}/hub',
})

# 情感分析

In [40]:
from transformers import pipeline

# Name the checkpoint explicitly: when only the task is given, the pipeline
# falls back to a default model, which is not recommended.
SENTIMENT_MODEL = "IDEA-CCNL/Erlangshen-Roberta-330M-Sentiment"
pipe = pipeline(task="sentiment-analysis", model=SENTIMENT_MODEL)

# First Chinese sample (roughly "Shanghai is really cold today").
pipe("今儿上海可真冷啊")

[{'label': 'Negative', 'score': 0.9974936246871948}]

In [13]:
# Second Chinese sample (roughly "The sun is shining today").
sunny_text = "今儿阳光明媚"
pipe(sunny_text)

[{'label': 'Positive', 'score': 0.974267840385437}]

In [14]:
# English sample fed to the same pipeline.
# NOTE(review): the original comment here said that text-classification results
# improve as soon as the text is switched to English; the Chinese samples
# recorded earlier in this notebook were also labelled as expected, so that
# remark may be left over from a different model — confirm.
praise_text = "You learn things really quickly. You understand the theory class as soon as it is taught."
pipe(praise_text)

[{'label': 'Positive', 'score': 0.9986833930015564}]

In [41]:
# English counterpart of the Chinese cold-weather sample, for comparison.
cold_text = "Today Shanghai is really cold."
pipe(cold_text)

[{'label': 'Negative', 'score': 0.974538266658783}]

In [16]:
# Batch inference: hand the pipeline a list of sentences instead of one string.
cold_weather = "Today Shanghai is really cold."
average_food = "I think the taste of the garlic mashed pork in this store is average."
fast_learner = "You learn things really quickly. You understand the theory class as soon as it is taught."

text_list = [cold_weather, average_food, fast_learner]

pipe(text_list)

[{'label': 'Negative', 'score': 0.974538266658783},
 {'label': 'Positive', 'score': 0.9970296621322632},
 {'label': 'Positive', 'score': 0.9986833930015564}]

# 命名实体识别

In [38]:
from transformers import pipeline

# Named-entity-recognition pipeline.
# `aggregation_strategy="simple"` merges token-level predictions into whole
# entities (the results expose an "entity_group" key). It replaces the
# deprecated `grouped_entities=True` flag, which maps to the same strategy.
NER_MODEL = "shibing624/bert4ner-base-chinese"
classifier = pipeline(task="ner", model=NER_MODEL, aggregation_strategy="simple")

In [22]:
# Run NER on one sentence, keep only the fields of interest (score rounded to
# four decimals), and print one entity per line.
NER_FIELDS = ("entity_group", "score", "word", "start", "end")

raw_entities = classifier("张三来自北京，是个警察，喜欢去王府井游玩儿。")
preds = []
for entity in raw_entities:
    row = {}
    for field in NER_FIELDS:
        value = entity[field]
        row[field] = round(value, 4) if field == "score" else value
    preds.append(row)

print("\n".join(str(row) for row in preds))

{'entity_group': 'PER', 'score': 0.9998, 'word': '张 三', 'start': 0, 'end': 2}
{'entity_group': 'LOC', 'score': 0.9997, 'word': '北 京', 'start': 4, 'end': 6}
{'entity_group': 'LOC', 'score': 0.9972, 'word': '王 府 井', 'start': 15, 'end': 18}


# 问答

In [26]:
from transformers import pipeline

# Question-answering pipeline backed by an explicitly named checkpoint.
QA_MODEL = "uer/roberta-base-chinese-extractive-qa"
question_answerer = pipeline(
    task="question-answering",
    model=QA_MODEL,
)

In [28]:
# Ask who wrote the poem named in the question; the answer is looked up in the
# supplied context passage. The result dict carries score/start/end/answer.
qa_question = "著名诗歌《假如生活欺骗了你》的作者是?"
qa_context = "普希金从那里学习人民的语言，吸取了许多有益的养料，这一切对普希金后来的创作产生了很大的影响。这两年里，普希金创作了不少优秀的作品，如《囚徒》、《致大海》、《致凯恩》和《假如生活欺骗了你》等几十首抒情诗，叙事诗《努林伯爵》，历史剧《鲍里斯·戈都诺夫》，以及《叶甫盖尼·奥涅金》前六章。"

preds = question_answerer(question=qa_question, context=qa_context)

qa_score = round(preds['score'], 4)
print(f"score: {qa_score}, start: {preds['start']}, end: {preds['end']}, answer: {preds['answer']}")

score: 0.9766, start: 0, end: 3, answer: 普希金


# 文本摘要

In [45]:
!pip install transformers[sentencepiece]

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple


In [6]:
from transformers import pipeline

# Summarization pipeline; min_length / max_length are supplied when the
# pipeline is built.
SUMMARY_MODEL = "utrobinmv/t5_summary_en_ru_zh_base_2048"
SUMMARY_LENGTH = {"min_length": 8, "max_length": 50}

summarizer = pipeline(task="summarization", model=SUMMARY_MODEL, **SUMMARY_LENGTH)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [8]:
# Summarize an English paragraph (the conclusion of the Transformer paper).
# NOTE(review): the summary recorded for this cell contains phrases that do not
# appear in the input ("Scrubbing Up", "Welsh-To-French"). The checkpoint may
# expect a task prefix on the input text — confirm against its model card.
transformer_conclusion = """
    In this work, we presented the Transformer, the first sequence transduction model based entirely on attention, 
    replacing the recurrent layers most commonly used in encoder-decoder architectures with multi-headed self-attention. 
    For translation tasks, the Transformer can be trained significantly faster than architectures based on recurrent or convolutional layers. 
    On both WMT 2014 English-to-German and WMT 2014 English-to-French translation tasks, we achieve a new state of the art. 
    In the former task our best model outperforms even all previously reported ensembles.
    """
summarizer(transformer_conclusion)

[{'summary_text': "In this week's Scrubbing Up, WMT 2014 English-to-German and Welsh-To-French translation task, we presented our best model outperforming all previous reports."}]

In [9]:
# Summarize a multi-paragraph Chinese passage about large models with the same
# summarizer.
large_model_article = '''
大模型（Large Model）是AI人工智能领域中的一种重要模型，通常指的是参数量非常大、数据量也非常大的深度学习模型。大模型通常由数百万到数十亿的参数组成，需要大量的数据和计算资源进行训练和推理。由于其巨大的规模，大模型具有非常强大的表示能力和泛化能力，可以在各种任务中表现出色，如语音识别、自然语言处理、计算机视觉等。

大模型是一种使用海量参数和数据进行预训练的深度学习模型，可以在多个领域和任务中展现出强大的泛化能力和自监督学习能力。

1.提供预训练方案：解决模型碎片化的问题。通过在大量的标注和未标注的数据上进行预训练，大模型可以从中捕获通用的知识和特征，并将其存储在参数中。然后，通过对特定任务进行微调，大模型可以将预训练的知识迁移到下游任务中，极大地提高了模型的性能和泛化能力。

2.实现自监督学习：降低训练研发成本。大模型的典型代表有GPT-4、盘古、Switch Transformer等，它们的参数量都达到了千亿甚至万亿的规模。除此之外，还有代码大模型、视觉大模型、多模态大模型等。
    '''
summarizer(large_model_article)

[{'summary_text': '大模型是一种使用海量参数和数据进行预训练的深度学习模型,在多个领域和任务中展现出强大的泛化能力和自监督学习能力。'}]

# 图像分类

In [25]:
!pip install pillow

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting pillow
  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/0a/16/c83877524c47976f16703d2e05c363244bc1e60ab439e078b3cd046d07db/pillow-10.3.0-cp311-cp311-win_amd64.whl (2.5 MB)
Installing collected packages: pillow
Successfully installed pillow-10.3.0


In [18]:
from transformers import pipeline

# Image-classification pipeline.
# NOTE: this rebinds `classifier`, the same name the NER section uses for its
# token-classification pipeline, so the NER cells must be re-run from their own
# pipeline cell after this one has executed.
IMAGE_MODEL = 'nvidia/mit-b5'
classifier = pipeline(
    task="image-classification",
    model=IMAGE_MODEL,
)

In [25]:
# Classify a sample image fetched by URL, keep score (rounded to four decimals)
# and label for each candidate, and print one candidate per line.
IMAGE_URL = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"

raw_preds = classifier(IMAGE_URL)
preds = []
for candidate in raw_preds:
    preds.append({"score": round(candidate["score"], 4), "label": candidate["label"]})

print("\n".join(str(row) for row in preds))

{'score': 0.8064, 'label': 'lynx, catamount'}
{'score': 0.0053, 'label': 'badger'}
{'score': 0.0039, 'label': 'grey fox, gray fox, Urocyon cinereoargenteus'}
{'score': 0.0025, 'label': 'marmot'}
{'score': 0.0024, 'label': 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor'}
