# 查看Pipeline支持的任务类型

In [None]:
from transformers.pipelines import SUPPORTED_TASKS

# 打印所有支持的任务及其详细信息
for task, details in SUPPORTED_TASKS.items():
    print(f"任务: {task}")
    print(f"详细信息: {details}\n")


# Pipeline的创建与使用方式

## 根据任务类型直接创建Pipeline, 默认都是英文的模型

In [1]:
from transformers import pipeline

# 创建文本分类 pipeline
pipe = pipeline("text-classification")

# 输入文本列表
results = pipe(["very good!", "very bad!"])
print(results)


  from .autonotebook import tqdm as notebook_tqdm
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


[{'label': 'POSITIVE', 'score': 0.9998525381088257}, {'label': 'NEGATIVE', 'score': 0.9997695088386536}]


## 指定任务类型，再指定模型，创建基于指定模型的Pipeline

In [2]:
# 使用中文情感分析模型
pipe = pipeline("text-classification", model="uer/roberta-base-finetuned-dianping-chinese")

# 分析中文文本
result = pipe("我觉得不太行！")
print(result)


Device set to use cpu


[{'label': 'negative (stars 1, 2 and 3)', 'score': 0.9735506772994995}]


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


## 预先加载模型和分词器

In [3]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# 加载模型和分词器
model = AutoModelForSequenceClassification.from_pretrained("uer/roberta-base-finetuned-dianping-chinese")
tokenizer = AutoTokenizer.from_pretrained("uer/roberta-base-finetuned-dianping-chinese")

# 创建 pipeline
pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)

# 使用
result = pipe("我觉得不太行！")
print(result)


Device set to use cpu


[{'label': 'negative (stars 1, 2 and 3)', 'score': 0.9735506772994995}]


# 性能优化：使用GPU进行推理

In [4]:
# 指定 device=0 使用第一个 GPU
pipe = pipeline("text-classification", model="uer/roberta-base-finetuned-dianping-chinese", device=0)

# 检查模型运行设备
print(pipe.model.device)  # 输出: cuda:0

# 测试推理速度
import torch
import time

times = []
for _ in range(100):
    torch.cuda.synchronize()  # 同步 GPU
    start = time.time()
    pipe("我觉得不太行！")
    torch.cuda.synchronize()
    end = time.time()
    times.append(end - start)

print(f"平均推理时间: {sum(times) / 100:.4f} 秒")


Device set to use cpu


cpu


AssertionError: Torch not compiled with CUDA enabled

# 参数调整与高级用法

In [5]:
qa_pipe = pipeline("question-answering", model="uer/roberta-base-chinese-extractive-qa")

# 输入问题和上下文
result = qa_pipe(
    question="中国的首都是哪里？",
    context="中国的首都是北京",
    max_answer_len=5  # 限制答案长度
)
print(result)


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Device set to use cpu


{'score': 0.7320804595947266, 'start': 6, 'end': 8, 'answer': '北京'}


# 其他Pipeline示例

In [None]:
checkpoint = "google/owlvit-base-patch32"
detector = pipeline(model=checkpoint, task="zero-shot-object-detection")

In [None]:
import requests
from PIL import Image

url = "https://unsplash.com/photos/oj0zeY2Ltk4/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8MTR8fHBpY25pY3xlbnwwfHx8fDE2Nzc0OTE1NDk&force=true&w=640"
im = Image.open(requests.get(url, stream=True).raw)
im

In [None]:
predictions = detector(
    im,
    candidate_labels=["hat", "sunglasses", "book"],
)
predictions

In [None]:
from PIL import ImageDraw

draw = ImageDraw.Draw(im)

for prediction in predictions:
    box = prediction["box"]
    label = prediction["label"]
    score = prediction["score"]
    xmin, ymin, xmax, ymax = box.values()
    draw.rectangle((xmin, ymin, xmax, ymax), outline="red", width=1)
    draw.text((xmin, ymin), f"{label}: {round(score,2)}", fill="red")

im

# Pipeline背后的实现

In [6]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# 加载分词器和模型
tokenizer = AutoTokenizer.from_pretrained("uer/roberta-base-finetuned-dianping-chinese")
model = AutoModelForSequenceClassification.from_pretrained("uer/roberta-base-finetuned-dianping-chinese")

# 输入文本
input_text = "我觉得不太行！"
inputs = tokenizer(input_text, return_tensors="pt")  # 转换为张量

# 模型推理
outputs = model(**inputs)
logits = outputs.logits
probs = torch.softmax(logits, dim=-1)  # 转换为概率

# 获取预测结果
pred_id = torch.argmax(probs).item()
result = model.config.id2label[pred_id]  # 从 ID 映射到标签
print(f"预测结果: {result}")


预测结果: negative (stars 1, 2 and 3)
