# 安装transformers库

In [1]:
pip install transformers

Collecting transformers
  Downloading transformers-4.9.1-py3-none-any.whl (2.6 MB)
[K     |████████████████████████████████| 2.6 MB 5.2 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.45-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 37.4 MB/s 
Collecting huggingface-hub==0.0.12
  Downloading huggingface_hub-0.0.12-py3-none-any.whl (37 kB)
Collecting pyyaml>=5.1
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 39.7 MB/s 
[?25hCollecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 50.1 MB/s 
Installing collected packages: tokenizers, sacremoses, pyyaml, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3.13
    Uninstalling PyYAML-3.13:
      Successfully uninsta

# 使用流水线进行文本情感判断

## 正向情感示例

调用 pipeline 创建分类器的那行代码会下载并缓存流水线使用的预训练模型，而随后对 classifier 的调用则对给定的文本进行了评估。这里的答案“正面” (POSITIVE) 的置信度约为 99.97%。

In [3]:
from transformers import pipeline

# Build the sentiment-analysis pipeline, then score one English sentence.
# The cell's last expression is the prediction, so it is shown as the output.
classifier = pipeline('sentiment-analysis')
english_text = 'We are very happy to introduce pipeline to the transformers repository.'
classifier(english_text)

[{'label': 'POSITIVE', 'score': 0.9996980428695679}]

## 负向情感示例

In [4]:
from transformers import pipeline

# Same sentiment-analysis pipeline, this time applied to a negative sentence.
classifier = pipeline('sentiment-analysis')
negative_text = 'damn, how the weather is today!'
classifier(negative_text)

[{'label': 'NEGATIVE', 'score': 0.9843284487724304}]

## 中文尝试

In [7]:
from transformers import pipeline

# Sentiment-analysis pipeline applied to two Chinese sentences.
# NOTE(review): the default checkpoint is presumably English-oriented, so the
# scores for Chinese input may not be meaningful — confirm before relying on them.
classifier = pipeline('sentiment-analysis')

good_weather_text = '今天的天气真好啊！'
bad_weather_text = '今天的天气不太好，郁闷啊！'

# Prediction for the sentence praising the weather.
a = classifier(good_weather_text)
print(a)

# Prediction for the sentence complaining about the weather.
b = classifier(bad_weather_text)
print(b)

[{'label': 'POSITIVE', 'score': 0.7827630639076233}]
[{'label': 'NEGATIVE', 'score': 0.7204760909080505}]


# 使用流水线从给定文本问题中抽取答案

英文

In [9]:
from transformers import pipeline

# Question-answering pipeline: given a question and a context passage, it
# returns the answer text together with its score and start/end offsets.
question_answerer = pipeline('question-answering')

english_query = {
    'question': 'What is the name of the repository ?',
    'context': 'Pipeline has been included in the huggingface/transformers repository'
}
question_answerer(english_query)

{'answer': 'huggingface/transformers',
 'end': 58,
 'score': 0.30970120429992676,
 'start': 34}

中文

In [14]:
from transformers import pipeline

# Question-answering pipeline applied to a Chinese question/context pair.
# NOTE(review): the recorded score for this example is very low (~0.0099),
# presumably because the default checkpoint targets English — confirm.
question_answerer = pipeline('question-answering')

chinese_query = {
    'question': '勒克莱尔在哪里退赛了',
    'context': '勒克莱尔在匈牙利大奖赛中退赛'
}
question_answerer(chinese_query)

{'answer': '中退赛', 'end': 14, 'score': 0.009904472157359123, 'start': 11}

# 使用预训练模型

In [38]:
import torch
from transformers import AutoTokenizer, AutoModel

# Load the tokenizer and the bare encoder from the same pretrained checkpoint.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

# Tokenize one sentence into PyTorch tensors. For this input the result is:
#   input_ids      = tensor([[ 101, 7592, 2088,  999,  102]])
#   token_type_ids = tensor([[0, 0, 0, 0, 0]])
#   attention_mask = tensor([[1, 1, 1, 1, 1]])
inputs = tokenizer("Hello world!", return_tensors="pt")

# Inference only, so skip autograd bookkeeping for the forward passes.
with torch.no_grad():
    # Forward pass with every tensor the tokenizer produced.
    outputs1 = model(**inputs)
    # Forward pass with input_ids alone. This gives the same result here because
    # the omitted tensors equal the values BertModel falls back to when they are
    # not supplied (attention_mask of all ones, token_type_ids of all zeros).
    # For padded batches or sentence pairs the two calls would NOT be equivalent.
    outputs2 = model(inputs['input_ids'])

# Element-wise comparison of the hidden states from the two calls.
print(outputs1['last_hidden_state'] == outputs2['last_hidden_state'])

print("outputs1['last_hidden_state'].shape:", outputs1['last_hidden_state'].shape)
print("outputs1['pooler_output'].shape:", outputs1['pooler_output'].shape)

# The model returns a BaseModelOutputWithPoolingAndCrossAttentions object;
# uncomment the next line to inspect it in full.
# print(outputs1)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tensor([[[True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True]]])
outputs1['last_hidden_state'].shape: torch.Size([1, 5, 768])
outputs1['pooler_output'].shape: torch.Size([1, 768])
