### pipeline

In [1]:
import json
import os
import time
import warnings

from transformers import pipeline

# Silence all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')

# Hugging Face API token. Read it from the HF_TOKEN environment variable so the
# secret never has to be written into the notebook; falls back to the previous
# empty-string value when the variable is not set.
access_token = os.environ.get('HF_TOKEN', '')

In [3]:
# Build the zero-shot classification pipeline from the checkpoint in ./model,
# placed on device 0.
classifier = pipeline(
    'zero-shot-classification',
    model='./model',
    device=0,
)

# max_memory = {0: "1600MiB", "cpu": "2GiB"}

In [9]:
# Candidate labels, and six identical copies of one question/answer prompt.
labels = ['男生', '女生', '未知']
example = [r'问：{你是男生吗}，答：{不是}'] * 6
# example = [r'你是男生吗,不是' for i in range(1)]


In [10]:
# Classify all prompts against the labels with multi_label enabled and a batch size of 20.
classifier(
    example,
    labels,
    multi_label=True,
    batch_size=20,
)

[{'sequence': '问：{你是男生吗}，答：{不是}',
  'labels': ['未知', '女生', '男生'],
  'scores': [0.9697513580322266, 0.026069799438118935, 0.01181736495345831]},
 {'sequence': '问：{你是男生吗}，答：{不是}',
  'labels': ['未知', '女生', '男生'],
  'scores': [0.9697513580322266, 0.026069799438118935, 0.01181736495345831]},
 {'sequence': '问：{你是男生吗}，答：{不是}',
  'labels': ['未知', '女生', '男生'],
  'scores': [0.9697513580322266, 0.026069799438118935, 0.01181736495345831]},
 {'sequence': '问：{你是男生吗}，答：{不是}',
  'labels': ['未知', '女生', '男生'],
  'scores': [0.9697513580322266, 0.026069799438118935, 0.01181736495345831]},
 {'sequence': '问：{你是男生吗}，答：{不是}',
  'labels': ['未知', '女生', '男生'],
  'scores': [0.9697513580322266, 0.026069799438118935, 0.01181736495345831]},
 {'sequence': '问：{你是男生吗}，答：{不是}',
  'labels': ['未知', '女生', '男生'],
  'scores': [0.9697513580322266, 0.026069799438118935, 0.01181736495345831]}]

In [5]:
# Time one classification pass over the examples.
# perf_counter is used instead of time.time: it is a monotonic, high-resolution
# clock, so the measurement cannot be skewed by system clock adjustments.
start_time = time.perf_counter()
output = classifier(example, labels, multi_label=True)
end_time = time.perf_counter()
print('Time cost: ', end_time - start_time)

100%|██████████| 100/100 [00:07<00:00, 13.10it/s]

Time cost:  7.636873960494995





In [12]:
import torch

# Limit the number of CPU threads used
# torch.set_num_threads(2)

# `classifier_cpu` is not defined in any cell visible in this notebook, so this
# cell raised NameError on a fresh kernel. Build it here, outside the timed region.
# NOTE(review): assumes the CPU run should use the same ./model checkpoint as
# `classifier` — confirm.
classifier_cpu = pipeline(
    'zero-shot-classification',
    model='./model',
    device=-1,  # -1 selects the CPU
)

# Time one classification pass on the CPU pipeline. perf_counter is monotonic
# and high-resolution, which makes it the appropriate clock for elapsed time.
start_time = time.perf_counter()
output = classifier_cpu(example, labels, multi_label=True)
end_time = time.perf_counter()
print('Time cost: ', end_time - start_time)

Time cost:  75.62801313400269


In [14]:
# Display the results currently stored in `output`.
output

[{'sequence': '你们这边是怎么收费的培训',
  'labels': ['价格', '收费', '20', '10块钱'],
  'scores': [0.9958784580230713,
   0.9952981472015381,
   0.3910912573337555,
   0.31309613585472107]},
 {'sequence': '你们主要是怎么收费呢',
  'labels': ['价格', '收费', '10块钱', '20'],
  'scores': [0.9871610403060913,
   0.9657820463180542,
   0.5289880633354187,
   0.48929357528686523]}]

In [None]:
# Call the pipeline's preprocess method directly with the current examples and labels.
classifier_cpu.preprocess(example, labels)


### inference api

In [8]:
# Hub model identifiers collected for the inference API experiments.
model_name = [
    'joeddav/xlm-roberta-large-xnli',
    'facebook/bart-large-mnli',
    'vicgalle/xlm-roberta-large-xnli-anli',
    'IDEA-CCNL/Taiyi-Diffusion-532M-Nature-Chinese',
]


In [9]:
import requests

# Hosted inference endpoint for the Taiyi diffusion model, and the bearer-token
# authorization header built from access_token.
API_URL = (
    "https://api-inference.huggingface.co/models/"
    "IDEA-CCNL/Taiyi-Diffusion-532M-Nature-Chinese"
)
headers = dict(Authorization=f"Bearer {access_token}")


def query(payload):
    """POST ``payload`` as JSON to ``API_URL`` and return the reply decoded from JSON.

    Uses the module-level ``API_URL`` and ``headers``.
    """
    reply = requests.post(API_URL, json=payload, headers=headers)
    return reply.json()


# output = query({
#     "inputs": example,
#     "parameters": {"candidate_labels": labels},
# })

# Send a single prompt tagged with model_type 'text2img' and keep the decoded reply.
output = query(dict(inputs='小猫', model_type='text2img'))

output


{'error': 'Model IDEA-CCNL/Taiyi-Diffusion-532M-Nature-Chinese is currently loading',
 'estimated_time': 20.0}

In [10]:
import requests

import io
from PIL import Image

# Point the endpoint at the Taiyi Stable Diffusion model and rebuild the
# bearer-token authorization header from access_token.
API_URL = (
    "https://api-inference.huggingface.co/models/"
    "IDEA-CCNL/Taiyi-Stable-Diffusion-1B-Chinese-v0.1"
)
headers = dict(Authorization=f"Bearer {access_token}")

def query(payload):
    """POST ``payload`` as JSON to ``API_URL`` and return the raw response body.

    Uses the module-level ``API_URL`` and ``headers``.

    Args:
        payload: JSON-serialisable request body.

    Returns:
        bytes: the undecoded response content.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    response = requests.post(API_URL, headers=headers, json=payload)
    # Fail loudly on an error status. Without this, an error body would be
    # returned as if it were image data and only fail later, obscurely, when
    # the caller tries to decode it.
    response.raise_for_status()
    return response.content


In [None]:
# Request an image for the prompt, then decode the returned bytes with PIL and display it.
image_bytes = query(dict(
    inputs="那人却在灯火阑珊处，色彩艳丽，古风，资深插画师作品，桌面高清壁纸。",
))
image = Image.open(io.BytesIO(image_bytes))
image


### data test

In [10]:
# Sample request payload as JSON text: a clue ID plus a tag group holding the
# candidate tags (tagList) and the sentences to classify (hitWord).
raw_data = '''
{"clueId":123456,"tagGroup":{"groupId":123,"tagList":[{"tagId":1,"tagName":"1节课"},{"tagId":2,"tagName":"2节课"},{"tagId":3,"tagName":"3节课"},{"tagId":4,"tagName":"4节课"},{"tagId":5,"tagName":"5节课"}],
"hitWord":[{"role":"销售","word":"今天有个5节,哦不是，是4节，的编程特惠课给到孩子"},{"role":"销售","word":"明天有个4节的编程特惠课给到孩子"}]}}
'''
# Parse with json.loads rather than eval: the text is plain JSON, eval would
# execute arbitrary code embedded in the payload, and eval also fails on
# JSON-only literals such as true/false/null.
data = json.loads(raw_data)
data

{'clueId': 123456,
 'tagGroup': {'groupId': 123,
  'tagList': [{'tagId': 1, 'tagName': '1节课'},
   {'tagId': 2, 'tagName': '2节课'},
   {'tagId': 3, 'tagName': '3节课'},
   {'tagId': 4, 'tagName': '4节课'},
   {'tagId': 5, 'tagName': '5节课'}],
  'hitWord': [{'role': '销售', 'word': '今天有个5节,哦不是，是4节，的编程特惠课给到孩子'},
   {'role': '销售', 'word': '明天有个4节的编程特惠课给到孩子'}]}}

In [11]:
# Candidate labels: the tagName of every entry in the tag list.
tags = [
    entry['tagName']
    for entry in data['tagGroup']['tagList']
]
# Input sentences: the word field of every hitWord entry.
text = [
    entry['word']
    for entry in data['tagGroup']['hitWord']
]

In [12]:
# Run the classifier on the hit sentences with the tag names as candidate labels.
output = classifier(text, tags, multi_label=True)

In [13]:
# Display the classification results.
output

[{'sequence': '今天有个5节,哦不是，是4节，的编程特惠课给到孩子',
  'labels': ['4节课', '5节课', '2节课', '3节课', '1节课'],
  'scores': [0.9963747262954712,
   0.9921731948852539,
   0.033191222697496414,
   0.014439600519835949,
   0.0033554418478161097]},
 {'sequence': '明天有个4节的编程特惠课给到孩子',
  'labels': ['4节课', '5节课', '3节课', '1节课', '2节课'],
  'scores': [0.9956560730934143,
   0.003281186567619443,
   0.00272810528986156,
   0.0012587214587256312,
   0.0010988261783495545]}]

In [14]:
# For each classified sequence, collect the tag ID of its first-listed
# (highest-scoring) label.

# Index tag IDs by tag name once. setdefault keeps the first ID when a name
# repeats, which matches a first-match scan of the tag list.
tag_id_by_name = {}
for tag in data['tagGroup']['tagList']:
    tag_id_by_name.setdefault(tag['tagName'], tag['tagId'])

label_id = []
for item in output:
    best_label = item['labels'][0]
    # A label with no matching tag contributes nothing to the result.
    if best_label in tag_id_by_name:
        label_id.append(tag_id_by_name[best_label])


In [15]:
# Display the tag IDs collected for the classified sequences.
label_id

[4, 4]