In [10]:
import torch
import gc
from transformers import AutoTokenizer, AutoModelForCausalLM


# unassign the model and free up GPU RAM
def freeup_VRAM():
    memory_used_before = torch.cuda.memory_reserved(0) / 1024 ** 3
    try:
        global model, tokenizer
        del model, tokenizer
    except:
        pass
    gc.collect()
    torch.cuda.empty_cache()
    memory_used_after = torch.cuda.memory_reserved(0) / 1024 ** 3
    print(f"Freed up {memory_used_before - memory_used_after:.1f} GB of VRAM.")


def generate_text(
    prompt, do_sample=True, max_new_tokens=50, temperature=0.9, top_k=50, top_p=0.95
):
    encoded_input = tokenizer(prompt, return_tensors="pt")
    output_sequences = model.generate(
        input_ids=encoded_input["input_ids"].cuda(),
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
    )
    generated_text = tokenizer.decode(output_sequences[0], skip_special_tokens=True)
    return generated_text


model_names = [
    "EleutherAI/gpt-neo-125M",
    "EleutherAI/gpt-neo-1.3B",
    "EleutherAI/gpt-neo-2.7B",
    "EleutherAI/gpt-j-6B",
]


In [12]:
model_idx = 3
load_in_8bit = True
half_precision = False

freeup_VRAM()

tokenizer = AutoTokenizer.from_pretrained(model_names[model_idx])
if load_in_8bit:
    model = AutoModelForCausalLM.from_pretrained(
        model_names[model_idx], device_map="auto", load_in_8bit=True
    )
else:
    model = AutoModelForCausalLM.from_pretrained(
        model_names[model_idx],
        device_map="auto",
        torch_dtype=torch.float16 if half_precision else "auto",
    )
print(
    f"Loaded model {model_names[model_idx]} in {'int8' if load_in_8bit else 'fp16' if half_precision else 'fp32'}"
)

model.eval()

# set pad_token_id to eos_token_id because GPT2 does not have a EOS token
model.config.pad_token_id = model.config.eos_token_id
model.generation_config.pad_token_id = model.config.eos_token_id


Freed up 2.9 GB of VRAM.
Loaded model EleutherAI/gpt-j-6B in int8


In [16]:
prompt = """Write a python code that plots the function y=sin(x)"""
# prompt = '''飞船登机口处，一群人在那儿拼命地挥手，船上的人也在欢呼，只是每个人的目光都是那么的恋恋不舍。这是第五批运向诺顿星开拓者，仙女星系的三个星球是人类移民的天堂，那里经过三百年的建设已经形成了完备高级的生活设施，而且各方面的福利，教育，以及军事防御都是太阳系标准，甚至更高，但诺顿星……还太原始了些，尤其是还有无孔不入的扎戈族，它们就像地球上的某种古董级生物蟑螂一样的顽强，而且随着人类的进化而进化。这次是诺顿星进行第二期开发计划，愿意去这种地方的多是生活不怎么样的，希望用诺顿星的两年来改善生活，年纪都是在三四十岁甚至更老。可是在人群中却有一个看来相当相当年轻的身影，看起来顶多十四五岁，男孩望着送别区，一个红鼻子老头拼命地挥舞着自己的帽子，老人的眼圈都红了，显然是哭过，是什么理由把自己这么年轻的孩子送上这样的飞船？'''
print(generate_text(prompt))


Write a python code that plots the function y=sin(x) using gnuplot, where x ranges from 0 to 2*pi

I have been trying to plot this function with gnuplot, but I have no idea how to plot x between 0 and 2*pi. Can anyone tell me how


In [9]:
prompt = """Please answer the following question:

Question: What is the capital of Canada?
Answer: Ottawa

Question: What is the currency of Switzerland?
Answer: Swiss franc

Question: Who is the first president of United States?
Answer:"""
print(generate_text(prompt, max_new_tokens=5, temperature=1, top_p=1))


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Please answer the following question:

Question: What is the capital of Canada?
Answer: Ottawa

Question: What is the currency of Switzerland?
Answer: Swiss franc

Question: Who is the first president of United States?
Answer: George Washington

Question


In [49]:
encoded_input = tokenizer("SAT", return_tensors="pt")
print(encoded_input)


{'input_ids': tensor([[  50, 1404]]), 'attention_mask': tensor([[1, 1]])}


In [13]:
print(torch.cuda.memory_summary())


|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |    6281 MB |    6281 MB |   42977 MB |   36696 MB |
|       from large pool |    6276 MB |    6276 MB |   42954 MB |   36678 MB |
|       from small pool |       5 MB |       5 MB |      23 MB |      18 MB |
|---------------------------------------------------------------------------|
| Active memory         |    6281 MB |    6281 MB |   42977 MB |   36696 MB |
|       from large pool |    6276 MB |    6276 MB |   42954 MB |   36678 MB |
|       from small pool |       5 MB |       5 MB |      23 MB |      18 MB |
|---------------------------------------------------------------

In [5]:
freeup_VRAM()


Cleared model VRAM.


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch


tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")


In [None]:
# Let's chat for 5 lines
for step in range(5):
    # encode the new user input, add the eos_token and return a tensor in Pytorch
    new_user_input_ids = tokenizer.encode(input(">> User:") + tokenizer.eos_token, return_tensors='pt')

    # append the new user input tokens to the chat history
    bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1) if step > 0 else new_user_input_ids

    # generated a response while limiting the total chat history to 1000 tokens, 
    chat_history_ids = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)

    # pretty print last ouput tokens from bot
    print("DialoGPT: {}".format(tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)))


In [2]:
from transformers import BertTokenizer, BartForConditionalGeneration

# Note that tokenizer is an object of BertTokenizer, instead of BartTokenizer
tokenizer = BertTokenizer.from_pretrained("HIT-TMG/dialogue-bart-large-chinese")
model = BartForConditionalGeneration.from_pretrained("HIT-TMG/dialogue-bart-large-chinese")


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'BartTokenizer'. 
The class this function is called from is 'BertTokenizer'.


我 叫 王 明 ， 你 呢 ？


In [None]:
# an example from CPC dev data
# history = ["可以 认识 一下 吗 ？", "当然 可以 啦 ， 你好 。", "嘿嘿 你好 ， 请问 你 最近 在 忙 什么 呢 ？", "我 最近 养 了 一只 狗狗 ， 我 在 训练 它 呢 。"]
history = ["你好，我叫郭靖。","你好，我是黄蓉。","你叫什么名字呀？"]
history_str = "对话历史：" + tokenizer.sep_token.join(history)
input_ids = tokenizer(history_str, return_tensors='pt').input_ids
output_ids = model.generate(input_ids)[0]
print(tokenizer.decode(output_ids, skip_special_tokens=True))


In [1]:
# 加载模型
from transformers import T5Tokenizer, T5ForConditionalGeneration
tokenizer = T5Tokenizer.from_pretrained("ClueAI/ChatYuan-large-v1")
model = T5ForConditionalGeneration.from_pretrained("ClueAI/ChatYuan-large-v1")


Downloading spiece.model:   0%|          | 0.00/742k [00:00<?, ?B/s]

SSLError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /ClueAI/ChatYuan-large-v1/resolve/main/special_tokens_map.json (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:997)')))

In [None]:
# 使用
import torch
from transformers import AutoTokenizer
# 修改colab笔记本设置为gpu，推理更快
device = torch.device('cuda')
model.to(device)
def preprocess(text):
  text = text.replace("\n", "\\n").replace("\t", "\\t")
  return text

def postprocess(text):
  return text.replace("\\n", "\n").replace("\\t", "\t")

def answer(text, sample=True, top_p=1, temperature=0.7):
  '''sample：是否抽样。生成任务，可以设置为True;
  top_p：0-1之间，生成的内容越多样'''
  text = preprocess(text)
  encoding = tokenizer(text=[text], truncation=True, padding=True, max_length=768, return_tensors="pt").to(device) 
  if not sample:
    out = model.generate(**encoding, return_dict_in_generate=True, output_scores=False, max_new_tokens=512, num_beams=1, length_penalty=0.6)
  else:
    out = model.generate(**encoding, return_dict_in_generate=True, output_scores=False, max_new_tokens=512, do_sample=True, top_p=top_p, temperature=temperature, no_repeat_ngram_size=3)
  out_text = tokenizer.batch_decode(out["sequences"], skip_special_tokens=True)
  return postprocess(out_text[0])
print("end...")
