In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedTokenizerBase
from langport.protocol.worker_protocol import (
    GenerationTask,
)
from langport.model.executor.generation.huggingface import (
    BaseStreamer,
    BatchingTask,
    GenerationModel
)

In [2]:
def mock_task(q:str='请写一首诗歌赞扬一下熬夜'):
    """Build a demo ``GenerationTask`` that asks Buddy the question ``q``.

    The prompt is the OpenBuddy-style system preamble, one fixed greeting
    exchange, the user's question and a trailing ``Assistant:`` cue for the
    model to continue from.

    The prompt is assembled from individual lines so that no line carries
    trailing padding: stray spaces after the question or before a newline
    would be tokenized and sent to the model as part of the prompt.

    Args:
        q: The user question inserted into the final ``User:`` turn.

    Returns:
        A ``GenerationTask`` with the assembled prompt and fixed sampling
        settings (max_tokens=2048, top_k=50, top_p=0.5, temperature=0.5).
    """
    prompt_lines = (
        "Consider a conversation between User (a human) and Assistant (named Buddy).",
        "Buddy is an INTP-T, a friendly, intelligent and multilingual AI assistant, by OpenBuddy team. GitHub: https://github.com/OpenBuddy/OpenBuddy",
        "Buddy cannot access the Internet.",
        "Buddy can fluently speak the user's language (e.g. Chinese, English).",
        "Buddy can generate poems, stories, code, essays, songs, parodies, and more.",
        "Buddy possesses vast knowledge about the world, history, and culture.",
        "Buddy's responses are always safe, creative, high-quality, human-like, and interesting.",
        "Buddy strictly refuses to discuss political, NSFW, or other unsafe topics.",
        "",  # blank separator between the system preamble and the dialogue
        "User: Hi.",
        "Assistant: Hi，我是 Buddy，你的 AI 助理。有什么我可以帮助你的么？",
        f"User: {q}",
        "Assistant:",  # left open so the model writes the reply
    )
    prompt = "\n".join(prompt_lines)
    return GenerationTask(prompt=prompt,
                          max_tokens=2048,
                          top_k=50,
                          top_p=0.5,
                          temperature=0.5,)


In [3]:
class BatchStreamer(BaseStreamer):
    """Debug streamer that prints the decoded text of every batch entry.

    It holds a ``BatchingTask`` and a tokenizer. On each ``put`` it asks the
    batch task for the generated ids of every entry, decodes them and prints
    one line per entry.
    """

    def __init__(self, batch_task: BatchingTask, tokenizer: PreTrainedTokenizerBase) -> None:
        # NOTE(review): the base-class initializer is not called here --
        # confirm BaseStreamer needs no setup of its own.
        self.batch_task = batch_task
        self.tokenizer = tokenizer

    def put(self, value):
        """Callback invoked by `.generate()` whenever new tokens are pushed.

        ``value`` is not used: the text is rebuilt from the ids returned by
        ``batch_task.get_generated_ids`` for each batch index.
        """
        for index in range(self.batch_task.batch_size):
            generated_ids = self.batch_task.get_generated_ids(index)
            decoded = self.tokenizer.decode(generated_ids, skip_special_tokens=True)
            print(f"batch {index}: ", decoded)

    def end(self):
        """Callback invoked by `.generate()` once generation has finished."""
        print("done")

In [None]:
# Checkpoint directory, relative to the notebook's working directory.
model_path = '../openbuddy-7b-v1.5-fp16/'

# Load the causal LM in half precision and let the loader decide device
# placement via device_map="auto".
# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint
# to run -- only point model_path at a checkpoint you trust.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

# Tokenizer loaded from the same checkpoint directory as the model.
tokenizer = AutoTokenizer.from_pretrained(model_path)


In [None]:
# Wrap the loaded model in langport's generation executor.
gmodel = GenerationModel(model)

# One demo task per question; both are sent through as a single batch.
tasks = [
    mock_task(question)
    for question in ("请写一首诗歌赞扬一下熬夜", "今天天气怎么样？")
]
inputs = BatchingTask(tasks, tokenizer, model.device.type)

# The streamer prints the decoded text of every batch entry as tokens arrive.
streamer = BatchStreamer(inputs, tokenizer)

# NOTE(review): the second argument (40) is presumably the new-token budget
# for this call -- confirm against GenerationModel.generate.
gmodel.generate(inputs, 40, streamer)