In [1]:
import os
import tools
import torch
from transformers import LlamaForCausalLM, pipeline, LlamaTokenizer, AutoTokenizer, AutoModel, AutoModelForCausalLM

class Participant:
    def __init__(self, path, inital_rating=1000):
        self.path=path
        self.rating=inital_rating
        self.name=os.path.basename(self.path)
        self.pipeline_instance=self.build_pipline()
        
    def update_rating(self, rating_change):
        self.rating+=rating_change

    def build_pipline(self):
        tokenizer=AutoTokenizer.from_pretrained(self.path, trust_remote_code=True, legacy=False)
        if (free_gpu:=tools.find_free_gpu()) is not None:
            device=torch.device(f"cuda:{free_gpu}")  # 指定到空闲的 GPU
            print(f'已分配至空闲GPU：{device}')
        else:
            raise RuntimeError("No free GPU available!")

        if 'llama' in self.path.lower():
            model=LlamaForCausalLM.from_pretrained(self.path, trust_remote_code=True).half().to(device)
        else:
            model=AutoModel.from_pretrained(self.path, trust_remote_code=True).half().to(device)
        model=model.eval()

        pipeline_instance = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            device="cuda"
        )

        return pipeline_instance

In [2]:
llama_3=Participant('/root/autodl-tmp/weights/llama3.1-8B-chat')

The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.


已分配至空闲GPU：cuda:0


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [3]:
messages = [
    {"role": "system", "content": "你是一个得力的助手"},
    {"role": "user", "content": "你是谁？"},
]

In [4]:
llama_3.pipeline_instance(messages, max_new_tokens=256)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[{'generated_text': [{'role': 'system', 'content': '你是一个得力的助手'},
   {'role': 'user', 'content': '你是谁？'},
   {'role': 'assistant', 'content': '我是 LLaMA，一个人工智能语言模型。我的任务是帮助和提供信息。'}]}]

In [5]:
qwen_2=Participant('/root/autodl-tmp/weights/Qwen2-7B-Chat/')

已分配至空闲GPU：cuda:1


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

The model 'Qwen2Model' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'GraniteForCausalLM', 'GraniteMoeForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'Mamba2ForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'Meg

AttributeError: 'TextGenerationPipeline' object has no attribute 'prefix'

In [None]:
llama_3.pipeline_instance(messages, max_new_tokens=256)