In [2]:
from transformers import GPT2LMHeadModel, PreTrainedTokenizerFast

# Root directories holding the trained model runs and the tokenizers.
MODEL_DIR = "/home/wyf/orcd/pool/reverse-llm/models"
TOKENIZER_DIR = "/home/wyf/orcd/pool/reverse-llm/tokenizers"

# Name of the training run whose checkpoint is loaded below.
model_name = "reverse-gpt2-0.35B-fineweb-10BT-ctx-1024-chat-v2"

# Load the weights saved at checkpoint-105 of that run.
model = GPT2LMHeadModel.from_pretrained(
    "/".join((MODEL_DIR, model_name, "checkpoint-105"))
)

In [3]:
# Role names are written character-reversed, the same way message content is
# reversed before it is placed into the chat prompt.
USER_ROLE_NAME = "".join(reversed("user"))
ASSISTANT_ROLE_NAME = "".join(reversed("assistant"))

In [4]:
# Load the BPE tokenizer and register the two chat-turn delimiters as special tokens.
tokenizer = PreTrainedTokenizerFast.from_pretrained(f"{TOKENIZER_DIR}/fineweb_bpe_200k")
tokenizer.add_special_tokens({ "additional_special_tokens": ["<im_start>", "<im_end>"] })

# Chat template: every message is rendered as
#     <im_start>{role}\n{content}<im_end>
# with no separator between consecutive messages. When add_generation_prompt is
# set, an assistant header ("<im_start>tnatsissa" followed by a newline) is
# appended so the model continues with the assistant turn.
#
# Role names in this notebook are stored reversed (see USER_ROLE_NAME /
# ASSISTANT_ROLE_NAME), so the "last message is already from the assistant"
# guard must compare against the reversed name 'tnatsissa'. Comparing against
# the un-reversed 'assistant' never matches and would append a second assistant
# header after an assistant turn.
tokenizer.chat_template = """{% for message in messages -%}
<im_start>{{ message['role'] }}
{{ message['content'] }}<im_end>
{%- endfor -%}
{% if add_generation_prompt and messages[-1]['role'] != 'tnatsissa' -%}
<im_start>tnatsissa{{ '\n' }}
{%- endif %}"""

In [5]:
from transformers import pipeline

# Sampling text-generation pipeline wrapping the model and tokenizer loaded above.
# max_new_tokens and the pad/eos/bos token ids are left unset here.
pipe = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    num_return_sequences=1,
    temperature=1.0,
    do_sample=True,
    clean_up_tokenization_spaces=False,
)

Device set to use cuda:0


In [6]:
# Inspect the tokenizer's special-token mapping; the <im_start>/<im_end> markers
# registered earlier should be listed under 'additional_special_tokens'.
tokenizer.special_tokens_map

{'bos_token': '<s>',
 'eos_token': '</s>',
 'unk_token': '<unk>',
 'pad_token': '<pad>',
 'mask_token': '<mask>',
 'additional_special_tokens': ['<im_start>', '<im_end>']}

In [7]:
# Encode the end-of-turn marker on its own to see which token id(s) it maps to.
tokenizer("<im_end>")

{'input_ids': [52001], 'token_type_ids': [0], 'attention_mask': [1]}

In [30]:
from transformers import pipeline

# Text-generation pipeline with default settings; all decoding options are
# supplied per call below.
pipe = pipeline(
   "text-generation",
   model=model,
   tokenizer=tokenizer,
   clean_up_tokenization_spaces=False,
)

query = "i am"

# Both the role name and the message content are fed to the model reversed.
messages = [
   {"role": USER_ROLE_NAME, "content": query[::-1]},
]
# Render the conversation as plain text (tokenize=False) and append the
# assistant header so generation starts inside the assistant turn.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

print(f"Tokenized prompt: {prompt}")

# Stop generation at the end-of-turn marker. Look the id up from the tokenizer
# instead of hard-coding it, so the cell keeps working if the vocabulary changes.
im_end_token_id = tokenizer.convert_tokens_to_ids("<im_end>")

outputs = pipe(
   prompt,
   num_return_sequences=1,
   max_new_tokens=128,
   eos_token_id=im_end_token_id,
   pad_token_id=tokenizer.pad_token_id,
   repetition_penalty=1.1,
   # temperature only affects sampling-based decoding, so request sampling
   # explicitly rather than relying on library/model defaults.
   do_sample=True,
   temperature=0.5,
)
out_text = outputs[0]["generated_text"]

print(f"=== RAW OUTPUT TEXT ===\n{out_text}")

# The generated text echoes the prompt, so the reply is whatever follows the
# LAST assistant header. rpartition returns the whole text if the header is
# missing (no IndexError) and stays correct with multiple assistant turns.
assistant_header = "<im_start>" + ASSISTANT_ROLE_NAME
response = out_text.rpartition(assistant_header)[2]

print(f"\n=== QUERY ===\n{query}\n")
# Reverse the reply back into normal reading order.
print(f"=== RESPONSE ===\n{response[::-1]}")

Device set to use cuda:0


Tokenized prompt: <im_start>resu
ma i<im_end><im_start>tnatsissa

=== RAW OUTPUT TEXT ===
<im_start>resu
ma i<im_end><im_start>tnatsissa
.suoiretsym dna suoiretsym gnihtemos htiw gnilaed m'I nehw dnim ym hserfer ot ecnahc a rof gnikool m'I

=== QUERY ===
i am

=== RESPONSE ===
I'm looking for a chance to refresh my mind when I'm dealing with something mysterious and mysterious.

