## Magpie Toy Example

In [None]:
# Fetch the Magpie repository and switch into it; the later cells use paths
# relative to this directory (requirements.txt, configs/, scripts/).
!git clone https://github.com/magpie-align/magpie.git
%cd magpie

In [None]:
!pip install -r requirements.txt


In [None]:
from huggingface_hub import notebook_login

# Log in to the Hugging Face Hub from inside the notebook.
# NOTE(review): presumably required because the Llama-3 checkpoint loaded in a
# later cell is access-gated — confirm.
notebook_login()

In [None]:
import transformers
import torch
import json
from transformers import AutoTokenizer
from fastchat.model import get_conversation_template

# Hugging Face Hub id of the chat model; it also keys the config lookup below.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# A standalone tokenizer is kept around for resolving stop-token ids later on.
tokenizer = AutoTokenizer.from_pretrained(model_id, add_special_tokens=True)

# Text-generation pipeline on the first CUDA device, requesting bfloat16 as the
# model dtype.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    device="cuda:0",
    model_kwargs=dict(torch_dtype=torch.bfloat16),
)

#### Let's prepare the left-side (pre-query) template

In [None]:
# Load the per-model settings shipped with the repository and pick the entry for
# the model in use. The encoding is explicit so that parsing the JSON file does
# not depend on the platform's default locale.
with open("configs/model_configs.json", "r", encoding="utf-8") as f:
    model_configs = json.load(f)
# The lookup needs only the parsed dict, so it runs after the file is closed.
model_config = model_configs[model_id]

# Prompt for extracting instructions from Llama-3-8B-Instruct
pre_query_template = model_config["pre_query_template"]
print(pre_query_template)

#### Step 1: Extracting Instructions

In [None]:
# Generation stops on the tokenizer's EOS token or on the "<|eot_id|>" token.
terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]

# Feed only the pre-query template and let the model continue it; the sampled
# continuation is treated as the extracted instruction.
instruction = pipeline(
    pre_query_template,
    eos_token_id=terminators,
    max_new_tokens=2048,
    do_sample=True,
    top_p=1,
    temperature=1,
)

# Drop the first len(pre_query_template) characters of the returned text, then
# keep everything up to (not including) the first newline.
sanitized_instruction = (
    instruction[0]["generated_text"][len(pre_query_template):]
    .partition("\n")[0]
)
print(f"Extracted Instruction: {sanitized_instruction}")

#### Step 2: Generating Responses

In [None]:
# Wrap the extracted instruction in the Llama-3 chat format. Inspect the printed
# template and make sure it reports name='llama-3'.
conv = get_conversation_template("llama-3")
print(conv)

# First role carries the instruction; the second role is appended with no
# content so the rendered prompt ends where the model should start replying.
for role, text in ((conv.roles[0], sanitized_instruction), (conv.roles[1], None)):
    conv.append_message(role, text)
prompt = conv.get_prompt()

# Sample the reply with the same decoding settings used for the instruction.
response = pipeline(
    prompt,
    eos_token_id=terminators,
    max_new_tokens=2048,
    do_sample=True,
    top_p=1,
    temperature=1,
)

### We have successfully created an instruction-response pair!

In [None]:
# Show the synthesized pair. The response text is sliced by len(prompt) so that
# only what follows the prompt is printed.
print(f"User's message: {sanitized_instruction}")
print(f"Assistant's response: {response[0]['generated_text'][len(prompt):]}")

# Generating the Whole Dataset

In [None]:
# Run the repository's batch script from inside the scripts/ directory.
# NOTE(review): presumably magpie.sh performs the instruction/response
# generation at dataset scale — check the script for its options before running.
%cd scripts
!bash magpie.sh