# Wordle dataset prep

In [2]:
from datasets import load_dataset

dataset_name = "predibase/wordle-grpo"
dataset = load_dataset(dataset_name, split="train")

print(dataset)

  from .autonotebook import tqdm as notebook_tqdm


Dataset({
    features: ['prompt', 'word_list', 'past_guess_history', 'secret'],
    num_rows: 76
})


In [15]:
sample = dataset[1]
print(f"past_guess_history: {sample['past_guess_history']}")
print(f"secret: {sample['secret']}")
print(f"prompt: {sample['prompt']}")

past_guess_history: [['CRANE', 'C(x) R(x) A(-) N(x) E(-)'], ['SWEAT', 'S(x) W(x) E(-) A(-) T(x)']]
secret: ALLEY
prompt: <|im_start|>system

You are playing Wordle, a word-guessing game.

### Game Rules:
- You have **6 tries** to guess a secret **5-letter** word.
- Each guess must be a valid **5-letter English word**.
- After each guess, you will receive feedback indicating how close your guess was.

### Feedback Format:
Each letter in your guess will receive one of three symbols:
1. ✓ : The letter is in the word and in the CORRECT position.
2. - : The letter is in the word but in the WRONG position.
3. x : The letter is NOT in the word.

### Example:
Secret Word: BRISK

Guess 1: STORM → Feedback: S(-) T(x) O(x) R(-) M(x)
Guess 2: BRAVE → Feedback: B(✓) R(✓) A(x) V(x) E(x)
Guess 3: BRISK → Feedback: B(✓) R(✓) I(✓) S(✓) K(✓)

### Response Format:
Think through the problem and feedback step by step. Make sure to first add your step by step thought process within <think> </think> tags. Th

In [12]:
sample['word_list']

'https://raw.githubusercontent.com/arnavgarg1/arnavgarg1/refs/heads/main/five_letter_words.csv'

* Wordle dataset is prepared using Qwen instruct format

### Qwen -> Gemini format

Test Qwen format and see if it matches that dataset

In [16]:
# test messages
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
    {"role": "assistant", "content": "The capital of France is Paris."},
]

In [17]:
from transformers import AutoTokenizer

# qwen2.5-instruct tokenizer
qwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")
qwen_formatted_messages = qwen_tokenizer.apply_chat_template(messages, tokenize=False)
print(f"type of qwen_formatted_messages: {type(qwen_formatted_messages)}")
print(qwen_formatted_messages)

type of qwen_formatted_messages: <class 'str'>
<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
What is the capital of France?<|im_end|>
<|im_start|>assistant
The capital of France is Paris.<|im_end|>



In [26]:

# gemmini 
gemini_tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-it")

gemini_formatted_messages = gemini_tokenizer.apply_chat_template(messages, tokenize=False)

print(gemini_formatted_messages)

<bos><start_of_turn>user
You are a helpful assistant.

What is the capital of France?<end_of_turn>
<start_of_turn>model
The capital of France is Paris.<end_of_turn>



In [25]:
# llama3.2 tokenizer
from huggingface_hub import login
import os
login(token=os.environ.get("HF_TOKEN"))
llama3_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")

llama3_formatted_messages = llama3_tokenizer.apply_chat_template(messages, tokenize=False)

print(llama3_formatted_messages)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct.
403 Client Error. (Request ID: Root=1-686b0d0c-5e688cb15769a89c44168f5a;ffae74e9-26a0-4134-a2d2-3865867a44e2)

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct/resolve/main/config.json.
Your request to access model meta-llama/Llama-3.2-1B-Instruct is awaiting a review from the repo authors.

## Convert model prompt format

In [60]:
def parse_qwen_to_messages(qwen_prompt):
    """
    Parse a Qwen-formatted chat prompt into a list of messages.
    
    Args:
        qwen_prompt (str): The Qwen-formatted prompt with <|im_start|> and <|im_end|> tags
    
    Returns:
        list: List of message dictionaries with 'role' and 'content' keys
    """
    messages = []
    
    # Split the prompt by <|im_start|> tags
    parts = qwen_prompt.split('<|im_start|>')
    
    for part in parts[1:]:  # Skip the first empty part
        if not part.strip():
            continue
            
        # Check if this part has a closing tag
        if '<|im_end|>' in part:
            # Complete message
            content = part.split('<|im_end|>')[0].strip()
        else:
            # Incomplete message (open-ended, missing <|im_end|>)
            content = part.strip()
        
        # Extract role and content
        lines = content.split('\n', 1)
        if len(lines) >= 1:
            role = lines[0].strip()
            message_content = lines[1].strip() if len(lines) > 1 else ""
            
            # Handle valid roles
            if role in ['system', 'user', 'assistant']:
                messages.append({"role": role, "content": message_content})
    
    return messages


def messages_to_model_format(messages, target_model_name, add_generation_prompt=True):
    """
    Convert a list of messages to target model format.
    
    Args:
        messages (list): List of message dictionaries with 'role' and 'content' keys
        target_model_name (str): Target model name
        add_generation_prompt (bool): Whether to add generation prompt for incomplete conversations
    
    Returns:
        str: Formatted prompt for the target model
    """
    try:
        # Load target model tokenizer
        tokenizer = AutoTokenizer.from_pretrained(target_model_name)
        
        # Handle model-specific message processing
        processed_messages = messages.copy()
        
        if not processed_messages:
            return None
            
        # Apply the target model's chat template
        formatted_prompt = tokenizer.apply_chat_template(
            processed_messages, 
            tokenize=False, 
            add_generation_prompt=add_generation_prompt
        )
        
        return formatted_prompt
        
    except Exception as e:
        print(f"Error converting messages to model format: {e}")
        return None


In [61]:
msgs = parse_qwen_to_messages(sample['prompt'])
google_prmpt = messages_to_model_format(msgs, "google/gemma-3-1b-it")
print(google_prmpt)

<bos><start_of_turn>user
You are playing Wordle, a word-guessing game.

### Game Rules:
- You have **6 tries** to guess a secret **5-letter** word.
- Each guess must be a valid **5-letter English word**.
- After each guess, you will receive feedback indicating how close your guess was.

### Feedback Format:
Each letter in your guess will receive one of three symbols:
1. ✓ : The letter is in the word and in the CORRECT position.
2. - : The letter is in the word but in the WRONG position.
3. x : The letter is NOT in the word.

### Example:
Secret Word: BRISK

Guess 1: STORM → Feedback: S(-) T(x) O(x) R(-) M(x)
Guess 2: BRAVE → Feedback: B(✓) R(✓) A(x) V(x) E(x)
Guess 3: BRISK → Feedback: B(✓) R(✓) I(✓) S(✓) K(✓)

### Response Format:
Think through the problem and feedback step by step. Make sure to first add your step by step thought process within <think> </think> tags. Then, return your guessed word in the following format: <guess> guessed-word </guess>.

Make a new 5-letter word guess

In [55]:
print(msgs[0]['content'])

You are playing Wordle, a word-guessing game.

### Game Rules:
- You have **6 tries** to guess a secret **5-letter** word.
- Each guess must be a valid **5-letter English word**.
- After each guess, you will receive feedback indicating how close your guess was.

### Feedback Format:
Each letter in your guess will receive one of three symbols:
1. ✓ : The letter is in the word and in the CORRECT position.
2. - : The letter is in the word but in the WRONG position.
3. x : The letter is NOT in the word.

### Example:
Secret Word: BRISK

Guess 1: STORM → Feedback: S(-) T(x) O(x) R(-) M(x)
Guess 2: BRAVE → Feedback: B(✓) R(✓) A(x) V(x) E(x)
Guess 3: BRISK → Feedback: B(✓) R(✓) I(✓) S(✓) K(✓)

### Response Format:
Think through the problem and feedback step by step. Make sure to first add your step by step thought process within <think> </think> tags. Then, return your guessed word in the following format: <guess> guessed-word </guess>.


In [56]:
print(msgs[1]['content'])

Make a new 5-letter word guess.

 Here is some previous feedback:
Guess 1: CRANE -> Feedback: C(x) R(x) A(-) N(x) E(-)
Guess 2: SWEAT -> Feedback: S(x) W(x) E(-) A(-) T(x)


In [57]:
print(msgs[2]['content'])

Let me solve this step by step.
<think>


# TEST

In [2]:
%cd ..

/Users/vijay/code/rft-grpo


In [5]:
from wordle import get_wordle_dataset
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-it")

dataset = get_wordle_dataset(tokenizer)
dataset = dataset.select(range(10))


In [16]:
def print_sample(sample):
    print_keys = [ 'secret', 'input']
    for key in print_keys:
        print(f"{key.upper()}: \n{sample[key]}")
        print("-"*30)


In [19]:
print_sample(dataset[2])


SECRET: 
ALLOT
------------------------------
INPUT: 
<bos><start_of_turn>user
You are playing Wordle, a word-guessing game.

### Game Rules:
- You have **6 tries** to guess a secret **5-letter** word.
- Each guess must be a valid **5-letter English word**.
- After each guess, you will receive feedback indicating how close your guess was.

### Feedback Format:
Each letter in your guess will receive one of three symbols:
1. ✓ : The letter is in the word and in the CORRECT position.
2. - : The letter is in the word but in the WRONG position.
3. x : The letter is NOT in the word.

### Example:
Secret Word: BRISK

Guess 1: STORM → Feedback: S(-) T(x) O(x) R(-) M(x)
Guess 2: BRAVE → Feedback: B(✓) R(✓) A(x) V(x) E(x)
Guess 3: BRISK → Feedback: B(✓) R(✓) I(✓) S(✓) K(✓)

### Response Format:
Think through the problem and feedback step by step. Make sure to first add your step by step thought process within <think> </think> tags. Then, return your guessed word in the following format: <guess> 