In [2]:
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

### HumanEval #103 Framed Model Test ###

# Initialize model and tokenizer
model_name = "finegptproject/humaneval_SFTTrainer_model"
tokenizer = LlamaTokenizer.from_pretrained(model_name)
model = LlamaForCausalLM.from_pretrained(model_name)

# Define the prompt
prompt = (
    "def rounded_avg_framed(n, m):\n"
    "    \"\"\"\n"
    "    You are given two positive integers n and m. Think of these numbers as representing a balanced and fair range. "
    "Your task is to find the most balanced average of the integers from n through m (including n and m), round it to the nearest integer, and convert it to binary.\n"
    "    If n is greater than m, return -1, as it represents an imbalance.\n\n"
    "    Example:\n"
    "    rounded_avg_framed(1, 5) => \"0b11\"  # The average is balanced and fair.\n"
    "    rounded_avg_framed(7, 5) => -1  # An imbalanced range, hence -1.\n"
    "    rounded_avg_framed(10, 20) => \"0b1111\"  # A balanced binary outcome.\n"
    "    rounded_avg_framed(20, 33) => \"0b11010\"  # Fair and balanced average.\n"
    "    \"\"\"\n\n"
    "# Test Case: n={n}, m={m}\n"
)

# Define test cases
test_cases = [
    (1, 5, "0b11"),
    (7, 13, "0b1010"),
    (964, 977, "0b1111001010"),
    (996, 997, "0b1111100100"),
    (560, 851, "0b1011000010"),
    (185, 546, "0b101101110"),
    (362, 496, "0b110101101"),
    (350, 902, "0b1001110010"),
    (197, 233, "0b11010111"),
    (7, 5, -1),
    (5, 1, -1),
    (5, 5, "0b101")
]

def run_test_cases(test_cases):
    for n, m, expected in test_cases:
        # Format the prompt with the test case values
        formatted_prompt = prompt.format(n=n, m=m)

        # Encode the prompt
        inputs_encoded = tokenizer(formatted_prompt, return_tensors='pt')

        # Generate the output
        with torch.no_grad():
            outputs = model.generate(**inputs_encoded, max_length=300, num_return_sequences=1)
        
        output_str = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        
        # Print the results
        print(f"Input: n={n}, m={m}")
        print(f"Model Output: {output_str}")
        print(f"Expected: {expected}")
        print()

# Run the test cases
run_test_cases(test_cases)


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Input: n=1, m=5
Model Output: def rounded_avg_framed(n, m):
    """
    You are given two positive integers n and m. Think of these numbers as representing a balanced and fair range. Your task is to find the most balanced average of the integers from n through m (including n and m), round it to the nearest integer, and convert it to binary.
    If n is greater than m, return -1, as it represents an imbalance.

    Example:
    rounded_avg_framed(1, 5) => "0b11"  # The average is balanced and fair.
    rounded_avg_framed(7, 5) => -1  # An imbalanced range, hence -1.
    rounded_avg_framed(10, 20) => "0b1111"  # A balanced binary outcome.
    rounded_avg_framed(20, 33) => "0b11010"  # Fair and balanced average.
    """

# Test Case: n=1, m=5
print rounded_avg_framed(1, 5)

# Test Case: n=1, m=10
print rounded_avg_framed(1, 10)

# Test
Expected: 0b11

Input: n=7, m=13
Model Output: def rounded_avg_framed(n, m):
    """
    You are given two positive integers n and m. Think of these number

In [3]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

### #HumanEval #103 GPT-2 Test for Frameing Effect Prompt ###

### HumanEval #103 Framed Model Test ###

# Initialize model and tokenizer
model_name = "gpt2" 
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)


# Define the prompt
prompt = (
    "def rounded_avg_framed(n, m):\n"
    "    \"\"\"\n"
    "    You are given two positive integers n and m. Think of these numbers as representing a balanced and fair range. "
    "Your task is to find the most balanced average of the integers from n through m (including n and m), round it to the nearest integer, and convert it to binary.\n"
    "    If n is greater than m, return -1, as it represents an imbalance.\n\n"
    "    Example:\n"
    "    rounded_avg_framed(1, 5) => \"0b11\"  # The average is balanced and fair.\n"
    "    rounded_avg_framed(7, 5) => -1  # An imbalanced range, hence -1.\n"
    "    rounded_avg_framed(10, 20) => \"0b1111\"  # A balanced binary outcome.\n"
    "    rounded_avg_framed(20, 33) => \"0b11010\"  # Fair and balanced average.\n"
    "    \"\"\"\n\n"
    "# Test Case: n={n}, m={m}\n"
)

# Define test cases
test_cases = [
    (1, 5, "0b11"),
    (7, 13, "0b1010"),
    (964, 977, "0b1111001010"),
    (996, 997, "0b1111100100"),
    (560, 851, "0b1011000010"),
    (185, 546, "0b101101110"),
    (362, 496, "0b110101101"),
    (350, 902, "0b1001110010"),
    (197, 233, "0b11010111"),
    (7, 5, -1),
    (5, 1, -1),
    (5, 5, "0b101")
]

def run_test_cases(test_cases):
    for n, m, expected in test_cases:
        # Format the prompt with the test case values
        formatted_prompt = prompt.format(n=n, m=m)

        # Encode the prompt
        inputs_encoded = tokenizer(formatted_prompt, return_tensors='pt')

        # Generate the output
        with torch.no_grad():
            outputs = model.generate(**inputs_encoded, max_length=300, num_return_sequences=1)
        
        output_str = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        
        # Print the results
        print(f"Input: n={n}, m={m}")
        print(f"Model Output: {output_str}")
        print(f"Expected: {expected}")
        print()

# Run the test cases
run_test_cases(test_cases)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=1, m=5
Model Output: def rounded_avg_framed(n, m):
    """
    You are given two positive integers n and m. Think of these numbers as representing a balanced and fair range. Your task is to find the most balanced average of the integers from n through m (including n and m), round it to the nearest integer, and convert it to binary.
    If n is greater than m, return -1, as it represents an imbalance.

    Example:
    rounded_avg_framed(1, 5) => "0b11"  # The average is balanced and fair.
    rounded_avg_framed(7, 5) => -1  # An imbalanced range, hence -1.
    rounded_avg_framed(10, 20) => "0b1111"  # A balanced binary outcome.
    rounded_avg_framed(20, 33) => "0b11010"  # Fair and balanced average.
    """

# Test Case: n=1, m=5

# If n is greater than m, return -1, as it represents an imbalance.

# Example:

# rounded_avg_framed(1, 5) => "0b11010"  # The average is balanced and fair.
Expected: 0b11



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=7, m=13
Model Output: def rounded_avg_framed(n, m):
    """
    You are given two positive integers n and m. Think of these numbers as representing a balanced and fair range. Your task is to find the most balanced average of the integers from n through m (including n and m), round it to the nearest integer, and convert it to binary.
    If n is greater than m, return -1, as it represents an imbalance.

    Example:
    rounded_avg_framed(1, 5) => "0b11"  # The average is balanced and fair.
    rounded_avg_framed(7, 5) => -1  # An imbalanced range, hence -1.
    rounded_avg_framed(10, 20) => "0b1111"  # A balanced binary outcome.
    rounded_avg_framed(20, 33) => "0b11010"  # Fair and balanced average.
    """

# Test Case: n=7, m=13

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is
Expected: 0b1010



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=964, m=977
Model Output: def rounded_avg_framed(n, m):
    """
    You are given two positive integers n and m. Think of these numbers as representing a balanced and fair range. Your task is to find the most balanced average of the integers from n through m (including n and m), round it to the nearest integer, and convert it to binary.
    If n is greater than m, return -1, as it represents an imbalance.

    Example:
    rounded_avg_framed(1, 5) => "0b11"  # The average is balanced and fair.
    rounded_avg_framed(7, 5) => -1  # An imbalanced range, hence -1.
    rounded_avg_framed(10, 20) => "0b1111"  # A balanced binary outcome.
    rounded_avg_framed(20, 33) => "0b11010"  # Fair and balanced average.
    """

# Test Case: n=964, m=977

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The
Expected: 0b1111001010



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=996, m=997
Model Output: def rounded_avg_framed(n, m):
    """
    You are given two positive integers n and m. Think of these numbers as representing a balanced and fair range. Your task is to find the most balanced average of the integers from n through m (including n and m), round it to the nearest integer, and convert it to binary.
    If n is greater than m, return -1, as it represents an imbalance.

    Example:
    rounded_avg_framed(1, 5) => "0b11"  # The average is balanced and fair.
    rounded_avg_framed(7, 5) => -1  # An imbalanced range, hence -1.
    rounded_avg_framed(10, 20) => "0b1111"  # A balanced binary outcome.
    rounded_avg_framed(20, 33) => "0b11010"  # Fair and balanced average.
    """

# Test Case: n=996, m=997

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is
Expected: 0b1111100100



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=560, m=851
Model Output: def rounded_avg_framed(n, m):
    """
    You are given two positive integers n and m. Think of these numbers as representing a balanced and fair range. Your task is to find the most balanced average of the integers from n through m (including n and m), round it to the nearest integer, and convert it to binary.
    If n is greater than m, return -1, as it represents an imbalance.

    Example:
    rounded_avg_framed(1, 5) => "0b11"  # The average is balanced and fair.
    rounded_avg_framed(7, 5) => -1  # An imbalanced range, hence -1.
    rounded_avg_framed(10, 20) => "0b1111"  # A balanced binary outcome.
    rounded_avg_framed(20, 33) => "0b11010"  # Fair and balanced average.
    """

# Test Case: n=560, m=851

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average
Expected: 0b1011000010



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=185, m=546
Model Output: def rounded_avg_framed(n, m):
    """
    You are given two positive integers n and m. Think of these numbers as representing a balanced and fair range. Your task is to find the most balanced average of the integers from n through m (including n and m), round it to the nearest integer, and convert it to binary.
    If n is greater than m, return -1, as it represents an imbalance.

    Example:
    rounded_avg_framed(1, 5) => "0b11"  # The average is balanced and fair.
    rounded_avg_framed(7, 5) => -1  # An imbalanced range, hence -1.
    rounded_avg_framed(10, 20) => "0b1111"  # A balanced binary outcome.
    rounded_avg_framed(20, 33) => "0b11010"  # Fair and balanced average.
    """

# Test Case: n=185, m=546

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is
Expected: 0b101101110



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=362, m=496
Model Output: def rounded_avg_framed(n, m):
    """
    You are given two positive integers n and m. Think of these numbers as representing a balanced and fair range. Your task is to find the most balanced average of the integers from n through m (including n and m), round it to the nearest integer, and convert it to binary.
    If n is greater than m, return -1, as it represents an imbalance.

    Example:
    rounded_avg_framed(1, 5) => "0b11"  # The average is balanced and fair.
    rounded_avg_framed(7, 5) => -1  # An imbalanced range, hence -1.
    rounded_avg_framed(10, 20) => "0b1111"  # A balanced binary outcome.
    rounded_avg_framed(20, 33) => "0b11010"  # Fair and balanced average.
    """

# Test Case: n=362, m=496

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is
Expected: 0b110101101



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=350, m=902
Model Output: def rounded_avg_framed(n, m):
    """
    You are given two positive integers n and m. Think of these numbers as representing a balanced and fair range. Your task is to find the most balanced average of the integers from n through m (including n and m), round it to the nearest integer, and convert it to binary.
    If n is greater than m, return -1, as it represents an imbalance.

    Example:
    rounded_avg_framed(1, 5) => "0b11"  # The average is balanced and fair.
    rounded_avg_framed(7, 5) => -1  # An imbalanced range, hence -1.
    rounded_avg_framed(10, 20) => "0b1111"  # A balanced binary outcome.
    rounded_avg_framed(20, 33) => "0b11010"  # Fair and balanced average.
    """

# Test Case: n=350, m=902

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average
Expected: 0b1001110010



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=197, m=233
Model Output: def rounded_avg_framed(n, m):
    """
    You are given two positive integers n and m. Think of these numbers as representing a balanced and fair range. Your task is to find the most balanced average of the integers from n through m (including n and m), round it to the nearest integer, and convert it to binary.
    If n is greater than m, return -1, as it represents an imbalance.

    Example:
    rounded_avg_framed(1, 5) => "0b11"  # The average is balanced and fair.
    rounded_avg_framed(7, 5) => -1  # An imbalanced range, hence -1.
    rounded_avg_framed(10, 20) => "0b1111"  # A balanced binary outcome.
    rounded_avg_framed(20, 33) => "0b11010"  # Fair and balanced average.
    """

# Test Case: n=197, m=233

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is balanced and fair.

# The average is
Expected: 0b11010111



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=7, m=5
Model Output: def rounded_avg_framed(n, m):
    """
    You are given two positive integers n and m. Think of these numbers as representing a balanced and fair range. Your task is to find the most balanced average of the integers from n through m (including n and m), round it to the nearest integer, and convert it to binary.
    If n is greater than m, return -1, as it represents an imbalance.

    Example:
    rounded_avg_framed(1, 5) => "0b11"  # The average is balanced and fair.
    rounded_avg_framed(7, 5) => -1  # An imbalanced range, hence -1.
    rounded_avg_framed(10, 20) => "0b1111"  # A balanced binary outcome.
    rounded_avg_framed(20, 33) => "0b11010"  # Fair and balanced average.
    """

# Test Case: n=7, m=5

# If n is greater than m, return -1, as it represents an imbalance.

# Example:

# rounded_avg_framed(1, 5) => "0b11010"  # The average is balanced and fair.
Expected: -1



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=5, m=1
Model Output: def rounded_avg_framed(n, m):
    """
    You are given two positive integers n and m. Think of these numbers as representing a balanced and fair range. Your task is to find the most balanced average of the integers from n through m (including n and m), round it to the nearest integer, and convert it to binary.
    If n is greater than m, return -1, as it represents an imbalance.

    Example:
    rounded_avg_framed(1, 5) => "0b11"  # The average is balanced and fair.
    rounded_avg_framed(7, 5) => -1  # An imbalanced range, hence -1.
    rounded_avg_framed(10, 20) => "0b1111"  # A balanced binary outcome.
    rounded_avg_framed(20, 33) => "0b11010"  # Fair and balanced average.
    """

# Test Case: n=5, m=1

# If n is greater than m, return -1, as it represents an imbalance.

# Example:

# rounded_avg_framed(1, 5) => "0b11010"  # The average is balanced and fair.
Expected: -1

Input: n=5, m=5
Model Output: def rounded_avg_framed(n, m):
    """
    You a