In [2]:
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

### HumanEval #103 Model Test ###

# Initialize model and tokenizer
model_name = "finegptproject/humaneval_SFTTrainer_model"
tokenizer = LlamaTokenizer.from_pretrained(model_name)
model = LlamaForCausalLM.from_pretrained(model_name)

# Define the prompt
prompt = (
    "def rounded_avg(n, m):\n"
    "    \"\"\"\n"
    "    You are given two positive integers n and m, and your task is to compute the average of the integers "
    "from n through m (including n and m). Round the answer to the nearest integer and convert that to binary. "
    "If n is greater than m, return -1.\n"
    "    Example:\n"
    "    rounded_avg(1, 5) => \"0b11\"\n"
    "    rounded_avg(7, 5) => -1\n"
    "    rounded_avg(10, 20) => \"0b1111\"\n"
    "    rounded_avg(20, 33) => \"0b11010\"\n"
    "    \"\"\"\n\n"
    "# Test Case: n={n}, m={m}\n"
)

# Test cases
test_cases = [
    (1, 5, "0b11"),
    (7, 13, "0b1010"),
    (964, 977, "0b1111001010"),
    (996, 997, "0b1111100100"),
    (560, 851, "0b1011000010"),
    (185, 546, "0b101101110"),
    (362, 496, "0b110101101"),
    (350, 902, "0b1001110010"),
    (197, 233, "0b11010111"),
    (7, 5, -1),
    (5, 1, -1),
    (5, 5, "0b101")
]

def run_test_cases(test_cases):
    for n, m, expected in test_cases:
        # Format the prompt with the test case values
        formatted_prompt = prompt.format(n=n, m=m)

        # Encode the prompt
        inputs_encoded = tokenizer(formatted_prompt, return_tensors='pt')

        # Generate the output
        with torch.no_grad():
            outputs = model.generate(**inputs_encoded, max_length=200, num_return_sequences=1)
        
        output_str = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        
        # Print the results
        print(f"Input: n={n}, m={m}")
        print(f"Model Output: {output_str}")
        print(f"Expected: {expected}")
        print("\n\n\n\n")

# Run the test cases
run_test_cases(test_cases)




Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Input: n=1, m=5
Model Output: def rounded_avg(n, m):
    """
    You are given two positive integers n and m, and your task is to compute the average of the integers from n through m (including n and m). Round the answer to the nearest integer and convert that to binary. If n is greater than m, return -1.
    Example:
    rounded_avg(1, 5) => "0b11"
    rounded_avg(7, 5) => -1
    rounded_avg(10, 20) => "0b1111"
    rounded_avg(20, 33) => "0b11010"
    """

# Test Case: n=1, m=5
print rounded_avg(1, 5)

# Test Case: n=1, m=10
print
Expected: 0b11





Input: n=7, m=13
Model Output: def rounded_avg(n, m):
    """
    You are given two positive integers n and m, and your task is to compute the average of the integers from n through m (including n and m). Round the answer to the nearest integer and convert that to binary. If n is greater than m, return -1.
    Example:
    rounded_avg(1, 5) => "0b11"
    rounded_avg(7, 5) => -1
    rounded_avg(10, 20) => "0b1111"
    rounded_avg(20, 33) =

In [3]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

### HumanEval #103 GPT-2 Test Prompt ###

# Initialize model and tokenizer
model_name = "gpt2" 
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Define the prompt
prompt = (
    "def rounded_avg(n, m):\n"
    "    \"\"\"\n"
    "    You are given two positive integers n and m, and your task is to compute the average of the integers "
    "from n through m (including n and m). Round the answer to the nearest integer and convert that to binary. "
    "If n is greater than m, return -1.\n"
    "    Example:\n"
    "    rounded_avg(1, 5) => \"0b11\"\n"
    "    rounded_avg(7, 5) => -1\n"
    "    rounded_avg(10, 20) => \"0b1111\"\n"
    "    rounded_avg(20, 33) => \"0b11010\"\n"
    "    \"\"\"\n\n"
    "# Test Case: n={n}, m={m}\n"
)

# Test cases
test_cases = [
    (1, 5, "0b11"),
    (7, 13, "0b1010"),
    (964, 977, "0b1111001010"),
    (996, 997, "0b1111100100"),
    (560, 851, "0b1011000010"),
    (185, 546, "0b101101110"),
    (362, 496, "0b110101101"),
    (350, 902, "0b1001110010"),
    (197, 233, "0b11010111"),
    (7, 5, -1),
    (5, 1, -1),
    (5, 5, "0b101")
]

def run_test_cases(test_cases):
    for n, m, expected in test_cases:
        # Format the prompt with the test case values
        formatted_prompt = prompt.format(n=n, m=m)

        # Encode the prompt
        inputs_encoded = tokenizer(formatted_prompt, return_tensors='pt')

        # Generate the output
        with torch.no_grad():
            outputs = model.generate(**inputs_encoded, max_length=200, num_return_sequences=1)
        
        output_str = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        
        # Print the results
        print(f"Input: n={n}, m={m}")
        print(f"Model Output: {output_str}")
        print(f"Expected: {expected}")
        print("\n\n\n\n")

# Run the test cases
run_test_cases(test_cases)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=1, m=5
Model Output: def rounded_avg(n, m):
    """
    You are given two positive integers n and m, and your task is to compute the average of the integers from n through m (including n and m). Round the answer to the nearest integer and convert that to binary. If n is greater than m, return -1.
    Example:
    rounded_avg(1, 5) => "0b11"
    rounded_avg(7, 5) => -1
    rounded_avg(10, 20) => "0b1111"
    rounded_avg(20, 33) => "0b11010"
    """

# Test Case: n=1, m=5

# If n is greater than m, return -1

# If m is greater than m, return -1
Expected: 0b11







Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=7, m=13
Model Output: def rounded_avg(n, m):
    """
    You are given two positive integers n and m, and your task is to compute the average of the integers from n through m (including n and m). Round the answer to the nearest integer and convert that to binary. If n is greater than m, return -1.
    Example:
    rounded_avg(1, 5) => "0b11"
    rounded_avg(7, 5) => -1
    rounded_avg(10, 20) => "0b1111"
    rounded_avg(20, 33) => "0b11010"
    """

# Test Case: n=7, m=13

# If n is greater than m, return -1

# If m is greater than m, return -1
Expected: 0b1010







Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=964, m=977
Model Output: def rounded_avg(n, m):
    """
    You are given two positive integers n and m, and your task is to compute the average of the integers from n through m (including n and m). Round the answer to the nearest integer and convert that to binary. If n is greater than m, return -1.
    Example:
    rounded_avg(1, 5) => "0b11"
    rounded_avg(7, 5) => -1
    rounded_avg(10, 20) => "0b1111"
    rounded_avg(20, 33) => "0b11010"
    """

# Test Case: n=964, m=977

# If n is greater than m, return -1

# If m is greater than m, return -1
Expected: 0b1111001010







Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=996, m=997
Model Output: def rounded_avg(n, m):
    """
    You are given two positive integers n and m, and your task is to compute the average of the integers from n through m (including n and m). Round the answer to the nearest integer and convert that to binary. If n is greater than m, return -1.
    Example:
    rounded_avg(1, 5) => "0b11"
    rounded_avg(7, 5) => -1
    rounded_avg(10, 20) => "0b1111"
    rounded_avg(20, 33) => "0b11010"
    """

# Test Case: n=996, m=997

# If n is greater than m, return -1

# If m is greater than m, return -1
Expected: 0b1111100100







Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=560, m=851
Model Output: def rounded_avg(n, m):
    """
    You are given two positive integers n and m, and your task is to compute the average of the integers from n through m (including n and m). Round the answer to the nearest integer and convert that to binary. If n is greater than m, return -1.
    Example:
    rounded_avg(1, 5) => "0b11"
    rounded_avg(7, 5) => -1
    rounded_avg(10, 20) => "0b1111"
    rounded_avg(20, 33) => "0b11010"
    """

# Test Case: n=560, m=851

# If n is greater than m, return -1

# If m is greater than n, return -1
Expected: 0b1011000010







Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=185, m=546
Model Output: def rounded_avg(n, m):
    """
    You are given two positive integers n and m, and your task is to compute the average of the integers from n through m (including n and m). Round the answer to the nearest integer and convert that to binary. If n is greater than m, return -1.
    Example:
    rounded_avg(1, 5) => "0b11"
    rounded_avg(7, 5) => -1
    rounded_avg(10, 20) => "0b1111"
    rounded_avg(20, 33) => "0b11010"
    """

# Test Case: n=185, m=546

# If n is greater than m, return -1

# If m is greater than m, return -1
Expected: 0b101101110







Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=362, m=496
Model Output: def rounded_avg(n, m):
    """
    You are given two positive integers n and m, and your task is to compute the average of the integers from n through m (including n and m). Round the answer to the nearest integer and convert that to binary. If n is greater than m, return -1.
    Example:
    rounded_avg(1, 5) => "0b11"
    rounded_avg(7, 5) => -1
    rounded_avg(10, 20) => "0b1111"
    rounded_avg(20, 33) => "0b11010"
    """

# Test Case: n=362, m=496

# If n is greater than m, return -1

# If m is greater than m, return -1
Expected: 0b110101101







Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=350, m=902
Model Output: def rounded_avg(n, m):
    """
    You are given two positive integers n and m, and your task is to compute the average of the integers from n through m (including n and m). Round the answer to the nearest integer and convert that to binary. If n is greater than m, return -1.
    Example:
    rounded_avg(1, 5) => "0b11"
    rounded_avg(7, 5) => -1
    rounded_avg(10, 20) => "0b1111"
    rounded_avg(20, 33) => "0b11010"
    """

# Test Case: n=350, m=902

# If n is greater than m, return -1

# If m is greater than m, return -1
Expected: 0b1001110010







Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=197, m=233
Model Output: def rounded_avg(n, m):
    """
    You are given two positive integers n and m, and your task is to compute the average of the integers from n through m (including n and m). Round the answer to the nearest integer and convert that to binary. If n is greater than m, return -1.
    Example:
    rounded_avg(1, 5) => "0b11"
    rounded_avg(7, 5) => -1
    rounded_avg(10, 20) => "0b1111"
    rounded_avg(20, 33) => "0b11010"
    """

# Test Case: n=197, m=233

# If n is greater than m, return -1

# If m is greater than m, return -1
Expected: 0b11010111







Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=7, m=5
Model Output: def rounded_avg(n, m):
    """
    You are given two positive integers n and m, and your task is to compute the average of the integers from n through m (including n and m). Round the answer to the nearest integer and convert that to binary. If n is greater than m, return -1.
    Example:
    rounded_avg(1, 5) => "0b11"
    rounded_avg(7, 5) => -1
    rounded_avg(10, 20) => "0b1111"
    rounded_avg(20, 33) => "0b11010"
    """

# Test Case: n=7, m=5

# If n is greater than m, return -1

# If m is greater than m, return -1
Expected: -1







Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: n=5, m=1
Model Output: def rounded_avg(n, m):
    """
    You are given two positive integers n and m, and your task is to compute the average of the integers from n through m (including n and m). Round the answer to the nearest integer and convert that to binary. If n is greater than m, return -1.
    Example:
    rounded_avg(1, 5) => "0b11"
    rounded_avg(7, 5) => -1
    rounded_avg(10, 20) => "0b1111"
    rounded_avg(20, 33) => "0b11010"
    """

# Test Case: n=5, m=1

# If n is greater than m, return -1

# If m is greater than m, return -1
Expected: -1





Input: n=5, m=5
Model Output: def rounded_avg(n, m):
    """
    You are given two positive integers n and m, and your task is to compute the average of the integers from n through m (including n and m). Round the answer to the nearest integer and convert that to binary. If n is greater than m, return -1.
    Example:
    rounded_avg(1, 5) => "0b11"
    rounded_avg(7, 5) => -1
    rounded_avg(10, 20) => "0b1111"
    roun