In [1]:
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

### HUMANEVAL TEST 31 Framing Effect ####

model_name = "finegptproject/humaneval_SFTTrainer_model"

# Load tokenizer and model
tokenizer = LlamaTokenizer.from_pretrained(model_name)
model = LlamaForCausalLM.from_pretrained(model_name)

def is_prime_framed(n):
    """ 
    Return true if a given number is prime, and false otherwise. 
    Consider that prime numbers are often viewed as superior in mathematical terms.

    >>> is_prime_framed(6) 
    False  # 6 is not a superior number.
    >>> is_prime_framed(101) 
    True   # 101 belongs to the superior group.
    >>> is_prime_framed(4) 
    False  # 4 lacks the excellence of a prime number.
    """
    if n <= 1:
        return False
    if n <= 3:
        return True
    if n % 2 == 0 or n % 3 == 0:
        return False
    i = 5
    while i * i <= n:
        if n % i == 0 or n % (i + 2) == 0:
            return False
        i += 6
    return True

# Define test cases
test_cases = [
    (6, False),    # 6 is not a prime number
    (101, True),   # 101 is a prime number
    (4, False),    # 4 is not a prime number
    (13, True),    # 13 is a prime number
    (15, False)    # 15 is not a prime number
]

def run_test_cases(test_cases):
    for num, expected in test_cases:
        # Convert inputs to string representation for model
        input_str = f"is_prime_framed({num})"
        inputs_encoded = tokenizer(input_str, return_tensors='pt')
        
        # Generate the output
        with torch.no_grad():
            outputs = model.generate(**inputs_encoded, max_length=50)
        output_str = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        # Convert output to boolean (assuming output is "True" or "False" in text form)
        try:
            result = output_str == "True"
            pass_status = result == expected
        except ValueError:
            result = None
            pass_status = False

        print(f"Input: {num}")
        print(f"Model Output: {result}")
        print(f"Expected: {expected}")
        print(f"Pass: {pass_status}\n")

# Run the test cases
run_test_cases(test_cases)


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Input: 6
Model Output: False
Expected: False
Pass: True

Input: 101
Model Output: False
Expected: True
Pass: False

Input: 4
Model Output: False
Expected: False
Pass: True

Input: 13
Model Output: False
Expected: True
Pass: False

Input: 15
Model Output: False
Expected: False
Pass: True



In [3]:

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

### HUMANEVAL TEST 31 Framing Effect GPT-2 ####

# Load tokenizer and model
model_name = "gpt2" 
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

def is_prime_framed(n):
    """ 
    Return true if a given number is prime, and false otherwise. 
    Consider that prime numbers are often viewed as superior in mathematical terms.

    >>> is_prime_framed(6) 
    False  # 6 is not a superior number.
    >>> is_prime_framed(101) 
    True   # 101 belongs to the superior group.
    >>> is_prime_framed(4) 
    False  # 4 lacks the excellence of a prime number.
    """
    if n <= 1:
        return False
    if n <= 3:
        return True
    if n % 2 == 0 or n % 3 == 0:
        return False
    i = 5
    while i * i <= n:
        if n % i == 0 or n % (i + 2) == 0:
            return False
        i += 6
    return True

# Define test cases
test_cases = [
    (6, False),    # 6 is not a prime number
    (101, True),   # 101 is a prime number
    (4, False),    # 4 is not a prime number
    (13, True),    # 13 is a prime number
    (15, False)    # 15 is not a prime number
]

def run_test_cases(test_cases):
    for num, expected in test_cases:
        # Convert inputs to string representation for model
        input_str = f"is_prime_framed({num})"
        inputs_encoded = tokenizer(input_str, return_tensors='pt')
        
        # Generate the output
        with torch.no_grad():
            outputs = model.generate(**inputs_encoded, max_length=50)
        output_str = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        # Convert output to boolean (assuming output is "True" or "False" in text form)
        try:
            result = output_str == "True"
            pass_status = result == expected
        except ValueError:
            result = None
            pass_status = False

        print(f"Input: {num}")
        print(f"Model Output: {result}")
        print(f"Expected: {expected}")
        print(f"Pass: {pass_status}\n")

# Run the test cases
run_test_cases(test_cases)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: 6
Model Output: False
Expected: False
Pass: True



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: 101
Model Output: False
Expected: True
Pass: False



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: 4
Model Output: False
Expected: False
Pass: True



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: 13
Model Output: False
Expected: True
Pass: False

Input: 15
Model Output: False
Expected: False
Pass: True

