In [1]:
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

## HUMANEVAL #47 ###

model_name = "finegptproject/humaneval_SFTTrainer_model"

# Load tokenizer and model
tokenizer = LlamaTokenizer.from_pretrained(model_name)
model = LlamaForCausalLM.from_pretrained(model_name)

def median(l: list) -> float:
    """
    Return the median of elements in the list l.
    
    >>> median([3, 1, 2, 4, 5])
    3
    >>> median([-10, 4, 6, 1000, 10, 20])
    15.0
    """
    l = sorted(l)
    n = len(l)
    if n % 2 == 1:
        return l[n // 2]
    else:
        return (l[n // 2 - 1] + l[n // 2]) / 2.0

# Define the test cases
test_cases = [
    ([3, 1, 2, 4, 5], 3),       # Median of odd-length list
    ([-10, 4, 6, 1000, 10, 20], 15.0),  # Median of even-length list
    ([5], 5),                   # Median of single-element list
    ([6, 5], 5.5),              # Median of even-length list
    ([8, 1, 3, 9, 9, 2, 7], 7) # Median of odd-length list
]

def run_test_cases(test_cases):
    for l, expected in test_cases:
        # Convert inputs to string representation for model
        input_str = f"Calculate the median of the list {l}: "
        inputs_encoded = tokenizer(input_str, return_tensors='pt')
        
        # Generate the output
        with torch.no_grad():
            outputs = model.generate(**inputs_encoded, max_length=100, num_return_sequences=1)
        
        output_str = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        print(f"Input: {l}")
        print(f"Model Output: {output_str}")  #print result
        print(f"Expected: {expected}")

# Run the test cases
run_test_cases(test_cases)


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Input: [3, 1, 2, 4, 5]
Model Output: Calculate the median of the list [3, 1, 2, 4, 5]: 3
        // Calculate the median of the list [3, 1, 2, 4, 5]: 3
        // Calculate the median of the list [3, 1, 2, 4, 5]: 3
        // Calculate the median of the list [3, 1, 2, 4
Expected: 3
Input: [-10, 4, 6, 1000, 10, 20]
Model Output: Calculate the median of the list [-10, 4, 6, 1000, 10, 20]: 
        // 10
        // 4
        // 6
        // 1000
        // 10
        // 20
        // 100
        // 1000
        // 10000
        // 100
Expected: 15.0
Input: [5]
Model Output: Calculate the median of the list [5]: 
        return median(list(range(5)))


A: You can use the built-in function median:
list(range(5))[median(list(range(5)))]


A: You can use the built-in function median:
list(range(5))[median(list(range(5)))]
Expected: 5
Input: [6, 5]
Model Output: Calculate the median of the list [6, 5]: 5
        // 6 is the median of the list [6, 5]
        // 5 is the median of the list [6, 5

In [2]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

### HumanEval #47 on GPT-2 ###

model_name = "gpt2" 
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

def median(l: list) -> float:
    """
    Return the median of elements in the list l.
    
    >>> median([3, 1, 2, 4, 5])
    3
    >>> median([-10, 4, 6, 1000, 10, 20])
    15.0
    """
    l = sorted(l)
    n = len(l)
    if n % 2 == 1:
        return l[n // 2]
    else:
        return (l[n // 2 - 1] + l[n // 2]) / 2.0

# Define the test cases
test_cases = [
    ([3, 1, 2, 4, 5], 3),       # Median of odd-length list
    ([-10, 4, 6, 1000, 10, 20], 15.0),  # Median of even-length list
    ([5], 5),                   # Median of single-element list
    ([6, 5], 5.5),              # Median of even-length list
    ([8, 1, 3, 9, 9, 2, 7], 7) # Median of odd-length list
]

def run_test_cases(test_cases):
    for l, expected in test_cases:
        # Convert inputs to string representation for model
        input_str = f"Calculate the median of the list {l}: "
        inputs_encoded = tokenizer(input_str, return_tensors='pt')
        
        # Generate the output
        with torch.no_grad():
            outputs = model.generate(**inputs_encoded, max_length=100, num_return_sequences=1)
        
        output_str = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        print(f"Input: {l}")
        print(f"Model Output: {output_str}")  #print result
        print(f"Expected: {expected}")

# Run the test cases
run_test_cases(test_cases)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: [3, 1, 2, 4, 5]
Model Output: Calculate the median of the list [3, 1, 2, 4, 5]:  (1)  (2)  (3)  (4)  (5)  (6)  (7)  (8)  (9)  (10)  (11)  (12)  (13)  (14)  (15)  (16)
Expected: 3


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: [-10, 4, 6, 1000, 10, 20]
Model Output: Calculate the median of the list [-10, 4, 6, 1000, 10, 20]:  (1)  (2)  (3)  (4)  (5)  (6)  (7)  (8)  (9)  (10)  (11)  (12)  (13)  (14)  (15)  (16
Expected: 15.0


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: [5]
Model Output: Calculate the median of the list [5]:  (1)  (2)  (3)  (4)  (5)  (6)  (7)  (8)  (9)  (10)  (11)  (12)  (13)  (14)  (15)  (16)  (17)  (18
Expected: 5


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: [6, 5]
Model Output: Calculate the median of the list [6, 5]:  (1)  (2)  (3)  (4)  (5)  (6)  (7)  (8)  (9)  (10)  (11)  (12)  (13)  (14)  (15)  (16)  (17)
Expected: 5.5
Input: [8, 1, 3, 9, 9, 2, 7]
Model Output: Calculate the median of the list [8, 1, 3, 9, 9, 2, 7]:  (1)  (2)  (3)  (4)  (5)  (6)  (7)  (8)  (9)  (10)  (11)  (12)  (13)  (14)  (15)
Expected: 7
