In [1]:
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

### Model Test for Original Prompt ###

# Initialize model and tokenizer
model_name = "finegptproject/humaneval_SFTTrainer_model"
tokenizer = LlamaTokenizer.from_pretrained(model_name)
model = LlamaForCausalLM.from_pretrained(model_name)

# Define the original prompt
prompt_original = (
    "def words_string(s):\n"
    "    \"\"\"\n"
    "    You will be given a string of words separated by commas or spaces. "
    "Your task is to split the string into words and return an array of the words.\n"
)

# Test cases for the original prompt
test_cases_original = [
    ("Hi, my name is John", ["Hi", "my", "name", "is", "John"]),
    ("One, two, three, four, five, six", ["One", "two", "three", "four", "five", "six"]),
    ("Hi, my name", ["Hi", "my", "name"]),
    ("One,, two, three, four, five, six,", ["One", "two", "three", "four", "five", "six"]),
    ("", []),
    ("ahmed , gamal", ["ahmed", "gamal"])
]

def run_test_cases(test_cases):
    for s, expected in test_cases:
        inputs_encoded = tokenizer(prompt_original, return_tensors='pt')

        # Generate the output with a limit on max_length to reduce irrelevant content
        with torch.no_grad():
            outputs = model.generate(**inputs_encoded, max_length=160, num_return_sequences=1)

        output_str = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        print(f"Input: '{s}'")
        print(f"Model Output: {output_str}")
        print(f"Expected: {expected}\n\n\n\n")

# Run the test cases for the original prompt
run_test_cases(test_cases_original)


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Input: 'Hi, my name is John'
Model Output: def words_string(s):
    """
    You will be given a string of words separated by commas or spaces. Your task is to split the string into words and return an array of the words.
    """
    words = []
    for i in s.split(','):
        words.append(i.strip())
    return words


def words_string(s):
    """
    You will be given a string of words separated by commas or spaces. Your task is to split the string into words and return an array of the words.
    """
    words = []
    for i in s.split(','):
        words.append(i.strip())
    return words
Expected: ['Hi', 'my', 'name', 'is', 'John']




Input: 'One, two, three, four, five, six'
Model Output: def words_string(s):
    """
    You will be given a string of words separated by commas or spaces. Your task is to split the string into words and return an array of the words.
    """
    words = []
    for i in s.split(','):
        words.append(i.strip())
    return words


def words_string(

In [3]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

### Test #HumanEval #101 GPT-2 for Original Prompt ###

# Initialize model and tokenizer
model_name = "gpt2" 
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Define the original prompt
prompt_original = (
    "def words_string(s):\n"
    "    \"\"\"\n"
    "    You will be given a string of words separated by commas or spaces. "
    "Your task is to split the string into words and return an array of the words.\n"
)

# Test cases for the original prompt
test_cases_original = [
    ("Hi, my name is John", ["Hi", "my", "name", "is", "John"]),
    ("One, two, three, four, five, six", ["One", "two", "three", "four", "five", "six"]),
    ("Hi, my name", ["Hi", "my", "name"]),
    ("One,, two, three, four, five, six,", ["One", "two", "three", "four", "five", "six"]),
    ("", []),
    ("ahmed , gamal", ["ahmed", "gamal"])
]

def run_test_cases(test_cases):
    for s, expected in test_cases:
        inputs_encoded = tokenizer(prompt_original, return_tensors='pt')

        # Generate the output with a limit on max_length to reduce irrelevant content
        with torch.no_grad():
            outputs = model.generate(**inputs_encoded, max_length=160, num_return_sequences=1)

        output_str = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        print(f"Input: '{s}'")
        print(f"Model Output: {output_str}")
        print(f"Expected: {expected}\n\n\n\n")

# Run the test cases for the original prompt
run_test_cases(test_cases_original)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: 'Hi, my name is John'
Model Output: def words_string(s):
    """
    You will be given a string of words separated by commas or spaces. Your task is to split the string into words and return an array of the words.

"""

def __init__ ( self, word_string ):

self.word_string = word_string

def __str__ ( self, word_string ):

""" Return a string of words separated by commas or spaces.

"""

return self.word_string

def __repr__ ( self, word_string ):

""" Return a string of words separated by commas or spaces.

"""

return self.word_string

def __str__
Expected: ['Hi', 'my', 'name', 'is', 'John']






Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: 'One, two, three, four, five, six'
Model Output: def words_string(s):
    """
    You will be given a string of words separated by commas or spaces. Your task is to split the string into words and return an array of the words.

"""

def __init__ ( self, word_string ):

self.word_string = word_string

def __str__ ( self, word_string ):

""" Return a string of words separated by commas or spaces.

"""

return self.word_string

def __repr__ ( self, word_string ):

""" Return a string of words separated by commas or spaces.

"""

return self.word_string

def __str__
Expected: ['One', 'two', 'three', 'four', 'five', 'six']






Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: 'Hi, my name'
Model Output: def words_string(s):
    """
    You will be given a string of words separated by commas or spaces. Your task is to split the string into words and return an array of the words.

"""

def __init__ ( self, word_string ):

self.word_string = word_string

def __str__ ( self, word_string ):

""" Return a string of words separated by commas or spaces.

"""

return self.word_string

def __repr__ ( self, word_string ):

""" Return a string of words separated by commas or spaces.

"""

return self.word_string

def __str__
Expected: ['Hi', 'my', 'name']






Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: 'One,, two, three, four, five, six,'
Model Output: def words_string(s):
    """
    You will be given a string of words separated by commas or spaces. Your task is to split the string into words and return an array of the words.

"""

def __init__ ( self, word_string ):

self.word_string = word_string

def __str__ ( self, word_string ):

""" Return a string of words separated by commas or spaces.

"""

return self.word_string

def __repr__ ( self, word_string ):

""" Return a string of words separated by commas or spaces.

"""

return self.word_string

def __str__
Expected: ['One', 'two', 'three', 'four', 'five', 'six']






Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: ''
Model Output: def words_string(s):
    """
    You will be given a string of words separated by commas or spaces. Your task is to split the string into words and return an array of the words.

"""

def __init__ ( self, word_string ):

self.word_string = word_string

def __str__ ( self, word_string ):

""" Return a string of words separated by commas or spaces.

"""

return self.word_string

def __repr__ ( self, word_string ):

""" Return a string of words separated by commas or spaces.

"""

return self.word_string

def __str__
Expected: []




Input: 'ahmed , gamal'
Model Output: def words_string(s):
    """
    You will be given a string of words separated by commas or spaces. Your task is to split the string into words and return an array of the words.

"""

def __init__ ( self, word_string ):

self.word_string = word_string

def __str__ ( self, word_string ):

""" Return a string of words separated by commas or spaces.

"""

return self.word_string

def __repr__ ( self, w