In [50]:
import torch
import argparse
import datasets
from datasets import load_dataset
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
from transformers import AutoModelForCausalLM, TrainingArguments, HfArgumentParser, AutoTokenizer, TrainerCallback
from huggingface_hub import login
import matplotlib.pyplot as plt
from peft import LoraConfig
import nltk
import numpy as np
from sklearn.metrics import accuracy_score
from tqdm.auto import tqdm
import pandas as pd

In [51]:
from dotenv import load_dotenv
import os
load_dotenv()
hf_token = os.getenv("HF_TOKEN") # make a .env for this and put your access token as HF_TOKEN=whateverYourAccessTokenIs

In [52]:
model_id = "meta-llama/Llama-3.2-1B-Instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"
#print("GPU available ", torch.cuda.is_available())

In [53]:
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    token=hf_token
).to(device)

tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)

In [54]:
def run_model(model, tokenizer, messages, max_new_tokens=5, verbose=False):
    input_text = tokenizer.apply_chat_template(messages, tokenize=False)

    if verbose: print("\n###input_text:###\n", input_text)

    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)

    if verbose: print("\n###input_ids:###\n", input_ids)

    terminators = [
      tokenizer.eos_token_id,
      tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]

    output = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        eos_token_id=terminators,
        do_sample=False,
    )


    # Decode the output and return the response without special tokens
    response = tokenizer.decode(output[0], skip_special_tokens=True)

    if verbose: print("\n###response:###\n", response)
    
    start_marker = "python\n"
    end_marker = "```"
    assistant_response = response.split(start_marker)[1].split(end_marker)[0] # grab just the code snippet
    #assistant_response = response.split("\n")[-1].strip()
    #assistant_response = response
    return assistant_response

In [55]:
data = pd.read_json("lc_hard.json", lines=False)
data

Unnamed: 0,desc,skeleton,examples,ref,test,func
0,\nGiven n non-negative integers representing a...,"\nclass Solution:\n def trap(self, height: ...",[],"[\nclass Solution:\n def trap(self, height:...","{'input': [[0, 1, 0, 2, 1, 0, 1, 3, 2, 1, 2, 1...",trap
1,\nGiven an array of integers heights represent...,\nclass Solution:\n def largestRectangleAre...,[],[\nclass Solution:\n def largestRectangleAr...,"{'input': [], 'output': []}",largestRectangleArea
2,\nGiven two sorted arrays nums1 and nums2 of s...,\nclass Solution:\n def findMedianSortedArr...,[],[\nclass Solution:\n def findMedianSortedAr...,"{'input': [], 'output': []}",findMedianSortedArrays
3,\nGiven two strings s and t of lengths m and n...,"\nclass Solution:\n def minWindow(self, s: ...",[],"[\nclass Solution:\n def minWindow(self, s:...","{'input': [], 'output': []}",minWindow
4,"\nYou are given an array of integers nums, the...",\nclass Solution:\n def maxSlidingWindow(se...,[],[\nclass Solution:\n def maxSlidingWindow(s...,"{'input': [], 'output': []}",maxSlidingWindow
5,\nYou are given an array of k linked-lists lis...,\n# Definition for singly-linked list.\n# clas...,[],"[\nclass Solution:\n def mergeKLists(self, ...","{'input': [], 'output': []}",mergeKLists
6,"\nGiven the head of a linked list, reverse the...",\n# Definition for singly-linked list.\n# clas...,[],[\nclass Solution:\n def reverseKGroup(self...,"{'input': [], 'output': []}",reverseKGroup
7,\nA path in a binary tree is a sequence of nod...,\n# Definition for a binary tree node.\n# clas...,[],"[\nclass Solution:\n def traverse(self, nod...","{'input': [], 'output': []}",maxPathSum
8,"\nGiven an integer array nums, return the numb...","\nclass Solution:\n def reversePairs(self, ...",[],"[\nclass Solution:\n def reversePairs(self,...","{'input': [], 'output': []}",reversePairs
9,\nGiven an m x n board of characters and a lis...,"\nclass Solution:\n def findWords(self, boa...",[],"[\nclass Solution:\n def findWords(self, bo...","{'input': [], 'output': []}",findWords


In [56]:
def apply_lc_prompt(desc, skel):
    prompt = (
        "Your task is to complete the following problem in Python. You are provided with a skeleton code to complete and a description. Output your completed version of the code. "
        f"Description: {desc}"
        "Below is the starting point for your code. \n"
        f"{skel}"
    )

    return prompt.strip()

In [57]:
dataset = data.copy()
dataset["prompt"] = dataset.apply(lambda x: apply_lc_prompt(x["desc"], x["ref"]), axis=1)
#print(dataset.iloc[0].to_dict())

In [58]:
#import nltk.translate.bleu_score


def eval_bleu(model, tokenizer, dataset, max_new_tokens=1000):
    outputs = []

    for row in tqdm(dataset.to_dict(orient="records")):
        messages = [
            {"role": "system", "content": ""},
            {"role": "user", "content": row["prompt"]},
        ]

        output = run_model(model=model, tokenizer=tokenizer, messages=messages, max_new_tokens=max_new_tokens)

        outputs.append(output)
    
    r, h = [], []
    for idx, row in tqdm(enumerate(dataset.to_dict(orient="records"))):
        refs_in_dataset = row["ref"]
        references = []
        for real_code_solution in refs_in_dataset:
            references.append(real_code_solution.split())
        hypothesis = outputs[idx].split()
        
        r.append(references)
        h.append(hypothesis)
    
    bleu_score = nltk.translate.bleu_score.corpus_bleu(r, h, weights=(1,0,0,0))
    return bleu_score, outputs


In [59]:
df = dataset.copy()
bleu_score, outputs = eval_bleu(model, tokenizer, df)
print(f"Bleu: {bleu_score}")
df["output"] = outputs
display(df)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
  2%|▏         | 1/50 [00:16<13:42, 16.78s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
  4%|▍         | 2/50 [00:33<13:28, 16.85s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
  6%|▌         | 3/50 [00:52<13:52, 17.71s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Ple

Bleu: 0.4349196344153798





Unnamed: 0,desc,skeleton,examples,ref,test,func,prompt,output
0,\nGiven n non-negative integers representing a...,"\nclass Solution:\n def trap(self, height: ...",[],"[\nclass Solution:\n def trap(self, height:...","{'input': [[0, 1, 0, 2, 1, 0, 1, 3, 2, 1, 2, 1...",trap,Your task is to complete the following problem...,from typing import List\n\nclass Solution:\n ...
1,\nGiven an array of integers heights represent...,\nclass Solution:\n def largestRectangleAre...,[],[\nclass Solution:\n def largestRectangleAr...,"{'input': [], 'output': []}",largestRectangleArea,Your task is to complete the following problem...,from typing import List\n\nclass Solution:\n ...
2,\nGiven two sorted arrays nums1 and nums2 of s...,\nclass Solution:\n def findMedianSortedArr...,[],[\nclass Solution:\n def findMedianSortedAr...,"{'input': [], 'output': []}",findMedianSortedArrays,Your task is to complete the following problem...,from typing import List\n\nclass Solution:\n ...
3,\nGiven two strings s and t of lengths m and n...,"\nclass Solution:\n def minWindow(self, s: ...",[],"[\nclass Solution:\n def minWindow(self, s:...","{'input': [], 'output': []}",minWindow,Your task is to complete the following problem...,from collections import defaultdict\n\nclass S...
4,"\nYou are given an array of integers nums, the...",\nclass Solution:\n def maxSlidingWindow(se...,[],[\nclass Solution:\n def maxSlidingWindow(s...,"{'input': [], 'output': []}",maxSlidingWindow,Your task is to complete the following problem...,from collections import deque\n\nclass Solutio...
5,\nYou are given an array of k linked-lists lis...,\n# Definition for singly-linked list.\n# clas...,[],"[\nclass Solution:\n def mergeKLists(self, ...","{'input': [], 'output': []}",mergeKLists,Your task is to complete the following problem...,"from typing import List, Optional\n\n# Definit..."
6,"\nGiven the head of a linked list, reverse the...",\n# Definition for singly-linked list.\n# clas...,[],[\nclass Solution:\n def reverseKGroup(self...,"{'input': [], 'output': []}",reverseKGroup,Your task is to complete the following problem...,"class Solution:\n def reverseKGroup(self, h..."
7,\nA path in a binary tree is a sequence of nod...,\n# Definition for a binary tree node.\n# clas...,[],"[\nclass Solution:\n def traverse(self, nod...","{'input': [], 'output': []}",maxPathSum,Your task is to complete the following problem...,"class Solution:\n def maxPathSum(self, root..."
8,"\nGiven an integer array nums, return the numb...","\nclass Solution:\n def reversePairs(self, ...",[],"[\nclass Solution:\n def reversePairs(self,...","{'input': [], 'output': []}",reversePairs,Your task is to complete the following problem...,"from bisect import bisect_left, bisect_right\n..."
9,\nGiven an m x n board of characters and a lis...,"\nclass Solution:\n def findWords(self, boa...",[],"[\nclass Solution:\n def findWords(self, bo...","{'input': [], 'output': []}",findWords,Your task is to complete the following problem...,"class Solution:\n def findWords(self, board..."


In [60]:
print(df.iloc[0].to_dict()["output"])

from typing import List

class Solution:
    def trap(self, height: List[int]) -> int:
        left = 0
        right = len(height) - 1
        left_max = height[left]
        right_max = height[right]
        water = 0

        while left < right:
            if left_max < right_max:
                left += 1
                left_max = max(left_max, height[left])
                water += left_max - height[left]
            else:
                right -= 1
                right_max = max(right_max, height[right])
                water += right_max - height[right]

        return water



In [None]:
from typing import List

def eval_test_case(code, test_inputs, expected_outputs, function_name):
    

    print(code)
    print(test_inputs)
    print(expected_outputs)

    try:
        namespace = {'List': List}
        exec(code, namespace)
        Solution = namespace.get("Solution")
        solution_instance = Solution()
        

        #func = namespace.get(function_name)
        func = getattr(solution_instance, function_name, None)
        if not callable(func):
            raise ValueError(f"Function '{function_name}' is not defined or callable")
        
        passed = 0
        total = len(test_inputs)

        for test_input, expected_output in zip(test_inputs, expected_outputs):
            try:
                result = func(test_input)
                if result == expected_output:
                    passed += 1
                #else:
                    #print(f"Test with input {test_input} failed. Expected {expected_output}, got {result}")
            except Exception as e:
                print(f"Test with input {test_input} on {function_name} failed due to error: {e}")
        
        return passed / total if total > 0 else 0.0
    
    except Exception as e:
        print(f"aw man got error {e}")
        # code messed up, penalize
        return 0.0

In [62]:
#TODO: process dataset to run eval_test_case 
code_blocks = outputs # list of strings (amazing)

print(outputs)
print(type(outputs[0]))




['from typing import List\n\nclass Solution:\n    def trap(self, height: List[int]) -> int:\n        left = 0\n        right = len(height) - 1\n        left_max = height[left]\n        right_max = height[right]\n        water = 0\n\n        while left < right:\n            if left_max < right_max:\n                left += 1\n                left_max = max(left_max, height[left])\n                water += left_max - height[left]\n            else:\n                right -= 1\n                right_max = max(right_max, height[right])\n                water += right_max - height[right]\n\n        return water\n', 'from typing import List\n\nclass Solution:\n    def largestRectangleArea(self, heights: List[int]) -> int:\n        stack = [-1]\n        max_area = 0\n        \n        for i in range(len(heights)):\n            while stack[-1]!= -1 and heights[i] <= heights[stack[-1]]:\n                height = heights[stack.pop()]\n                width = i - stack[-1] - 1\n                ma

In [63]:
total_pass_rate = 0

for idx, code_block in enumerate(code_blocks):
    test_dict = df.iloc[idx].to_dict()["test"]

    test_inputs = test_dict["input"]
    expected_outputs = test_dict["output"]
    function_name = df.iloc[idx].to_dict()["func"]
    
    total_pass_rate += eval_test_case(code_block, test_inputs, expected_outputs, function_name)
    break


avg_pass_rate = total_pass_rate/len(code_blocks)
print(avg_pass_rate)

from typing import List

class Solution:
    def trap(self, height: List[int]) -> int:
        left = 0
        right = len(height) - 1
        left_max = height[left]
        right_max = height[right]
        water = 0

        while left < right:
            if left_max < right_max:
                left += 1
                left_max = max(left_max, height[left])
                water += left_max - height[left]
            else:
                right -= 1
                right_max = max(right_max, height[right])
                water += right_max - height[right]

        return water

[[0, 1, 0, 2, 1, 0, 1, 3, 2, 1, 2, 1], [4, 2, 0, 3, 2, 5]]
[6, 9]
Test with input [0, 1, 0, 2, 1, 0, 1, 3, 2, 1, 2, 1] on trap failed due to error: 'Solution' object has no attribute 'func'
Test with input [4, 2, 0, 3, 2, 5] on trap failed due to error: 'Solution' object has no attribute 'func'
0.0
