# Code Synthesis

In [None]:
import locale
locale.getpreferredencoding = lambda: "UTF-8" # this is needed to get rid of weird colab locale error
# if you are still running into issues, please restart the runtime to initialize a new environment

In [None]:
# installing the accelerate library
!pip install accelerate

In [None]:
import torch

from torch.utils.data import DataLoader
from transformers import AdamW, AutoTokenizer, AutoModelForCausalLM

def load_base_model(checkpoint: str = "Salesforce/codegen-2B-mono"):
    """Load a causal language model together with its tokenizer.

    Args:
        checkpoint: Identifier passed to both ``from_pretrained`` calls, so the
            tokenizer and the model always come from the same checkpoint.
            Defaults to the CodeGen checkpoint this notebook was written for.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    # Weights are requested as float16 and device_map='auto' leaves their
    # placement to the library.
    model = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        device_map='auto',
        torch_dtype=torch.float16,
    )
    return model, tokenizer


model, tokenizer = load_base_model()

In [None]:
# https://github.com/evalplus/evalplus
!pip install evalplus==0.2.0

In [None]:
# obtain the humaneval dataset
from evalplus.data import get_human_eval_plus

dataset = get_human_eval_plus()
# feel free to play around with the dataset to see what it looks like!

In [None]:
# make the folder to save the results (-p: no error if it already exists, so the cell can be re-run)
!mkdir -p codegen_results

In [None]:
import os
from tqdm.auto import tqdm


def program_synthesis(input_prompt: str, model) -> str:
    """Return the synthesized solution for ``input_prompt`` (placeholder).

    This is still a stub: neither argument is used and an empty string is
    always returned.

    TODO: implement greedy sampling solution using codegen.
      - the returned text should be a complete function
      - some post-processing may be needed to remove irrelevant tokens
    """
    solution = ""
    return solution


def complete_base_humaneval(model, dataset, workdir):
    """Write one ``program_synthesis`` solution file per task in ``dataset``.

    For every ``(task_id, problem)`` pair, ``problem['prompt']`` is passed to
    ``program_synthesis`` and the returned text is written to
    ``<workdir>/<name>/0.py``, where ``<name>`` is ``task_id`` with ``"/"``
    replaced by ``"_"``.

    Args:
        model: Forwarded unchanged to ``program_synthesis``.
        dataset: Mapping from task id to a problem mapping that has a
            ``'prompt'`` entry.
        workdir: Directory under which the per-task folders are created.
    """
    for task_id, problem in tqdm(dataset.items()):
        # Task ids contain "/", which cannot be part of a folder name.
        name = task_id.replace("/", "_")

        prompt = problem['prompt']

        solution = program_synthesis(prompt, model)
        task_dir = os.path.join(workdir, name)
        os.makedirs(task_dir, exist_ok=True)

        # Explicit UTF-8 so that non-ASCII characters in a solution do not
        # depend on the platform's default (locale) encoding.
        with open(os.path.join(task_dir, '0.py'), 'w', encoding='utf-8') as f:
            f.write(solution)


# generate the solutions produced by codegen
complete_base_humaneval(model, dataset, "codegen_results")

In [None]:
# now please take a look at the solutions produced by codegen in the folder
# we will now evaluate the solutions
# note you can pass "--i-just-wanna-run" to this command to
# recompute the results IF and ONLY IF you have made some updates to each solution file :)
!evalplus.evaluate --dataset humaneval --samples codegen_results

In [None]:
import json

def _count_successes(outcomes) -> int:
    """Count the entries of ``outcomes`` whose first element is ``"success"``."""
    return sum(1 for outcome in outcomes or () if outcome[0] == "success")


def check_which_failed(workdir: str, dataset):
    """List the tasks that did not fully pass according to ``eval_results.json``.

    The file ``<workdir>/eval_results.json`` is read and, for every task id in
    ``dataset``, the number of ``"success"`` entries under ``'base'`` and
    ``'plus'`` is compared with that task's ``'nfiles'`` count.

    A task is reported as failed when not every file succeeded. A task that
    is missing from the results, or that has no evaluated files, is also
    reported as failed instead of raising.

    Args:
        workdir: Directory containing ``eval_results.json``.
        dataset: Mapping (or other iterable) of task ids to check.

    Returns:
        A tuple ``(failed_humaneval, failed_humaneval_plus)`` of task-id lists,
        each in the iteration order of ``dataset``.
    """
    with open(os.path.join(workdir, "eval_results.json"), "r", encoding="utf-8") as f:
        results = json.load(f)

    per_task = results['eval']
    failed_humaneval = []
    failed_humaneval_plus = []

    for task_id in dataset:
        entry = per_task.get(task_id, {})
        total = entry.get('nfiles', 0)

        # Compare integer counts rather than a float ratio; this also avoids
        # dividing by zero when no file was evaluated for the task.
        base_passed = total > 0 and _count_successes(entry.get('base')) == total
        plus_passed = total > 0 and _count_successes(entry.get('plus')) == total

        if not base_passed:
            failed_humaneval.append(task_id)
        if not plus_passed:
            failed_humaneval_plus.append(task_id)

    return failed_humaneval, failed_humaneval_plus


In [None]:
# you can use this to check which problems the model did not solve correctly
failed_humaneval, failed_humaneval_plus = check_which_failed("codegen_results", dataset)

# Improve LLM Code Synthesis

In [None]:
# make the folder to save the results (-p: no error if it already exists, so the cell can be re-run)
!mkdir -p codegen_results_improved

In [None]:
def program_synthesis_improved(input_prompt: str, model, **kwargs) -> str:
    """Return an improved synthesized solution for ``input_prompt`` (placeholder).

    This is still a stub: the arguments are ignored and an empty string is
    always returned.

    TODO: implement solution using codegen.
      - as with the baseline function, return the complete solution
      - extra parameters can be supplied through ``kwargs`` to adjust generation
    """
    solution = ""
    return solution


def complete_improve_humaneval(model, dataset, workdir, **kwargs):
    """Write one ``program_synthesis_improved`` solution file per task.

    For every ``(task_id, problem)`` pair, ``problem['prompt']`` is passed to
    ``program_synthesis_improved`` and the returned text is written to
    ``<workdir>/<name>/0.py``, where ``<name>`` is ``task_id`` with ``"/"``
    replaced by ``"_"``.

    Args:
        model: Forwarded unchanged to ``program_synthesis_improved``.
        dataset: Mapping from task id to a problem mapping that has a
            ``'prompt'`` entry.
        workdir: Directory under which the per-task folders are created.
        **kwargs: Optional extra keyword arguments forwarded as-is to
            ``program_synthesis_improved``; omitting them reproduces the
            plain three-argument call.
    """
    for task_id, problem in tqdm(dataset.items()):
        # Task ids contain "/", which cannot be part of a folder name.
        name = task_id.replace("/", "_")

        prompt = problem['prompt']

        solution = program_synthesis_improved(prompt, model, **kwargs)
        task_dir = os.path.join(workdir, name)
        os.makedirs(task_dir, exist_ok=True)

        # Explicit UTF-8 so that non-ASCII characters in a solution do not
        # depend on the platform's default (locale) encoding.
        with open(os.path.join(task_dir, '0.py'), 'w', encoding='utf-8') as f:
            f.write(solution)


# generate the solutions produced by codegen
complete_improve_humaneval(model, dataset, "codegen_results_improved")

In [None]:
# note you can pass "--i-just-wanna-run" to this command to
# recompute the results IF and ONLY IF you have made some updates to each solution file :)
# on colab you may need to pipe in the confirmation (yes Y | command)
!yes Y | evalplus.evaluate --dataset humaneval --samples codegen_results_improved --i-just-wanna-run