In [1]:
from human_eval.data import write_jsonl, read_problems
from langchain.prompts import PromptTemplate
import requests
from tqdm.notebook import tqdm
import json
import re
import random

In [3]:
# HTTP endpoint that generate_single() posts completion requests to.
url = 'http://0.0.0.0:8000/v1/completions'

# Read the prompt template text. The encoding is given explicitly so the
# template decodes the same way regardless of the platform's locale default.
with open('prompt_com.txt', 'r', encoding='utf-8') as file:
    template_content = file.read()

# The template is filled through its single "problem_text" variable.
prompt_template = PromptTemplate(
    input_variables=["problem_text"],
    template=template_content)

def enhance_prompt(item, max_cases=2, seed=42):
    """
    Append a few boolean test cases from a task's test code to its prompt.

    The test code is scanned for ``assert candidate(...) == True/False``
    statements. One ``True`` case and one ``False`` case are picked when
    available, then further distinct cases are added until ``max_cases`` is
    reached. The chosen cases are rendered as indented ``#`` comments after the
    prompt, with ``candidate`` replaced by the task's entry point name.

    Selection uses a private random generator seeded with ``seed``, so the
    result is reproducible and the global ``random`` state is left untouched.

    :param item: task dict; reads ``"prompt"`` (required) and the optional
        ``"test"`` and ``"entry_point"`` keys
    :param max_cases: number of cases to top the selection up to. One True and
        one False case are always included when both exist, so the result may
        hold two cases even if ``max_cases`` is smaller than 2
    :param seed: seed for the case selection
    :return: the prompt with the comment block appended, or the unmodified
        prompt when no matching assert is found
    """
    prompt = item["prompt"]
    test_code = item.get("test", "")
    function_name = item.get('entry_point', '')

    asserts = re.findall(r"assert (candidate\(.*?\)\s*==\s*(?:True|False))", test_code)

    if not asserts:
        return prompt

    # A dedicated generator yields the same picks as seeding the module-level
    # RNG would, without resetting random state used elsewhere in the program.
    rng = random.Random(seed)

    true_cases = [a for a in asserts if a.endswith("True")]
    false_cases = [a for a in asserts if a.endswith("False")]

    selected = []
    if true_cases:
        selected.append(rng.choice(true_cases))
    if false_cases:
        selected.append(rng.choice(false_cases))

    if len(selected) < max_cases:
        # De-duplicate while keeping source order: iterating a set of strings
        # depends on hash randomization, which would make the sample differ
        # between interpreter runs even with a fixed seed.
        extra = [a for a in dict.fromkeys(asserts) if a not in selected]
        if extra:
            selected.extend(rng.sample(extra, min(max_cases - len(selected), len(extra))))

    selected = [v.replace('candidate', function_name) for v in selected]
    comment_cases = "    # some more testing cases:\n" + "\n".join(f"    # {case}" for case in selected)

    enhanced_prompt = prompt.rstrip() + "\n" + comment_cases
    return enhanced_prompt

def process_output(response: str, task_description: str) -> str:
    """
    Strip the prompt text out of a generation result.

    HumanEval only needs the generated part for its evaluation, so every
    occurrence of the task description is removed from the response.
    No longer used.

    :param response: the generation result of an LLM
    :param task_description: the prompt and function head string
    :return: the response with all occurrences of the task description removed
    """
    return response.replace(task_description, "")


def generate_single(problem_text: str, temp=.2, n=1):
    """
    Request completions for one problem from the completions endpoint.

    The problem text is inserted into the prompt template and posted to
    ``url`` as a JSON body.

    :param problem_text: the code problem, inserted into the template as-is
    :param temp: generation temperature
    :param n: number of completions to request; forced to 1 when ``temp`` is 0
    :return: list with the ``text`` of every returned choice, or ``['pass']``
        when the server answers with a non-200 status
    """
    # At temperature 0 only a single completion is requested
    # (presumably because repeated samples would be identical).
    sample_count = 1 if temp == 0 else n

    payload = {
        "prompt": prompt_template.format(problem_text=problem_text),
        "max_tokens": 512,
        "temperature": temp,
        # Stop sequences sent with the request: a blank line, the start of a
        # new class/def, or an end-of-sequence marker.
        "stop": ["\n\n", "\nclass ", "\ndef ", "<|im_end|>", "</s>"],
        "top_k": 20,
        "top_p": 0.85,
        "n": sample_count,
        # Disabled option: "repetition_penalty": 1.02
        # NOTE(review): the meaning of the banned token ids depends on the
        # served model's tokenizer; confirm when switching models.
        "logits_processor": [
            {"type": "ban_tokens", "banned_ids": [6385, 750, 1112]}],
    }

    reply = requests.post(
        url,
        headers={"Content-Type": "application/json"},
        data=json.dumps(payload),
    )

    # Any non-200 answer is replaced by a single placeholder completion.
    if reply.status_code != 200:
        return ['pass']

    return [choice['text'] for choice in reply.json()["choices"]]


problems = read_problems('human-eval/data/HumanEval.jsonl.gz')
temps = [0, .05, .1, .2, .4]
ns = 10

# One pass over all problems per temperature; each pass writes its own
# samples file named after the temperature and the requested sample count.
for t in temps:
    samples = []
    for task_id in tqdm(problems):
        function_head = enhance_prompt(problems[task_id])
        generated = generate_single(function_head, t, ns)
        # One record per returned completion.
        samples.extend(
            {"task_id": task_id, "completion": text} for text in generated
        )
    write_jsonl(f"nsamples_{t}_{ns}.jsonl", samples)

  0%|          | 0/164 [00:00<?, ?it/s]

  0%|          | 0/164 [00:00<?, ?it/s]

  0%|          | 0/164 [00:00<?, ?it/s]

  0%|          | 0/164 [00:00<?, ?it/s]

  0%|          | 0/164 [00:00<?, ?it/s]