In [1]:
import requests
from datasets import load_dataset
from tqdm import tqdm

def make_openai_request(content: str, model: str = "string", temperature: float = 0, max_tokens: int = 0, url: str = "http://stargate:8000/v1/chat/completions", timeout: float = 600):
    """Send one single-turn chat-completion request and return the decoded JSON body.

    Args:
        content: Text sent as the only ``user`` message.
        model: Model identifier placed in the payload.
        temperature: Sampling temperature placed in the payload.
        max_tokens: Completion token limit placed in the payload.
        url: Endpoint the payload is POSTed to.
        timeout: Seconds to wait on the server before giving up. Without it a
            stalled server would block the whole run indefinitely.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.

    NOTE(review): the defaults ``model="string"`` and ``max_tokens=0`` look like
    placeholder values; callers in this notebook always override them. Confirm
    before relying on the defaults.
    """
    payload = {
        "model": model,
        "messages": [
            { "role": "user", "content": content }
        ],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    headers = { "Content-Type": "application/json" }
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    # Fail here, next to the request, instead of returning an error body that
    # later breaks with a KeyError when the caller indexes ["choices"].
    response.raise_for_status()
    return response.json()

def process_item(prompt: str, model: str = "meta-llama/Meta-Llama-3-8B-Instruct", max_tokens: int = 2048, temperature: float = 1):
    """Request a completion for one prompt and pair it with that prompt.

    Args:
        prompt: Text sent to the model as the user message.
        model: Model identifier forwarded to ``make_openai_request``.
        max_tokens: Completion token limit forwarded to ``make_openai_request``.
        temperature: Sampling temperature forwarded to ``make_openai_request``.

    Returns:
        A dict with the original ``'prompt'`` and the decoded ``'response'``
        returned by ``make_openai_request``.
    """
    response = make_openai_request(prompt,
                                   model=model,
                                   max_tokens=max_tokens,
                                   temperature=temperature)
    return {'prompt': prompt, 'response': response}

def process_dataset(dataset_name: str, split: str = "test", limit: "int | None" = None):
    """Run ``process_item`` over the ``prompt`` field of every row in one dataset split.

    Args:
        dataset_name: Identifier passed to ``load_dataset``.
        split: Name of the split to process (defaults to ``"test"``, the split
            this notebook works with).
        limit: If given, only the first ``limit`` rows are processed. Useful for
            a quick smoke test before the full, slow run.

    Returns:
        A list with one ``process_item`` result per processed row, in row order.

    Raises:
        ValueError: If ``limit`` is negative.
    """
    if limit is not None and limit < 0:
        raise ValueError(f"limit must be non-negative, got {limit}")

    split_data = load_dataset(dataset_name, split=split)
    if limit is not None:
        # Clamp so a limit larger than the split simply processes everything.
        split_data = split_data.select(range(min(limit, len(split_data))))

    results = []
    for item in tqdm(split_data, desc="Processing prompts"):
        results.append(process_item(item['prompt']))
    return results

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Dataset identifier shared by every load/push call in this notebook.
DATASET_NAME = "verifiers-for-code/humaneval_plan_generation"

# Slow cell: issues one completion request per prompt in the dataset.
results = process_dataset(dataset_name=DATASET_NAME)

Processing prompts: 100%|██████████| 164/164 [33:50<00:00, 12.38s/it]


In [3]:
# Spot-check: print the generated text of the second result (index 1), i.e. the
# message content of its first choice.
print(results[1]["response"]["choices"][0]["message"]["content"])

<thinking>
To create an effective action plan for this problem, I need to break down the solution into clear, logical steps that guide the implementation without providing the full code. The plan should cover:
1. Understanding the problem requirements
2. Defining a helper function to validate a group of parentheses
3. Implementing the main function to separate groups of parentheses
4. Iterating through the input string and handling opening and closing parentheses
5. Returning the list of separated groups
</thinking>

<plan>
Action Plan:
1. Define the `separate_paren_groups` function, which takes a string as input.
2. Create a helper function `is_valid_group` to validate a group of parentheses:
    a. Initialize a stack to store opening parentheses
    b. Iterate through the group, adding opening parentheses to the stack and removing closing parentheses
    c. Return `True` if the stack is empty (valid group) and `False` otherwise
3. Initialize an empty list `groups` to store the separa

In [24]:
# Spot-check: show the raw (repr) generated text of the first result, so escape
# sequences such as \n stay visible.
results[0]["response"]["choices"][0]["message"]["content"]

'<thinking>\nTo design an action plan for this problem, I\'ll break down the solution into logical steps that guide the implementation without giving away the exact code. The plan should cover the main components of the solution, including the initialization of variables, the loop to compare elements, the calculation of distances, and the final return value. I\'ll provide clear instructions for each step, using the existing code structure and comments as a reference. The plan should be comprehensive enough to guide the implementation without providing the exact solution.\n</thinking>\n\n<plan>\ndef has_close_elements(numbers: List[float], threshold: float) -> bool:\n    """ Check if in given list of numbers, are any two numbers closer to each other than\n    given threshold.\n    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)\n    False\n    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)\n    True\n    """\n\n    # Step 1: Initialize a variable to store the closest distance\

In [4]:
# Name of the dataset column that will hold the newly generated plans; also
# reused below as the local save_to_disk directory name.
NEW_COL_NAME = "l3_plans_new"

In [5]:

from datasets import load_dataset, Dataset
dataset = load_dataset(DATASET_NAME)

# Extract the 'test' split we worked with
test_data = dataset['test']

# `results` was produced from a separate load of the dataset, so verify it still
# lines up row-for-row with this copy before attaching it as a column.
assert len(results) == len(test_data), (
    f"expected {len(test_data)} results, got {len(results)}"
)
assert [result["prompt"] for result in results] == list(test_data["prompt"]), (
    "results are not aligned with the rows of the 'test' split"
)

# Every response must carry completions; anything else cannot be turned into a plan.
responses_without_choices = [
    index for index, result in enumerate(results)
    if "choices" not in result["response"]
]
assert not responses_without_choices, (
    f"responses without 'choices' at rows: {responses_without_choices}"
)

# Create a list of the new column data.
# NOTE(review): despite the name, these plans come from the model configured in
# process_item (Llama-3), not Phi-3; name kept in case other cells reference it.
phi3_plans = [result["response"]["choices"][0]["message"]["content"] for result in results]

# Drop a stale copy of the column so this cell can be re-run safely.
if NEW_COL_NAME in test_data.column_names:
    test_data = test_data.remove_columns(NEW_COL_NAME)

# Add the new column to the dataset
test_data = test_data.add_column(NEW_COL_NAME, phi3_plans)

test_data.column_names


['task_id',
 'prompt',
 'canonical_solution',
 'test',
 'entry_point',
 'sonnet-3.5_gold_plans',
 'cleaned_sonnet-3.5_gold_plans',
 'generated_phi3_baseline',
 'generated_phi3_plan_generation',
 'phi3-planner-plans',
 'cleaned-phi3-planner-plans',
 'self_planning_Phi-3-mini-4k-instruct',
 'cleaned-self_planning_Phi-3-mini-4k-instruct',
 'phi3-planner-granular',
 'cleaned-phi3-planner-granular',
 'l3_plans_non_gran',
 'cleaned-l3_plans_non_gran',
 'l3_plans_new']

In [29]:
# Keep a local copy of the augmented split; the target directory is named after
# the new column.
test_data.save_to_disk(NEW_COL_NAME)

Saving the dataset (1/1 shards): 100%|██████████| 164/164 [00:00<00:00, 9306.30 examples/s] 


In [6]:
# Upload the augmented split to the DATASET_NAME repository as its "test" split.
test_data.push_to_hub(DATASET_NAME, split="test")

Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 40.64ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  1.34it/s]


CommitInfo(commit_url='https://huggingface.co/datasets/verifiers-for-code/humaneval_plan_generation/commit/a2348544b90d5c81741078d84c115f5428b3124c', commit_message='Upload dataset', commit_description='', oid='a2348544b90d5c81741078d84c115f5428b3124c', pr_url=None, pr_revision=None, pr_num=None)

In [14]:
# Load the dataset again by name (rebinds `dataset`) so its columns can be
# inspected in the next cell.
dataset = load_dataset(DATASET_NAME)

Downloading readme: 100%|██████████| 1.04k/1.04k [00:00<00:00, 3.89MB/s]
Downloading data: 100%|██████████| 1.17M/1.17M [00:00<00:00, 3.28MB/s]
Generating test split: 100%|██████████| 164/164 [00:00<00:00, 11323.08 examples/s]


In [15]:
# List the columns of the reloaded 'test' split.
# NOTE(review): the recorded output does not include the NEW_COL_NAME column and
# the execution counts in this notebook are out of order; re-run on a fresh
# kernel to confirm the pushed column is actually present.
dataset['test'].column_names

['task_id',
 'prompt',
 'canonical_solution',
 'test',
 'entry_point',
 'sonnet-3.5_gold_plans',
 'cleaned_sonnet-3.5_gold_plans',
 'generated_phi3_baseline',
 'generated_phi3_plan_generation',
 'phi3-planner-plans',
 'cleaned-phi3-planner-plans',
 'self_planning_Phi-3-mini-4k-instruct',
 'cleaned-self_planning_Phi-3-mini-4k-instruct',
 'phi3-planner-granular',
 'cleaned-phi3-planner-granular',
 'l3_plans_non_gran']