In [41]:
import requests
from datasets import load_dataset
from tqdm import tqdm

def make_openai_request(content: str, model: str = "string", temperature: float = 0, max_tokens: int = 0, url: str = "http://stargate:8000/v1/chat/completions"):
    payload = {
        "model": model,
        "messages": [
            { "role": "user", "content": content }
        ],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    headers = { "Content-Type": "application/json" }
    response = requests.post(url, headers=headers, json=payload)
    return response.json()

def process_item(prompt: str):
    response = make_openai_request(prompt, 
                                   model = "microsoft/Phi-3-mini-4k-instruct", 
                                   max_tokens=2048,
                                   temperature=1)
    return {'prompt': prompt, 'response': response}

def process_dataset(dataset_name: str):
    dataset = load_dataset(dataset_name)
    train_data = dataset['test']
    results = []
    count = 0
    for item in tqdm(train_data, desc="Processing prompts"):
        if count == 5:
            break
        result = process_item(item['prompt'])
        results.append(result)
       
        count += 1
        # print(result['response'])
    return results

In [42]:
DATASET_NAME = "verifiers-for-code/humaneval_plan_generation"
results = process_dataset(DATASET_NAME)

Processing prompts:   3%|▎         | 5/164 [01:09<36:55, 13.93s/it]


In [49]:
print(results[0]["response"]["choices"][0]["message"]["content"])

<thinking>
To create an action plan for this problem, I'll break down the solution into logical steps and explain the key concepts. The solution uses sorting, a sliding window approach, and comparison of adjacent elements. I'll structure the plan to guide the implementation without providing the exact code.
</thinking>

<plan>
from typing import List


def has_close_elements(numbers: List[float], threshold: float) -> bool:
    """ Check if in given list of numbers, are any two numbers closer to each other than
    given threshold.
    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)
    False
    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)
    True

    Action Plan:
    1. Sort the input list of numbers in ascending order.
    2. Initialize a variable to keep track of the current minimum difference found.
    3. Iterate through the sorted list using a sliding window approach:
       a. For each pair of adjacent elements (i, i+1):
          - Calculate the absolute differenc

In [24]:
results[0]["response"]["choices"][0]["message"]["content"]

'<thinking>\nTo design an action plan for this problem, I\'ll break down the solution into logical steps that guide the implementation without giving away the exact code. The plan should cover the main components of the solution, including the initialization of variables, the loop to compare elements, the calculation of distances, and the final return value. I\'ll provide clear instructions for each step, using the existing code structure and comments as a reference. The plan should be comprehensive enough to guide the implementation without providing the exact solution.\n</thinking>\n\n<plan>\ndef has_close_elements(numbers: List[float], threshold: float) -> bool:\n    """ Check if in given list of numbers, are any two numbers closer to each other than\n    given threshold.\n    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)\n    False\n    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)\n    True\n    """\n\n    # Step 1: Initialize a variable to store the closest distance\

In [25]:
phi3_plans = [result["response"]["choices"][0]["message"]["content"] for result in results]

In [26]:
NEW_COL_NAME = "phi3-good-plans"

In [27]:

from datasets import load_dataset, Dataset
dataset = load_dataset(DATASET_NAME)

# Extract the 'test' split we worked with
test_data = dataset['test']

# Create a list of the new column data
phi3_plans = [result["response"]["choices"][0]["message"]["content"] for result in results]

if NEW_COL_NAME in test_data.column_names:
    test_data = test_data.remove_columns(NEW_COL_NAME)

# Add the new column to the dataset
test_data = test_data.add_column(NEW_COL_NAME, phi3_plans)

test_data.column_names


['task_id',
 'prompt',
 'canonical_solution',
 'test',
 'entry_point',
 'sonnet-3.5_gold_plans',
 'cleaned_sonnet-3.5_gold_plans',
 'generated_phi3_baseline',
 'generated_phi3_plan_generation',
 'phi3-planner-plans',
 'cleaned-phi3-planner-plans',
 'self_planning_Phi-3-mini-4k-instruct',
 'cleaned-self_planning_Phi-3-mini-4k-instruct',
 'phi3-planner-granular',
 'cleaned-phi3-planner-granular',
 'phi3-good-plans']

In [29]:
test_data.save_to_disk("phi3_good_plans")

Saving the dataset (1/1 shards): 100%|██████████| 164/164 [00:00<00:00, 9306.30 examples/s] 


In [30]:
test_data.push_to_hub(DATASET_NAME, split="test")

Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 53.29ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  8.17it/s]


CommitInfo(commit_url='https://huggingface.co/datasets/verifiers-for-code/humaneval_plan_generation/commit/9df8b9a40ffa3e41fffa4e7aa06c4f628c29c8c8', commit_message='Upload dataset', commit_description='', oid='9df8b9a40ffa3e41fffa4e7aa06c4f628c29c8c8', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
dataset = load_dataset(DATASET_NAME)

Downloading readme: 100%|██████████| 936/936 [00:00<00:00, 4.19MB/s]
Downloading data: 100%|██████████| 979k/979k [00:00<00:00, 4.22MB/s]
Generating test split: 100%|██████████| 164/164 [00:00<00:00, 11555.33 examples/s]


In [None]:
dataset['test'].column_names

['task_id',
 'prompt',
 'canonical_solution',
 'test',
 'entry_point',
 'sonnet-3.5_gold_plans',
 'cleaned_sonnet-3.5_gold_plans',
 'generated_phi3_baseline',
 'generated_phi3_plan_generation',
 'phi3-planner-plans',
 'cleaned-phi3-planner-plans',
 'self_planning_Phi-3-mini-4k-instruct',
 'cleaned-self_planning_Phi-3-mini-4k-instruct',
 'phi3-planner-granular']