In [9]:
import requests
from datasets import load_dataset
from tqdm import tqdm

def make_openai_request(content: str, model: str = "string", temperature: float = 0, max_tokens: int = 0, url: str = "http://stargate:8000/v1/chat/completions", timeout=None):
    """Send a single-turn chat completion request and return the decoded JSON reply.

    Args:
        content: Text sent as the only ``user`` message.
        model: Value of the ``model`` field in the request payload.
        temperature: Value of the ``temperature`` field in the payload.
        max_tokens: Value of the ``max_tokens`` field in the payload.
        url: Endpoint the payload is POSTed to.
        timeout: Optional timeout in seconds forwarded to ``requests.post``.
            ``None`` (the default) waits indefinitely, which is the previous
            behaviour.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    payload = {
        "model": model,
        "messages": [
            { "role": "user", "content": content }
        ],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    headers = { "Content-Type": "application/json" }
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    # Fail here on an HTTP error instead of handing an error body to callers
    # that immediately index into ["choices"].
    response.raise_for_status()
    return response.json()

def process_item(prompt: str):
    """Request a completion for ``prompt`` and pair the prompt with the reply.

    The request always uses the Phi-3 mini 4k instruct model with a
    temperature of 1 and a budget of 2048 tokens.

    Returns:
        A dict with the original ``prompt`` and the raw ``response`` returned
        by ``make_openai_request``.
    """
    request_options = {
        "model": "microsoft/Phi-3-mini-4k-instruct",
        "max_tokens": 2048,
        "temperature": 1,
    }
    completion = make_openai_request(prompt, **request_options)
    return dict(prompt=prompt, response=completion)

def process_dataset(dataset_name: str, split: str = "test", limit=None):
    """Generate a model response for every prompt in one split of a dataset.

    Args:
        dataset_name: Name passed to ``load_dataset``.
        split: Split to iterate over. Defaults to ``"test"``, the split this
            function has always used.
        limit: Optional maximum number of items to process, for a quick smoke
            run. ``None`` (the default) processes the whole split.

    Returns:
        A list with one ``process_item`` result per processed item, in the
        order the items appear in the split.
    """
    dataset = load_dataset(dataset_name)
    split_data = dataset[split]
    results = []
    for index, item in enumerate(tqdm(split_data, desc="Processing prompts")):
        # Stop early when a cap was requested.
        if limit is not None and index >= limit:
            break
        results.append(process_item(item['prompt']))
    return results

In [10]:
# Hub dataset whose prompts are sent to the model.
DATASET_NAME = "verifiers-for-code/humaneval_plan_generation"
# One request per prompt in the 'test' split; the recorded run below took
# roughly 38 minutes for 164 prompts.
results = process_dataset(DATASET_NAME)

Processing prompts: 100%|██████████| 164/164 [38:19<00:00, 14.02s/it]


In [12]:
# Spot-check one generated plan (24th item from the end); print() renders the
# newlines so the text is readable.
print(results[-24]["response"]["choices"][0]["message"]["content"])

<thinking>
To generate a detailed plan for implementing the fix_spaces function, I need to break down the problem into clear, logical steps. The function needs to process a string, replace spaces with underscores, and handle consecutive spaces. The key aspects to consider are:

1. Processing the input string
2. Replacing spaces with underscores
3. Handling consecutive spaces
4. Returning the modified string

I'll create a step-by-step plan that guides the implementation without providing the actual code. The plan will be detailed enough for a model to follow and implement the function correctly.
</thinking>

<plan>
1. Process the input string:
   - Store the input string in a variable (e.g., text)

2. Replace spaces with underscores:
   - Use the string method replace() to replace all spaces in the string with underscores
   - Assign the result back to the text variable

3. Handle consecutive spaces:
   - Split the modified string into a list of substrings using the split() method
   -

In [13]:
# Show the raw string of the first generated plan (displayed as a repr, so
# newlines appear as \n).
results[0]["response"]["choices"][0]["message"]["content"]

"<thinking>\nTo generate a detailed plan for implementing the has_close_elements function, I'll break down the problem into clear, logical steps. The function needs to check if any two numbers in the input list are closer to each other than a given threshold. The key aspects to consider are:\n\n1. Understanding the input parameters\n2. Iterating through the list of numbers\n3. Comparing each number with the rest of the list\n4. Checking the distance between numbers and the threshold\n5. Returning the result\n\nI'll create a step-by-step plan that guides the implementation without providing the actual code. The plan will be detailed enough for a model to follow and implement the function correctly.\n</thinking>\n\n<plan>\n1. Define the function with the required parameters:\n   - `numbers`: a list of float numbers\n   - `threshold`: a float number representing the minimum distance between two numbers\n\n2. Initialize a variable to store the result (default: False)\n\n3. Iterate through 

In [26]:
# Pull the generated text out of every chat-completion response, in the same
# order as `results`.
# NOTE(review): this cell's execution count (26) is out of sequence, and the
# same comprehension is repeated in the cell that adds the column to the
# dataset - one of the two is redundant.
phi3_plans = [result["response"]["choices"][0]["message"]["content"] for result in results]

In [14]:
# Name of the dataset column that will hold the generated plans.
NEW_COL_NAME = "phi3-planner-granular"

In [15]:
from datasets import load_dataset, Dataset
dataset = load_dataset(DATASET_NAME)

# Extract the 'test' split we worked with
test_data = dataset['test']

# Create a list of the new column data
phi3_plans = [result["response"]["choices"][0]["message"]["content"] for result in results]

# Every row needs exactly one plan; stop early with a clear message otherwise.
assert len(phi3_plans) == len(test_data), (
    f"expected {len(test_data)} plans, got {len(phi3_plans)}"
)

# Once the column has been pushed to the Hub, the freshly loaded split already
# contains it and add_column would fail on the duplicate name. Drop the stale
# copy first so this cell can be re-run safely.
if NEW_COL_NAME in test_data.column_names:
    test_data = test_data.remove_columns(NEW_COL_NAME)

# Add the new column to the dataset
test_data = test_data.add_column(NEW_COL_NAME, phi3_plans)

test_data.column_names


['task_id',
 'prompt',
 'canonical_solution',
 'test',
 'entry_point',
 'sonnet-3.5_gold_plans',
 'cleaned_sonnet-3.5_gold_plans',
 'generated_phi3_baseline',
 'generated_phi3_plan_generation',
 'phi3-planner-plans',
 'cleaned-phi3-planner-plans',
 'self_planning_Phi-3-mini-4k-instruct',
 'cleaned-self_planning_Phi-3-mini-4k-instruct',
 'phi3-planner-granular']

In [16]:
# Upload the augmented split to the DATASET_NAME repository as its "test" split.
test_data.push_to_hub(DATASET_NAME, split="test")

Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 56.93ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  1.16it/s]


CommitInfo(commit_url='https://huggingface.co/datasets/verifiers-for-code/humaneval_plan_generation/commit/af2bb0cfd849e85ae9848a6d2d322f9fb7320f64', commit_message='Upload dataset', commit_description='', oid='af2bb0cfd849e85ae9848a6d2d322f9fb7320f64', pr_url=None, pr_revision=None, pr_num=None)

In [17]:
# Load the dataset again after the push, so the next cell can inspect its columns.
dataset = load_dataset(DATASET_NAME)

Downloading readme: 100%|██████████| 936/936 [00:00<00:00, 4.19MB/s]
Downloading data: 100%|██████████| 979k/979k [00:00<00:00, 4.22MB/s]
Generating test split: 100%|██████████| 164/164 [00:00<00:00, 11555.33 examples/s]


In [20]:
# Confirm the reloaded 'test' split contains the new column.
dataset['test'].column_names

['task_id',
 'prompt',
 'canonical_solution',
 'test',
 'entry_point',
 'sonnet-3.5_gold_plans',
 'cleaned_sonnet-3.5_gold_plans',
 'generated_phi3_baseline',
 'generated_phi3_plan_generation',
 'phi3-planner-plans',
 'cleaned-phi3-planner-plans',
 'self_planning_Phi-3-mini-4k-instruct',
 'cleaned-self_planning_Phi-3-mini-4k-instruct',
 'phi3-planner-granular']