In [None]:
from openai import OpenAI
client = OpenAI(
    base_url="http://stargate:8000/v1/chat/completions",
    api_key="token-abc123",
)

completion = client.chat.completions.create(
  model="microsoft/Phi-3-mini-4k-instruct",
  messages=[
    {"role": "user", "content": "Hello!"}
  ],
  max_tokens=100
)

print(completion.choices[0].message)

In [19]:
import requests
from datasets import load_dataset
from tqdm import tqdm

def make_openai_request(content: str, model: str = "string", temperature: float = 0, max_tokens: int = 0, url: str = "http://stargate:8000/v1/chat/completions"):
    payload = {
        "model": model,
        "messages": [
            { "role": "user", "content": content }
        ],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    headers = { "Content-Type": "application/json" }
    response = requests.post(url, headers=headers, json=payload)
    return response.json()

def process_item(prompt: str):
    response = make_openai_request(prompt, 
                                   model = "microsoft/Phi-3-mini-4k-instruct", 
                                   max_tokens=2048,
                                   temperature=1)
    return {'prompt': prompt, 'response': response}

def process_dataset(dataset_name: str):
    dataset = load_dataset(dataset_name)
    train_data = dataset['test']
    results = []
    # count = 0
    for item in tqdm(train_data, desc="Processing prompts"):
        # if count == 5:
        #     break
        result = process_item(item['prompt'])
        results.append(result)
       
        # count += 1
        # print(result['response'])
    return results

In [20]:
DATASET_NAME = "verifiers-for-code/humaneval_plan_generation"
results = process_dataset(DATASET_NAME)

Processing prompts:  87%|████████▋ | 143/164 [32:58<04:49, 13.77s/it]

In [None]:
print(results[0]["response"]["choices"][0]["message"]["content"])

IndexError: list index out of range

In [21]:
results[0]["response"]["choices"][0]["message"]["content"]

'<thinking>\nTo create an effective action plan for this problem, I need to break down the solution into clear, logical steps that guide the implementation without providing the full code. The plan should cover:\n1. Understanding the problem and the threshold\n2. Iterating through the list of numbers\n3. Comparing each number with the rest of the list\n4. Checking the distance between numbers\n5. Returning the result\n</thinking>\n\n<plan>\nAction Plan:\n1. Understand the problem:\n    - The function takes a list of numbers and a threshold as input\n    - The goal is to check if any two numbers in the list are closer to each other than the given threshold\n\n2. Initialize a variable to store the result (default to False)\n\n3. Iterate through the list of numbers:\n    - For each number, iterate through the rest of the list (excluding the current number)\n    - Calculate the absolute difference between the current number and the other number\n    - Check if the difference is less than o

In [31]:
phi3_plans = [result["response"]["choices"][0]["message"]["content"] for result in results]

In [32]:
data = load_dataset(DATASET_NAME)["test"]

Downloading readme: 100%|██████████| 646/646 [00:00<00:00, 3.34MB/s]
Downloading data: 100%|██████████| 486k/486k [00:00<00:00, 1.29MB/s]
Generating test split: 100%|██████████| 164/164 [00:00<00:00, 10575.72 examples/s]


In [33]:
if "phi3_planner_plan" in data.column_names:
    data.remove_column("phi3_planner_plan")
data.add_column("phi3_planner_plan", phi3_plans)

Dataset({
    features: ['task_id', 'prompt', 'canonical_solution', 'test', 'entry_point', 'sonnet-3.5_gold_plans', 'cleaned_sonnet-3.5_gold_plans', 'generated_phi3_baseline', 'generated_phi3_plan_generation', 'phi3_planner_plan'],
    num_rows: 164
})

In [34]:
data.push_to_hub(DATASET_NAME)

Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 110.85ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  5.65it/s]


CommitInfo(commit_url='https://huggingface.co/datasets/verifiers-for-code/humaneval_plan_generation/commit/b661c8268382bdd48e3e962dd2ef797f1bc543a5', commit_message='Upload dataset', commit_description='', oid='b661c8268382bdd48e3e962dd2ef797f1bc543a5', pr_url=None, pr_revision=None, pr_num=None)

In [35]:
data.column_names

['task_id',
 'prompt',
 'canonical_solution',
 'test',
 'entry_point',
 'sonnet-3.5_gold_plans',
 'cleaned_sonnet-3.5_gold_plans',
 'generated_phi3_baseline',
 'generated_phi3_plan_generation']

In [38]:
from datasets import load_dataset, Dataset
dataset = load_dataset(DATASET_NAME)

# Extract the 'test' split we worked with
test_data = dataset['test']

# Create a list of the new column data
phi3_plans = [result["response"]["choices"][0]["message"]["content"] for result in results]

# Add the new column to the dataset
test_data = test_data.add_column("phi3-planner-plans", phi3_plans)

test_data.column_names


['task_id',
 'prompt',
 'canonical_solution',
 'test',
 'entry_point',
 'sonnet-3.5_gold_plans',
 'cleaned_sonnet-3.5_gold_plans',
 'generated_phi3_baseline',
 'generated_phi3_plan_generation',
 'phi3-planner-plans']

In [39]:
test_data.push_to_hub(DATASET_NAME, split="test")

Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 92.17ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  1.80it/s]


CommitInfo(commit_url='https://huggingface.co/datasets/verifiers-for-code/humaneval_plan_generation/commit/d9ed50c2be0a333697ac9a1666be1a9ec2a49063', commit_message='Upload dataset', commit_description='', oid='d9ed50c2be0a333697ac9a1666be1a9ec2a49063', pr_url=None, pr_revision=None, pr_num=None)