In [None]:
# Load the RunPod API key from Colab's user secrets rather than hardcoding it
# in the notebook. The secret must be stored under the name 'RUNPOD_API_KEY'.
from google.colab import userdata
RUNPOD_API_KEY = userdata.get('RUNPOD_API_KEY')

In [None]:
import requests
import time

def run_inference(
    api_key: str,
    endpoint_id: str,
    instruction: str,
    prompt: str,
    poll_interval: float = 2.0,
    request_timeout: float = 30.0,
    max_wait: float = float("inf"),
) -> None:
    """
    Submits a job to RunPod and polls until the job reaches a terminal state.

    Progress and the final result are printed to the console. Every failure
    mode handled here (network error, non-200 submit reply, malformed body,
    failed/cancelled/timed-out job, polling deadline exceeded) is reported
    with a printed message and ends the call, rather than raising or polling
    forever.

    Args:
        api_key (str): Your RunPod API key.
        endpoint_id (str): The ID of the RunPod endpoint.
        instruction (str): The instruction for the problem, e.g. "math", "SQL", or "python".
        prompt (str): The prompt/question to send for inference.
        poll_interval (float): Seconds to sleep between status polls.
        request_timeout (float): Timeout in seconds applied to each HTTP request.
        max_wait (float): Maximum number of seconds to keep polling before
            giving up. Defaults to infinity, i.e. no overall deadline.

    Returns:
        None. Prints result to console.
    """
    structured_prompt = (
        f"<instruction>This is a {instruction} problem.</instruction>"
        f"<question>{prompt}</question>"
    )
    base_url = f'https://api.runpod.ai/v2/{endpoint_id}'
    payload = {
        "input": {
            "prompt": structured_prompt,
            "sampling_params": {
                "temperature": 0.8,
                "max_tokens": 1024,
                "stop": "</response>"
            }
        }
    }
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {api_key}'
    }

    # Submit the job
    try:
        submit_response = requests.post(
            f'{base_url}/run', headers=headers, json=payload, timeout=request_timeout
        )
    except requests.RequestException as exc:
        print("Failed to submit job:", exc)
        return
    if submit_response.status_code != 200:
        print("Failed to submit job:", submit_response.text)
        return

    try:
        job_id = submit_response.json().get('id')
    except (ValueError, AttributeError):
        # Body was not JSON, or was JSON but not an object.
        job_id = None
    if not job_id:
        print("No job ID returned.")
        return

    print(f"Job submitted successfully. Job ID: {job_id}")

    # Poll for status
    status_url = f'{base_url}/status/{job_id}'
    deadline = time.monotonic() + max_wait
    while True:
        try:
            res = requests.get(status_url, headers=headers, timeout=request_timeout)
        except requests.RequestException as exc:
            print("Failed to fetch job status:", exc)
            return

        try:
            result = res.json()
        except ValueError:
            print("Failed to fetch job status:", res.text)
            return

        # An error reply (e.g. auth failure) may carry no 'status' key at all.
        status = result.get('status') if isinstance(result, dict) else None
        if status is None:
            print("Unexpected status response:", result)
            return
        print("Status:", status)

        if status == 'COMPLETED':
            output = result.get('output')
            print("Output:", output)
            try:
                # The stop sequence requested in the payload is re-appended to the
                # first generated text -- presumably the endpoint strips it; verify.
                output_text = output[0]["choices"][0]["tokens"][0] + "</response>"
                print("Answer:", output_text)
            except (KeyError, IndexError, TypeError) as e:
                print("Unexpected output format:", e)
            return
        if status in ('FAILED', 'CANCELLED'):
            print("Job failed or was cancelled:", result)
            return
        if status == 'TIMED_OUT':
            # Also a terminal state; without this branch the loop never exits.
            print("Job timed out:", result)
            return

        if time.monotonic() >= deadline:
            print(f"Gave up waiting for job {job_id} after {max_wait} seconds.")
            return

        time.sleep(poll_interval)  # wait before polling again

# === Usage: Ask Yiqiao Yin ===
# Connection settings and a sample question for the inference call.
API_KEY = RUNPOD_API_KEY   # key read from the Colab secret of the same name
ENDPOINT_ID = "xxx"        # placeholder -- replace with your RunPod endpoint ID
PROMPT = (
    "Natalia sold clips to 48 of her friends in April, "
    "and then she sold half as many clips in May. "
    "How many clips did Natalia sell altogether in April and May?"
)

# Example call (the instruction, e.g. "math", is the third argument):
# run_inference(API_KEY, ENDPOINT_ID, "math", PROMPT)

In [None]:
%%time

# Once the connection to the endpoint is established, inference is much faster
# and should take only a few seconds. The same request on a Colab T4 GPU would
# probably take about 2 minutes per inference.

PROMPT = (
    "Natalia sold clips to 48 of her friends in April, "
    "and then she sold half as many clips in May. "
    "How many clips did Natalia sell altogether in April and May?"
)

run_inference(
    api_key=API_KEY,
    endpoint_id=ENDPOINT_ID,
    instruction="math",
    prompt=PROMPT,
)

Job submitted successfully. Job ID: 429a4341-36c3-44c3-acec-2253058de30a-u2
Status: IN_QUEUE
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: COMPLETED
Output: [{'choices': [{'tokens': ["<think>Natalia sold clipping tools to 48 friends in April. To find out how many friends she sold anything in, we need to know if she sold anything in May too. The problem says she sold half as many clipping tools in May as she did in April. Hmmm, half of something sold in April would be... let's see, if she sold 48 in April, then half of that would be 24. So, she sold 24 in May. \n\nNow, to find out how many clipping tools she sold in total, we just need to add what she sold in April and May. In April, she sold 48, and in May, she sold 24. Adding those numbers together, 48 plus 24, gives us 72. \n\nOh, wait a second! Let’s make sure we're not missing something. The problem only mentions clipping tools, and we've accounted for both April and May. There's no mention of her having a stock or needing manycl