In [None]:
import os
import time

import requests

def run_inference(
    api_key: str,
    endpoint_id: str,
    prompt: str,
    poll_interval: float = 2.0,
    request_timeout: float = 30.0,
    max_wait: float = float("inf"),
) -> None:
    """
    Submits a job to RunPod and polls until the job reaches a terminal state.

    Args:
        api_key (str): Your RunPod API key.
        endpoint_id (str): The ID of the RunPod endpoint.
        prompt (str): The prompt/question to send for inference. It is wrapped
            in <question>...</question> tags before being sent.
        poll_interval (float): Seconds to sleep between status polls.
        request_timeout (float): Timeout, in seconds, applied to every HTTP
            request so a stalled connection cannot hang the cell forever.
        max_wait (float): Upper bound, in seconds, on the total time spent
            polling. Defaults to no client-side limit.

    Returns:
        None. Prints progress, the raw output and the extracted answer (or a
        diagnostic message) to the console.

    Raises:
        Network-level errors raised by `requests` (including timeouts) are not
        caught and propagate to the caller.
    """
    # Submit the job
    url = f'https://api.runpod.ai/v2/{endpoint_id}/run'
    payload = {
        "input": {
            "prompt": f"<question>{prompt}</question>",
            "sampling_params": {
                "temperature": 0.8,
                "max_tokens": 2048,
                "stop": "</response>"
            }
        }
    }
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {api_key}'
    }

    submit_response = requests.post(
        url, headers=headers, json=payload, timeout=request_timeout
    )
    if submit_response.status_code != 200:
        print("Failed to submit job:", submit_response.text)
        return

    try:
        job_id = submit_response.json().get('id')
    except (ValueError, AttributeError):
        # Body was not JSON, or was JSON but not an object.
        job_id = None
    if not job_id:
        print("No job ID returned.")
        return

    print(f"Job submitted successfully. Job ID: {job_id}")

    # Poll for status
    status_url = f'https://api.runpod.ai/v2/{endpoint_id}/status/{job_id}'
    # Every status here ends the job without a usable result; polling on
    # after any of them would loop forever.
    failure_states = ('FAILED', 'CANCELLED', 'TIMED_OUT')
    deadline = time.monotonic() + max_wait

    while True:
        res = requests.get(status_url, headers=headers, timeout=request_timeout)
        if res.status_code != 200:
            # Error responses (bad key, unknown job, server error) are not
            # guaranteed to carry a 'status' field, so stop instead of indexing.
            print("Failed to fetch job status:", res.text)
            return

        try:
            result = res.json()
        except ValueError:
            print("Status response was not valid JSON:", res.text)
            return

        status = result.get('status') if isinstance(result, dict) else None
        if status is None:
            print("Unexpected status response:", result)
            return
        print("Status:", status)

        if status == 'COMPLETED':
            output = result.get('output')
            print("Output:", output)
            try:
                # The stop sequence is appended back onto the text; presumably
                # the server strips it from the generation -- TODO confirm.
                output_text = output[0]["choices"][0]["tokens"][0] + "</response>"
                print("Answer:", output_text)
            except (KeyError, IndexError, TypeError) as e:
                print("Unexpected output format:", e)
            break
        elif status in failure_states:
            print("Job failed or was cancelled:", result)
            break

        if time.monotonic() >= deadline:
            print(f"Gave up waiting for job {job_id}; last status: {status}")
            break

        time.sleep(poll_interval)  # wait before polling again


In [None]:
# === Usage ===
# Credentials are read from the environment so real keys are never saved in the
# notebook; the 'xxx' placeholders are used only when the variables are unset.
API_KEY = os.environ.get('RUNPOD_API_KEY', 'xxx')
ENDPOINT_ID = os.environ.get('RUNPOD_ENDPOINT_ID', 'xxx')
PROMPT = "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?"

# Submits the prompt and prints the job status and answer to the cell output.
run_inference(API_KEY, ENDPOINT_ID, PROMPT)

Job submitted successfully. Job ID: 2f78cf50-09d5-48aa-a7d3-6c4a10fe56a0-u1
Status: IN_QUEUE
Status: IN_PROGRESS
Status: COMPLETED
Output: [{'choices': [{'tokens': [" To answer the question, let's break it down step by step:\n\n<think>\n1. **Understand the problem**: Natalia sold clips in April and May. We know she sold 48 clips in April, and in May she sold half as many as in April. We need to find the total number of clips she sold in both months.\n\n2. **Find the number of clips sold in May**: Since Natalia sold half as many clips in May as she did in April, we divide the number of clips sold in April by 2.  \n   Calculation: \\( \\frac{48}{2} = 24 \\) clips.\n\n3. **Calculate the total clips sold**: Add the number of clips sold in April and May to find the total.  \n   Calculation: \\( 48 + 24 = 72 \\) clips.\n\n4. **Final answer**: Natalia sold a total of 72 clips in April and May.\n</think>\n\n<response>\nIn May, Natalia sold \\( \\frac{48}{2} = 24 \\) clips.  \nIn April and May,