In [None]:
### Compose a test prompt for the Phi-4-mini-instruct model from the first item of the test set

import json
from transformers import AutoTokenizer

# The tokenizer is loaded only for its chat template
model_name = "microsoft/Phi-4-mini-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Parse just the first record of the JSONL test set
with open("./data/test.jsonl", "r", encoding='utf-8') as test_file:
    first_item = json.loads(test_file.readline())

question = first_item["question"]
options = first_item["options"]
answer_idx = first_item["answer_idx"]

# One "<key>. <text>" line per option, ordered by option key
formatted_options = "\n".join(
    f"{letter}. {options[letter]}" for letter in sorted(options)
)

user_prompt = f"Question:\n{question}\n\nOptions:\n{formatted_options}"

# Conversation in chat format (same as training): system instruction + user question
system_message = {
    "role": "system",
    "content": "You are a medical expert. Read the following USMLE question and choose the best answer. Reply with ONE LETTER only (A, B, C, D, or E)",
}
user_message = {
    "role": "user",
    "content": user_prompt,
}
messages = [system_message, user_message]

# Render the chat template as text (no tokenization); add_generation_prompt=True
# leaves the assistant turn open instead of including an assistant response
complete_prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
print(complete_prompt)
print(f"\nGround Truth Answer: {answer_idx}")

In [5]:
# Invoke the deployed Azure ML endpoint
import yaml
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Read the Azure ML workspace settings from the local YAML config
with open("config.yml", "r") as config_file:
    config = yaml.safe_load(config_file)

# Workspace coordinates are mandatory: a missing key raises KeyError
subscription_id = config["subscription_id"]
resource_group = config["resource_group"]
workspace_name = config["workspace_name"]

# The endpoint name is optional and falls back to the test deployment below
# (update the default, or set online_endpoint_name in config.yml)
endpoint_name = config.get("online_endpoint_name", "Phi-4-mini-instruct-rv-test")

# Workspace client authenticated through the default Azure credential chain
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

In [34]:
import requests

# Upper bound (seconds) on how long to wait for the endpoint; without a timeout
# requests can block forever and stall the notebook if the service hangs.
REQUEST_TIMEOUT_SECONDS = 60

# Request body in chat-completions format, reusing the conversation built earlier
payload = {
    "messages": messages,
    "max_tokens": 10,
    "temperature": 0.1
}

# NOTE(review): bearerToken is not defined in any cell shown in this notebook.
# It must be set before this cell runs (e.g. read from an environment variable
# or a secrets store) - never hardcode the key in the notebook.
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {bearerToken}"
}
uri = 'https://Phi-4-mini-instruct-rv-test.eastus2.models.ai.azure.com/v1/chat/completions'

# Send POST request
response = requests.post(uri, json=payload, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)

# Display the status, then stop on HTTP errors so a failed call surfaces here
# rather than as a confusing KeyError/JSON error when the reply is parsed later.
print(f"Status Code: {response.status_code}")
response.raise_for_status()

# Parsed response body; the model's answer is extracted from it in the next cell
json_response = response.json()

Status Code: 200


In [35]:
# Take the text of the first completion choice, trimmed of surrounding whitespace
first_choice = json_response['choices'][0]
answer = first_choice['message']['content'].strip()
answer

'B. Disclose the error to the patient and'

In [36]:
def extract_answer(response: str) -> str:
    """
    Extract the answer choice (A, B, C, D, E) from the model response.

    Matching is case-insensitive and only accepts a letter that stands on its
    own, so letters that merely occur inside words ("Based", "Correct",
    "don't") are never mistaken for an answer. The heuristics are tried from
    most to least explicit, and the first one that matches wins:

    1. The reply starts with the letter, optionally wrapped in punctuation,
       and the letter is followed by punctuation or the end of the reply:
       "B", "B.", "B)", "(B)", "**B**", "B: ...".
    2. An explicit statement: "the answer is B", "Answer: B".
    3. A parenthesised letter anywhere in the reply: "... (B) ...".
    4. A standalone letter followed by ".", ")" or ":".
    5. Any standalone letter.

    These are heuristics: free-form replies that use "a" as an article, or
    abbreviations such as "e.g.", can still be misread by the later steps.

    Args:
        response: Raw text returned by the model. ``None`` is treated as an
            empty reply.

    Returns:
        The selected option as a single uppercase letter.

    Raises:
        ValueError: If no standalone answer letter can be found.
    """
    import re

    text = (response or "").strip().upper()

    # Left edge of a standalone letter: not glued to a preceding letter/digit,
    # and not the tail of a contraction such as "I'D".
    left_edge = r"(?<![A-Z0-9])(?<![A-Z]')"

    patterns = (
        # 1. Leading letter followed by punctuation or end of reply
        r"^\W*([ABCDE])(?:$|[^\w\s'])",
        # 2. "ANSWER IS X" / "ANSWER: X"; X must not be the start of a word
        r"ANSWER\s*(?:IS\b|:)\W*([ABCDE])(?![A-Z0-9])",
        # 3. "(X)"
        r"\(([ABCDE])\)",
        # 4. Standalone letter followed by ".", ")" or ":"
        left_edge + r"([ABCDE])[.):](?![A-Z0-9])",
        # 5. Any standalone letter
        left_edge + r"([ABCDE])(?![A-Z0-9'])",
    )

    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1)

    # If nothing can be parsed, raise
    raise ValueError("Could not extract a valid answer choice from the response.")

# Parse the answer letter out of the model reply stored in `answer`
extract_answer(answer)

'B'