# Martian SDK Quickstart Guide

In [1]:
# Imports

from openai.types.chat import (
    chat_completion,
    chat_completion_message,
)

from martian_apart_hack_sdk import judge_specs, martian_client, utils

## Load Credentials
You must have a .env file with the following values set:

1. `MARTIAN_API_URL` - withmartian.com/api
1. `MARTIAN_ORG_ID` - your Martain organization ID
1. `MARTIAN_API_KEY` - your personal API key

In [7]:
# Load the config and make a client.
config = utils.load_config()
client = martian_client.MartianClient(
        api_url=config.api_url,
        api_key=config.api_key,
        org_id=config.org_id,
    )

## Judging

In [8]:
# Create a JudgeSpec

rubric = """
You are tasked with evaluating whether a restaurant recommendation is good.
The scoring is as follows:
- 1: If the recommendation doesn't meet any of the criteria.
- 2: If the recommendation meets only some small part of the criteria.
- 3: If the recommendation is reasonable, but not perfect.
- 4: If the recommendation is almost perfect.
- 5: If the recommendation is perfect.
""".strip()

rubric_judge_spec = judge_specs.RubricJudgeSpec(
    model_type="rubric_judge",
    rubric=rubric,
    # TODO: Clearly communicate which models are available.
    model="openai/openai/gpt-4o",
    min_score=1,
    max_score=5,
)

In [9]:
# Run the judge spec.
# TODO: Go through advanced judge building.

chat_request_text = "What is a good Chinese restaurant in downtown San Francisco?"
chat_response_text = "I couldn't find a good Mexican restaurant near you."

completion_request = {
    "model": "openai/openai/gpt-4o-mini",
    "messages": [{"role": "user", "content": chat_request_text}],
}
chat_completion_response = chat_completion.ChatCompletion(
    id="123",
    choices=[
        chat_completion.Choice(
            finish_reason="stop",
            index=0,
            message=chat_completion_message.ChatCompletionMessage(
                role="assistant",
                content=chat_response_text,
            ),
        )
    ],
    created=0,
    model="gpt-4o",
    object="chat.completion",
    service_tier=None,
)

evaluation_result = client.judges.evaluate_judge_spec(
    rubric_judge_spec.to_dict(),
    completion_request=completion_request,
    completion_response=chat_completion_response,
)

print(f"User: {chat_request_text}")
print(f"Assistant: {chat_response_text}")
print(f"Evaluation result: {evaluation_result}")

TypeError: __init__() got an unexpected keyword argument 'cost'

In [5]:
# Once you are happy with the judge, you can save it.

judge_id = "restaurant-recommendation-reviewer"

judge = client.judges.create_judge(
    judge_id=judge_id,
    judge_spec=rubric_judge_spec,
    description="A judge that rates how good restaurant recommendations are."
)

print(f"Created a judge: {judge}")

ResourceNotFoundError: Judge with id restaurant-recommendation-reviewer already exists

In [10]:
# You can then call the judge:

evaluation_result = client.judges.evaluate_judge(
    judge,
    completion_request=completion_request,
    completion_response=chat_completion_response,
)

print(f"Judge response: {evaluation_result}")

NameError: name 'judge' is not defined

In [13]:
# Once you have created some judges, you may want to list them all:

all_judges = client.judges.list()
print("Judges:")
print(*[f"\t- {j}" for j in all_judges])

# Or get a specific judge:

retrieved_judge = client.judges.get(
    judge_id="restaurant-recommendation-reviewer",
    version=1,
)
print(f"\nRetrieved judge: {retrieved_judge}")

# Or update the judge, creating a new version:


new_rubric = """
You are tasked with evaluating whether a restaurant recommendation is good.
The scoring is as follows:
- 1: If the recommendation doesn't meet any of the criteria.
- 2: If the recommendation meets only some small part of the criteria.
- 3: If the recommendation is reasonable, but not perfect.
- 4: If the recommendation is almost perfect.
- 5: If the recommendation is perfect.
""".strip()

new_rubric_judge_spec = judge_specs.RubricJudgeSpec(
    model_type="rubric_judge",
    rubric=rubric,
    # TODO: Clearly communicate which models are available.
    model="openai/openai/gpt-4o",
    min_score=1,
    max_score=5,
)

new_judge_spec = client.judges.update_judge(
    judge_id="restaurant-recommendation-reviewer",
    judge_spec=new_rubric_judge_spec,
)

print(f"\nNew judge spec: {new_judge_spec}")

Judges:
	- Judge(id='restaurant-recommendation-reviewer', version=2, description='A judge that rates how good restaurant recommendations are.', createTime='2025-05-22T21:41:55.338160Z', name='organizations/67aa617d46ded06041eff84d/judges/restaurant-recommendation-reviewer', judgeSpec=None) 	- Judge(id='my_cool_judge_id2', version=1, description='This is a new judge description.', createTime='2025-05-22T05:53:32.842495Z', name='organizations/67aa617d46ded06041eff84d/judges/my_cool_judge_id2', judgeSpec=None) 	- Judge(id='my_cool_judge_id', version=1, description='This is a new judge description.', createTime='2025-05-22T05:52:35.287463Z', name='organizations/67aa617d46ded06041eff84d/judges/my_cool_judge_id', judgeSpec=None)

Retrieved judge: Judge(id='restaurant-recommendation-reviewer', version=1, description='A judge that rates how good restaurant recommendations are.', createTime='2025-05-22T21:32:08.367255Z', name='organizations/67aa617d46ded06041eff84d/judges/restaurant-recommendat

## Routing