In [None]:
# Install the notebook's dependencies listed in requirements.txt.
# %pip (rather than !pip) installs into the environment backing the running kernel.
%pip install -r requirements.txt

In [2]:
from dotenv import load_dotenv

# Load settings from a local .env file into the process environment so the
# os.getenv(...) lookups in later cells can see them.
# override=True: values from .env replace variables that are already set
# (python-dotenv behaviour), so edits to .env take effect on re-run.
load_dotenv(override=True)

True

In [13]:
import os
from openai import OpenAI

# Build the API client from the Azure settings loaded from the environment.
#
# os.environ[...] is used instead of os.getenv(...) on purpose: getenv returns
# None for a missing variable, and the OpenAI client treats None as "not
# provided" and falls back to OPENAI_BASE_URL / OPENAI_API_KEY or the default
# public endpoint. Indexing os.environ raises a KeyError naming the missing
# variable, so a misconfigured .env fails here rather than sending requests
# (and the Azure key) to the wrong service.
client = OpenAI(
    base_url=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
)
print("> Created OpenAI client.")

> Created OpenAI client.


In [26]:
# Developer instructions for the grader model: how to score a model answer
# against the reference answer, and what shape the grader's reply must take.
PROMPT = "\n".join(
    (
        "Score how close the reference answer is to the model answer. You will be comparing these two as JSON objects that",
        'contain 2 keys, "extracted_text" and "clause_type". Score 1.0 if they are both the same, 0.5 if one is the same,',
        "and 0.0 if neither are the same.",
        "",
        "Return just a floating point score in the form of JSON with a score field containing the resulting score",
        "and a justification field containing an explanation for the score.",
    )
)

# User message template for the grader. The {{item.*}} and {{sample.*}}
# placeholders are left verbatim here; they are not Python format fields.
# NOTE(review): the {{item.*}} placeholders sit outside quotes, so the rendered
# reference answer may not be strict JSON — confirm this is what the grader
# should see.
USER_PROMPT = "\n".join(
    (
        'Reference answer: {"extracted_text": {{item.extracted_text}}, "clause_type": {{item.clause_type}}}',
        "",
        "Model answer: {{sample.output_json}}",
    )
)

# Grader definition passed to the fine-tuning job: a "score_model" grader that
# sends the developer instructions (PROMPT) and the per-sample user message
# (USER_PROMPT) to the o3-mini model. Scores are declared to lie in [0.0, 1.0]
# with a pass threshold of 0.5.
GRADER = dict(
    name="Score Model Grader",
    type="score_model",
    model="o3-mini",
    input=[
        dict(role="developer", content=PROMPT),
        dict(role="user", content=USER_PROMPT),
    ],
    range=[0.0, 1.0],
    pass_threshold=0.5,
)

# Structured-output format for model answers: a strict JSON schema describing
# one contract clause. Both fields are required strings, clause_type is limited
# to the two listed values, and no extra keys are allowed.
SCHEMA = dict(
    type="json_schema",
    json_schema=dict(
        name="contract_clause",
        strict=True,
        schema=dict(
            type="object",
            properties=dict(
                clause_type=dict(
                    type="string",
                    description="The type of contract clause",
                    enum=["Exclusivity", "Non-Compete"],
                ),
                extracted_text=dict(
                    type="string",
                    description="The extracted text of the contract clause",
                ),
            ),
            required=["clause_type", "extracted_text"],
            additionalProperties=False,
        ),
    ),
)

In [7]:
def _upload_for_fine_tuning(path):
    """Upload a JSONL dataset for fine-tuning and wait until the service has processed it.

    Args:
        path: Location of the JSONL file to upload.

    Returns:
        The file object returned by the service once processing has finished.

    Raises:
        RuntimeError: If processing ends in any state other than "processed".
            wait_for_processing also returns on the terminal "error" state, so
            without this check a rejected file would only surface later as a
            failed fine-tuning job.
    """
    with open(path, mode="rb") as f:
        uploaded = client.files.create(file=f, purpose="fine-tune")

    # The local handle is no longer needed once the upload call has returned,
    # so polling happens outside the `with` block.
    processed = client.files.wait_for_processing(uploaded.id)
    if processed.status != "processed":
        details = getattr(processed, "status_details", None)
        raise RuntimeError(
            f"File {processed.id} ({path}) ended with status "
            f"{processed.status!r}: {details}"
        )
    return processed


training_file = _upload_for_fine_tuning("./clause_matching_training.jsonl")
validation_file = _upload_for_fine_tuning("./clause_matching_validation.jsonl")

print(f"> created files {training_file.id} and {validation_file.id}")

> created files file-29d025cf762d4836be540750822487e2 and file-541a4f941ad54c1f88c33c252ccbc5de


In [27]:
# Reinforcement fine-tuning configuration: the grader that scores each sampled
# answer and the structured-output schema the answers must follow.
rft_method = {
    "type": "reinforcement",
    "reinforcement": {
        "grader": GRADER,
        "response_format": SCHEMA,
    },
}

# Submit the job using the uploaded training and validation files.
job = client.fine_tuning.jobs.create(
    model="gpt-5-2025-08-07",
    suffix="dv-clause-test",
    training_file=training_file.id,
    validation_file=validation_file.id,
    method=rft_method,
)

print(f"> Created RFT job {job.id}")
# Dump the full job record as returned by the service for reference.
print(job.to_json())

> Created RFT job ftjob-7a91a676e1fd4b459c367ef55c80d9d1
{
  "id": "ftjob-7a91a676e1fd4b459c367ef55c80d9d1",
  "created_at": 1762909806,
  "model": "gpt-5-2025-08-07",
  "object": "fine_tuning.job",
  "seed": 538142950,
  "status": "pending",
  "training_file": "file-29d025cf762d4836be540750822487e2",
  "validation_file": "file-541a4f941ad54c1f88c33c252ccbc5de",
  "estimated_finish": 1763039106,
  "method": {
    "type": "reinforcement",
    "reinforcement": {
      "grader": {
        "input": [
          {
            "content": "Score how close the reference answer is to the model answer. You will be comparing these two as JSON objects that\ncontain 2 keys, \"extracted_text\" and \"clause_type\". Score 1.0 if they are both the same, 0.5 if one is the same,\nand 0.0 if neither are the same.\n\nReturn just a floating point score in the form of JSON with a score field containing the resulting score\nand a justification field containing an explanation for the score.",
            "role"