In [None]:
import scoring_functions as fn

#### Operation ID

How well different models generate an operation ID for use in an OpenAPI spec, given an API request.

| **Final Weighted Score** | **Interpretation** | **Expected Issues** |
|------------------------|------------------|------------------|
| **0.90 – 1.00** | 🔥 **Excellent match** | ✅ Generated ID is **semantically identical** to reference with minor or no variations. |
| **0.80 – 0.89** | ✅ **Good match** | ⚠️ Small differences in structure (**e.g., `fetchUserById` vs. `getUserById`**) but still correct. |
| **0.70 – 0.79** | ⚠️ **Moderate match** | ❌ Some **unnecessary modifications**, **extra words**, or **slightly incorrect phrasing** (e.g., `getUserDetailsById` vs. `getUserById`). |
| **0.60 – 0.69** | ❌ **Weak match** | ❌ **Incorrect structure**, **extra words that change meaning**, or **minor hallucinations** (e.g., `getUserByUserId`). |
| **Below 0.60** | ❌❌ **Poor match** | ❌❌ **Major hallucinations, missing key terms, or incorrect operation type** (e.g., `deleteUserById` instead of `getUserById`). |

In [None]:
# Scenario: generate an OpenAPI operation ID from an HTTP method and URL.
scenario = "Optimized operation ID generation"
# NOTE(review): presumably tells the evaluation runner to post-process the
# raw model output before scoring; the consumer is not visible here - confirm.
split_response = True

# Few-shot prompt. Doubled braces are str.format escapes, so the rendered
# prompt shows path parameters with single braces (e.g. `{books-id}`); the
# only real placeholder is `{request}`.
prompt_template = """
    **Prompt:**
    Generate an operation ID for an OpenAPI specification based on the HTTP method and URL provided. Follow these rules:
    - The operation ID should be in camelCase format.
    - Start with a verb that matches the HTTP method (e.g., `get`, `create`, `update`, `delete`).
    - Use descriptive words from the URL path.
    - Replace path parameters (e.g., `{{userId}}`) with relevant nouns in singular form (e.g., `User`).
    - Do not provide explanations or any other text; respond only with the operation ID.

    Example:
    **Request:** `GET https://api.contoso.com/books/{{books-id}}`
    getBook

    Example:
    **Request:** `GET https://api.contoso.com/books/{{books-id}}/authors`
    getBookAuthors

    Example:
    **Request:** `GET https://api.contoso.com/books/{{books-id}}/authors/{{authors-id}}`
    getBookAuthor

    Example:
    **Request:** `POST https://api.contoso.com/books/{{books-id}}/authors`
    addBookAuthor

    Now, generate the operation ID for the following:
    **Request:** `{request}`
    """.strip()

# One entry per evaluated request: a display name, the accepted reference
# operation IDs, the fully rendered prompt, and the sampling temperature.
#
# The request values are ordinary string literals handed to str.format as
# arguments; format() does not interpret braces inside argument values, so
# path parameters are written with single braces here. Doubling them would
# leave a literal `{{users-id}}` in the prompt, inconsistent with the
# single-brace examples rendered from the template.
tasks = [
    {
        "name": "getCalendarsForUser",
        "reference_texts": [
            "getCalendarsForUser",
            "listUserCalendars",
            "getUserCalendars",
            "fetchUserCalendars",
            "listCalendarsForUser",
        ],
        "prompt": prompt_template.format(request="GET https://graph.microsoft.com/users/{users-id}/calendars"),
        "temperature": 1,
    },
    {
        "name": "getUser",
        "reference_texts": [
            "getUser",
            "fetchUser",
            "retrieveUser",
        ],
        "prompt": prompt_template.format(request="GET https://graph.microsoft.com/users/{users-id}"),
        "temperature": 1,
    },
    {
        "name": "getCalendarForUser",
        "reference_texts": [
            "getCalendarForUser",
            "fetchUserCalendar",
            "getUserCalendar",
        ],
        "prompt": prompt_template.format(request="GET https://graph.microsoft.com/users/{users-id}/calendars/{calendars-id}"),
        "temperature": 1,
    },
    {
        "name": "postCalendarForUser",
        "reference_texts": [
            # Each reference needs its own trailing comma: adjacent string
            # literals are implicitly concatenated into a single string.
            "postCalendarForUser",
            "createUserCalendar",
            "createCalendarForUser",
            "addUserCalendar",
            "postUserCalendar",
            "addCalendarForUser",
        ],
        "prompt": prompt_template.format(request="POST https://graph.microsoft.com/users/{users-id}/calendars"),
        "temperature": 1,
    },
]
# NOTE(review): presumably the minimum weighted score a response must reach
# to count as acceptable; the consumer is not visible here - confirm.
score_threshold = 0.8

# Metrics that make up the final weighted score for operation IDs.
# Each entry pairs a display name with a scoring function from
# `scoring_functions` and its weight; the weights add up to 1.
metric_scorers = [
    {"name": label, "fn": scorer, "weight": share}
    for label, scorer, share in (
        ("BERT-F", fn.calculate_bert_f, 0.45),
        ("Edit distance", fn.calculate_edit_distance, 0.1),
        ("ROUGE-1", fn.calculate_rouge1, 0.25),
        ("ROUGE-L", fn.calculate_rougeL, 0.2),
    )
]

#### Operation description

How well different models generate an operation description for use in an OpenAPI spec, given an API request.

| **Final Score** | **Interpretation** |
|--------------|------------------|
| **0.90 – 1.00** | 🔥 **Excellent match** (high accuracy, correct structure, no missing info) |
| **0.80 – 0.89** | ✅ **Good match** (minor variations, but correct meaning) |
| **0.70 – 0.79** | ⚠️ **Moderate match** (some incorrect phrasing or extra info) |
| **Below 0.70** | ❌ **Weak match** (significant deviation or missing context) |

# Scenario: generate a one-sentence OpenAPI operation description for a request.
scenario = "Generate OpenAPI operation description"
# NOTE(review): presumably tells the evaluation runner whether to post-process
# the raw model output before scoring; the consumer is not visible - confirm.
split_response = False

# Doubled braces are str.format escapes, so the rendered prompt shows path
# parameters with single braces (e.g. `{books-id}`); the only real
# placeholder is `{request}`.
prompt_template = """
    You're an expert in OpenAPI. You help developers build great OpenAPI specs
    for use with LLMs. For the specified request, generate a one-sentence
    description. Respond with just the description. For example, for a request
    such as `GET https://api.contoso.com/books/{{books-id}}` you return
    `Retrieve information about a specific book`. For a request like
    `GET https://api.contoso.com/books/{{books-id}}/authors` you return
    `Retrieve information about authors for a specific book`. Request:
    {request}
    """.strip()

# One entry per evaluated request: a display name, the accepted reference
# descriptions, and the fully rendered prompt.
#
# The request values are ordinary string literals handed to str.format as
# arguments; format() does not interpret braces inside argument values, so
# path parameters are written with single braces here. Doubling them would
# leave a literal `{{users-id}}` in the prompt, inconsistent with the
# single-brace examples rendered from the template.
tasks = [
    {
        "name": "Retrieve a list of calendars for a specific user",
        "reference_texts": [
            "Retrieve a user's calendars",
            "Retrieve calendars for a specific user",
            "Retrieve a list of calendars for a specific user",
            "Fetch the list of calendars associated with a user",
        ],
        "prompt": prompt_template.format(request="GET https://api.contoso.com/users/{users-id}/calendars"),
    },
    {
        "name": "Retrieve information about a specific user",
        "reference_texts": [
            "Fetch details for the specified user",
            "Get user details",
            "Retrieve information about a specific user",
            "Retrieve details of a specific user",
            "Get information about a given user",
        ],
        "prompt": prompt_template.format(request="GET https://api.contoso.com/users/{users-id}"),
    },
    {
        "name": "Retrieve information about a specific calendar for a given user",
        "reference_texts": [
            # NOTE(review): only this reference ends with a period - confirm
            # that is intentional before normalizing it.
            "Retrieve information about a specific calendar for a given user.",
            "Retrieve details of a specific calendar for a user",
            "Fetch information about a particular calendar owned by a user",
            "Get metadata for a specific calendar linked to a user",
            "Retrieve a user's calendar details",
            "Retrieve calendar details for a specific user",
            "Retrieve information about a specific user's calendar",
        ],
        "prompt": prompt_template.format(request="GET https://api.contoso.com/users/{users-id}/calendars/{calendars-id}"),
    },
    {
        "name": "Create a new calendar for the specified user",
        "reference_texts": [
            "Create a new calendar for the specified user",
            "Add a calendar to the user's account",
            "Create a new calendar for a given user",
        ],
        "prompt": prompt_template.format(request="POST https://api.contoso.com/users/{users-id}/calendars"),
    },
]
# NOTE(review): presumably the minimum weighted score a response must reach
# to count as acceptable; the consumer is not visible here - confirm.
score_threshold = 0.8

# Metrics that make up the final score for operation descriptions.
# Each entry pairs a display name with a scoring function from
# `scoring_functions` and its weight; the weights add up to 1.
metric_scorers = [
    {"name": label, "fn": scorer, "weight": share}
    for label, scorer, share in (
        ("BERT-F", fn.calculate_bert_f, 0.45),
        ("Edit distance", fn.calculate_edit_distance, 0.1),
        ("ROUGE-2", fn.calculate_rouge2, 0.25),
        ("ROUGE-L", fn.calculate_rougeL, 0.2),
    )
]