In [None]:
import scoring_functions as fn

#### Operation ID

This evaluation measures how well different models generate an operation ID for use in an OpenAPI spec, given an API request.

| **Final Weighted Score** | **Interpretation** | **Expected Issues** |
|------------------------|------------------|------------------|
| **0.90 – 1.00** | 🔥 **Excellent match** | ✅ Generated ID is **semantically identical** to reference with minor or no variations. |
| **0.80 – 0.89** | ✅ **Good match** | ⚠️ Small differences in wording or structure (**e.g., `fetchUserById` vs. `getUserById`**) but still correct. |
| **0.70 – 0.79** | ⚠️ **Moderate match** | ❌ Some **unnecessary modifications**, **extra words**, or **slightly incorrect phrasing** (e.g., `getUserDetailsById` vs. `getUserById`). |
| **0.60 – 0.69** | ❌ **Weak match** | ❌ **Incorrect structure**, **extra words that change meaning**, or **minor hallucinations** (e.g., `getUserByUserId`). |
| **Below 0.60** | ❌❌ **Poor match** | ❌❌ **Major hallucinations, missing key terms, or incorrect operation type** (e.g., `deleteUserById` instead of `getUserById`). |

In [None]:
# Path to the prompt file used for this evaluation.
prompt_file = './prompts/DevProxy_v0.29.0/api_operation_id.prompty'
# NOTE(review): read by code outside this cell; presumably controls whether the
# model response is split before scoring — confirm against the runner.
split_response = True

# One evaluation task per API request. Each row below is
# (task name, request given to the prompt, acceptable operation IDs).
tasks = [
    {
        'name': task_name,
        'reference_texts': list(accepted_ids),
        'inputs': {'request': api_request},
    }
    for task_name, api_request, accepted_ids in (
        (
            'getCalendarsForUser',
            'GET https://graph.microsoft.com/users/{users-id}/calendars',
            (
                'getCalendarsForUser',
                'listUserCalendars',
                'getUserCalendars',
                'fetchUserCalendars',
                'listCalendarsForUser',
            ),
        ),
        (
            'getUser',
            'GET https://graph.microsoft.com/users/{users-id}',
            (
                'getUser',
                'fetchUser',
                'retrieveUser',
            ),
        ),
        (
            'getCalendarForUser',
            'GET https://graph.microsoft.com/users/{users-id}/calendars/{calendars-id}',
            (
                'getCalendarForUser',
                'fetchUserCalendar',
                'getUserCalendar',
            ),
        ),
        (
            'postCalendarForUser',
            'POST https://graph.microsoft.com/users/{users-id}/calendars',
            (
                'postCalendarForUser',
                'createUserCalendar',
                'createCalendarForUser',
                'addUserCalendar',
                'postUserCalendar',
                'addCalendarForUser',
            ),
        ),
    )
]

# NOTE(review): presumably the minimum final weighted score counted as a pass
# (0.8 is the lower bound of the "Good match" band) — confirm against the runner.
score_threshold = 0.8
# Scoring metrics and their relative weights (the weights add up to 1.0).
# Each row is (display name, scoring function, weight).
# NOTE(review): presumably combined into the final weighted score by the
# evaluation runner — confirm how 'fn' is invoked there.
metric_scorers = [
    {'name': metric_name, 'fn': scorer, 'weight': weight}
    for metric_name, scorer, weight in (
        ('BERT-F', fn.calculate_bert_f, 0.45),
        ('Edit distance', fn.calculate_edit_distance, 0.1),
        ('ROUGE-1', fn.calculate_rouge1, 0.25),
        ('ROUGE-L', fn.calculate_rougeL, 0.2),
    )
]