In [1]:
import os
import sys
import json
# Project root, taken to be two directories above the notebook's working
# directory; it is put on sys.path so the `src.*` imports further down resolve.
root_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
# Re-running this cell must not pile up duplicate sys.path entries.
if root_path not in sys.path:
    sys.path.append(root_path)
import logging
from typing import List, Tuple, Dict

# Root of the SWEEP checkout; the path helpers below resolve the "data" and
# "test/results" trees underneath it. The SWEEP_ROOT_DIR environment variable
# overrides the machine-specific default, so the notebook can run elsewhere
# without editing this cell. With the variable unset the value is unchanged.
# NOTE(review): root_path computed above may point at the same directory —
# confirm, and consider deriving ROOT_DIR from it instead.
ROOT_DIR = os.environ.get("SWEEP_ROOT_DIR", r"C:\Projects\Research\SWEEP\SWEEP")

# Set up logging: INFO and above, each record prefixed with timestamp and level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def get_sectors() -> List[str]:
    """Return the names of the sector directories found in ``ROOT_DIR/data``.

    Only immediate sub-directories are returned; plain files are ignored.
    The names are sorted because ``os.listdir`` yields entries in arbitrary
    order, which would otherwise let the processing order (and therefore the
    notebook output) vary between machines and runs.

    Returns:
        Sorted list of directory names (not full paths).

    Raises:
        OSError: propagated from ``os.listdir`` when the data directory is
            missing or unreadable.
    """
    data_path = os.path.join(ROOT_DIR, "data")
    return sorted(
        entry
        for entry in os.listdir(data_path)
        if os.path.isdir(os.path.join(data_path, entry))
    )

def get_activities(sector: str) -> List[str]:
    """Return the names of the activity directories of one sector.

    Looks at the immediate sub-directories of ``ROOT_DIR/data/<sector>``;
    plain files are ignored. The names are sorted because ``os.listdir``
    yields entries in arbitrary order, which would otherwise make the
    processing order non-reproducible.

    Args:
        sector: Name of a sector directory under ``ROOT_DIR/data``.

    Returns:
        Sorted list of directory names (not full paths).

    Raises:
        OSError: propagated from ``os.listdir`` when the sector directory is
            missing or unreadable.
    """
    sector_path = os.path.join(ROOT_DIR, "data", sector)
    return sorted(
        entry
        for entry in os.listdir(sector_path)
        if os.path.isdir(os.path.join(sector_path, entry))
    )

def get_file_paths(sector: str, activity: str, model_name: str) -> Tuple[str, str, str, str]:
    """Build the input and output paths for one sector/activity/model combination.

    Args:
        sector: Sector directory name under ``ROOT_DIR/data``.
        activity: Activity directory name inside the sector; also the prefix
            of the two input file names.
        model_name: Model identifier; used as the prefix of the response file name.

    Returns:
        A 4-tuple ``(test_path, bpmn_model_path, activity_breakdown_gt, response_path)``:
        the results directory, the ``<activity>_model.txt`` file, the
        ``<activity>_activity_breakdown.json`` file, and the
        ``<model_name>_response.json`` file inside the results directory.
        Nothing is created or checked on disk.
    """
    dir_path = os.path.join(ROOT_DIR, "data", sector, activity)
    test_path = os.path.join(ROOT_DIR, "test", "results", "activity_breakdown", sector, activity)
    # dir_path and test_path already start with ROOT_DIR. Prefixing ROOT_DIR a
    # second time only worked because os.path.join discards everything before
    # an absolute component; with a relative ROOT_DIR the root was duplicated.
    bpmn_model_path = os.path.join(dir_path, f"{activity}_model.txt")
    activity_breakdown_gt = os.path.join(dir_path, f"{activity}_activity_breakdown.json")
    response_path = os.path.join(test_path, f"{model_name}_response.json")
    return test_path, bpmn_model_path, activity_breakdown_gt, response_path

In [2]:
from src.common.prompts.activity_breakdown.prompt_components import ABPromptComponents
from src.activity_breakdown import ActivityBreakdownComponentsGPT, ActivityBreakdownEvaluation
from src.common.models import ActivityBreakdown, BPMNModel
from src.common.util import open_file

# Evaluator instance shared by every breakdown_activity call; it compares a
# model breakdown with the ground truth and turns the comparison into metrics.
activity_breakdown_evaluation: ActivityBreakdownEvaluation = ActivityBreakdownEvaluation()

def breakdown_activity(sector: str, activity: str, activity_breakdown_gpt: ActivityBreakdownComponentsGPT, model_name: str) -> None:
    """Break down one BPMN activity with the model, score it, and save the result.

    Loads the activity's BPMN model text and ground-truth breakdown, asks
    ``activity_breakdown_gpt`` for a breakdown, compares it with the ground
    truth through the module-level ``activity_breakdown_evaluation``, and
    writes model info, response and metrics to the response JSON file.

    Args:
        sector: Sector directory name the activity belongs to.
        activity: Activity directory name.
        activity_breakdown_gpt: Client that produces the breakdown; its
            ``prompt_components["_raw_input"]`` is stored with the result.
        model_name: Identifier stored in the result and used in the output
            file name.

    Returns:
        None. This function is best-effort: any exception is logged (with
        traceback) and swallowed so that one failing activity does not stop a
        batch run.
    """
    logging.info(f"Processing sector: {sector}, activity: {activity}")

    try:
        # Resolve input files and the output location
        test_path, bpmn_model_path, activity_breakdown_gt, response_path = get_file_paths(sector, activity, model_name)

        os.makedirs(test_path, exist_ok=True)

        # Load the BPMN model and the ground-truth breakdown
        bpmn_model = BPMNModel.from_text(bpmn_model_path)
        ground_truth: ActivityBreakdown = ActivityBreakdown.from_json(activity_breakdown_gt)

        # Ask the model for its breakdown
        activity_breakdown_response = activity_breakdown_gpt.bpmn_activity_breakdown(bpmn_model.model)
        model_activity_breakdown: ActivityBreakdown = ActivityBreakdown.from_dict(activity, activity_breakdown_response)

        # Compare results
        comparison_data = activity_breakdown_evaluation.compare_activity_breakdowns(model_activity_breakdown.to_dict(), ground_truth.to_dict())
        comparison_metrics = activity_breakdown_evaluation.evaluate_comparisons(comparison_data, plot=False)

        response_dict = {
            "model": {
                "name": model_name,
                "components": activity_breakdown_gpt.prompt_components["_raw_input"]
            },
            "response": model_activity_breakdown.to_dict(),
            "metrics": comparison_metrics
        }

        # Save results (explicit encoding so the file does not depend on the
        # platform's default locale)
        with open(response_path, 'w', encoding='utf-8') as f:
            json.dump(response_dict, f, indent=4)

        logging.info(f"Successfully processed and saved results for {sector}/{activity} with {model_name}")

    except Exception as e:
        # logging.exception logs at ERROR level like before, but also records
        # the traceback, so a failed activity can be diagnosed from the log.
        logging.exception(f"Error processing {sector}/{activity}: {e}")
        

In [3]:
# Identifier stored in each saved result and used as the response file prefix.
model_name = "GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic"

# Names of the prompt components to use; presumably resolved to prompt text by
# ABPromptComponents.from_dict (verify there).
# Optional components currently left out: 'example_output', 'focus_shift'.
model_settings = dict(
    role_description="sme_detailed",
    task_description="breakdown_substeps",
    guidelines="standard_guidelines",
    output_format="standard_output",
)

# Notebook-level prompt components and the client built from them.
model_components = ABPromptComponents.from_dict(model_settings)
activity_breakdown_gpt = ActivityBreakdownComponentsGPT(model_components)

In [4]:
def main():
    """Run the activity breakdown for every activity of every sector.

    When the configured role description contains "sme", a dedicated client
    is built for each sector, with the role description looked up by sector
    name. Otherwise the notebook-level ``activity_breakdown_gpt`` is used for
    all sectors.
    """
    logging.info(f"Processing with model: {model_name}")

    # model_settings does not change during the run, so decide once.
    needs_sector_role = "sme" in model_settings.get("role_description", "")

    for sector in get_sectors():
        if needs_sector_role:
            components = ABPromptComponents.from_dict(model_settings)
            components.role_description = components.role_description[sector]
            sector_gpt = ActivityBreakdownComponentsGPT(components)
        else:
            # Fall back to the notebook-level client. Previously this function
            # assigned to the name ``activity_breakdown_gpt`` itself, which
            # made it a local variable and raised UnboundLocalError whenever
            # the "sme" branch was not taken.
            sector_gpt = activity_breakdown_gpt

        for activity in get_activities(sector):
            breakdown_activity(sector, activity, sector_gpt, model_name)

# Entry point: executing this cell in the notebook kernel (where __name__ is
# "__main__") starts the full batch run over all sectors and activities.
if __name__ == "__main__":
    main()

2024-09-26 14:22:20,072 - INFO - Processing with model: GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:22:20,312 - INFO - Processing sector: banking, activity: credit_application
2024-09-26 14:22:26,874 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:22:30,444 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:22:34,051 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:22:36,786 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:22:39,911 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:22:42,891 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:22:46,031 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:22:48,008 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:22:51,372 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:22:51,375 - INFO - Successfully processed and saved results for banking/credit_application with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:22:51,376 - INFO - Processing sector: banking, activity: credit_check


Quantitative Metrics:
Total Activities Evaluated: 3
Average Overall Similarity Score: 40.83%
Average Confidence Score: 45.83%
Average Sequence Alignment Score: 67.71%
Ground Truth Steps Coverage: 78.57%
Response Steps Coverage: 57.14%

Match Type Distribution:
- Functional Equivalence: 4 matches (33.33%), Average Confidence: 82.50%
- No Match: 5 matches (41.67%), Average Confidence: 0.00%
- Granularity Difference: 3 matches (25.00%), Average Confidence: 73.33%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Request Appointment
Ground Truth Steps:
  - [1] Initiate appointment request
Response Steps:
  - [1] Contact loan officer or financial institution
Confidence: 80%
Explanation: The ground truth step 'Initiate appointment request' is functionally equivalent to the response step 'Contact loan officer or financial institution'. Although the wording is different, both steps involve initiating contact with the relevant party 

2024-09-26 14:22:53,428 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:22:55,284 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:22:58,284 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:23:00,275 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:23:02,928 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:23:06,075 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:23:08,784 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:23:08,789 - INFO - Successfully processed and saved results for banking/credit_check with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:23:08,790 - INFO - Processing sector: banking, activity: credit_score


Quantitative Metrics:
Total Activities Evaluated: 4
Average Overall Similarity Score: 60.50%
Average Confidence Score: 71.67%
Average Sequence Alignment Score: 31.94%
Ground Truth Steps Coverage: 85.71%
Response Steps Coverage: 56.00%

Match Type Distribution:
- Granularity Difference: 3 matches (50.00%), Average Confidence: 80.00%
- Functional Equivalence: 1 matches (16.67%), Average Confidence: 90.00%
- No Match: 1 matches (16.67%), Average Confidence: 0.00%
- Identical Match: 1 matches (16.67%), Average Confidence: 100.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Granularity Difference
Activity: Check completeness
Ground Truth Steps:
  - [1] Verify all required information is provided
Response Steps:
  - [1] Review all submitted application documents
  - [2] Verify all required fields are filled
  - [3] Ensure all necessary supporting documents are attached
Confidence: 80%
Explanation: Ground truth step 1 corresponds to response steps 1, 2, and 

2024-09-26 14:23:16,103 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:23:18,674 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:23:21,051 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:23:24,187 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:23:27,099 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:23:29,622 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:23:32,235 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:23:35,294 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:23:38,435 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:23:41,634 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:23:41,638 - INFO - Successfully processed and saved results for banking/credit_score with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:23:41,639 - INFO - Processing sector: banking, activity: loan_application


Quantitative Metrics:
Total Activities Evaluated: 3
Average Overall Similarity Score: 27.22%
Average Confidence Score: 27.22%
Average Sequence Alignment Score: 79.63%
Ground Truth Steps Coverage: 69.23%
Response Steps Coverage: 38.89%

Match Type Distribution:
- Functional Equivalence: 2 matches (22.22%), Average Confidence: 82.50%
- No Match: 6 matches (66.67%), Average Confidence: 0.00%
- Granularity Difference: 1 matches (11.11%), Average Confidence: 80.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Start
Ground Truth Steps:
  - [1] Receive start signal
Response Steps:
  - [1] Receive incoming scoring request
Confidence: 85%
Explanation: The ground truth step 'Receive start signal' and the response step 'Receive incoming scoring request' describe the same action of receiving a request, but with different wording.

Activity: Scoring request received
Ground Truth Steps:
  - [1] Receive scoring request
Response Steps:

2024-09-26 14:23:48,927 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:23:53,018 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:23:55,079 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:23:57,382 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:24:00,827 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:24:04,679 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:24:08,215 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:24:10,933 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:24:13,999 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:24:17,456 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 10...


2024-09-26 14:24:20,370 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 11...


2024-09-26 14:24:22,596 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 12...


2024-09-26 14:24:24,882 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 13...


2024-09-26 14:24:27,239 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 14...


2024-09-26 14:24:30,292 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 15...


2024-09-26 14:24:32,501 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 16...


2024-09-26 14:24:35,370 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 17...


2024-09-26 14:24:39,658 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 18...


2024-09-26 14:24:42,143 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:24:42,148 - INFO - Successfully processed and saved results for banking/loan_application with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:24:42,149 - INFO - Processing sector: banking, activity: loan_risk_assessment


Quantitative Metrics:
Total Activities Evaluated: 3
Average Overall Similarity Score: 18.89%
Average Confidence Score: 22.22%
Average Sequence Alignment Score: 69.91%
Ground Truth Steps Coverage: 58.06%
Response Steps Coverage: 43.21%

Match Type Distribution:
- Functional Equivalence: 3 matches (16.67%), Average Confidence: 80.00%
- Granularity Difference: 2 matches (11.11%), Average Confidence: 80.00%
- No Match: 13 matches (72.22%), Average Confidence: 0.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Submit loan application
Ground Truth Steps:
  - [1] Applicant submits loan application
Response Steps:
  - [1] Fill out loan application form
Confidence: 80%
Explanation: The ground truth step 'Applicant submits loan application' is functionally equivalent to the response step 'Fill out loan application form'. Although the wording is different, both steps describe the action of completing the loan application.

Activit

2024-09-26 14:24:47,944 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:24:50,660 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:24:53,034 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:24:57,040 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:25:01,792 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:25:06,085 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:25:09,341 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:25:13,082 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:25:18,894 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:25:22,038 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:25:22,044 - INFO - Successfully processed and saved results for banking/loan_risk_assessment with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:25:22,046 - INFO - Processing sector: banking, activity: mortgage_process


Quantitative Metrics:
Total Activities Evaluated: 4
Average Overall Similarity Score: 25.04%
Average Confidence Score: 35.83%
Average Sequence Alignment Score: 58.33%
Ground Truth Steps Coverage: 66.67%
Response Steps Coverage: 44.90%

Match Type Distribution:
- Functional Equivalence: 3 matches (25.00%), Average Confidence: 85.00%
- No Match: 7 matches (58.33%), Average Confidence: 0.00%
- Identical Match: 1 matches (8.33%), Average Confidence: 100.00%
- Granularity Difference: 1 matches (8.33%), Average Confidence: 75.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Receive loan application
Ground Truth Steps:
  - [1] Receive loan application
Response Steps:
  - [1] Receive application from customer
Confidence: 95%
Explanation: The response step 'Receive application from customer' is functionally equivalent to the ground truth step 'Receive loan application'. Although the wording is slightly different, the core functi

2024-09-26 14:25:25,471 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:25:28,031 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:25:30,763 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:25:34,918 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:25:37,677 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:25:37,680 - INFO - Successfully processed and saved results for banking/mortgage_process with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic


Quantitative Metrics:
Total Activities Evaluated: 2
Average Overall Similarity Score: 20.62%
Average Confidence Score: 33.00%
Average Sequence Alignment Score: 100.00%
Ground Truth Steps Coverage: 71.43%
Response Steps Coverage: 27.78%

Match Type Distribution:
- No Match: 3 matches (60.00%), Average Confidence: 0.00%
- Functional Equivalence: 2 matches (40.00%), Average Confidence: 82.50%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: No Match
Activity: Start
Ground Truth Steps:
  - [1] Initiate the process
Response Steps:
  - [1] Review applicant's financial information
Confidence: 0%
Explanation: There is no corresponding step in the response activity.

Activity: CT1: Check financial coverage
Ground Truth Steps:
  - [1] Verify customer's financial information
Response Steps:
  - [1] Review property details
Confidence: 0%
Explanation: There is no corresponding step in the response activity.

Activity: CT2: Check property
Ground Truth Steps:
  - [1] Ass

2024-09-26 14:25:37,928 - INFO - Processing sector: business, activity: advanced_b2b_sales
2024-09-26 14:25:47,548 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:25:50,716 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:25:53,530 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:25:56,840 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:26:00,793 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:26:06,028 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:26:09,931 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:26:14,852 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:26:16,999 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:26:20,630 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 10...


2024-09-26 14:26:26,229 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 11...


2024-09-26 14:26:30,154 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 12...


2024-09-26 14:26:34,294 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 13...


2024-09-26 14:26:37,324 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 14...


2024-09-26 14:26:40,299 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 15...


2024-09-26 14:26:44,033 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 16...


2024-09-26 14:26:47,219 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:26:47,230 - INFO - Successfully processed and saved results for business/advanced_b2b_sales with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:26:47,233 - INFO - Processing sector: business, activity: b2b_sales


Quantitative Metrics:
Total Activities Evaluated: 2
Average Overall Similarity Score: 13.33%
Average Confidence Score: 16.00%
Average Sequence Alignment Score: 51.25%
Ground Truth Steps Coverage: 58.82%
Response Steps Coverage: 48.48%

Match Type Distribution:
- No Match: 16 matches (80.00%), Average Confidence: 0.00%
- Functional Equivalence: 4 matches (20.00%), Average Confidence: 80.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: No Match
Activity: Start
Ground Truth Steps:
  - [1] Receive application
Response Steps:
  - [1] Prepare relevant marketing materials
  - [2] Customize information based on application
  - [3] Send marketing package to applicant
Confidence: 0%
Explanation: There is no corresponding step in the response activity for the 'Receive application' step in the ground truth activity.

Activity: Send marketing information
Ground Truth Steps:
  - [1] Send marketing information to applicant
Response Steps:
  - [1] Receive email respon

2024-09-26 14:26:53,558 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:26:56,290 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:26:59,575 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:27:03,241 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:27:06,984 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:27:10,164 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:27:12,204 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:27:15,002 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:27:18,550 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:27:18,555 - INFO - Successfully processed and saved results for business/b2b_sales with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:27:18,556 - INFO - Processing sector: business, activity: cost_planning


Quantitative Metrics:
Total Activities Evaluated: 3
Average Overall Similarity Score: 38.75%
Average Confidence Score: 35.38%
Average Sequence Alignment Score: 73.96%
Ground Truth Steps Coverage: 87.50%
Response Steps Coverage: 52.94%

Match Type Distribution:
- No Match: 7 matches (53.85%), Average Confidence: 0.00%
- Granularity Difference: 2 matches (15.38%), Average Confidence: 75.00%
- Functional Equivalence: 4 matches (30.77%), Average Confidence: 77.50%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: No Match
Activity: Start
Ground Truth Steps:
  - [1] Receive application
Response Steps:
  - [1] Prepare relevant marketing materials
Confidence: 0%
Explanation: There is no corresponding step in the response activity.

Activity: Start
Ground Truth Steps:
  - [2] Record application details
Response Steps:
  - [2] Customize information based on application
Confidence: 0%
Explanation: There is no corresponding step in the response activity.

Activity: Se

2024-09-26 14:27:20,785 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:27:24,137 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:27:27,798 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:27:30,281 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:27:34,566 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:27:34,573 - INFO - Successfully processed and saved results for business/cost_planning with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:27:34,575 - INFO - Processing sector: business, activity: declaration_approval


Quantitative Metrics:
Total Activities Evaluated: 3
Average Overall Similarity Score: 61.25%
Average Confidence Score: 66.00%
Average Sequence Alignment Score: 44.58%
Ground Truth Steps Coverage: 83.33%
Response Steps Coverage: 61.90%

Match Type Distribution:
- Granularity Difference: 2 matches (40.00%), Average Confidence: 85.00%
- Functional Equivalence: 2 matches (40.00%), Average Confidence: 80.00%
- No Match: 1 matches (20.00%), Average Confidence: 0.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Granularity Difference
Activity: Start
Ground Truth Steps:
  - [1] Conduct cost planning
Response Steps:
  - [1] Gather relevant financial data
  - [2] Estimate project costs
  - [3] Create cost breakdown structure
  - [4] Develop initial cost plan
Confidence: 80%
Explanation: The ground truth step 'Conduct cost planning' corresponds to multiple response steps: 'Gather relevant financial data', 'Estimate project costs', 'Create cost breakdown structure

2024-09-26 14:27:40,750 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:27:42,898 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:27:47,026 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:27:50,604 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:27:52,779 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:27:54,954 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:27:57,790 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:28:01,481 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:28:04,660 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:28:06,961 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 10...


2024-09-26 14:28:10,212 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 11...


2024-09-26 14:28:13,448 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 12...


2024-09-26 14:28:15,871 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 13...


2024-09-26 14:28:18,210 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 14...


2024-09-26 14:28:21,031 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:28:21,035 - INFO - Successfully processed and saved results for business/declaration_approval with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:28:21,036 - INFO - Processing sector: business, activity: hiring_process


Quantitative Metrics:
Total Activities Evaluated: 4
Average Overall Similarity Score: 39.11%
Average Confidence Score: 42.39%
Average Sequence Alignment Score: 59.52%
Ground Truth Steps Coverage: 80.77%
Response Steps Coverage: 60.38%

Match Type Distribution:
- Granularity Difference: 2 matches (8.70%), Average Confidence: 80.00%
- Functional Equivalence: 5 matches (21.74%), Average Confidence: 83.00%
- Identical Match: 4 matches (17.39%), Average Confidence: 100.00%
- No Match: 12 matches (52.17%), Average Confidence: 0.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Granularity Difference
Activity: Declaration SUBMITTED by EMPLOYEE
Ground Truth Steps:
  - [1] Employee submits declaration
Response Steps:
  - [1] Employee fills out declaration form
  - [2] Employee attaches necessary documents
  - [3] Employee submits declaration
Confidence: 80%
Explanation: Ground truth step 1 corresponds to response steps 1, 2, and 3. The response provides a more d

2024-09-26 14:28:31,599 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:28:35,236 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:28:38,874 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:28:42,755 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:28:45,382 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:28:49,003 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:28:52,193 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:28:57,306 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:29:01,256 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:29:03,958 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 10...


2024-09-26 14:29:07,083 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 11...


2024-09-26 14:29:10,353 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 12...


2024-09-26 14:29:15,385 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 13...


2024-09-26 14:29:20,941 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:29:20,945 - INFO - Successfully processed and saved results for business/hiring_process with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:29:20,947 - INFO - Processing sector: business, activity: manufacturing


Quantitative Metrics:
Total Activities Evaluated: 4
Average Overall Similarity Score: 48.08%
Average Confidence Score: 46.18%
Average Sequence Alignment Score: 91.03%
Ground Truth Steps Coverage: 84.44%
Response Steps Coverage: 80.43%

Match Type Distribution:
- Functional Equivalence: 19 matches (50.00%), Average Confidence: 82.37%
- No Match: 17 matches (44.74%), Average Confidence: 0.00%
- Granularity Difference: 1 matches (2.63%), Average Confidence: 90.00%
- Identical Match: 1 matches (2.63%), Average Confidence: 100.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Hand In Job Application
Ground Truth Steps:
  - [1] Complete job application form
Response Steps:
  - [1] Candidate prepares application materials
Confidence: 80%
Explanation: The core function of completing the job application form is equivalent to the candidate preparing application materials.

Activity: Initial Screening
Ground Truth Steps:
  - [1] Re

2024-09-26 14:29:28,460 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:29:31,151 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:29:34,100 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:29:36,003 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:29:38,594 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:29:40,959 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:29:43,627 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:29:46,697 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:29:48,842 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:29:51,248 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 10...


2024-09-26 14:29:54,883 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 11...


2024-09-26 14:29:57,209 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 12...


2024-09-26 14:30:00,331 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 13...


2024-09-26 14:30:03,538 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 14...


2024-09-26 14:30:06,638 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:30:06,642 - INFO - Successfully processed and saved results for business/manufacturing with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:30:06,644 - INFO - Processing sector: business, activity: order_to_cash


Quantitative Metrics:
Total Activities Evaluated: 3
Average Overall Similarity Score: 49.29%
Average Confidence Score: 43.12%
Average Sequence Alignment Score: 51.79%
Ground Truth Steps Coverage: 73.68%
Response Steps Coverage: 58.06%

Match Type Distribution:
- Granularity Difference: 6 matches (37.50%), Average Confidence: 83.33%
- No Match: 8 matches (50.00%), Average Confidence: 0.00%
- Functional Equivalence: 2 matches (12.50%), Average Confidence: 95.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Granularity Difference
Activity: Check stock availability
Ground Truth Steps:
  - [1] Check if product is in stock
Response Steps:
  - [1] Access inventory management system
  - [2] Search product in database
  - [3] Verify current stock levels
  - [4] Update stock status
Confidence: 80%
Explanation: Ground truth step 1 corresponds to response steps 1, 2, 3, and 4. The response steps provide a more detailed breakdown of the process.

Activity: Retrieve

2024-09-26 14:30:10,747 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:30:12,629 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:30:16,641 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:30:19,038 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:30:22,166 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:30:24,823 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:30:29,099 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:30:32,072 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:30:34,577 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:30:37,866 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 10...


2024-09-26 14:30:42,474 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 11...


2024-09-26 14:30:45,247 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 12...


2024-09-26 14:30:48,442 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 13...


2024-09-26 14:30:50,929 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 14...


2024-09-26 14:30:54,640 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 15...


2024-09-26 14:30:56,861 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:30:56,865 - INFO - Successfully processed and saved results for business/order_to_cash with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic


Quantitative Metrics:
Total Activities Evaluated: 2
Average Overall Similarity Score: 10.50%
Average Confidence Score: 13.82%
Average Sequence Alignment Score: 16.67%
Ground Truth Steps Coverage: 64.29%
Response Steps Coverage: 54.84%

Match Type Distribution:
- Granularity Difference: 3 matches (17.65%), Average Confidence: 78.33%
- No Match: 14 matches (82.35%), Average Confidence: 0.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Granularity Difference
Activity: Purchase order received
Ground Truth Steps:
  - [1] Check stock availability
Response Steps:
  - [1] Access inventory management system
  - [2] Search for ordered product
  - [3] Verify current stock levels
Confidence: 80%
Explanation: The ground truth step 'Check stock availability' corresponds to the response steps 'Access inventory management system', 'Search for ordered product', and 'Verify current stock levels'. The response steps provide a more detailed breakdown of the process.

Act

2024-09-26 14:30:57,114 - INFO - Processing sector: customer_service, activity: complaint_handling
2024-09-26 14:31:02,984 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:31:07,426 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:31:11,236 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:31:14,703 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:31:18,796 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:31:21,560 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:31:23,928 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:31:26,937 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:31:30,960 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:31:35,738 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:31:35,742 - INFO - Successfully processed and saved results for customer_service/complaint_handling with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:31:35,743 - INFO - Processing sector: customer_service, activity: complaint_handling_v2


Quantitative Metrics:
Total Activities Evaluated: 3
Average Overall Similarity Score: 32.00%
Average Confidence Score: 35.00%
Average Sequence Alignment Score: 85.19%
Ground Truth Steps Coverage: 90.48%
Response Steps Coverage: 71.43%

Match Type Distribution:
- Functional Equivalence: 6 matches (31.58%), Average Confidence: 82.50%
- Granularity Difference: 2 matches (10.53%), Average Confidence: 85.00%
- No Match: 11 matches (57.89%), Average Confidence: 0.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Incoming call
Ground Truth Steps:
  - [1] Answer the call
Response Steps:
  - [1] Answer incoming call
Confidence: 95%
Explanation: The steps describe the same action of answering an incoming call, but use slightly different wording.

Activity: Incoming call
Ground Truth Steps:
  - [2] Gather customer information
Response Steps:
  - [2] Record caller's information
Confidence: 90%
Explanation: The steps have the same co

2024-09-26 14:31:40,561 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:31:44,143 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:31:48,172 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:31:52,350 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:31:56,818 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:32:00,708 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:32:04,934 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:32:12,538 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:32:12,542 - INFO - Successfully processed and saved results for customer_service/complaint_handling_v2 with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:32:12,543 - INFO - Processing sector: customer_service, activity: purchase_order


Quantitative Metrics:
Total Activities Evaluated: 4
Average Overall Similarity Score: 79.19%
Average Confidence Score: 78.64%
Average Sequence Alignment Score: 71.90%
Ground Truth Steps Coverage: 100.00%
Response Steps Coverage: 83.33%

Match Type Distribution:
- Functional Equivalence: 15 matches (68.18%), Average Confidence: 83.00%
- Granularity Difference: 5 matches (22.73%), Average Confidence: 77.00%
- Identical Match: 1 matches (4.55%), Average Confidence: 100.00%
- No Match: 1 matches (4.55%), Average Confidence: 0.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Send automatic reply to customer
Ground Truth Steps:
  - [1] Receive complaint from customer
Response Steps:
  - [1] Receive complaint notification
Confidence: 95%
Explanation: The ground truth step 'Receive complaint from customer' is functionally equivalent to the response step 'Receive complaint notification'. The core function of receiving a complain

2024-09-26 14:32:17,757 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:32:20,468 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:32:23,105 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:32:27,481 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:32:30,567 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:32:33,015 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:32:36,330 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:32:38,833 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:32:42,429 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:32:44,778 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:32:44,782 - INFO - Successfully processed and saved results for customer_service/purchase_order with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:32:44,783 - INFO - Processing sector: customer_service, activity: purchase_request


Quantitative Metrics:
Total Activities Evaluated: 3
Average Overall Similarity Score: 66.11%
Average Confidence Score: 54.29%
Average Sequence Alignment Score: 58.33%
Ground Truth Steps Coverage: 92.31%
Response Steps Coverage: 67.65%

Match Type Distribution:
- Functional Equivalence: 5 matches (35.71%), Average Confidence: 86.00%
- Granularity Difference: 4 matches (28.57%), Average Confidence: 82.50%
- No Match: 5 matches (35.71%), Average Confidence: 0.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Check stock availability
Ground Truth Steps:
  - [1] Check if product is in stock
Response Steps:
  - [1] Query inventory database
Confidence: 90%
Explanation: The ground truth step 'Check if product is in stock' is functionally equivalent to the response step 'Query inventory database'. Both steps involve checking the availability of the product.

Activity: Retrieve product from warehouse
Ground Truth Steps:
  - [1] Lo

2024-09-26 14:33:00,782 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:33:04,651 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:33:07,769 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:33:11,246 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:33:14,311 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:33:17,223 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:33:20,967 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:33:24,376 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:33:27,094 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:33:30,517 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 10...


2024-09-26 14:33:34,041 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 11...


2024-09-26 14:33:37,041 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 12...


2024-09-26 14:33:42,753 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 13...


2024-09-26 14:33:46,171 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 14...


2024-09-26 14:33:50,527 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 15...


2024-09-26 14:33:54,825 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 16...


2024-09-26 14:33:58,483 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 17...


2024-09-26 14:34:02,443 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 18...


2024-09-26 14:34:06,157 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 19...


2024-09-26 14:34:08,447 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 20...


2024-09-26 14:34:13,641 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 21...


2024-09-26 14:34:17,290 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 22...


2024-09-26 14:34:20,076 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 23...


2024-09-26 14:34:23,744 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 24...


2024-09-26 14:34:26,426 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 25...


2024-09-26 14:34:28,804 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 26...


2024-09-26 14:34:31,507 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 27...


2024-09-26 14:34:35,270 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:34:35,274 - INFO - Successfully processed and saved results for customer_service/purchase_request with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:34:35,275 - INFO - Processing sector: customer_service, activity: rental_request


Quantitative Metrics:
Total Activities Evaluated: 4
Average Overall Similarity Score: 20.72%
Average Confidence Score: 21.27%
Average Sequence Alignment Score: 70.37%
Ground Truth Steps Coverage: 82.09%
Response Steps Coverage: 61.17%

Match Type Distribution:
- Functional Equivalence: 7 matches (12.73%), Average Confidence: 88.57%
- No Match: 41 matches (74.55%), Average Confidence: 0.00%
- Identical Match: 1 matches (1.82%), Average Confidence: 100.00%
- Granularity Difference: 6 matches (10.91%), Average Confidence: 75.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Fill purchase request form
Ground Truth Steps:
  - [1] Open purchase request form
Response Steps:
  - [1] Access purchase request form
Confidence: 95%
Explanation: The response step 'Access purchase request form' is functionally equivalent to the ground truth step 'Open purchase request form'. Although the wording is different, both steps describe the ac

2024-09-26 14:34:39,642 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:34:43,077 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:34:46,363 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:34:49,618 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:34:53,195 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:34:57,204 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:34:57,208 - INFO - Successfully processed and saved results for customer_service/rental_request with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:34:57,209 - INFO - Processing sector: customer_service, activity: special_order


Quantitative Metrics:
Total Activities Evaluated: 2
Average Overall Similarity Score: 45.13%
Average Confidence Score: 46.79%
Average Sequence Alignment Score: 66.67%
Ground Truth Steps Coverage: 87.50%
Response Steps Coverage: 91.67%

Match Type Distribution:
- Functional Equivalence: 8 matches (57.14%), Average Confidence: 81.88%
- No Match: 6 matches (42.86%), Average Confidence: 0.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Submit equipment rental request
Ground Truth Steps:
  - [1] Fill out rental request form
Response Steps:
  - [1] Fill request
Confidence: 80%
Explanation: The core function of filling out the rental request form is equivalent to filling the request.

Activity: Submit equipment rental request
Ground Truth Steps:
  - [3] Submit request
Response Steps:
  - [2] Send request to clerk
Confidence: 70%
Explanation: The core function of submitting the request is equivalent to sending the request to t

2024-09-26 14:35:01,879 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:35:05,790 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:35:08,651 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:35:11,202 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:35:12,938 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:35:15,715 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:35:18,940 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:35:23,053 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:35:26,153 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:35:28,463 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 10...


2024-09-26 14:35:30,639 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 11...


2024-09-26 14:35:33,697 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 12...


2024-09-26 14:35:36,893 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:35:36,897 - INFO - Successfully processed and saved results for customer_service/special_order with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:35:36,899 - INFO - Processing sector: customer_service, activity: training_course


Quantitative Metrics:
Total Activities Evaluated: 1
Average Overall Similarity Score: 0.00%
Average Confidence Score: 0.00%
Average Sequence Alignment Score: 0.00%
Ground Truth Steps Coverage: 0.00%
Response Steps Coverage: 50.00%

Match Type Distribution:
- No Match: 12 matches (100.00%), Average Confidence: 0.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: No Match
Activity: Special order received
Ground Truth Steps:
Response Steps:
  - [1] Receive special order request
  - [2] Create new order entry in system
  - [3] Assign unique order number
  - [4] Record initial order details
Confidence: 0%
Explanation: There are no corresponding steps in the ground truth for the response steps.

Activity: Register order
Ground Truth Steps:
Response Steps:
  - [1] Review order specifications
  - [2] Verify customer information
  - [3] Confirm product customisation requirements
Confidence: 0%
Explanation: There are no corresponding steps in the ground truth for 

2024-09-26 14:35:44,254 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:35:47,090 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:35:51,319 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:35:54,453 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:35:57,975 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:36:01,243 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:36:05,904 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:36:11,761 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:36:15,024 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:36:18,791 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 10...


2024-09-26 14:36:26,037 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 11...


2024-09-26 14:36:32,007 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 12...


2024-09-26 14:36:34,873 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:36:34,877 - INFO - Successfully processed and saved results for customer_service/training_course with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic


Quantitative Metrics:
Total Activities Evaluated: 4
Average Overall Similarity Score: 39.45%
Average Confidence Score: 33.61%
Average Sequence Alignment Score: 60.42%
Ground Truth Steps Coverage: 78.05%
Response Steps Coverage: 61.82%

Match Type Distribution:
- Granularity Difference: 6 matches (16.67%), Average Confidence: 83.33%
- No Match: 22 matches (61.11%), Average Confidence: 0.00%
- Functional Equivalence: 7 matches (19.44%), Average Confidence: 90.00%
- Process Alternative: 1 matches (2.78%), Average Confidence: 80.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Granularity Difference
Activity: Review application and budget forms
Ground Truth Steps:
  - [1] Review application forms
Response Steps:
  - [1] Receive application and budget forms
  - [2] Review application details
Confidence: 80%
Explanation: Ground truth step 1 corresponds to response steps 1 and 2. The response steps are more detailed and include receiving the forms.

Activity:

2024-09-26 14:36:35,134 - INFO - Processing sector: government, activity: fine_management
2024-09-26 14:36:43,932 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:36:46,616 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:36:49,979 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:36:53,087 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:36:55,086 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:36:58,124 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:37:02,039 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:37:05,475 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:37:08,543 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:37:13,133 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 10...


2024-09-26 14:37:16,112 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 11...


2024-09-26 14:37:21,073 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 12...


2024-09-26 14:37:24,592 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:37:24,596 - INFO - Successfully processed and saved results for government/fine_management with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:37:24,597 - INFO - Processing sector: government, activity: rental_process


Quantitative Metrics:
Total Activities Evaluated: 2
Average Overall Similarity Score: 23.33%
Average Confidence Score: 24.62%
Average Sequence Alignment Score: 55.56%
Ground Truth Steps Coverage: 61.90%
Response Steps Coverage: 48.33%

Match Type Distribution:
- Functional Equivalence: 4 matches (30.77%), Average Confidence: 80.00%
- No Match: 9 matches (69.23%), Average Confidence: 0.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Create Fine
Ground Truth Steps:
  - [1] Initiate process
Response Steps:
  - [1] Record details of the offense
Confidence: 80%
Explanation: The ground truth step 'Initiate process' is functionally equivalent to the response step 'Record details of the offense'. Although the wording is different, both steps involve starting the process by gathering information about the offense.

Activity: Payment
Ground Truth Steps:
  - [1] Process payment
Response Steps:
  - [1] Update system with fine noti

2024-09-26 14:37:30,422 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:37:33,594 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:37:35,360 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:37:39,361 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:37:42,483 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:37:46,159 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:37:48,674 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:37:50,604 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:37:53,636 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:37:56,606 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 10...


2024-09-26 14:37:58,763 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 11...


2024-09-26 14:38:01,312 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 12...


2024-09-26 14:38:04,914 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 13...


2024-09-26 14:38:08,495 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 14...


2024-09-26 14:38:11,763 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 15...


2024-09-26 14:38:14,114 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:38:14,118 - INFO - Successfully processed and saved results for government/rental_process with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic


Quantitative Metrics:
Total Activities Evaluated: 3
Average Overall Similarity Score: 45.80%
Average Confidence Score: 40.83%
Average Sequence Alignment Score: 63.89%
Ground Truth Steps Coverage: 87.50%
Response Steps Coverage: 66.67%

Match Type Distribution:
- Functional Equivalence: 6 matches (25.00%), Average Confidence: 83.33%
- No Match: 12 matches (50.00%), Average Confidence: 0.00%
- Granularity Difference: 6 matches (25.00%), Average Confidence: 80.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Apply for Viewing Appointment
Ground Truth Steps:
  - [1] Submit application form
Response Steps:
  - [1] Prospective tenant submits application for viewing
Confidence: 90%
Explanation: The core function of both steps is to submit an application form, although the wording differs slightly.

Activity: Apply for Viewing Appointment
Ground Truth Steps:
  - [3] Wait for application acceptance
Response Steps:
  - [3] Initia

2024-09-26 14:38:14,356 - INFO - Processing sector: healthcare, activity: automated_prescription_fulfillment
2024-09-26 14:38:17,657 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:38:23,369 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:38:29,198 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:38:35,253 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:38:38,638 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:38:43,688 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:38:47,889 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:38:50,584 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:38:50,588 - INFO - Successfully processed and saved results for healthcare/automated_prescription_fulfillment with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:38:50,589 - INFO - Processing sector: healthcare, activity: catheter_insertion


Quantitative Metrics:
Total Activities Evaluated: 3
Average Overall Similarity Score: 23.36%
Average Confidence Score: 25.28%
Average Sequence Alignment Score: 79.52%
Ground Truth Steps Coverage: 100.00%
Response Steps Coverage: 55.56%

Match Type Distribution:
- Functional Equivalence: 4 matches (22.22%), Average Confidence: 78.75%
- No Match: 12 matches (66.67%), Average Confidence: 0.00%
- Granularity Difference: 2 matches (11.11%), Average Confidence: 70.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Perform thorough check
Ground Truth Steps:
  - [1] Check prescription details
Response Steps:
  - [1] Receive prescription from customer
Confidence: 80%
Explanation: The ground truth step 'Check prescription details' is functionally equivalent to the response step 'Receive prescription from customer'. Although the wording is different, both steps involve obtaining the prescription information.

Activity: Enter prescri

2024-09-26 14:39:10,343 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:39:13,819 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:39:20,370 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:39:23,319 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:39:26,496 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:39:30,977 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:39:33,404 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:39:35,378 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:39:38,417 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:39:41,470 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 10...


2024-09-26 14:39:45,224 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 11...


2024-09-26 14:39:47,952 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 12...


2024-09-26 14:39:51,699 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 13...


2024-09-26 14:39:54,299 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 14...


2024-09-26 14:39:57,727 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 15...


2024-09-26 14:40:01,281 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 16...


2024-09-26 14:40:04,451 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 17...


2024-09-26 14:40:07,858 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 18...


2024-09-26 14:40:10,795 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 19...


2024-09-26 14:40:12,737 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 20...


2024-09-26 14:40:15,761 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 21...


2024-09-26 14:40:18,001 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 22...


2024-09-26 14:40:20,907 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 23...


2024-09-26 14:40:23,647 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 24...


2024-09-26 14:40:25,421 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 25...


2024-09-26 14:40:28,923 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 26...


2024-09-26 14:40:32,490 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:40:32,494 - INFO - Successfully processed and saved results for healthcare/catheter_insertion with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:40:32,495 - INFO - Processing sector: healthcare, activity: medical_delivery


Quantitative Metrics:
Total Activities Evaluated: 4
Average Overall Similarity Score: 48.30%
Average Confidence Score: 48.02%
Average Sequence Alignment Score: 66.15%
Ground Truth Steps Coverage: 85.45%
Response Steps Coverage: 67.44%

Match Type Distribution:
- Functional Equivalence: 24 matches (45.28%), Average Confidence: 83.96%
- Granularity Difference: 6 matches (11.32%), Average Confidence: 71.67%
- Identical Match: 1 matches (1.89%), Average Confidence: 100.00%
- No Match: 22 matches (41.51%), Average Confidence: 0.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Prepare implements
Ground Truth Steps:
  - [1] Gather necessary implements
Response Steps:
  - [1] Gather necessary medical equipment
Confidence: 80%
Explanation: The steps describe the same action of gathering necessary items, but use different terminology ('implements' vs 'medical equipment').

Activity: Prepare implements
Ground Truth Steps:
  - [2] 

2024-09-26 14:40:35,431 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:40:38,430 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:40:40,333 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:40:41,834 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:40:44,179 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:40:46,124 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:40:47,406 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:40:48,656 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:40:48,662 - INFO - Successfully processed and saved results for healthcare/medical_delivery with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:40:48,664 - INFO - Processing sector: healthcare, activity: medical_supplies


Quantitative Metrics:
Total Activities Evaluated: 3
Average Overall Similarity Score: 72.14%
Average Confidence Score: 64.44%
Average Sequence Alignment Score: 88.10%
Ground Truth Steps Coverage: 90.00%
Response Steps Coverage: 81.82%

Match Type Distribution:
- No Match: 2 matches (22.22%), Average Confidence: 0.00%
- Granularity Difference: 1 matches (11.11%), Average Confidence: 70.00%
- Functional Equivalence: 6 matches (66.67%), Average Confidence: 85.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: No Match
Activity: Check destination
Ground Truth Steps:
  - [1] Verify destination address
Response Steps:
  - [1] Unload package from pallet
Confidence: 0%
Explanation: The ground truth step 'Verify destination address' does not have a corresponding step in the response.

Activity: Write details on package
Ground Truth Steps:
  - [2] Write destination address on package
Response Steps:
Confidence: 0%
Explanation: There is no corresponding step in the

2024-09-26 14:40:52,133 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:40:58,312 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:41:06,559 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:41:09,964 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:41:14,406 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:41:14,410 - INFO - Successfully processed and saved results for healthcare/medical_supplies with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:41:14,410 - INFO - Processing sector: healthcare, activity: patient_intake


Quantitative Metrics:
Total Activities Evaluated: 3
Average Overall Similarity Score: 38.17%
Average Confidence Score: 35.00%
Average Sequence Alignment Score: 58.33%
Ground Truth Steps Coverage: 76.92%
Response Steps Coverage: 53.33%

Match Type Distribution:
- Functional Equivalence: 3 matches (27.27%), Average Confidence: 80.00%
- No Match: 6 matches (54.55%), Average Confidence: 0.00%
- Granularity Difference: 2 matches (18.18%), Average Confidence: 72.50%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Package receipt at dock
Ground Truth Steps:
  - [1] Receive package at dock
Response Steps:
  - [1] Unload package
Confidence: 80%
Explanation: The ground truth step 'Receive package at dock' is functionally equivalent to the response step 'Unload package'. Although the wording is different, both steps involve the action of taking the package off the delivery vehicle.

Activity: Package processing in the rack room
Groun

2024-09-26 14:41:22,011 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:41:24,571 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:41:26,671 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:41:29,753 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:41:33,035 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:41:35,138 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:41:37,572 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:41:39,549 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:41:41,806 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:41:45,863 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 10...


2024-09-26 14:41:47,644 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 11...


2024-09-26 14:41:51,639 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 12...


2024-09-26 14:41:53,690 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 13...


2024-09-26 14:41:55,879 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 14...


2024-09-26 14:41:58,811 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 15...


2024-09-26 14:42:00,680 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 16...


2024-09-26 14:42:02,665 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 17...


2024-09-26 14:42:09,729 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 18...


2024-09-26 14:42:11,968 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:42:11,971 - INFO - Successfully processed and saved results for healthcare/patient_intake with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:42:11,972 - INFO - Processing sector: healthcare, activity: prescription_fulfillment


Quantitative Metrics:
Total Activities Evaluated: 3
Average Overall Similarity Score: 74.86%
Average Confidence Score: 61.67%
Average Sequence Alignment Score: 31.67%
Ground Truth Steps Coverage: 100.00%
Response Steps Coverage: 90.41%

Match Type Distribution:
- Functional Equivalence: 5 matches (18.52%), Average Confidence: 87.00%
- No Match: 7 matches (25.93%), Average Confidence: 0.00%
- Granularity Difference: 15 matches (55.56%), Average Confidence: 82.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Answer notice
Ground Truth Steps:
  - [1] Receive notice by phone
Response Steps:
  - [1] Receive phone call
Confidence: 95%
Explanation: The ground truth step 'Receive notice by phone' is functionally equivalent to the response step 'Receive phone call'.

Activity: Store and print notice
Ground Truth Steps:
  - [1] Store notice information
Response Steps:
  - [1] Save notice in digital system
Confidence: 90%
Explanat

2024-09-26 14:42:27,203 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:42:29,753 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:42:32,082 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:42:34,634 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:42:36,730 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:42:38,345 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:42:40,174 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:42:42,299 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:42:45,116 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:42:47,567 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 10...


2024-09-26 14:42:50,510 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 11...


2024-09-26 14:42:53,292 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 12...


2024-09-26 14:42:55,830 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 13...


2024-09-26 14:42:59,104 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 14...


2024-09-26 14:43:02,143 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 15...


2024-09-26 14:43:06,491 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 16...


2024-09-26 14:43:10,046 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 17...


2024-09-26 14:43:13,226 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 18...


2024-09-26 14:43:16,760 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 19...


2024-09-26 14:43:19,660 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 20...


2024-09-26 14:43:22,540 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:43:22,544 - INFO - Successfully processed and saved results for healthcare/prescription_fulfillment with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic


Quantitative Metrics:
Total Activities Evaluated: 3
Average Overall Similarity Score: 16.00%
Average Confidence Score: 16.00%
Average Sequence Alignment Score: 73.75%
Ground Truth Steps Coverage: 58.82%
Response Steps Coverage: 41.46%

Match Type Distribution:
- No Match: 16 matches (80.00%), Average Confidence: 0.00%
- Functional Equivalence: 3 matches (15.00%), Average Confidence: 80.00%
- Granularity Difference: 1 matches (5.00%), Average Confidence: 80.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: No Match
Activity: Drop off prescription
Ground Truth Steps:
  - [1] Customer arrives at the pharmacy
Response Steps:
  - [1] Choose drop-off location (drive-through or front counter)
  - [2] Hand over prescription to staff
Confidence: 0%
Explanation: The ground truth step 'Customer arrives at the pharmacy' does not have a corresponding step in the response.

Activity: Drop-off location
Ground Truth Steps:
  - [1] Customer selects drive-through or fron

2024-09-26 14:43:22,784 - INFO - Processing sector: insurance, activity: damage_compensation
2024-09-26 14:43:27,570 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:43:29,449 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:43:32,093 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:43:35,019 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:43:37,753 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:43:40,650 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:43:43,687 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:43:46,852 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:43:50,011 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:43:50,015 - INFO - Successfully processed and saved results for insurance/damage_compensation with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:43:50,016 - INFO - Processing sector: insurance, activity: handle_claim


Quantitative Metrics:
Total Activities Evaluated: 2
Average Overall Similarity Score: 16.25%
Average Confidence Score: 18.89%
Average Sequence Alignment Score: 42.71%
Ground Truth Steps Coverage: 60.00%
Response Steps Coverage: 52.38%

Match Type Distribution:
- Functional Equivalence: 2 matches (22.22%), Average Confidence: 85.00%
- No Match: 7 matches (77.78%), Average Confidence: 0.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Start
Ground Truth Steps:
  - [1] Request for compensation
Response Steps:
  - [1] Receive request for compensation
Confidence: 90%
Explanation: The ground truth step 'Request for compensation' and the response step 'Receive request for compensation' describe the same action using different words.

Activity: CheckCompliance
Ground Truth Steps:
  - [1] Check compliance
Response Steps:
  - [1] Record compliance status
Confidence: 80%
Explanation: The response step 'Record compliance status' is

2024-09-26 14:43:54,462 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:43:57,637 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:44:00,779 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:44:05,384 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:44:08,802 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:44:13,849 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:44:17,717 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:44:21,223 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:44:25,716 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:44:29,697 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 10...


2024-09-26 14:44:32,086 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:44:32,092 - INFO - Successfully processed and saved results for insurance/handle_claim with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:44:32,094 - INFO - Processing sector: insurance, activity: handle_claim_v2


Quantitative Metrics:
Total Activities Evaluated: 4
Average Overall Similarity Score: 81.80%
Average Confidence Score: 75.80%
Average Sequence Alignment Score: 51.67%
Ground Truth Steps Coverage: 100.00%
Response Steps Coverage: 94.12%

Match Type Distribution:
- Functional Equivalence: 13 matches (52.00%), Average Confidence: 86.54%
- Granularity Difference: 7 matches (28.00%), Average Confidence: 81.43%
- No Match: 3 matches (12.00%), Average Confidence: 0.00%
- Identical Match: 2 matches (8.00%), Average Confidence: 100.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Functional Equivalence
Activity: Create claim
Ground Truth Steps:
  - [1] Receive claim details
Response Steps:
  - [1] Initialize new claim in system
Confidence: 80%
Explanation: Both steps involve starting the process of creating a claim, although the wording is different.

Activity: Create claim
Ground Truth Steps:
  - [3] Assign claim number
Response Steps:
  - [2] Assign unique cl

2024-09-26 14:44:37,124 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:44:39,802 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:44:42,631 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:44:45,507 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:44:48,949 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:44:48,954 - INFO - Successfully processed and saved results for insurance/handle_claim_v2 with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:44:48,955 - INFO - Processing sector: insurance, activity: motor_claim


Quantitative Metrics:
Total Activities Evaluated: 3
Average Overall Similarity Score: 25.00%
Average Confidence Score: 45.00%
Average Sequence Alignment Score: 79.17%
Ground Truth Steps Coverage: 80.00%
Response Steps Coverage: 37.50%

Match Type Distribution:
- Identical Match: 1 matches (25.00%), Average Confidence: 100.00%
- Granularity Difference: 1 matches (25.00%), Average Confidence: 80.00%
- No Match: 2 matches (50.00%), Average Confidence: 0.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Identical Match
Activity: Start
Ground Truth Steps:
  - [1] Receive new claim
Response Steps:
  - [1] Receive new claim
Confidence: 100%
Explanation: The ground truth step 'Receive new claim' is identical to the response step 'Receive new claim'.



Match Type: Granularity Difference
Activity: Determine likelihood of the claim
Ground Truth Steps:
  - [1] Assess claim liability
Response Steps:
  - [1] Gather additional information if needed
  - [2] Review cla

2024-09-26 14:44:55,300 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:44:57,835 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:45:02,060 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:45:05,443 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:45:08,551 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:45:12,412 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:45:15,630 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:45:18,492 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:45:24,935 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:45:24,939 - INFO - Successfully processed and saved results for insurance/motor_claim with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic
2024-09-26 14:45:24,940 - INFO - Processing sector: insurance, activity: quote_submission


Quantitative Metrics:
Total Activities Evaluated: 3
Average Overall Similarity Score: 31.41%
Average Confidence Score: 48.46%
Average Sequence Alignment Score: 73.96%
Ground Truth Steps Coverage: 76.47%
Response Steps Coverage: 54.05%

Match Type Distribution:
- Identical Match: 1 matches (7.69%), Average Confidence: 100.00%
- Functional Equivalence: 7 matches (53.85%), Average Confidence: 75.71%
- No Match: 5 matches (38.46%), Average Confidence: 0.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Identical Match
Activity: Start
Ground Truth Steps:
  - [1] Receive claim documentation from customer
Response Steps:
  - [1] Receive claim documentation from customer
Confidence: 100%
Explanation: The ground truth step and response step are identical in wording and meaning.



Match Type: Functional Equivalence
Activity: Claim Received
Ground Truth Steps:
  - [1] Check completeness of claim documentation
Response Steps:
  - [1] Review all submitted claim doc

2024-09-26 14:45:30,854 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 1...


2024-09-26 14:45:33,007 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 2...


2024-09-26 14:45:36,123 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 3...


2024-09-26 14:45:44,445 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 4...


2024-09-26 14:45:48,415 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 5...


2024-09-26 14:45:50,844 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 6...


2024-09-26 14:45:52,794 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 7...


2024-09-26 14:45:56,827 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 8...


2024-09-26 14:46:00,136 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Comparing activity 9...


2024-09-26 14:46:02,878 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-09-26 14:46:02,882 - INFO - Successfully processed and saved results for insurance/quote_submission with GPT-3.5-Activity-Breakdown-Subject_Matter_Expert_Detailed_Basic


Quantitative Metrics:
Total Activities Evaluated: 4
Average Overall Similarity Score: 59.07%
Average Confidence Score: 55.71%
Average Sequence Alignment Score: 69.44%
Ground Truth Steps Coverage: 100.00%
Response Steps Coverage: 62.16%

Match Type Distribution:
- Granularity Difference: 3 matches (21.43%), Average Confidence: 76.67%
- Functional Equivalence: 6 matches (42.86%), Average Confidence: 80.00%
- Process Alternative: 1 matches (7.14%), Average Confidence: 70.00%
- No Match: 4 matches (28.57%), Average Confidence: 0.00%


Qualitative Analysis:

Examples of Matched Steps by Match Type:

Match Type: Granularity Difference
Activity: Request insurance quote
Ground Truth Steps:
  - [1] Client requests insurance quote
Response Steps:
  - [1] Identify insurance needs
  - [2] Gather necessary personal and property information
  - [3] Contact insurer or fill out online quote request form
  - [4] Submit quote request
Confidence: 80%
Explanation: The ground truth step 'Client requests in