In [1]:
import sys
import os
root_path = os.path.abspath(os.path.join(os.getcwd(), "../.."))
sys.path.append(root_path)
import json
import logging
from pprint import pprint

from typing import List, Tuple, Dict

# Project root that holds the `data/` and `test/` trees read by the helpers
# below. Set the SWEEP_ROOT environment variable to point at another checkout;
# without it the original hard-coded location is used.
ROOT_DIR = os.environ.get("SWEEP_ROOT", r"C:\Projects\Research\SWEEP\SWEEP")

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Make the parent of the working directory importable so `src.*` resolves.
# The membership check keeps sys.path from growing when this cell is re-run.
root_path = os.path.abspath(os.path.join(os.getcwd(), '..'))
if root_path not in sys.path:
    sys.path.append(root_path)

from src.common.models import ActivityBreakdown, ValueAddingAnalysis
from src.value_adding_analysis import ValueClassificationGPT, compare_value_classifications, print_comparison_results

def get_sectors(train=True) -> List[str]:
    """Return the names of the sector directories inside the chosen data split.

    Looks in ``<ROOT_DIR>/data/train`` when ``train`` is truthy and in
    ``<ROOT_DIR>/data/test`` otherwise; plain files are ignored.
    """
    split = "train" if train else "test"
    split_dir = os.path.join(ROOT_DIR, "data", split)

    sectors = []
    for entry in os.listdir(split_dir):
        if os.path.isdir(os.path.join(split_dir, entry)):
            sectors.append(entry)
    return sectors

def get_activities(sector: str, train=True) -> List[str]:
    """Return the names of the activity directories found inside one sector.

    The sector is looked up under ``<ROOT_DIR>/data/train`` when ``train`` is
    truthy and under ``<ROOT_DIR>/data/test`` otherwise; plain files are ignored.
    """
    split = "train" if train else "test"
    sector_dir = os.path.join(ROOT_DIR, "data", split, sector)

    activities = []
    for entry in os.listdir(sector_dir):
        if os.path.isdir(os.path.join(sector_dir, entry)):
            activities.append(entry)
    return activities

def get_file_paths(sector: str, activity: str, model_name: str, train=True) -> Tuple[str, str, str, str]:
    """Build the input and output paths used when evaluating one activity.

    Returns a 4-tuple, in this order: the results directory, the
    activity-breakdown JSON path, the ground-truth step-value-analysis JSON
    path, and the path of the named model's response file.
    """
    split = "train" if train else "test"
    activity_dir = os.path.join(ROOT_DIR, "data", split, sector, activity)
    # Results always go under <ROOT_DIR>/test/results, whichever split is read.
    results_dir = os.path.join(ROOT_DIR, "test", "results", sector, activity)

    return (
        results_dir,
        os.path.join(activity_dir, f"{activity}_activity_breakdown.json"),
        os.path.join(activity_dir, f"{activity}_step_value_analysis.json"),
        os.path.join(results_dir, f"{model_name}_response.json"),
    )

In [2]:
def classify_value_adding(sector: str, activity: str, model: ValueClassificationGPT, model_name) -> None:
    """Classify one activity's steps with `model` and save the outcome as JSON.

    Loads the activity breakdown and ground truth for `sector`/`activity`, asks
    `model` for a step-level value classification, compares it with the ground
    truth and writes prompts, response and metrics to the response path
    returned by `get_file_paths`.

    :param sector: Sector directory name
    :param activity: Activity directory name inside the sector
    :param model: Classifier providing `value_classification_step_level`,
        `system_message` and `user_message`
    :param model_name: Label stored in the output and used in the response file name

    Errors are logged instead of raised, so a caller looping over many
    activities keeps going after a failure.
    """
    logging.info(f"Processing sector: {sector}, activity: {activity}")

    try:
        # Load activity breakdown and ground truth
        test_path, activity_breakdown_path, ground_truth_path, response_path = get_file_paths(sector, activity, model_name)
        activity_breakdown: ActivityBreakdown = ActivityBreakdown.from_json(activity_breakdown_path)
        ground_truth = ValueAddingAnalysis.from_json(ground_truth_path)

        activity_breakdown_dict = activity_breakdown.to_dict()

        logging.info(f"Processing with model: {model_name}")
        logging.info(f"Activity breakdown: {activity_breakdown_dict}")

        # Ensure test directory exists
        os.makedirs(test_path, exist_ok=True)

        # Get model response
        response = model.value_classification_step_level(activity_breakdown_dict)
        model_value_adding_analysis: ValueAddingAnalysis = ValueAddingAnalysis.from_dict(activity, response)

        # The comparison is best-effort: a failure here must not discard the
        # model response, so it is logged and the metrics are stored as null.
        try:
            comparison_metrics = compare_value_classifications(model_value_adding_analysis, ground_truth)
        except Exception:
            logging.warning(f"Could not compare classifications for {sector}/{activity}", exc_info=True)
            comparison_metrics = None

        response_dict = {
            "model": {
                "name": model_name,
                "system_prompt": model.system_message,
                "user_prompt": model.user_message,
            },
            "response": model_value_adding_analysis.to_dict(),
            "metrics": comparison_metrics.to_dict() if comparison_metrics is not None else None,
        }

        # Save results next to the other outputs for this sector/activity
        # (previously written to a stray file in the working directory).
        with open(response_path, 'w') as f:
            json.dump(response_dict, f, indent=4)

        logging.info(f"Successfully processed and saved results for {sector}/{activity} with {model_name}")

    except Exception as e:
        # logging.exception records the full traceback; str(e.__traceback__)
        # only produced the traceback object's repr.
        logging.exception(f"Error processing {sector}/{activity}: {e}")
        
def save_model_configurations(models: Dict[str, ValueClassificationGPT], llm_version: str):
    """
    Write one small JSON configuration file per model into ``test/models``.

    The directory is relative to the current working directory and is created
    when missing. Each file is named ``<model_name>.json``.

    :param models: Dictionary of model names and their corresponding ValueClassificationGPT
        instances; only the names are written out
    :param llm_version: Version of the LLM being used (e.g., "gpt-4")
    """
    target_dir = os.path.join("test", "models")
    os.makedirs(target_dir, exist_ok=True)

    for name, _unused_model in models.items():
        destination = os.path.join(target_dir, f"{name}.json")
        payload = {"model_name": name, "llm_version": llm_version}

        with open(destination, 'w') as handle:
            json.dump(payload, handle, indent=2)

        print(f"Saved configuration for {name} to {destination}")

In [14]:
# System prompt: defines the VA / BVA / NVA categories and the
# `classify(classification, activity, step, justification)` output format,
# followed by a worked example. Passed as the first argument to
# ValueClassificationGPT.
system_message = """# Value Adding Analysis Prompt

You are an expert in business process analysis and optimization. Your task is to perform a Value Adding Analysis on a given activity breakdown. The breakdown will be provided in JSON format, containing activities and their substeps. You need to classify each substep into one of three categories:

1. VA (Value Adding): Activities that directly contribute to meeting customer requirements or business objectives. These are essential steps that transform inputs into outputs.

2. BVA (Business Value Adding): Activities that are necessary for the business but don't directly add value from the customer's perspective. These support the VA activities and are required for legal, financial, or operational reasons.

3. NVA (Non-Value Adding): Activities that consume resources without adding value to the customer or the business. These are often candidates for elimination or reduction.

Use the following function to classify each step:

```
classify(classification, activity, step, justification)
```

Parameters:
- classification: The type of classification for the step ("VA", "BVA", or "NVA")
- activity: The name of the activity this step belongs to
- step: A short, descriptive name for the step
- justification: The reason for its classification

For each substep in the provided activity breakdown, you should:

1. Analyze the description and context within the overall process.
2. Determine the most appropriate classification (VA, BVA, or NVA).
3. Provide a brief justification for your classification.
4. Use the classify function to output your analysis.

Consider the following guidelines when classifying:

- VA activities typically involve direct assessment, evaluation, or transformation of the core product/service.
- BVA activities often include necessary communications, record-keeping, or compliance-related tasks.
- NVA activities might be preparatory steps, redundant processes, or excessive documentation.

Strive for consistency in your classifications across similar types of activities, while considering the specific context of each step.

Example input:
```json
{
    "activity_name": "Process customer order",
    "substeps": [
        {
            "step_number": 1,
            "description": "Receive order from customer"
        },
        {
            "step_number": 2,
            "description": "Verify order details and payment"
        },
        {
            "step_number": 3,
            "description": "Print order summary for filing"
        },
        {
            "step_number": 4,
            "description": "Pack items for shipment"
        },
        {
            "step_number": 5,
            "description": "Update inventory system"
        }
    ]
}
```

Example output:
```
classify("BVA", "Process customer order", "Receive order from customer", "Necessary for initiating the process, but doesn't directly add value to the product.")
classify("VA", "Process customer order", "Verify order details and payment", "Directly contributes to ensuring correct order fulfillment and financial transaction.")
classify("NVA", "Process customer order", "Print order summary for filing", "Consumes resources without adding value; electronic records could suffice.")
classify("VA", "Process customer order", "Pack items for shipment", "Directly prepares the product for delivery to the customer.")
classify("BVA", "Process customer order", "Update inventory system", "Necessary for business operations but doesn't directly add value to the customer.")
```

Please analyze the provided activity breakdown and classify each substep using the classify function."""

# User prompt: restates the per-step task (classify as VA / BVA / NVA, justify,
# answer as one function call per step). Passed as the second argument to
# ValueClassificationGPT.
user_message = """Your task is to analyze the given JSON-formatted BPMN process output and classify each step as either 
value adding (VA), business value adding (BVA), or non-value adding (NVA). For each step, you should:
1. Determine the appropriate classification based on the step's contribution to the process.
2. Provide a brief justification for your classification.
3. Format your response as a function call for each step, including the classification, activity name, 
    step name, and justification.
"""

In [15]:

# Shared classifier instance built from the two prompts defined above; main()
# and the ad-hoc cells further down both use it.
value_classification_gpt = ValueClassificationGPT(system_message, user_message)

def main(only_activity="b2b_sales", model_name="TEST_MODEL"):
    """Run the value-adding classification over the training data.

    Walks every sector returned by `get_sectors()` and every activity returned
    by `get_activities(sector)`, and calls `classify_value_adding` with the
    module-level `value_classification_gpt` for each activity selected.

    :param only_activity: If not None, only activities with exactly this name
        are processed; pass None to process every activity. Defaults to
        "b2b_sales", the value of the previously hard-coded filter.
    :param model_name: Label forwarded to `classify_value_adding`
    """
    for sector in get_sectors():
        for activity in get_activities(sector):
            if only_activity is not None and activity != only_activity:
                continue
            classify_value_adding(sector, activity, value_classification_gpt, model_name)

# Entry point: runs main() when this code executes as the top-level module.
# The log output captured under this cell shows the guard passes when the
# cell is run in the notebook.
if __name__ == "__main__":
    main()

2024-10-07 22:22:36,172 - INFO - Processing sector: business, activity: b2b_sales
2024-10-07 22:22:36,188 - INFO - Processing with model: TEST_MODEL
2024-10-07 22:22:36,188 - INFO - Activity breakdown: [{'activity_name': 'Send marketing information', 'substeps': [{'step_number': 1, 'description': 'Prepare relevant marketing materials'}, {'step_number': 2, 'description': 'Customize information based on application'}, {'step_number': 3, 'description': 'Send marketing package to applicant'}]}, {'activity_name': 'Receive response', 'substeps': [{'step_number': 1, 'description': 'Monitor incoming communications'}, {'step_number': 2, 'description': 'Identify response from applicant'}, {'step_number': 3, 'description': 'Log response method (email or post)'}]}, {'activity_name': 'Assess application', 'substeps': [{'step_number': 1, 'description': 'Review application details'}, {'step_number': 2, 'description': 'Verify provided information'}, {'step_number': 3, 'description': "Evaluate applican

In [11]:
# Hard-coded activity breakdown used for the ad-hoc model call in the next cell.
# NOTE(review): looks like a copy of the "Activity breakdown" value logged for
# business/b2b_sales — confirm it is still in sync with the data file.
ab = [{'activity_name': 'Send marketing information', 'substeps': [{'step_number': 1, 'description': 'Prepare relevant marketing materials'}, {'step_number': 2, 'description': 'Customize information based on application'}, {'step_number': 3, 'description': 'Send marketing package to applicant'}]}, {'activity_name': 'Receive response', 'substeps': [{'step_number': 1, 'description': 'Monitor incoming communications'}, {'step_number': 2, 'description': 'Identify response from applicant'}, {'step_number': 3, 'description': 'Log response method (email or post)'}]}, {'activity_name': 'Assess application', 'substeps': [{'step_number': 1, 'description': 'Review application details'}, {'step_number': 2, 'description': 'Verify provided information'}, {'step_number': 3, 'description': "Evaluate applicant's eligibility"}, {'step_number': 4, 'description': 'Determine if clarification is needed'}]}, {'activity_name': 'Send rejection', 'substeps': [{'step_number': 1, 'description': 'Document reasons for rejection'}, {'step_number': 2, 'description': 'Prepare rejection letter'}, {'step_number': 3, 'description': 'Send rejection notification to applicant'}, {'step_number': 4, 'description': "Update application status to 'Rejected'"}]}, {'activity_name': 'Make appointment', 'substeps': [{'step_number': 1, 'description': 'Identify available time slots'}, {'step_number': 2, 'description': 'Contact applicant to schedule appointment'}, {'step_number': 3, 'description': 'Confirm appointment details'}, {'step_number': 4, 'description': 'Send appointment reminder'}]}, {'activity_name': 'Discuss application', 'substeps': [{'step_number': 1, 'description': 'Review application with applicant'}, {'step_number': 2, 'description': 'Clarify any unclear points'}, {'step_number': 3, 'description': 'Gather additional information if needed'}, {'step_number': 4, 'description': 'Update application with new information'}]}, {'activity_name': 'Send offer', 'substeps': [{'step_number': 1, 'description': 
'Prepare offer details'}, {'step_number': 2, 'description': 'Draft offer letter'}, {'step_number': 3, 'description': 'Review offer for accuracy'}, {'step_number': 4, 'description': 'Send offer to applicant'}]}, {'activity_name': 'File application', 'substeps': [{'step_number': 1, 'description': 'Organize all application documents'}, {'step_number': 2, 'description': 'Update application status'}, {'step_number': 3, 'description': 'Store application in appropriate system'}, {'step_number': 4, 'description': 'Ensure all data protection requirements are met'}]}]

In [16]:
# Call the classifier directly on the hard-coded breakdown `ab`, bypassing
# classify_value_adding, so the raw model output can be inspected.
response = value_classification_gpt.value_classification_step_level(ab)

2024-10-07 22:23:02,399 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-10-07 22:23:06,955 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


In [17]:
# Display the raw model output (a string of newline-separated classify(...) calls).
response

'classify("VA", "Send marketing information", "Prepare relevant marketing materials", "Directly contributes to creating the marketing package for the applicant.")\nclassify("VA", "Send marketing information", "Customize information based on application", "Directly tailors the marketing materials to the specific applicant.")\nclassify("VA", "Send marketing information", "Send marketing package to applicant", "Directly delivers the marketing materials to the applicant.")\nclassify("BVA", "Receive response", "Monitor incoming communications", "Necessary for identifying and tracking applicant responses.")\nclassify("VA", "Receive response", "Identify response from applicant", "Directly determines if a response has been received from the applicant.")\nclassify("BVA", "Receive response", "Log response method (email or post)", "Necessary for record-keeping and tracking response methods.")\nclassify("VA", "Assess application", "Review application details", "Directly involves examining the appl