In [1]:
import sys
import os
root_path = os.path.abspath(os.path.join(os.getcwd(), "../.."))
sys.path.append(root_path)
import json
import logging
from pprint import pprint

from typing import List, Tuple, Dict
from collections import defaultdict
import math
from dataclasses import asdict
from src.common.prompts.value_adding_analysis.gpt_components import VAPromptComponents


# Root of the SWEEP checkout; the data/ and test/ directories are resolved
# against it. The SWEEP_ROOT_DIR environment variable overrides the
# machine-specific default so the notebook can run on another machine.
ROOT_DIR = os.environ.get("SWEEP_ROOT_DIR", r"C:\Projects\Research\SWEEP\SWEEP")

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Make the parent of the working directory importable. The membership check
# keeps sys.path from growing each time this cell is re-executed.
root_path = os.path.abspath(os.path.join(os.getcwd(), '..'))
if root_path not in sys.path:
    sys.path.append(root_path)

from src.common.models import ActivityBreakdown, ValueAddingAnalysis, ProcessModel
from src.value_adding_analysis import ValueClassificationComponentsGPT, compare_value_classifications, print_comparison_results

def get_sectors(train=True) -> List[str]:
    """Return the sector directory names of one data split, sorted by name.

    :param train: when true, list ``<ROOT_DIR>/data/train``; otherwise
        ``<ROOT_DIR>/data/test``.
    :return: names of the immediate sub-directories (plain files are skipped).
    :raises OSError: if the split directory cannot be listed.
    """
    data_path = os.path.join(ROOT_DIR, "data", "train" if train else "test")
    # os.listdir() yields entries in arbitrary order; sorting keeps the
    # processing order identical between runs and machines.
    return sorted(
        sector
        for sector in os.listdir(data_path)
        if os.path.isdir(os.path.join(data_path, sector))
    )

def get_activities(sector: str, train=True) -> List[str]:
    """Return the activity directory names inside a sector, sorted by name.

    :param sector: sector directory name under the chosen data split.
    :param train: when true, look under ``<ROOT_DIR>/data/train``; otherwise
        under ``<ROOT_DIR>/data/test``.
    :return: names of the immediate sub-directories (plain files are skipped).
    :raises OSError: if the sector directory cannot be listed.
    """
    sector_path = os.path.join(ROOT_DIR, "data", "train" if train else "test", sector)
    # os.listdir() yields entries in arbitrary order; sorting keeps the
    # processing order identical between runs and machines.
    return sorted(
        activity
        for activity in os.listdir(sector_path)
        if os.path.isdir(os.path.join(sector_path, activity))
    )

def get_file_paths(sector: str, activity: str, model_name: str, train=True) -> Tuple[str, str, str, str]:
    """Build the paths used when evaluating one activity with one model.

    Nothing is created or checked on disk; the paths are only assembled.

    :param sector: sector directory name.
    :param activity: activity directory name; also the prefix of the JSON files.
    :param model_name: model identifier used in the response file name.
    :param train: selects the ``train`` (true) or ``test`` (false) data split
        for the input files. The results directory does not depend on it.
    :return: ``(results_dir, activity_breakdown_path, ground_truth_path,
        response_path)``.
    """
    if train:
        split = "train"
    else:
        split = "test"

    # Inputs live under data/<split>/..., outputs under test/results/...
    inputs_dir = os.path.join(ROOT_DIR, "data", split, sector, activity)
    results_dir = os.path.join(ROOT_DIR, "test", "results", sector, activity)

    return (
        results_dir,
        os.path.join(inputs_dir, f"{activity}_activity_breakdown.json"),
        os.path.join(inputs_dir, f"{activity}_step_value_analysis.json"),
        os.path.join(results_dir, f"{model_name}_response.json"),
    )

In [2]:
def classify_value_adding(sector: str, activity: str, model: ValueClassificationComponentsGPT, model_name) -> None:
    """Classify one activity with ``model`` and save the experiment result.

    Loads the activity breakdown and the ground-truth analysis, asks the model
    for a step-level classification, compares it with the ground truth and
    writes ``experiment/<activity>_experiment.json`` relative to the current
    working directory.

    :param sector: sector directory name.
    :param activity: activity directory name.
    :param model: classifier providing ``value_classification_step_level`` and
        a ``prompt_components`` mapping with a ``"_raw_input"`` entry.
    :param model_name: label stored in the result and used in log messages.
    :return: None. Any failure is logged rather than raised, so a caller
        looping over many activities keeps going.
    """
    logging.info(f"Processing sector: {sector}, activity: {activity}")

    try:
        # Load activity breakdown and ground truth. The response path is
        # returned by get_file_paths but is not used by this function.
        test_path, activity_breakdown_path, ground_truth_path, _response_path = get_file_paths(sector, activity, model_name)
        activity_breakdown: ActivityBreakdown = ActivityBreakdown.from_json(activity_breakdown_path)
        ground_truth = ValueAddingAnalysis.from_json(ground_truth_path)

        activity_breakdown_dict = activity_breakdown.to_dict()

        logging.info(f"Processing with model: {model_name}")

        # Ensure test directory exists
        os.makedirs(test_path, exist_ok=True)

        # Get model response
        response = model.value_classification_step_level(activity_breakdown_dict)
        model_value_adding_analysis: ValueAddingAnalysis = ValueAddingAnalysis.from_dict(activity, activity_breakdown_dict)
        model_value_adding_analysis.classify_substeps(response)

        try:
            # Process model response
            comparison_metrics = compare_value_classifications(model_value_adding_analysis, ground_truth)
        except Exception as e:
            # A failed comparison must not discard the model response: record
            # the problem and store null metrics instead.
            logging.warning(f"Could not compute comparison metrics for {sector}/{activity}: {e}")
            comparison_metrics = None

        response_dict = {
            "model": {
                "name": model_name,
                "components": model.prompt_components["_raw_input"]
            },
            "response": response,
            # Previously this dereferenced None whenever the comparison failed.
            "metrics": comparison_metrics.to_dict() if comparison_metrics is not None else None
        }

        # Save results. os.path.join replaces the literal backslash (an invalid
        # "\{" escape inside the f-string), and the directory is created so the
        # first run does not fail on a missing folder.
        output_dir = "experiment"
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, f"{activity}_experiment.json")
        with open(output_path, 'w') as f:
            json.dump(response_dict, f, indent=4)

        logging.info(f"Successfully processed and saved results for {sector}/{activity} with {model_name}")

    except Exception as e:
        # Best effort by design; logging.exception keeps the traceback.
        logging.exception(f"Error processing {sector}/{activity}: {str(e)}")
        
def save_model_configurations(models: Dict[str, ValueClassificationComponentsGPT], llm_version: str):
    """
    Write one JSON configuration file per model into ``test/models``.

    The directory is relative to the current working directory and is created
    when missing. Each file is named ``<model_name>.json`` and holds the model
    name, the model's ``prompt_components`` and the LLM version.

    :param models: Mapping of model names to ValueClassificationComponentsGPT instances
    :param llm_version: Version of the LLM being used (e.g., "gpt-4")
    """
    target_dir = os.path.join("test", "models")
    os.makedirs(target_dir, exist_ok=True)

    for name in models:
        destination = os.path.join(target_dir, f"{name}.json")
        payload = dict(
            model_name=name,
            prompt_components=models[name].prompt_components,
            llm_version=llm_version,
        )

        with open(destination, 'w') as handle:
            json.dump(payload, handle, indent=2)

        print(f"Saved configuration for {name} to {destination}")

In [3]:
# Prompt configurations under comparison, declared as (name, component
# selections) pairs and turned into {"name", "components"} entries.
models = [
    {"name": label, "components": VAPromptComponents.from_dict(selections)}
    for label, selections in (
        (
            "GPT-3.5-Value-Classification-Experiment_Lean",
            {
                "role_description": "lean_expert",
                "task_description": "efficiency_focused",
                "classification_types": "custom",
                "function_definition": "detailed",
                "parsing_instructions": "holistic",
                "output_format": "structured",
                "example_output": "custom_process",
                "additional_guidelines": "lean_principles",
            },
        ),
        (
            "GPT-3.5-Value-Classification-Process_Engineer_Technical",
            {
                "role_description": "process_engineer",
                "task_description": "standard",
                "classification_types": "detailed",
                "function_definition": "basic",
                "parsing_instructions": "sequential",
                "output_format": "basic",
                "example_output": "complex_process",
            },
        ),
    )
]

# The first configuration is the one the classifier below is built from.
model = models[0]

value_classification_gpt = ValueClassificationComponentsGPT(model["components"])

In [4]:
def main(target_sector="business", target_activity="b2b_sales"):
    """Run the value-adding classification for the selected activities.

    :param target_sector: only this sector is processed; ``None`` processes
        every sector returned by ``get_sectors``.
    :param target_activity: only this activity is processed; ``None``
        processes every activity returned by ``get_activities``.

    The defaults reproduce the single business/b2b_sales run this notebook
    was written for.
    """
    for sector in get_sectors():
        if target_sector is not None and sector != target_sector:
            continue
        for activity in get_activities(sector):
            if target_activity is not None and activity != target_activity:
                continue
            classify_value_adding(sector, activity, value_classification_gpt, model["name"])

if __name__ == "__main__":
    main()

2024-10-08 15:35:54,817 - INFO - Processing sector: business, activity: b2b_sales
2024-10-08 15:35:54,830 - INFO - Processing with model: GPT-3.5-Value-Classification-Experiment_Lean
2024-10-08 15:35:58,948 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-10-08 15:36:03,927 - INFO - HTTP Request: POST https://datascience-openai-local.openai.azure.com//openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2024-10-08 15:36:03,930 - INFO - Successfully processed and saved results for business/b2b_sales with GPT-3.5-Value-Classification-Experiment_Lean
