# In [None]:
# Standard library imports
import itertools
import json
import logging
import sys
import time
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional
from concurrent.futures import ProcessPoolExecutor
import multiprocessing
import os

from concurrent.futures import ProcessPoolExecutor
import multiprocessing
import json
import os
from pathlib import Path
import pandas as pd
import hashlib

# Third-party imports
import pulp
import pandas as pd
import numpy as np
from tqdm import tqdm  # Make sure this is installed: pip install tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Configure logging
# Runs once at import time: records at INFO level and above are written both
# to the file 'scheduler.log' (path relative to the current working directory)
# and to standard output, formatted as "timestamp - level - message".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('scheduler.log'),
        logging.StreamHandler(sys.stdout)
    ]
)

@dataclass
class ScenarioParams:
    """Parameters defining a scenario.

    One instance describes a single combination of resource levels and
    sample counts to be evaluated.
    """
    # Resource name -> level chosen for this scenario (e.g. {"Admin": 2}).
    resource_levels: Dict[str, int]
    # Sample type -> number of samples in this scenario (e.g. {"metals": 5}).
    sample_counts: Dict[str, int]

@dataclass
class ScenarioResult:
    """Results from running a scenario."""
    # The scenario parameters these results belong to.
    params: ScenarioParams
    # Schedule length; presumably None when no schedule was found and
    # presumably measured in days -- TODO confirm against the solver code.
    makespan: Optional[int]
    # Total cost of the scenario.
    total_cost: float
    # Throughput figure for the scenario, in samples per year.
    samples_per_year: int
    # Cost attributed to a single sample.
    cost_per_sample: float
    # Resource name -> utilization value (presumably a 0..1 fraction; verify).
    resource_utilization: Dict[str, float]
    error: Optional[str] = None  # Track any errors that occurred

class SchedulerError(Exception):
    """Raised for failures that are specific to the scheduler (e.g. invalid scenarios)."""

def generate_scenario_hash(scenario):
    """Return the hexadecimal MD5 digest of a scenario's canonical JSON form.

    The scenario is serialized with sorted keys, so two scenarios with equal
    content produce the same hash regardless of key insertion order.
    """
    canonical_bytes = json.dumps(scenario, sort_keys=True).encode()
    digest = hashlib.md5(canonical_bytes)
    return digest.hexdigest()

def load_checkpoint(checkpoint_dir):
    """Load completed scenarios from a checkpoint directory.

    Every ``checkpoint_*.json`` file in ``checkpoint_dir`` is expected to
    hold a JSON list of result dicts, each with a ``'scenario'`` entry.

    Args:
        checkpoint_dir: Directory (str or Path) containing checkpoint files.

    Returns:
        Dict mapping ``generate_scenario_hash(result['scenario'])`` to the
        stored result dict. Empty when the directory does not exist.
    """
    checkpoint_dir = Path(checkpoint_dir)
    completed_scenarios = {}

    if not checkpoint_dir.exists():
        return completed_scenarios

    # Sorted so that, when the same scenario appears in several files, the
    # highest-numbered file deterministically wins.
    for checkpoint_file in sorted(checkpoint_dir.glob("checkpoint_*.json")):
        try:
            with open(checkpoint_file, 'r', encoding='utf-8') as f:
                batch_results = json.load(f)
        except (OSError, json.JSONDecodeError) as exc:
            # A file truncated by an interrupted run must not prevent resuming
            # from the remaining, intact checkpoints.
            logging.warning(f"Skipping unreadable checkpoint file '{checkpoint_file}': {exc}")
            continue

        for result in batch_results:
            scenario_hash = generate_scenario_hash(result['scenario'])
            completed_scenarios[scenario_hash] = result

    return completed_scenarios

def save_checkpoint(results_batch, checkpoint_dir, batch_num):
    """Save a batch of results to ``checkpoint_<batch_num:04d>.json``.

    Args:
        results_batch: JSON-serializable list of result dicts.
        checkpoint_dir: Target directory (str or Path); created, including
            missing parent directories, if it does not exist.
        batch_num: Integer batch index used in the file name.

    Raises:
        TypeError / ValueError: If ``results_batch`` is not JSON-serializable.
        OSError: If the file cannot be written.
    """
    checkpoint_dir = Path(checkpoint_dir)
    checkpoint_dir.mkdir(parents=True, exist_ok=True)

    checkpoint_file = checkpoint_dir / f"checkpoint_{batch_num:04d}.json"
    # Write to a temporary sibling first and rename afterwards, so an
    # interrupted run never leaves a half-written checkpoint_*.json behind.
    # The ".tmp" suffix keeps the partial file out of the loader's glob.
    tmp_file = checkpoint_file.with_name(checkpoint_file.name + ".tmp")
    try:
        with open(tmp_file, 'w') as f:
            json.dump(results_batch, f, indent=2)
        tmp_file.replace(checkpoint_file)
    except Exception:
        if tmp_file.exists():
            tmp_file.unlink()
        raise

def visualize_results(results_df):
    """Create a 3x2 grid of summary plots for scheduling results.

    Columns are selected by prefix: ``utilization_*`` (per-resource
    utilization), ``resource_*`` (per-resource levels) and ``samples_*``
    (per-type sample counts). The derived metric columns
    ``samples_per_year`` and ``resource_count`` share those prefixes and are
    explicitly excluded so they are not plotted or summed as if they were a
    sample type or a resource.

    Args:
        results_df: DataFrame with at least the columns ``samples_per_year``,
            ``total_cost``, ``cost_per_sample`` and ``makespan`` plus the
            prefixed columns described above.

    Returns:
        The matplotlib Figure holding the six subplots.
    """
    plt.style.use('seaborn-v0_8')

    columns = list(results_df.columns)
    utilization_cols = [col for col in columns if col.startswith('utilization_')]
    resource_cols = [col for col in columns
                     if col.startswith('resource_') and col != 'resource_count']
    sample_cols = [col for col in columns
                   if col.startswith('samples_') and col != 'samples_per_year']
    total_resources = results_df[resource_cols].sum(axis=1)

    # Create figure with subplots
    fig = plt.figure(figsize=(20, 15))

    # 1. Resource Utilization Plot
    ax1 = fig.add_subplot(3, 2, 1)
    utilization_data = results_df[utilization_cols].mean().sort_values(ascending=True)
    sns.barplot(x=utilization_data.values * 100,
                y=[col.replace('utilization_', '').replace('_', ' ')
                   for col in utilization_data.index],
                ax=ax1)
    ax1.set_title('Average Resource Utilization (%)')
    ax1.set_xlabel('Utilization %')

    # 2. Capacity vs Throughput Analysis
    ax2 = fig.add_subplot(3, 2, 2)
    sns.scatterplot(data=results_df,
                    x='samples_per_year',
                    y='total_cost',
                    hue=total_resources,
                    size=total_resources,
                    alpha=0.6,
                    ax=ax2)
    ax2.set_title('Capacity vs Throughput/Cost Tradeoff')
    ax2.set_xlabel('Annual Throughput (samples/year)')
    ax2.set_ylabel('Total Cost ($)')

    # 3. Resource Allocation Impact
    ax3 = fig.add_subplot(3, 2, 3)
    for col in resource_cols:
        # Only scenarios that actually use the resource contribute to its curve.
        sns.kdeplot(data=results_df[results_df[col] > 0],
                    x='samples_per_year',
                    hue=col,
                    ax=ax3)
    ax3.set_title('Impact of Resource Levels on Throughput')
    ax3.set_xlabel('Annual Throughput (samples/year)')

    # 4. Cost Efficiency Analysis
    ax4 = fig.add_subplot(3, 2, 4)
    sns.scatterplot(data=results_df,
                    x='samples_per_year',
                    y='cost_per_sample',
                    hue=total_resources,
                    size=total_resources,
                    alpha=0.6,
                    ax=ax4)
    ax4.set_title('Cost Efficiency Analysis')
    ax4.set_xlabel('Annual Throughput (samples/year)')
    ax4.set_ylabel('Cost per Sample ($)')

    # 5. Sample Mix Analysis
    ax5 = fig.add_subplot(3, 2, 5)
    sample_data = results_df[sample_cols].mean()
    sns.barplot(x=[col.replace('samples_', '').replace('_', ' ') for col in sample_cols],
                y=sample_data.values,
                ax=ax5)
    ax5.set_title('Average Sample Mix')
    ax5.set_ylabel('Number of Samples')

    # 6. Makespan Distribution
    ax6 = fig.add_subplot(3, 2, 6)
    sns.histplot(data=results_df, x='makespan', bins=30, ax=ax6)
    ax6.set_title('Distribution of Makespan')
    ax6.set_xlabel('Makespan (days)')

    plt.tight_layout()
    return fig

def create_gantt_chart(tasks, start_times, resource_capacities, makespan):
    """Create a Gantt chart visualization of the schedule.

    The upper subplot draws one horizontal bar per scheduled task, grouped by
    resource with a one-row gap between groups. The lower subplot plots, for
    every integer time point, the number of active tasks on each resource
    divided by that resource's capacity.

    Args:
        tasks: List of task dictionaries with 'id', 'resource' and 'duration'.
        start_times: Dictionary of task start times keyed by task id; tasks
            without an entry are not drawn.
        resource_capacities: Dictionary of resource capacities; must contain
            every resource that has a scheduled task.
        makespan: Total schedule duration.

    Returns:
        The matplotlib Figure containing both subplots.
    """
    plt.style.use('seaborn-v0_8')

    # Prepare data: one record per task that actually has a start time.
    task_data = [
        {
            'Task': task['id'],
            'Resource': task['resource'],
            'Start': start_times[task['id']],
            'Duration': task['duration'],
            'End': start_times[task['id']] + task['duration'],
        }
        for task in tasks
        if task['id'] in start_times
    ]

    # Explicit columns keep sorting/grouping valid when nothing is scheduled.
    df = pd.DataFrame(task_data, columns=['Task', 'Resource', 'Start', 'Duration', 'End'])

    # Sort by resource and start time
    df = df.sort_values(['Resource', 'Start'])

    # Create figure
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10), height_ratios=[4, 1])

    # One colour per resource
    resources = df['Resource'].unique()
    colors = plt.cm.tab20(np.linspace(0, 1, len(resources)))
    resource_colors = dict(zip(resources, colors))

    y_labels = []
    y_ticks = []
    current_y = 0

    for resource, group in df.groupby('Resource'):
        # The row index is counted within the group; indexing with a counter
        # that accumulates across groups would run past the group's rows.
        for row_offset, (_, task) in enumerate(group.iterrows()):
            y = current_y + row_offset
            ax1.barh(y,
                     task['Duration'],
                     left=task['Start'],
                     color=resource_colors[resource],
                     alpha=0.8)

            # Add task label
            ax1.text(task['Start'], y,
                     f" {task['Task']}",
                     va='center', fontsize=8)

            y_labels.append(f"{task['Task']}")
            y_ticks.append(y)

        # Leave one empty row between resource groups.
        current_y += len(group) + 1

    # Customize first subplot
    ax1.set_ylim(-1, current_y)
    ax1.set_xlim(-1, makespan + 1)
    ax1.set_yticks(y_ticks)
    ax1.set_yticklabels(y_labels)
    ax1.grid(True, axis='x', alpha=0.3)
    ax1.set_title('Task Schedule by Resource', pad=20)
    ax1.set_xlabel('Time (days)')

    # Add resource utilization subplot
    time_points = np.arange(makespan + 1)
    for resource, color in resource_colors.items():
        resource_tasks = [task for task in task_data if task['Resource'] == resource]
        capacity = resource_capacities[resource]
        utilization = [
            sum(1 for task in resource_tasks if task['Start'] <= t < task['End']) / capacity
            for t in time_points
        ]

        ax2.plot(time_points, utilization,
                 label=resource, color=color, alpha=0.8)

    ax2.set_xlim(-1, makespan + 1)
    ax2.set_ylim(0, 1.5)  # Allow some overflow to show over-utilization
    ax2.set_xlabel('Time (days)')
    ax2.set_ylabel('Resource Utilization')
    ax2.grid(True, alpha=0.3)
    ax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    plt.tight_layout()
    return fig

def _pareto_mask(throughput_and_cost):
    """Return a boolean mask of the non-dominated rows of an (n, 2) array.

    Column 0 is maximised (throughput) and column 1 is minimised (cost). A row
    is dominated when another row is at least as good in both columns and
    strictly better in at least one; exact duplicates do not dominate each other.
    """
    pts = np.asarray(throughput_and_cost, dtype=float)
    keep = np.ones(len(pts), dtype=bool)
    for i, (throughput, cost) in enumerate(pts):
        no_worse = (pts[:, 0] >= throughput) & (pts[:, 1] <= cost)
        strictly_better = (pts[:, 0] > throughput) | (pts[:, 1] < cost)
        if np.any(no_worse & strictly_better):
            keep[i] = False
    return keep


def create_pareto_frontier(results_df):
    """Create Pareto frontier analysis for throughput vs cost tradeoffs.

    Two panels are drawn: total cost against annual throughput, and cost per
    sample against annual throughput. In each panel the non-dominated
    scenarios (higher throughput is better, lower cost is better) are joined
    by a dashed red line, and all scenarios are coloured by total resources.

    Args:
        results_df: DataFrame containing scenario results with at least the
            columns 'samples_per_year' and 'total_cost'. If a
            'resource_count' column is present it is used for colouring;
            otherwise the sum of the 'resource_*' columns is used. As a side
            effect the column 'cost_per_sample' is (re)computed on
            ``results_df`` as total_cost / samples_per_year.

    Returns:
        The matplotlib Figure containing both panels.
    """
    plt.style.use('seaborn-v0_8')

    # Colour scale: total resources per scenario, computed locally if the
    # caller has not already added the derived column.
    if 'resource_count' in results_df.columns:
        resource_count = results_df['resource_count']
    else:
        resource_cols = [col for col in results_df.columns if col.startswith('resource_')]
        resource_count = results_df[resource_cols].sum(axis=1)

    # Create figure
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))

    # 1. Throughput vs Cost Frontier
    points = results_df[['samples_per_year', 'total_cost']].values
    pareto_points = points[_pareto_mask(points)]
    sorted_pareto = pareto_points[np.argsort(pareto_points[:, 0])]

    # Plot all points
    scatter = ax1.scatter(results_df['samples_per_year'],
                          results_df['total_cost'],
                          c=resource_count,
                          cmap='viridis',
                          alpha=0.6)

    # Plot Pareto frontier
    ax1.plot(sorted_pareto[:, 0], sorted_pareto[:, 1],
             'r--', linewidth=2, label='Pareto Frontier')

    ax1.set_xlabel('Annual Throughput (samples/year)')
    ax1.set_ylabel('Total Cost ($)')
    ax1.set_title('Cost-Throughput Pareto Frontier')
    ax1.legend()
    plt.colorbar(scatter, ax=ax1, label='Total Resources')

    # 2. Resource Efficiency Frontier
    results_df['cost_per_sample'] = results_df['total_cost'] / results_df['samples_per_year']
    points_eff = results_df[['samples_per_year', 'cost_per_sample']].values
    efficient_points = points_eff[_pareto_mask(points_eff)]
    sorted_efficient = efficient_points[np.argsort(efficient_points[:, 0])]

    # Plot efficiency points
    scatter2 = ax2.scatter(results_df['samples_per_year'],
                           results_df['cost_per_sample'],
                           c=resource_count,
                           cmap='viridis',
                           alpha=0.6)

    # Plot efficiency frontier
    ax2.plot(sorted_efficient[:, 0], sorted_efficient[:, 1],
             'r--', linewidth=2, label='Efficiency Frontier')

    ax2.set_xlabel('Annual Throughput (samples/year)')
    ax2.set_ylabel('Cost per Sample ($)')
    ax2.set_title('Cost Efficiency Frontier')
    ax2.legend()
    plt.colorbar(scatter2, ax=ax2, label='Total Resources')

    plt.tight_layout()
    return fig

def analyze_tradeoffs(results_df):
    """Analyze key performance tradeoffs and efficiency frontiers.

    Adds the derived columns 'resource_count', 'throughput_per_resource',
    'cost_per_throughput' and 'avg_utilization' to ``results_df`` in place,
    then draws a 2x2 grid of scatter plots relating throughput, resources,
    cost efficiency and utilization.

    Args:
        results_df: DataFrame with 'samples_per_year', 'total_cost', and
            per-resource 'resource_*' and 'utilization_*' columns.

    Returns:
        The matplotlib Figure containing the four scatter plots.
    """
    # 'resource_count' is itself prefixed with 'resource_'; leave it out so
    # that calling this function twice on the same frame does not add the
    # previous total into the new one.
    resource_cols = [col for col in results_df.columns
                     if col.startswith('resource_') and col != 'resource_count']
    results_df['resource_count'] = results_df[resource_cols].sum(axis=1)
    results_df['throughput_per_resource'] = results_df['samples_per_year'] / results_df['resource_count']
    results_df['cost_per_throughput'] = results_df['total_cost'] / results_df['samples_per_year']

    util_cols = [col for col in results_df.columns if col.startswith('utilization_')]
    results_df['avg_utilization'] = results_df[util_cols].mean(axis=1)

    # Create efficiency frontier visualization
    fig, axes = plt.subplots(2, 2, figsize=(15, 15))

    # (axis, x column, y column, title) for each of the four panels.
    panels = [
        (axes[0, 0], 'resource_count', 'samples_per_year', 'Throughput vs Total Resources'),
        (axes[0, 1], 'samples_per_year', 'cost_per_throughput', 'Cost Efficiency vs Throughput'),
        (axes[1, 0], 'samples_per_year', 'avg_utilization', 'Resource Utilization vs Throughput'),
        (axes[1, 1], 'resource_count', 'throughput_per_resource', 'Efficiency (Throughput per Resource)'),
    ]
    for ax, x_col, y_col, title in panels:
        sns.scatterplot(data=results_df, x=x_col, y=y_col, ax=ax)
        ax.set_title(title)

    plt.tight_layout()
    return fig
# Example usage in your notebook:
def analyze_results(output_file):
    """Read a results CSV, print summary statistics and build the overview figure.

    Args:
        output_file: Path of the CSV file to read with ``pd.read_csv``.

    Returns:
        Tuple ``(results_df, fig)``: the loaded DataFrame and the figure
        returned by ``visualize_results``.
    """
    results_df = pd.read_csv(output_file)

    # Headline averages
    print("\nSummary Statistics:")
    mean_makespan = results_df['makespan'].mean()
    print(f"Average makespan: {mean_makespan:.1f} days")
    mean_cost = results_df['total_cost'].mean()
    print(f"Average cost: ${mean_cost:.2f}")
    mean_throughput = results_df['samples_per_year'].mean()
    print(f"Average samples per year: {mean_throughput:.1f}")

    # Per-resource mean utilization, shown as a percentage
    print("\nResource Utilization:")
    for col in results_df.columns:
        if not col.startswith('utilization_'):
            continue
        resource = col.replace('utilization_', '')
        util = results_df[col].mean() * 100
        print(f"{resource}: {util:.1f}%")

    return results_df, visualize_results(results_df)

def prepare_dashboard_data(results):
    """Process results into the structure needed for dashboard visualization.

    Results without a positive makespan (failed scenarios) and scenarios with
    no samples are skipped. The remaining results are grouped by the total
    resource capacity of their scenario.

    Args:
        results: Iterable of result dicts with 'scenario', 'makespan',
            'total_cost' and optionally 'samples_per_year'. Each scenario
            holds 'resource_capacities' and the optional count keys
            'metals_count', 'ceramics_count', 'composites_count' and
            'polymer_count'.

    Returns:
        Dict with keys 'summary', 'mix', 'timelines' and 'sample_summaries'.
        The same structure is also written to
        ``dashboard_data_<timestamp>.json`` in the current working directory.
    """
    # Dashboard sample type -> scenario key holding its count. Note the
    # singular 'polymer_count': deriving the key from the type name would look
    # up 'polymers_count' and never find any polymer samples.
    count_keys = {
        'metals': 'metals_count',
        'ceramics': 'ceramics_count',
        'composites': 'composites_count',
        'polymers': 'polymer_count',
    }

    # Initialize summary structures
    summary = defaultdict(lambda: {"total_cost": 0, "samples_per_year": 0, "scenarios": 0})
    mix = defaultdict(lambda: defaultdict(int))
    timelines = []
    sample_summaries = {stype: [] for stype in count_keys}

    for result in results:
        # No (or non-positive) makespan indicates a failed scenario; it would
        # also make the annualisation below divide by zero.
        makespan = result.get('makespan')
        if makespan is None or makespan <= 0:
            continue

        scenario = result['scenario']
        total_resources = sum(scenario['resource_capacities'].values())

        counts = {stype: scenario.get(key, 0) for stype, key in count_keys.items()}
        total_samples = sum(counts.values())
        if total_samples == 0:
            continue

        # Summary data, bucketed by total resource capacity
        bucket = summary[total_resources]
        bucket["total_cost"] += result['total_cost']
        bucket["samples_per_year"] += result.get('samples_per_year', 0)
        bucket["scenarios"] += 1

        # Mix data
        for stype, count in counts.items():
            mix[total_resources][stype] += count

        # Timeline data (shallow copy so later edits don't alias the input)
        timelines.append({
            "scenario": dict(scenario),
            "makespan": makespan,
            "total_cost": result['total_cost']
        })

        # Sample summaries: cost is split across types in proportion to count
        annual_factor = 365 / makespan
        for stype, count in counts.items():
            if count <= 0:
                continue
            type_cost = result['total_cost'] * (count / total_samples)
            sample_summaries[stype].append({
                'total_resources': total_resources,
                'samples_per_year': int(count * annual_factor),
                'cost_per_sample': round(type_cost / count, 2)
            })

    # Create final dashboard data structure
    dashboard_data = {
        'summary': [
            {
                "total_resources": key,
                "average_cost": value["total_cost"] / value["scenarios"],
                "average_samples_per_year": value["samples_per_year"] / value["scenarios"]
            }
            for key, value in summary.items()
        ],
        'mix': [
            {
                "total_resources": key,
                **values
            }
            for key, values in mix.items()
        ],
        'timelines': timelines,
        'sample_summaries': sample_summaries
    }

    # Save to JSON
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    dashboard_file = f"dashboard_data_{timestamp}.json"

    with open(dashboard_file, 'w') as f:
        json.dump(dashboard_data, f, indent=4)

    logging.info(f"Dashboard data prepared and saved to '{dashboard_file}'")
    return dashboard_data



def validate_scenario(scenario: dict, templates: List[dict]) -> None:
    """Check that the scenario provides every resource the templates' tasks use.

    Args:
        scenario: Scenario dict with a 'resource_capacities' mapping.
        templates: Iterable of templates, each an iterable of task dicts
            carrying a "resource" entry.

    Raises:
        SchedulerError: If at least one required resource has no entry in
            the scenario's resource capacities.
    """
    needed = {task["resource"] for template in templates for task in template}
    missing_resources = needed - set(scenario['resource_capacities'].keys())

    if not missing_resources:
        return

    raise SchedulerError(
        f"Missing required resources: {missing_resources}. "
        "These resources are needed for tasks in the templates."
    )

def _next_checkpoint_batch_num(checkpoint_dir):
    """Return one more than the highest checkpoint batch index on disk (0 if none)."""
    prefix = "checkpoint_"
    highest = -1
    for path in Path(checkpoint_dir).glob(f"{prefix}*.json"):
        index_text = path.stem[len(prefix):]
        if index_text.isdigit():
            highest = max(highest, int(index_text))
    return highest + 1


def run_scenarios_with_checkpoints(
    scenarios, 
    templates, 
    resource_unit_costs, 
    checkpoint_dir="checkpoints",
    checkpoint_frequency=100,  # Save every N scenarios
    max_workers=None
):
    """Run scenarios in parallel with checkpointing support.

    Scenarios whose hash is already present in the checkpoint directory are
    not re-run; their stored results are returned as-is. The remaining
    scenarios are submitted to a process pool, and every
    ``checkpoint_frequency`` finished scenarios a new checkpoint file and a
    ``results_summary.csv`` are written to ``checkpoint_dir``.

    Args:
        scenarios: List of scenarios to evaluate
        templates: List of WBS templates
        resource_unit_costs: Dictionary of resource costs
        checkpoint_dir: Directory for checkpoint files
        checkpoint_frequency: How often to save checkpoints
        max_workers: Number of worker processes (defaults to CPU count - 1,
            at least 1)

    Returns:
        List of result dicts: previously checkpointed results first, followed
        by new results in completion order. A scenario whose worker raised is
        represented by a dict with ``makespan=None`` and an ``'error'`` message.
    """
    if max_workers is None:
        max_workers = max(1, multiprocessing.cpu_count() - 1)
    
    # Load existing checkpoints
    completed_scenarios = load_checkpoint(checkpoint_dir)
    logging.info(f"Loaded {len(completed_scenarios)} completed scenarios from checkpoints")
    
    # Filter out completed scenarios
    scenarios_to_run = []
    results = []
    
    for scenario in scenarios:
        scenario_hash = generate_scenario_hash(scenario)
        if scenario_hash in completed_scenarios:
            results.append(completed_scenarios[scenario_hash])
        else:
            scenarios_to_run.append(scenario)
    
    total_scenarios = len(scenarios_to_run)
    if total_scenarios == 0:
        logging.info("All scenarios already completed!")
        return results
    
    logging.info(f"Running {total_scenarios} remaining scenarios using {max_workers} processes")
    completed = 0
    current_batch = []
    # Continue numbering after the files already on disk. Deriving the number
    # from the count of completed scenarios can point at an existing partial
    # batch file and overwrite it, losing those results for later resumes.
    current_batch_num = _next_checkpoint_batch_num(checkpoint_dir)
    start_time = time.time()
    
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Submit remaining scenarios
        future_to_scenario = {
            executor.submit(
                process_scenario, 
                scenario, 
                templates, 
                resource_unit_costs,
                i + 1,
                total_scenarios
            ): i for i, scenario in enumerate(scenarios_to_run)
        }
        
        # Process with progress bar
        with tqdm(total=total_scenarios, desc="Processing scenarios") as pbar:
            for future in as_completed(future_to_scenario):
                scenario_idx = future_to_scenario[future]
                
                # Only the worker call is guarded, so a failure while writing
                # a checkpoint is not recorded as a second result for a
                # scenario that already has one.
                try:
                    result = future.result()
                except Exception as e:
                    logging.error(f"Error processing scenario {scenario_idx + 1}: {str(e)}")
                    result = {
                        'scenario': scenarios_to_run[scenario_idx],
                        'makespan': None,
                        'total_cost': 0,
                        'usage': {},
                        'error': str(e)
                    }
                
                results.append(result)
                current_batch.append(result)
                completed += 1  # failed scenarios count towards progress too
                
                # Save checkpoint if needed
                if len(current_batch) >= checkpoint_frequency:
                    try:
                        save_checkpoint(current_batch, checkpoint_dir, current_batch_num)
                        current_batch_num += 1
                        current_batch = []
                        
                        # Also save a CSV summary
                        summary_df = pd.DataFrame(results)
                        summary_df.to_csv(Path(checkpoint_dir) / "results_summary.csv", index=False)
                    except Exception:
                        # Keep the run alive; results stay in memory and the
                        # write is attempted again after the next scenario.
                        logging.exception("Failed to write checkpoint data; continuing")
                
                # Progress updates
                if completed % max(1, total_scenarios // 100) == 0 or result.get('error'):
                    elapsed = time.time() - start_time
                    rate = completed / elapsed if elapsed > 0 else 0
                    eta = (total_scenarios - completed) / rate if rate > 0 else 0
                    logging.info(
                        f"\nProgress Update:\n"
                        f"Completed {completed}/{total_scenarios} scenarios "
                        f"({completed/total_scenarios*100:.1f}%)\n"
                        f"Rate: {rate:.1f} scenarios/sec\n"
                        f"ETA: {eta/60:.1f} minutes\n"
                        f"Last checkpoint: Batch {current_batch_num}"
                    )
                    
                    if result.get('error'):
                        logging.error(
                            f"Scenario {scenario_idx + 1} failed: {result.get('error')}\n"
                            f"Configuration: {scenarios_to_run[scenario_idx]}"
                        )
                    elif result.get('makespan'):
                        logging.info(
                            f"Scenario {scenario_idx + 1} succeeded:\n"
                            f"Makespan: {result['makespan']}\n"
                            f"Total Cost: {result['total_cost']:.2f}"
                        )
                
                pbar.update(1)
    
    # Save final batch
    if current_batch:
        save_checkpoint(current_batch, checkpoint_dir, current_batch_num)
        summary_df = pd.DataFrame(results)
        summary_df.to_csv(Path(checkpoint_dir) / "results_summary.csv", index=False)
    
    # Final summary
    end_time = time.time()
    total_time = end_time - start_time
    successful = sum(1 for r in results if r.get('makespan') is not None)
    average_rate = total_scenarios / total_time if total_time > 0 else 0
    
    logging.info(
        f"\nFinal Results:\n"
        f"Total scenarios: {len(results)}\n"
        f"Successfully completed: {successful}/{len(results)} "
        f"({successful/len(results)*100:.1f}%)\n"
        f"Failed: {len(results)-successful}/{len(results)} "
        f"({(len(results)-successful)/len(results)*100:.1f}%)\n"
        f"Total runtime: {total_time/3600:.1f} hours\n"
        f"Average rate: {average_rate:.1f} scenarios/sec\n"
        f"Results saved in: {checkpoint_dir}"
    )
    
    return results

def run_scenarios_optimized(scenarios, templates, resource_unit_costs, max_workers=None):
    """Run all scenarios in parallel with process-based parallelization.

    Args:
        scenarios: List of scenarios to evaluate
        templates: List of WBS templates
        resource_unit_costs: Dictionary of resource costs
        max_workers: Number of worker processes (defaults to CPU count - 1,
            at least 1)

    Returns:
        List of result dicts in completion order. A scenario whose worker
        raised is represented by a dict with ``makespan=None`` and an
        ``'error'`` message. An empty scenario list yields an empty list.
    """
    if max_workers is None:
        max_workers = max(1, multiprocessing.cpu_count() - 1)
    
    total_scenarios = len(scenarios)
    if total_scenarios == 0:
        # Nothing to do; also avoids dividing by zero in the final summary.
        logging.info("No scenarios to run")
        return []
    
    completed = 0
    results = []
    
    logging.info(f"Starting optimization of {total_scenarios} scenarios using {max_workers} processes")
    start_time = time.time()
    
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Submit all scenarios
        future_to_scenario = {
            executor.submit(
                process_scenario, 
                scenario, 
                templates, 
                resource_unit_costs,
                i + 1,
                total_scenarios
            ): i for i, scenario in enumerate(scenarios)
        }
        
        # Process completed scenarios with progress bar
        with tqdm(total=total_scenarios, desc="Processing scenarios") as pbar:
            for future in as_completed(future_to_scenario):
                scenario_idx = future_to_scenario[future]
                
                # Guard only the worker call; a raised exception is converted
                # into an error result so the run continues.
                try:
                    result = future.result()
                except Exception as e:
                    logging.error(f"Error processing scenario {scenario_idx + 1}: {str(e)}")
                    result = {
                        'scenario': scenarios[scenario_idx],
                        'makespan': None,
                        'total_cost': 0,
                        'usage': {},
                        'error': str(e)
                    }
                
                results.append(result)
                completed += 1  # failed scenarios count towards progress too
                
                # Update progress every N scenarios or when there's an error
                if completed % max(1, total_scenarios // 100) == 0 or result.get('error'):
                    elapsed = time.time() - start_time
                    rate = completed / elapsed if elapsed > 0 else 0
                    eta = (total_scenarios - completed) / rate if rate > 0 else 0
                    logging.info(
                        f"\nProgress Update:\n"
                        f"Completed {completed}/{total_scenarios} scenarios "
                        f"({completed/total_scenarios*100:.1f}%)\n"
                        f"Rate: {rate:.1f} scenarios/sec\n"
                        f"ETA: {eta/60:.1f} minutes"
                    )
                    
                    if result.get('error'):
                        logging.error(
                            f"Scenario {scenario_idx + 1} failed: {result.get('error')}\n"
                            f"Configuration: {scenarios[scenario_idx]}"
                        )
                    elif result.get('makespan'):
                        logging.info(
                            f"Scenario {scenario_idx + 1} succeeded:\n"
                            f"Makespan: {result['makespan']}\n"
                            f"Total Cost: {result['total_cost']:.2f}"
                        )
                
                pbar.update(1)
    
    # Final summary
    end_time = time.time()
    total_time = end_time - start_time
    successful = sum(1 for r in results if r.get('makespan') is not None)
    average_rate = total_scenarios / total_time if total_time > 0 else 0
    
    logging.info(
        f"\nCompleted {total_scenarios} scenarios in {total_time:.1f} seconds\n"
        f"Successful: {successful}/{total_scenarios} ({successful/total_scenarios*100:.1f}%)\n"
        f"Failed: {total_scenarios-successful}/{total_scenarios} "
        f"({(total_scenarios-successful)/total_scenarios*100:.1f}%)\n"
        f"Average rate: {average_rate:.1f} scenarios/sec"
    )
    
    return results

def generate_scenarios(
    resource_ranges: Dict[str, Tuple[int, int]],  # e.g., {"Admin": (1,3)}
    sample_ranges: Dict[str, Tuple[int, int]],    # e.g., {"metals": (0,10)}
    step_sizes: Dict[str, int] = None             # Optional step sizes
) -> List[ScenarioParams]:
    """Build one ScenarioParams per combination of resource levels and sample counts.

    Each ``(start, end)`` range is inclusive and is walked with the step given
    for that name in ``step_sizes`` (1 when no step is given). Combinations in
    which every sample count is zero are logged as a warning and left out.
    Resource combinations vary slowest, sample combinations fastest.
    """
    if step_sizes is None:
        step_sizes = dict.fromkeys({**resource_ranges, **sample_ranges}, 1)

    def axis_values(ranges):
        # One inclusive range object per named axis, in the mapping's order.
        return [
            range(low, high + 1, step_sizes.get(name, 1))
            for name, (low, high) in ranges.items()
        ]

    resource_names = list(resource_ranges.keys())
    sample_names = list(sample_ranges.keys())
    resource_combinations = list(itertools.product(*axis_values(resource_ranges)))
    sample_combinations = list(itertools.product(*axis_values(sample_ranges)))

    scenarios = []
    for res_combo, sample_combo in itertools.product(resource_combinations, sample_combinations):
        sample_dict = dict(zip(sample_names, sample_combo))

        # A scenario needs at least one sample to be meaningful.
        has_samples = any(count != 0 for count in sample_dict.values())
        if not has_samples:
            logging.warning(f"Skipping invalid scenario with no samples: {sample_dict}")
            continue

        scenarios.append(ScenarioParams(
            resource_levels=dict(zip(resource_names, res_combo)),
            sample_counts=sample_dict
        ))
    return scenarios

############################
# Critical Path Calculation
############################
def compute_earliest_starts(tasks):
    """Compute earliest possible start times using the critical path method.

    A task with no dependencies starts at 0; any other task starts when the
    last of its predecessors finishes (predecessor start + duration).

    Args:
        tasks: List of task dicts with "id", "duration" and "dependencies"
            (a list of predecessor task ids, all of which must be in ``tasks``).

    Returns:
        Dict mapping each task id to its earliest start, in ``tasks`` order.

    Raises:
        KeyError: If a dependency refers to an id that is not in ``tasks``.
        ValueError: If the dependencies contain a cycle.
    """
    task_dict = {t["id"]: t for t in tasks}
    computed = {}        # shared across all tasks so each one is solved once
    in_progress = set()  # tasks currently waiting on their predecessors

    for root in tasks:
        # Explicit stack instead of recursion: long dependency chains must
        # not hit the interpreter's recursion limit.
        stack = [root["id"]]
        while stack:
            task_id = stack[-1]
            if task_id in computed:
                stack.pop()
                continue

            dependencies = task_dict[task_id]["dependencies"]
            pending = [dep for dep in dependencies if dep not in computed]

            if not pending:
                start = 0
                for dep in dependencies:
                    start = max(start, computed[dep] + task_dict[dep]["duration"])
                computed[task_id] = start
                in_progress.discard(task_id)
                stack.pop()
                continue

            in_progress.add(task_id)
            for dep in pending:
                # A predecessor that is itself still waiting is an ancestor
                # on the current path, i.e. the dependencies loop back.
                if dep in in_progress:
                    raise ValueError(
                        f"Cyclic dependency detected between tasks {task_id!r} and {dep!r}"
                    )
                stack.append(dep)

    return {task["id"]: computed[task["id"]] for task in tasks}

############################
# Resource Usage and Cost
############################
def compute_resource_usage_cost(tasks, start_times, resource_unit_costs, makespan):
    """Calculate per-slot resource usage and the resulting total cost.

    Every task occupies one unit of its resource in each time slot from its
    start up to (not including) start + duration. Slots later than
    ``makespan`` are ignored; the slot equal to ``makespan`` is still counted.

    Args:
        tasks: List of task dicts with "id", "duration" and "resource".
        start_times: Dict mapping task id to its start slot.
        resource_unit_costs: Dict mapping resource to cost per occupied slot;
            resources missing from it cost 0.0.
        makespan: Last slot that is taken into account.

    Returns:
        Tuple ``(total_cost, usage)`` where ``usage[resource][slot]`` is the
        number of tasks using ``resource`` in ``slot``.

    Raises:
        KeyError: If ``start_times`` contains an id that is not in ``tasks``.
    """
    # Index tasks once instead of scanning the list for every start time.
    # setdefault keeps the first task for a duplicated id, like a linear scan.
    task_by_id = {}
    for task in tasks:
        task_by_id.setdefault(task["id"], task)

    resource_usage = defaultdict(lambda: defaultdict(int))
    for task_id, start_time in start_times.items():
        task = task_by_id[task_id]
        for t in range(start_time, start_time + task["duration"]):
            if t <= makespan:
                resource_usage[task["resource"]][t] += 1

    total_cost = sum(
        usage * resource_unit_costs.get(r, 0.0)
        for r, times in resource_usage.items()
        for usage in times.values()
    )
    return total_cost, dict(resource_usage)

############################
# Run Scenarios
############################

def process_and_save_results(scenarios: List["ScenarioParams"],
                           raw_results: List[dict],
                           output_file: str) -> pd.DataFrame:
    """Turn raw scheduling results into one row per scenario and save as CSV.

    Args:
        scenarios: Scenario objects exposing ``resource_levels`` and
            ``sample_counts`` dicts, in the same order as ``raw_results``.
        raw_results: Result dicts with 'makespan', 'total_cost', 'usage' and
            'error'. A result whose makespan is None is treated as failed.
        output_file: Path of the CSV file to write.

    Returns:
        DataFrame with resource_*/samples_* inputs, makespan, total_cost,
        samples_per_year, cost_per_sample, utilization_* and error columns.
        It is empty when there were no scenario/result pairs.
    """
    if len(scenarios) != len(raw_results):
        logging.warning(
            f"Scenario/result count mismatch: {len(scenarios)} scenarios, "
            f"{len(raw_results)} results; unmatched entries are ignored"
        )

    # Rows are built directly as dicts so this function does not depend on
    # which ScenarioResult definition (with or without `error`) is in effect.
    records = []
    for scenario, result in zip(scenarios, raw_results):
        makespan = result.get('makespan')
        utilization = {}

        if makespan is not None:
            total_samples = sum(scenario.sample_counts.values())
            total_cost = result['total_cost']

            # Samples per year assumes makespan is in days. A makespan of 0
            # (scenario with no tasks) would divide by zero, so report 0.
            samples_per_year = int(365 * total_samples / makespan) if makespan > 0 else 0
            cost_per_sample = total_cost / total_samples if total_samples > 0 else 0

            for resource, usage_dict in result['usage'].items():
                # A resource absent from the scenario counts as zero capacity.
                max_possible_usage = scenario.resource_levels.get(resource, 0) * makespan
                total_usage = sum(usage_dict.values())
                utilization[resource] = total_usage / max_possible_usage if max_possible_usage > 0 else 0
            error = None
        else:
            total_cost = 0.0
            samples_per_year = 0
            cost_per_sample = 0.0
            # `or` also covers an 'error' key that is present but None, which
            # would otherwise make a failed row look successful.
            error = result.get('error') or 'Unknown error'

        records.append({
            # Resource levels
            **{f"resource_{k}": v for k, v in scenario.resource_levels.items()},
            # Sample counts
            **{f"samples_{k}": v for k, v in scenario.sample_counts.items()},
            # Results
            "makespan": makespan,
            "total_cost": total_cost,
            "samples_per_year": samples_per_year,
            "cost_per_sample": cost_per_sample,
            # Resource utilization
            **{f"utilization_{k}": v for k, v in utilization.items()},
            # Error tracking
            "error": error
        })

    # Create DataFrame and save
    df = pd.DataFrame(records)
    df.to_csv(output_file, index=False)

    # Log summary statistics (computed from the records so that an empty
    # result set, which yields a DataFrame without columns, cannot crash).
    succeeded = [r for r in records if r["error"] is None]
    successful = len(succeeded)
    total = len(records)
    logging.info("\nResults Summary:")
    logging.info(f"Total scenarios processed: {total}")
    if total > 0:
        logging.info(f"Successful scenarios: {successful} ({successful/total*100:.1f}%)")
        logging.info(f"Failed scenarios: {total-successful} ({(total-successful)/total*100:.1f}%)")
    if successful > 0:
        # Average over successful rows only; failed rows carry placeholder zeros.
        avg_samples = sum(r["samples_per_year"] for r in succeeded) / successful
        avg_cost = sum(r["cost_per_sample"] for r in succeeded) / successful
        logging.info(f"Average samples per year: {avg_samples:.1f}")
        logging.info(f"Average cost per sample: ${avg_cost:.2f}")
    logging.info(f"Results saved to: {output_file}")

    return df

def run_capacity_analysis(
    templates: List[dict],
    resource_ranges: Dict[str, Tuple[int, int]],
    sample_ranges: Dict[str, Tuple[int, int]],
    resource_unit_costs: Dict[str, float],
    step_sizes: Optional[Dict[str, int]] = None,
    output_file: str = "capacity_results.csv"
) -> pd.DataFrame:
    """Run the complete capacity analysis across all scenarios, with checkpoints.

    Args:
        templates: WBS templates handed through to the scenario runner.
        resource_ranges: Range per resource, passed to generate_scenarios.
        sample_ranges: Range per sample type, passed to generate_scenarios.
        resource_unit_costs: Cost per resource unit, handed to the runner.
        step_sizes: Optional step sizes, passed to generate_scenarios.
        output_file: CSV path the processed results are written to.

    Returns:
        The DataFrame produced by process_and_save_results.

    Raises:
        ValueError: If a range dict is empty or no scenario was generated.
    """
    # Validate ranges and step sizes
    if not resource_ranges or not sample_ranges:
        raise ValueError("Resource and sample ranges must not be empty.")

    # Generate scenarios
    scenarios = generate_scenarios(resource_ranges, sample_ranges, step_sizes)

    # Ensure scenarios were generated
    if not scenarios:
        raise ValueError("No scenarios generated. Check ranges and step sizes.")

    print(f"Generated {len(scenarios)} scenarios")
    logging.info(f"Generated {len(scenarios)} scenarios to evaluate")

    # Convert scenarios to scheduler format
    scheduler_scenarios = [
        {
            "metals_count": scenario.sample_counts.get("metals", 0),
            "ceramics_count": scenario.sample_counts.get("ceramics", 0),
            "composites_count": scenario.sample_counts.get("composites", 0),
            "polymer_count": scenario.sample_counts.get("polymers", 0),
            "resource_capacities": scenario.resource_levels
        }
        for scenario in scenarios
    ]

    # Run scenarios. The dict form is passed on: process_scenario reads
    # scenarios with .get()/[...] and generate_scenario_hash json-dumps them,
    # neither of which works on ScenarioParams objects.
    # NOTE(review): assumes run_scenarios_with_checkpoints forwards scenarios
    # to those helpers and returns results in input order — TODO confirm.
    results = run_scenarios_with_checkpoints(
        scenarios=scheduler_scenarios,
        templates=templates,
        resource_unit_costs=resource_unit_costs,
        checkpoint_dir="checkpoints",
        checkpoint_frequency=100
    )

    # Convert results to the final tabular format and save
    final_results = process_and_save_results(scenarios, results, output_file)

    return final_results

def schedule_optimized(tasks, resource_capacities, max_solve_time=30):
    """
    Build and solve a makespan-minimising schedule with PuLP/CBC.

    Design intent of the model:
    - No work on weekends
    - Maximum 10 hours per person per day

    NOTE(review): the hourly `is_active` variables are only restricted (big-M
    window, weekend == 0, daily sum <= cap); no constraint forces a variable
    to 1 while its task runs. As written the solver may leave them all at 0,
    so the weekend and daily-hour rules look non-binding — confirm and add a
    linking constraint if they are meant to be enforced.

    Args:
        tasks: List of task dicts with "id", "duration", "dependencies" and
            "resource".
        resource_capacities: Dict mapping resource to available units.
            Resources missing from it get no daily-hours constraint.
        max_solve_time: Time limit handed to CBC (``timeLimit``).

    Returns:
        ``(makespan, start_times)`` where ``start_times`` maps task id to its
        start, or ``(None, {})`` when the solver status is not optimal.
        An empty task list yields ``(0, {})``.
    """
    # Nothing to schedule; also avoids max() on an empty sequence below.
    if not tasks:
        return 0, {}

    # 1. Quick lookups and preprocessing
    task_dict = {t["id"]: t for t in tasks}
    resource_tasks = defaultdict(list)
    for t in tasks:
        resource_tasks[t["resource"]].append(t["id"])

    # 2. Compute earliest starts using critical path
    earliest_starts = compute_earliest_starts(tasks)
    latest_completion = max(earliest_starts[t["id"]] + t["duration"] for t in tasks)

    # Add some buffer to the time horizon
    time_horizon = latest_completion + 10

    # 3. Create model
    model = pulp.LpProblem("Schedule", pulp.LpMinimize)

    # 4. Variables
    # Start of each task, bounded below by its critical-path earliest start
    # and above so that the task still ends inside the horizon.
    starts = {
        t["id"]: pulp.LpVariable(f'start_{t["id"]}',
                                earliest_starts[t["id"]],
                                time_horizon - t["duration"],
                                cat='Integer')
        for t in tasks
    }

    # One binary per task and hour slot; slots are start/duration units * 24.
    is_active = {
        (task["id"], h): pulp.LpVariable(
            f'active_{task["id"]}_{h}',
            cat='Binary'
        )
        for task in tasks
        for h in range(time_horizon * 24)  # Hours instead of periods
    }

    makespan = pulp.LpVariable("makespan", 0, time_horizon, cat='Integer')

    # 5. Objective: Minimize makespan
    model += makespan

    # 6. Constraints
    M = time_horizon * 24  # Big-M value adjusted for hours

    # Makespan constraint: every task finishes no later than the makespan
    for task in tasks:
        model += starts[task["id"]] * 24 + task["duration"] * 24 <= makespan * 24

    # Dependencies: a task starts only after each predecessor has finished
    for task in tasks:
        for dep_id in task["dependencies"]:
            model += starts[task["id"]] >= starts[dep_id] + task_dict[dep_id]["duration"]

    # Activity constraints with hour granularity
    for task in tasks:
        task_id = task["id"]
        duration = task["duration"] * 24  # Convert to hours

        for h in range(time_horizon * 24):
            # If is_active is 1, hour h must lie inside the task's window
            # [start*24, start*24 + duration]; if 0, big-M relaxes both sides.
            model += starts[task_id] * 24 <= h + M * (1 - is_active[task_id, h])
            model += starts[task_id] * 24 + duration >= h - M * (1 - is_active[task_id, h])

    # Weekend constraints - no work on Saturday (day % 7 == 5) or Sunday (day % 7 == 6)
    for task in tasks:
        task_id = task["id"]
        for h in range(time_horizon * 24):
            day = h // 24
            if day % 7 in [5, 6]:  # Weekend
                model += is_active[task_id, h] == 0

    # Daily work hour constraints - maximum 10 hours per person per day
    for res, res_tasks_list in resource_tasks.items():
        if not res_tasks_list or res not in resource_capacities:
            continue

        for day in range(time_horizon):
            day_start = day * 24
            day_end = (day + 1) * 24

            # Sum of active hours in day must not exceed 10 per resource unit
            model += (pulp.lpSum(is_active[task_id, h]
                               for task_id in res_tasks_list
                               for h in range(day_start, day_end)
                               ) <= 10 * resource_capacities[res])

    # 7. Solve
    solver = pulp.PULP_CBC_CMD(msg=1, timeLimit=max_solve_time)
    status = model.solve(solver)

    if status != pulp.LpStatusOptimal:
        return None, {}

    # 8. Extract results. Solver values are floats that can sit just below
    # the integer (e.g. 2.9999999), so round instead of truncating.
    start_times = {t["id"]: int(round(starts[t["id"]].value())) for t in tasks}
    makespan_val = int(round(makespan.value()))

    return makespan_val, start_times

def process_scenario(scenario, templates, resource_unit_costs, scenario_num=0, total_scenarios=0):
    """Generate, schedule and cost the tasks of one scenario, logging each step.

    Args:
        scenario: Dict with optional 'metals_count', 'ceramics_count',
            'composites_count', 'polymer_count' and a required
            'resource_capacities' dict.
        templates: WBS templates, matched positionally with the four sample
            counts (metals, ceramics, composites, polymers).
        resource_unit_costs: Dict mapping resource to cost per occupied slot;
            missing resources cost 0.
        scenario_num: Position of this scenario, used for logging only.
        total_scenarios: Number of scenarios, used for logging only.

    Returns:
        Dict with 'scenario', 'makespan', 'total_cost', 'usage' and 'error'.
        'makespan' is 0 when no tasks were generated and None when scheduling
        failed or an exception occurred; exceptions are reported through
        'error' rather than raised.
    """
    try:
        logging.info(f"\nProcessing scenario {scenario_num}/{total_scenarios}")
        logging.info("Scenario configuration:")
        logging.info(f"Sample counts: Metals={scenario.get('metals_count', 0)}, "
                    f"Ceramics={scenario.get('ceramics_count', 0)}, "
                    f"Composites={scenario.get('composites_count', 0)}, "
                    f"Polymers={scenario.get('polymer_count', 0)}")
        capacities = scenario['resource_capacities']
        logging.info(f"Resource capacities: {capacities}")

        # Generate tasks for this scenario
        all_tasks = []
        scenario_values = [
            scenario.get('metals_count', 0),
            scenario.get('ceramics_count', 0),
            scenario.get('composites_count', 0),
            scenario.get('polymer_count', 0)
        ]

        for template, count, prefix in zip(templates, scenario_values,
                                         ["MET_", "CER_", "COMP_", "POLY_"]):
            if count > 0:
                new_tasks = replicate_wbs_optimized(template, count, prefix)
                all_tasks.extend(new_tasks)
                logging.info(f"Generated {len(new_tasks)} tasks for {prefix.strip('_')} template")

        # Log task generation summary
        logging.info(f"Total tasks generated: {len(all_tasks)}")

        # Resource usage summary
        resource_task_counts = {}
        for task in all_tasks:
            resource_task_counts[task["resource"]] = resource_task_counts.get(task["resource"], 0) + 1
        logging.info("Tasks per resource:")
        for resource, count in resource_task_counts.items():
            logging.info(f"  {resource}: {count} tasks")

        # Validate dependencies; the id index is reused for the usage pass.
        task_by_id = {}
        for task in all_tasks:
            task_by_id.setdefault(task["id"], task)
        for task in all_tasks:
            for dep in task["dependencies"]:
                if dep not in task_by_id:
                    raise ValueError(f"Task {task['id']} references missing dependency {dep}")

        # If no tasks, return early
        if not all_tasks:
            logging.warning("No tasks generated for this scenario")
            return {
                'scenario': scenario,
                'makespan': 0,
                'total_cost': 0,
                'usage': {},
                'error': None
            }

        # Schedule tasks
        logging.info("Starting scheduling optimization...")
        makespan, starts = schedule_optimized(
            tasks=all_tasks,
            resource_capacities=capacities
        )

        if makespan is None:
            logging.error("No feasible schedule found")
            return {
                'scenario': scenario,
                'makespan': None,
                'total_cost': 0,
                'usage': {},
                'error': "No feasible schedule found"
            }

        logging.info(f"Schedule found with makespan: {makespan}")

        # Calculate resource usage and costs: one unit per task per slot
        usage = defaultdict(lambda: defaultdict(int))
        for task_id, start_time in starts.items():
            task = task_by_id[task_id]
            for t in range(start_time, start_time + task["duration"]):
                usage[task["resource"]][t] += 1

        total_cost = sum(
            count * resource_unit_costs.get(resource, 0)
            for resource, times in usage.items()
            for count in times.values()
        )

        logging.info(f"Total cost calculated: {total_cost}")

        # Calculate resource utilization (logging only). A resource without a
        # configured capacity is reported as 0% instead of raising KeyError,
        # which would discard an already-solved schedule.
        for resource, times in usage.items():
            max_possible_usage = capacities.get(resource, 0) * makespan
            total_usage = sum(times.values())
            utilization = total_usage / max_possible_usage if max_possible_usage > 0 else 0
            logging.info(f"Resource utilization - {resource}: {utilization:.1%}")

        return {
            'scenario': scenario,
            'makespan': makespan,
            'total_cost': total_cost,
            'usage': dict(usage),
            'error': None
        }

    except Exception as e:
        # Boundary of a worker task: report the failure in the result dict so
        # one bad scenario does not abort the whole batch.
        logging.error(f"Error processing scenario: {str(e)}")
        import traceback
        logging.error(f"Traceback: {traceback.format_exc()}")
        return {
            'scenario': scenario,
            'makespan': None,
            'total_cost': 0,
            'usage': {},
            'error': str(e)
        }

def run_scenarios_optimized(scenarios, templates, resource_unit_costs, max_workers=4):
    """Run all scenarios on a thread pool with progress tracking.

    Args:
        scenarios: List of scenario dicts accepted by process_scenario.
        templates: WBS templates handed to process_scenario.
        resource_unit_costs: Cost per resource unit, handed to process_scenario.
        max_workers: Size of the thread pool.

    Returns:
        List of result dicts in the SAME order as ``scenarios`` (callers pair
        the two lists positionally), regardless of completion order.
    """
    total = len(scenarios)
    if total == 0:
        # Nothing to do; also avoids dividing by zero in the summary.
        logging.info("No scenarios to run")
        return []

    # Slot per scenario so results land at their input position even though
    # futures complete in arbitrary order.
    results = [None] * total
    completed = 0
    start_time = time.time()

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit all scenarios
        future_to_scenario = {
            executor.submit(
                process_scenario,
                s,
                templates,
                resource_unit_costs,
                i + 1,  # scenario number
                total   # total scenarios
            ): i
            for i, s in enumerate(scenarios)
        }

        # Process results with progress bar
        with tqdm(total=total, desc="Processing scenarios") as pbar:
            for future in as_completed(future_to_scenario):
                scenario_idx = future_to_scenario[future]
                # Only the future itself is guarded, so a logging problem can
                # never be mistaken for a scenario failure or add a second
                # result for the same scenario.
                try:
                    result = future.result()
                    worker_raised = False
                except Exception as e:
                    logging.error(f"Error processing scenario {scenario_idx + 1}: {str(e)}")
                    result = {
                        'scenario': scenarios[scenario_idx],
                        'makespan': None,
                        'total_cost': 0,
                        'usage': {},
                        'error': str(e)
                    }
                    worker_raised = True

                results[scenario_idx] = result
                completed += 1

                # Log progress every 10 scenarios or when there's an error
                if not worker_raised and (completed % 10 == 0 or result.get('error')):
                    elapsed = time.time() - start_time
                    rate = completed / elapsed if elapsed > 0 else 0
                    eta = (total - completed) / rate if rate > 0 else 0
                    logging.info(
                        f"\nProgress Update:\n"
                        f"Completed {completed}/{total} scenarios "
                        f"({completed/total*100:.1f}%)\n"
                        f"Rate: {rate:.1f} scenarios/sec\n"
                        f"ETA: {eta/60:.1f} minutes"
                    )

                    if result.get('error'):
                        logging.error(
                            f"Scenario {scenario_idx + 1} failed: {result.get('error')}\n"
                            f"Configuration: {scenarios[scenario_idx]}"
                        )
                    elif result.get('makespan'):
                        logging.info(
                            f"Scenario {scenario_idx + 1} succeeded:\n"
                            f"Makespan: {result['makespan']}\n"
                            f"Total Cost: {result['total_cost']:.2f}"
                        )

                pbar.update(1)

    # Final summary
    successful = sum(1 for r in results if not r.get('error'))
    logging.info(
        f"\nFinal Results Summary:\n"
        f"Total scenarios: {total}\n"
        f"Successful: {successful} ({successful/total*100:.1f}%)\n"
        f"Failed: {total-successful} ({(total-successful)/total*100:.1f}%)\n"
        f"Total time: {time.time() - start_time:.1f} seconds"
    )

    return results

# Helper function for task replication
def replicate_wbs_optimized(template, count, prefix):
    """Stamp out ``count`` independent copies of a WBS template.

    Each copy gets ids of the form ``<prefix><template id>_<copy index>`` and
    its dependencies are rewritten the same way, so copies never reference
    each other. Only id, duration, dependencies and resource are carried over.
    """
    if count == 0:
        return []

    def _instance_id(base_id, copy_index):
        # Same naming scheme for a task and for the tasks it depends on.
        return f"{prefix}{base_id}_{copy_index}"

    return [
        {
            "id": _instance_id(item["id"], copy_index),
            "duration": item["duration"],
            "dependencies": [
                _instance_id(dep, copy_index) for dep in item["dependencies"]
            ],
            "resource": item["resource"],
        }
        for copy_index in range(count)
        for item in template
    ]

def save_results(results: List[ScenarioResult], filename: str):
    """Flatten scenario results into a table, write it to CSV and return it.

    Every result becomes one row: ``resource_*`` and ``samples_*`` columns for
    the scenario inputs, the four headline metrics, then one
    ``utilization_*`` column per resource.
    """
    def _flatten(entry):
        # Insertion order defines the column order of the resulting table.
        row = {}
        for resource, level in entry.params.resource_levels.items():
            row[f"resource_{resource}"] = level
        for sample_type, amount in entry.params.sample_counts.items():
            row[f"samples_{sample_type}"] = amount
        row["makespan"] = entry.makespan
        row["total_cost"] = entry.total_cost
        row["samples_per_year"] = entry.samples_per_year
        row["cost_per_sample"] = entry.cost_per_sample
        for resource, fraction in entry.resource_utilization.items():
            row[f"utilization_{resource}"] = fraction
        return row

    table = pd.DataFrame([_flatten(entry) for entry in results])
    table.to_csv(filename, index=False)
    return table

    
import pulp
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from typing import Dict, List, Tuple
import pandas as pd
import time

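# NOTE: the definitions below repeat ScenarioParams, ScenarioResult and
# run_capacity_analysis from earlier in this file; being defined later, they
# replace the earlier versions when the file is executed top to bottom.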
@dataclass
class ScenarioParams:
    """Parameters defining a scenario.

    NOTE(review): this repeats the ScenarioParams dataclass declared near the
    top of the file with the same two fields.
    """
    # Level per resource, keyed by resource name.
    resource_levels: Dict[str, int]
    # Number of samples per sample type, keyed by sample type.
    sample_counts: Dict[str, int]

@dataclass
class ScenarioResult:
    """Results from running a scenario.

    Kept field-compatible with the ScenarioResult declared near the top of
    the file: this later definition is the one in effect after the file has
    been executed, and process_and_save_results constructs results with
    ``error=...`` and ``makespan=None`` for failed scenarios.
    """
    params: "ScenarioParams"
    # None when no schedule was found.
    makespan: Optional[int]
    total_cost: float
    samples_per_year: int
    cost_per_sample: float
    # Fraction of capacity used, keyed by resource.
    resource_utilization: Dict[str, float]
    # Error message for failed scenarios; None on success.
    error: Optional[str] = None

def run_capacity_analysis(
    templates: List[dict],
    resource_ranges: Dict[str, Tuple[int, int]],
    sample_ranges: Dict[str, Tuple[int, int]],
    resource_unit_costs: Dict[str, float],
    step_sizes: Optional[Dict[str, int]] = None,  # Optional - set to None to use step size of 1
    output_file: str = "capacity_results.csv"
) -> pd.DataFrame:
    """
    Run complete capacity analysis across all scenarios.

    Args:
        templates: List of WBS templates [metals, ceramics, composites, polymers]
        resource_ranges: Dict of resource min/max e.g., {"Admin": (1,3)}
        sample_ranges: Dict of sample type min/max e.g., {"metals": (0,10)}
        resource_unit_costs: Dict of costs per resource unit
        step_sizes: Dict of step sizes for each parameter (optional)
        output_file: Where to save results

    Returns:
        DataFrame with one row per scenario that produced a schedule;
        scenarios without a makespan are left out.

    Raises:
        ValueError: If no scenarios were generated.
    """
    # Generate scenarios
    scenarios = generate_scenarios(resource_ranges, sample_ranges, step_sizes)
    print(f"Generated {len(scenarios)} scenarios to evaluate")
    if not scenarios:
        raise ValueError("No scenarios generated. Check ranges and step sizes.")

    # Convert scenarios to format needed by scheduler
    scheduler_scenarios = [
        {
            "metals_count": scenario.sample_counts.get("metals", 0),
            "ceramics_count": scenario.sample_counts.get("ceramics", 0),
            "composites_count": scenario.sample_counts.get("composites", 0),
            "polymer_count": scenario.sample_counts.get("polymers", 0),
            "resource_capacities": scenario.resource_levels
        }
        for scenario in scenarios
    ]

    # Run scenarios
    start_time = time.time()
    results = run_scenarios_optimized(scheduler_scenarios, templates, resource_unit_costs)
    end_time = time.time()
    print(f"Completed {len(scenarios)} scenarios in {end_time - start_time:.1f} seconds")

    # Each result carries the scheduler dict it was computed from under
    # 'scenario'. Matching on that object's identity keeps a result attached
    # to the right ScenarioParams even if results are not in input order.
    params_by_dict_id = {
        id(as_dict): params
        for params, as_dict in zip(scenarios, scheduler_scenarios)
    }

    # Convert results to ScenarioResult format
    final_results = []
    for position, result in enumerate(results):
        if result['makespan'] is None:  # Handle infeasible scenarios
            continue

        scenario = params_by_dict_id.get(id(result.get('scenario')))
        if scenario is None:
            # Result without a recognisable origin: fall back to position.
            scenario = scenarios[position]

        makespan = result['makespan']

        # Calculate samples per year (assuming makespan is in days). A
        # makespan of 0 (no tasks generated) would divide by zero.
        total_samples = sum(scenario.sample_counts.values())
        samples_per_year = int(365 * total_samples / makespan) if makespan > 0 else 0

        # Calculate cost per sample
        cost_per_sample = result['total_cost'] / total_samples if total_samples > 0 else 0

        # Calculate resource utilization; a resource that is not part of the
        # scenario's levels counts as zero capacity.
        utilization = {}
        for resource, usage_dict in result['usage'].items():
            max_possible_usage = scenario.resource_levels.get(resource, 0) * makespan
            total_usage = sum(usage_dict.values())
            utilization[resource] = total_usage / max_possible_usage if max_possible_usage > 0 else 0

        final_results.append(ScenarioResult(
            params=scenario,
            makespan=makespan,
            total_cost=result['total_cost'],
            samples_per_year=samples_per_year,
            cost_per_sample=cost_per_sample,
            resource_utilization=utilization
        ))

    # Save results
    df = save_results(final_results, output_file)
    return df

#
# ============= YOUR WBS TEMPLATES =============
#
# WBS template for one metals sample. Each entry is a task dict:
#   id           - WBS number, unique within this template
#   name         - human-readable label (not copied by replicate_wbs_optimized)
#   duration     - task length; NOTE(review): unit is not stated here — confirm
#   dependencies - ids of tasks in this template that must finish first
#   resource     - role that performs the task
metals_template = [
    # ------ Sample Management ------
    {"id": "1.1", "name": "Receive and Log Samples",        "duration": 4, "dependencies": [],        "resource": "Administrative Support Specialist"},
    {"id": "1.2", "name": "Assign Unique Identifiers",      "duration": 2, "dependencies": ["1.1"],   "resource": "Administrative Support Specialist"},
    {"id": "1.3", "name": "Photograph Initial Condition",   "duration": 2, "dependencies": ["1.2"],   "resource": "Laboratory Technician"},
    {"id": "1.4", "name": "Write Analysis Plan",            "duration": 3, "dependencies": ["1.3"],   "resource": "Materials Scientist (Metals)"},
    {"id": "1.5", "name": "Store Samples Appropriately",    "duration": 1, "dependencies": ["1.4"],   "resource": "Administrative Support Specialist"},
    {"id": "1.6", "name": "Document Chain of Custody",      "duration": 1, "dependencies": ["1.5"],   "resource": "Administrative Support Specialist"},
    # ------ Sample Preparation ------
    {"id": "2.1", "name": "Cut Subsamples for Mounting",    "duration": 3, "dependencies": ["1.6"],   "resource": "Laboratory Technician"},
    {"id": "2.2", "name": "Cut Subsamples for Light El.",   "duration": 3, "dependencies": ["1.6"],   "resource": "Laboratory Technician"},
    {"id": "2.3", "name": "Cut for ICP-OES/ICP-MS",         "duration": 3, "dependencies": ["1.6"],   "resource": "Laboratory Technician"},
    {"id": "2.4", "name": "Clean Cut Subsamples",           "duration": 2, "dependencies": ["2.1","2.2","2.3"], "resource": "Laboratory Technician"},
    {"id": "2.5", "name": "Mount Subsample in Epoxy",       "duration": 3, "dependencies": ["2.1"],   "resource": "Laboratory Technician"},
    {"id": "2.6", "name": "Grind & Polish Mounted Subsmpl", "duration": 4, "dependencies": ["2.5"],   "resource": "Materials Scientist (Metals)"},
    {"id": "2.7", "name": "Digest Sample with Acid/MW",     "duration": 4, "dependencies": ["2.4"],   "resource": "Analytical Instrument Specialist"},
    {"id": "2.8", "name": "Prep ICP Standards/Blanks",      "duration": 2, "dependencies": [],        "resource": "Analytical Instrument Specialist"},
    {"id": "2.9", "name": "Coat Sample for SEM/EBSD",       "duration": 2, "dependencies": ["2.6"],   "resource": "Laboratory Technician"},
    {"id": "2.10","name": "Label & Catalog Prepared Smpls", "duration": 2, "dependencies": ["2.3","2.6","2.7","2.9"], "resource": "Administrative Support Specialist"},
    # ------ Sample Analysis ------
    {"id": "3.1", "name": "Optical Microscope",             "duration": 2, "dependencies": ["2.6"],   "resource": "Materials Scientist (Metals)"},
    {"id": "3.2", "name": "SEM Imaging",                    "duration": 4, "dependencies": ["2.9"],   "resource": "Advanced Imaging Specialist"},
    {"id": "3.3", "name": "SEM-EDS Bulk Composition",       "duration": 4, "dependencies": ["3.2"],   "resource": "Advanced Imaging Specialist"},
    {"id": "3.4", "name": "SEM-EBSD Grain Analysis",        "duration": 4, "dependencies": ["3.2"],   "resource": "Advanced Imaging Specialist"},
    {"id": "3.5", "name": "Density via Pycnometer",         "duration": 3, "dependencies": ["2.4"],   "resource": "Laboratory Technician"},
    {"id": "3.6", "name": "Composition XRF",                "duration": 3, "dependencies": ["2.4"],   "resource": "Laboratory Technician"},
    {"id": "3.7", "name": "Composition SparkOES",           "duration": 3, "dependencies": ["2.6"],   "resource": "Laboratory Technician"},
    {"id": "3.8", "name": "Light Elements (CHNOS)",         "duration": 3, "dependencies": ["2.4"],   "resource": "Analytical Instrument Specialist"},
    {"id": "3.9", "name": "Hardness Microindentation",      "duration": 3, "dependencies": ["2.6"],   "resource": "Materials Scientist (Metals)"},
    # NOTE(review): 3.10 has no dependencies and nothing depends on it, so it
    # is not tied to digestion (2.7) or standards prep (2.8) — confirm intended.
    {"id": "3.10","name": "ICP-MS/ICP-OES",                 "duration": 8, "dependencies": [],        "resource": "Analytical Instrument Specialist"},
    # ------ Data Analysis ------
    {"id": "4.1", "name": "Compile Analytical Results",     "duration": 2, "dependencies": ["3.1","3.2","3.3","3.4","3.5","3.6","3.7","3.8","3.9"],     "resource": "Materials Scientist (Metals)"},
    {"id": "4.2", "name": "Cross-Validate Composition",     "duration": 6, "dependencies": ["3.3","3.4","3.5","3.6","3.7","3.8","3.9"], "resource": "Materials Scientist (Metals)"},
    # NOTE(review): 4.3 has no dependencies, so the 4.3 -> 4.9 reporting chain
    # is not tied to 4.1/4.2 or any analysis task — confirm intended.
    {"id": "4.3", "name": "DB Match or Similar Alloy",      "duration": 4, "dependencies": [],        "resource": "Materials Scientist (Metals)"},
    {"id": "4.4", "name": "Prepare Final Figures",          "duration": 3, "dependencies": ["4.3"],   "resource": "Materials Scientist (Metals)"},
    {"id": "4.5", "name": "Perform Peer Review",            "duration": 2, "dependencies": ["4.4"],   "resource": "Materials Scientist (Metals)"},
    {"id": "4.6", "name": "QA Review of Data/Results",      "duration": 1, "dependencies": ["4.5"],   "resource": "Quality Assurance Officer"},
    {"id": "4.7", "name": "Draft Report to Client",         "duration": 2, "dependencies": ["4.6"],   "resource": "Project Manager"},
    {"id": "4.8", "name": "Update Report Per Feedback",     "duration": 2, "dependencies": ["4.7"],   "resource": "Project Manager"},
    {"id": "4.9", "name": "Enter Results into Database",    "duration": 2, "dependencies": ["4.8"],   "resource": "Administrative Support Specialist"},
]

# WBS template for one ceramics sample. Each entry is a task dict:
#   id           - WBS number, unique within this template
#   name         - human-readable label (not copied by replicate_wbs_optimized)
#   duration     - task length; NOTE(review): unit is not stated here — confirm
#   dependencies - ids of tasks in this template that must finish first
#   resource     - role that performs the task
ceramics_template = [
    # ------ Sample Management ------
    {"id": "1.1", "name": "Receive and Log Samples",         "duration": 4, "dependencies": [],       "resource": "Administrative Support Specialist"},
    {"id": "1.2", "name": "Assign Unique Identifiers",       "duration": 2, "dependencies": ["1.1"],  "resource": "Administrative Support Specialist"},
    {"id": "1.3", "name": "Photograph Initial Condition",    "duration": 2, "dependencies": ["1.2"],  "resource": "Laboratory Technician"},
    {"id": "1.4", "name": "Write Analysis Plan",             "duration": 3, "dependencies": ["1.3"],  "resource": "Materials Scientist (Ceramics)"},
    {"id": "1.5", "name": "Store Samples Appropriately",     "duration": 1, "dependencies": ["1.4"],  "resource": "Administrative Support Specialist"},
    {"id": "1.6", "name": "Document Chain of Custody",       "duration": 1, "dependencies": ["1.5"],  "resource": "Administrative Support Specialist"},
    # ------ Sample Preparation ------
    {"id": "2.1", "name": "Cut Subsamples for Mounting",     "duration": 3, "dependencies": ["1.6"],  "resource": "Laboratory Technician"},
    {"id": "2.2", "name": "Cut Subsamples for ICP",          "duration": 3, "dependencies": ["1.6"],  "resource": "Laboratory Technician"},
    {"id": "2.3", "name": "Clean Cut Subsamples",            "duration": 2, "dependencies": ["2.1","2.2"], "resource": "Laboratory Technician"},
    {"id": "2.4", "name": "Mount Subsample in Epoxy",        "duration": 3, "dependencies": ["2.3"],  "resource": "Laboratory Technician"},
    {"id": "2.5", "name": "Crush & Sieve for Powder",        "duration": 4, "dependencies": ["2.3"],  "resource": "Laboratory Technician"},
    {"id": "2.6", "name": "Grind & Polish Mounted Subsmpl",  "duration": 4, "dependencies": ["2.4"],  "resource": "Materials Scientist (Ceramics)"},
    {"id": "2.7", "name": "Digest Sample (Acid/MW)",         "duration": 4, "dependencies": ["2.3"],  "resource": "Analytical Instrument Specialist"},
    # NOTE(review): 2.8 has no dependencies and no task lists it as a
    # dependency, so it is detached from the rest of the chain — confirm.
    {"id": "2.8", "name": "Prep ICP Standards/Blanks",       "duration": 2, "dependencies": [],       "resource": "Analytical Instrument Specialist"},
    {"id": "2.9", "name": "Coat Sample for SEM/EBSD",        "duration": 2, "dependencies": ["2.6"],  "resource": "Laboratory Technician"},
    {"id": "2.10","name": "Label & Catalog Prepared Smpls",  "duration": 2, "dependencies": ["2.5","2.3","2.6","2.7","2.9"], "resource": "Administrative Support Specialist"},
    # ------ Sample Analysis ------
    # NOTE(review): 3.1 and 3.2 are not among the dependencies of 4.1 below,
    # unlike 3.3-3.9 — confirm whether that is intended.
    {"id": "3.1", "name": "FTIR for Composition",            "duration": 4, "dependencies": ["2.10"], "resource": "Materials Scientist (Ceramics)"},
    {"id": "3.2", "name": "Raman for Crystalline Phases",    "duration": 4, "dependencies": ["2.10"], "resource": "Materials Scientist (Ceramics)"},
    {"id": "3.3", "name": "SEM Imaging (Microstructure)",     "duration": 6, "dependencies": ["2.10"], "resource": "Materials Scientist (Ceramics)"},
    {"id": "3.4", "name": "XRD for Phase Info",               "duration": 5, "dependencies": ["2.10"], "resource": "Materials Scientist (Ceramics)"},
    {"id": "3.5", "name": "TGA for Thermal Stability",        "duration": 5, "dependencies": ["2.10"], "resource": "Materials Scientist (Ceramics)"},
    {"id": "3.6", "name": "DSC for Thermal Transitions",      "duration": 5, "dependencies": ["2.10"], "resource": "Materials Scientist (Ceramics)"},
    {"id": "3.7", "name": "Microindentation (Knoop)",         "duration": 6, "dependencies": ["2.10"], "resource": "Materials Scientist (Ceramics)"},
    {"id": "3.8", "name": "SEM-EDS for Elemental Comp",       "duration": 6, "dependencies": ["2.10"], "resource": "Advanced Imaging Specialist"},
    {"id": "3.9", "name": "ICP-OES/ICP-MS for Trace Elem",    "duration": 6, "dependencies": ["2.10"], "resource": "Materials Scientist (Ceramics)"},
    # ------ Data Analysis ------
    {"id": "4.1", "name": "Compile Analytical Results",       "duration": 2, "dependencies": ["3.3","3.4","3.5","3.6","3.7","3.8","3.9"],    "resource": "Materials Scientist (Ceramics)"},
    {"id": "4.2", "name": "Interpret Data (Ceramic Props)",   "duration": 6, "dependencies": ["4.1"],  "resource": "Materials Scientist (Ceramics)"},
    {"id": "4.3", "name": "Cross-Validate Across Techniques", "duration": 6, "dependencies": ["4.2"],  "resource": "Materials Scientist (Ceramics)"},
    {"id": "4.4", "name": "Prepare Final Figures",            "duration": 3, "dependencies": ["4.3"],  "resource": "Materials Scientist (Ceramics)"},
    {"id": "4.5", "name": "Perform Peer Review",             "duration": 2, "dependencies": ["4.4"],  "resource": "Materials Scientist (Ceramics)"},
    {"id": "4.6", "name": "QA Review of Data/Results",       "duration": 2, "dependencies": ["4.5"],  "resource": "Quality Assurance Officer"},
    {"id": "4.7", "name": "Draft Report to Client",          "duration": 2, "dependencies": ["4.6"],  "resource": "Project Manager"},
    {"id": "4.8", "name": "Update Report Per Feedback",      "duration": 2, "dependencies": ["4.7"],  "resource": "Project Manager"},
    {"id": "4.9", "name": "Enter Results into Database",     "duration": 2, "dependencies": ["4.8"],  "resource": "Administrative Support Specialist"},
]

# Work-breakdown template for one composites sample.
# Each entry describes a single task:
#   "id"           - identifier, unique within this template
#   "name"         - human-readable task label
#   "duration"     - task length (units are defined by the scheduler, not here)
#   "dependencies" - ids of the tasks listed as prerequisites of this one
#   "resource"     - role that performs the task (keys match the resource tables)
composites_template = [
    # ------ Sample Management ------
    {"id": "1.1", "name": "Receive and Log Samples",         "duration": 4, "dependencies": [],       "resource": "Administrative Support Specialist"},
    {"id": "1.2", "name": "Assign Unique Identifiers",       "duration": 2, "dependencies": ["1.1"],  "resource": "Administrative Support Specialist"},
    {"id": "1.3", "name": "Photograph Initial Condition",    "duration": 2, "dependencies": ["1.2"],  "resource": "Laboratory Technician"},
    {"id": "1.4", "name": "Write Analysis Plan",             "duration": 3, "dependencies": ["1.3"],  "resource": "Materials Scientist (Composites)"},
    {"id": "1.5", "name": "Store Samples Appropriately",     "duration": 1, "dependencies": ["1.4"],  "resource": "Administrative Support Specialist"},
    {"id": "1.6", "name": "Document Chain of Custody",       "duration": 1, "dependencies": ["1.5"],  "resource": "Administrative Support Specialist"},
    # ------ Sample Preparation ------
    {"id": "2.1", "name": "Section and Polish",             "duration": 3, "dependencies": ["1.6"],  "resource": "Advanced Imaging Specialist"},
    {"id": "2.2", "name": "Embed Samples in Resin",         "duration": 3, "dependencies": ["1.6"],  "resource": "Laboratory Technician"},
    # Cataloging *prepared* samples must wait for both preparation steps. Every
    # analysis task hangs off 2.3, so this is also what keeps analysis from
    # being scheduled ahead of sectioning/embedding.
    {"id": "2.3", "name": "Label & Catalog Prepared Smpls", "duration": 3, "dependencies": ["2.1", "2.2"], "resource": "Administrative Support Specialist"},
    # ------ Sample Analysis ------
    {"id": "3.1", "name": "FTIR for Matrix",                "duration": 4, "dependencies": ["2.3"],  "resource": "Materials Scientist (Composites)"},
    {"id": "3.2", "name": "Raman for Fibers",               "duration": 4, "dependencies": ["2.3"],  "resource": "Materials Scientist (Composites)"},
    {"id": "3.3", "name": "Raman for Matrix",               "duration": 4, "dependencies": ["2.3"],  "resource": "Materials Scientist (Composites)"},
    {"id": "3.4.1", "name": "SEM-EDS Fiber Comp",           "duration": 6, "dependencies": ["2.3"],  "resource": "Advanced Imaging Specialist"},
    {"id": "3.4.2", "name": "SEM-EDS Matrix Comp",          "duration": 6, "dependencies": ["2.3"],  "resource": "Advanced Imaging Specialist"},
    {"id": "3.5", "name": "TGA (Thermal Stability/Filler)", "duration": 5, "dependencies": ["2.3"],  "resource": "Materials Scientist (Composites)"},
    {"id": "3.6", "name": "DSC (Polymer Transition)",       "duration": 5, "dependencies": ["2.3"],  "resource": "Materials Scientist (Composites)"},
    {"id": "3.7", "name": "PYGCMS (Matrix Composition)",    "duration": 6, "dependencies": ["2.3"],  "resource": "Materials Scientist (Composites)"},
    {"id": "3.8", "name": "ICP-OES/ICP-MS (Elements)",      "duration": 8, "dependencies": ["2.3"],  "resource": "Analytical Instrument Specialist"},
    {"id": "3.9", "name": "Fiber Analysis (Optical/SEM)",   "duration": 8, "dependencies": ["2.3"],  "resource": "Advanced Imaging Specialist"},
    {"id": "3.10", "name": "Measure Density (Pycnometer)",  "duration": 3, "dependencies": ["2.3"],  "resource": "Laboratory Technician"},
    # ------ Data Analysis ------
    # NOTE(review): 3.1, 3.2 and 3.10 are not listed as prerequisites of 4.1,
    # so nothing in this template waits for them - confirm this is intended.
    {"id": "4.1", "name": "Cross-Validate Composition",     "duration": 6, "dependencies": ["3.3", "3.4.1", "3.4.2", "3.5", "3.6", "3.7", "3.8", "3.9"], "resource": "Materials Scientist (Composites)"},
    {"id": "4.2", "name": "Prepare Final Figures",          "duration": 3, "dependencies": ["4.1"],  "resource": "Materials Scientist (Composites)"},
    {"id": "4.3", "name": "Perform Peer Review",            "duration": 2, "dependencies": ["4.2"],  "resource": "Materials Scientist (Composites)"},
    {"id": "4.4", "name": "QA Review of Data/Results",      "duration": 1, "dependencies": ["4.3"],  "resource": "Quality Assurance Officer"},
    {"id": "4.5", "name": "Draft Report to Client",         "duration": 2, "dependencies": ["4.4"],  "resource": "Project Manager"},
    {"id": "4.6", "name": "Update Report Per Feedback",     "duration": 2, "dependencies": ["4.5"],  "resource": "Project Manager"},
    {"id": "4.7", "name": "Enter Results into Database",    "duration": 2, "dependencies": ["4.6"],  "resource": "Administrative Support Specialist"},
]

# Work-breakdown template for one polymers sample.
# Each entry describes a single task:
#   "id"           - identifier, unique within this template
#   "name"         - human-readable task label
#   "duration"     - task length (units are defined by the scheduler, not here)
#   "dependencies" - ids of the tasks listed as prerequisites of this one
#   "resource"     - role that performs the task (keys match the resource tables)
# Polymer work is assigned to "Materials Scientist (Composites)"; the resource
# tables in this file define no polymer-specific scientist role.
polymers_template = [
    # ------ Sample Management ------
    {"id": "1.1", "name": "Receive and Log Samples",        "duration": 4, "dependencies": [],       "resource": "Administrative Support Specialist"},
    {"id": "1.2", "name": "Assign Unique Identifiers",      "duration": 2, "dependencies": ["1.1"],  "resource": "Administrative Support Specialist"},
    {"id": "1.3", "name": "Photograph Initial Condition",   "duration": 2, "dependencies": ["1.2"],  "resource": "Laboratory Technician"},
    {"id": "1.4", "name": "Write Analysis Plan",            "duration": 3, "dependencies": ["1.3"],  "resource": "Materials Scientist (Composites)"},
    {"id": "1.5", "name": "Store Samples Appropriately",    "duration": 1, "dependencies": ["1.4"],  "resource": "Administrative Support Specialist"},
    {"id": "1.6", "name": "Document Chain of Custody",      "duration": 1, "dependencies": ["1.5"],  "resource": "Administrative Support Specialist"},
    # ------ Sample Preparation ------
    # Cutting needs the sample in hand: gate both cuts on the end of sample
    # management. With empty dependencies the whole 1.x chain was disconnected
    # from preparation and analysis.
    {"id": "2.1", "name": "Cut Subsamples for Mounting",    "duration": 3, "dependencies": ["1.6"],  "resource": "Laboratory Technician"},
    {"id": "2.2", "name": "Cut Subsamples for ICP",         "duration": 3, "dependencies": ["1.6"],  "resource": "Laboratory Technician"},
    {"id": "2.3", "name": "Clean Cut Subsamples",           "duration": 2, "dependencies": ["2.1", "2.2"], "resource": "Laboratory Technician"},
    {"id": "2.4", "name": "Mount Subsample in Epoxy",       "duration": 3, "dependencies": ["2.1"],  "resource": "Laboratory Technician"},
    {"id": "2.5", "name": "Grind & Polish Mounted Subsmpl", "duration": 4, "dependencies": ["2.4"],  "resource": "Materials Scientist (Composites)"},
    {"id": "2.6", "name": "Digest Sample (Acid/MW)",        "duration": 4, "dependencies": ["2.3"],  "resource": "Analytical Instrument Specialist"},
    # Standards/blanks do not involve the sample, so this task has no prerequisites.
    {"id": "2.7", "name": "Prep ICP Standards/Blanks",      "duration": 2, "dependencies": [],       "resource": "Analytical Instrument Specialist"},
    {"id": "2.8", "name": "Coat Sample for SEM/EBSD",       "duration": 2, "dependencies": ["2.5"],  "resource": "Laboratory Technician"},
    # NOTE(review): no task lists 2.9 as a prerequisite - confirm analysis is
    # meant to be able to start before cataloging.
    {"id": "2.9", "name": "Label & Catalog Prepared Smpls", "duration": 2, "dependencies": ["2.2", "2.5", "2.6", "2.8"], "resource": "Administrative Support Specialist"},
    # ------ Sample Analysis ------
    {"id": "3.1", "name": "Optical Microscope",             "duration": 2, "dependencies": ["2.5"],  "resource": "Materials Scientist (Composites)"},
    {"id": "3.2", "name": "SEM Imaging",                    "duration": 4, "dependencies": ["2.8"],  "resource": "Advanced Imaging Specialist"},
    {"id": "3.3", "name": "SEM-EDS (Polymer Comp)",         "duration": 4, "dependencies": ["3.2"],  "resource": "Advanced Imaging Specialist"},
    {"id": "3.4", "name": "FTIR for Composition",           "duration": 4, "dependencies": ["2.5"],  "resource": "Materials Scientist (Composites)"},
    {"id": "3.5", "name": "Raman for Composition",          "duration": 4, "dependencies": ["2.5"],  "resource": "Materials Scientist (Composites)"},
    {"id": "3.6", "name": "Measure Density (Pycnometer)",   "duration": 3, "dependencies": ["2.3"],  "resource": "Laboratory Technician"},
    {"id": "3.7", "name": "Hardness Microindentation",      "duration": 3, "dependencies": ["2.5"],  "resource": "Materials Scientist (Composites)"},
    {"id": "3.8", "name": "TGA (Thermal Stability/Filler)", "duration": 5, "dependencies": ["2.3"],  "resource": "Materials Scientist (Composites)"},
    {"id": "3.9", "name": "DSC (Polymer Transition)",       "duration": 5, "dependencies": ["2.3"],  "resource": "Materials Scientist (Composites)"},
    {"id": "3.10", "name": "PYGCMS (Matrix)",               "duration": 6, "dependencies": ["2.3"],  "resource": "Materials Scientist (Composites)"},
    {"id": "3.11", "name": "ICP-MS/ICP-OES",                "duration": 8, "dependencies": ["2.6", "2.7"], "resource": "Analytical Instrument Specialist"},
    # ------ Data Analysis ------
    # NOTE(review): 3.1, 3.10 and 3.11 are not listed as prerequisites of 4.1
    # (3.2 is covered through 3.3) - confirm this is intended.
    {"id": "4.1", "name": "Cross-Validate Composition",     "duration": 6, "dependencies": ["3.3", "3.4", "3.5", "3.6", "3.7", "3.8", "3.9"], "resource": "Materials Scientist (Composites)"},
    {"id": "4.2", "name": "Prepare Final Figures",          "duration": 3, "dependencies": ["4.1"],  "resource": "Materials Scientist (Composites)"},
    {"id": "4.3", "name": "Perform Peer Review",            "duration": 2, "dependencies": ["4.2"],  "resource": "Materials Scientist (Composites)"},
    {"id": "4.4", "name": "QA Review of Data/Results",      "duration": 1, "dependencies": ["4.3"],  "resource": "Quality Assurance Officer"},
    {"id": "4.5", "name": "Draft Report to Client",         "duration": 2, "dependencies": ["4.4"],  "resource": "Project Manager"},
    {"id": "4.6", "name": "Update Report Per Feedback",     "duration": 2, "dependencies": ["4.5"],  "resource": "Project Manager"},
    {"id": "4.7", "name": "Enter Results into Database",    "duration": 2, "dependencies": ["4.6"],  "resource": "Administrative Support Specialist"},
]

if __name__ == "__main__":
    # Entry point: sweep resource levels and sample mixes, write the scenario
    # results to a timestamped CSV, then load and plot them.
    logging.info("Starting capacity analysis")

    # One work-breakdown template per material family.
    templates = [metals_template, ceramics_template, composites_template, polymers_template]

    # (min, max) head-count bounds explored for each role.
    resource_ranges = {
        "Administrative Support Specialist": (1, 2),
        "Laboratory Technician": (1, 2),
        "Materials Scientist (Metals)": (1, 2),
        "Materials Scientist (Ceramics)": (1, 2),
        "Materials Scientist (Composites)": (1, 2),
        "Advanced Imaging Specialist": (1, 2),
        "Analytical Instrument Specialist": (1, 2),
        "Quality Assurance Officer": (1, 1),
        "Project Manager": (1, 1),
    }

    # (min, max) sample-count bounds explored for each material family.
    sample_ranges = {
        "metals": (5, 20),
        "ceramics": (0, 20),
        "composites": (0, 20),
        "polymers": (0, 20),
    }

    # step_sizes = None

    # Sweep increment per dimension: sample counts move in steps of 10,
    # resource levels in steps of 1.
    step_sizes = {
        "metals": 10,
        "ceramics": 10,
        "composites": 10,
        "polymers": 10,
        **{r: 1 for r in resource_ranges},
    }

    # Cost per unit of each role (currency and time basis are not stated here).
    resource_unit_costs = {
        "Administrative Support Specialist": 75,
        "Laboratory Technician": 100,
        "Materials Scientist (Metals)": 150,
        "Materials Scientist (Ceramics)": 150,
        "Materials Scientist (Composites)": 150,
        "Advanced Imaging Specialist": 150,
        "Analytical Instrument Specialist": 125,
        "Quality Assurance Officer": 125,
        "Project Manager": 175,
    }

    try:
        # The timestamp keeps successive runs from overwriting each other's output.
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        output_file = f"scenario_results_{timestamp}.csv"

        results = run_capacity_analysis(
            templates=templates,
            resource_ranges=resource_ranges,
            sample_ranges=sample_ranges,
            resource_unit_costs=resource_unit_costs,
            step_sizes=step_sizes,
            output_file=output_file,
        )

        # Analyze and visualize results
        results_df, fig = analyze_results(output_file)
        plt.show()

    except Exception as e:
        # Top-level boundary: keep the run from crashing, but record the full
        # traceback (logging.error with str(e) alone hid where the failure was).
        logging.exception("Analysis failed: %s", e)


2025-01-21 21:38:19,213 - INFO - Starting capacity analysis
Generated 6912 scenarios to evaluate
2025-01-21 21:38:19,254 - INFO - 
Processing scenario 1/6912
2025-01-21 21:38:19,254 - INFO - 
Processing scenario 2/6912
2025-01-21 21:38:19,254 - INFO - Scenario configuration:
2025-01-21 21:38:19,254 - INFO - 
Processing scenario 3/6912
2025-01-21 21:38:19,264 - INFO - 
Processing scenario 4/6912
2025-01-21 21:38:19,406 - INFO - Scenario configuration:
2025-01-21 21:38:19,469 - INFO - Sample counts: Metals=5, Ceramics=0, Composites=0, Polymers=0


Processing scenarios:   0%|          | 0/6912 [00:00<?, ?it/s]

2025-01-21 21:38:19,477 - INFO - Scenario configuration:
2025-01-21 21:38:19,480 - INFO - Scenario configuration:
2025-01-21 21:38:19,480 - INFO - Sample counts: Metals=5, Ceramics=0, Composites=0, Polymers=10
2025-01-21 21:38:19,488 - INFO - Resource capacities: {'Administrative Support Specialist': 1, 'Laboratory Technician': 1, 'Materials Scientist (Metals)': 1, 'Materials Scientist (Ceramics)': 1, 'Materials Scientist (Composites)': 1, 'Advanced Imaging Specialist': 1, 'Analytical Instrument Specialist': 1, 'Quality Assurance Officer': 1, 'Project Manager': 1}
2025-01-21 21:38:19,490 - INFO - Sample counts: Metals=5, Ceramics=0, Composites=0, Polymers=20
2025-01-21 21:38:19,498 - INFO - Sample counts: Metals=5, Ceramics=0, Composites=10, Polymers=0
2025-01-21 21:38:19,498 - INFO - Resource capacities: {'Administrative Support Specialist': 1, 'Laboratory Technician': 1, 'Materials Scientist (Metals)': 1, 'Materials Scientist (Ceramics)': 1, 'Materials Scientist (Composites)': 1, 'Ad

In [None]:
# 3. Create visualizations
# Every helper below takes results_df (the DataFrame returned by
# analyze_results in the main block) and returns a figure object that is kept
# under its own name; plt.show() is called right after each figure is built.

# Basic performance metrics
fig_performance = visualize_results(results_df)
plt.show()

# Tradeoff analysis
fig_tradeoffs = analyze_tradeoffs(results_df)
plt.show()

# Pareto frontier analysis
fig_pareto = create_pareto_frontier(results_df)
plt.show()

In [None]:
# 4. Analyze specific scenarios in detail
# Rank only scenarios that have a makespan. A missing makespan is the same
# "infeasible" test the summary report uses, and such a row must never be
# picked as the "best" scenario that later gets scheduled and charted.
feasible_df = results_df[results_df['makespan'].notna()]
if feasible_df.empty:
    raise ValueError("No feasible scenarios in results_df; nothing to analyze.")


def _print_scenario(title, scenario):
    """Print every non-utilization field of one scenario row under a title."""
    print(f"\n{title}")
    for col in scenario.index:
        if not col.startswith('utilization'):
            print(f"{col}: {scenario[col]}")


# Get the most efficient scenario (lowest cost per sample)
efficient_scenario = feasible_df.sort_values('cost_per_sample').iloc[0]
_print_scenario("Most Cost-Efficient Scenario:", efficient_scenario)

# Get highest throughput scenario (most samples per year)
max_throughput = feasible_df.sort_values('samples_per_year', ascending=False).iloc[0]
_print_scenario("Highest Throughput Scenario:", max_throughput)

In [None]:
# 5. Create Gantt chart for a specific scenario
# First, run the scheduler for the scenario we want to visualize.
# efficient_scenario is a single DataFrame row, so its values are NumPy scalars
# and may have been upcast to float; convert them to plain ints before using
# them as replication counts and capacities.
scenario_tasks = []
for template, material, prefix in (
    (metals_template, "metals", "MET_"),
    (ceramics_template, "ceramics", "CER_"),
    (composites_template, "composites", "COMP_"),
    (polymers_template, "polymers", "POLY_"),
):
    count = int(efficient_scenario[f'samples_{material}'])
    # Material families with no samples contribute no tasks.
    if count > 0:
        scenario_tasks.extend(replicate_wbs_optimized(template, count, prefix))

# Get resource capacities from the scenario
resource_capacities = {
    resource: int(efficient_scenario[f'resource_{resource}'])
    for resource in resource_ranges
}

# Run scheduler
makespan, start_times = schedule_optimized(
    tasks=scenario_tasks,
    resource_capacities=resource_capacities
)

# Create Gantt chart (skipped when the scheduler returned no usable schedule)
if makespan and start_times:
    fig_gantt = create_gantt_chart(
        tasks=scenario_tasks,
        start_times=start_times,
        resource_capacities=resource_capacities,
        makespan=makespan
    )
    plt.show()

In [None]:
# 7. Generate summary report
# Gather every figure the report needs first, so the template below is plain
# text with simple placeholders.
generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

resource_columns = [name for name in results_df.columns if name.startswith('resource_')]
utilization_columns = [name for name in results_df.columns if name.startswith('utilization_')]

scenario_total = len(results_df)
feasible_total = len(results_df[results_df['makespan'].notna()])
# Each distinct combination of resource levels counts as one configuration.
configuration_total = len(results_df.groupby(resource_columns))

mean_throughput = results_df['samples_per_year'].mean()
mean_cost_per_sample = results_df['cost_per_sample'].mean()
mean_makespan = results_df['makespan'].mean()

best_cost_per_sample = efficient_scenario['cost_per_sample']
best_throughput = efficient_scenario['samples_per_year']
best_total_cost = efficient_scenario['total_cost']

utilization_table = results_df[utilization_columns].mean().to_string()

summary = f"""
Analysis Summary ({generated_at})
================================================

Dataset Overview:
----------------
Total Scenarios Analyzed: {scenario_total}
Feasible Scenarios: {feasible_total}
Resource Configurations: {configuration_total}

Performance Metrics:
------------------
Average Throughput: {mean_throughput:.1f} samples/year
Average Cost per Sample: ${mean_cost_per_sample:.2f}
Average Makespan: {mean_makespan:.1f} days

Most Efficient Configuration:
---------------------------
Cost per Sample: ${best_cost_per_sample:.2f}
Annual Throughput: {best_throughput:.0f}
Total Cost: ${best_total_cost:.2f}

Resource Utilization:
-------------------
{utilization_table}

Visualization Files:
------------------
- Performance Metrics: performance_metrics_{timestamp}.png
- Tradeoff Analysis: tradeoff_analysis_{timestamp}.png
- Pareto Frontier: pareto_frontier_{timestamp}.png
- Gantt Chart: gantt_chart_{timestamp}.png
"""

# The report reuses the run's timestamp for its own file name.
with open(f'analysis_summary_{timestamp}.txt', 'w') as report_file:
    report_file.write(summary)

print("\nAnalysis complete! Check the output files for detailed results.")