### GPT-4o Robustness Analysis: Advanced Chart Generation (Medium-Complex)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
from datetime import datetime, timedelta
import json
import random
from pathlib import Path
import logging

# Set random seeds for reproducibility
np.random.seed(42)
random.seed(42)

print("=" * 80)
print(" ADVANCED CHART GENERATION: MEDIUM-COMPLEX CHARTS")
print(" Creating Challenging Charts for Robust AI Evaluation")
print("=" * 80)

# Setup logging
logger = logging.getLogger('research')

 ADVANCED CHART GENERATION: MEDIUM-COMPLEX CHARTS
 Creating Challenging Charts for Robust AI Evaluation


### SECTION 1: ADVANCED CHART CONFIGURATION

In [2]:
print("\n SECTION 1: ADVANCED CHART CONFIGURATION")

# Advanced chart parameters for medium-complex charts
ADVANCED_CONFIG = {
    "complexity_levels": {
        "medium": {
            "data_points": (6, 10),
            "series_count": (2, 3),
            "annotations": True,
            "dual_axes": False,
            "error_bars": True
        },
        "complex": {
            "data_points": (8, 12),
            "series_count": (3, 4), 
            "annotations": True,
            "dual_axes": True,
            "error_bars": True
        },
        "advanced": {
            "data_points": (10, 15),
            "series_count": (4, 6),
            "annotations": True,
            "dual_axes": True,
            "error_bars": True
        }
    },
    
    "chart_categories": {
        "business_analytics": {
            "scenarios": [
                "quarterly_performance_dashboard",
                "regional_sales_comparison", 
                "product_line_profitability",
                "market_share_analysis",
                "customer_satisfaction_metrics"
            ],
            "metrics": ["Revenue", "Profit", "Costs", "Growth Rate", "Market Share", "ROI"]
        },
        
        "scientific_research": {
            "scenarios": [
                "experimental_results_comparison",
                "treatment_efficacy_analysis",
                "sensor_measurements_overtime",
                "correlation_analysis",
                "multi_variable_relationship"
            ],
            "metrics": ["Temperature", "Pressure", "Concentration", "Efficiency", "Error Rate", "Accuracy"]
        },
        
        "financial_analysis": {
            "scenarios": [
                "portfolio_performance_tracking",
                "risk_return_analysis",
                "sector_comparison",
                "economic_indicators",
                "investment_allocation"
            ],
            "metrics": ["Returns", "Volatility", "Sharpe Ratio", "Beta", "Alpha", "VaR"]
        },
        
        "operational_metrics": {
            "scenarios": [
                "production_efficiency",
                "quality_control_metrics", 
                "supply_chain_performance",
                "resource_utilization",
                "service_level_agreement"
            ],
            "metrics": ["Throughput", "Quality Score", "Utilization", "Downtime", "Efficiency", "Cost per Unit"]
        }
    }
}

print(" Advanced configuration loaded")
print(f" Complexity levels: {list(ADVANCED_CONFIG['complexity_levels'].keys())}")
print(f" Business categories: {len(ADVANCED_CONFIG['chart_categories'])}")



 SECTION 1: ADVANCED CHART CONFIGURATION
 Advanced configuration loaded
 Complexity levels: ['medium', 'complex', 'advanced']
 Business categories: 4


### SECTION 2: ADVANCED CHART GENERATOR

In [5]:
print("\n SECTION 2: ADVANCED CHART GENERATOR")

class AdvancedChartGenerator:
    """Generate medium-complex charts for robust AI evaluation"""
    
    def __init__(self):
        self.style_settings = {
            'figure.figsize': (12, 8),
            'axes.labelsize': 11,
            'axes.titlesize': 14,
            'xtick.labelsize': 10,
            'ytick.labelsize': 10,
            'legend.fontsize': 10,
            'font.family': 'Arial'
        }
        
        # Professional color palettes
        self.color_palettes = {
            'business': ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b'],
            'scientific': ['#2E86C1', '#28B463', '#F39C12', '#E74C3C', '#8E44AD', '#17A2B8'],
            'financial': ['#0E4B99', '#2E8B57', '#DAA520', '#DC143C', '#4B0082', '#008B8B'],
            'operational': ['#483D8B', '#228B22', '#FF8C00', '#B22222', '#9932CC', '#008080']
        }
        
        plt.rcParams.update(self.style_settings)
    
    def generate_realistic_data(self, scenario, complexity, num_points, num_series):
        """Generate realistic data based on scenario and complexity - FIXED"""
        
        if scenario == "quarterly_performance_dashboard":
            quarters = [f"Q{i//3+1} '{22+i//12}" for i in range(num_points)]
            
            # Revenue with growth trend + seasonality + noise
            base_revenue = 1000000
            growth_rate = 0.05
            revenues = []
            for i in range(num_points):
                seasonal = 1 + 0.15 * np.sin(2 * np.pi * i / 4)  # Quarterly seasonality
                trend = base_revenue * (1 + growth_rate) ** (i/4)
                noise = np.random.normal(0, 0.08) * trend
                revenues.append(max(0, trend * seasonal + noise))
            
            # Profit margins (correlated but more volatile)
            profits = [r * (0.12 + np.random.normal(0, 0.03)) for r in revenues]
            
            # Costs (inverse relationship with efficiency improvements)
            costs = [r - p + np.random.normal(0, r*0.02) for r, p in zip(revenues, profits)]
            
            # FIXED: Proper dictionary creation and slicing
            all_series = {
                'Revenue ($M)': [r/1000000 for r in revenues],
                'Profit ($M)': [p/1000000 for p in profits], 
                'Costs ($M)': [c/1000000 for c in costs]
            }
            
            # Take only the requested number of series
            series_items = list(all_series.items())[:num_series]
            selected_series = dict(series_items)
            
            return {
                'categories': quarters,
                'series': selected_series
            }
        
        elif scenario == "experimental_results_comparison":
            conditions = [f"Condition {chr(65+i)}" for i in range(num_points)]
            
            # Control group - baseline performance
            control_mean = 75
            control_std = 8
            control_results = np.random.normal(control_mean, control_std, num_points)
            
            # Treatment groups with different effect sizes
            treatment_effects = [5, 12, -3, 8, 15]  # Various effect sizes
            all_series = {'Control': control_results.tolist()}
            
            for i in range(min(num_series-1, len(treatment_effects))):
                treatment_mean = control_mean + treatment_effects[i]
                treatment_std = control_std * (0.8 + np.random.uniform(-0.2, 0.3))
                treatment_results = np.random.normal(treatment_mean, treatment_std, num_points)
                all_series[f'Treatment {i+1}'] = treatment_results.tolist()
            
            # Take only requested number of series
            series_items = list(all_series.items())[:num_series]
            selected_series = dict(series_items)
            
            return {
                'categories': conditions,
                'series': selected_series
            }
        
        elif scenario == "portfolio_performance_tracking":
            months = [(datetime.now() - timedelta(days=30*i)).strftime('%b %Y') 
                     for i in range(num_points-1, -1, -1)]
            
            # Different asset classes with realistic correlations
            asset_classes = ['Stocks', 'Bonds', 'Real Estate', 'Commodities', 'Cash']
            
            # Generate correlated returns using simple method
            all_series = {}
            base_returns = np.random.normal(0.01, 0.05, num_points)  # Monthly returns
            
            for i in range(min(num_series, len(asset_classes))):
                # Add correlation and different volatilities
                correlation = 0.3 if i > 0 else 1.0
                asset_returns = base_returns * correlation + np.random.normal(0, 0.03, num_points) * (1-correlation)
                
                # Convert to cumulative performance
                cumulative = np.cumprod(1 + asset_returns) * 100
                all_series[asset_classes[i]] = cumulative.tolist()
            
            return {
                'categories': months,
                'series': all_series
            }
        
        elif scenario == "regional_sales_comparison":
            regions = ['North America', 'Europe', 'Asia Pacific', 'Latin America', 
                      'Middle East', 'Africa', 'Oceania', 'Nordic Countries'][:num_points]
            
            # Market size affects base sales
            market_sizes = [1.0, 0.8, 1.2, 0.4, 0.3, 0.2, 0.1, 0.15][:num_points]
            
            all_series = {}
            product_lines = ['Product A', 'Product B', 'Product C', 'Product D', 'Product E', 'Product F']
            
            for i in range(min(num_series, len(product_lines))):
                product = product_lines[i]
                # Different products have different regional preferences
                regional_preferences = np.random.uniform(0.5, 2.0, num_points)
                base_sales = [m * p * np.random.uniform(50, 200) 
                             for m, p in zip(market_sizes, regional_preferences)]
                all_series[product] = base_sales
            
            return {
                'categories': regions,
                'series': all_series
            }
        
        else:
            # Generic data generation for other scenarios
            categories = [f"Category {i+1}" for i in range(num_points)]
            all_series = {}
            
            for i in range(num_series):
                # Create realistic data with trends and noise
                base_values = np.random.uniform(20, 100, num_points)
                trend = np.linspace(0, np.random.uniform(-20, 20), num_points)
                noise = np.random.normal(0, 5, num_points)
                values = np.maximum(0, base_values + trend + noise)
                all_series[f'Series {chr(65+i)}'] = values.tolist()
            
            return {
                'categories': categories,
                'series': all_series
            }
    
    def create_advanced_bar_chart(self, data, title, complexity, output_path):
        """Create advanced multi-series bar chart with annotations"""
        
        fig, ax = plt.subplots(figsize=(14, 9))
        
        categories = data['categories']
        series = data['series']
        series_names = list(series.keys())
        
        # Calculate bar positions for multiple series
        x = np.arange(len(categories))
        width = 0.8 / len(series_names)
        
        bars_list = []
        for i, (series_name, values) in enumerate(series.items()):
            offset = (i - len(series_names)/2 + 0.5) * width
            bars = ax.bar(x + offset, values, width, label=series_name, 
                         alpha=0.8, edgecolor='white', linewidth=0.7)
            bars_list.append(bars)
            
            # Add value labels on bars if complexity is high
            if complexity in ['complex', 'advanced']:
                for bar, value in zip(bars, values):
                    height = bar.get_height()
                    ax.text(bar.get_x() + bar.get_width()/2., height + max(values)*0.01,
                           f'{value:.1f}', ha='center', va='bottom', fontsize=9, 
                           fontweight='bold')
        
        # Customization based on complexity
        ax.set_title(title, fontsize=16, fontweight='bold', pad=20)
        ax.set_xlabel('Categories', fontsize=12, fontweight='bold')
        ax.set_ylabel('Values', fontsize=12, fontweight='bold')
        ax.set_xticks(x)
        ax.set_xticklabels(categories, rotation=45, ha='right')
        
        # Advanced features for complex charts
        if complexity in ['complex', 'advanced']:
            # Add grid
            ax.grid(True, alpha=0.3, axis='y')
            ax.set_axisbelow(True)
            
            # Add trend line for first series
            if len(series_names) > 0:
                first_series = list(series.values())[0]
                z = np.polyfit(x, first_series, 1)
                p = np.poly1d(z)
                ax.plot(x, p(x), "--", color='red', alpha=0.8, linewidth=2, 
                       label=f'{series_names[0]} Trend')
            
            # Add average line
            all_values = [val for values in series.values() for val in values]
            avg_value = np.mean(all_values)
            ax.axhline(y=avg_value, color='gray', linestyle=':', alpha=0.7, 
                      label=f'Overall Average: {avg_value:.1f}')
        
        # Professional legend
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', 
                 frameon=True, fancybox=True, shadow=True)
        
        plt.tight_layout()
        plt.savefig(output_path, dpi=300, bbox_inches='tight', 
                   facecolor='white', edgecolor='none')
        plt.close()
        
        return bars_list
    
    def create_advanced_line_chart(self, data, title, complexity, output_path):
        """Create advanced multi-series line chart with confidence intervals"""
        
        fig, ax = plt.subplots(figsize=(14, 9))
        
        categories = data['categories']
        series = data['series']
        
        for i, (series_name, values) in enumerate(series.items()):
            x_pos = range(len(categories))
            
            # Main line
            line = ax.plot(x_pos, values, marker='o', linewidth=2.5, 
                          markersize=6, label=series_name, alpha=0.9)
            color = line[0].get_color()
            
            # Add confidence intervals for complex charts
            if complexity in ['complex', 'advanced']:
                # Simulate confidence intervals (±10% of values)
                values_array = np.array(values)
                ci_lower = values_array * 0.9
                ci_upper = values_array * 1.1
                ax.fill_between(x_pos, ci_lower, ci_upper, alpha=0.2, color=color)
            
            # Add value annotations for key points
            if complexity in ['advanced']:
                for j, (x, y) in enumerate(zip(x_pos, values)):
                    if j % 2 == 0:  # Annotate every other point to avoid clutter
                        ax.annotate(f'{y:.1f}', (x, y), xytext=(5, 10), 
                                  textcoords='offset points', fontsize=9,
                                  bbox=dict(boxstyle='round,pad=0.3', facecolor=color, alpha=0.7))
        
        # Customization
        ax.set_title(title, fontsize=16, fontweight='bold', pad=20)
        ax.set_xlabel('Time Period', fontsize=12, fontweight='bold')
        ax.set_ylabel('Values', fontsize=12, fontweight='bold')
        ax.set_xticks(range(len(categories)))
        ax.set_xticklabels(categories, rotation=45, ha='right')
        
        # Advanced features
        if complexity in ['complex', 'advanced']:
            ax.grid(True, alpha=0.3)
            ax.set_axisbelow(True)
            
            # Add secondary y-axis for advanced charts
            if complexity == 'advanced' and len(series) >= 2:
                ax2 = ax.twinx()
                second_series = list(series.items())[1]
                ax2.plot(range(len(categories)), second_series[1], 
                        'r--', linewidth=2, alpha=0.7, label=f'{second_series[0]} (Right)')
                ax2.set_ylabel('Secondary Values', fontsize=12, color='r')
                ax2.tick_params(axis='y', labelcolor='r')
        
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.tight_layout()
        plt.savefig(output_path, dpi=300, bbox_inches='tight', 
                   facecolor='white', edgecolor='none')
        plt.close()
    
    def create_advanced_pie_chart(self, data, title, complexity, output_path):
        """Create advanced pie chart with detailed breakdown - FIXED"""
        
        # FIXED: Proper subplot handling
        if complexity == 'advanced':
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))
            main_ax = ax1
        else:
            fig, main_ax = plt.subplots(1, 1, figsize=(10, 8))
            ax2 = None
        
        # Use first series for main pie chart
        categories = data['categories']
        values = list(data['series'].values())[0]
        
        # Calculate percentages
        total = sum(values)
        percentages = [v/total * 100 for v in values]
        
        # Color scheme
        colors = plt.cm.Set3(np.linspace(0, 1, len(categories)))
        
        # Main pie chart
        wedges, texts, autotexts = main_ax.pie(values, labels=categories, autopct='%1.1f%%',
                                             colors=colors, startangle=90, 
                                             explode=[0.05 if i == 0 else 0 for i in range(len(values))])
        
        # Enhance text
        for autotext in autotexts:
            autotext.set_color('white')
            autotext.set_fontweight('bold')
            autotext.set_fontsize(10)
        
        main_ax.set_title(title, fontsize=16, fontweight='bold', pad=20)
        
        # Advanced features: detailed breakdown
        if complexity == 'advanced' and ax2 is not None:
            # Second chart: detailed breakdown
            ax2.barh(categories, values, color=colors, alpha=0.8)
            ax2.set_xlabel('Values', fontsize=12, fontweight='bold')
            ax2.set_title('Detailed Breakdown', fontsize=14, fontweight='bold')
            
            # Add value labels
            for i, v in enumerate(values):
                ax2.text(v + max(values)*0.01, i, f'{v:.1f} ({percentages[i]:.1f}%)', 
                        va='center', fontweight='bold')
            
            ax2.grid(True, alpha=0.3, axis='x')
        
        # Add summary statistics for complex charts
        if complexity in ['complex', 'advanced']:
            stats_text = f'Total: {total:.1f}\nAverage: {np.mean(values):.1f}\nMax: {max(values):.1f}'
            main_ax.text(-1.3, -1.3, stats_text, bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))
        
        plt.tight_layout()
        plt.savefig(output_path, dpi=300, bbox_inches='tight', 
                   facecolor='white', edgecolor='none')
        plt.close()
    
    def create_advanced_scatter_plot(self, data, title, complexity, output_path):
        """Create advanced scatter plot with regression and clustering"""
        
        fig, ax = plt.subplots(figsize=(12, 9))
        
        # For scatter plots, use first two series as x and y
        series_items = list(data['series'].items())
        if len(series_items) < 2:
            # Generate second series if only one exists
            first_values = series_items[0][1]
            second_values = [v + np.random.normal(0, v*0.1) for v in first_values]
            x_values, y_values = first_values, second_values
            x_label, y_label = series_items[0][0], 'Related Metric'
        else:
            x_values, y_values = series_items[0][1], series_items[1][1]
            x_label, y_label = series_items[0][0], series_items[1][0]
        
        # Create scatter plot with varying sizes and colors
        sizes = [50 + i*20 for i in range(len(x_values))]
        colors = range(len(x_values))
        
        scatter = ax.scatter(x_values, y_values, s=sizes, c=colors, alpha=0.7, 
                           cmap='viridis', edgecolors='black', linewidth=0.5)
        
        # Add labels for each point
        for i, (x, y, cat) in enumerate(zip(x_values, y_values, data['categories'])):
            if complexity in ['complex', 'advanced']:
                ax.annotate(cat, (x, y), xytext=(5, 5), textcoords='offset points',
                           fontsize=9, alpha=0.8)
        
        # Add regression line for complex charts
        if complexity in ['complex', 'advanced']:
            z = np.polyfit(x_values, y_values, 1)
            p = np.poly1d(z)
            x_trend = np.linspace(min(x_values), max(x_values), 100)
            ax.plot(x_trend, p(x_trend), "--", color='red', linewidth=2, alpha=0.8,
                   label=f'Trend Line (R²={np.corrcoef(x_values, y_values)[0,1]**2:.3f})')
            
            # Add confidence bands for advanced charts
            if complexity == 'advanced':
                # Simple confidence band simulation
                residuals = [y - p(x) for x, y in zip(x_values, y_values)]
                std_residuals = np.std(residuals)
                y_err = [p(x) + 1.96*std_residuals for x in x_trend]
                y_err_lower = [p(x) - 1.96*std_residuals for x in x_trend]
                ax.fill_between(x_trend, y_err_lower, y_err, alpha=0.2, color='red')
        
        # Customization
        ax.set_title(title, fontsize=16, fontweight='bold', pad=20)
        ax.set_xlabel(x_label, fontsize=12, fontweight='bold')
        ax.set_ylabel(y_label, fontsize=12, fontweight='bold')
        ax.grid(True, alpha=0.3)
        
        # Add colorbar for advanced charts
        if complexity == 'advanced':
            cbar = plt.colorbar(scatter, ax=ax)
            cbar.set_label('Data Point Index', fontsize=10)
        
        if complexity in ['complex', 'advanced']:
            ax.legend()
        
        plt.tight_layout()
        plt.savefig(output_path, dpi=300, bbox_inches='tight', 
                   facecolor='white', edgecolor='none')
        plt.close()


 SECTION 2: ADVANCED CHART GENERATOR


### SECTION 3: CHART GENERATION EXECUTION

In [6]:
print("\n SECTION 3: CHART GENERATION EXECUTION")

def generate_advanced_chart_dataset():
    """Generate complete dataset of advanced charts - FIXED"""
    
    generator = AdvancedChartGenerator()
    chart_configs = []
    
    # Load research configuration
    with open('research_config.json', 'r') as f:
        config = json.load(f)
    
    total_charts = config['experimental_design']['total_charts']
    complexity_distribution = {
        'medium': 0.4,      # 40% medium complexity
        'complex': 0.4,     # 40% complex
        'advanced': 0.2     # 20% advanced (most challenging)
    }
    
    chart_type_distribution = {
        'bar': 0.35,        # 35% - most common in business
        'line': 0.25,       # 25% - time series analysis
        'pie': 0.15,        # 15% - composition analysis
        'scatter': 0.15,    # 15% - correlation analysis
        'area': 0.10        # 10% - cumulative data
    }
    
    print(f" Generating {total_charts} advanced charts...")
    print(f" Complexity distribution: {complexity_distribution}")
    print(f" Chart type distribution: {chart_type_distribution}")
    
    chart_id = 1
    
    for category_name, category_info in ADVANCED_CONFIG['chart_categories'].items():
        category_chart_count = total_charts // len(ADVANCED_CONFIG['chart_categories'])
        
        print(f"\n📋 Generating {category_chart_count} charts for: {category_name}")
        
        for i in range(category_chart_count):
            # Determine complexity level
            complexity_rand = random.random()
            if complexity_rand < complexity_distribution['medium']:
                complexity = 'medium'
            elif complexity_rand < complexity_distribution['medium'] + complexity_distribution['complex']:
                complexity = 'complex'
            else:
                complexity = 'advanced'
            
            # Determine chart type
            chart_type_rand = random.random()
            cumulative_prob = 0
            chart_type = 'bar'  # default
            for ctype, prob in chart_type_distribution.items():
                cumulative_prob += prob
                if chart_type_rand < cumulative_prob:
                    chart_type = ctype
                    break
            
            # Select scenario
            scenario = random.choice(category_info['scenarios'])
            
            # Get complexity parameters
            complexity_params = ADVANCED_CONFIG['complexity_levels'][complexity]
            num_points = random.randint(*complexity_params['data_points'])
            num_series = random.randint(*complexity_params['series_count'])
            
            # Generate realistic data
            chart_data = generator.generate_realistic_data(
                scenario, complexity, num_points, num_series
            )
            
            # Create professional title
            title_parts = [
                scenario.replace('_', ' ').title(),
                f"({category_name.replace('_', ' ').title()})"
            ]
            title = ' - '.join(title_parts)
            
            # Generate chart
            chart_filename = f"chart_{chart_id:03d}_{complexity}_{chart_type}.png"
            output_path = f"data/raw_charts/{chart_filename}"
            
            try:
                if chart_type == 'bar':
                    generator.create_advanced_bar_chart(chart_data, title, complexity, output_path)
                elif chart_type == 'line':
                    generator.create_advanced_line_chart(chart_data, title, complexity, output_path)
                elif chart_type == 'pie':
                    generator.create_advanced_pie_chart(chart_data, title, complexity, output_path)
                elif chart_type == 'scatter':
                    generator.create_advanced_scatter_plot(chart_data, title, complexity, output_path)
                elif chart_type == 'area':
                    # Use line chart method with filled areas
                    generator.create_advanced_line_chart(chart_data, title, complexity, output_path)
                
                # Save chart configuration
                chart_config = {
                    'id': f'chart_{chart_id:03d}',
                    'filename': chart_filename,
                    'title': title,
                    'category': category_name,
                    'scenario': scenario,
                    'chart_type': chart_type,
                    'complexity': complexity,
                    'data_points': num_points,
                    'series_count': num_series,
                    'categories': chart_data['categories'],
                    'series_data': chart_data['series'],
                    'generation_timestamp': datetime.now().isoformat()
                }
                
                chart_configs.append(chart_config)
                
                if chart_id % 20 == 0:
                    print(f" Generated {chart_id}/{total_charts} charts...")
                
                chart_id += 1
                
            except Exception as e:
                print(f" Failed to generate chart {chart_id}: {e}")
                logger.error(f"Chart generation failed for {chart_filename}: {e}")
                
                # Continue with next chart instead of stopping
                chart_id += 1
                continue
    
    # Save all configurations
    with open('data/ground_truth/chart_configurations.json', 'w') as f:
        json.dump(chart_configs, f, indent=2)
    
    print(f"\n Generated {len(chart_configs)} advanced charts successfully")
    print(f" Configurations saved to: data/ground_truth/chart_configurations.json")
    
    return chart_configs

# Generate the advanced chart dataset
try:
    advanced_charts = generate_advanced_chart_dataset()
    print(f" CHART GENERATION COMPLETED: {len(advanced_charts)} charts created")
except Exception as e:
    print(f" Chart generation failed: {e}")
    import traceback
    traceback.print_exc()


 SECTION 3: CHART GENERATION EXECUTION
 Generating 200 advanced charts...
 Complexity distribution: {'medium': 0.4, 'complex': 0.4, 'advanced': 0.2}
 Chart type distribution: {'bar': 0.35, 'line': 0.25, 'pie': 0.15, 'scatter': 0.15, 'area': 0.1}

📋 Generating 50 charts for: business_analytics
 Generated 20/200 charts...
 Generated 40/200 charts...

📋 Generating 50 charts for: scientific_research
 Generated 60/200 charts...
 Generated 80/200 charts...
 Generated 100/200 charts...

📋 Generating 50 charts for: financial_analysis
 Generated 120/200 charts...
 Generated 140/200 charts...

📋 Generating 50 charts for: operational_metrics
 Generated 160/200 charts...
 Generated 180/200 charts...
 Generated 200/200 charts...

 Generated 200 advanced charts successfully
 Configurations saved to: data/ground_truth/chart_configurations.json
 CHART GENERATION COMPLETED: 200 charts created


### SECTION 4: QUALITY VALIDATION

In [7]:
print("\n SECTION 4: QUALITY VALIDATION")

def validate_chart_quality(chart_configs):
    """Validate the quality and complexity of generated charts"""
    
    if not chart_configs:
        print(" No charts to validate - generation may have failed")
        return {}
    
    print(" CHART QUALITY ANALYSIS:")
    print("-" * 50)
    
    # Complexity analysis
    complexity_counts = {}
    chart_type_counts = {}
    category_counts = {}
    data_point_stats = []
    series_count_stats = []
    
    for chart in chart_configs:
        # Count complexity levels
        complexity = chart['complexity']
        complexity_counts[complexity] = complexity_counts.get(complexity, 0) + 1
        
        # Count chart types
        chart_type = chart['chart_type']
        chart_type_counts[chart_type] = chart_type_counts.get(chart_type, 0) + 1
        
        # Count categories
        category = chart['category']
        category_counts[category] = category_counts.get(category, 0) + 1
        
        # Collect stats
        data_point_stats.append(chart['data_points'])
        series_count_stats.append(chart['series_count'])
    
    print(f" COMPLEXITY DISTRIBUTION:")
    for complexity, count in complexity_counts.items():
        percentage = (count / len(chart_configs)) * 100
        print(f"  {complexity.title()}: {count} charts ({percentage:.1f}%)")
    
    print(f"\n CHART TYPE DISTRIBUTION:")
    for chart_type, count in chart_type_counts.items():
        percentage = (count / len(chart_configs)) * 100
        print(f"  {chart_type.title()}: {count} charts ({percentage:.1f}%)")
    
    print(f"\n CATEGORY DISTRIBUTION:")
    for category, count in category_counts.items():
        percentage = (count / len(chart_configs)) * 100
        print(f"  {category.replace('_', ' ').title()}: {count} charts ({percentage:.1f}%)")
    
    print(f"\n DATA COMPLEXITY METRICS:")
    print(f"  Data Points per Chart: {np.mean(data_point_stats):.1f} ± {np.std(data_point_stats):.1f}")
    print(f"  Range: {min(data_point_stats)} - {max(data_point_stats)} points")
    print(f"  Series per Chart: {np.mean(series_count_stats):.1f} ± {np.std(series_count_stats):.1f}")
    print(f"  Range: {min(series_count_stats)} - {max(series_count_stats)} series")
    
    # Complexity score calculation
    complexity_scores = []
    for chart in chart_configs:
        score = 0
        
        # Base complexity from level
        if chart['complexity'] == 'medium':
            score += 3
        elif chart['complexity'] == 'complex':
            score += 5
        elif chart['complexity'] == 'advanced':
            score += 7
        
        # Additional complexity from data points
        score += min(chart['data_points'] / 3, 3)
        
        # Additional complexity from series count
        score += min(chart['series_count'], 3)
        
        complexity_scores.append(score)
    
    avg_complexity = np.mean(complexity_scores)
    print(f"\n OVERALL COMPLEXITY SCORE: {avg_complexity:.2f}/13")
    
    if avg_complexity >= 8:
        print(" HIGH complexity - Excellent for robustness testing")
    elif avg_complexity >= 6:
        print(" MEDIUM-HIGH complexity - Good for robustness testing")
    elif avg_complexity >= 4:
        print("  MEDIUM complexity - Adequate for robustness testing")
    else:
        print(" LOW complexity - May not challenge AI sufficiently")
    
    return {
        'complexity_distribution': complexity_counts,
        'chart_type_distribution': chart_type_counts,
        'category_distribution': category_counts,
        'avg_data_points': np.mean(data_point_stats),
        'avg_series_count': np.mean(series_count_stats),
        'complexity_score': avg_complexity
    }

# Run validation if charts were generated successfully
if 'advanced_charts' in locals() and advanced_charts:
    validation_results = validate_chart_quality(advanced_charts)
else:
    print(" Skipping validation - no charts generated")
    validation_results = {}


 SECTION 4: QUALITY VALIDATION
 CHART QUALITY ANALYSIS:
--------------------------------------------------
 COMPLEXITY DISTRIBUTION:
  Medium: 72 charts (36.0%)
  Advanced: 50 charts (25.0%)
  Complex: 78 charts (39.0%)

 CHART TYPE DISTRIBUTION:
  Bar: 72 charts (36.0%)
  Line: 51 charts (25.5%)
  Pie: 27 charts (13.5%)
  Scatter: 33 charts (16.5%)
  Area: 17 charts (8.5%)

 CATEGORY DISTRIBUTION:
  Business Analytics: 50 charts (25.0%)
  Scientific Research: 50 charts (25.0%)
  Financial Analysis: 50 charts (25.0%)
  Operational Metrics: 50 charts (25.0%)

 DATA COMPLEXITY METRICS:
  Data Points per Chart: 9.9 ± 2.4
  Range: 6 - 15 points
  Series per Chart: 3.5 ± 1.2
  Range: 2 - 6 series

 OVERALL COMPLEXITY SCORE: 10.41/13
 HIGH complexity - Excellent for robustness testing


### SECTION 5: SAMPLE CHART SHOWCASE

In [8]:
print("\n SECTION 5: SAMPLE CHART SHOWCASE")

def showcase_sample_charts(chart_configs, num_samples=5):
    """Display information about sample charts for review"""
    
    if not chart_configs:
        print(" No charts to showcase")
        return
    
    print(f"📋 SAMPLE CHART DETAILS (showing {min(num_samples, len(chart_configs))} examples):")
    print("=" * 80)
    
    # Select diverse samples
    samples = []
    complexity_levels = ['medium', 'complex', 'advanced']
    
    for complexity in complexity_levels:
        complexity_charts = [c for c in chart_configs if c['complexity'] == complexity]
        if complexity_charts:
            samples.extend(random.sample(complexity_charts, min(2, len(complexity_charts))))
    
    for i, chart in enumerate(samples[:num_samples]):
        print(f"\n SAMPLE {i+1}: {chart['filename']}")
        print(f"   Title: {chart['title']}")
        print(f"   Type: {chart['chart_type'].title()} | Complexity: {chart['complexity'].title()}")
        print(f"   Category: {chart['category'].replace('_', ' ').title()}")
        print(f"   Data Points: {chart['data_points']} | Series: {chart['series_count']}")
        print(f"   Categories: {', '.join(chart['categories'][:3])}{'...' if len(chart['categories']) > 3 else ''}")
        
        # Show sample data
        if chart['series_data']:
            series_sample = list(chart['series_data'].items())[0]
            sample_values = series_sample[1][:3] if len(series_sample[1]) >= 3 else series_sample[1]
            print(f"   Sample Data ({series_sample[0]}): {[f'{v:.1f}' for v in sample_values]}...")

# Run showcase if charts exist
if 'advanced_charts' in locals() and advanced_charts:
    showcase_sample_charts(advanced_charts)



 SECTION 5: SAMPLE CHART SHOWCASE
📋 SAMPLE CHART DETAILS (showing 5 examples):

 SAMPLE 1: chart_139_medium_bar.png
   Title: Economic Indicators - (Financial Analysis)
   Type: Bar | Complexity: Medium
   Category: Financial Analysis
   Data Points: 6 | Series: 2
   Categories: Category 1, Category 2, Category 3...
   Sample Data (Series A): ['63.0', '95.3', '51.1']...

 SAMPLE 2: chart_054_medium_bar.png
   Title: Multi Variable Relationship - (Scientific Research)
   Type: Bar | Complexity: Medium
   Category: Scientific Research
   Data Points: 8 | Series: 2
   Categories: Category 1, Category 2, Category 3...
   Sample Data (Series A): ['36.7', '82.1', '38.7']...

 SAMPLE 3: chart_078_complex_bar.png
   Title: Sensor Measurements Overtime - (Scientific Research)
   Type: Bar | Complexity: Complex
   Category: Scientific Research
   Data Points: 12 | Series: 4
   Categories: Category 1, Category 2, Category 3...
   Sample Data (Series A): ['43.1', '60.9', '48.7']...

 SAMPLE 4: ch

### SECTION 6: ERROR REPORTING AND SUMMARY

In [9]:
print("\n SECTION 6: EXECUTION SUMMARY")

# Create execution summary
if 'advanced_charts' in locals() and advanced_charts:
    execution_summary = {
        'status': 'SUCCESS',
        'charts_generated': len(advanced_charts),
        'target_charts': 200,
        'success_rate': (len(advanced_charts) / 200) * 100,
        'complexity_score': validation_results.get('complexity_score', 0),
        'ready_for_perturbations': len(advanced_charts) > 0,
        'estimated_perturbations': len(advanced_charts) * 15,
        'next_notebook': '03_Perturbation_Framework.ipynb'
    }
    
    print(f" GENERATION STATUS: {execution_summary['status']}")
    print(f" Charts Generated: {execution_summary['charts_generated']}/{execution_summary['target_charts']}")
    print(f" Success Rate: {execution_summary['success_rate']:.1f}%")
    
    if execution_summary['success_rate'] >= 90:
        print(" EXCELLENT generation rate!")
    elif execution_summary['success_rate'] >= 75:
        print(" GOOD generation rate - sufficient for analysis")
    elif execution_summary['success_rate'] >= 50:
        print(" MODERATE generation rate - may need investigation")
    else:
        print(" LOW generation rate - requires debugging")
        
else:
    execution_summary = {
        'status': 'FAILED',
        'charts_generated': 0,
        'target_charts': 200,
        'success_rate': 0,
        'ready_for_perturbations': False,
        'next_notebook': 'Debug and retry'
    }
    
    print(f" GENERATION STATUS: {execution_summary['status']}")
    print(" Please check error messages above and retry")

# Save summary for next notebook
if execution_summary['status'] == 'SUCCESS':
    next_phase_summary = {
        'charts_generated': execution_summary['charts_generated'],
        'average_complexity_score': validation_results.get('complexity_score', 0),
        'complexity_distribution': validation_results.get('complexity_distribution', {}),
        'chart_types': list(validation_results.get('chart_type_distribution', {}).keys()),
        'ready_for_perturbations': True,
        'estimated_perturbations': execution_summary['estimated_perturbations'],
        'next_notebook': '03_Perturbation_Framework.ipynb'
    }
    
    with open('data/analysis_cache/chart_generation_summary.json', 'w') as f:
        json.dump(next_phase_summary, f, indent=2)
    
    print(" Phase summary saved for next notebook")

print("\n" + "=" * 80)
if execution_summary['status'] == 'SUCCESS':
    print(" ADVANCED CHART GENERATION COMPLETE!")
    print(" Medium-Complex Charts Successfully Created")
    print(" Ready for Phase 3: Perturbation Framework")
else:
    print(" CHART GENERATION HAD ISSUES")
    print(" Please review errors and retry")
print("=" * 80)

# Log final status
if 'logger' in locals():
    logger.info(f"Chart generation completed: {execution_summary['charts_generated']} charts")
    if 'validation_results' in locals() and validation_results:
        logger.info(f"Average complexity score: {validation_results.get('complexity_score', 0):.2f}/13")
    logger.info(f"Status: {execution_summary['status']}")


 SECTION 6: EXECUTION SUMMARY
 GENERATION STATUS: SUCCESS
 Charts Generated: 200/200
 Success Rate: 100.0%
 EXCELLENT generation rate!
 Phase summary saved for next notebook

 ADVANCED CHART GENERATION COMPLETE!
 Medium-Complex Charts Successfully Created
 Ready for Phase 3: Perturbation Framework
