# Advanced Report Generation and Analysis

This notebook demonstrates advanced features of the UCLA LPPI Report Generator for researchers and power users.

## Contents
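1. Custom Configuration
2. Working with Multiple Counties
3. Batch Processing Multiple Counties
4. Analyzing Generated Data
5. Custom Visualization of Results
6. Working with Different Report Types
7. Troubleshooting and Diagnostics
8. Export Summary Report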

## 1. Custom Configuration

In [None]:
from report_generator import ReportGenerator
import os
import pandas as pd
import matplotlib.pyplot as plt

# Settings handed to ReportGenerator in place of its defaults: an output
# location plus data-source paths/URLs. Add your own URLs or file paths here.
custom_config = dict(
    output_dir='custom_output',
    geojson_path='inputs/geojson/ca_counties_simplified.geojson',
    population_csv_url='https://docs.google.com/spreadsheets/d/e/2PACX-1vTDl0u8xAvazJjlCn62edUDjjK1tLwyi4hXihYpYIGOxawrN3_HfzvYKJ1ARzH4AzhrHZysIpkc_1Nc/pub?gid=1869860862&single=true&output=csv',
)

# Build the generator from the custom settings and echo back what it exposes.
generator = ReportGenerator(config=custom_config)
print("Output directory: {}".format(generator.output_dir))
print("Configuration keys: {}".format([*generator.config.keys()]))

## 2. Working with Multiple Counties

In [None]:
# Load data
generator.load_data(offline_mode=True)  # Set to False when you have connectivity

# Get all available counties
all_counties = generator.list_available_counties()
print(f"Total available counties: {len(all_counties)}")

# Select specific counties for analysis
target_counties = ['Los Angeles', 'San Diego']  # Modify as needed
available_targets = [c for c in target_counties if c in all_counties]

# Report requested counties that are absent from the loaded data instead of
# dropping them silently; a misspelled name would otherwise just disappear.
missing_targets = [c for c in target_counties if c not in all_counties]
if missing_targets:
    print(f"⚠️ Requested counties not available (skipped): {missing_targets}")

print(f"Target counties for analysis: {available_targets}")

# Get summaries for each county
county_summaries = []
for county in available_targets:
    summary = generator.get_county_summary(county)

    # A summary carrying an 'error' key is treated as a failed lookup; say so
    # rather than skipping it without a trace.
    if 'error' in summary:
        print(f"\n⚠️ {county}: summary unavailable ({summary['error']})")
        continue

    county_summaries.append(summary)
    print(f"\n{county} Summary:")
    for key, value in summary.items():
        print(f"  {key}: {value}")

print(f"\nSuccessfully processed {len(county_summaries)} counties")

## 3. Batch Processing Multiple Counties

In [None]:
# Run the full report pipeline once per county. Every county contributes
# exactly one entry to batch_results: the generator's result dict tagged with
# the county name, or a {'county', 'error'} record if the run raised.
batch_results = []

for county_name in available_targets:
    print(f"\n🏭 Processing {county_name}...")

    try:
        # One county per call so a failure is isolated to that county
        report = generator.generate_full_report(
            counties=[county_name],
            include_pdfs=False,
            offline_mode=True,
        )
        report['county'] = county_name
        batch_results.append(report)

        generated = report.get('files_generated', [])
        print(f"✅ {county_name} completed. Files: {len(generated)}")

    except Exception as exc:
        print(f"❌ Error processing {county_name}: {exc}")
        batch_results.append({'county': county_name, 'error': str(exc)})

# Split the entries into successes and failures in a single pass; an entry
# counts as failed when it carries an 'error' key.
print("\n📊 Batch Processing Summary:")
successful, failed = [], []
for entry in batch_results:
    bucket = failed if 'error' in entry else successful
    bucket.append(entry)

print(f"Successful: {len(successful)}")
print(f"Failed: {len(failed)}")

if failed:
    print("\nFailed counties:")
    for entry in failed:
        print(f"  - {entry['county']}: {entry['error']}")

## 4. Analyzing Generated Data

In [None]:
# Summarise what the successful batch runs produced: file counts per
# extension and the subdirectory count of each reported output directory.
if not successful:
    print("No successful results to analyze.")
else:
    print("📈 Analysis of Generated Reports:")

    # Flatten every run's 'files_generated' list into one sequence of paths
    generated_paths = [
        path
        for report in successful
        for path in report.get('files_generated', [])
    ]
    total_files = len(generated_paths)

    # Tally paths by lower-cased extension
    file_types = {}
    for path in generated_paths:
        suffix = os.path.splitext(path)[1].lower()
        file_types[suffix] = file_types.get(suffix, 0) + 1

    print(f"Total files generated: {total_files}")
    print("\nFile types:")
    for suffix, tally in sorted(file_types.items()):
        print(f"  {suffix}: {tally} files")

    # Distinct output directories reported by the runs ('unknown' if absent)
    output_dirs = {report.get('output_directory', 'unknown') for report in successful}

    print(f"\nOutput directories: {len(output_dirs)}")
    for out_dir in output_dirs:
        if not os.path.exists(out_dir):
            continue
        child_dirs = [
            entry
            for entry in os.listdir(out_dir)
            if os.path.isdir(os.path.join(out_dir, entry))
        ]
        print(f"  {out_dir}: {len(child_dirs)} subdirectories")

## 5. Custom Visualization of Results

In [None]:
# Two-panel figure of the batch outcome: a success/failure pie on the left
# and, when any county succeeded, files generated per county on the right.
if not batch_results:
    print("No results to visualize.")
else:
    success_count, failure_count = len(successful), len(failed)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: share of successful vs. failed counties
    ax1.pie(
        [success_count, failure_count],
        labels=['Successful', 'Failed'],
        colors=['#2ecc71', '#e74c3c'],
        autopct='%1.1f%%',
        startangle=90,
    )
    ax1.set_title('Processing Success Rate')

    # Right panel: one bar per successful county
    if successful:
        counties = [entry['county'] for entry in successful]
        file_counts = [len(entry.get('files_generated', [])) for entry in successful]

        ax2.bar(counties, file_counts, color='#3498db')
        ax2.set(
            title='Files Generated per County',
            xlabel='County',
            ylabel='Number of Files',
        )
        ax2.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

    # Summary statistics; file_counts only exists when successful is non-empty,
    # and the short-circuit below avoids touching it otherwise.
    if successful and file_counts:
        average_files = sum(file_counts) / len(file_counts)
        print("\n📊 File Generation Statistics:")
        print(f"Average files per county: {average_files:.1f}")
        print(f"Max files generated: {max(file_counts)}")
        print(f"Min files generated: {min(file_counts)}")

## 6. Working with Different Report Types

In [None]:
# Generate each report type on its own for the first available county.
if not available_targets:
    print("No counties available for report generation.")
else:
    sample_county = available_targets[0]
    print(f"Generating different report types for {sample_county}...")

    # (progress message, report-type identifier) in the order they are run
    report_runs = (
        ("\n🌡️ Generating heat reports...", 'extremeheat'),
        ("🏭 Generating air pollution reports...", 'airpollution'),
    )
    for banner, report_type in report_runs:
        print(banner)
        generator.generate_html_reports(report_type, [sample_county])

    print("✅ Specific report types generated")

## 7. Troubleshooting and Diagnostics

In [None]:
# Diagnostic information: checks that expected project paths exist relative to
# the current working directory, inspects the generator's output directory,
# and prints basic interpreter/host information.
import sys

# psutil is an optional third-party package. A missing install should not
# abort the diagnostics, so fall back to what the standard library can report.
try:
    import psutil
except ImportError:
    psutil = None


def _print_presence(heading, paths):
    """Print `heading`, then one ✅/❌ line per path based on os.path.exists."""
    print(f"\n{heading}")
    for path in paths:
        status = "✅" if os.path.exists(path) else "❌"
        print(f"  {status} {path}")


print("🔍 System Diagnostics:")

# Check required directories
required_dirs = ['inputs', 'inputs/geojson', 'templates', 'static']
_print_presence("Required directories:", required_dirs)

# Check key files
key_files = [
    'report_generator.py',
    'main.py',
    'flask_app.py',
    'requirements.txt'
]
_print_presence("Key files:", key_files)

# Check output directory
output_dir = generator.output_dir
print(f"\nOutput directory: {output_dir}")
if os.path.exists(output_dir):
    subdirs = [d for d in os.listdir(output_dir) if os.path.isdir(os.path.join(output_dir, d))]
    print(f"Subdirectories: {subdirs}")

    # Count files in output, recursing through every subdirectory
    total_output_files = sum(len(files) for _, _, files in os.walk(output_dir))
    print(f"Total output files: {total_output_files}")
else:
    print("Output directory does not exist.")

# Memory and performance info
print("\nSystem info:")
print(f"Python version: {sys.version.split()[0]}")
if psutil is not None:
    print(f"Available memory: {psutil.virtual_memory().available / (1024**3):.1f} GB")
    print(f"CPU count: {psutil.cpu_count()}")
else:
    # No stdlib equivalent for available memory; the CPU count still is.
    print("Available memory: unavailable (install psutil to report it)")
    print(f"CPU count: {os.cpu_count()}")

## 8. Export Summary Report

In [None]:
# Create a summary report of the analysis session.
# len()/sum() over empty lists already yield 0, so an empty batch needs no
# special-casing here.
summary_data = {
    'session_timestamp': pd.Timestamp.now(),
    'counties_attempted': len(batch_results),
    'counties_successful': len(successful),
    'counties_failed': len(failed),
    'total_files_generated': sum(len(r.get('files_generated', [])) for r in successful),
    'output_directory': generator.output_dir,
    'configuration_used': str(generator.config)
}

# Save summary to CSV. to_csv does not create missing parent directories, and
# the output directory may not exist yet (e.g. when no report was generated),
# so make sure it is there first.
os.makedirs(generator.output_dir, exist_ok=True)
summary_df = pd.DataFrame([summary_data])
summary_file = os.path.join(generator.output_dir, 'analysis_summary.csv')
summary_df.to_csv(summary_file, index=False)

print(f"📋 Analysis summary saved to: {summary_file}")
print("\nSession Summary:")
for key, value in summary_data.items():
    print(f"  {key}: {value}")

# Display the summary as a formatted table (display is provided by IPython)
print("\n📊 Summary Table:")
display(summary_df)

## Next Steps

This advanced notebook demonstrated:

- ✅ Custom configuration setup
- ✅ Batch processing multiple counties
- ✅ Data analysis and visualization
- ✅ Report type customization
- ✅ System diagnostics and troubleshooting
- ✅ Session summary and export

### For Production Use:

1. **Data Preparation**: Ensure all input data files are available and properly formatted
2. **System Requirements**: Install all dependencies including `wkhtmltopdf` for PDF generation
3. **Network Access**: Enable internet connectivity for real data fetching
4. **Resource Planning**: Monitor memory and CPU usage for large batch operations
5. **Error Handling**: Implement additional error handling for production workflows

### Performance Tips:

- Process counties in smaller batches to manage memory usage
- Use offline mode when possible to reduce network dependencies
- Monitor disk space as reports can generate many files
- Consider using parallel processing for independent county operations