# Solar Power Emission Projection Analysis
## Interactive Analysis and Scenario Modeling

This notebook interactively explores projected emissions avoided by solar power under three IEA scenarios: Net Zero Emissions by 2050 (NZE), Announced Pledges (APS), and Stated Policies (STEPS).

## 1. Setup and Data Loading

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import yaml
import warnings
# NOTE(review): the 'ignore' action with no category filter silences every
# warning for the rest of the session — consider narrowing it to the specific
# warnings that are known to be noisy.
warnings.filterwarnings('ignore')

# Plotting style: apply a matplotlib style sheet, then set seaborn's colour palette
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

# Confirmation message so a reader can see the setup cell ran to completion
print('✓ Libraries loaded')

✓ Libraries loaded


In [2]:
# Load configuration
# The path is relative to the notebook's working directory.
# The encoding is passed explicitly so parsing does not depend on the
# platform's locale default (which is not UTF-8 on every system).
with open('../config/config.yaml', 'r', encoding='utf-8') as f:
    # safe_load builds plain Python objects only (no arbitrary object construction)
    config = yaml.safe_load(f)

print('Configuration loaded')
# Iterating a dict yields its keys, so this lists the scenario names
print(f"Scenarios: {list(config['scenarios'])}")

Configuration loaded
Scenarios: ['NZE', 'APS', 'STEPS']


In [3]:
# Load data
# Four parquet files are read from the processed-data directory, one frame each.
df_raw = pd.read_parquet('../data/processed/solar_data_processed.parquet')
df_features = pd.read_parquet('../data/processed/solar_data_features.parquet')
df_projections = pd.read_parquet('../data/processed/scenario_projections.parquet')
df_risk = pd.read_parquet('../data/processed/transition_risk.parquet')

# Report the (rows, columns) shape of every frame as a quick sanity check
print('Datasets loaded:')
for dataset_label, dataset in (
    ('Raw data', df_raw),
    ('Features', df_features),
    ('Projections', df_projections),
    ('Risk metrics', df_risk),
):
    print(f'  {dataset_label}: {dataset.shape}')

Datasets loaded:
  Raw data: (75671, 32)
  Features: (75671, 72)
  Projections: (90, 6)
  Risk metrics: (30, 11)


## 2. Exploratory Data Analysis

In [4]:
# Data overview
# Build the three summaries first, then print them under their headings.
first_rows = df_raw.head()
dtype_counts = df_raw.dtypes.value_counts()
# Per-column null counts, keeping the ten columns with the most missing values
most_missing = df_raw.isnull().sum().sort_values(ascending=False).head(10)

print('Dataset Overview:')
print(first_rows)
print('\nData Types:')
print(dtype_counts)
print('\nMissing Values:')
print(most_missing)

Dataset Overview:
  date_last_researched country_area               project_name phase_name  \
0           2024-07-23  Afghanistan       Ayno Mena solar farm         --   
1           2024-07-23  Afghanistan  Badghis Solar Power Plant         --   
2           2024-07-23  Afghanistan           Balkh solar farm         --   
3           2024-07-23  Afghanistan         Behsood solar farm         --   
4           2024-07-23  Afghanistan       Dab Pal 4 solar farm         --   

  project_name_in_local_language___script                other_name(s)  \
0                                    None                         None   
1                                    None                         None   
2                                    None   Northern Balkh solar plant   
3                                    None                         None   
4                                    None  DAB PAL 4 solar energy park   

   capacity_(mw) capacity_rating technology_type                    status

In [5]:
# Global capacity distribution
# Histogram of per-row capacity with a box plot drawn in the margin.
fig = px.histogram(
    df_raw,
    x='capacity_(mw)',
    nbins=50,
    marginal='box',
    labels={'capacity_(mw)': 'Capacity (MW)'},
    title='Global Solar Capacity Distribution',
)
fig.show()

# Headline statistics over the same column (every row of df_raw is included)
capacity_mw = df_raw['capacity_(mw)']
print(f'Total global capacity: {capacity_mw.sum():,.0f} MW')
print(f'Average project size: {capacity_mw.mean():.1f} MW')
print(f'Median project size: {capacity_mw.median():.1f} MW')

Total global capacity: 3,204,721 MW
Average project size: 42.4 MW
Median project size: 5.0 MW


In [6]:
# Regional distribution
# Total capacity per region group, largest first.
regional_capacity = (
    df_features
    .groupby('region_group')['capacity_(mw)']
    .sum()
    .sort_values(ascending=False)
)

fig = px.bar(
    x=regional_capacity.index,
    y=regional_capacity.values,
    text=regional_capacity.values,
    labels={'x': 'Region', 'y': 'Capacity (MW)'},
    title='Solar Capacity by Region',
)
# Print each bar's value above it, with thousands separators and no decimals
fig.update_traces(textposition='outside', texttemplate='%{text:,.0f}')
fig.show()

In [7]:
# Status distribution over time
# Count rows per (start_year, status) pair, then keep start years from 2010 onwards.
status_timeline = (
    df_raw
    .groupby(['start_year', 'status'])
    .size()
    .reset_index(name='count')
    .loc[lambda counts: counts['start_year'] >= 2010]
)

# One line per status value
fig = px.line(
    status_timeline,
    x='start_year',
    y='count',
    color='status',
    labels={'start_year': 'Year', 'count': 'Number of Projects'},
    title='Solar Project Status Over Time',
)
fig.show()

## 3. Scenario Projections Analysis

In [8]:
# Global emissions avoided by scenario
# Sum emissions avoided over all rows sharing a (year, scenario) pair.
global_projections = (
    df_projections
    .groupby(['year', 'scenario'])['emissions_avoided_tco2e']
    .sum()
    .reset_index()
)

# One marked line per scenario
fig = px.line(
    global_projections,
    x='year',
    y='emissions_avoided_tco2e',
    color='scenario',
    markers=True,
    labels={'year': 'Year', 'emissions_avoided_tco2e': 'Emissions Avoided (tCO2e)'},
    title='Global Emissions Avoided by Scenario',
)
# A single hover box lists every scenario's value at the hovered year
fig.update_layout(hovermode='x unified')
fig.show()

In [9]:
# Regional projections comparison
# Keep only the projection rows for 2030.
is_2030 = df_projections['year'] == 2030
year_2030 = df_projections[is_2030]

# Grouped bars: one cluster per region, one bar per scenario
fig = px.bar(
    year_2030,
    x='region',
    y='emissions_avoided_tco2e',
    color='scenario',
    barmode='group',
    labels={'emissions_avoided_tco2e': 'Emissions Avoided (tCO2e)'},
    title='2030 Emissions Avoided by Region and Scenario',
)
fig.show()

In [10]:
# Scenario divergence analysis
# Reshape the 2050 projections to one row per region and one column per scenario.
# aggfunc is set explicitly: pivot_table defaults to the mean, which would
# silently average an additive quantity if a (region, scenario) pair ever had
# more than one row. Summing matches how emissions are aggregated in the
# global-projection and summary cells of this notebook.
pivot_2050 = df_projections[df_projections['year'] == 2050].pivot_table(
    index='region',
    columns='scenario',
    values='emissions_avoided_tco2e',
    aggfunc='sum',
)

# Gap between the NZE and STEPS columns; regions with the widest gap come first
pivot_2050['NZE_STEPS_gap'] = pivot_2050['NZE'] - pivot_2050['STEPS']
pivot_2050 = pivot_2050.sort_values('NZE_STEPS_gap', ascending=False)

# One bar trace per scenario, added in a fixed order so the legend is stable
fig = go.Figure()
for scenario_name in ('NZE', 'APS', 'STEPS'):
    fig.add_trace(
        go.Bar(name=scenario_name, x=pivot_2050.index, y=pivot_2050[scenario_name])
    )

fig.update_layout(
    title='2050 Scenario Comparison by Region',
    xaxis_title='Region',
    yaxis_title='Emissions Avoided (tCO2e)',
    barmode='group'
)
fig.show()

## 4. Transition Risk Analysis

In [11]:
# Risk scores over time
# Mean of both risk scores per year.
risk_columns = ['transition_risk_score', 'policy_risk_score']
risk_timeline = df_risk.groupby('year')[risk_columns].mean().reset_index()

# (panel title, score column) pairs, in left-to-right subplot order
risk_panels = (
    ('Transition Risk', 'transition_risk_score'),
    ('Policy Risk', 'policy_risk_score'),
)

fig = make_subplots(rows=1, cols=2, subplot_titles=('Transition Risk', 'Policy Risk'))

for panel_col, (panel_title, score_column) in enumerate(risk_panels, start=1):
    fig.add_trace(
        go.Scatter(
            x=risk_timeline['year'],
            y=risk_timeline[score_column],
            mode='lines+markers',
            name=panel_title,
        ),
        row=1, col=panel_col
    )

# Legend is hidden; the subplot titles already name each series
fig.update_layout(title='Risk Metrics Over Time', showlegend=False)
fig.show()

In [12]:
# High-risk regions
# Up to ten 2030 rows with the highest transition risk score.
# nlargest returns fewer rows when fewer than ten exist for that year.
high_risk = df_risk[df_risk['year'] == 2030].nlargest(10, 'transition_risk_score')

fig = px.bar(
    high_risk,
    x='transition_risk_score',
    y='region',
    orientation='h',
    # Report the number of regions actually plotted rather than a fixed "10"
    title=f'Top {len(high_risk)} High-Risk Regions (2030)',
    labels={'transition_risk_score': 'Transition Risk Score'},
    color='transition_risk_score',
    color_continuous_scale='RdYlGn_r'
)
# Plotly places the first category at the bottom of a horizontal bar chart;
# ordering categories by ascending value puts the highest-risk region on top.
fig.update_layout(yaxis={'categoryorder': 'total ascending'})
fig.show()

In [13]:
# Stranded asset exposure
# Sum the exposure column over all rows of each year.
stranded_exposure = (
    df_risk
    .groupby('year')['stranded_asset_exposure']
    .sum()
    .reset_index()
)

# Filled area chart in a single red series
fig = px.area(
    stranded_exposure,
    x='year',
    y='stranded_asset_exposure',
    color_discrete_sequence=['red'],
    labels={'stranded_asset_exposure': 'Stranded Asset Exposure (tCO2e)'},
    title='Global Stranded Asset Exposure Over Time',
)
fig.show()

## 5. Feature Importance Analysis

In [14]:
# Load a trained model
import joblib

# SECURITY: joblib.load unpickles the file, which can execute arbitrary code.
# Only load model artifacts that come from a trusted source.
model_nze = joblib.load('../data/models/emission_model_NZE.pkl')

# Get feature importance
if hasattr(model_nze, 'feature_importances_'):
    importances = model_nze.feature_importances_

    # NOTE(review): presumably a scikit-learn style estimator, where
    # feature_names_in_ is only set when the model was fitted on data with
    # column names. Fall back to positional labels instead of raising
    # AttributeError when the attribute is absent.
    feature_names = getattr(model_nze, 'feature_names_in_', None)
    if feature_names is None:
        feature_names = [f'feature_{i}' for i in range(len(importances))]

    # Keep at most the 20 most important features, highest first
    importance_df = pd.DataFrame({
        'feature': feature_names,
        'importance': importances
    }).sort_values('importance', ascending=False).head(20)

    fig = px.bar(
        importance_df,
        x='importance',
        y='feature',
        orientation='h',
        # Report the number of features actually plotted rather than a fixed "20"
        title=f'Top {len(importance_df)} Feature Importances (NZE Model)',
        labels={'importance': 'Importance Score'}
    )
    # Plotly places the first category at the bottom of a horizontal bar chart;
    # ordering categories by ascending value puts the most important feature on top.
    fig.update_layout(yaxis={'categoryorder': 'total ascending'})
    fig.show()
else:
    print('Feature importance not available for this model type')

## 6. Interactive Dashboard

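Use the widgets below to explore the projections interactively. They require a live kernel with `ipywidgets` installed; in a static export only a text placeholder is shown.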

In [15]:
from ipywidgets import interact, widgets

@interact(
    scenario=widgets.Dropdown(options=['NZE', 'APS', 'STEPS'], value='NZE'),
    year=widgets.IntSlider(min=2025, max=2050, step=5, value=2030),
    metric=widgets.Dropdown(options=['capacity_mw', 'emissions_avoided_tco2e'], value='emissions_avoided_tco2e')
)
def plot_scenario(scenario, year, metric):
    """Plot one projection metric per region for a chosen scenario and year.

    Reads the notebook-level ``df_projections`` frame and shows a bar chart.
    Prints a short message instead when no row matches the selection.

    Args:
        scenario: Value matched against the ``scenario`` column.
        year: Value matched against the ``year`` column.
        metric: Name of the ``df_projections`` column used for bar height and colour.
    """
    selection = (df_projections['scenario'] == scenario) & (df_projections['year'] == year)
    data = df_projections[selection]

    # An empty selection would otherwise render a blank chart with no explanation
    if data.empty:
        print(f'No projection data for scenario={scenario}, year={year}')
        return

    fig = px.bar(
        data,
        x='region',
        y=metric,
        title=f'{scenario} - {year} - {metric}',
        color=metric,
        color_continuous_scale='Viridis'
    )
    fig.show()

interactive(children=(Dropdown(description='scenario', options=('NZE', 'APS', 'STEPS'), value='NZE'), IntSlide…

## 7. Summary Statistics

In [16]:
# Generate summary report
# For each scenario: total emissions avoided in 2030 and 2050 (summed over all
# matching projection rows) and the growth rate read from the configuration.
divider = '=' * 80

print('EMISSION PROJECTION SUMMARY')
print(divider)

for scenario in ['NZE', 'APS', 'STEPS']:
    scenario_data = df_projections[df_projections['scenario'] == scenario]

    print(f'\n{scenario} Scenario:')

    avoided_2030 = scenario_data.loc[scenario_data['year'] == 2030, 'emissions_avoided_tco2e'].sum()
    print(f'  2030 Total Emissions Avoided: {avoided_2030:,.0f} tCO2e')

    avoided_2050 = scenario_data.loc[scenario_data['year'] == 2050, 'emissions_avoided_tco2e'].sum()
    print(f'  2050 Total Emissions Avoided: {avoided_2050:,.0f} tCO2e')

    # The configured rate is multiplied by 100 for display as a percentage
    growth_percent = config['scenarios'][scenario]['solar_growth_rate'] * 100
    print(f'  Average Annual Growth Rate: {growth_percent:.1f}%')

print('\n' + divider)
print('Analysis complete!')

EMISSION PROJECTION SUMMARY

NZE Scenario:
  2030 Total Emissions Avoided: 58,747,238 tCO2e
  2050 Total Emissions Avoided: 635,654,777 tCO2e
  Average Annual Growth Rate: 15.0%

APS Scenario:
  2030 Total Emissions Avoided: 44,994,198 tCO2e
  2050 Total Emissions Avoided: 200,118,519 tCO2e
  Average Annual Growth Rate: 10.0%

STEPS Scenario:
  2030 Total Emissions Avoided: 36,027,622 tCO2e
  2050 Total Emissions Avoided: 76,388,848 tCO2e
  Average Annual Growth Rate: 6.0%

Analysis complete!
