In [None]:
# Load EMBER and IRENA datasets from cache

import pandas as pd
import pickle
import sys
from pathlib import Path

# Make the parent directory importable so the project-local
# `shared_data_loader` module can be found. The membership check keeps
# repeated runs of this cell from appending the same entry again and again.
parent_dir = Path().resolve().parent
if str(parent_dir) not in sys.path:
    sys.path.append(str(parent_dir))

# Must come after the sys.path tweak above, hence not in the top import group.
from shared_data_loader import get_cache_path, get_shared_loader

# Load cache file
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point this at cache files produced by this project.
cache_file = get_cache_path('global_data_cache.pkl')
with open(cache_file, 'rb') as f:
    cache_data = pickle.load(f)

# Extract datasets: yearly EMBER data comes from the pickle cache ...
df_ember_annual = cache_data['df_ember']

# ... while the monthly hydro data is fetched through the shared loader.
loader = get_shared_loader('../data/')

# Rename the country column so it matches the 'iso_code' key used for the
# yearly data in the merges further down.
df_ember_monthly = loader.get_monthly_hydro_data().rename(
    columns={'Country code': 'iso_code'}
)

# Create annual hydro capacity factor table
# Filter for hydro generation and capacity from yearly data

# Get hydro generation (TWh)
hydro_gen = df_ember_annual.loc[
    (df_ember_annual['Category'] == 'Electricity generation')
    & (df_ember_annual['model_fuel'] == 'hydro')
    & (df_ember_annual['Unit'] == 'TWh'),
    ['iso_code', 'Year', 'Value'],
].rename(columns={'Value': 'Generation_TWh'})

# Get hydro capacity (GW)
hydro_cap = df_ember_annual.loc[
    (df_ember_annual['Category'] == 'Capacity')
    & (df_ember_annual['model_fuel'] == 'hydro')
    & (df_ember_annual['Unit'] == 'GW'),
    ['iso_code', 'Year', 'Value'],
].rename(columns={'Value': 'Capacity_GW'})

# Merge generation and capacity; the inner join keeps only country-years
# that have both a generation and a capacity record.
hydro_annual = hydro_gen.merge(hydro_cap, on=['iso_code', 'Year'], how='inner')

# Calculate annual capacity factor
# CF = Generation_TWh / (Capacity_GW * TWh one GW produces running all year)
# One GW running for a whole year yields days * 24 / 1000 TWh: 8.76 in a
# 365-day year and 8.784 in a leap year. Using the real day count keeps the
# annual figure consistent with the monthly table, which counts Feb 29.
annual_years = hydro_annual['Year']
annual_is_leap = (annual_years % 4 == 0) & (
    (annual_years % 100 != 0) | (annual_years % 400 == 0)
)
annual_twh_per_gw = annual_is_leap.map({True: 366, False: 365}) * 24 / 1000

# Non-positive capacity cannot give a meaningful factor; mask it to NaN so
# the division yields NaN instead of +/-inf. The Capacity_GW column itself
# is left untouched.
annual_capacity_gw = hydro_annual['Capacity_GW'].where(hydro_annual['Capacity_GW'] > 0)

hydro_annual['annual_hydro_cf'] = hydro_annual['Generation_TWh'] / (
    annual_capacity_gw * annual_twh_per_gw
)

# Final table with only requested columns
annual_cf_table = hydro_annual[['iso_code', 'Year', 'annual_hydro_cf', 'Generation_TWh', 'Capacity_GW']].copy()

# Drop rows without a country code or a generation value *before* reporting,
# so the printed count and preview describe exactly what is written to disk.
annual_cf_table = annual_cf_table[annual_cf_table['iso_code'].notnull() & annual_cf_table['Generation_TWh'].notnull()]

print(f"Final table: {len(annual_cf_table)} records")
display(annual_cf_table.head(10))

annual_cf_table.to_csv('annual_cf_table.csv', index=False)





In [None]:
# Create monthly hydro capacity factor table
# Derive the calendar year and month number from the Date column

# Parse Date once, then store it back along with the Year and month columns
parsed_dates = pd.to_datetime(df_ember_monthly['Date'])
df_ember_monthly['Date'] = parsed_dates
df_ember_monthly['Year'] = parsed_dates.dt.year
df_ember_monthly['month'] = parsed_dates.dt.month

# Rows of the monthly data that hold hydro generation expressed in TWh
hydro_twh_rows = (
    df_ember_monthly['Category'].eq('Electricity generation')
    & df_ember_monthly['Variable'].eq('Hydro')
    & df_ember_monthly['Unit'].eq('TWh')
)
monthly_hydro_gen = (
    df_ember_monthly
    .loc[hydro_twh_rows, ['iso_code', 'Year', 'month', 'Value']]
    .rename(columns={'Value': 'Generation_TWh'})
)

# Attach the annual capacity figure (hydro_cap, built from the yearly data)
# to every monthly row of the same country and year. The inner join drops
# months that have no matching capacity record.
monthly_with_capacity = pd.merge(
    monthly_hydro_gen,
    hydro_cap,
    how='inner',
    on=['iso_code', 'Year'],
)

# Calculate days in each month for the capacity factor formula
import calendar
def days_in_month(year, month):
    """Return the number of days in ``month`` of ``year``.

    Delegates to ``calendar.monthrange``, which returns a pair of
    (weekday of the first day, number of days); only the day count is
    passed back to the caller.
    """
    _first_weekday, day_count = calendar.monthrange(year, month)
    return day_count

# Number of days in each (Year, month) pair, computed column-wise: build the
# first day of every month as a datetime and read its days_in_month
# attribute. This replaces a Python-level function call per row.
month_start = pd.to_datetime(pd.DataFrame({
    'year': monthly_with_capacity['Year'],
    'month': monthly_with_capacity['month'],
    'day': 1,
}))
monthly_with_capacity['days_in_month'] = month_start.dt.days_in_month

# Non-positive capacity cannot give a meaningful factor; mask it to NaN so
# the division yields NaN instead of +/-inf. The Capacity_GW column itself
# is left untouched.
monthly_capacity_gw = monthly_with_capacity['Capacity_GW'].where(
    monthly_with_capacity['Capacity_GW'] > 0
)

# Calculate monthly capacity factor
# CF = Generation_TWh / (Capacity_GW * 8.76 * days_in_month / 365)
# 8.76 / 365 is the TWh one GW produces in a day, so the denominator is the
# energy the installed capacity would deliver running flat out all month.
monthly_with_capacity['monthly_hydro_cf'] = (
    monthly_with_capacity['Generation_TWh'] /
    (monthly_capacity_gw * 8.76 * monthly_with_capacity['days_in_month'] / 365)
)

# Final monthly table with only requested columns
monthly_cf_table = monthly_with_capacity[['iso_code', 'Year', 'month', 'monthly_hydro_cf', 'Generation_TWh', 'Capacity_GW']].copy()

# Drop rows without a country code or a generation value *before* reporting,
# so the printed count and preview describe exactly what is written to disk.
monthly_cf_table = monthly_cf_table[monthly_cf_table['iso_code'].notnull() & monthly_cf_table['Generation_TWh'].notnull()]

print(f"Monthly table: {len(monthly_cf_table)} records")
display(monthly_cf_table.head(10))

monthly_cf_table.to_csv('monthly_cf_table.csv', index=False)