In [3]:
import pandas as pd
import os
import re

def process_excel_file(year):
    """
    Processes a single Excel file with 12 monthly worksheets, creates a 
    time-series DataFrame, and saves it to CSV.
    """
    excel_filename=f"FuelMixReport_PreviousYears/IntGenbyFuel{year}.xlsx"
    # 1. Define the order of sheets to process
    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    
    # This regex pattern strictly matches time formats like '0:15', '1:00', or '24:00'
    time_pattern = re.compile(r'^\d{1,2}:\d{2}$')
    
    # These are the ID columns that we will not melt
    id_vars = ['Date', 'Fuel', 'Settlement Type', 'Total']
    
    all_monthly_dfs = []

    # 2. Read the entire Excel file into a dictionary of DataFrames
    print(f"Reading all worksheets from {excel_filename}...")
    try:
        # sheet_name=None reads all sheets.
        all_sheets = pd.read_excel(excel_filename, sheet_name=None)
    except FileNotFoundError:
        print(f"Error: File not found: {excel_filename}")
        print("Please upload the single .xlsx file to run this script.")
        return
    except Exception as e:
        print(f"Error reading Excel file: {e}")
        return

    # 3. Loop, Read, and Melt
    print(f"Processing 12 monthly worksheets...")
    for month_name in month_names:
        if month_name not in all_sheets:
            print(f"Warning: Worksheet '{month_name}' not found in the Excel file. Skipping.")
            continue
        
        try:
            df = all_sheets[month_name]
            
            # Identify time columns *for this specific worksheet*
            potential_time_cols = [col for col in df.columns if col not in id_vars]
            time_cols = [col for col in potential_time_cols if time_pattern.match(str(col))]
            
            if not time_cols:
                print(f"Warning: No valid time columns found in sheet '{month_name}'. Skipping.")
                continue

            # Melt the DataFrame from wide to long format
            df_long = df.melt(
                id_vars=['Date', 'Fuel'], 
                value_vars=time_cols, 
                var_name='Time',         # New column for '0:15', '0:30', etc.
                value_name='Generation_MW' # New column for the values
            )
            all_monthly_dfs.append(df_long)
            
        except Exception as e:
            print(f"Error processing sheet '{month_name}': {e}")
            
    if not all_monthly_dfs:
        print("No data was successfully processed from any worksheet. Exiting.")
        return

    # 4. Combine
    print("Combining data...")
    combined_df = pd.concat(all_monthly_dfs, ignore_index=True)

    # 5. Clean Generation_MW column
    combined_df['Generation_MW'] = pd.to_numeric(combined_df['Generation_MW'], errors='coerce')
    combined_df = combined_df.dropna(subset=['Generation_MW'])

    # 6. Create Datetime Index
    print("Creating datetime index...")
    
    # Ensure the 'Time' column is a string before doing string operations
    combined_df['Time'] = combined_df['Time'].astype(str)
    
    # Store which rows are '24:00' (end of day)
    is_2400 = (combined_df['Time'] == '24:00')
    
    # Replace '24:00' with '0:00' to make parsing uniform
    time_adj = combined_df['Time'].str.replace('24:00', '0:00')
    
    # Convert 'Date' column to datetime, then back to string to ensure 'YYYY-MM-DD' format
    date_str = pd.to_datetime(combined_df['Date']).dt.strftime('%Y-%m-%d')
    
    # Create the full datetime string (e.g., '2023-01-01 0:15')
    datetime_str = date_str + ' ' + time_adj
    
    # Convert to datetime. This is the *end* of the interval.
    interval_end_dt = pd.to_datetime(datetime_str, format='%Y-%m-%d %H:%M', errors='coerce')
    
    # Add 1 day to all the rows that were originally '24:00'
    interval_end_dt.loc[is_2400] = interval_end_dt.loc[is_2400] + pd.Timedelta(days=1)

    # Check for any parsing errors
    bad_rows = interval_end_dt.isna()
    if bad_rows.any():
        print(f"Found and dropped rows with problematic date/time strings.")
        combined_df = combined_df[~bad_rows]
        interval_end_dt = interval_end_dt.dropna()

    if combined_df.empty:
        print("No valid data remaining after time conversion. Exiting.")
        return
        
    # The final datetime is the *start* of the interval (end time - 15 mins)
    combined_df['datetime'] = interval_end_dt - pd.Timedelta(minutes=15)

    # 7. Pivot
    print("Pivoting DataFrame...")
    final_df = combined_df.pivot_table(
        index='datetime', 
        columns='Fuel', 
        values='Generation_MW', 
        aggfunc='sum'
    )
    
    # 8. Clean and Save
    final_df = final_df.sort_index()
    final_df = final_df.fillna(0) # Assume missing values are 0 generation
    output_csv = f'ercot_generation_by_fuel_{year}_timeseries_from_excel.csv'
    final_df.to_csv(output_csv)
    
    print("\n--- Success! ---")
    print(f"Successfully processed data and saved to: {output_csv}")
    print("\nDataFrame Head:")
    print(final_df.head())
    print("\nDataFrame Info:")
    final_df.info()

    return final_df

In [4]:
for year in range(2016,2026):
    process_excel_file(year)

Reading all worksheets from FuelMixReport_PreviousYears/IntGenbyFuel2016.xlsx...
Processing 12 monthly worksheets...
No data was successfully processed from any worksheet. Exiting.
Reading all worksheets from FuelMixReport_PreviousYears/IntGenbyFuel2017.xlsx...
Processing 12 monthly worksheets...
Combining data...
Creating datetime index...
Pivoting DataFrame...

--- Success! ---
Successfully processed data and saved to: ercot_generation_by_fuel_2017_timeseries_from_excel.csv

DataFrame Head:
Fuel                   Biomass         Coal         Gas       Gas-CC  \
datetime                                                               
2016-12-31 23:45:00  13.516539  2194.360961  204.550559   784.408180   
2017-01-01 00:00:00  14.075394  4314.453941  217.786501  1089.048595   
2017-01-01 00:15:00  14.071301  4321.568298  219.407610   996.812974   
2017-01-01 00:30:00  14.081591  4290.613423  214.255654   997.357426   
2017-01-01 00:45:00  14.085300  4280.098265  214.360903   966.491013  

In [5]:
process_excel_file(2025)

Reading all worksheets from FuelMixReport_PreviousYears/IntGenbyFuel2025.xlsx...
Processing 12 monthly worksheets...
Error processing sheet 'Oct': "The following id_vars or value_vars are not present in the DataFrame: ['Fuel']"
Error processing sheet 'Nov': "The following id_vars or value_vars are not present in the DataFrame: ['Fuel']"
Error processing sheet 'Dec': "The following id_vars or value_vars are not present in the DataFrame: ['Fuel']"
Combining data...
Creating datetime index...
Pivoting DataFrame...

--- Success! ---
Successfully processed data and saved to: ercot_generation_by_fuel_2025_timeseries_from_excel.csv

DataFrame Head:
Fuel                  Biomass         Coal         Gas       Gas-CC     Hydro  \
datetime                                                                        
2024-12-31 23:45:00  3.048085  1934.772915  296.505007  3839.913792  1.042736   
2025-01-01 00:00:00  3.054841  1635.159759  222.914735  4356.562359  1.033143   
2025-01-01 00:15:00  3.049

Fuel,Biomass,Coal,Gas,Gas-CC,Hydro,Nuclear,Other,Solar,WSL,Wind
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-12-31 23:45:00,3.048085,1934.772915,296.505007,3839.913792,1.042736,1271.347524,13.749501,0.000018,-222.641600,4274.792857
2025-01-01 00:00:00,3.054841,1635.159759,222.914735,4356.562359,1.033143,1271.441126,2.713811,0.000012,-194.077677,3536.617372
2025-01-01 00:15:00,3.049060,1591.098918,224.037142,4359.891349,1.033923,1271.023989,1.570692,0.000012,-193.709565,3573.464001
2025-01-01 00:30:00,3.054768,1555.589102,224.465410,4359.030173,1.034740,1271.482283,1.220092,0.000012,-249.695136,3649.335367
2025-01-01 00:45:00,3.053916,1518.956907,232.559316,4384.863310,1.034923,1271.547756,2.382973,0.000018,-316.258660,3697.360457
...,...,...,...,...,...,...,...,...,...,...
2025-09-30 22:30:00,15.883263,2303.111797,1630.168277,6354.505602,4.777148,1252.260487,25.471235,0.000374,-31.265327,2772.102136
2025-09-30 22:45:00,15.883598,2204.549513,1557.743545,6198.068810,4.281369,1252.143288,13.543603,0.000396,-105.873400,2884.218085
2025-09-30 23:00:00,15.892906,2129.168671,1342.488751,6058.154326,0.000000,1252.180696,41.388584,0.000364,-93.618180,3022.936190
2025-09-30 23:15:00,15.876592,2065.096003,1102.549042,5993.472102,0.000000,1252.043115,20.793732,0.000356,-93.128710,3154.630685


In [6]:
from gridstatus import Ercot
ercot = Ercot()

In [7]:
ercot.get_fuel_mix("latest")

2025-11-09 08:27:40 - INFO - Requesting https://www.ercot.com/api/1/services/read/dashboards/fuel-mix.json with {}


Unnamed: 0,Time,Coal and Lignite,Hydro,Nuclear,Power Storage,Solar,Wind,Natural Gas,Other
0,2025-11-07 00:04:56-06:00,6144.143555,0.000000,4392.654785,402.015167,1.428761,23995.400391,11835.574980,0.0
1,2025-11-07 00:09:56-06:00,6237.263672,0.000000,4405.371582,215.424469,0.605437,24013.574219,11692.975319,0.0
2,2025-11-07 00:14:56-06:00,6471.045898,0.000000,4405.310059,71.763268,0.445807,23964.892578,11428.079790,0.0
3,2025-11-07 00:19:56-06:00,6556.156738,0.000000,4407.919922,47.327503,0.656503,23843.123047,11094.545027,0.0
4,2025-11-07 00:24:56-06:00,6443.469238,0.000000,4407.117676,88.569580,0.611149,23862.875000,11089.510120,0.0
...,...,...,...,...,...,...,...,...,...
492,2025-11-08 17:04:56-06:00,7172.600586,54.098518,5061.100586,4093.764648,4016.989258,16321.678711,21184.167908,0.0
493,2025-11-08 17:09:56-06:00,7175.628906,54.207901,5062.681641,4264.214355,3017.718994,16648.583984,21481.484737,0.0
494,2025-11-08 17:14:56-06:00,7208.358887,54.185467,5060.911621,4603.275391,2194.724609,16804.181641,21578.821735,0.0
495,2025-11-08 17:19:56-06:00,7195.438477,54.196533,5061.260742,5036.317383,1541.808228,16811.259766,21780.338680,0.0
