In [1]:
%reload_ext autoreload
%autoreload 2

import os

# Switch the working directory to the repository checkout under the current
# user's /home directory -- presumably so the project-local imports further
# down (`data.s3`, `evaluation.eval_framework`) resolve; TODO confirm.
try:
    _login_name = os.getlogin()
except OSError:
    # os.getlogin() reports the user of the controlling terminal and raises
    # OSError when the process has none (e.g. a kernel started by a service).
    # Fall back to the environment/passwd based lookup in that case.
    import getpass

    _login_name = getpass.getuser()

os.chdir(f"/home/{_login_name}/watttime-python-client-aer-algo")

import math
import numpy as np
import pandas as pd
import datetime
import pytz
import seaborn as sns
from datetime import datetime, timedelta
import concurrent.futures
from itertools import product
from tqdm import tqdm

from watttime import WattTimeForecast, WattTimeHistorical

import data.s3 as s3u
import evaluation.eval_framework as efu

# WattTime credentials are read from the environment; each value is None when
# the corresponding variable is not set.
username = os.environ.get("WATTTIME_USER")
password = os.environ.get("WATTTIME_PASSWORD")

# API clients. Mind the names: `actual_data` is a WattTimeHistorical client,
# while `hist_data` is a WattTimeForecast client.
actual_data, hist_data = (
    WattTimeHistorical(username, password),
    WattTimeForecast(username, password),
)

# S3 helper object from the project-local `data.s3` module.
s3 = s3u.s3_utils()

In [2]:
# Region identifiers passed as `region=` to the WattTime client calls and used
# as the parquet file name for each upload.
regions = [
    "CAISO_NORTH",
    "SPP_TX",
    "ERCOT_EASTTX",
    "FPL",
    "SOCO",
    "PJM_CHICAGO",
    "LDWP",
    "PJM_DC",
    "NYISO_NYC",
]

In [3]:
# One timezone-naive timestamp per calendar day of 2023, first and last day included.
dates_2023 = pd.date_range("2023-01-01", "2023-12-31", freq="D")

In [4]:
# Clients used by the per-day download helpers: one for forecasts, one for
# historical data. Both are built from the same credentials.
forecast_generator, historical_generator = (
    WattTimeForecast(username, password),
    WattTimeHistorical(username, password),
)

In [None]:
def get_daily_historical_data(date, region):
    """Fetch one day of historical ``co2_moer`` data for a single region.

    The request is made through the module-level ``historical_generator``
    client and spans from ``date`` to ``date + 1 day``.

    Args:
        date: Start of the window; must support adding a ``timedelta``.
        region: Region identifier, forwarded to the client and written into
            the ``region`` column of the result.

    Returns:
        The object returned by ``get_historical_pandas`` with a ``region``
        column set to ``region`` on every row.
    """
    window_end = date + timedelta(days=1)
    frame = historical_generator.get_historical_pandas(
        start=date,
        end=window_end,
        region=region,
        signal_type="co2_moer",
    )

    # Tag the rows so frames from different regions stay distinguishable.
    frame["region"] = region
    return frame
    
# For every region: download each day of 2023 on a thread pool, concatenate
# the daily frames, and upload the result as one parquet file to S3.
for region in regions:
    print(f"Getting 2023 actuals data for {region}")

    with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
        # executor.map yields results in the order of dates_2023. list() drains
        # it inside the `with`, so the lambda's reference to `region` is fully
        # consumed before the loop moves on to the next region.
        result = list(tqdm(
            executor.map(lambda date: get_daily_historical_data(date, region), dates_2023),
            total=len(dates_2023),
            # This loop downloads actuals; the label previously said "forecast".
            desc=f"Processing actuals data for region {region}"
        ))

    # NOTE(review): consecutive daily windows share their boundary instant
    # (the end of one day is the start of the next). If the API treats `end`
    # as inclusive, the concatenated frame may hold duplicate rows -- TODO confirm.
    out = pd.concat(result, ignore_index=True)
    s3.store_parquetdataframe(out, f'complete_2023_actual_history/{region}.parquet')
    print("Wrote parquet file to s3")

Getting 2023 actuals data for CAISO_NORTH


Processing forecast data for region CAISO_NORTH: 100%|██████████| 365/365 [00:02<00:00, 124.34it/s]


Successful S3 put_object response. Status - 200
Wrote parquet file to s3
Getting 2023 actuals data for SPP_TX


Processing forecast data for region SPP_TX: 100%|██████████| 365/365 [00:03<00:00, 106.94it/s]


Successful S3 put_object response. Status - 200
Wrote parquet file to s3
Getting 2023 actuals data for ERCOT_EASTTX


Processing forecast data for region ERCOT_EASTTX: 100%|██████████| 365/365 [00:03<00:00, 96.77it/s] 


Successful S3 put_object response. Status - 200
Wrote parquet file to s3
Getting 2023 actuals data for FPL


Processing forecast data for region FPL: 100%|██████████| 365/365 [00:03<00:00, 107.26it/s]


Successful S3 put_object response. Status - 200
Wrote parquet file to s3
Getting 2023 actuals data for SOCO


Processing forecast data for region SOCO: 100%|██████████| 365/365 [00:03<00:00, 104.00it/s]


Successful S3 put_object response. Status - 200
Wrote parquet file to s3
Getting 2023 actuals data for PJM_CHICAGO


Processing forecast data for region PJM_CHICAGO: 100%|██████████| 365/365 [00:03<00:00, 105.80it/s]


Successful S3 put_object response. Status - 200
Wrote parquet file to s3
Getting 2023 actuals data for LDWP


Processing forecast data for region LDWP: 100%|██████████| 365/365 [00:03<00:00, 103.12it/s]


Successful S3 put_object response. Status - 200
Wrote parquet file to s3
Getting 2023 actuals data for PJM_DC


Processing forecast data for region PJM_DC: 100%|██████████| 365/365 [00:13<00:00, 26.10it/s]


Successful S3 put_object response. Status - 200
Wrote parquet file to s3
Getting 2023 actuals data for NYISO_NYC


Processing forecast data for region NYISO_NYC: 100%|██████████| 365/365 [00:04<00:00, 89.97it/s] 


Successful S3 put_object response. Status - 200
Wrote parquet file to s3


In [None]:
def get_daily_forecast_data(date, region, horizon=24):
    """Fetch one day of historical ``co2_moer`` forecasts for a single region.

    The request is made through the module-level ``forecast_generator``
    client and spans from ``date`` to ``date + 1 day``.

    Args:
        date: Start of the window; must support adding a ``timedelta``.
        region: Region identifier, forwarded to the client and written into
            the ``region`` column of the result.
        horizon: Value passed to the client as ``horizon_hours``
            (defaults to 24).

    Returns:
        The object returned by ``get_historical_forecast_pandas`` with a
        ``region`` column set to ``region`` on every row.
    """
    request = {
        "start": date,
        "end": date + timedelta(days=1),
        "region": region,
        "signal_type": "co2_moer",
        "horizon_hours": horizon,
    }
    frame = forecast_generator.get_historical_forecast_pandas(**request)

    # Tag the rows so frames from different regions stay distinguishable.
    frame["region"] = region
    return frame


# For every region: download each day of 2023 forecasts on a thread pool,
# concatenate the daily frames, and upload one parquet file to S3.
for region in regions:
    print(f"Getting 2023 data for {region}")

    def fetch_forecast_day(date, region=region):
        """Fetch one day of forecasts for the region of the current iteration."""
        return get_daily_forecast_data(date, region)

    with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
        # map() yields results in the order of dates_2023; wrapping it in tqdm
        # advances the bar as each result is consumed.
        daily_results = executor.map(fetch_forecast_day, dates_2023)
        progress = tqdm(
            daily_results,
            total=len(dates_2023),
            desc=f"Processing forecast data for region {region}",
        )
        result = list(progress)

    out = pd.concat(result, ignore_index=True)
    s3.store_parquetdataframe(out, f"complete_2023_forecast_history/{region}.parquet")
    print("Wrote parquet file to s3")

Getting 2023 data for CAISO_NORTH


Processing forecast data for region CAISO_NORTH:   0%|          | 0/365 [00:00<?, ?it/s]






Processing forecast data for region CAISO_NORTH:  25%|██▌       | 92/365 [03:26<05:18,  1.17s/it]   





Processing forecast data for region CAISO_NORTH:  31%|███       | 112/365 [03:40<04:27,  1.06s/it]




Processing forecast data for region CAISO_NORTH:  40%|████      | 147/365 [05:59<07:37,  2.10s/it]




Processing forecast data for region CAISO_NORTH: 100%|██████████| 365/365 [09:47<00:00,  1.61s/it]


Successful S3 put_object response. Status - 200
Wrote parquet file to s3
Getting 2023 data for SPP_TX


Processing forecast data for region SPP_TX:   0%|          | 0/365 [00:00<?, ?it/s]





Processing forecast data for region SPP_TX:  30%|██▉       | 108/365 [03:21<01:42,  2.50it/s]  





Processing forecast data for region SPP_TX:  30%|██▉       | 108/365 [03:36<01:42,  2.50it/s]




Processing forecast data for region SPP_TX: 100%|██████████| 365/365 [09:52<00:00,  1.62s/it]


Successful S3 put_object response. Status - 200
Wrote parquet file to s3
Getting 2023 data for ERCOT_EASTTX


Processing forecast data for region ERCOT_EASTTX:   0%|          | 0/365 [00:00<?, ?it/s]





Processing forecast data for region ERCOT_EASTTX:   7%|▋         | 27/365 [02:59<18:44,  3.33s/it]   





Processing forecast data for region ERCOT_EASTTX: 100%|██████████| 365/365 [09:49<00:00,  1.62s/it]


Successful S3 put_object response. Status - 200
Wrote parquet file to s3
Getting 2023 data for FPL


Processing forecast data for region FPL:  11%|█         | 41/365 [03:00<07:24,  1.37s/it]   





Processing forecast data for region FPL:  45%|████▍     | 164/365 [05:40<01:25,  2.36it/s]




Processing forecast data for region FPL:  57%|█████▋    | 208/365 [06:59<10:18,  3.94s/it]




Processing forecast data for region FPL: 100%|██████████| 365/365 [09:45<00:00,  1.60s/it]


Successful S3 put_object response. Status - 200
Wrote parquet file to s3
Getting 2023 data for SOCO


Processing forecast data for region SOCO:   0%|          | 0/365 [00:00<?, ?it/s]