In [None]:
%reload_ext autoreload
%autoreload 2

import os

os.chdir(f"/home/{os.getlogin()}/watttime-python-client-aer-algo")

import math
import numpy as np
import pandas as pd
import datetime
import pytz
import seaborn as sns
from datetime import datetime, timedelta
import concurrent.futures

from watttime import WattTimeForecast, WattTimeHistorical

import optimizer.s3 as s3u
import evaluation.eval_framework as efu

# WattTime API credentials come from the environment; os.getenv returns None
# when a variable is not set.
username = os.getenv("WATTTIME_USER")
password = os.getenv("WATTTIME_PASSWORD")

# NOTE: despite its name, `hist_data` is the WattTimeForecast client (used
# below to request historical forecasts); `actual_data` is the
# WattTimeHistorical client.
actual_data = WattTimeHistorical(username, password)
hist_data = WattTimeForecast(username, password)

# Load the synthetic-user CSV through the project's S3 helper.
s3 = s3u.s3_utils()
key = "20240726_1k_synth_users_163_days.csv"
generated_data = s3.load_csvdataframe(file=key)
# Keep only the trailing 8000 entries (rows, assuming a DataFrame is returned).
generated_data = generated_data[-8000:]

# Synthetic User Data

In [None]:
# regions = [
# 'CAISO_NORTH',
# 'SPP_TX',
# 'ERCOT_EASTTX',
# 'FPL',
# 'SOCO',
# 'PJM_CHICAGO',
# 'LDWP',
# 'PJM_DC',
# 'NYISO_NYC'
# ]
# Regions to evaluate; the rest of the notebook works on the first entry only.
regions = [
    "PJM_CHICAGO",
]
region = regions[0]

# Work on a deep copy so `generated_data` stays untouched, and parse the
# timestamp columns once up front.
synth_data = generated_data.copy(deep=True)
synth_data["session_start_time"] = pd.to_datetime(synth_data["session_start_time"])
synth_data["unplug_time"] = pd.to_datetime(synth_data["unplug_time"])

import pickle

# NOTE(security): pickle.loads can execute arbitrary code while deserializing;
# only load these objects from a bucket you trust.
actual_pickle = s3.load_file(file="pjm_actual.pkl")
HISTORICAL_ACTUAL_CACHE = pickle.loads(actual_pickle)

# The forecast cache must be built from the forecast pickle, not from the
# actual-data pickle loaded above.
forecast_pickle = s3.load_file(file="pjm_forecast.pkl")
HISTORICAL_FORECAST_CACHE = pickle.loads(forecast_pickle)

# Cached version of the get_*_data functions

In [None]:
%%time


def precache_actual_data(synth_data, regions):
    """Fetch historical data for every (date, region) combination up front.

    Dates come from the unique ``distinct_dates`` strings (``YYYY-MM-DD``) in
    ``synth_data``. For each combination,
    ``actual_data.get_historical_pandas`` is called for a window that starts
    9 hours before the beginning of the date and ends 9 hours after the
    beginning of the date two days later. Requests are issued through a
    thread pool.

    Returns:
        dict mapping ``(region, date)`` -> fetched result, where ``date`` is a
        ``datetime.date``.
    """
    padding = pd.Timedelta("9h")

    def fetch(pair):
        day, area = pair
        window_start = pd.to_datetime(day)
        window_end = window_start + pd.Timedelta("2d")
        frame = actual_data.get_historical_pandas(
            window_start - padding,
            window_end + padding,
            area,
        )
        return area, day, frame

    # Build the full list of (date, region) work items.
    pairs = []
    for raw_date in synth_data["distinct_dates"].unique().tolist():
        day = datetime.strptime(raw_date, "%Y-%m-%d").date()
        for area in regions:
            pairs.append((day, area))

    with concurrent.futures.ThreadPoolExecutor(max_workers=100) as pool:
        fetched = list(pool.map(fetch, pairs))

    cache = {}
    for area, day, frame in fetched:
        cache[(area, day)] = frame
    return cache


# Build the actual-data cache from the API for every date in synth_data. This
# rebinds HISTORICAL_ACTUAL_CACHE, replacing whatever it held before.
HISTORICAL_ACTUAL_CACHE = precache_actual_data(synth_data, regions)

In [None]:
%%time


def precache_fcst_data(synth_data, regions):
    """Fetch historical forecasts for every (date, region) combination up front.

    Dates come from the unique ``distinct_dates`` strings (``YYYY-MM-DD``) in
    ``synth_data``. For each combination,
    ``hist_data.get_historical_forecast_pandas`` is called for a window that
    starts 9 hours before the beginning of the date and ends 9 hours after the
    beginning of the following date. Requests are issued through a thread
    pool.

    Returns:
        dict mapping ``(region, date)`` -> fetched result, where ``date`` is a
        ``datetime.date``.
    """
    padding = pd.Timedelta("9h")

    def fetch(pair):
        day, area = pair
        window_start = pd.to_datetime(day)
        window_end = pd.to_datetime(day) + pd.Timedelta("1d")
        forecasts = hist_data.get_historical_forecast_pandas(
            window_start - padding,
            window_end + padding,
            area,
        )
        return area, day, forecasts

    # Build the full list of (date, region) work items.
    pairs = []
    for raw_date in synth_data["distinct_dates"].unique().tolist():
        day = datetime.strptime(raw_date, "%Y-%m-%d").date()
        for area in regions:
            pairs.append((day, area))

    with concurrent.futures.ThreadPoolExecutor(max_workers=100) as pool:
        fetched = list(pool.map(fetch, pairs))

    cache = {}
    for area, day, forecasts in fetched:
        cache[(area, day)] = forecasts
    return cache


# Build the forecast cache from the API for every date in synth_data. This
# rebinds HISTORICAL_FORECAST_CACHE, replacing whatever it held before.
HISTORICAL_FORECAST_CACHE = precache_fcst_data(synth_data, regions)

---

In [None]:
def get_historical_fcst_data_cached(session_start_time, horizon, region):
    """Return the latest cached forecast generated before a session starts.

    The forecast set for ``(region, session date)`` is read from
    ``HISTORICAL_FORECAST_CACHE``; on a cache miss it is fetched through
    ``hist_data.get_historical_forecast_pandas`` and stored in the cache.

    Args:
        session_start_time: Session start timestamp (local time; it is
            converted to UTC with the region's time zone for comparison).
        horizon: Number of intervals needed; rounded up to a multiple of 12
            rows.
        region: Region identifier used for the time zone lookup, the cache
            key, and the API request.

    Returns:
        A copy of the rows belonging to the most recent ``generated_at`` that
        is strictly earlier than the session start, truncated to the rounded
        horizon.

    Raises:
        ValueError: If no forecast in the cache entry was generated before the
            session start.
    """
    time_zone = efu.get_timezone_from_dict(region)
    session_start_time_utc = pd.Timestamp(
        efu.convert_to_utc(session_start_time, time_zone)
    )
    date = session_start_time.date()
    if (region, date) not in HISTORICAL_FORECAST_CACHE:
        # Cache miss: fetch the same padded window the pre-caching step uses.
        start = pd.to_datetime(date)
        end = start + pd.Timedelta("1d")
        HISTORICAL_FORECAST_CACHE[(region, date)] = (
            hist_data.get_historical_forecast_pandas(
                start - pd.Timedelta("9h"),
                end + pd.Timedelta("9h"),
                region,
            )
        )
    cache = HISTORICAL_FORECAST_CACHE[(region, date)]

    # make this match efu.get_historical_fsct_data
    earlier_generated_at = [
        t for t in cache["generated_at"].unique() if t < session_start_time_utc
    ]
    if not earlier_generated_at:
        raise ValueError(
            f"No forecast generated before {session_start_time_utc} "
            f"for region {region!r} on {date}"
        )
    generated_at = max(earlier_generated_at)
    df = cache[cache["generated_at"] == generated_at].copy()
    return df.iloc[: math.ceil(horizon / 12) * 12]


def get_historical_actual_data_cached(session_start_time, horizon, region):
    """Return cached historical data starting just before a session begins.

    The data for ``(region, session date)`` is read from
    ``HISTORICAL_ACTUAL_CACHE``; on a cache miss it is fetched through
    ``actual_data.get_historical_pandas`` and stored in the cache.

    Args:
        session_start_time: Session start timestamp (local time; it is
            converted to UTC with the region's time zone for comparison).
        horizon: Number of intervals needed; rounded up to a multiple of 12,
            plus one extra row.
        region: Region identifier used for the time zone lookup, the cache
            key, and the API request.

    Returns:
        A copy of the rows whose ``point_time`` is at or after the latest
        ``point_time`` strictly earlier than the session start, truncated to
        the rounded horizon plus one row, with a fresh 0-based index.

    Raises:
        ValueError: If the cache entry has no ``point_time`` before the
            session start.
    """
    time_zone = efu.get_timezone_from_dict(region)
    session_start_time_utc = pd.Timestamp(
        efu.convert_to_utc(session_start_time, time_zone)
    )
    date = session_start_time.date()

    if (region, date) not in HISTORICAL_ACTUAL_CACHE:
        # Cache miss: fetch the same padded window the pre-caching step uses.
        start = pd.to_datetime(date)
        end = start + pd.Timedelta("2d")
        HISTORICAL_ACTUAL_CACHE[(region, date)] = actual_data.get_historical_pandas(
            start - pd.Timedelta("9h"),
            end + pd.Timedelta("9h"),
            region,
        )
    cache = HISTORICAL_ACTUAL_CACHE[(region, date)]

    # Compare against the UTC session start computed above.
    earlier_point_times = [
        t for t in cache["point_time"].unique() if t < session_start_time_utc
    ]
    if not earlier_point_times:
        raise ValueError(
            f"No historical data point before {session_start_time_utc} "
            f"for region {region!r} on {date}"
        )
    t_start = max(earlier_point_times)
    df = cache[cache["point_time"] >= t_start].copy()
    return df.iloc[: math.ceil(horizon / 12) * 12 + 1].reset_index(drop=True)

## API Data with caching

In [None]:
%%time

# Attach to every session the cached forecast selected for its start time.
synth_data["moer_data"] = synth_data.apply(
    lambda session: get_historical_fcst_data_cached(
        session.session_start_time,
        math.ceil(session.total_intervals_plugged_in),
        region=region,
    ),
    axis=1,
)

In [None]:

%%time

# Attach to every session the cached historical data starting at its plug-in time.
synth_data["moer_data_actual"] = synth_data.apply(
    lambda session: get_historical_actual_data_cached(
        session.session_start_time,
        math.ceil(session.total_intervals_plugged_in),
        region=region,
    ),
    axis=1,
)

## MOER - No Optimization - Actual Emissions

In [None]:
%%time

def get_charging_schedule_lambda(result):
    """Return the ``usage`` column of a schedule result as a flat array."""
    return result["usage"].values.flatten()


def get_total_emissions_lambda(result):
    """Return the sum of the ``emissions_co2e_lb`` column of a schedule result."""
    return result["emissions_co2e_lb"].sum()


# Baseline (no optimization) schedule evaluated against the actual MOER data.
# The charge time is the shorter of time-to-95% and session length, rounded up
# to whole 300-second steps and multiplied by 5.0 (presumably minutes - confirm
# against efu.get_schedule_and_cost_api).
synth_data["charger_baseline_actual_api"] = synth_data.apply(
    lambda session: efu.get_schedule_and_cost_api(
        session.power_output_rate,
        math.ceil(
            min(session.total_seconds_to_95, session.length_of_session_in_seconds)
            / 300.0
        )
        * 5.0,
        session.moer_data_actual,
        optimization_method="baseline",
    ),
    axis=1,
)

baseline_results = "charger_baseline_actual_api"
synth_data["baseline_charging_schedule_api"] = synth_data[baseline_results].apply(
    get_charging_schedule_lambda
)
synth_data["baseline_actual_emissions_api"] = synth_data[baseline_results].apply(
    get_total_emissions_lambda
)
del baseline_results

## MOER - Simple Optimization - Forecast Emissions

In [None]:
%%time


# TODO: I feel like this slicing might lead to bugs in the future
def get_total_emission(moer, schedule):
    """Dot product of a flattened schedule with the leading MOER values.

    ``schedule`` is flattened to one dimension; ``moer`` is sliced to that
    many leading entries before the dot product is taken.
    """
    usage = np.asarray(schedule).ravel()
    n_steps = usage.size
    return np.dot(moer[:n_steps], usage)


# "Simple" optimization driven by the forecast MOER data in `moer_data`.
# The charge time is the shorter of time-to-95% and session length, rounded up
# to whole 300-second steps and multiplied by 5, matching the other
# scheduling cells in this notebook.
synth_data["charger_simple_forecast"] = synth_data.apply(
    lambda x: efu.get_schedule_and_cost_api(
        x.power_output_rate,
        int(
            math.ceil(
                min(x.total_seconds_to_95, x.length_of_session_in_seconds) / 300.0
            )
            * 5
        ),
        x.moer_data,
        optimization_method="simple",
    ),
    axis=1,
)

synth_data["simple_charging_schedule"] = synth_data["charger_simple_forecast"].apply(
    get_charging_schedule_lambda
)
# Emissions as estimated by the optimizer from the forecast.
synth_data["simple_estimated_emissions"] = synth_data["charger_simple_forecast"].apply(
    get_total_emissions_lambda
)
# Emissions of the forecast-driven schedule re-evaluated against the actual
# MOER values.
synth_data["simple_actual_emissions"] = synth_data.apply(
    lambda x: get_total_emission(
        x.moer_data_actual["value"],
        x.charger_simple_forecast.energy_usage_mwh,
    ),
    axis=1,
)

In [None]:
%%time

# "Simple" optimization driven directly by the actual MOER data.
synth_data["charger_simple_actual"] = synth_data.apply(
    lambda session: efu.get_schedule_and_cost_api(
        session.power_output_rate,
        int(
            math.ceil(
                min(
                    session.total_seconds_to_95,
                    session.length_of_session_in_seconds,
                )
                / 300.0
            )
            * 5
        ),
        session.moer_data_actual,
        optimization_method="simple",
    ),
    axis=1,
)

# Derive the schedule and total-emissions columns from the result above.
for target_column, extractor in (
    ("simple_actual_charging_schedule", get_charging_schedule_lambda),
    ("simple_ideal_emissions", get_total_emissions_lambda),
):
    synth_data[target_column] = synth_data["charger_simple_actual"].apply(extractor)
del target_column, extractor

---

# Store the results

In [None]:
from optimizer.s3 import s3_utils

s3 = s3_utils()
# Columns written to the results CSV. The baseline emissions column is the one
# this notebook actually creates ("baseline_actual_emissions_api"); selecting a
# name that does not exist in synth_data would raise a KeyError.
cols = [
    "user_type",
    "power_output_rate",
    "distinct_dates",
    "session_start_time",
    "total_intervals_plugged_in",
    "charged_kWh_actual",
    "MWh_fraction",
    "simple_actual_emissions",
    "baseline_actual_emissions_api",
    "simple_estimated_emissions",
    "simple_ideal_emissions",
]

# One results file per region.
s3.store_csvdataframe(
    synth_data[cols], f"results_v2/20240726_1k_synth_users_163_days_{region}.csv"
)