In [24]:
import pandas as pd 
import numpy as np 
import gridstatusio as grid
import os
import functools as ft

In [2]:
# import API key & hydro data
client = grid.GridStatusClient('c6e457c5394b4f9c9617294d52b3c84b')
hydro_capacity = pd.read_csv('/Users/peterambiel/Downloads/hydro_resource_capacity.csv')

In [3]:
df_caiso_mix = client.get_dataset(
    dataset="caiso_fuel_mix",
    start=None,  
    end=None, 
    tz="US/Pacific",  
)

Fetching Page 1...Done in 2.18 seconds. 
Fetching Page 2...Done in 1.72 seconds. Total time: 3.9s. Avg per page: 1.95s
Fetching Page 3...Done in 1.61 seconds. Total time: 5.51s. Avg per page: 1.84s
Fetching Page 4...Done in 1.92 seconds. Total time: 7.43s. Avg per page: 1.86s
Fetching Page 5...Done in 1.52 seconds. Total time: 8.95s. Avg per page: 1.79s
Fetching Page 6...Done in 1.45 seconds. Total time: 10.41s. Avg per page: 1.73s
Fetching Page 7...Done in 1.38 seconds. Total time: 11.78s. Avg per page: 1.68s
Fetching Page 8...Done in 1.79 seconds. Total time: 13.57s. Avg per page: 1.7s
Fetching Page 9...Done in 1.62 seconds. Total time: 15.19s. Avg per page: 1.69s
Fetching Page 10...Done in 1.64 seconds. Total time: 16.83s. Avg per page: 1.68s
Fetching Page 11...Done in 1.76 seconds. Total time: 18.59s. Avg per page: 1.69s
Fetching Page 12...Done in 1.75 seconds. Total time: 20.34s. Avg per page: 1.69s
Fetching Page 13...Done in 1.11 seconds. Total time: 21.44s. Avg per page: 1.65s



In [4]:
hydro_capacity_filtered = hydro_capacity.loc[:, ['Resource', '2018', '2019', '2020', '2021', '2022']]

In [5]:
df_caiso_mix_filtered = df_caiso_mix.loc[:, ['interval_start_local', 'interval_end_local', 'small_hydro', 'large_hydro']]


In [6]:
df_caiso_mix_filtered = (df_caiso_mix_filtered
    .assign(year = lambda x: x['interval_start_local'].dt.year,
        month = lambda x: x['interval_start_local'].dt.month,
        day = lambda x: x['interval_start_local'].dt.day, 
        hour = lambda x: x['interval_start_local'].dt.hour)
    .groupby(['year', 'month', 'day', 'hour'])
    .apply(lambda x: x.assign(mean_hourly_large_hydro=x['large_hydro'].mean(), 
        mean_hourly_small_hydro=x['small_hydro'].mean()))
    .assign(large_hydro_capacity = 12281, small_hydro_capacity = 1759)
    .assign(large_hydro_cf = lambda x: x['mean_hourly_large_hydro']/ x['large_hydro_capacity'], 
        small_hydro_cf = lambda x: x['mean_hourly_small_hydro']/ x['small_hydro_capacity'])
    .reset_index()
    .drop_duplicates(subset=['year', 'month', 'day', 'hour'], keep='first') 
)


To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(lambda x: x.assign(mean_hourly_large_hydro=x['large_hydro'].mean(),


In [7]:
folder_path = '/Users/peterambiel/Desktop/good_model/'
file_name= 'hydro_capacity_factor.csv'

# Create the folder if it doesn't exist
os.makedirs(folder_path, exist_ok=True)

# Write the DataFrame to a CSV file in the specified folder
df_caiso_mix_filtered .to_csv(os.path.join(folder_path, file_name), index=False)

In [17]:
years = list(range(2018, 2023))


annual_cf = {}

for year in years: 

    df = (df_caiso_mix_filtered
    .query('year == @year')
    .reset_index()
    .assign(index_hours = lambda x: x.index))
    annual_cf[str(year)] = df


In [35]:
df_list = [df for key, df in annual_cf.items()][::-1]

In [38]:
new_df = ft.reduce(lambda left, right: pd.merge(left, right, on='index_hours', how='left'), df_list)

  new_df = ft.reduce(lambda left, right: pd.merge(left, right, on='index_hours', how='left'), df_list)


In [42]:
new_df = (new_df
    .loc[:, ['index_hours'] + [col for col in new_df.columns if 'cf' in col]])
    

In [51]:
hydro_capacity_factor = (new_df
    .assign(large_cf_mean = lambda x: x[[col for col in x.columns if 'large_hydro' in col]].mean(axis=1), 
        small_cf_mean = lambda x: x[[col for col in x.columns if 'small_hydro' in col]].mean(axis=1))
        [['index_hours', 'large_cf_mean', 'small_cf_mean']])

In [52]:
folder_path = '/Users/peterambiel/Desktop/good_model/'
file_name= 'hydro_capacity_factor.csv'

# Create the folder if it doesn't exist
os.makedirs(folder_path, exist_ok=True)

# Write the DataFrame to a CSV file in the specified folder
hydro_capacity_factor.to_csv(os.path.join(folder_path, file_name), index=False)