In [None]:
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

sys.path.append("/home/truggles/Inter-annual_Variability_Residual_Load")
from helpers import return_file_info_map

In [None]:
def get_temp_file(ba):
    """Load the hourly temperature CSV for `ba` and add shifted time columns.

    The file name is the first entry of the 'temp' list returned by
    `return_file_info_map(ba)`, resolved relative to the parent directory.
    'yr_local', 'hr_local' and 'day_local' are the 'year', 'hour' and 'day'
    columns shifted by ``-to_local`` rows (presumably the hour offset from
    the file's time base to local time -- confirm in `helpers`).

    Only rows with 'yr_local' >= the first configured year and 'year' <= the
    last configured year are kept; rows whose shifted year is NaN therefore
    drop out. The result is returned with a fresh 0..n-1 index.
    """
    ba_info = return_file_info_map(ba)
    hourly = pd.read_csv(f"../{ba_info['temp'][0]}")

    row_shift = -1 * ba_info['to_local']
    for shifted_name, source_name in (
        ('yr_local', 'year'),
        ('hr_local', 'hour'),
        ('day_local', 'day'),
    ):
        hourly[shifted_name] = hourly[source_name].shift(periods=row_shift)

    first_year = ba_info['years'][0]
    last_year = ba_info['years'][-1]
    in_window = (hourly['yr_local'] >= first_year) & (hourly['year'] <= last_year)

    # reset_index() keeps the old index as an 'index' column; remove it again.
    return hourly.loc[in_window].reset_index().drop(['index'], axis=1)

def get_demand_file(ba):
    """Load the hourly demand CSV for `ba`, add shifted time columns, normalize.

    The file name is the first entry of the 'demand' list returned by
    `return_file_info_map(ba)`, resolved relative to the parent directory.
    'yr_local', 'hr_local' and 'day_local' are the 'year', 'hour' and 'day'
    columns shifted by ``-to_local`` rows (presumably the hour offset from
    the file's time base to local time -- confirm in `helpers`).

    Rows are kept when 'yr_local' >= the first configured year and
    'year' <= the last configured year. The re-indexed frame is then passed
    through `demand_normalize_annual_mean`, whose result is returned.
    """
    ba_info = return_file_info_map(ba)
    hourly = pd.read_csv(f"../{ba_info['demand'][0]}")

    row_shift = -1 * ba_info['to_local']
    for shifted_name, source_name in (
        ('yr_local', 'year'),
        ('hr_local', 'hour'),
        ('day_local', 'day'),
    ):
        hourly[shifted_name] = hourly[source_name].shift(periods=row_shift)

    first_year = ba_info['years'][0]
    last_year = ba_info['years'][-1]
    in_window = (hourly['yr_local'] >= first_year) & (hourly['year'] <= last_year)

    # reset_index() keeps the old index as an 'index' column; remove it again.
    windowed = hourly.loc[in_window].reset_index().drop(['index'], axis=1)
    return demand_normalize_annual_mean(ba, windowed)

def demand_normalize_annual_mean(ba, dem):
    """Add a 'normalized demand' column: demand divided by its yearly mean.

    For every year listed under 'years' in `return_file_info_map(ba)`, the
    rows of `dem` with that 'year' get ``demand (MW) / nanmean(demand (MW))``
    computed over that year only.

    Values are written back through a boolean row mask, so each result lands
    on the row it was computed from regardless of how `dem` is ordered. Rows
    whose 'year' is not one of the configured years are left as NaN instead
    of breaking the column assignment with a length mismatch.

    Parameters
    ----------
    ba : key understood by `return_file_info_map`.
    dem : pandas.DataFrame with 'year' and 'demand (MW)' columns. It is
        modified in place.

    Returns
    -------
    The same `dem` object, with the 'normalized demand' column added.
    """
    ba_info = return_file_info_map( ba )
    years = ba_info['years']

    # Start undefined everywhere; each configured year fills in its own rows.
    dem['normalized demand'] = np.nan
    for yr in years:
        in_year = (dem['year'] == yr).to_numpy()
        if not in_year.any():
            # Nothing to normalize; also avoids nanmean's empty-slice warning.
            continue
        yearly_demand = dem.loc[in_year, 'demand (MW)']
        normalized = yearly_demand / np.nanmean(yearly_demand)
        # Assign positionally within the mask so a non-unique index is harmless.
        dem.loc[in_year, 'normalized demand'] = normalized.to_numpy()
    return dem

def analyze(ba, dy):
    """Return one CDD-vs-peak-demand correlation per configured year.

    For each year listed under 'years' in `return_file_info_map(ba)`, the
    rows of `dy` with that 'year' are selected and the correlation between
    the 'CDD_nan' and 'pk_dem' columns is taken from `DataFrame.corr()`
    (pandas' default method). The results are returned as a list in the
    same order as the configured years.
    """
    pair = ['CDD_nan', 'pk_dem']
    return [
        dy.loc[dy['year'] == yr][pair].corr().loc['CDD_nan', 'pk_dem']
        for yr in return_file_info_map(ba)['years']
    ]

def _group_by_local_day(frame, column):
    """Group the hourly values of `frame[column]` into local days.

    A day ends on (and includes) each row whose 'hr_local' equals 24. Rows
    after the last such row form an unfinished day and are left out.

    Returns a tuple ``(grouped, closes_day)``: a pandas SeriesGroupBy with
    one group per completed day in row order, and a boolean numpy array
    marking the rows that close a day.
    """
    closes_day = (frame['hr_local'] == 24).to_numpy()
    closed = closes_day.astype(int)
    # Number of day-closing rows strictly before each row. The hour-24 row
    # therefore still carries the id of the day it closes.
    day_id = np.cumsum(closed) - closed
    complete = day_id < closed.sum()
    values = pd.Series(frame[column].to_numpy()[complete])
    return values.groupby(day_id[complete]), closes_day


def calc_daily_values(dem, temp, HDD_base, CDD_base, demand='normalized demand'):
    """Collapse hourly demand and temperature into one row per local day.

    A day is every row up to and including a row with ``hr_local == 24``.
    The hour-24 observation is part of the day it closes, so it counts
    towards that day's peak, minimum, maximum and mean. The mean is taken
    over the hours actually present rather than a fixed 24. Missing values
    are skipped, and a day with no valid observation yields NaN.

    Parameters
    ----------
    dem : pandas.DataFrame with 'hr_local', 'year', 'month', 'day' and the
        column named by `demand`.
    temp : pandas.DataFrame with 'hr_local' and 'temp'.
    HDD_base : currently unused; the heating-degree-day columns are
        disabled (see the commented lines below).
    CDD_base : base temperature subtracted to form the cooling degree days.
    demand : name of the demand column in `dem`.

    Returns
    -------
    pandas.DataFrame with columns 'year', 'month', 'day' (read from the
    day-closing row of `dem`), 'pk_dem', 'min_temp', 'max_temp', 'avg_temp',
    'CDD', 'CDD_nan', 'CDD_alt' and 'CDD_alt_nan'. The '*_nan' variants hold
    NaN wherever the degree-day value is not strictly positive.

    Raises
    ------
    ValueError
        If `dem` and `temp` do not contain the same number of completed days.
    """
    dem_by_day, dem_closes = _group_by_local_day(dem, demand)
    temp_by_day, temp_closes = _group_by_local_day(temp, 'temp')

    n_dem_days = int(dem_closes.sum())
    n_temp_days = int(temp_closes.sum())
    if n_dem_days != n_temp_days:
        raise ValueError(
            f"demand has {n_dem_days} completed days but temperature has "
            f"{n_temp_days}; the two inputs must cover the same days"
        )

    # Calendar labels come from the row that closes each day.
    day_stamp = dem.loc[dem_closes, ['year', 'month', 'day']]

    df = pd.DataFrame({
        'year': day_stamp['year'].to_numpy(),
        'month': day_stamp['month'].to_numpy(),
        'day': day_stamp['day'].to_numpy(),
        'pk_dem': dem_by_day.max().to_numpy(),
        'min_temp': temp_by_day.min().to_numpy(),
        'max_temp': temp_by_day.max().to_numpy(),
        'avg_temp': temp_by_day.mean().to_numpy(),
    })

    # "Mean"-based: midpoint of the daily minimum and maximum.
    #df['HDD'] = HDD_base - (df['min_temp'] + df['max_temp'])/2.
    #df['HDD'] = np.where(df['HDD'] > 0., df['HDD'], 0.)
    df['CDD'] = (df['min_temp'] + df['max_temp'])/2. - CDD_base
    df['CDD_nan'] = np.where(df['CDD'] > 0., df['CDD'], np.nan)

    # Avg based: mean of the hourly temperatures.
    #df['HDD_alt'] = HDD_base - df['avg_temp']
    #df['HDD_alt'] = np.where(df['HDD_alt'] > 0., df['HDD_alt'], 0.)
    df['CDD_alt'] = df['avg_temp'] - CDD_base
    df['CDD_alt_nan'] = np.where(df['CDD_alt'] > 0., df['CDD_alt'], np.nan)

    return df

In [None]:
# Balancing authorities to analyse; each must be known to return_file_info_map.
BAs = ['ERCOT', 'NYISO', 'PJM']

# Degree-day base temperatures handed to calc_daily_values
# (presumably degrees Celsius, matching the axis label below -- confirm).
HDD_BASE = 18
CDD_BASE = 18

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('plots', exist_ok=True)

# One list of per-year correlations per balancing authority, in BAs order.
all_corrs = []

for ba in BAs:
    dem = get_demand_file(ba)
    temp = get_temp_file(ba)
    # The two row counts are printed so a demand/temperature length mismatch
    # is visible before the daily aggregation.
    print(len(dem.index))
    print(len(temp.index))

    dy = calc_daily_values(dem, temp, HDD_BASE, CDD_BASE)

    ba_info = return_file_info_map( ba )
    years = ba_info['years']

    # One scatter per year so that each year gets its own colour.
    fig, ax = plt.subplots(figsize=(3, 3))
    for yr in years:
        t = dy.loc[ dy['year'] == yr ]
        ax.scatter(t['CDD'], t['pk_dem'], alpha=0.1)
    ax.set_xlabel('DD (Celsius)')
    ax.set_ylabel('Normalized daily peak demand')
    ax.set_xlim(-40, 20)
    ax.set_ylim(.6, 2.0)
    # Save through the figure object rather than pyplot's implicit current figure.
    fig.savefig(f'plots/{ba}_degree_days_vs_pk_demand.pdf')

    corrs = analyze(ba, dy)
    all_corrs.append(corrs)

# Overlaid histograms of the yearly correlations, one per balancing authority.
bins = np.arange(0.5, 1.01, 0.025)
fig, ax = plt.subplots(figsize=(4, 3))
for ba, corrs in zip(BAs, all_corrs):
    ax.hist(corrs, bins, label=ba, alpha=0.5)
ax.set_xlabel('Correlation of Degree Days vs. peak demand')
ax.set_ylabel('Occurrences')
ax.legend()
fig.savefig('plots/degree_days_vs_pk_demand_corr.pdf')
    