In [8]:
import io
import datetime
import time
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.ticker as mticker

# Every mapped column name has the form '<e_st><name><r_st>'.
e_st = 'out.electricity.'
r_st = '.energy_consumption'


def _cols(*names):
    """Wrap each short name with the ``e_st`` prefix and ``r_st`` suffix."""
    return [f'{e_st}{name}{r_st}' for name in names]


# Sector -> end-use label -> list of electricity column names grouped under
# that label.  The keys of each inner dict are what the plotting helpers
# iterate over as "end uses".
enduse_map = {
    'commercial': {
        'cooling': _cols('cooling'),
        'heating': _cols('heat_recovery', 'heat_rejection', 'heating'),
        'ventilation': _cols('fans'),
        'lighting': _cols('interior_lighting', 'exterior_lighting'),
        'water_heating': _cols('water_systems'),
        'refrigeration': _cols('refrigeration'),
        'other_mels': _cols('interior_equipment'),
        'pumps': _cols('pumps'),
    },
    'residential': {
        'heating': _cols('heating', 'heating_hp_bkup'),
        'cooling': _cols('cooling'),
        'water_heating': _cols('hot_water'),
        'cooking': _cols('range_oven'),
        'lighting': _cols('lighting_interior',
                          'lighting_exterior',
                          'lighting_garage'),
        'refrigeration': _cols('refrigerator', 'freezer'),
        'ceiling_fan': _cols('ceiling_fan'),
        'fans_and_pumps': _cols('heating_fans_pumps',
                                'cooling_fans_pumps',
                                'well_pump',
                                'heating_hp_bkup_fa',
                                'mech_vent'),
        'clothes_washing': _cols('clothes_washer'),
        'drying': _cols('clothes_dryer'),
        'dishwasher': _cols('dishwasher'),
        'pool_heaters': _cols('pool_heater'),
        'pool_pumps': _cols('pool_pump'),
        'other': _cols('plug_loads'),
        'portable_electric_spas': _cols('permanent_spa_heat',
                                        'permanent_spa_pump'),
    },
}


def all_df(df, date):
    """Return the rows of ``df`` whose index month-day differs from ``date``.

    ``df`` must have a datetime-like index (it is formatted with
    ``strftime('%m-%d')``); ``date`` is compared against that 'MM-DD' text,
    so the matching day is removed regardless of year.
    """
    keep_mask = df.index.strftime('%m-%d') != date
    return df.loc[keep_mask]

def get_normalized(df,com_res):
    """Swap the end-use columns of ``df`` for their '_sqft' counterparts.

    The columns named after the end-use labels of ``enduse_map[com_res]``
    are dropped, then the text '_sqft' is removed from every remaining
    column name.  A new frame is returned; ``df`` itself is not modified.
    """
    label_columns = list(enduse_map[com_res])
    normalized = df.drop(columns=label_columns)
    normalized.columns = normalized.columns.str.replace('_sqft', '')
    return normalized
  
#######################################################################################
def returnPkDayData(df, dmy, year=2018):
    """Return the hourly profile of ``df`` for a single calendar day.

    Parameters
    ----------
    df : DataFrame with a datetime index that supports label slicing.
    dmy : object exposing ``.month`` and ``.day`` (e.g. ``datetime.date``);
        its own year is ignored.
    year : calendar year in which the day is looked up.  Defaults to 2018,
        the value that used to be hard-coded, so existing callers are
        unaffected.

    Returns
    -------
    DataFrame with one row per hour found in the window, holding the mean of
    every column for that hour, and an index renumbered 1..n.

    Raises
    ------
    ValueError if ``year``/``dmy`` do not form a valid date (e.g. Feb 29 in a
    non-leap year).
    """
    import numpy as np

    # The window runs from 01:00 to 23:00 inclusive, so a row stamped 00:00
    # on the selected day is not part of the profile.
    window_start = datetime.datetime(year, dmy.month, dmy.day, 1)
    window_end = datetime.datetime(year, dmy.month, dmy.day, 23)
    day = df.loc[window_start:window_end]

    hourly = day.groupby(day.index.hour).mean()
    hourly.index = np.arange(1, 1 + len(hourly.index))
    return hourly

def plot_hourlyloads_byreg(com_res, season, version):
    """Plot peak-day hourly end-use loads, one panel per region/building type.

    Reads ``{com_res}_{reg}.csv`` (``reg`` is the module-level region column
    name), picks each region's peak day with ``get_peakdays_i_reg`` and draws
    one line per end use in every panel.  The figure is written to
    ``./figures/hourlyloads/``.

    Parameters
    ----------
    com_res : key of ``enduse_map`` ('commercial' or 'residential').
    season : forwarded to ``get_peakdays_i_reg`` and used in the file name.
    version : 'normalized' runs the frame through ``get_normalized`` before
        plotting; any other value plots the columns as read.

    Raises
    ------
    IndexError if there are more region/building-type pairs than panels in
    the fixed grid.
    """
    df = pd.read_csv(f'{com_res}_{reg}.csv')
    print(com_res,' ', season)

    # get_peakdays_i_reg expects the sector key and a frame that still has
    # 'timestamp_hour' as a column, so it is called before indexing (and
    # before normalization, the same order plot_hourlyloads_eu uses).
    pkday_i_reg = get_peakdays_i_reg(df, com_res, season)

    if version == 'normalized':
        df = get_normalized(df,com_res)
    df = df.set_index('timestamp_hour')
    df.index = pd.to_datetime(df.index)

    end_uses = list(enduse_map[com_res])
    i_regions = df[reg].unique()
    building_types = df['building_type'].unique()

    # Fixed panel grid; only the number of columns depends on the sector.
    nrows = 13
    ncols = 10 if com_res == 'commercial' else 6

    i = j = 0
    fig, ax = plt.subplots(nrows, ncols, figsize=(ncols*4+4, nrows*3.5), sharex=True, sharey=False)
    for i_reg in i_regions:
        for bt in building_types:
            # The profile does not depend on the end use: compute it once per
            # panel.  The two label columns are dropped first because the
            # hourly mean in returnPkDayData cannot average text.
            data = df[(df[reg]==i_reg) & (df['building_type']==bt)]
            data = data.drop([reg, 'building_type'], axis=1)
            data = returnPkDayData(data, pkday_i_reg[i_reg])

            for eu in end_uses:
                ax[i, j].plot(data[eu], label=eu)
            ax[i, j].set_title(f'{i_reg}\nbt={bt}')
            if i == nrows - 1:
                ax[i, j].set_xlabel("Hour")
            if j == 0:
                ax[i, j].set_ylabel("Loads [kWh]")

            # A single legend, attached to the top-right panel.
            if i==0 and j == ncols-1: ax[i,j].legend(fontsize=13, bbox_to_anchor=(1.5, 1.01), loc=1)

            # Advance to the next panel, wrapping at the end of each row.
            j = j + 1
            if j > ncols-1:
                i = i + 1
                j = 0

    plt.tight_layout()
    plt.savefig(f'./figures/hourlyloads/{com_res[:3]}_{season}_peakday_{reg}.png',dpi=100, bbox_inches='tight')

def diagnostic_print(df):
    """Dump the 5 Jan 2018 hourly 'heating' columns per building type.

    ``df`` needs a 'timestamp_hour' column, a 'building_type' column and the
    module-level ``reg`` column.  For every building type the rows of the
    'BASN' region between 01:00 and 23:00 are averaged by hour, the columns
    are prefixed with the building type, and the frames are joined side by
    side.  Columns whose name contains 'heating' are written to
    ``diagnostics/diag_{reg}_heating.csv`` and printed.

    Returns the joined frame with all columns (not only the heating ones).
    """
    import numpy as np

    # NOTE(review): the label printed below is 'TRE' while the row filter
    # uses 'BASN' -- confirm which region is intended.
    i_reg = 'TRE'
    eu = 'heating'
    window_start = pd.Timestamp(2018, 1, 5, 1)
    window_end = pd.Timestamp(2018, 1, 5, 23)

    df = df.set_index('timestamp_hour')
    df.index = pd.to_datetime(df.index)

    df_bt = pd.DataFrame()
    for bt in df['building_type'].unique():
        in_scope = (df[reg]=='BASN') & (df['building_type']==bt)
        day_rows = df[in_scope].loc[window_start:window_end]
        hourly = day_rows.groupby([day_rows.index.hour]).mean()
        # Renumber the hours 1..n and tag every column with its building type.
        hourly.index = np.arange(1, 1+len(hourly.index))
        hourly.columns = [bt +'_'+ name for name in hourly.columns]
        df_bt = pd.concat([df_bt, hourly], axis=1)

    df_bt_filtered = df_bt.filter(like=eu)
    df_bt_filtered.to_csv(f'diagnostics/diag_{reg}_{eu}.csv', index=False)
    print(f'{i_reg}')
    print(df_bt_filtered)
    return df_bt

def get_peakdays_dailytotal_i_reg(data, varis, season):
    """Return, per region, the day with the largest daily total load.

    Parameters
    ----------
    data : DataFrame indexed by a datetime index named 'timestamp_hour' and
        containing the module-level ``reg`` column plus the ``varis`` columns.
        It is left unmodified.
    varis : list of column names summed row-wise into the load total.
    season : 'summer' keeps days in months 5-8, 'winter' keeps months 11, 12,
        1 and 2; any other value keeps every day.

    Returns
    -------
    dict mapping each region value to the timestamp of its peak day.
    """
    # Work on a narrow derived frame so the caller's DataFrame does not gain
    # a 'total' column as a side effect.
    totals = data[[reg]].assign(total=data[varis].sum(axis=1))
    daily_totals = totals.groupby(reg).resample('D')['total'].sum().reset_index()

    months = daily_totals['timestamp_hour'].dt.month
    if season == 'summer':
        filtered_data = daily_totals[(months >= 5) & (months <= 8)]
    elif season == 'winter':
        filtered_data = daily_totals[(months >= 11) | (months <= 2)]
    else:
        filtered_data = daily_totals

    # idxmax yields one row label per region: the day with the highest total.
    peak_days = filtered_data.loc[filtered_data.groupby(reg)['total'].idxmax()]
    return dict(zip(peak_days[reg], peak_days['timestamp_hour']))

def get_peakdays_i_reg(data, com_res, season):
    """Return, per region, the calendar day with the largest total load.

    Parameters
    ----------
    data : DataFrame with a 'timestamp_hour' column, the module-level ``reg``
        column and one column per end-use label of ``enduse_map[com_res]``.
        It is left unmodified.
    com_res : key of ``enduse_map`` selecting which end-use columns are
        summed into the total.
    season : 'summer' keeps months 5-8, 'winter' keeps months 11, 12, 1 and
        2; any other value keeps the whole series.

    Returns
    -------
    dict mapping each region value to the ``datetime.date`` of its peak day.
    The peak-day table and the dict are also printed.
    """
    data = data.set_index('timestamp_hour')
    data.index = pd.to_datetime(data.index)

    end_uses = list(enduse_map[com_res])
    # Build derived frames with assign() rather than writing columns onto a
    # filtered slice, which raises SettingWithCopyWarning in classic pandas.
    hourly = data[[reg]].assign(total=data[end_uses].sum(axis=1))

    months = hourly.index.month
    if season == 'summer':
        hourly = hourly[(months >= 5) & (months <= 8)]
    elif season == 'winter':
        hourly = hourly[(months >= 11) | (months <= 2)]

    # Collapse the hourly rows to one total per (day, region).
    daily = hourly.assign(timestamp_hour=hourly.index.date).reset_index(drop=True)
    daily = daily.groupby(['timestamp_hour', reg]).sum()

    # Highest total first within each region, then keep that first row.
    df_sorted = daily.sort_values([reg, 'total'], ascending=[True, False])
    peak_days_df = df_sorted.groupby(reg).head(1).reset_index()
    peak_days_dict = dict(zip(peak_days_df[reg], peak_days_df['timestamp_hour']))
    print(peak_days_df)
    print(peak_days_dict)
    return peak_days_dict


def plot_hourlyloads_eu(com_res, season, version):
    """Plot peak-day hourly loads, one panel per end use and building type.

    Reads ``csv/{com_res}_{reg}.csv`` (or the ``csv/normalized/`` variant),
    keeps a fixed list of building types, finds each region's peak day with
    ``get_peakdays_i_reg`` and draws one line per region in every panel.  The
    figure is saved under ``./diagnostics/``.

    Parameters
    ----------
    com_res : 'commercial' or 'residential'; selects the input file, the
        building types kept, the end-use labels and the panel grid.
    season : forwarded to ``get_peakdays_i_reg`` and used in the file name.
    version : 'normalized' reads the normalized file and passes it through
        ``get_normalized``; any other value plots the columns as read.

    Raises
    ------
    IndexError if there are more end-use/building-type pairs than panels in
    the fixed grid.
    """
    def _trim(text):
        # Drop trailing zeros, then a dangling decimal point.
        return text.rstrip('0').rstrip('.')

    if version == 'normalized':
        df = pd.read_csv(f"csv/normalized/{com_res}_{reg}.csv")
    else:
        df = pd.read_csv(f"csv/{com_res}_{reg}.csv")
    if com_res == "commercial":
        df = df.loc[df['building_type'].isin(['LargeHotel', 'LargeOffice','MediumOffice','RetailStandalone','Warehouse'])]
    elif com_res == "residential":
        df = df.loc[df['building_type'].isin(["Multi-Family with 5+ Units", "Mobile Home", "Single-Family Detached"])]
    print(f'{com_res} {season} {version}')

    # Peak days are chosen before normalization, from a defensive copy.
    pkday_i_reg = get_peakdays_i_reg(df.copy(), com_res, season)

    if version == 'normalized':
        df = get_normalized(df,com_res)

    df = df.set_index('timestamp_hour')
    df.index = pd.to_datetime(df.index)

    end_uses = list(enduse_map[com_res])
    i_regions = df[reg].unique()
    building_types = df['building_type'].unique()

    # The peak-day profile of a (building type, region) pair is the same for
    # every end use, so compute each one once instead of once per panel line.
    profiles = {}
    for bt in building_types:
        bt_rows = df[df['building_type']==bt]
        for i_reg in i_regions:
            subset = bt_rows[bt_rows[reg]==i_reg].drop([reg, 'building_type'], axis=1)
            profiles[(bt, i_reg)] = returnPkDayData(subset, pkday_i_reg[i_reg])

    if com_res == 'commercial':
        nrows = 8
        ncols = 5
    else:
        nrows = 15
        ncols = 3

    i = j = 0
    fig, ax = plt.subplots(nrows, ncols, figsize=(ncols*5, nrows*3.5), sharex=True, sharey=False)
    for eu in end_uses:
        for bt in building_types:
            panel = ax[i, j]
            for i_reg in i_regions:
                panel.plot(profiles[(bt, i_reg)][eu], label=i_reg)

            panel.set_title(f'eu={eu}\nbt={bt}')
            if i == nrows - 1:
                panel.set_xlabel("Hour")

            # Pick the y-axis unit label and a matching tick formatter.
            wh = 'kWh'
            den = ''
            _, ymax = panel.get_ylim()
            if version == 'normalized':
                if com_res == 'commercial':
                    den = "/ft\u00b2"
                    # NOTE(review): ticks are scaled by 1000 here while the
                    # label stays 'kWh/ft²' -- confirm the intended unit.
                    panel.yaxis.set_major_formatter(mticker.FuncFormatter(lambda xi, pos: _trim(f'{xi*1000:.3f}')))
                elif com_res == 'residential':
                    den = "/housing units"
                    panel.yaxis.set_major_formatter(mticker.FuncFormatter(lambda xy, pos: _trim(f'{xy:.3f}')))
            else:
                if ymax > 998:
                    # Large panels are shown in MWh: ticks divided by 1000.
                    wh = 'MWh'
                    panel.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, pos: _trim(f'{x/1000}')))
                else:
                    panel.yaxis.set_major_formatter(mticker.FuncFormatter(lambda xy, pos: _trim(f'{xy:.3f}')))
            ylab = f'{wh}{den}'

            # Normalized figures label only the first column; the others
            # label every panel because the unit can differ per panel.
            if version != 'normalized' or j == 0:
                panel.set_ylabel(f'{ylab}')
                panel.yaxis.set_label_coords(-0.1, 0.5)

            # A single legend, attached to the top-right panel.
            if i==0 and j == ncols-1: panel.legend(fontsize=13, bbox_to_anchor=(1.5, 1.01), loc=1)

            # Advance to the next panel, wrapping at the end of each row.
            j = j + 1
            if j > ncols-1:
                i = i + 1
                j = 0

    plt.tight_layout()
    plt.savefig(f'./diagnostics/{com_res[:3]}_{season}_{version}.png',dpi=100, bbox_inches='tight')

# Name of the region column; the helpers above read it as a module global.
reg = 'state'

# Non-normalized end-use figures: every sector for every season, commercial
# first, each in the order annual, summer, winter.
for _sector in ('commercial', 'residential'):
    for _season in ('annual', 'summer', 'winter'):
        plot_hourlyloads_eu(_sector, _season, '')

# Runs that are currently switched off (same sector/season combinations):
#   plot_hourlyloads_eu(<sector>, <season>, 'normalized')
#   plot_hourlyloads_byreg(<sector>, <season>, 'normalized')



residential summer 
   timestamp_hour state         total
0      2018-07-21    AL  1.194938e+08
1      2018-06-28    AR  7.565772e+07
2      2018-07-25    AZ  2.145976e+08
3      2018-07-07    CA  4.298707e+08
4      2018-06-28    CO  6.640632e+07
5      2018-08-29    CT  4.079587e+07
6      2018-07-03    DC  7.807121e+06
7      2018-08-29    DE  1.867462e+07
8      2018-08-05    FL  4.177733e+08
9      2018-06-20    GA  2.141357e+08
10     2018-06-17    IA  5.536032e+07
11     2018-08-10    ID  2.307976e+07
12     2018-06-30    IL  2.119587e+08
13     2018-06-18    IN  1.221901e+08
14     2018-06-29    KS  7.346131e+07
15     2018-07-05    KY  1.014411e+08
16     2018-07-22    LA  1.185602e+08
17     2018-08-29    MA  6.980715e+07
18     2018-07-03    MD  1.085559e+08
19     2018-07-05    ME  1.290075e+07
20     2018-06-30    MI  1.509794e+08
21     2018-06-29    MN  7.470778e+07
22     2018-06-30    MO  1.423811e+08
23     2018-06-28    MS  7.427273e+07
24     2018-08-11    MT  1.175