In [None]:
#%matplotlib notebook # This is needed to load pickled figures

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import date, time, datetime, timedelta
import matplotlib
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection
import matplotlib.dates as mdates
import pickle

In [None]:
def return_all_regions():
    """Return every region code known to this notebook, in a fixed order.

    A brand-new list is built on each call, so callers are free to mutate
    the result without affecting later calls.

    Returns
    -------
    list of str
        The region codes; elsewhere in this notebook they are used as
        DataFrame column names.
    """
    codes = (
        'AEC AECI CPLE CPLW '
        'DUK FMPP FPC '
        'FPL GVL HST ISNE '
        'JEA LGEE MISO NSB '
        'NYIS OVEC PJM SC '
        'SCEG SEC SOCO '
        'SPA SWPP TAL TEC '
        'TVA ERCO '
        'AVA AZPS BANC BPAT '
        'CHPD CISO DOPD '
        'EPE GCPD IID '
        'IPCO LDWP NEVP NWMT '
        'PACE PACW PGE PNM '
        'PSCO PSEI SCL SRP '
        'TEPC TIDC TPWR WACM '
        'WALC WAUW'
    )
    # str.split() with no argument splits on any whitespace run and
    # always yields a fresh list.
    return codes.split()


def return_good_regions():
    """Return the region codes that are kept for the analysis.

    This is the list from return_all_regions() with 'SEC' and 'OVEC' left
    out; the order of the remaining codes is unchanged.

    Returns
    -------
    list of str
        A new list on every call.
    """
    excluded = ('SEC', 'OVEC')
    return [code for code in return_all_regions() if code not in excluded]

In [None]:
# NOTE(review): absolute, machine-specific path; anyone else running the
# notebook has to edit it before this cell will work.
base = '/Users/truggles/Downloads/for_tyler_Oct07_overimputes/'

# Regions kept for the analysis, and their companion "<region>_category"
# columns, which are not needed below.
regions = return_good_regions()
r_cats = [f'{r}_category' for r in regions]

# SEC and OVEC are dropped entirely (value column and category column).
excluded_cols = ['SEC', 'SEC_category', 'OVEC', 'OVEC_category']

df = pd.read_csv(base + 'csv_MASTER_v12_2day.csv')
df['date_time'] = pd.to_datetime(df['date_time'])
df = df.drop(columns=excluded_cols + r_cats)
df.head()

In [None]:
# Normalize the requested columns by their own mean
def normalize_each_column(df, cols):
    """Divide each column named in `cols` by its NaN-ignoring mean.

    The DataFrame is modified in place: every listed column is overwritten
    with its normalized values. Columns not listed in `cols` are untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns to rescale.
    cols : iterable of str
        Names of the columns to rescale.

    Returns
    -------
    pandas.DataFrame
        The same `df` object that was passed in.
    """
    for name in cols:
        values = df[name]
        # np.nanmean skips NaN entries when averaging.
        column_mean = np.nanmean(values)
        df[name] = values / column_mean
    return df

# Print one region before and after normalization as a quick visual check.
sample_col = 'AECI'
print(df[sample_col])
df = normalize_each_column(df, regions)
print(df[sample_col])

In [None]:
def find_datetime_index(df, dt_target):
    """Find the first row of `df` whose 'date_time' falls in the target hour.

    Only the year, month, day and hour fields are compared; everything else
    on either side (minutes, seconds, time-zone information) is ignored.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'date_time' column whose values expose `.year`,
        `.month`, `.day` and `.hour`.
    dt_target : str or datetime
        A string is parsed with the format '%Y%m%dT%HZ'; any other value is
        used as given and must expose the same four attributes.

    Returns
    -------
    tuple or None
        `(index_label, dt_target)` for the first matching row, where
        `dt_target` is the parsed datetime. `None` when no row matches; a
        message is printed in both cases.
    """
    if isinstance(dt_target, str):
        dt_target = datetime.strptime(dt_target, '%Y%m%dT%HZ')

    wanted = (dt_target.year, dt_target.month, dt_target.day, dt_target.hour)

    # Walk the column once instead of doing a scalar df.loc lookup per row:
    # this is much cheaper and also works when index labels are duplicated
    # (df.loc would return a Series for a duplicated label).
    for idx, dt in df['date_time'].items():
        if (dt.year, dt.month, dt.day, dt.hour) == wanted:
            print(f"{dt_target} at index {idx}")
            return idx, dt_target

    print(f"No datetime found in find_datetime_index for {dt_target}")
    # Callers that unpack the result must handle this case themselves.
    return None

    
    
def max_and_min_in_range(df, cols, idx_start, idx_end):
    """Return the smallest and largest value in a block of `df`, ignoring NaN.

    Despite the function name, the result is ordered (min, max).

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame.
    cols : list of str
        Columns to include in the search.
    idx_start, idx_end : index labels
        First and last row labels of the block; `.loc` slicing is used, so
        both ends are included.

    Returns
    -------
    tuple
        `(z_min, z_max)` computed with np.nanmin / np.nanmax.
    """
    # Use the `cols` argument; the previous version read the notebook-level
    # `regions` variable here and ignored the parameter.
    window = df.loc[idx_start:idx_end, cols].to_numpy()
    z_max = np.nanmax(window)
    z_min = np.nanmin(window)
    return z_min, z_max


    
def plot_2D_range(ax, df, cols, start_dt_str, length_dt, start_end_idx, z_range, ba_highlight, hour_highlight):
    """Draw one heat-map panel for a multi-day slice of `df` and save it.

    The rows `df.loc[start_end_idx[0]:start_end_idx[1], cols]` are shown with
    imshow (one image column per entry of `cols`, one image row per DataFrame
    row, first row at the bottom). The patches returned by add_highlights are
    overlaid and a colorbar is attached. The current pyplot figure is then
    pickled to 'plot_2D_slice_<start_dt_str>.pkl' and written to
    'plot_2D_slice_<start_dt_str>_plus_<length_dt>_days.pdf'.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw into. The figure that gets saved is `plt.gcf()`, so the
        caller is expected to have made this axes' figure current.
    df : pandas.DataFrame
        Frame holding the `cols` columns.
    cols : list of str
        Columns to display; also used as x tick labels.
    start_dt_str : str
        Slice start in '%Y%m%dT%HZ' format; used for the y tick dates and
        the output file names.
    length_dt : int
        Number of days in the slice. Y ticks are placed every 24 rows, which
        assumes one row per hour -- TODO confirm against the data.
    start_end_idx : sequence of two index labels
        First and last row label of the slice (both included).
    z_range : sequence of two numbers
        (vmin, vmax) for the color scale.
    ba_highlight : str
        Entry of `cols` passed to add_highlights.
    hour_highlight : number
        Row offset within the slice passed to add_highlights.
    """
    start_dt = datetime.strptime(start_dt_str, '%Y%m%dT%HZ')

    # Differences in PDF saving shown here: https://github.com/breedlun/matplotlib/pull/1
    window = df.loc[start_end_idx[0]:start_end_idx[1], cols]
    im = ax.imshow(window, interpolation='nearest', origin='lower',
                   vmin=z_range[0], vmax=z_range[1], aspect='auto')

    # X ticks: one per displayed column. The count follows `cols` rather than
    # a fixed number, so the function works for any column list.
    ax.set_xticks(np.arange(len(cols)))
    ax.set_xticklabels(cols, rotation=90, fontsize=12.5)

    # Y ticks: one calendar date every 24 rows, including the closing edge.
    start_dt_date = date(start_dt.year, start_dt.month, start_dt.day)
    dts = [start_dt_date + timedelta(days=i) for i in range(length_dt + 1)]
    print(dts)
    ax.set_yticks(np.arange(0, length_dt*24+1, 24))
    ax.set_yticklabels([str(d) for d in dts])

    # Add imputations visualization highlighting
    pc_cent, pc = add_highlights(cols, ba_highlight, length_dt*24, hour_highlight)
    ax.add_collection(pc_cent)
    ax.add_collection(pc)

    cbar = ax.figure.colorbar(im)
    cbar.ax.set_ylabel("Normalized demand")
    plt.tight_layout()

    # The context manager closes the file even if pickling fails.
    with open(f'plot_2D_slice_{start_dt_str}.pkl', 'wb') as pickle_file:
        pickle.dump(plt.gcf(), pickle_file)

    plt.savefig("plot_2D_slice_{}_plus_{}_days.pdf".format(start_dt_str, length_dt))




# Build the patches showing which hours are used to impute the target hour.
# `delta_hours` is counted from the start of the plotted slice; all positions
# are computed relative to the column list and that slice.
def add_highlights(cols, ba, total_hours, delta_hours):
    """Create the highlight patches for one target cell.

    Parameters
    ----------
    cols : list of str
        Displayed columns; the position of `ba` in this list gives the
        x coordinate.
    ba : str
        Column to highlight. `cols.index(ba)` raises ValueError if absent.
    total_hours : number
        Upper bound for the 24-step markers going upward.
    delta_hours : number
        Row coordinate of the target cell.

    Returns
    -------
    tuple of PatchCollection
        `(pc_cent, pc)`: an orange collection holding the single target
        rectangle, and a red collection holding the two horizontal bars on
        either side of the target column, the cells one row above and below
        the target, and cells in the target column at every 24-row step
        upward (while <= total_hours) and downward (while >= 0).
    """
    col_idx = cols.index(ba)
    print(f"BA {ba} at position {col_idx}")

    adjust = 0.45
    width = 1
    height = 1
    left = col_idx - adjust

    # NOTE(review): every patch is anchored at `row + adjust`, i.e. its lower
    # edge sits 0.45 above the row coordinate -- confirm this is the intended
    # alignment with the image cells.
    def cell(row):
        """Unit rectangle in the highlighted column at the given row."""
        return Rectangle((left, row + adjust), width, height)

    pc_cent = PatchCollection([cell(delta_hours)], facecolor='orange',
                              alpha=1., edgecolor='none')

    red_rects = [
        # Horizontal: bar to the left of the target column, then to the right
        Rectangle((-0.5, delta_hours + adjust), col_idx + (0.5 - adjust), height),
        Rectangle((col_idx + 1. - adjust, delta_hours + adjust), len(cols) - col_idx, height),
        # Vertical: immediate neighbours above and below
        cell(delta_hours + 1.),
        cell(delta_hours - 1.),
    ]

    # Same column, every 24 rows above the target
    row = delta_hours + 24
    while row <= total_hours:
        red_rects.append(cell(row))
        row += 24

    # Same column, every 24 rows below the target
    row = delta_hours - 24
    while row >= 0:
        red_rects.append(cell(row))
        row -= 24

    pc = PatchCollection(red_rects, facecolor='red', alpha=1.,
                         edgecolor='none')

    return pc_cent, pc
    
    
    
    
    
    
# Grab the middle Tues, Wed, Thurs of each month
# Maps slice start ('%Y%m%dT%HZ') -> [region to highlight, highlighted row
# offset within the slice].
date_strings = {
    '20180116T00Z' : ['SC', 42],
    '20180417T00Z' : ['ERCO', 46],
    '20180717T00Z' : ['IID', 21],
    '20181016T00Z' : ['FPC', 45],
}

start_end_idx = {}

# Running extremes over all slices, so every panel shares one color scale.
# Starting from +/- infinity guarantees the first slice always sets them,
# whatever the data range is.
tot_min = np.inf
tot_max = -np.inf

length_dt = 3 # days per 2D plot

for dt_str, highlights in date_strings.items():
    idx_start, start_dt = find_datetime_index(df, dt_str)
    end_dt = start_dt + timedelta(days=length_dt)
    idx_end, end_dt = find_datetime_index(df, end_dt)

    z_min, z_max = max_and_min_in_range(df, regions, idx_start, idx_end)
    if z_min < tot_min:
        tot_min = z_min
    if z_max > tot_max:
        tot_max = z_max

    start_end_idx[dt_str] = [idx_start, idx_end]


z_range = [tot_min, tot_max]


# Start master plot
plt.close()
n_panels = len(date_strings)
matplotlib.rcParams['figure.figsize'] = (12, 4*n_panels)
matplotlib.rcParams['font.size'] = 16

cnt = 1
axs = []
for dt_str, highlights in date_strings.items():
    # Panels are stacked in a single column and filled from the bottom up;
    # the grid size follows the number of slices instead of a fixed 4 rows.
    position = n_panels + 1 - cnt
    if cnt == 1:
        ax = plt.subplot(n_panels, 1, position)
    else:
        # Later panels share the x axis of the first (bottom) panel and
        # hide their own x tick labels.
        ax = plt.subplot(n_panels, 1, position, sharex=axs[0])
        plt.setp(ax.get_xticklabels(), visible=False)
    ax.margins(0.1)
    axs.append(ax)

    plot_2D_range(ax, df, regions, dt_str, length_dt, start_end_idx[dt_str], z_range, highlights[0], highlights[1])

    cnt += 1


#plt.savefig('plot_2D_imp2.pdf')


    
    

In [None]:
# Re-opening the pickled figures is switched off by default (the loop used to
# be disabled by an unconditional `continue`). Set the flag to True to reload
# and show each saved slice.
RELOAD_PICKLED_FIGURES = False

if RELOAD_PICKLED_FIGURES:
    for dt_str in date_strings:
        print(dt_str)
        # NOTE: unpickling can execute arbitrary code; only load files that
        # were written by this notebook.
        with open(f'plot_2D_slice_{dt_str}.pkl', 'rb') as pickle_in:
            fig = pickle.load(pickle_in)
        plt.show()