In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import os

In [None]:
# All 56 balancing authorities (BA)
def return_all_regions():
    """Return the codes of all 56 balancing authorities (BAs).

    The result includes OVEC and SEC, the two BAs that
    return_usable_BAs() leaves out.  A new list is built on every call,
    so callers may modify the result freely.
    """
    # Grouped the same way ICs_from_BAs groups its interconnects; OVEC and
    # SEC are kept at the very end of the returned list.
    east = [
        'AEC', 'AECI', 'CPLE', 'CPLW', 'DUK',
        'FMPP', 'FPC', 'FPL', 'GVL', 'HST',
        'ISNE', 'JEA', 'LGEE', 'MISO', 'NSB',
        'NYIS', 'PJM', 'SC', 'SCEG', 'SOCO',
        'SPA', 'SWPP', 'TAL', 'TEC', 'TVA',
    ]
    texas = ['ERCO']
    west = [
        'AVA', 'AZPS', 'BANC', 'BPAT', 'CHPD', 'CISO', 'DOPD',
        'EPE', 'GCPD', 'IID', 'IPCO', 'LDWP', 'NEVP', 'NWMT',
        'PACE', 'PACW', 'PGE', 'PNM', 'PSCO', 'PSEI', 'SCL',
        'SRP', 'TEPC', 'TIDC', 'TPWR', 'WACM', 'WALC', 'WAUW',
    ]
    trailing = ['OVEC', 'SEC']
    return east + texas + west + trailing

# All 54 "usable" balancing authorities (BA) (excludes OVEC and SEC)
# These 2 have significant
# enough reporting problems that we do not impute cleaned data for them.
def return_usable_BAs():
    """Return the codes of the 54 "usable" balancing authorities (BAs).

    OVEC and SEC are deliberately absent from this list.  A new list is
    built on every call, so callers may modify the result freely.
    """
    east = [
        'AEC', 'AECI', 'CPLE', 'CPLW', 'DUK',
        'FMPP', 'FPC', 'FPL', 'GVL', 'HST',
        'ISNE', 'JEA', 'LGEE', 'MISO', 'NSB',
        'NYIS', 'PJM', 'SC', 'SCEG', 'SOCO',
        'SPA', 'SWPP', 'TAL', 'TEC', 'TVA',
    ]
    texas = ['ERCO']
    west = [
        'AVA', 'AZPS', 'BANC', 'BPAT', 'CHPD', 'CISO', 'DOPD',
        'EPE', 'GCPD', 'IID', 'IPCO', 'LDWP', 'NEVP', 'NWMT',
        'PACE', 'PACW', 'PGE', 'PNM', 'PSCO', 'PSEI', 'SCL',
        'SRP', 'TEPC', 'TIDC', 'TPWR', 'WACM', 'WALC', 'WAUW',
    ]
    # No OVEC / SEC entries here: they are the two excluded BAs.
    return east + texas + west

# Mapping of each U.S. interconnect (IC) to the balancing
# authorities (BAs) associated with it. Also includes a combined
# contiguous U.S. total built from all the usable BAs.
ICs_from_BAs = dict(
    # Unlike the CONUS entry at the bottom, this list also carries
    # OVEC and SEC (its last two codes).
    EASTERN_from_BAs=[
        'AEC', 'AECI', 'CPLE', 'CPLW', 'DUK',
        'FMPP', 'FPC', 'FPL', 'GVL', 'HST',
        'ISNE', 'JEA', 'LGEE', 'MISO', 'NSB',
        'NYIS', 'PJM', 'SC', 'SCEG', 'SOCO',
        'SPA', 'SWPP', 'TAL', 'TEC', 'TVA',
        'OVEC', 'SEC',
    ],
    TEXAS_from_BAs=['ERCO'],
    WESTERN_from_BAs=[
        'AVA', 'AZPS', 'BANC', 'BPAT', 'CHPD', 'CISO', 'DOPD',
        'EPE', 'GCPD', 'IID', 'IPCO', 'LDWP', 'NEVP', 'NWMT',
        'PACE', 'PACW', 'PGE', 'PNM', 'PSCO', 'PSEI', 'SCL',
        'SRP', 'TEPC', 'TIDC', 'TPWR', 'WACM', 'WALC', 'WAUW',
    ],
    # Evaluated once, when this statement runs.
    CONUS_from_BAs=return_usable_BAs(),
)

# Defines a mapping between the balancing authorities (BAs)
# and their locally defined region based on EIA naming.
# This uses a CSV file (EIA's balancing authority acronym table)
# defining the mapping.
def return_BAs_per_region_map(acronyms_csv='data/balancing_authority_acronyms.csv'):
    """Group balancing authority (BA) codes by EIA region acronym.

    Reads EIA's Balancing Authority Acronym table
    (https://www.eia.gov/realtime_grid/) and files the ``Code`` of every row
    under the acronym of that row's ``Region``.  Rows whose region is
    'Canada' or 'Mexico' are skipped.  The resulting mapping and the total
    number of mapped BAs are printed before returning.

    Args:
        acronyms_csv: Path of the acronym table CSV.  The first line of the
            file is skipped as source information; the table that follows
            must provide ``Region`` and ``Code`` columns.  Defaults to the
            location used previously, so existing calls are unaffected.

    Returns:
        dict: region acronym (e.g. ``'CENT'``) -> list of BA codes in file
        order.  Every known region is present, possibly with an empty list.

    Raises:
        ValueError: If a row (other than Canada/Mexico) names a region that
            is not in the region table below.
    """
    regions = {
            'CENT' : 'Central',
            'MIDW' : 'Midwest',
            'TEN' : 'Tennessee',
            'SE' : 'Southeast',
            'FLA' : 'Florida',
            'CAR' : 'Carolinas',
            'MIDA' : 'Mid-Atlantic',
            'NY' : 'New York',
            'NE' : 'New England',
            'TEX' : 'Texas',
            'CAL' : 'California',
            'NW' : 'Northwest',
            'SW' : 'Southwest'
    }

    # Invert once so each row needs a single lookup instead of a scan.
    acronym_by_name = {name: acronym for acronym, name in regions.items()}
    rtn_map = {acronym: [] for acronym in regions}

    # Load EIA's Balancing Authority Acronym table
    # https://www.eia.gov/realtime_grid/
    df = pd.read_csv(acronyms_csv,
            skiprows=1) # skip first row as it is source info

    # Walk all rows and fill the map
    for code, region in zip(df['Code'], df['Region']):

        # Skip Canada and Mexico
        if region in ('Canada', 'Mexico'):
            continue

        # Explicit check (not an assert) so it still runs under `python -O`.
        if region not in acronym_by_name:
            raise ValueError(
                f"Unrecognized region {region!r} for balancing authority {code!r}")

        rtn_map[acronym_by_name[region]].append(code)

    print("\nBA to Region mapping:")
    for acronym, codes in rtn_map.items():
        print(acronym, codes)
    tot = sum(len(codes) for codes in rtn_map.values())
    print("\n\nTotal US48 BAs mapped {}.  Recall 10 are generation only.".format(tot))

    return rtn_map


# Assume the MICE results file is a subset of the original hours
def trim_rows_to_match_length(mice, df):
    """Trim `df` to the span of hours covered by the MICE results.

    Leading rows of `df` are removed up to the first row whose 'date_time'
    equals the first 'time' value in `mice`; trailing rows are removed after
    the last row whose 'date_time' equals the last 'time' value in `mice`.
    The input frames are not modified.

    Args:
        mice: DataFrame with a 'time' column; its first and last rows give
            the boundaries of the span to keep.
        df: DataFrame with a 'date_time' column whose values are compared
            for equality against the `mice` boundary values.

    Returns:
        A new DataFrame holding the kept rows of `df`, with their original
        index labels.

    Raises:
        ValueError: If `mice` is empty, if either boundary value is absent
            from `df['date_time']`, or if the trimmed frame does not have
            the same number of rows as `mice`.
    """
    if len(mice.index) == 0:
        raise ValueError("MICE results are empty; no time span to match")

    # Positional access, so `mice` does not need a default 0..n-1 index.
    mice_start = mice['time'].iloc[0]
    mice_end = mice['time'].iloc[-1]

    is_start = (df['date_time'] == mice_start).to_numpy()
    is_end = (df['date_time'] == mice_end).to_numpy()
    if not is_start.any():
        raise ValueError(f"Start time {mice_start!r} not found in 'date_time'")
    if not is_end.any():
        raise ValueError(f"End time {mice_end!r} not found in 'date_time'")

    first = int(is_start.argmax())                     # first start match
    last = len(is_end) - 1 - int(is_end[::-1].argmax())  # last end match

    # copy() so the caller gets an independent frame.
    trimmed = df.iloc[first:last + 1].copy()

    # Explicit check (not an assert) so it still runs under `python -O`.
    if len(trimmed.index) != len(mice.index):
        raise ValueError(
            f"Trimmed data has {len(trimmed.index)} rows but the MICE "
            f"results have {len(mice.index)}")
    return trimmed
    
            
    

def distribute_MICE_results(raw_demand_file_loc, mice_results_csv):
    """Prepare the final-results folders and start distributing MICE results.

    Ensures './data/final_results' and its four sub-directories exist,
    loads the MICE results CSV, then reads the raw demand CSV of the first
    usable balancing authority (BA) and trims it to the MICE time span.

    Args:
        raw_demand_file_loc: Directory holding one '<BA>.csv' raw demand
            file per balancing authority.
        mice_results_csv: Path of the MICE results CSV.

    Returns:
        None.
    """
    out_base = './data/final_results'
    for subdir in ['balancing_authorities', 'regions', 'interconnects', 'contiguous_US']:
        out_dir = f"{out_base}/{subdir}"
        # Check each sub-directory on its own: an out_base left over from an
        # earlier run may be missing some of them.
        if not os.path.isdir(out_dir):
            # makedirs also creates ./data and out_base when they are absent.
            os.makedirs(out_dir, exist_ok=True)
            print(f"Final results files will be located here: {out_dir}")

    # Load MICE results
    mice = pd.read_csv(mice_results_csv)

    # Distribute to single BA results files first
    for ba in return_usable_BAs():
        df = pd.read_csv(f"{raw_demand_file_loc}/{ba}.csv")
        df = trim_rows_to_match_length(mice, df)
        # NOTE(review): the loop stops after the first BA and the trimmed
        # frame is not written anywhere yet -- looks like work in progress.
        break
        

In [None]:
# Inputs for this run: the directory of per-BA raw demand CSVs and the
# MICE results CSV.
raw_demand_file_loc = '/Users/truggles/EIA_Cleaned_Hourly_Electricity_Demand_Data/data/release_2019_Oct/original_eia_files'
mice_f = 'MICE_output/mean_impute_csv_MASTER.csv'

distribute_MICE_results(
    raw_demand_file_loc=raw_demand_file_loc,
    mice_results_csv=mice_f,
)

In [None]:
for region in return_all_regions():
    # The leading break switches this cell off: nothing below it runs.
    break
    print(region)
    df = pd.read_csv(f'/Users/truggles/Get_eia_demand_data/data3/{region}.csv')

    # Parse each 'time' string into a datetime
    df['date_time'] = [
        datetime.strptime(stamp, '%Y%m%dT%HZ') for stamp in df['time']
    ]

    # Keep only the timestamp, demand and forecast columns
    df = df[['date_time', 'demand (MW)', 'forecast demand (MW)']]

    df.to_csv(f'./for_Zenodo/{region}.csv', index=False)