# NCA5 Hydrogen Sankey Diagram
Looking at data from Princeton Net-Zero America: https://netzeroamerica.princeton.edu/

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from matplotlib.sankey import Sankey

# Use a white background for both on-screen axes and saved figures.
plt.rcParams.update({
    'axes.facecolor': 'white',
    'savefig.facecolor': 'white',
})

# Energy unit conversion factors used to normalise flows to GWh.
GWh_per_PJ = 277.778  # gigawatt-hours in one petajoule
TJ_per_PJ = 1000  # terajoules in one petajoule

In [None]:
# Load the 2050 Net-Zero America data table and report how many rows it has.
df20 = pd.read_csv('data/nzap-data-2050.csv')
print(df20.shape[0])

In [None]:
# Keep only the rows of the 'E+RE+' scenario and report the remaining row count.
is_scenario = df20['scenario'] == 'E+RE+'
df20 = df20[is_scenario]
print(df20.shape[0])

In [None]:
# Bare expression: shows the scenario-filtered table as the notebook cell output.
df20

In [None]:
# Substrings that mark a row of Princeton's table as hydrogen related.
_H2_KEYWORDS = ('hydrogen', 'Hydrogen', 'H2')


def _mentions_hydrogen(val):
    """Return True when *val* is a string containing any hydrogen keyword."""
    return isinstance(val, str) and any(key in val for key in _H2_KEYWORDS)


def _to_gwh(val, unit):
    """Convert an energy quantity expressed in GWh, PJ or TJ to GWh.

    Raises:
        ValueError: if *unit* is not one of 'GWh', 'PJ' or 'TJ'.
    """
    if unit == 'GWh':
        return val
    if unit == 'PJ':
        return val * GWh_per_PJ
    if unit == 'TJ':
        return val / TJ_per_PJ * GWh_per_PJ
    raise ValueError(f"Unit was not considered in list, please fix this. Unit == {unit}")


def prep_dt(year=2050, scenario='E+RE+'):
    """Take Princeton's input csv file and prep the values needed for the Sankey diagram.

    Reads ``data/nzap-data-{year}.csv``, keeps the rows of *scenario* that
    mention hydrogen in any text column, restricts them to energy units
    (PJ, TJ, GWh) and adds 'annual flow (GWh)' and 'annual flow (EJ)' columns.
    Intermediate tables are written to ``tmp1.csv``, ``tmp2.csv`` and
    ``tmp3a.csv`` in the working directory for inspection, and the
    conversion-efficiency table from ``get_conversion_df`` is printed.

    Args:
        year: Year used to select the input csv file.
        scenario: Value of the 'scenario' column to keep.

    Returns:
        None. The Sankey table itself is not assembled yet (see TODOs below).

    Raises:
        ValueError: if a row carries an energy unit that cannot be converted.
    """
    # Read and get scenario
    print(f"\n\n{year}: {scenario}")
    df = pd.read_csv(f'data/nzap-data-{year}.csv')
    df = df[df['scenario'] == scenario]

    # Skim all H2 related rows: a row is kept when any of its text cells
    # mentions hydrogen. Built as a boolean mask because DataFrame.append
    # (used previously, one row at a time) no longer exists in pandas >= 2.0.
    h2_rows = pd.Series(False, index=df.index)
    for col in df.columns:
        h2_rows |= df[col].map(_mentions_hydrogen).astype(bool)
    df1 = df[h2_rows]
    df1.to_csv('tmp1.csv')

    # Reduce to H2 energy related rows; copy so new columns can be added
    # without writing into a view of the original frame.
    df1 = df1[df1['unit'].isin(['PJ', 'TJ', 'GWh'])].copy()
    df1.to_csv('tmp2.csv')

    # All values to GWh
    df1['annual flow (GWh)'] = [
        _to_gwh(val, unit) for val, unit in zip(df1['value'], df1['unit'])
    ]
    df1['annual flow (EJ)'] = df1['annual flow (GWh)'] / GWh_per_PJ / 1000.
    df1.to_csv('tmp3a.csv')

    # Stacked bar comparison against PNZA's figures on slide 194 w/ production and use in EJ
    #stacked_plot(year, scenario, df1, 'Production')
    #stacked_plot(year, scenario, df1, 'Uses')

    # Get conversion efficiencies and input energy
    effs = get_conversion_df()
    print(effs)

    # Map production conversion table to main data frame
    # NOTE: While SMR is in the datatable, SMR is skipped in the H2 production slides for 2050 (194)
    # Production pathways noted for the mapping:
    #   'BECCS hydrogen production -> hydrogen blend'
    #   'autothermal reforming hydrogen production w/ccu -> hydrogen blend'
    #   'central-station hydrogen electrolysis'

    # TODO: Create main table

    # TODO: Print summary outputs

    return


def stacked_plot(year, scenario, df, col):
    """Plot, save and show one stacked bar of annual H2 flows in EJ.

    Rows of *df* whose 'filter_level_3' equals *col* are stacked into a single
    bar, one segment per 'variable_name'. The figure is written to
    ``plots/{year}_{scenario}_{col}.png`` and then displayed.

    Args:
        year: Year shown in the title and used in the output file name.
        scenario: Scenario label shown in the title and used in the file name.
        df: Table with 'filter_level_3', 'variable_name' and
            'annual flow (EJ)' columns.
        col: 'filter_level_3' category to plot.
    """
    figure, axes = plt.subplots()
    axes.set_title(f"{year}, {scenario}: {col}")

    # One column per variable so that pandas stacks them in a single bar.
    selected = df[df['filter_level_3'] == col].set_index('variable_name')
    flows = selected[['annual flow (EJ)']].transpose()
    flows.plot(kind='bar', stacked=True, ax=axes)

    # Legend sits outside the axes, to the right of the bar.
    axes.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
    axes.set_ylabel('H2 Quantity (EJ)')
    axes.xaxis.set_ticklabels([])
    figure.tight_layout()
    figure.savefig(f"plots/{year}_{scenario}_{col}.png")
    plt.show()


def get_conversion_df():
    """Load the conversion-efficiency sheet of the NZA Annex A3 inputs catalog.

    The 'tech', 'zone', 'vintage' and 'unit' columns have gaps (from
    previously merged spreadsheet cells); each gap is filled with the closest
    value above it, and gaps before the first value become ''.

    Returns:
        The 'conversion_efficiency' sheet as a DataFrame with those four
        columns filled in.
    """
    effs = pd.read_excel('data/NZA_Annex_A3_-_Inputs_catalog_for_EER_modeling.xlsx', sheet_name='conversion_efficiency')

    # fill in NaNs from previously merged cells
    for col in ['tech', 'zone', 'vintage', 'unit']:
        filled = effs[col].ffill()
        if filled.isna().any():
            # Only leading gaps remain after a forward fill; they have no
            # value above them, so they get the empty string. Cast to object
            # first so a numeric column can hold the string.
            filled = filled.astype(object).fillna('')
        effs[col] = filled

    return effs
    
    
    

# Run the preparation for the 2050 'E+RE+' scenario; the 'E+RE-' run is left disabled.
prep_dt(2050, 'E+RE+')
#prep_dt(2050, 'E+RE-')