# Volcano_Plots.ipynb
Author:  Kevin Tran <ktran@andrew.cmu.edu>

This Python notebook takes regression models created by `regress.ipynb` and uses them to construct volcano plots.

## Initializations/Data Management

###### Importing

In [16]:
from pprint import pprint   # for debugging
import sys
import math
import copy
import numpy as np
import pandas as pd
sys.path.append('..')
from vasp_settings_to_str import vasp_settings_to_str
from gas_pull import GASPull
import dill as pickle
pickle.settings['recurse'] = True     # required to pickle lambdify functions
import matplotlib.pyplot as plt
from plotly.offline import init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import plotly.plotly as py
import plotly.graph_objs as go

###### Load data

In [2]:
# Directory that holds the *.db file. Switch the active line to change machines.
#DB_LOC = '/global/cscratch1/sd/zulissi/GASpy_DB/'  # Cori
DB_LOC = '/Users/KTran/Nerd/GASpy'                 # Local

# Calculation settings we want to look at, converted by `vasp_settings_to_str`
# into the form that `GASPull` takes as its second argument
VASP_SETTINGS = vasp_settings_to_str({
    'gga': 'BF',
    'pp_version': '5.4.',
    'encut': 350,
})

# Pull the data from the database once, then keep the rows along with the
# distinct MPIDs and adsorbates that appear in them
GAS_PULL = GASPull(DB_LOC, VASP_SETTINGS, split=False)
ROWS = GAS_PULL.rows
MPIDS, ADS = (np.unique([getattr(row, attr) for row in ROWS])
              for attr in ('mpid', 'adsorbate'))

###### Filter data:  Minima per facet

In [49]:
# Here, we use only the minimum adsorption energy per facet. So there will be a data
# point per facet per MPID per adsorbate.
#
# `ENERGIES[ads]` maps the label `mpid + '\n' + facet` to the lowest `row.energy`
# found among the rows with that adsorbate, MPID and miller index.
#
# NOTE:  Each adsorbate must get its OWN dictionary. `dict.fromkeys(ADS, {})` would
# hand the very same dictionary object to every adsorbate, so the energies of all
# adsorbates would end up mixed together in one shared mapping.
ENERGIES = {ads: {} for ads in ADS}
for ads in ADS:
    for mpid in MPIDS:
        # Narrow the rows down to this adsorbate/MPID pair once, instead of
        # re-scanning all of `ROWS` for every facet
        matches = [row for row in ROWS
                   if row.adsorbate == ads
                   and row.mpid == mpid]
        facets = np.unique([row.miller for row in matches])
        for facet in facets:
            energies = [row.energy for row in matches
                        if row.miller == facet]
            # Guard against an empty list, because `np.min` raises on empty input
            if energies:
                ENERGIES[ads][mpid+'\n'+facet] = np.min(energies)

###### Filter data:  Minima per MPID

In [45]:
# Here, we use only the minimum adsorption energy per MPID. So there will be a data
# point per MPID per adsorbate.
#
# `ENERGIES[ads]` maps each MPID to the lowest `row.energy` found among the rows
# with that adsorbate and MPID.
#
# NOTE:  Each adsorbate must get its OWN dictionary. `dict.fromkeys(ADS, {})` would
# hand the very same dictionary object to every adsorbate, so the energies of all
# adsorbates would end up mixed together in one shared mapping.
ENERGIES = {ads: {} for ads in ADS}
for ads in ADS:
    for mpid in MPIDS:
        energies = [row.energy for row in ROWS
                    if row.adsorbate == ads
                    and row.mpid == mpid]
        # Skip adsorbate/MPID pairs without data, because `np.min` raises on empty input
        if energies:
            ENERGIES[ads][mpid] = np.min(energies)

###### Load Surrogate Models

In [5]:
# Define the models you want to pull out here
MODELS = dict.fromkeys(('GBE', 'LR'))

# This loop will pull each model out of their pickle. File names are hard-coded, so
# make sure they match up with the pickles created in `regress.ipynb`.
# NOTE:  Unpickling executes arbitrary code, so only load pickles you created yourself.
for model in MODELS:
    # Pickles are binary data, so open them in binary mode; the `with` block makes
    # sure the file handle is closed even if unpickling fails
    with open('pkls/CoordcountAds_Energy_%s.pkl' % model, 'rb') as pkl_file:
        pkl = pickle.load(pkl_file)
    # Keep only the two entries that we read from each pickle
    MODELS[model] = {'model': pkl['model'],
                     'pre_processors': pkl['pre_processors']}

###### Load Volcanoes

In [62]:
# `POINTS` stores, per reaction, the incumbent data points of that reaction's
# volcano plot
POINTS = dict()
# `CURVES` stores, per reaction, a function that predicts a kinetic property
# from an adsorption energy
CURVES = dict()

def make_curve(_params, _cutoff=None):
    '''
    Since we have a variable number of reactions, we need to populate `CURVES` using
    a function factory. `make_curve` is this function factory.

    Inputs:
        _params     A nested dictionary holding the two lines of the volcano. It needs
                    the keys 'LHS' and 'RHS', each of which holds a dictionary with
                    the keys 'slope' and 'intercept'.
        _cutoff     The x-value where the curve switches from the 'LHS' line to the
                    'RHS' line. If it is not given, then the module-level `cutoff`
                    is read once, at the moment `make_curve` is called.
    Output:
        calc_curve  A function that takes one x-value and returns the y-value of
                    the volcano curve at that x-value.
    '''
    # Capture the cutoff now. If `calc_curve` looked up the global `cutoff` on every
    # call instead, all curves would silently share whatever value was assigned last
    # (i.e., the vertex of the last reaction that was loaded).
    if _cutoff is None:
        _cutoff = cutoff

    def calc_curve(e):
        ''' `calc_curve` is the function we'll be making over and over again '''
        # Left of the vertex we follow the LHS line; at or right of it, the RHS line
        if e < _cutoff:
            return e*_params['LHS']['slope'] + _params['LHS']['intercept']
        else:
            return e*_params['RHS']['slope'] + _params['RHS']['intercept']
    return calc_curve

# Populate `CURVES` and `POINTS` for each reaction
for rxn in ['OER']:
    # Use pandas to pull a dataframe of our information. This script
    # is reliant on the structure of the Excel file, so keep it kosher.
    # The sheet name is passed positionally because pandas renamed this keyword
    # from `sheetname` to `sheet_name`; the position works with both.
    df = pd.read_excel('/Users/KTran/Google_Drive/Manuscripts/' +
                       'StepOne/figures/Literature_Volcano_Data.xlsx',
                       rxn)

    # Pull out the information for `POINTS`. Each value in `POINTS` is a
    # dictionary of two arrays:  'y' holds the y-values (first column) and
    # 'x' holds the adsorption energies (second column).
    # Columns are addressed by position via `.iloc`, since `.ix` and
    # `.get_values()` have been removed from pandas.
    POINTS[rxn] = {'y': df.iloc[:, 0].values,
                   'x': df.iloc[:, 1].values}

    # Do some fancy footwork to find `cutoff`, which is the x-value of
    # the vertex of the volcano curve. The vertex is the first row whose
    # third column reads 'Vertex'.
    ind = (df.iloc[:, 2] == 'Vertex')
    cutoff = df.iloc[:, 1][ind].values[0]
    # Find the slope and intercepts of the lines for both the LHS and
    # RHS of the volcano. They are read from the first row of the sheet.
    params = {'LHS': {}, 'RHS': {}}
    params['LHS']['slope'] = df.iloc[0, 5]
    params['LHS']['intercept'] = df.iloc[0, 6]
    params['RHS']['slope'] = df.iloc[0, 9]
    params['RHS']['intercept'] = df.iloc[0, 10]
    # Pass the slopes and intercepts to our function factory to create the curve
    CURVES[rxn] = make_curve(params)

## Plotting

###### OER

In [65]:
# The domain to plot over
x = np.linspace(0.8, 2.4, 20).tolist()

# Everything handed to plotly below is built as a real list. On Python 3, `map(...)`
# and `dict.values()`/`dict.keys()` are lazy objects rather than lists, so we
# materialize them explicitly. On Python 2 this yields the same lists as before.
volcano_y = [CURVES['OER'](energy) for energy in x]
# TODO:  Need to actually use O-OH, not O
predicted_x = list(ENERGIES['O'].values())
predicted_y = [CURVES['OER'](energy) for energy in predicted_x]
predicted_labels = list(ENERGIES['O'].keys())

# Let's actually make a plot for each model
for model in MODELS:
    traces = []

    # Add the volcano line
    traces.append(go.Scatter(x=x,
                             y=volcano_y,
                             mode='lines',
                             name='Volcano Line'))

    # Add the data points on the original volcano
    traces.append(go.Scatter(x=POINTS['OER']['x'],
                             y=POINTS['OER']['y'],
                             mode='markers',
                             name='Literature'))

    # Add our predictions. The hover text is the key of each energy in `ENERGIES`.
    traces.append(go.Scatter(x=predicted_x,
                             y=predicted_y,
                             mode='markers',
                             text=predicted_labels,
                             name='GASpy Predictions'))

    # Format and plot. The x-axis title is a raw string so that the LaTeX
    # backslashes are not read as (invalid) escape sequences.
    layout = go.Layout(xaxis=dict(title=r'$\Delta G_O-\Delta G_{OH} [eV]$'),
                       yaxis=dict(title='$Overpotential [V] for j = 1 mA/cm^2_{cat}$',
                                  autorange='reversed'),
                       title='OER Volcano Projections using CoordcountAds (%s)' % model)
    iplot(go.Figure(data=traces, layout=layout))