# Volcano_Plots.ipynb
Author:  Kevin Tran <ktran@andrew.cmu.edu>

This Python notebook takes regression models created by `regress.ipynb` and uses them to construct volcano plots.

## Initializations/Data Management

###### Importing

In [1]:
from pprint import pprint   # for debugging
import sys
import math
import copy
import numpy as np
import pandas as pd
sys.path.append('..')
from vasp_settings_to_str import vasp_settings_to_str
from gas_pull import GASPull
import dill as pickle
pickle.settings['recurse'] = True     # required to pickle lambdify functions
import matplotlib.pyplot as plt
from plotly.offline import init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import plotly.plotly as py
import plotly.graph_objs as go

###### Load data

In [14]:
# Location of the *.db file. The commented-out line is the alternative
# location to use on Cori; the active one is the local copy.
#DB_LOC = '/global/cscratch1/sd/zulissi/GASpy_DB/'  # Cori
DB_LOC = '/Users/KTran/Nerd/GASpy'                 # Local

# Calculation settings we want to look at (built from an empty settings dictionary)
VASP_SETTINGS = vasp_settings_to_str({})

# Pull the data from the database at `DB_LOC`, keep the resulting rows, and
# collect the distinct adsorbates and mpids that appear among those rows.
GAS_PULL = GASPull(DB_LOC, VASP_SETTINGS, split=False)
ROWS = GAS_PULL.rows
ADS = np.unique([entry.adsorbate for entry in ROWS])
MPIDS = np.unique([entry.mpid for entry in ROWS])

###### Filter data

In [None]:
# We store all of the adsorption energies in the nested dictionary, `ENERGIES`. The first
# tier has keys for each adsorbate, and the second tier has keys for each mpid-facet pairing.
# Note that we only store the minimum (i.e., strongest) adsorption energy for each facet.
#
# Every adsorbate must get its *own* inner dictionary. `dict.fromkeys(ADS, {})` would give
# every key the very same dictionary object, so all adsorbates would share (and overwrite)
# one set of energies.
ENERGIES = {ads: {} for ads in ADS}
for ads in ADS:
    # Narrow the rows down once per adsorbate/mpid instead of re-scanning `ROWS` for
    # every facet.
    ads_rows = [row for row in ROWS if row.adsorbate == ads]
    for mpid in MPIDS:
        mpid_rows = [row for row in ads_rows if row.mpid == mpid]
        facets = np.unique([row.miller for row in mpid_rows])
        for facet in facets:
            energies = [row.energy for row in mpid_rows if row.miller == facet]
            if energies:
                ENERGIES[ads][(mpid, facet)] = np.min(energies)

###### Filter data (OER)

In [76]:
# Filter the data a second time, specifically for OER, which needs both the 'O' and the
# 'OH' rows of a surface. Note that `OER_ENERGIES` is not nested: each (mpid, facet)
# pairing maps directly to the minimum 'O' energy minus the minimum 'OH' energy.
OER_ENERGIES = {}
for mpid in MPIDS:
    # Rows of this mpid, separated by adsorbate
    o_rows = [row for row in ROWS if row.adsorbate == 'O' and row.mpid == mpid]
    oh_rows = [row for row in ROWS if row.adsorbate == 'OH' and row.mpid == mpid]

    # Only facets that have at least one 'O' row are considered
    for facet in np.unique([row.miller for row in o_rows]):
        o_energies = [row.energy for row in o_rows if row.miller == facet]
        oh_energies = [row.energy for row in oh_rows if row.miller == facet]

        # Skip surfaces for which we do not have both adsorbates
        if not o_energies or not oh_energies:
            continue
        OER_ENERGIES[(mpid, facet)] = np.min(o_energies)-np.min(oh_energies)

###### Load Surrogate Models

In [18]:
# Define the models you want to pull out here
MODELS = dict.fromkeys(('GBE', 'LR', 'GP', 'TPOT'))

# This loop will pull each model out of their pickle. File names are hard-coded, so
# make sure they match up with the pickles created in `regress.ipynb`
# NOTE: unpickling executes code stored in the file, so only load pickles you created
# yourself or otherwise trust.
for model in MODELS:
    # Pickles are binary data, so the file is opened in binary mode, and the `with`
    # statement guarantees that the file handle is closed again after loading.
    with open('pkls/CoordcountAds_Energy_%s.pkl' % model, 'rb') as pkl_file:
        pkl = pickle.load(pkl_file)
    # Keep only the two entries of the pickle that we need
    MODELS[model] = {'model': pkl['model'],
                     'pre_processors': pkl['pre_processors']}

###### Load Volcanoes

In [89]:
# Two dictionaries, both keyed by reaction:
#   `CURVES` holds, for each reaction we plan to look at, a function that predicts
#            kinetic properties from adsorption energies.
#   `POINTS` holds the incumbent data points from each volcano plot.
CURVES, POINTS = {}, {}

def make_curve(_params, _cutoff=None):
    '''
    Since we have a variable number of reactions, we need to populate `CURVES` using
    a function factory. `make_curve` is this function factory.

    Inputs:
        _params     Nested dictionary with the keys 'LHS' and 'RHS'. Each of them holds
                    a dictionary with the keys 'slope' and 'intercept', which describe
                    one of the two lines of the volcano.
        _cutoff     The x-value at which the curve switches from the 'LHS' line to the
                    'RHS' line. When omitted, the value that the module-level variable
                    `cutoff` has at the moment `make_curve` is called is used.
    Output:
        calc_curve  A function of a single energy, `e`. It evaluates the 'LHS' line when
                    `e` is below the cutoff and the 'RHS' line otherwise.
    '''
    # Freeze the cutoff now. If `calc_curve` looked up the module-level `cutoff` each
    # time it is called, every curve would use whatever value `cutoff` was assigned last,
    # i.e., all reactions would share the cutoff of the last reaction that was loaded.
    if _cutoff is None:
        _cutoff = cutoff

    def calc_curve(e):
        ''' `calc_curve` is the function we'll be making over and over again '''
        side = 'LHS' if e < _cutoff else 'RHS'
        return e*_params[side]['slope'] + _params[side]['intercept']
    return calc_curve

# Populate `CURVES` and `POINTS` for each reaction
for rxn in ['OER']:
    # Use pandas to pull a dataframe of our information. This script
    # is reliant on the structure of the Excel file, so keep it kosher.
    # Note also that the name of the Excel sheet must match the string
    # value in this loop's iterator.
    # The sheet is passed positionally because the keyword for it was renamed
    # between pandas versions (`sheetname` -> `sheet_name`).
    df = pd.read_excel('/Users/KTran/Google_Drive/Manuscripts/'
                       'GASpy/figures/Literature_Volcano_Data.xlsx',
                       rxn)

    # Pull out the information for `POINTS`. All cells are addressed by position
    # with `.iloc`; `.ix` mixed label- and position-based lookup and, like
    # `get_values()`, is no longer available in current pandas.
    POINTS[rxn] = {'y': df.iloc[:, 0].values,
                   'x': df.iloc[:, 1].values,
                   'labels': df.index.tolist()}

    # Do some fancy footwork to find `cutoff`, which is the x-value of
    # the vertex of the volcano curve: it is the entry of the fourth column
    # in the (first) row whose third column reads 'Vertex'.
    # `cutoff` has to remain a module-level name, because the curves built by
    # `make_curve` rely on it.
    ind = (df.iloc[:, 2] == 'Vertex')
    cutoff = df.iloc[:, 3][ind].values[0]
    # Find the slope and intercepts of the lines for both the LHS and
    # RHS of the volcano. They are read from fixed columns of the first row.
    params = {'LHS': {'slope': df.iloc[0, 5],
                      'intercept': df.iloc[0, 6]},
              'RHS': {'slope': df.iloc[0, 9],
                      'intercept': df.iloc[0, 10]}}
    # Pass the slopes and intercepts to our function factory to create the curve
    CURVES[rxn] = make_curve(params)

## Plotting

###### OER

In [91]:
# The domain to plot over
x = np.linspace(0.8, 2.4, 100).tolist()

# Everything handed to plotly is materialized as a list. On Python 3, `map(...)` and
# `dict.values()`/`dict.keys()` are lazy iterators/views rather than lists. None of this
# data depends on the model, so it is computed once, outside of the loop.
volcano_y = [CURVES['OER'](energy) for energy in x]
predicted_x = list(OER_ENERGIES.values())
predicted_y = [CURVES['OER'](energy) for energy in predicted_x]
# Iterating the dictionary yields the keys in the same order as `.values()` above
predicted_labels = ['\n'.join(key) for key in OER_ENERGIES]

# Let's make a plot for each model
# NOTE(review): `model` is only used in the plot title below; the plotted data are
# identical for every model. Confirm whether model predictions were meant to be shown.
for model in MODELS:
    traces = []

    # Add the volcano line
    traces.append(go.Scatter(x=x,
                             y=volcano_y,
                             mode='lines',
                             name='Volcano Line'))

    # Add the data points from the original volcano
    traces.append(go.Scatter(x=POINTS['OER']['x'],
                             y=POINTS['OER']['y'],
                             mode='markers',
                             text=POINTS['OER']['labels'],
                             name='Literature'))

    # Add our predictions
    traces.append(go.Scatter(x=predicted_x,
                             y=predicted_y,
                             mode='markers',
                             text=predicted_labels,
                             name='GASpy Predictions'))

    # Format and plot. The x-axis title is a raw string so that the LaTeX backslashes
    # are not parsed as (invalid) Python escape sequences; the resulting text is unchanged.
    layout = go.Layout(xaxis=dict(title=r'$\Delta G_O-\Delta G_{OH} [eV]$'),
                       yaxis=dict(title='$Overpotential [V] for j = 1 mA/cm^2_{cat}$',
                                  autorange='reversed'),
                       title='OER Volcano Projections using CoordcountAds (%s)' % model)
    iplot(go.Figure(data=traces, layout=layout))