# Discovery List
This notebook lists the surfaces we found: it filters the predictions down to the target energy window and prints the surviving surfaces as a LaTeX table.

In [1]:
from collections import OrderedDict
import cPickle as pickle
import numpy as np
from tabulate import tabulate
from pymatgen.matproj.rest import MPRester
import tqdm
from gaspy.utils import read_rc

# Initialize

## CO2RR

In [2]:
# Set the target energy [eV] and the acceptance window around it.
# The window spans 0.2 eV below to 0.1 eV above the optimum; dE_min and dE_max
# are the bounds later used to filter the predictions.
optimum_energy = -0.67
dE_min = -0.87
dE_max = -0.57

# Load the data. The pickle holds a 2-item sequence; only the first item
# (the prediction results) is kept.
# The file is opened in binary mode so that any pickle protocol is read
# correctly on every platform.
# NOTE(review): pickle.load can execute arbitrary code -- only load files that
# were produced by a trusted run of this project.
with open('../../figures/CO2RR_predictions.pkl', 'rb') as f:
    results, _ = pickle.load(f)

## HER

In [3]:
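## NOTE(review): alternative HER configuration, left commented out. If uncommented it
## overwrites optimum_energy, dE_min, dE_max and results from the CO2RR cell above; its
## relative path ('../figures/') also differs from that cell's ('../../figures/') -- confirm before use.
## Set the target energy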
#optimum_energy = -0.27
#dE_min = -0.47
#dE_max = -0.17
#
## Load the data
#with open('../figures/HER_predictions.pkl', 'r') as f:
#    results, _ = pickle.load(f)

# Load data

In [4]:
# Pull out the documents (first item of each result) and keep only those whose
# energy lies strictly inside the (dE_min, dE_max) window
docs = [result[0] for result in results]
docs = [doc for doc in docs if dE_min < doc['energy'] < dE_max]

# Replacement MPIDs to retry with when looking up the original MPID fails.
# Built once, outside the loop, because it never changes between iterations.
# NOTE(review): presumably these are IDs that Materials Project has since
# retired or merged -- confirm.
translate = {'mvc-16085': 'mp-8016', 'mp-29284': 'mvc-16380',
             'mvc-16090': 'mp-4979', 'mvc-16091': 'mp-6408',
             'mp-16564': 'mvc-16089', 'mp-672216': 'mp-452',
             'mp-867303': 'mp-863750', 'mvc-16074': 'mp-7493',
             'mvc-16079': 'mp-10824', 'mp-866836': 'mp-866811',
             'mp-18300': 'mvc-16396', 'mp-2346': 'mp-590',
             'mp-571640': 'mp-542587', 'mvc-13876': 'mp-492',
             'mp-641874': 'mp-4139', 'mp-28645': 'mvc-16068',
             'mp-558110': 'mvc-11238', 'mp-867943': 'mp-863011',
             'mp-1002063': 'mvc-7051', 'mvc-16439': 'mp-9029',
             'mvc-10843': 'mp-9027', 'mp-558811': 'mp-288',
             'mp-632394': 'mp-23850', 'mp-554868': 'mvc-16083',
             'mp-553973': 'mp-542618', 'mp-35267': 'mvc-16094',
             'mp-37405': 'mvc-16102', 'mp-654956': 'mp-583071',
             'mvc-14442': 'mvc-15', 'mp-13147': 'mp-13146'
            }

# Find the composition of every unique MPID among the filtered documents.
# The result is keyed by the ORIGINAL MPID, even when a replacement was queried.
mpids = set(doc['mpid'] for doc in docs)
composition_by_mpid = dict.fromkeys(mpids)
with MPRester(read_rc()['matproj_api_key']) as mp_db:
    for mpid in tqdm.tqdm(mpids):
        try:
            entry = mp_db.get_entry_by_material_id(mpid)
        except IndexError:
            # The lookup of the original MPID failed; retry with its replacement.
            # Still a KeyError when no replacement is known, but with a message
            # that says what went wrong.
            if mpid not in translate:
                raise KeyError('No entry found for MPID %s and no replacement '
                               'MPID is listed in `translate`' % mpid)
            new_mpid = translate[mpid]
            entry = mp_db.get_entry_by_material_id(new_mpid)
        composition_by_mpid[mpid] = entry.as_dict()['composition']

100%|██████████| 133/133 [00:19<00:00,  8.36it/s]


# Parse

In [5]:
# Collect the columns we want to tabulate, one entry per filtered document
mpids = [doc['mpid'] for doc in docs]
energies = [doc['energy'] for doc in docs]
millers = [str(doc['miller']) for doc in docs]

# Build a chemical formula string for every document from the composition
# that was looked up for its MPID
formulae = []
for doc in docs:
    # Each element symbol is followed by its count (formatted with no decimals),
    # except that a count of exactly 1 is left out
    pieces = [element if count == 1 else element + '%.0f' % count
              for element, count in composition_by_mpid[doc['mpid']].iteritems()]
    formulae.append(''.join(pieces))
comps = [composition_by_mpid[mpid].keys() for mpid in mpids]

# Sort the rows (the element lists are the leading sort key, followed by
# formula, Miller index, energy and MPID), then drop every row whose
# composition contains at least one excluded element
excluded_elements = set(['Ca', 'Na', 'Nb', 'Se', 'S'])
rows = sorted(zip(comps, formulae, millers, energies, mpids))
table_data = [row for row in rows if excluded_elements.isdisjoint(row[0])]

# Split the surviving rows back into per-column lists; the element lists are
# collapsed into a single string per row
comps = [''.join(row[0]) for row in table_data]
formulae = [row[1] for row in table_data]
millers = [row[2] for row in table_data]
energies = [row[3] for row in table_data]
mpids = [row[4] for row in table_data]

# Tabulate

In [6]:
# Column titles, in the same order as the columns assembled below
headers = ['Formula', 'Miller index', 'dE(CO) [eV]', 'MPID']

# Stack the four columns into a single array, then transpose it so that each
# row of the array holds one table entry
columns = [np.array(column) for column in (formulae, millers, energies, mpids)]
table_data = np.array(columns).T

# Render the table as LaTeX, showing floats with two decimals
table = tabulate(table_data, floatfmt='.2f', tablefmt='latex', headers=headers)
print(table)

\begin{tabular}{llrl}
\hline
 Formula   & Miller index   &   dE(CO) [eV] & MPID       \\
\hline
 Ag3Pd     & [2, 0, 1]      &         -0.84 & mp-985296  \\
 Al2Cu     & [1, 1, 0]      &         -0.66 & mp-985806  \\
 Al2Cu     & [1, 1, 1]      &         -0.75 & mp-985806  \\
 Al2Cu     & [1, 1, 1]      &         -0.73 & mp-985806  \\
 Al2Cu     & [2, 1, 0]      &         -0.75 & mp-985806  \\
 Al2Cu     & [2, 1, 0]      &         -0.68 & mp-985806  \\
 Al2Cu     & [2, 2, 1]      &         -0.73 & mp-985806  \\
 Al2Cu6    & [0, 0, 1]      &         -0.79 & mp-12802   \\
 Al2Cu6    & [0, 0, 1]      &         -0.64 & mp-12802   \\
 Al2Cu6    & [1, 1, 0]      &         -0.80 & mp-12802   \\
 Al2Cu6    & [1, 1, 2]      &         -0.84 & mp-12802   \\
 Al2Cu6    & [1, 2, 0]      &         -0.65 & mp-12802   \\
 Al2Cu6    & [1, 2, 1]      &         -0.69 & mp-12802   \\
 Al2Cu6    & [1, 2, 2]      &         -0.78 & mp-12802   \\
 Al2Cu6    & [2, 1, 0]      &         -0.70 & mp-12802   \\
 Al2