# sandbox.ipynb

This Python notebook performs regressions on data pulled from a processed MongoDB database created by GASpy. It then saves these regressions as pickles (for later use) and creates parity plots of the regression fits.

# Initialize

In [None]:
# Importing
import pdb
from gaspy_regress.regressor import GASpyRegressor
from gaspy_regress import gio, plot, predict
from gaspy.utils import vasp_settings_to_str, read_rc

# VASP settings, converted to a string by gaspy's `vasp_settings_to_str`.
# The result is passed to GASpyRegressor as `vasp_settings`.
VASP_SETTINGS = vasp_settings_to_str(
    dict(gga='RP', pp_version='5.4', encut=350)
)

# Regress

In [None]:
import copy
from tpot import TPOTRegressor
from sklearn.gaussian_process import GaussianProcessRegressor

In [None]:
# Name used when fitting the model and when loading it or its predictions back.
model_name = 'GP_around_TPOT'

# What is regressed: inputs, target and the grouping key.
responses = ['energy']
blocks = ['adsorbate']
features = ['coordcount']
# Features handed to the hierarchical (second-stage) fit.
outer_features = ['neighbors_coordcounts']

# Extra fingerprint forwarded to GASpyRegressor: name -> Mongo-style field reference.
fingerprints = dict(neighborcoord='$processed_data.fp_final.neighborcoord')

In [None]:
# Blocks to fit: one single-element tuple per adsorbate.
# NOTE(review): an earlier variant of the two fit calls below omitted `blocks=`;
# presumably that fits every block -- confirm before switching back.
fit_blocks = [('CO',), ('H',)]

# TPOT pipeline search; random_state pins the seed of the search.
tpot = TPOTRegressor(generations=8, population_size=32, verbosity=2, random_state=42)

# Gaussian process with scikit-learn's default settings.
# An explicit kernel was tried before and is currently disabled:
#   kernel = 1.0*RBF(length_scale=0.05)
#          + 1.0*RBF(length_scale=0.2)
#          + 1.0*WhiteKernel(noise_level=0.05**2.0)
#   n_restarts_optimizer = 2
# RBF and WhiteKernel are not imported in the cells shown here.
gp = GaussianProcessRegressor()

# Regressor wrapper; 80% of the data is used for training and no dev split is kept.
H = GASpyRegressor(
    features=features,
    responses=responses,
    blocks=blocks,
    vasp_settings=VASP_SETTINGS,
    fingerprints=fingerprints,
    train_size=0.8,
    dev_size=0.0
)

# Fit TPOT first, then pass the GP to the hierarchical fit together with the
# outer features, using sklearn-style fitting ('fit_sk').
H.fit_tpot(tpot, model_name=model_name, blocks=fit_blocks)
H.fit_hierarchical(gp, 'fit_sk', outer_features, model_name=model_name, blocks=fit_blocks)

In [None]:
# Save the fitted regressor `H` for later use (the notebook intro says models are stored as pickles).
gio.dump_model(H)

In [None]:
# Rebind `H` to the model returned by gio.load_model, identified by the model name,
# the combined features + outer_features list, the responses and the blocks.
# NOTE(review): presumably this reads back what gio.dump_model wrote -- confirm.
H = gio.load_model(model_name, features+outer_features, responses, blocks)

In [None]:
# Parity plot of the regression fit, drawn with the matplotlib plotter.
# The call returns three values, kept here as `x`, `y` and `text`.
x, y, text = H.parity_plot(plotter='matplotlib')

# Predict

In [None]:
# Alias for the fitted regressor.
# NOTE(review): the prediction cells visible in this notebook pass `H` directly, not `regressor`.
regressor = H
# Path to the parsed volcano spreadsheet, rooted at the `gaspy_path` entry returned by read_rc().
excel_file_path = read_rc()['gaspy_path'] + '/GASpy_regressions/volcanos_parsed.xlsx'

## CO$_2$ Reduction

In [None]:
# Settings for the CO2 reduction (CO2RR) predictions.
system = 'CO2RR'
adsorbate = 'CO'
# Block of the regressor to use; a one-element tuple, matching the entries of `fit_blocks`.
regressor_block = ('CO',)
# Axis scaling passed to the volcano prediction and plotting calls.
scale = 'log'

In [None]:
# Volcano predictions for the current system, using the selected regressor block,
# the volcano spreadsheet, the chosen scale and the adsorbate.
co2_data = predict.volcano(H, regressor_block, system, excel_file_path, scale, adsorbate)

In [None]:
# Save the CO2RR predictions, tagged with the regressor and system they came from.
gio.dump_predictions(co2_data, regressor=H, system=system)

In [None]:
# Rebind `co2_data` to the predictions returned by gio.load_predictions for this
# model name, feature list, responses, blocks and system.
# NOTE(review): presumably this reads back what gio.dump_predictions wrote -- confirm.
co2_data = gio.load_predictions(model_name, features+outer_features, responses, blocks, system)

In [None]:
# Volcano plot of the CO2RR predictions, using the same spreadsheet, system and scale as the prediction step.
plot.volcano(co2_data, excel_file_path, system, scale)

In [None]:
# Filtered parity plot of CO2RR activity, rendered with plotly.
plot.filtered_parity(
    co2_data,
    scale=scale,
    plot_type='plotly',
    plot_range=[0.01, 14],
    x_label='DFT-predicted activity [mA/cm2]',
    y_label='ML-estimated activity [mA/cm2]',
    title='Activity on surface minima'
)

In [None]:
# Same CO2RR parity data as a 'hex' plot; note the wider lower bound on the plot range.
plot.filtered_parity(
    co2_data,
    scale=scale,
    plot_type='hex',
    plot_range=[0.0001, 14],
    x_label='DFT-predicted activity [log(mA/cm2)]',
    y_label='ML-estimated activity [log(mA/cm2)]',
    title='Activity parity on surface minima'
)

## HER

In [None]:
# Settings for the HER predictions. These rebind the names used in the CO2RR section.
system = 'HER'
adsorbate = 'H'
# Block of the regressor to use; a one-element tuple, matching the entries of `fit_blocks`.
regressor_block = ('H',)
# Axis scaling passed to the volcano prediction and plotting calls.
scale = 'log'

In [None]:
# Volcano predictions for the current system, using the selected regressor block,
# the volcano spreadsheet, the chosen scale and the adsorbate.
her_data = predict.volcano(H, regressor_block, system, excel_file_path, scale, adsorbate)

In [None]:
# Save the HER predictions, tagged with the regressor and system they came from.
gio.dump_predictions(her_data, regressor=H, system=system)

In [None]:
# Rebind `her_data` to the predictions returned by gio.load_predictions for this
# model name, feature list, responses, blocks and system.
# NOTE(review): presumably this reads back what gio.dump_predictions wrote -- confirm.
her_data = gio.load_predictions(model_name, features+outer_features, responses, blocks, system)

In [None]:
# Volcano plot of the HER predictions, using the same spreadsheet, system and scale as the prediction step.
plot.volcano(her_data, excel_file_path, system, scale)

In [None]:
# Filtered parity plot of HER activity, rendered with plotly.
plot.filtered_parity(
    her_data,
    scale=scale,
    plot_type='plotly',
    plot_range=[1e-12, 1e-3],
    x_label='DFT-predicted activity [A/cm2]',
    y_label='ML-estimated activity [A/cm2]',
    title='Activity on surface minima'
)

In [None]:
# Same HER parity data as a 'hex' plot over the same plot range.
plot.filtered_parity(
    her_data,
    scale=scale,
    plot_type='hex',
    plot_range=[1e-12, 1e-3],
    x_label='DFT-predicted activity [log(A/cm2)]',
    y_label='ML-estimated activity [log(A/cm2)]',
    title='Activity parity on surface minima'
)