In [1]:
%load_ext ipycache
%load_ext autoreload
%autoreload 2

from pyinstrument import Profiler

  from IPython.utils.traitlets import Unicode


# Initialization
## Hyperparameters for discovery

In [2]:
import sys
sys.path.insert(0, '../../../')
from src.discoverers.adsorption.values import calc_co2rr_activity


# Adsorbate whose dataset is used; this string is interpolated into the data
# directory path and the database file name when the data is loaded.
adsorbate = 'CO'
# Number of shuffled rows taken as the initial training set; every remaining
# row goes into the sampling pool.
initial_training_size = 200
# Forwarded to the discoverer as its `batch_size` argument.
batch_size = 200
# Forwarded to the discoverer as its `quantile_cutoff` argument.
quantile_cutoff = 0.95

## Gather validation data

In [3]:
import random
import ase.db


# The data for an adsorbate lives in a directory named after it, inside an ASE
# database file that is also named after the adsorbate.
db_dir = '../../pull_data/%s/' % adsorbate
db = ase.db.connect(db_dir + '%s.db' % adsorbate)
# Materialize the selection into a list so it can be shuffled in place and sliced.
rows = list(db.select())
# Shuffle with a dedicated, fixed-seed generator (seed 42) so the resulting
# order is reproducible for a given list of rows and does not touch the
# module-level `random` state.
random.Random(42).shuffle(rows)


def parse_rows(rows):
    """
    Split database rows into three parallel lists.

    Args:
        rows    Iterable of objects that expose an `id` attribute and a
                `data` mapping containing the keys 'adsorption_energy',
                'mpid', 'miller', 'shift', and 'top'.
    Returns:
        features    List with the `id` of every row, in input order.
        labels      List with the 'adsorption_energy' entry of every row's
                    `data`, in input order.
        surfaces    List of 4-tuples `(mpid, miller, shift, top)` read from
                    every row's `data`, in input order.
    """
    surface_keys = ('mpid', 'miller', 'shift', 'top')
    features, labels, surfaces = [], [], []

    for entry in rows:
        features.append(entry.id)
        # Read the `data` attribute once per row and reuse it for every lookup
        record = entry.data
        labels.append(record['adsorption_energy'])
        surfaces.append(tuple(record[key] for key in surface_keys))

    return features, labels, surfaces


# The first `initial_training_size` shuffled rows become the training set;
# all rows after that become the sampling pool.
training_features, training_labels, training_surfaces = parse_rows(rows[:initial_training_size])
sampling_features, sampling_labels, sampling_surfaces = parse_rows(rows[initial_training_size:])

# Hallucinate
## Initialize

In [4]:
from src.discoverers.adsorption.mms import MultiscaleDiscoverer
from src.discoverers.adsorption.models import NullModel


# Profile the construction of the model and discoverer with pyinstrument.
profiler = Profiler()
profiler.start()

# The `finally` clause stops the profiler and prints its report whether or not
# the construction below raises.
try:
    # Initialize
    model = NullModel(db_dir)
    # The discoverer receives the hyperparameters plus the training and
    # sampling lists produced by `parse_rows`.
    # NOTE(review): `n_processes=8` is hard-coded here; the captured output of
    # this cell shows ForkPoolWorker processes — confirm this matches the
    # machine the notebook is run on.
    discoverer = MultiscaleDiscoverer(model=model,
                                      quantile_cutoff=quantile_cutoff,
                                      value_calculator=calc_co2rr_activity,
                                      batch_size=batch_size,
                                      training_features=training_features,
                                      training_labels=training_labels,
                                      training_surfaces=training_surfaces,
                                      sampling_features=sampling_features,
                                      sampling_labels=sampling_labels,
                                      sampling_surfaces=sampling_surfaces,
                                      n_processes=8,
                                      #n_samples=3,  # decrease to speed up
                                      #init_train=False  # Set to `False` only for warm starts
                                     )

finally:
    profiler.stop()
    print(profiler.output_text(unicode=True, color=True))

### Loaded preprocessed data from:  ['../../pull_data/CO/data.pt']




HBox(children=(FloatProgress(value=0.0, description='surface energies', max=19105.0, style=ProgressStyle(descr…

Process ForkPoolWorker-5:
Process ForkPoolWorker-7:
Process ForkPoolWorker-4:





Process ForkPoolWorker-3:
Process ForkPoolWorker-8:
Process ForkPoolWorker-1:
Process ForkPoolWorker-2:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/ktran/miniconda3/lib/python3.7/site-packages/multiprocess/process.py", line 297, in _bootstrap
    self.run()
  File "/home/ktran/miniconda3/lib/python3.7/site-packages/multiprocess/process.py", line 297, in _bootstrap
    self.run()
  File "/home/ktran/miniconda3/lib/python3.7/site-packages/multiprocess/process.py", line 297, in _bootstrap
    self.run()
  File "/home/ktran/miniconda3/lib/python3.7/site-packages/multiprocess/process.py", line 297, in _bootstrap
    self.run()
  File "/home/ktran/miniconda3/lib/python3.7/site-packages/multiprocess/process.py", line 297, in _bootstrap
    self.run()
  File "/home/ktran/miniconda3/lib/python3.7/site-packages/multiprocess/process.py", line 99, in run


## Run

In [None]:
# Profile the discovery simulation with a fresh pyinstrument profiler.
profiler = Profiler()
profiler.start()

# The `finally` clause stops the profiler and prints its report whether or not
# the simulation raises.
try:
    discoverer.simulate_discovery()

finally:
    profiler.stop()
    print(profiler.output_text(unicode=True, color=True))

In [None]:
# Or load the last run
# NOTE(review): presumably an alternative to re-running `simulate_discovery()`;
# where the run is loaded from is not visible in this notebook — confirm
# against `MultiscaleDiscoverer.load_last_run`.
discoverer.load_last_run()

## Analyze

In [None]:
# `plot_performance` is unpacked into six results, named here as the reward,
# proxy, accuracy, uncertainty, calibration, and nll figures.
# NOTE(review): `window=100` is presumably a rolling-window size — confirm
# against `MultiscaleDiscoverer.plot_performance`.
reward_fig, proxy_fig, accuracy_fig, uncertainty_fig, calibration_fig, nll_fig = discoverer.plot_performance(window=100)