In [2]:
%load_ext ipycache
%load_ext autoreload
%autoreload 2

The ipycache extension is already loaded. To reload it, use:
  %reload_ext ipycache
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Initialization
## Hyperparameters for discovery

In [3]:
# Adsorbate species; also used further down to build the data directory and
# database file names.
adsorbate = 'CO'
# Target adsorption energy handed to the discoverer
# (presumably in eV -- TODO confirm units against the data source).
target_energy = -0.67
# Number of shuffled rows used as the initial training set; the remaining rows
# become the sampling pool.
initial_training_size = 200
# Batch size handed to the discoverer
# (presumably the number of samples drawn per step -- confirm in the discoverer).
batch_size = 200
# Quantile cutoff handed to the discoverer; its exact meaning is defined by the
# discoverer class -- confirm there.
quantile_cutoff = 0.95

## Gather validation data

In [4]:
import random
import ase.db


# Data for each adsorbate lives at ../pull_data/<adsorbate>/<adsorbate>.db
db_dir = '../pull_data/%s/' % adsorbate
db = ase.db.connect(db_dir + '%s.db' % adsorbate)
# Pull every row into memory, then shuffle in place with a fixed seed so the
# training/sampling split made from `rows` is the same on every run
# (given the same database contents and row order).
rows = list(db.select())
random.Random(42).shuffle(rows)


def parse_rows(rows):
    """
    Split database rows into parallel feature, label, and surface lists.

    Args:
        rows: Iterable of row objects. Each row must expose an `id` attribute
              and a `data` mapping containing the keys 'adsorption_energy',
              'mpid', 'miller', 'shift', and 'top'.

    Returns:
        A 3-tuple `(features, labels, surfaces)` of equally long lists, in the
        same order as `rows`:
            features: each row's `id`
            labels:   each row's `data['adsorption_energy']`
            surfaces: each row's `(mpid, miller, shift, top)` tuple
    """
    surface_keys = ('mpid', 'miller', 'shift', 'top')

    def unpack(row):
        # Read `row.data` once per row and reuse it for every lookup.
        row_id = row.id
        payload = row.data
        energy = payload['adsorption_energy']
        return row_id, energy, tuple(payload[key] for key in surface_keys)

    records = [unpack(row) for row in rows]

    features = [row_id for row_id, _, _ in records]
    labels = [energy for _, energy, _ in records]
    surfaces = [surface for _, _, surface in records]
    return features, labels, surfaces


# The first `initial_training_size` shuffled rows form the initial training set;
# every remaining row goes into the sampling pool.
training_features, training_labels, training_surfaces = parse_rows(rows[:initial_training_size])
sampling_features, sampling_labels, sampling_surfaces = parse_rows(rows[initial_training_size:])

In [5]:
import json

# Load the JSON records that accompany the database for the configured
# adsorbate. The path is derived from `db_dir` and `adsorbate` (the same way
# the .db path is built) so that changing the `adsorbate` hyperparameter cannot
# silently pair one adsorbate's database with another adsorbate's JSON file.
# For adsorbate == 'CO' this resolves to '../pull_data/CO/CO.json'.
# The variable keeps the name `co_json` because other cells refer to it.
with open(db_dir + '%s.json' % adsorbate) as f:
    co_json = json.load(f)

In [6]:
# Display the first JSON record to inspect which fields each entry carries.
co_json[0]

{'adsorbate': 'CO',
 'mpid': 'mp-30336',
 'miller': [0, 0, 1],
 'shift': 0.18204904999999982,
 'top': False,
 'coordination': 'W',
 'neighborcoord': ['W:Al-Al-Al-Al-Al-Al-Al'],
 'energy': -2.210072070000015}

# Hallucinate
## Initialize

In [11]:
import sys
sys.path.insert(0, '../../')
from src.discoverers.adsorption.mms_nullmodel import MultiscaleDiscovererNullModel


# Build the null-model discoverer from the hyperparameters, the JSON records,
# and the training/sampling split prepared in the earlier cells.
# NOTE(review): `init_train=False` is documented inline as a warm-start-only
# setting, and the recorded output of the following "Run" cell is a KeyError.
# Whether the two are related cannot be told from this notebook -- confirm
# against the discoverer implementation before relying on this configuration.
discoverer = MultiscaleDiscovererNullModel(
                                  json_list=co_json,
                                  db_dir=db_dir,
                                  target_energy=target_energy,
                                  quantile_cutoff=quantile_cutoff,
                                  batch_size=batch_size,
                                  training_features=training_features,
                                  training_labels=training_labels,
                                  training_surfaces=training_surfaces,
                                  sampling_features=sampling_features,
                                  sampling_labels=sampling_labels,
                                  sampling_surfaces=sampling_surfaces,
                                  init_train=False  # Set to `False` only for warm starts
                                 )

## Run

In [12]:
# Run the simulated discovery loop.
# NOTE(review): the output saved with this cell ends in `KeyError: 'mp-30354'`,
# so the run recorded in this notebook did not complete.
discoverer.simulate_discovery()

HBox(children=(FloatProgress(value=0.0, description='Hallucinating discovery...', max=92.0, style=ProgressStyl…




KeyError: 'mp-30354'

In [None]:
# Alternative to running the simulation: load the last run instead
# (presumably restores state saved by an earlier simulation -- confirm in the
# discoverer implementation).
discoverer.load_last_run()

## Analyze

In [None]:
# Unpack the six figures returned by `plot_performance`.
# `window=100` is passed straight through (presumably a rolling-window size --
# confirm in the discoverer implementation).
reward_fig, proxy_fig, accuracy_fig, uncertainty_fig, calibration_fig, nll_fig = discoverer.plot_performance(window=100)