In [1]:
%load_ext ipycache
%load_ext autoreload
%autoreload 2

  from IPython.utils.traitlets import Unicode


# Initialization
## Hyperparameters for discovery

In [2]:
# Adsorbate key; it is interpolated into the data directory and database file
# name when the data is loaded.
adsorbate = 'CO'
# NOTE(review): not referenced in any visible cell -- confirm where this target
# is consumed (and its units).
target_energy = -0.67
# Number of shuffled rows that form the initial training set; the remaining rows
# become the sampling pool.
initial_training_size = 200
# Passed to MultiscaleDiscoverer as `batch_size`.
batch_size = 200
# Passed to MultiscaleDiscoverer as `quantile_cutoff`.
quantile_cutoff = 0.95

## Gather validation data

In [3]:
import random
import ase.db


# Locate and open the ASE database for the chosen adsorbate.
db_dir = '../../pull_data/%s/' % adsorbate
db = ase.db.connect(db_dir + ('%s.db' % adsorbate))

# Pull every row into memory, then shuffle in place with a fixed seed (42) so
# the resulting order is deterministic for a given database ordering.
rows = [row for row in db.select()]
random.Random(42).shuffle(rows)


def parse_rows(rows):
    """Split database rows into parallel feature, label and surface lists.

    Args:
        rows: Iterable of row objects. Each row must expose an `id` attribute
            and a `data` mapping containing the keys 'adsorption_energy',
            'mpid', 'miller', 'shift' and 'top'.

    Returns:
        A 3-tuple `(features, labels, surfaces)` of equal-length lists, in the
        same order as `rows`:
            features: the `id` of each row.
            labels: the 'adsorption_energy' entry of each row's data.
            surfaces: a `(mpid, miller, shift, top)` tuple per row.

    Raises:
        KeyError: if a row's data lacks one of the required keys.
    """
    surface_keys = ('mpid', 'miller', 'shift', 'top')
    features, labels, surfaces = [], [], []

    # Single pass so that one-shot iterables (e.g. generators) work too.
    for entry in rows:
        features.append(entry.id)
        payload = entry.data
        labels.append(payload['adsorption_energy'])
        surfaces.append(tuple(payload[key] for key in surface_keys))

    return features, labels, surfaces


# The first `initial_training_size` rows form the initial training set ...
(training_features,
 training_labels,
 training_surfaces) = parse_rows(rows[:initial_training_size])

# ... and every remaining row goes into the sampling pool.
(sampling_features,
 sampling_labels,
 sampling_surfaces) = parse_rows(rows[initial_training_size:])

# Hallucinate
## Initialize

In [4]:
import os
# Replaces (does not extend) any PYTHONPATH already present in this process's
# environment.
# NOTE(review): presumably set so that child processes can import GASpy --
# confirm. The absolute /home/jovyan paths tie this cell to one machine layout.
os.environ['PYTHONPATH'] = '/home/jovyan/GASpy:/home/jovyan/GASpy/GASpy_regressions:'

import sys
# Put both GASpy directories at the front of this kernel's module search path.
# Because each call inserts at index 0, GASpy_regressions ends up ahead of GASpy.
sys.path.insert(0, '/home/jovyan/GASpy')
sys.path.insert(0, '/home/jovyan/GASpy/GASpy_regressions')

# Relative entry, resolved against the kernel's working directory; it must be
# added before the `src` imports below.
# NOTE(review): presumably '../../..' is the repository root that contains
# `src` -- confirm.
sys.path.append('../../..')
from src.discoverers.adsorption.mms import MultiscaleDiscoverer
from src.discoverers.adsorption.models import CFGP
from src.discoverers.adsorption.values import calc_co2rr_activities


# Build the model from the data directory, then hand it to the discoverer along
# with the hyperparameters and the training/sampling splits.
model = CFGP(db_dir)
discoverer = MultiscaleDiscoverer(
    model=model,
    value_calculator=calc_co2rr_activities,
    quantile_cutoff=quantile_cutoff,
    batch_size=batch_size,
    training_features=training_features,
    training_labels=training_labels,
    training_surfaces=training_surfaces,
    sampling_features=sampling_features,
    sampling_labels=sampling_labels,
    sampling_surfaces=sampling_surfaces,
    # init_train=False  # Set to `False` only for warm starts
)

### Loaded preprocessed data from:  ['../../pull_data/CO/data.pt']
cmd:
  checkpoint_dir: /home/jovyan/catalyst-acquisitions/notebooks/MMS/CFGP/checkpoints/2020-08-26-18-51-39-cnn
  identifier: cnn
  logs_dir: /home/jovyan/catalyst-acquisitions/notebooks/MMS/CFGP/logs/tensorboard/2020-08-26-18-51-39-cnn
  print_every: 100
  results_dir: /home/jovyan/catalyst-acquisitions/notebooks/MMS/CFGP/results/2020-08-26-18-51-39-cnn
  seed: null
  timestamp: 2020-08-26-18-51-39-cnn
dataset:
  src: ../../pull_data/CO/
  test_size: 0
  train_size: 200
  val_size: 0
logger: tensorboard
model: cgcnn
model_attributes:
  atom_embedding_size: 64
  fc_feat_size: 128
  num_fc_layers: 4
  num_graph_conv_layers: 6
  regress_forces: false
optim:
  batch_size: 64
  lr_gamma: 0.1
  lr_initial: 0.001
  lr_milestones:
  - 25
  - 45
  max_epochs: 50
  num_gpus: 2
  warmup_epochs: 10
  warmup_factor: 0.2
task:
  dataset: gasdb
  description: Regression of DFT calculated binding energes
  labels:
  - binding energy


	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha)


Iter 1/1 - Loss: 1.261   lengthscale: 0.705   noise: 0.672
Finished training on 200 data points using 2 GPUs.
Iter 1/100 - Loss: 1.261   lengthscale: 0.705   noise: 0.672
Iter 2/100 - Loss: 1.176   lengthscale: 0.724   noise: 0.602
Iter 3/100 - Loss: 1.124   lengthscale: 0.741   noise: 0.544
Iter 4/100 - Loss: 1.069   lengthscale: 0.768   noise: 0.477
Iter 5/100 - Loss: 1.016   lengthscale: 0.810   noise: 0.408
Iter 6/100 - Loss: 0.967   lengthscale: 0.870   noise: 0.345
Iter 7/100 - Loss: 0.916   lengthscale: 0.958   noise: 0.285


RuntimeError: graph_task->future_result_->completed() INTERNAL ASSERT FAILED at /opt/conda/conda-bld/pytorch_1591914880026/work/torch/csrc/autograd/engine.cpp:800, please report a bug to PyTorch. 

## Run

In [None]:
# Run the discovery on the `discoverer` built in the Initialize cell.
# NOTE(review): this cell was saved without an execution count, and the
# Initialize cell's recorded output ends in a PyTorch RuntimeError -- confirm
# that initialization completes before relying on this cell.
discoverer.simulate_discovery()

In [None]:
# Or load the last run
# NOTE(review): written as an alternative to the `simulate_discovery()` cell
# above, but "Run All" executes both cells in sequence -- confirm that loading
# after simulating is harmless, or run only one of the two.
discoverer.load_last_run()

## Analyze

In [None]:
# Unpack the six objects returned by `plot_performance` into named handles.
# NOTE(review): `window=100` is presumably a smoothing/rolling window -- confirm.
(reward_fig,
 proxy_fig,
 accuracy_fig,
 uncertainty_fig,
 calibration_fig,
 nll_fig) = discoverer.plot_performance(window=100)