In [None]:
import os
import sys
import logging
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # FATAL
logging.getLogger('tensorflow').setLevel(logging.DEBUG)

try:
    from google.colab import drive
    drive.mount('/content/drive')
    !pip install -q ruamel.yaml
    !pip install -q tensorboard-plugin-profile
    project_path = '/content/drive/MyDrive/Colab Projects/quantumflow'
except:
    project_path = os.path.expanduser('~/quantumflow')

In [None]:
# Work from the project root so that project-relative paths resolve.
os.chdir(project_path)
# Put the project root on the import path (presumably so the local `quantumflow`
# package imported below is found). The membership check keeps this cell
# idempotent: re-running it no longer appends a duplicate entry each time.
if project_path not in sys.path:
    sys.path.append(project_path)

import numpy as np
import tensorflow as tf

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import quantumflow

# Which experiment / run of the project to reproduce.
experiment = 'alghadeer_2021'
run_name = 'recreate'

# Directory layout: <project>/experiments/<experiment>/<run_name>
base_dir = os.path.join(project_path, "experiments", experiment)
run_dir = os.path.join(base_dir, run_name)

# The experiment's YAML file holds one entry per run; keep only this run's parameters.
experiment_yaml = os.path.join(base_dir, f'{experiment}.yaml')
params = quantumflow.utils.load_yaml(experiment_yaml)[run_name]

In [None]:
# Create the training dataset from the 'dataset_train' entry of the run
# parameters (with run_dir passed through) and build it.
dataset_train = quantumflow.instantiate(params['dataset_train'], run_dir=run_dir)
dataset_train.build()

# Same for the validation dataset, from the 'dataset_validate' entry.
dataset_validate = quantumflow.instantiate(params['dataset_validate'], run_dir=run_dir)
dataset_validate.build()

# Show the dataset's own visualization for the training data.
dataset_train.visualize()

In [None]:
# Draw 1000 distinct training samples at random. A seeded generator is used so
# that the subset — and every number derived from it further down — is the same
# after a kernel restart and full re-run.
rng = np.random.default_rng(0)
training_indices = rng.choice(dataset_train.density.shape[0], 1000, replace=False)

# Restrict densities and their kinetic energies to the selected samples.
density = dataset_train.density[training_indices, :]
kinetic_energy = dataset_train.kinetic_energy[training_indices]

In [None]:
# Overlay the first 50 sampled training densities on the dataset's x grid.
fig, ax = plt.subplots(figsize=(20, 3))
ax.plot(dataset_train.x, density[:50].T)
ax.set_title(f"Random N, 1000 Training densities")
plt.show()

In [None]:
def get_alghadeer_features(density, h, r_d=1.0):
    """Build the six integrated density features of the linear kinetic-energy model.

    Each feature is ``quantumflow.utils.np_integrate(expr, h)`` for one of the
    following expressions, where ``density_dx`` is the five-point derivative of
    ``density``:

        0: density**3
        1: density * |density_dx|
        2: density_dx**2 / density   (edge points copied from their inner neighbours)
        3: density**2 / r_d
        4: |density_dx| / r_d
        5: density / r_d**2

    Args:
        density: 2-D array; the edge handling indexes it as ``[:, grid_point]``
            (presumably one row per sample — TODO confirm against the dataset).
        h: value forwarded to the quantumflow derivative and integration helpers
            (presumably the grid spacing — TODO confirm).
        r_d: scalar dividing features 3-5 as shown above. Defaults to 1.0.

    Returns:
        The six integrated features stacked along a new last axis.
    """
    density_dx = quantumflow.utils.derivative_five_point(density, h).numpy()

    # Where the density is zero the quotient is either x/0 (inf, category "divide")
    # or 0/0 (nan, category "invalid"). Both categories are silenced here: ignoring
    # only "divide" still emits a RuntimeWarning for the 0/0 case.
    with np.errstate(divide='ignore', invalid='ignore'):
        vW = density_dx**2/density

    # Replace the first and last grid point by their inner neighbours.
    vW[:, 0] = vW[:, 1]
    vW[:, -1] = vW[:, -2]

    integrate = quantumflow.utils.np_integrate
    abs_density_dx = np.abs(density_dx)

    # A constant column (np.ones(len(density))) was commented out in the original
    # feature list and is still not part of the features.
    features = np.stack([
        integrate(density**3, h),
        integrate(density*abs_density_dx, h),
        integrate(vW, h),
        integrate(1/r_d*density**2, h),
        integrate(1/r_d*abs_density_dx, h),
        integrate(1/r_d**2*density, h),
    ], axis=-1)

    return features


In [None]:
# Hard-coded coefficient vector for the six features (one weight per feature).
a = np.array([
    0.4265802,
    -0.197950333,
    0.10816284,
    -0.006022615,
    -0.158677969,
    0.05333859,
])

# Scan r_d over 100 log-spaced values in [1e-2, 1e2] and record, for each, the
# mean absolute validation error of the linear model, scaled by 1000 / mean of
# the sampled training kinetic energies.
r_ds = np.logspace(-2, 2, 100)
errs = []
for r_d in r_ds:
    scan_features = get_alghadeer_features(dataset_validate.density, dataset_validate.h, r_d)
    pred = a @ scan_features.T
    abs_deviation = np.abs(pred - dataset_validate.kinetic_energy)
    err = np.mean(abs_deviation) / np.mean(kinetic_energy) * 1000
    errs.append(err)

# Error as a function of r_d on a logarithmic x axis.
fig, ax = plt.subplots(figsize=(20, 4))
ax.plot(r_ds, errs)
ax.set_xscale('log')
plt.show()

# Keep the r_d with the smallest error for the cells that follow.
r_d = r_ds[np.argmin(errs)]

In [None]:
# Features of the sampled training densities at the selected r_d.
features = get_alghadeer_features(density, dataset_train.h, r_d)
# Least-squares fit of the coefficients to the training kinetic energies.
# np.linalg.lstsq returns (solution, residuals, rank, singular values): `a` is
# overwritten with the fitted coefficients and `w_0` receives the residuals;
# rank and singular values are discarded.
a, w_0, _, _ = np.linalg.lstsq(features, kinetic_energy, rcond=None)
# Display the fitted coefficients as the cell output.
a

In [None]:
# Apply the current coefficient vector to the validation-set features.
validation_features = get_alghadeer_features(dataset_validate.density, dataset_validate.h, r_d)
pred = a @ validation_features.T

In [None]:
# Kinetic-energy interval edges; each consecutive pair gets its own panel.
cuts = [0, 15, 50, 100, 200, 300]
fig = plt.figure(figsize=(12, 12))
for i in range(len(cuts) - 1):
    low, high = cuts[i], cuts[i + 1]
    # Slots 21, 17, 13, 9, 5 of the 5x5 grid: bottom-left up to top-right.
    ax = fig.add_subplot(5, 5, 21 - i*4)
    # Validation samples whose kinetic energy lies strictly inside (low, high).
    in_interval = (dataset_validate.kinetic_energy > low) & (dataset_validate.kinetic_energy < high)
    true_energy = dataset_validate.kinetic_energy[in_interval]
    # Black identity points (true vs. true) as reference, predictions on top.
    ax.scatter(true_energy, true_energy, s=1, c='k')
    ax.scatter(true_energy, pred[in_interval], alpha=0.05)
    ax.set_aspect('equal', 'box')
    # Hide the frame around the panel.
    for spine in ax.spines.values():
        spine.set_visible(False)
plt.show()

In [None]:
# Training-set predictions with the fitted coefficients.
pred_train = a @ features.T
# Absolute error divided by the mean training kinetic energy; printed as
# mean, standard deviation and maximum, each multiplied by 1000.
scaled_error_train = np.abs(pred_train - kinetic_energy) / np.mean(kinetic_energy)
for statistic in (np.mean, np.std, np.max):
    print(statistic(scaled_error_train) * 1000)

In [None]:
# Per-sample relative error on the validation set (absolute error divided by
# that sample's own kinetic energy); printed as mean, standard deviation and
# maximum, each multiplied by 1000.
relative_error_validate = np.abs(pred - dataset_validate.kinetic_energy) / dataset_validate.kinetic_energy
for statistic in (np.mean, np.std, np.max):
    print(statistic(relative_error_validate) * 1000)

In [None]:
# Conversion factor applied to the absolute validation error (kcal/mol per
# hartree, going by the constant's name).
kcalmol_per_hartree = 627.51
# Mean, standard deviation and maximum of the absolute validation error,
# each multiplied by the conversion factor.
absolute_error_validate = np.abs(pred - dataset_validate.kinetic_energy)
for statistic in (np.mean, np.std, np.max):
    print(statistic(absolute_error_validate) * kcalmol_per_hartree)