In [1]:
import pandas as pd
import numpy as np
import os

from uf3.data import composition
from uf3.representation import bspline
from uf3.data import io
from uf3.representation import process
from uf3.regression import least_squares

from uf3.representation.utility_uff import get_data_for_UltraFastFeaturization, open_uff_feature

from concurrent.futures import ProcessPoolExecutor

In [2]:
# Chemical system: two elements with degree 2 (the printed summary below
# lists the three resulting element pairs).
element_list = ['W', 'Nb']
degree = 2
chemical_system = composition.ChemicalSystem(
    element_list=element_list,
    degree=degree,
)

# Per-pair settings for the B-spline basis, keyed by element pair.
# NOTE(review): r_min/r_max look like lower/upper radial bounds (units
# presumably angstrom) -- confirm against the uf3 documentation.
r_min_map = {
    ('Nb', 'Nb'): 0.001,
    ('Nb', 'W'): 0.001,
    ('W', 'W'): 0.001,
}
r_max_map = {
    ('Nb', 'Nb'): 5.5,
    ('Nb', 'W'): 6.5,
    ('W', 'W'): 7.5,
}
# A different resolution per pair; the printed summary reports
# resolution + 3 basis functions for each pair (6, 7 and 8).
resolution_map = {
    ('Nb', 'Nb'): 3,
    ('Nb', 'W'): 4,
    ('W', 'W'): 5,
}

# No trimming at either end of the basis.
leading_trim = 0
trailing_trim = 0

bspline_config = bspline.BSplineBasis(
    chemical_system,
    r_min_map=r_min_map,
    r_max_map=r_max_map,
    resolution_map=resolution_map,
    leading_trim=leading_trim,
    trailing_trim=trailing_trim,
)

print(bspline_config)

BSplineBasis:
    Basis functions:
        ('Nb', 'Nb'): 6
        ('Nb', 'W'): 7
        ('W', 'W'): 8
ChemicalSystem:
    Elements: ('Nb', 'W')
    Degree: 2
    Pairs: [('Nb', 'Nb'), ('Nb', 'W'), ('W', 'W')]


**The tungsten dataset `w-14.xyz` is available at qmml.org, maintained by Dr. Matthias Rupp. Place the file in the current working directory before running the next cell.**

In [3]:
# The dataset is expected in the current working directory.
example_directory = os.getcwd()
data_filename = os.path.join(example_directory, "w-14.xyz")

# Read the trajectory file and consolidate its entries into one DataFrame.
data_coordinator = io.DataCoordinator()
data_coordinator.dataframe_from_trajectory(data_filename,
                                           prefix='dft')
df = data_coordinator.consolidate()

# Keep a small random subset so the example runs quickly. The fixed seed makes
# "Restart Kernel -> Run All" draw the same 20 structures every time, so the
# fitted coefficients and RMSE values are reproducible between runs.
df = df.sample(20, random_state=0).copy()

In [4]:
# Featurizer built from the B-spline basis configuration; the cells below use
# it to write feature files via batched_to_hdf_uff and batched_to_hdf.
representation = process.BasisFeaturizer(bspline_config)

In [5]:
# Featurize the sampled structures with the UFF method and write the result
# to 'test_uff.h5' (single job).
representation.batched_to_hdf_uff(
    'test_uff.h5',
    df_data=df,
    n_jobs=1,
)

Featurization took-0 secs


In [6]:
# Regularization matrix derived from the basis: a small ridge term on the
# 1-body part, no ridge on the 2-body part, and a small 2-body curvature term.
regularizer = bspline_config.get_regularization_matrix(
    ridge_1b=1e-6,
    ridge_2b=0.0,
    curvature_2b=1e-8,
)

# Linear model that will be fitted on the UFF-generated features.
model_uff = least_squares.WeightedLinearModel(
    bspline_config,
    regularizer=regularizer,
)

In [7]:
# Fit on every sampled structure, reading features from the UFF file.
model_uff.fit_from_file(
    'test_uff.h5',
    list(df.index),
    batch_size=100,
    UFF=True,
)

In [8]:
# Predict on the same structures from the UFF feature file.
# NOTE(review): presumably y_* are reference values and p_* predictions for
# energies (_e) and forces (_f), followed by the two RMSEs -- confirm against
# the uf3 documentation.
uff_predictions = model_uff.batched_predict(
    'test_uff.h5',
    keys=list(df.index),
    UFF=True,
)
y_e, p_e, y_f, p_f, rmse_e, rmse_f = uff_predictions

RMSE (energy): 0.033
RMSE (forces): 0.377


In [9]:
# Featurize the same structures with batched_to_hdf (the non-UFF method) and
# write the result to 'test_py.h5' for comparison with the UFF output.
# The context manager shuts the worker pool down (waiting for pending work)
# once featurization returns, so no worker process is left running for the
# rest of the kernel session.
with ProcessPoolExecutor(max_workers=1) as client:
    representation.batched_to_hdf('test_py.h5',
                                  df_data=df,
                                  client=client,
                                  n_jobs=1)

  0%|          | 0/1 [00:00<?, ?it/s]



  0%|          | 0/20 [00:00<?, ?it/s]

In [10]:
# Second model with the same basis and regularizer, fitted on the features
# written by batched_to_hdf.
model_py = least_squares.WeightedLinearModel(
    bspline_config,
    regularizer=regularizer,
)
model_py.fit_from_file(
    'test_py.h5',
    list(df.index),
    batch_size=100,
)

  0%|          | 0/1 [00:00<?, ?it/s]

In [11]:
# Predict on the same structures from the non-UFF feature file; the printed
# RMSE values can be compared with those of the UFF model above.
py_predictions = model_py.batched_predict(
    'test_py.h5',
    keys=list(df.index),
)
y_e, p_e, y_f, p_f, rmse_e, rmse_f = py_predictions

RMSE (energy): 0.033
RMSE (forces): 0.377


In [12]:
# Element-wise comparison of the coefficients fitted from the two feature files.
coefficients_match = np.isclose(model_uff.coefficients, model_py.coefficients)

# Fail loudly on a mismatch instead of relying on a visual scan of the array.
assert coefficients_match.all(), (
    "UFF and non-UFF featurization produced different coefficients"
)

# Still display the per-coefficient result as the cell output.
coefficients_match

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True])