# Really Hard example

## Instantiate and GO!

If the dataset has not been downloaded yet, it will be downloaded automatically and cached. Just instantiate the class and you are ready to go.

In [1]:
# Instantiating the dataset class loads the preprocessed data (see the log
# output below). The resulting `ds` object is reused by the later cells.
from openqdc.datasets import Spice
ds = Spice()


2023-10-31 11:43:09.510 | INFO     | openqdc.datasets.base:read_preprocess:236 - Reading preprocessed data
2023-10-31 11:43:09.511 | INFO     | openqdc.datasets.base:read_preprocess:237 - spice data with the following units:
                     Energy: hartree,
                     Distance: bohr,
                     Forces: hartree/bohr


Loaded atomic_inputs with shape (33175288, 5), dtype float32
Loaded position_idx_range with shape (1110165, 2), dtype int32
Loaded energies with shape (1110165, 1), dtype float32
Loaded forces with shape (33175288, 3, 1), dtype float32
Loaded name_uniques with shape (19155,), dtype <U632
Loaded name_inv_indices with shape (1110165,), dtype int64
Loaded subset_uniques with shape (6,), dtype <U20
Loaded subset_inv_indices with shape (1110165,), dtype int64


In [2]:
# Get the item at index 0.
# The entry is displayed as a dict-like record; the (truncated) output below
# starts with its 'positions' array, one row of three coordinates per atom.

ds[0]

{'positions': array([[ 1.3423489 ,  4.156236  , -3.2724566 ],
        [ 0.11595206,  5.013099  , -0.7867248 ],
        [ 3.3304987 ,  2.0671773 , -2.706327  ],
        [-0.42705083,  3.1751413 ,  1.1297553 ],
        [ 2.2186143 , -0.07851297, -0.96923685],
        [-0.79288673, -0.46212256,  5.8392773 ],
        [ 1.3678371 ,  0.99231935,  1.5586541 ],
        [ 0.10456925, -1.268748  ,  3.1158264 ],
        [ 1.8262036 , -3.419858  ,  3.5646672 ],
        [-0.12426027,  3.5056047 , -4.523958  ],
        [ 2.3169334 ,  5.6847157 , -4.164261  ],
        [-1.5303158 ,  6.050244  , -1.2296801 ],
        [ 1.4974322 ,  6.3657036 ,  0.03400026],
        [ 4.834702  ,  2.9906907 , -1.8339012 ],
        [ 4.093574  ,  1.2181237 , -4.5069537 ],
        [-0.8948031 ,  3.9545724 ,  2.9265711 ],
        [-2.2285528 ,  2.2635868 ,  0.6513103 ],
        [ 3.508004  , -1.715405  , -0.96671045],
        [ 0.59404755, -0.79707295, -2.0085306 ],
        [ 0.7965676 ,  0.47622713,  6.8558455 ],
       

In [4]:
# Get the entry at index 0 as an ASE Atoms object

ds.get_ase_atoms(0)

Atoms(symbols='C8NH18', pbc=False, initial_charges=...)

#### Isolated atom energies

The isolated atom energies are automatically used inside the datasets for the correct level of theory, but you can also use them directly by accessing the `IsolatedAtomEnergyFactory` class.

In [5]:
from openqdc.utils.atomization_energies import IsolatedAtomEnergyFactory

# Get the hashmap of isolated atom energies for the b3lyp/6-31g* method.
# As the output below shows, keys are (element symbol, integer) tuples
# (the integer is presumably the atomic charge -- confirm) mapped to a float energy.
IsolatedAtomEnergyFactory.get("b3lyp/6-31g*")

{('H', -1): -0.4618190740256503,
 ('H', 0): -0.5002733301377901,
 ('H', 1): 0.0,
 ('Li', 1): -7.284546111273075,
 ('B', -3): -23.577268753399462,
 ('B', -1): -24.614577395156598,
 ('B', 0): -24.65435524492553,
 ('B', 3): -22.018169862974275,
 ('C', -1): -37.844269871879376,
 ('C', 0): -37.84628033285479,
 ('C', 1): -37.42731164237431,
 ('N', -1): -54.52864356359092,
 ('N', 0): -54.584488815424095,
 ('N', 1): -54.0458621835885,
 ('O', -1): -75.05272792994404,
 ('O', 0): -75.06062109946738,
 ('O', 1): -74.54659271939704,
 ('F', -1): -99.75408410035712,
 ('F', 0): -99.71553471526475,
 ('Na', 1): -162.081235395777,
 ('Mg', 2): -199.22734695613283,
 ('Si', 4): -285.5564410277949,
 ('Si', 0): -289.3717359984153,
 ('Si', -4): -288.02795351148654,
 ('P', 0): -341.2580911838578,
 ('P', 1): -340.8765976669208,
 ('S', -1): -398.16568433994024,
 ('S', 0): -398.1049932797066,
 ('S', 1): -397.7199808615457,
 ('Cl', -2): -459.5066184980746,
 ('Cl', -1): -460.25223446009306,
 ('Cl', 0): -460.136243469

In [6]:
# Get the energy matrix for the b3lyp/6-31g* method.
# NOTE(review): presumably this is the matrix form of the isolated atom
# energies shown above (rather than "atomization energies") -- confirm
# against the openqdc documentation.
# Only zero-valued corner entries are visible in the abbreviated output below.
IsolatedAtomEnergyFactory.get_matrix("b3lyp/6-31g*")

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

#### Chemical space from SOAP descriptors

In [7]:
import matplotlib.pyplot as plt
import umap.umap_ as umap

# UMAP is stochastic; fix the seed so the 2-D embedding is reproducible
# across "Restart Kernel -> Run All".
UMAP_SEED = 42

# Compute the chemical-space data for a sample of 100 entries. The result is
# indexed by "soap" here and by "indices" in the plotting cell.
# NOTE(review): the sample selection inside chemical_space may itself be
# random; if so it needs its own seed for full reproducibility -- confirm.
datum = ds.chemical_space(n_samples=100, progress=False)

# Project the SOAP descriptors down to two dimensions for plotting.
reducer = umap.UMAP(random_state=UMAP_SEED)
embedding = reducer.fit_transform(datum["soap"])



OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [None]:
# Colour each embedded point by the energy of the corresponding dataset entry.
# ds[i] is a whole dict-like record (see the ds[0] output), which matplotlib
# cannot interpret as a colour value, so extract one scalar per sample.
# NOTE(review): assumes each record exposes an "energies" array (cf. the
# "Loaded energies with shape (1110165, 1)" log line) -- confirm the key name.
sample_energies = [float(ds[i]["energies"][0]) for i in datum["indices"]]

plt.scatter(
    embedding[:, 0],
    embedding[:, 1],
    c=sample_energies)
# Label the axes and the colourbar so the figure can be read on its own.
plt.xlabel("UMAP 1")
plt.ylabel("UMAP 2")
plt.colorbar(label="Energy")
