In [None]:
from pinn.io import load_ds

## Comparing energies

## Plot data distribution

The histograms and the scatter plot are drawn with the [plt.hist] and [plt.scatter] functions; more options are described in their documentation.

[plt.hist]: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.hist.html
[plt.scatter]: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.scatter.html

Alternatively, save data as txt files using [np.savetxt] like this:

[np.savetxt]: https://numpy.org/doc/stable/reference/generated/numpy.savetxt.html

```python
np.savetxt('e_qm9.txt', e_qm9)
```

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
# Hartree -> eV conversion factor. The QM9 energies are given in hartree while
# the QM9x energies are in eV, so QM9 values are multiplied by this factor
# before the two are compared.
hartree2eV = 27.211407953

In [None]:
# Load every dataset description with load_ds; the file names are listed in the
# same order as the variables they are unpacked into.
qm9x, qm9, rea, pro, ts = map(
    load_ds,
    ('qm9x.yml', 'qm9.yml', 'reactant.yml', 'product.yml', 'ts.yml'),
)

def get_ref(datum, unit=1):
    """Sum the tabulated per-atom reference energies of one structure.

    Args:
        datum: mapping whose ``'elems'`` entry iterates over the atomic numbers
            of the structure. Only H (1), C (6), N (7), O (8) and F (9) are
            tabulated.
        unit: multiplicative factor applied to the summed reference before it
            is returned. The default of 1 returns the tabulated sum unchanged.
            NOTE(review): the table appears to hold eV values, in which case
            ``unit=1/hartree2eV`` would yield hartree -- confirm.

    Returns:
        The sum of the reference energies of all atoms, multiplied by ``unit``.

    Raises:
        KeyError: if ``datum['elems']`` contains an atomic number that is not
            in the table.
    """
    REF_ATOM = {
        1: -13.62222753701504,
        6: -1029.4130839658328,
        7: -1484.8710358098756,
        8: -2041.8396277138045,
        9: -2712.8213146878606,
    }
    ref = sum(REF_ATOM[e] for e in datum['elems'])
    # `unit` used to be accepted but ignored; apply it so callers can rescale.
    return ref * unit

In [None]:
# Collect the 'e_data' entry of every record into one array per dataset.
# Datasets are traversed in the order qm9, qm9x, rea, pro, ts.
e_qm9, e_qm9x, e_rea, e_pro, e_ts = (
    np.array([datum['e_data'] for datum in dataset.as_numpy_iterator()])
    for dataset in (qm9, qm9x, rea, pro, ts)
)

In [None]:
# Evaluate get_ref on every record to build one reference-energy array per
# dataset, in the order qm9, qm9x, rea, pro, ts.
ref_qm9, ref_qm9x, ref_rea, ref_pro, ref_ts = (
    np.array([get_ref(datum) for datum in dataset.as_numpy_iterator()])
    for dataset in (qm9, qm9x, rea, pro, ts)
)

In [None]:
# Histogram of the energies relative to the summed atomic references.
# The same reference table is subtracted from every dataset, and QM9x is in eV,
# so the hartree-valued QM9 energies are converted to eV before subtracting.
hist_series = [
    (e_qm9 * hartree2eV - ref_qm9, 'QM9', {}),
    (e_qm9x - ref_qm9x, 'QM9x', {'histtype': 'step'}),
    (e_rea - ref_rea, 'Reactant', {'histtype': 'step'}),
    (e_pro - ref_pro, 'Product', {'histtype': 'step'}),
    (e_ts - ref_ts, 'Transition State', {'histtype': 'step'}),
]
for values, label, style in hist_series:
    plt.hist(values, 25, density=True, label=label, **style)

plt.xlabel('Energy relative to atomic references (eV)')
plt.ylabel('Density')
plt.legend()
plt.savefig('qm9x_e_hist.png')

In [None]:
# Compare QM9x and QM9 energies record by record; QM9 is converted from hartree
# to eV so both axes share a unit. The points are paired by index, and the
# data do not seem to be ordered the same way in the two datasets.
plt.scatter(e_qm9x, e_qm9 * hartree2eV)
plt.xlabel('QM9x energy (eV)')
plt.ylabel('QM9 energy (eV)')