In [1]:
from pyscf import gto, scf, dft
import numpy as np
from IPython.display import clear_output
import pandas as pd
from pathlib import Path

In [None]:
def np_to_scfinput(atoms, coords):
    '''
    Input: tuple of (coordinates: np.array; atom representation: tuple of strings)
    '''
    # safeguarding here
    # convertion
    raw = []
    for atom, coord in zip(atoms, coords):
        raw.append(' '.join([atom, *coord.astype(str)]))
    
    return ' ;'.join(raw)

def add_gaussian_noise(coords, sigma=0.02, seed=None):
    '''
    Input: (3,3)
    First row is oxygen: 0,0,0. Or any other atom that is kept at the origin
    Second and third rows are hydrogens / all other atoms
    '''
    # default_rng is the new np random Generator. good to use!
    rng = np.random.default_rng(seed)
    noise = rng.normal(loc=0.0, scale=sigma, size=coords.shape)
    coords_w_noise = coords + noise

    # force the first row (oxygen) to be 0,0,0
    coords_w_noise[0] = [0,0,0]
    return coords_w_noise

## Prepare water molecule file (default)

In [14]:
# actual water molecule data
bond_len = 0.9572
bond_ang = 104.5
x = bond_len * np.sin(np.radians(bond_ang))
y = bond_len * np.cos(np.radians(bond_ang))


h2o_atoms = ('O','H','H') # what atom that each row corresponds to 
h2o_coords = np.array([[0,0,0],[x, y, 0],[-x,y,0]]) # put in the actual data

In [15]:
mf_scanner = gto.M().apply(dft.RKS).set(xc='b3lyp', basis='def2-SVP').as_scanner() # an easy way to ask for energy only

data_dir = Path('./water_energies_noise')
data_dir.mkdir(exist_ok=True)

sigma = 0.02

for i in range(1000):
    h2o_coords_w_noise = add_gaussian_noise(h2o_coords, sigma=sigma, seed=i)
    h2o_inp = np_to_scfinput(h2o_atoms, h2o_coords_w_noise)
    e = mf_scanner(gto.M(atom=h2o_inp))

    np.savez_compressed(
        data_dir / f'h2o_noise_dft_b3lyp_def2-SVP_{i}.npz',
        coords=h2o_coords_w_noise,
        energy = e,
        noise_sigma = sigma
    )

converged SCF energy = -75.2379878188155
converged SCF energy = -75.2357325401288
converged SCF energy = -75.2303126561192


KeyboardInterrupt: 

In [16]:
# inspection of one single file
index = 6
file_path = data_dir / f'h2o_noise_dft_b3lyp_def2-SVP_{index}.npz'
data = np.load(file_path)
print(data['coords'], data['energy'])

[[ 0.          0.          0.        ]
 [ 0.92395162 -0.21938935  0.02704284]
 [-0.91363515 -0.20972138  0.00579915]] -75.23438594734546
