In [None]:
# This configuration cell runs before the notebook's import cell, so pandas is
# imported here; otherwise `pd` is undefined on a fresh kernel (Restart & Run All).
import pandas as pd

# Input SCF data and coordinates.
data = pd.read_csv('data/SCF_data.csv')

# Input elements | Ex. types_elements_init = ['Sr', 'Ti', 'O', 'He']
# The descriptor functions size their output with len(types_elements) - 1 and
# treat 'He' as the virtual (grid) species, so 'He' is expected to be listed last.
types_elements_init = []

# Input grid increment | Ex. grid_inc = 1 for a 7 x 7, 49 grid point system
grid_inc = 0

# Input number of grid points; must be a perfect square | Ex. grid_points = 49
grid_points = 0

In [None]:
#Imports
import numpy as np
import pandas as pd

## Feature Generation

In [None]:
def fc(rij, r_cut, type_func):
    """Evaluate the radial cutoff function for a single (scalar) distance.

    Parameters
    ----------
    rij : float
        Distance at which to evaluate the cutoff.
    r_cut : float
        Cutoff radius; the function is 0.0 for rij > r_cut.
    type_func : str
        'cos'  -> 0.5 * (cos(pi * rij / r_cut) + 1)
        'tanh' -> tanh(1 - rij / r_cut) ** 3

    Returns
    -------
    float
        The cutoff value, or 0.0 beyond the cutoff radius.

    Raises
    ------
    ValueError
        If type_func is neither 'cos' nor 'tanh'. Previously this case fell
        through and returned None, which only failed later in the callers.
    """
    if type_func not in ('cos', 'tanh'):
        raise ValueError(
            "type_func must be 'cos' or 'tanh', got %r" % (type_func,))
    if rij > r_cut:
        return 0.0
    # A NaN distance fails the comparison above and propagates as NaN here,
    # instead of falling through both branches and returning None.
    if type_func == 'cos':
        return 0.5*(np.cos(np.pi*rij/r_cut) + 1.0)
    return np.tanh(1-rij/r_cut)**3.0


def get_eta_values(rcut, n, min_val, max_val):
    """Build the eta (width) and rs (shift) arrays for the radial descriptors.

    With R(m) = rcut / (n + 1) ** (m / (n + 1)), both returned arrays have
    length 2 * (n + 1):

    * entries 0 .. n (centred set):
          rs[m] = 0,  eta[m] = (1 / R(m)) ** 2
    * entries n + 1 .. 2n + 1 (shifted set), for q = 0 .. n:
          rs[n + 1 + q] = R(q),  eta[n + 1 + q] = 1 / (R(q + 1) - R(q)) ** 2

    Parameters
    ----------
    rcut : float
        Cutoff radius that scales R(m).
    n : int
        Each set contains n + 1 entries.
    min_val, max_val :
        Not used by the computation; kept so existing calls keep working.

    Returns
    -------
    (eta, rs) : tuple of numpy.ndarray
    """
    n_set = n + 1

    def R_sm(i, m):
        return rcut/(i**(m/i))

    eta = np.zeros(2*n_set, dtype=float)
    # rs starts zeroed, so the centred set needs no explicit assignment.
    rs = np.zeros(2*n_set, dtype=float)

    for i in range(n_set):
        eta[i] = (1.0/R_sm(n_set, i))**2.0

    for i in range(n_set):
        rs[n_set + i] = R_sm(n_set, i)
        # Filled from the end backwards: index 2*n_set - 1 - i receives the
        # spacing between R(n_set - i) and R(n - i).
        eta[2*n_set - i - 1] = 1.0 / \
            (R_sm(n_set, n_set - i) - R_sm(n_set, n - i))**2.0
    return eta, rs


def G1(distances, r_cut, type_func, types_elements, elements):
    """Sum the cutoff function over real atoms for every virtual ('He') atom.

    Returns an array of shape (number of 'He' atoms, len(types_elements) - 1).
    Entry [v, t] is the sum of fc(r_vj) over all non-'He' atoms j whose symbol
    sits at position t in types_elements. The row index is computed as
    (atom index - number of real atoms), so the 'He' atoms are expected to be
    the trailing entries of `elements`.
    """
    n_he = elements.count('He')
    n_real_atoms = len(elements) - n_he
    out = np.zeros((n_he, len(types_elements)-1), dtype=float)

    # Real-atom indices are collected once; their order matches the original
    # enumeration, so the accumulation order is unchanged.
    real_indices = [a for a, sym in enumerate(elements) if sym != 'He']

    for v, sym_v in enumerate(elements):
        if sym_v != 'He':
            continue  # descriptors are only built for virtual atoms
        row = v - n_real_atoms
        for a in real_indices:
            r_va = distances[v, a]
            col = types_elements.index(elements[a])
            out[row][col] += fc(r_va, r_cut, type_func)
    return out
# For G2 and G3, each row holds the descriptors of the i-th virtual atom,
# and each column holds the descriptor for a different eta/kappa value
#            eta1       eta2     ...
# atom1  [ desc1,1    desc1,2     ... ]
# atom2  [ desc2,1    desc2,2     ... ]


def G2(distances, r_cut, eta, rs, type_func, types_elements, elements):
    """Gaussian-weighted radial descriptors for every virtual ('He') atom.

    Returns an array of shape
    (number of 'He' atoms, len(eta), len(types_elements) - 1) where entry
    [v, :, t] is the sum over non-'He' atoms j of type index t of
        exp(-eta * (r_vj - rs) ** 2) * fc(r_vj).

    The row index is (atom index - number of real atoms), so the 'He' atoms are
    expected to be the trailing entries of `elements`.
    """
    n_he = elements.count('He')
    n_real_atoms = len(elements) - n_he
    # The last axis has one slot per type except the final one in types_elements.
    out = np.zeros((n_he, len(eta), len(types_elements)-1), dtype=float)
    widths = np.array(eta)

    real_indices = [a for a, sym in enumerate(elements) if sym != 'He']

    for v, sym_v in enumerate(elements):
        if sym_v != 'He':
            continue  # only virtual atoms receive descriptors
        row = v - n_real_atoms
        for a in real_indices:
            r_va = distances[v, a]
            cutoff = fc(r_va, r_cut, type_func)
            gauss = np.exp(-widths*(r_va-rs)**2)
            col = types_elements.index(elements[a])
            out[row, :, col] += gauss * cutoff

    return out


def G3(distances, r_cut, kappa, type_func, types_elements, elements):
    """Cosine-modulated radial descriptors for every virtual ('He') atom.

    Returns an array of shape
    (number of 'He' atoms, len(kappa), len(types_elements) - 1) where entry
    [v, :, t] is the sum over non-'He' atoms j of type index t of
        cos(kappa * r_vj) * fc(r_vj).

    The row index is (atom index - number of real atoms), so the 'He' atoms are
    expected to be the trailing entries of `elements`.
    """
    nvirtual = elements.count('He')
    nreal = len(elements)-nvirtual
    # Allocated once (the original allocated this array twice). The last axis
    # has one slot per type except the final one in types_elements.
    g3desc = np.zeros(
        (nvirtual, len(kappa), len(types_elements)-1), dtype=float)
    kappa = np.array(kappa)

    for i, typei in enumerate(elements):
        if typei != 'He':
            continue  # only virtual atoms receive descriptors
        for j, typej in enumerate(elements):
            if typej == 'He':
                continue  # only real atoms contribute
            rij = distances[i, j]
            f_fun = fc(rij, r_cut, type_func)
            cos_fac = np.cos(kappa * rij)
            idx = types_elements.index(typej)
            g3desc[i-nreal, :, idx] += cos_fac * f_fun

    return g3desc
# Lambda can have values of +1 or -1
# zeta can have integer values greater than zero


def G4(distance_vectors, distances, r_cut, type_func, elements, combo_types,
       zeta_values=None, eta_value=None, lambd=1.0):
    """Angular descriptors (including the j-k distance) for every 'He' atom.

    For each virtual atom i and each ordered pair of distinct non-'He' atoms
    (j, k), the following is accumulated into g4desc[i - nreal, l, idx]:

        2 ** (1 - zeta_l) * (1 + lambd * cos(theta_ijk)) ** zeta_l
            * exp(-eta * (rij**2 + rik**2 + rjk**2))
            * fc(rij) * fc(rik) * fc(rjk)

    where cos(theta_ijk) = (rij_vec . rik_vec) / (rij * rik) and idx is the
    position of [type_j, type_k] (in either order) in combo_types.

    Parameters
    ----------
    distance_vectors, distances :
        Pairwise displacement vectors and distances, indexed as [i, j].
    r_cut, type_func :
        Forwarded to fc().
    elements : list of str
        Chemical symbols. The row index is (atom index - number of real
        atoms), so the 'He' atoms are expected to be the trailing entries.
    combo_types : list of [str, str]
        Element pairs defining the last axis of the result.
    zeta_values : sequence, optional
        Exponents zeta. Defaults to the notebook-level `zeta`, which is what
        the original implementation read implicitly.
    eta_value : float, optional
        Gaussian width. Defaults to the notebook-level `eta[0]`.
    lambd : float, optional
        Lambda (+1 or -1). Default 1.0, as in the original implementation.

    Returns
    -------
    numpy.ndarray of shape (nvirtual, len(zeta_values), len(combo_types))

    Raises
    ------
    ValueError
        If an element pair is missing from combo_types in both orders.
    """
    # Fall back to the notebook globals so existing six-argument calls work.
    zetas = zeta if zeta_values is None else zeta_values
    eta0 = eta[0] if eta_value is None else eta_value

    nvirtual = elements.count('He')
    # Computed locally; the original relied on a notebook-level `nreal`.
    nreal = len(elements) - nvirtual
    g4desc = np.zeros((nvirtual, len(zetas), len(combo_types)), dtype=float)

    real_atoms = [(a, sym) for a, sym in enumerate(elements) if sym != 'He']

    for i, typei in enumerate(elements):
        if typei != 'He':
            continue
        for j, typej in real_atoms:
            rij = distances[i, j]
            rij_vec = distance_vectors[i, j]
            fc_ij = fc(rij, r_cut, type_func)

            for k, typek in real_atoms:
                if j == k:
                    continue
                rik = distances[i, k]
                rjk = distances[j, k]

                # Everything below is independent of zeta, so it is evaluated
                # once per (i, j, k) triple instead of once per zeta value.
                cost = np.dot(rij_vec, distance_vectors[i, k])/(rij*rik)
                exp_fac = np.exp(-eta0*(rij**2+rik**2+rjk**2))
                fc_fac = fc_ij*fc(rik, r_cut, type_func) * \
                    fc(rjk, r_cut, type_func)

                if [typej, typek] in combo_types:
                    idx = combo_types.index([typej, typek])
                else:
                    idx = combo_types.index([typek, typej])

                for l, m in enumerate(zetas):
                    # lambd is a plain scalar, so a scalar is accumulated here;
                    # the original used a size-1 array, which relies on
                    # size-1-array-to-scalar conversion on assignment.
                    ang_lin = (2.0**(1-m))*(1+lambd*cost)**m
                    g4desc[i-nreal, l, idx] += ang_lin*exp_fac*fc_fac
    return g4desc
# Lambda can have values of +1 or -1
# zeta can have integer values greater than zero


def G5(distance_vectors, distances, r_cut, type_func, elements, combo_types,
       zeta_values=None, eta_value=None, lambd=1.0):
    """Angular descriptors (without the j-k distance) for every 'He' atom.

    For each virtual atom i and each ordered pair of distinct non-'He' atoms
    (j, k), the following is accumulated into g5desc[i - nreal, l, idx]:

        2 ** (1 - zeta_l) * (1 + lambd * cos(theta_ijk)) ** zeta_l
            * exp(-eta * (rij**2 + rik**2))
            * fc(rij) * fc(rik)

    where cos(theta_ijk) = (rij_vec . rik_vec) / (rij * rik) and idx is the
    position of [type_j, type_k] (in either order) in combo_types.

    Parameters
    ----------
    distance_vectors, distances :
        Pairwise displacement vectors and distances, indexed as [i, j].
    r_cut, type_func :
        Forwarded to fc().
    elements : list of str
        Chemical symbols. The row index is (atom index - number of real
        atoms), so the 'He' atoms are expected to be the trailing entries.
    combo_types : list of [str, str]
        Element pairs defining the last axis of the result.
    zeta_values : sequence, optional
        Exponents zeta. Defaults to the notebook-level `zeta`, which is what
        the original implementation read implicitly.
    eta_value : float, optional
        Gaussian width. Defaults to the notebook-level `eta[0]`.
    lambd : float, optional
        Lambda (+1 or -1). Default 1.0, as in the original implementation.

    Returns
    -------
    numpy.ndarray of shape (nvirtual, len(zeta_values), len(combo_types))

    Raises
    ------
    ValueError
        If an element pair is missing from combo_types in both orders.
    """
    # Fall back to the notebook globals so existing six-argument calls work.
    zetas = zeta if zeta_values is None else zeta_values
    eta0 = eta[0] if eta_value is None else eta_value

    nvirtual = elements.count('He')
    # Computed locally; the original relied on a notebook-level `nreal`.
    nreal = len(elements) - nvirtual
    g5desc = np.zeros((nvirtual, len(zetas), len(combo_types)), dtype=float)

    real_atoms = [(a, sym) for a, sym in enumerate(elements) if sym != 'He']

    for i, typei in enumerate(elements):
        if typei != 'He':
            continue
        for j, typej in real_atoms:
            rij = distances[i, j]
            rij_vec = distance_vectors[i, j]
            fc_ij = fc(rij, r_cut, type_func)

            for k, typek in real_atoms:
                if j == k:
                    continue
                rik = distances[i, k]

                # Everything below is independent of zeta, so it is evaluated
                # once per (i, j, k) triple instead of once per zeta value.
                cost = np.dot(rij_vec, distance_vectors[i, k])/(rij*rik)
                exp_fac = np.exp(-eta0*(rij**2+rik**2))
                fc_fac = fc_ij*fc(rik, r_cut, type_func)

                if [typej, typek] in combo_types:
                    idx = combo_types.index([typej, typek])
                else:
                    idx = combo_types.index([typek, typej])

                for l, m in enumerate(zetas):
                    # lambd is a plain scalar, so a scalar is accumulated here;
                    # the original used a size-1 array, which relies on
                    # size-1-array-to-scalar conversion on assignment.
                    ang_lin = (2.0**(1-m))*(1+lambd*cost)**m
                    g5desc[i-nreal, l, idx] += ang_lin*exp_fac*fc_fac
    return g5desc

In [None]:
from ase.io import read
from ase.visualize import view
from ase.build import add_adsorbate
from ase.io import write

# Convert the loaded data to an xyz file and read it back as an ASE object.
# Only `write` is imported from ase.io (there is no `ase` name in scope), so it
# is called directly.
# NOTE(review): `data` is the DataFrame returned by pd.read_csv, whereas
# ase.io.write is documented to take Atoms objects -- confirm how the CSV is
# meant to be converted before relying on this line.
write('data.xyz', data, format='xyz')

total_ase_atoms = read('data.xyz')

# Lay out a square grid of sites with spacing grid_inc, starting at (0, 0), and
# add one 'He' probe atom at height 3 above each site.
# Example spacings for a cell edge of 7.63363:
#   7.63363/7 = 1.0905185714285713
#   7.63363/6 = 1.2722725

# range() and array indices need integers, so the side length is derived once
# and checked against grid_points.
n_side = int(round(np.sqrt(grid_points)))
if n_side * n_side != grid_points:
    raise ValueError(
        "grid_points must be a perfect square, got %r" % (grid_points,))

# One row per grid site (sized from grid_points rather than a fixed 64).
xx_reshape = np.zeros((grid_points, 1))
yy_reshape = np.zeros((grid_points, 1))
for i in range(n_side):
    for j in range(n_side):
        site = i*n_side + j
        xx_reshape[site] = i*grid_inc
        yy_reshape[site] = j*grid_inc

        add_adsorbate(total_ase_atoms, 'He', 3, (i*grid_inc, j*grid_inc))

view(total_ase_atoms, viewer='x3d')

In [None]:
# Pairwise displacement vectors and scalar distances between all atoms
# (mic=True asks ASE for the minimum-image convention).
distance_vectors = total_ase_atoms.get_all_distances(mic=True, vector=True)
distances = total_ase_atoms.get_all_distances(mic=True)

# The configuration cell defines `types_elements_init`; the previous name
# `types_atoms_init` does not exist anywhere in the notebook.
types_elements = types_elements_init
elements = total_ase_atoms.get_chemical_symbols()

# 'He' atoms are the virtual grid probes; everything else is a real atom.
nvirtual = elements.count('He')
nreal = len(elements) - nvirtual

In [None]:
# Descriptor hyper-parameters.
type_func = 'cos'   # cutoff function flavour understood by fc(): 'cos' or 'tanh'
r_cut = 12.0        # cutoff radius passed to fc() and the G* functions

# Use r_cut instead of repeating the literal 12, so the eta/rs grid cannot
# drift from the cutoff used by the descriptors. The last two arguments are
# not used by get_eta_values.
eta, rs = get_eta_values(r_cut, 6, 1, 6)

kappa = [0.5, 1.0, 1.5, 2.0]      # wave numbers for G3
zeta = [1, 2, 4, 8, 16, 32]       # angular exponents for G4/G5

In [None]:
# Table of grid-site coordinates: one row per site, X and Y taken from the
# single column of each (n, 1) coordinate array.
gridpoints = pd.DataFrame({'X': xx_reshape[:, 0], 'Y': yy_reshape[:, 0]})

In [None]:
# Radial G2 descriptors; the first three eta channels of type index 0 are kept
# as table columns.
g2 = G2(distances, r_cut, eta, rs, type_func, types_elements, elements)
for g2_channel, g2_label in enumerate(['G2-0', 'G2-1', 'G2-2']):
    gridpoints[g2_label] = g2[:, g2_channel, 0]

In [None]:
# Cosine G3 descriptors; the first four kappa channels of type index 0 are kept
# as table columns.
g3 = G3(distances, r_cut, kappa, type_func, types_elements, elements)
for g3_channel, g3_label in enumerate(['G3-0', 'G3-1', 'G3-2', 'G3-3']):
    gridpoints[g3_label] = g3[:, g3_channel, 0]

In [None]:
# Unordered pairs (with repetition) of every type except the last entry of
# types_elements, in the order (0,0), (0,1), ..., (1,1), ...
n_pair_types = len(types_elements) - 1
ncombos = n_pair_types*len(types_elements)//2
combo_types = [
    [types_elements[first], types_elements[second]]
    for first in range(n_pair_types)
    for second in range(first, n_pair_types)
]

# Angular G4 descriptors; the first six zeta channels of pair index 0 are kept
# as table columns.
g4 = G4(distance_vectors, distances, r_cut, type_func, elements, combo_types)
for g4_channel, g4_label in enumerate(
        ['G4-0', 'G4-1', 'G4-2', 'G4-3', 'G4-4', 'G4-5']):
    gridpoints[g4_label] = g4[:, g4_channel, 0]

In [None]:
# Display the assembled table: grid coordinates plus the G2/G3/G4 columns.
gridpoints

## Feature Selection and Clustering

## Polynomial Regression