In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
# from pymatgen.core.surface import Lattice, SlabGenerator, Structure, generate_all_slabs
# from pymatgen.core.periodic_table import Element
from ase.io import read
from ase.visualize import view
from ase.build import surface, bulk
from ase.io import read, write

In [5]:
def fc( rij, r_cut, type_func ):
    if type_func == 'cos':
        if rij <= r_cut:
            return 0.5*( np.cos( np.pi*rij/r_cut ) + 1.0 )
        elif rij > r_cut:
            return 0.0
    elif type_func == 'tanh':
        if rij <= r_cut:
            return np.tanh( 1-rij/r_cut )**3.0
        elif rij > r_cut:
            return 0.0
def get_eta_values( rcut, n, min_val, max_val ):
    eta = np.zeros( ( 2*(n+1) ), dtype=float )
    rs = np.zeros( ( 2*(n+1) ), dtype=float )
    ran = max_val - min_val
    R_sm = lambda i, m: rcut/( i**( m/i ) )
    for i in range( n + 1 ):
        #eta[i] = 2/( (i+1)*ran/( n + 1 ) )**2.0
        eta[i] = ( 1.0/R_sm( n + 1, i ) )**2.0
        rs[i] = 0.0
    for i in range( n + 1 ):
        rs[n+i+1] = R_sm( n + 1, i )
        eta[2*(n+1)-i-1] = 1.0/( R_sm( n + 1, n+1-i ) - R_sm( n + 1, n-i ) )**2.0
    return eta, rs
def G1( distances, r_cut, type_func, types_elements, elements ):
    nvirtual=elements.count('He')
    nreal=len(elements)-nvirtual
    g1desc = np.zeros(( nvirtual, len(types_elements)-1 ), dtype=float )
    for i, typei in enumerate( elements ):
        if typei == 'He' : # only compute G1 for virtual atoms
            for j, typej in enumerate( elements ):
                if typej != 'He' : # only compute G1 wrt real atoms
                    rij=distances[i,j]
                    idx = types_elements.index( elements[j] )
                    g1desc[i-nreal][idx] += fc( rij, r_cut, type_func )
    return g1desc
# For G2 and G3, each row has the ith atoms descriptors
# and each column are the descriptors for different eta/kappa values
#            eta1       eta2     ...
# atom1  [ desc1,1    desc1,2     ... ]
# atom2  [ desc2,1    desc2,2     ... ]
def G2( distances, r_cut, eta, rs, type_func, types_elements, elements):
    nvirtual=elements.count('He')
    nreal=len(elements)-nvirtual #Dummy atoms were appended to the end of the list; nreal shows the length of real atoms when deleting the # of grid points from the count
    g2desc = np.zeros(( nvirtual, len(eta), len(types_elements)-1 ), dtype=float ) # len(types) -1 represents the dimension of the real atoms only (there are two atom types here, real and dummies)
   # print(nreal,nvirtual)
    eta = np.array( eta )
    for i, typei in enumerate( elements ):
        if typei == 'He':
            for j, typej in enumerate( elements ):
                if typej != 'He' : # only compute G1 with respect to real atoms;
                    rij = distances[i,j]
                    f_fun = fc( rij, r_cut, type_func )
                   # rs_fac = np.multiply( -1, np.square( np.subtract( rij, rs ) ) )
                    exp_fac = np.exp( -eta*(rij-rs)**2 ) # np.multiply( eta, rs_fac ) )
                    idx = types_elements.index( elements[j] )
                    g2desc[i-nreal,:,idx] += exp_fac*f_fun #np.multiply(exp_fac, f_fun)
                    
    return g2desc
def G3( distances, r_cut, kappa, type_func, types_elements, elements):
    nvirtual=elements.count('He')
    g3desc = np.zeros(( nvirtual, len(kappa), len(types_elements)-1 ), dtype=float )
    kappa = np.array( kappa )
    
    nreal=len(elements)-nvirtual #Dummy atoms were appended to the end of the list; nreal shows the length of real atoms when deleting the # of grid points from the count
    g3desc = np.zeros(( nvirtual, len(kappa), len(types_elements)-1 ), dtype=float ) # len(types) -1 represents the dimension of the real atoms only (there are two atom types here, real and dummies)
    # print(nreal,nvirtual)

    for i, typei in enumerate( elements ):
        if typei == 'He':
            for j, typej in enumerate( elements ):
                if typej != 'He' : # only compute G1 with respect to real atoms;
                    rij = distances[i,j]
                    f_fun = fc( rij, r_cut, type_func )
                    # rs_fac = np.multiply( -1, np.square( np.subtract( rij, rs ) ) )
                    cos_fac = np.cos( kappa* rij ) # np.multiply( eta, rs_fac ) )
                    idx = types_elements.index( elements[j] )
                    g3desc[i-nreal,:,idx] += cos_fac*f_fun #np.multiply(exp_fac, f_fun)
                    
    return g3desc
#Lambda can have values of +1 or -1
#zeta can have integer values greater than zero
def G4( distance_vectors, distances, r_cut, type_func, elements, combo_types):
  #init_size = list( np.shape( bp_calcs ) )
  #init_size.append( len( type_combos ) )
  nvirtual = elements.count('He')
  ncombos = len(combo_types)
  g4desc = np.zeros( ( nvirtual, len(zeta), ncombos ), dtype=float )
  #combos = np.array( combos )
  lambd=np.array([1.0])
  for i, typei in enumerate( elements ):
    if typei == 'He':
        for j, typej in enumerate( elements ):
            if typej != 'He':
              
              rij = distances[i,j]
              rij_vec = distance_vectors[i,j]
          
              for k, typek in enumerate( elements ):
                  if typek != 'He':
                      if j == k: continue
                      rik = distances[i,k]
                      rik_vec = distance_vectors[i,k]
                      
                      rjk = distances[j,k]
                      rjk_vec = distance_vectors[j,k]
                      for l,m in enumerate(zeta):
                  
                          cost = np.dot( rij_vec, rik_vec )/(rij*rik)

                          ang_lin = (2.0**(1-m))*(1+lambd*cost)**m

                          exp_fac = np.exp( -eta[0]*( rij**2+rik**2+rjk**2 ) )

                          fc_fac = fc( rij, r_cut, type_func )*fc( rik, r_cut, type_func )*fc( rjk, r_cut, type_func )

                          if [typej,typek] in combo_types : 
                            idx = combo_types.index([typej,typek])
                          else : 
                            idx = combo_types.index([typek,typej])   
                          g4desc[i-nreal,l,idx] += ang_lin*exp_fac*fc_fac
  return g4desc
#Lambda can have values of +1 or -1
#zeta can have integer values greater than zero
def G5( distance_vectors, distances, r_cut, type_func, elements, combo_types):
  #init_size = list( np.shape( bp_calcs ) )
  #init_size.append( len( type_combos ) )
  nvirtual = elements.count('He')
  ncombos = len(combo_types)
  g5desc = np.zeros( ( nvirtual, len(zeta), ncombos ), dtype=float )
  #combos = np.array( combos )
  lambd=np.array([1.0])
  for i, typei in enumerate( elements ):
    if typei == 'He':
        for j, typej in enumerate( elements ):
            if typej != 'He':
              
              rij = distances[i,j]
              rij_vec = distance_vectors[i,j]
          
              for k, typek in enumerate( elements ):
                  if typek != 'He':
                      if j == k: continue
                      rik = distances[i,k]
                      rik_vec = distance_vectors[i,k]
                      
                      rjk = distances[j,k]
                      rjk_vec = distance_vectors[j,k]
                      for l,m in enumerate(zeta):
                  
                          cost = np.dot( rij_vec, rik_vec )/(rij*rik)
                          
                          ang_lin = (2.0**(1-m))*(1+lambd*cost)**m
                          
                          exp_fac = np.exp( -eta[0]*( rij**2+rik**2 ) )
                          
                          fc_fac = fc( rij, r_cut, type_func )*fc( rik, r_cut, type_func )

                          if [typej,typek] in combo_types : 
                            idx = combo_types.index([typej,typek])
                          else : 
                            idx = combo_types.index([typek,typej])   
                          g5desc[i-nreal,l,idx] += ang_lin*exp_fac*fc_fac
  return g5desc


Create the bulk Pt structure and the corresponding slab

Create the gridpoints above the surface

Adding the grid points to the ase slab, treating them as He atoms

In [8]:
from ase.io import read
from ase.visualize import view
from ase.build import add_adsorbate

total_ase_atoms = read('SrTiO3ASiteUp.xyz')

#iterate through a 6 by 6 equally space grid of adsorption sites, with one corner at 0, 0, and one corner at 7.63363 , 7.63363. Add an adsorbate at each site as well as the opposite height of the entire slab.
# 7.63363/7 = 1.0905185714285713
# 7.63363/6 = 1.2722725

xx_reshape = np.zeros((64, 1))
yy_reshape = np.zeros((64, 1))
for i in range(0, 8):
    for j in range(0, 8):
        print(i*1.0905185714285713, j*1.1179142857142858)
        xx_reshape[i*8+j] = i*1.1179142857142858
        yy_reshape[i*8+j] = j*1.1179142857142858
        
        add_adsorbate(total_ase_atoms, 'He', 3, (i*1.1179142857142858, j*1.1179142857142858))
        
        # energy_total = model.get_potential_energy()

view(total_ase_atoms, viewer='x3d')

0.0 0.0
0.0 1.1179142857142859
0.0 2.2358285714285717
0.0 3.3537428571428576
0.0 4.471657142857143
0.0 5.589571428571429
0.0 6.707485714285715
0.0 7.825400000000001
1.0905185714285712 0.0
1.0905185714285712 1.1179142857142859
1.0905185714285712 2.2358285714285717
1.0905185714285712 3.3537428571428576
1.0905185714285712 4.471657142857143
1.0905185714285712 5.589571428571429
1.0905185714285712 6.707485714285715
1.0905185714285712 7.825400000000001
2.1810371428571425 0.0
2.1810371428571425 1.1179142857142859
2.1810371428571425 2.2358285714285717
2.1810371428571425 3.3537428571428576
2.1810371428571425 4.471657142857143
2.1810371428571425 5.589571428571429
2.1810371428571425 6.707485714285715
2.1810371428571425 7.825400000000001
3.2715557142857135 0.0
3.2715557142857135 1.1179142857142859
3.2715557142857135 2.2358285714285717
3.2715557142857135 3.3537428571428576
3.2715557142857135 4.471657142857143
3.2715557142857135 5.589571428571429
3.2715557142857135 6.707485714285715
3.271555714285713

Compute all distances, vector and magnitude, for descriptors calculations

In [9]:
distance_vectors=total_ase_atoms.get_all_distances(mic=True,vector=True)
distances=total_ase_atoms.get_all_distances(mic=True)
types_elements = ['Sr','Ti', 'O', 'He']
elements = total_ase_atoms.get_chemical_symbols()
nvirtual=elements.count('He')
nreal=len(elements)-nvirtual

Parameters used for descriptors calculations, we may want to check these

In [10]:
type_func='cos'
r_cut = 12.0
eta,rs=get_eta_values(12,6,1,6)
kappa=[0.5, 1.0, 1.5, 2.0]
zeta=[1,2,4,8,16,32]

Build a dataframe with the gridpoints, their positions, and their descriptors, the label will be the energy

In [11]:
gridpoints=pd.DataFrame(xx_reshape,columns=['X'])
gridpoints['Y']=yy_reshape

In [12]:
# g1=G1( distances, r_cut, type_func, types_elements, elements )
# gridpoints['G1']=g1

ValueError: Expected a 1D array, got an array with shape (64, 3)

In [13]:
g2=G2(distances,r_cut, eta, rs, type_func, types_elements, elements)
gridpoints['G2-0']=g2[:,0,0]
gridpoints['G2-1']=g2[:,1,0]
gridpoints['G2-2']=g2[:,2,0]


In [14]:
g3=G3(distances, r_cut, kappa, type_func, types_elements, elements)
gridpoints['G3-0']=g3[:,0,0]
gridpoints['G3-1']=g3[:,1,0]
gridpoints['G3-2']=g3[:,2,0]
gridpoints['G3-3']=g3[:,3,0]

In [15]:
ncombos = (len(types_elements) - 1)*len(types_elements)//2
combo_types = []
for i in range(len(types_elements)-1):
    for j in range(i,len(types_elements)-1):
        combo_types.append([types_elements[i],types_elements[j]])
g4=G4(distance_vectors,distances,r_cut,type_func,elements,combo_types)
gridpoints['G4-0']=g4[:,0,0]
gridpoints['G4-1']=g4[:,1,0]
gridpoints['G4-2']=g4[:,2,0]
gridpoints['G4-3']=g4[:,3,0]
gridpoints['G4-4']=g4[:,4,0]
gridpoints['G4-5']=g4[:,5,0]

  g4desc[i-nreal,l,idx] += ang_lin*exp_fac*fc_fac


In [16]:
gridpoints

Unnamed: 0,X,Y,G2-0,G2-1,G2-2,G3-0,G3-1,G3-2,G3-3,G4-0,G4-1,G4-2,G4-3,G4-4,G4-5
0,0.0000,0.000000,3.763820,3.233349,2.550181,-2.370704,-1.693888,3.152967,-1.353682,8.935277,7.178460,4.919334,2.748825,1.334817,0.619130
1,0.0000,1.117914,3.653095,3.129311,2.457658,-2.331687,-1.340104,2.317200,-0.989062,8.539204,6.933344,4.833624,2.774650,1.381194,0.659998
2,0.0000,2.235829,3.518710,3.003452,2.346125,-2.283476,-1.018630,1.507810,-0.218050,8.053246,6.620390,4.708335,2.780879,1.415051,0.681366
3,0.0000,3.353743,3.735851,3.207041,2.526757,-2.360902,-1.596126,2.928429,-1.299863,8.835675,7.117599,4.899038,2.757112,1.348270,0.631645
4,0.0000,4.471657,3.735867,3.207055,2.526770,-2.360907,-1.596179,2.928552,-1.299901,8.835731,7.117633,4.899050,2.757108,1.348263,0.631638
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,7.8254,3.353743,3.735851,3.207041,2.526757,-2.360902,-1.596126,2.928429,-1.299863,8.835675,7.117599,4.899038,2.757112,1.348270,0.631645
60,7.8254,4.471657,3.735867,3.207055,2.526770,-2.360907,-1.596179,2.928552,-1.299901,8.835731,7.117633,4.899050,2.757108,1.348263,0.631638
61,7.8254,5.589571,3.518755,3.003493,2.346161,-2.283492,-1.018719,1.508039,-0.218326,8.053407,6.620496,4.708380,2.780882,1.415044,0.681363
62,7.8254,6.707486,3.653064,3.129282,2.457632,-2.331676,-1.340018,2.316989,-0.988915,8.539094,6.933274,4.833598,2.774655,1.381204,0.660007


In [None]:

#energies=pd.read_csv('energies.csv',delimiter='    ',usecols=[1,2],names=['SCF','index']).set_index('index')
#energies.index.name=None
#energies['SCFshift']=energies['SCF']-energies['SCF'].mean()

#gridpoints=gridpoints.join(energies)
#len(gridpoints)

In [23]:
gridpoints = pd.DataFrame(xx_reshape, columns=['X'])
gridpoints['Y'] = yy_reshape
print(len(gridpoints))

energies = pd.read_csv('oxygen-coordinates-with-energy.csv')
energies.index.name = None
energies['SCFshift'] = energies['SCF'] - energies['SCF'].mean()
energies = energies.reset_index()  # Reset the index of energies
print(len(energies))

gridpoints['SCF'] = energies['SCF']
gridpoints['SCFshift'] = energies['SCFshift']
print(gridpoints)


64
64
         X         Y          SCF  SCFshift
0   0.0000  0.000000 -4270.695595  0.011849
1   0.0000  1.117914 -4270.703495  0.003949
2   0.0000  2.235829 -4270.707601 -0.000157
3   0.0000  3.353743 -4270.698102  0.009342
4   0.0000  4.471657 -4270.698101  0.009343
..     ...       ...          ...       ...
59  7.8254  3.353743 -4270.698102  0.009342
60  7.8254  4.471657 -4270.698101  0.009343
61  7.8254  5.589571 -4270.707600 -0.000156
62  7.8254  6.707486 -4270.703497  0.003947
63  7.8254  7.825400 -4270.695595  0.011849

[64 rows x 4 columns]


In [24]:
gridpoints

Unnamed: 0,X,Y,SCF,SCFshift
0,0.0000,0.000000,-4270.695595,0.011849
1,0.0000,1.117914,-4270.703495,0.003949
2,0.0000,2.235829,-4270.707601,-0.000157
3,0.0000,3.353743,-4270.698102,0.009342
4,0.0000,4.471657,-4270.698101,0.009343
...,...,...,...,...
59,7.8254,3.353743,-4270.698102,0.009342
60,7.8254,4.471657,-4270.698101,0.009343
61,7.8254,5.589571,-4270.707600,-0.000156
62,7.8254,6.707486,-4270.703497,0.003947


In [25]:
r_cut_list = [7,8,9,10,11,12]
kappa=[0.5, 1.0, 1.5, 2.0]
zeta=[1,2,4,8,16,32]
#rcut, n, min_val, max_val
eta_values=list()
rs_values=list()

for i in r_cut_list:

    eta,rs=get_eta_values(i,6,1,6)
    eta_values.append(eta)
    rs_values.append(rs)



In [26]:
# Radial functions

for i in r_cut_list:
    gridpoints['G1 - r= {}'.format(i)] = None
    g1 = G1(distances, int(i), type_func, types_elements, elements)
    gridpoints['G1 - r= {}'.format(i)] = g1
    
    g2=G2(distances,r_cut, eta, rs, type_func, types_elements, elements)
    gridpoints['G2-0 R_cut = {}'.format(i)]=g2[:,0,0]
    gridpoints['G2-1 R_cut = {}'.format(i)]=g2[:,1,0]
    gridpoints['G2-2 R_cut = {}'.format(i)]=g2[:,2,0]
    gridpoints['G2-3 R_cut = {}'.format(i)]=g2[:,3,0]
 

    g3=G3(distances, i, kappa, type_func, types_elements, elements)
    gridpoints['G3-0 R_cut = {}'.format(i)]=g3[:,0,0]
    gridpoints['G3-1 R_cut = {}'.format(i)]=g3[:,1,0]
    gridpoints['G3-2 R_cut = {}'.format(i)]=g3[:,2,0]
    gridpoints['G3-3 R_cut = {}'.format(i)]=g3[:,3,0]
   



In [27]:
# Radial and angular functions
ncombos = (len(types_elements) - 1)*len(types_elements)//2
combo_types = []
for k in r_cut_list:
    for i in range(len(types_elements)-1):
        for j in range(i,len(types_elements)-1):
            combo_types.append([types_elements[i],types_elements[j]])
    g4=G4(distance_vectors,distances,r_cut,type_func,elements,combo_types)
    gridpoints['G4-0  R_cut = {}'.format(k)]=g4[:,0,0]
    gridpoints['G4-1  R_cut = {}'.format(k)]=g4[:,1,0]
    gridpoints['G4-2  R_cut = {}'.format(k)]=g4[:,2,0]
    gridpoints['G4-3  R_cut = {}'.format(k)]=g4[:,3,0]
    gridpoints['G4-4  R_cut = {}'.format(k)]=g4[:,4,0]
    gridpoints['G4-5  R_cut = {}'.format(k)]=g4[:,5,0]

    g5=G5(distance_vectors,distances,r_cut,type_func,elements,combo_types)
    gridpoints['G5-0  R_cut = {}'.format(k)]=g5[:,0,0]
    gridpoints['G5-1  R_cut = {}'.format(k)]=g5[:,1,0]
    gridpoints['G5-2  R_cut = {}'.format(k)]=g5[:,2,0]
    gridpoints['G5-3  R_cut = {}'.format(k)]=g5[:,3,0]
    gridpoints['G5-4  R_cut = {}'.format(k)]=g5[:,4,0]
    gridpoints['G5-5  R_cut = {}'.format(k)]=g5[:,5,0]


  g4desc[i-nreal,l,idx] += ang_lin*exp_fac*fc_fac
  g5desc[i-nreal,l,idx] += ang_lin*exp_fac*fc_fac
  g4desc[i-nreal,l,idx] += ang_lin*exp_fac*fc_fac
  g5desc[i-nreal,l,idx] += ang_lin*exp_fac*fc_fac
  g4desc[i-nreal,l,idx] += ang_lin*exp_fac*fc_fac
  g5desc[i-nreal,l,idx] += ang_lin*exp_fac*fc_fac
  g4desc[i-nreal,l,idx] += ang_lin*exp_fac*fc_fac
  g5desc[i-nreal,l,idx] += ang_lin*exp_fac*fc_fac
  gridpoints['G5-0  R_cut = {}'.format(k)]=g5[:,0,0]
  gridpoints['G5-1  R_cut = {}'.format(k)]=g5[:,1,0]
  gridpoints['G5-2  R_cut = {}'.format(k)]=g5[:,2,0]
  gridpoints['G5-3  R_cut = {}'.format(k)]=g5[:,3,0]
  gridpoints['G5-4  R_cut = {}'.format(k)]=g5[:,4,0]
  gridpoints['G5-5  R_cut = {}'.format(k)]=g5[:,5,0]
  g4desc[i-nreal,l,idx] += ang_lin*exp_fac*fc_fac
  gridpoints['G4-0  R_cut = {}'.format(k)]=g4[:,0,0]
  gridpoints['G4-1  R_cut = {}'.format(k)]=g4[:,1,0]
  gridpoints['G4-2  R_cut = {}'.format(k)]=g4[:,2,0]
  gridpoints['G4-3  R_cut = {}'.format(k)]=g4[:,3,0]
  gridpoints['G4-4  

In [28]:
gridpoints

Unnamed: 0,X,Y,SCF,SCFshift,G1 - r= 7,G2-0 R_cut = 7,G2-1 R_cut = 7,G2-2 R_cut = 7,G2-3 R_cut = 7,G3-0 R_cut = 7,...,G4-2 R_cut = 12,G4-3 R_cut = 12,G4-4 R_cut = 12,G4-5 R_cut = 12,G5-0 R_cut = 12,G5-1 R_cut = 12,G5-2 R_cut = 12,G5-3 R_cut = 12,G5-4 R_cut = 12,G5-5 R_cut = 12
0,0.0000,0.000000,-4270.695595,0.011849,1.484380,3.763820,3.233349,2.550181,1.787126,-0.671867,...,4.919334,2.748825,1.334817,0.619130,16.635026,12.913335,8.592905,4.612682,2.098918,0.922391
1,0.0000,1.117914,-4270.703495,0.003949,1.399468,3.653095,3.129311,2.457658,1.714268,-0.584264,...,4.833624,2.774650,1.381194,0.659998,15.894273,12.485396,8.451070,4.660651,2.182691,0.988463
2,0.0000,2.235829,-4270.707601,-0.000157,1.300000,3.518710,3.003452,2.346125,1.626190,-0.497835,...,4.708335,2.780879,1.415051,0.681366,14.985554,11.929795,8.238127,4.676064,2.249087,1.028148
3,0.0000,3.353743,-4270.698102,0.009342,1.462687,3.735851,3.207041,2.526757,1.768702,-0.648279,...,4.899038,2.757112,1.348270,0.631645,16.448694,12.807820,8.559742,4.627797,2.122748,0.942227
4,0.0000,4.471657,-4270.698101,0.009343,1.462699,3.735867,3.207055,2.526770,1.768712,-0.648291,...,4.899050,2.757108,1.348263,0.631638,16.448798,12.807879,8.559761,4.627789,2.122735,0.942217
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,7.8254,3.353743,-4270.698102,0.009342,1.462687,3.735851,3.207041,2.526757,1.768702,-0.648279,...,4.899038,2.757112,1.348270,0.631645,16.448694,12.807820,8.559742,4.627797,2.122748,0.942227
60,7.8254,4.471657,-4270.698101,0.009343,1.462699,3.735867,3.207055,2.526770,1.768712,-0.648291,...,4.899050,2.757108,1.348263,0.631638,16.448798,12.807879,8.559761,4.627789,2.122735,0.942217
61,7.8254,5.589571,-4270.707600,-0.000156,1.300033,3.518755,3.003493,2.346161,1.626219,-0.497860,...,4.708380,2.780882,1.415044,0.681363,14.985856,11.929985,8.238205,4.676066,2.249071,1.028141
62,7.8254,6.707486,-4270.703497,0.003947,1.399445,3.653064,3.129282,2.457632,1.714248,-0.584242,...,4.833598,2.774655,1.381204,0.660007,15.894067,12.485274,8.451027,4.660660,2.182710,0.988476


In [29]:
#convert dataframe to csv
exportframe = gridpoints.copy()
exportframe.to_csv('SrTiO3ASiteUpFullGrid.csv', index=False)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import BayesianRidge
from sklearn.metrics import mean_squared_error, r2_score

#No training split


# Split the data into X and y
X = gridpoints['G5-5  R_cut = 12'].values.reshape(-1, 1)
y = gridpoints['SCF'].values

# Linear Regression
lin_reg = LinearRegression()
lin_reg.fit(X, y)
y_pred_lin = lin_reg.predict(X)

# Bayesian Regression
bayes_reg = BayesianRidge()
bayes_reg.fit(X, y)
y_pred_bayes = bayes_reg.predict(X)

# Evaluation metrics
mse_lin = mean_squared_error(y, y_pred_lin)
mse_bayes = mean_squared_error(y, y_pred_bayes)
r2_lin = r2_score(y, y_pred_lin)
r2_bayes = r2_score(y, y_pred_bayes)

# Plotting the results
#plt.scatter(X, y, color='blue', label='Actual')
#plt.plot(X, y_pred_lin, color='red', label='Linear Regression')
#plt.plot(X, y_pred_bayes, color='green', label='Bayesian Regression')
#plt.xlabel('X')
#plt.ylabel('Y')
#plt.title('Linear vs. Bayesian Regression')
#plt.legend()
#plt.show()

# Print evaluation metrics
print('Linear Regression:')
print('MSE:', mse_lin)
print('R^2 Score:', r2_lin)
print('---')
print('Bayesian Regression:')
print('MSE:', mse_bayes)
print('R^2 Score:', r2_bayes)


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import BayesianRidge
from sklearn.metrics import mean_squared_error, r2_score

#With training split


# Split the data into X and y
X = gridpoints['G5-5  R_cut = 12'].values.reshape(-1, 1)
y = gridpoints['SCF'].values

# Split the data into train and test sets
train_size = int(0.1 * len(gridpoints))  # 80% for training, 20% for testing
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Linear Regression
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
y_pred_lin = lin_reg.predict(X_test)

# Bayesian Regression
bayes_reg = BayesianRidge()
bayes_reg.fit(X_train, y_train)
y_pred_bayes = bayes_reg.predict(X_test)

# Evaluation metrics
mse_lin = mean_squared_error(y_test, y_pred_lin)
mse_bayes = mean_squared_error(y_test, y_pred_bayes)
r2_lin = r2_score(y_test, y_pred_lin)
r2_bayes = r2_score(y_test, y_pred_bayes)

# Plotting the results
plt.scatter(X_test, y_test, color='blue', label='Actual')
plt.plot(X_test, y_pred_lin, color='red', label='Linear Regression')
plt.plot(X_test, y_pred_bayes, color='green', label='Bayesian Regression')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Linear vs. Bayesian Regression')
plt.legend()
plt.show()

# Print evaluation metrics
print('Linear Regression:')
print('MSE:', mse_lin)
print('R^2 Score:', r2_lin)
print('---')
print('Bayesian Regression:')
print('MSE:', mse_bayes)
print('R^2 Score:', r2_bayes)


In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import BayesianRidge
from sklearn.metrics import mean_squared_error, r2_score

# Assuming you have already created the gridpoints DataFrame

# Initialize empty DataFrames for metrics
linear_metrics = pd.DataFrame(columns=['Column', 'MSE', 'R^2 Score'])
bayesian_metrics = pd.DataFrame(columns=['Column', 'MSE', 'R^2 Score'])

# Iterate over columns starting from column 5
for col in gridpoints.columns[4:]:
    # Split the data into X and y
    X = gridpoints['SCF'].values.reshape(-1, 1)
    y = gridpoints[col].values

    # Linear Regression
    lin_reg = LinearRegression()
    lin_reg.fit(X, y)
    y_pred_lin = lin_reg.predict(X)
    mse_lin = mean_squared_error(y, y_pred_lin)
    r2_lin = r2_score(y, y_pred_lin)

    # Bayesian Regression
    bayes_reg = BayesianRidge()
    bayes_reg.fit(X, y)
    y_pred_bayes = bayes_reg.predict(X)
    mse_bayes = mean_squared_error(y, y_pred_bayes)
    r2_bayes = r2_score(y, y_pred_bayes)

    # Append metrics to respective DataFrames
 #   df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
    linear_metrics.loc[len(linear_metrics.index)] = [col, mse_lin, r2_lin]
    bayesian_metrics.loc[len(bayesian_metrics.index)] = [col, mse_bayes, r2_bayes]

# Print the linear regression metrics
print("Linear Regression Metrics:")
print(linear_metrics)

# Print the Bayesian regression metrics
print("Bayesian Regression Metrics:")
print(bayesian_metrics)


In [None]:
# Find columns with the lowest MSE (most accurate)
linear_best_columns_MSE = linear_metrics[linear_metrics['MSE'] == linear_metrics['MSE'].min()]['Column'].tolist()
bayesian_best_columns_MSE = bayesian_metrics[bayesian_metrics['MSE'] == bayesian_metrics['MSE'].min()]['Column'].tolist()

# Find columns with the highest MSE (most accurate)
linear_best_columns_R2 = linear_metrics[linear_metrics['R^2 Score'] == linear_metrics['R^2 Score'].max()]['Column'].tolist()
bayesian_best_columns_R2 = bayesian_metrics[bayesian_metrics['R^2 Score'] == bayesian_metrics['R^2 Score'].max()]['Column'].tolist()


print("Best columns based on Linear Regression:")
print("for MSE: {} \n".format(linear_best_columns_MSE))
print("for R squared: {} \n".format(linear_best_columns_R2))
print("Best columns based on Bayesian Regression:")
print("for MSE: {} \n".format(bayesian_best_columns_MSE))
print("for R squared: {} \n".format(bayesian_best_columns_MSE))

In [None]:
linear_best_columns_MSE = linear_metrics[linear_metrics['MSE'] == linear_metrics['MSE'].min()]
bayesian_best_columns_MSE = bayesian_metrics[bayesian_metrics['MSE'] == bayesian_metrics['MSE'].min()]

linear_best_columns_R2 = linear_metrics[linear_metrics['R^2 Score'] == linear_metrics['R^2 Score'].max()]
bayesian_best_columns_R2 = bayesian_metrics[bayesian_metrics['R^2 Score'] == bayesian_metrics['R^2 Score'].max()]

print("Best columns based on Linear Regression:")
for index, row in linear_best_columns_MSE.iterrows():
    column_name = row['Column']
    mse_value = row['MSE']
    print("Column:", column_name, "MSE:", mse_value)


for index, row in linear_best_columns_R2.iterrows():
    column_name = row['Column']
    r2_value = row['R^2 Score']
    print("Column:", column_name, "R^2 Score:", r2_value)


for index, row in bayesian_best_columns_MSE.iterrows():
    column_name = row['Column']
    mse_value = row['MSE']
    print("Column:", column_name, "MSE:", mse_value)
          
for index, row in bayesian_best_columns_R2.iterrows():
    column_name = row['Column']
    r2_value = row['R^2 Score']
    print("Column:", column_name, "R^2 Score:", r2_value)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import BayesianRidge
from sklearn.metrics import mean_squared_error, r2_score


X = gridpoints['G3-2 R_cut = 8'].values.reshape(-1, 1)
y = gridpoints['SCF'].values

# Linear Regression
lin_reg = LinearRegression()
lin_reg.fit(X, y)
y_pred_lin = lin_reg.predict(X)

# Bayesian Regression
bayes_reg = BayesianRidge()
bayes_reg.fit(X, y)
y_pred_bayes = bayes_reg.predict(X)

# Evaluation metrics
mse_lin = mean_squared_error(y, y_pred_lin)
mse_bayes = mean_squared_error(y, y_pred_bayes)
r2_lin = r2_score(y, y_pred_lin)
r2_bayes = r2_score(y, y_pred_bayes)

# Plotting the results
plt.scatter(X, y, color='blue', label='Actual')
plt.plot(X, y_pred_lin, color='red', label='Linear Regression')
plt.plot(X, y_pred_bayes, color='green', label='Bayesian Regression')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Linear vs. Bayesian Regression')
plt.legend()
plt.show()

# Print evaluation metrics
print('Linear Regression:')
print('MSE:', mse_lin)
print('R^2 Score:', r2_lin)
print('---')
print('Bayesian Regression:')
print('MSE:', mse_bayes)
print('R^2 Score:', r2_bayes)


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import BayesianRidge
from sklearn.metrics import mean_squared_error, r2_score


X = gridpoints['G3-1 R_cut = 7'].values.reshape(-1, 1)
y = gridpoints['SCF'].values

# Linear Regression
lin_reg = LinearRegression()
lin_reg.fit(X, y)
y_pred_lin = lin_reg.predict(X)

# Bayesian Regression
bayes_reg = BayesianRidge()
bayes_reg.fit(X, y)
y_pred_bayes = bayes_reg.predict(X)

# Evaluation metrics
mse_lin = mean_squared_error(y, y_pred_lin)
mse_bayes = mean_squared_error(y, y_pred_bayes)
r2_lin = r2_score(y, y_pred_lin)
r2_bayes = r2_score(y, y_pred_bayes)

# Plotting the results
plt.scatter(X, y, color='blue', label='Actual')
plt.plot(X, y_pred_lin, color='red', label='Linear Regression')
plt.plot(X, y_pred_bayes, color='green', label='Bayesian Regression')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Linear vs. Bayesian Regression')
plt.legend()
plt.show()

# Print evaluation metrics
print('Linear Regression:')
print('MSE:', mse_lin)
print('R^2 Score:', r2_lin)
print('---')
print('Bayesian Regression:')
print('MSE:', mse_bayes)
print('R^2 Score:', r2_bayes)
