In [1]:
import numpy as np
import re
import os
import pychemia as pc
import mlmd.tools.readers as readers
import mlmd.tools.builders as builders

In [2]:
# Path to the training input file read by this notebook.
path_to_training_file = 'training_features_abinit.in'

# Unpack the eight values returned by readers.load_training for that file.
(path_to_dft_data,
 code,
 pote_name,
 feature_parameters,
 GBR_E_parameters,
 GBR_F_parameters,
 nn_E_parameters,
 nn_F_parameters) = readers.load_training(path_to_training_file)

In [3]:
# Loading the parameters for the feature calculation
# (every value below is read from the feature_parameters dictionary)

trans = feature_parameters['trans']
# trans -> translation dictionary {'chemical symbol': nuclear number Z}
# e.g. trans={'C': 6, 'Si': 14} for a potential with C and Si

eta2b = feature_parameters['eta2b']
# values for the eta 2-body parameter in the Filtered Behler-Parrinello (FBP) features

Rp = feature_parameters['Rp']
# values for the Rp parameter (gaussian centers of the 2-body interaction) in the FBP features

eta3b = feature_parameters['eta3b']
# values for the eta 3-body parameter in the FBP features

cos_p = feature_parameters['cos_p']
# values for the cos(\theta_P) parameter (gaussian centers of the 3-body interaction)
# in the FBP features

In [4]:
# Loading information from the directory with the DFT calculations.
# NOTE(review): this calls readers.load_abinit_structures with a single
# argument; the notebook-local load_abinit_structures defined in a later cell
# takes (dir_path, trans) -- confirm which loader is intended.
species_simb, stru_names,stru,ftot_stru, ener= readers.load_abinit_structures(path_to_dft_data.strip())
# species_simb -> array with length (number of structures)
# species_simb[i] -> array with the composition of structure i

# stru_names -> empty array (per the original author's note -- TODO confirm)

# stru -> numpy array with length (number of structures)
# stru[i] -> numpy array with the positions of the atoms in structure i
# stru[i] -> has shape (number_of_atoms, 3 (xyz coordinates))

# ftot_stru -> numpy array with length (number of structures)
# ftot_stru[i] -> numpy array with the forces on the atoms in structure i
# ftot_stru[i] -> has shape (number_of_atoms, 3 (xyz components))

# ener -> numpy array with length (number of structures)
# ener[i] -> energy of structure i

1.out
2.out
3.out
LaMnO30.out


In [6]:
# Single-argument print(...) gives the same output on Python 2 and also runs on Python 3.
print(species_simb)

[array(['La', 'La', 'La', 'La', 'Mn', 'Mn', 'Mn', 'Mn', 'O', 'O', 'O', 'O',
       'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'],
      dtype='|S2'), array(['La', 'La', 'La', 'La', 'Mn', 'Mn', 'Mn', 'Mn', 'O', 'O', 'O', 'O',
       'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'],
      dtype='|S2'), array(['La', 'La', 'La', 'La', 'Mn', 'Mn', 'Mn', 'Mn', 'O', 'O', 'O', 'O',
       'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'],
      dtype='|S2'), array(['La', 'La', 'La', 'La', 'Mn', 'Mn', 'Mn', 'Mn', 'O', 'O', 'O', 'O',
       'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'],
      dtype='|S2')]


In [None]:
# Single-argument print(...) gives the same output on Python 2 and also runs on Python 3.
print(species_simb)

In [5]:
# Single-argument print(...) gives the same output on Python 2 and also runs on Python 3.
print(species_simb)

[[1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]]


In [6]:
# Build the Filtered Behler & Parrinello (FBP) features and their derivatives.
feat_2b, feat_3b, X, DX = builders.build_FBP_DFBP(
    trans, eta2b, Rp, eta3b, cos_p,
    species_simb, stru_names, stru)
# X       -> FBP feature representation of stru, shape (numb_struc, numb_of_features)
# DX      -> derivative of the FBP features, dimensions
#            (structures, atoms_in_structure, number_of_features, xyz_components)
# feat_2b -> number of 2-body features
# feat_3b -> number of 3-body features



In [9]:
# Show the shapes of the feature matrix and of its derivative.
# Single-argument print(...) gives the same output on Python 2 and also runs on Python 3.
print(X.shape)
print(DX.shape)

(4, 459)
(4, 20, 459, 3)


In [24]:
# Invert the translation table: nuclear number Z -> chemical symbol.
# items() walks the dictionary in the same order as keys(), so the printed
# symbols and the resulting mapping are the same as with a keys()+lookup loop.
trans_inv = {}
for symbol, z_number in trans.items():
    print(symbol)
    trans_inv[z_number] = symbol
print(trans_inv)

Mn
O
La
{8: 'O', 25: 'Mn', 57: 'La'}


In [27]:
# Look up the chemical symbol stored for Z = 57.
# Single-argument print(...) gives the same output on Python 2 and also runs on Python 3.
print(trans_inv[57])

La


In [5]:
# Load the structures with the notebook-local load_abinit_structures, which
# takes the translation dictionary `trans` in addition to the data directory.
(species_simb,
 stru_names,
 stru,
 ftot_stru,
 ener) = load_abinit_structures(path_to_dft_data.strip(), trans)

1.out
2.out
3.out
LaMnO30.out


In [4]:
def load_abinit_structures(dir_path, trans):
    """Load every ABINIT ``*.out`` file found in ``dir_path``.

    Each file is parsed with ``get_nat_mass_latvec_in_strten_in`` and
    ``get_xred_fcart``. Reduced coordinates are turned into cartesian
    positions with the lattice vectors, and positions, forces and energies
    are converted from Bohr / Hartree to Angstrom / eV.

    Parameters
    ----------
    dir_path : str
        Directory that contains the ABINIT output files.
    trans : dict
        Translation dictionary {chemical symbol: nuclear number Z}; it is
        forwarded to ``get_nat_mass_latvec_in_strten_in``.

    Returns
    -------
    species_simb : list
        species_simb[i] is the array of chemical symbols of structure i.
    name : list of str
        Names of the loaded files, in alphabetical order.
    stru : numpy.ndarray
        stru[i] holds the cartesian positions of structure i (Angstrom),
        one row per atom.
    ftot_stru : numpy.ndarray
        ftot_stru[i] holds the forces of structure i (eV/Angstrom),
        one row per atom.
    ener : numpy.ndarray
        ener[i] is the total energy of structure i (eV).
    """
    # Unit conversions applied to the raw ABINIT values.
    bohr_to_angstrom = 0.5291772108  # 1 Bohr in Angstrom
    hartree_to_ev = 27.2114          # 1 Hartree in eV

    # Select files by suffix on the real directory listing. Running a regex
    # over str(os.listdir(...)) can return fragments that are not file names
    # (e.g. 'b.out' for 'a-b.out', or 'x.out' for 'x.out.bak'). Sorting makes
    # the structure order reproducible, since os.listdir order is arbitrary.
    fl_nms1 = sorted(fl for fl in os.listdir(dir_path) if fl.endswith('.out'))

    stru = []
    name = []
    ener = []
    ftot_stru = []
    species_simb = []
    for name_fl in fl_nms1:
        print(name_fl)  # progress trace, one line per parsed file
        path_to_file = os.path.join(dir_path, name_fl)
        parsed = get_nat_mass_latvec_in_strten_in(path_to_file, trans)
        nat, latvec_in, z_struc = parsed[0], parsed[2], parsed[4]
        xred, fcart, ener_p = get_xred_fcart(path_to_file, nat)
        # Columns of latvec_in are the lattice vectors, so latvec_in . xred
        # gives cartesian coordinates; transpose to one row per atom.
        stru.append(np.dot(latvec_in, xred).T)
        species_simb.append(z_struc)
        ener.append(ener_p)
        ftot_stru.append(fcart.T)
        name.append(name_fl)

    # NOTE(review): squeeze drops the leading axis when only one file is
    # loaded (and any other length-1 axis) -- confirm callers expect that.
    ftot_stru = np.squeeze(np.array(ftot_stru))
    ener = np.array(ener)
    stru = np.array(stru)

    # ABINIT reports Bohr, Hartree and Hartree/Bohr; convert to Angstrom and eV.
    stru = np.multiply(stru, bohr_to_angstrom)
    ener = np.multiply(ener, hartree_to_ev)
    ftot_stru = np.multiply(ftot_stru, (hartree_to_ev / bohr_to_angstrom))
    return species_simb, name, stru, ftot_stru, ener

def get_nat_mass_latvec_in_strten_in(path_to_file, trans):
    """Parse atom count, species, lattice vectors and stress from an ABINIT output.

    Parameters
    ----------
    path_to_file : str
        Path to the ABINIT output file.
    trans : dict
        Translation dictionary {chemical symbol: nuclear number Z}. It is
        inverted here to map the ``znucl`` values back to chemical symbols.

    Returns
    -------
    nat : int
        Value of ``natom``.
    mass : list
        Always an empty list; kept so the return signature is unchanged.
    latvec_in : numpy.ndarray
        3x3 float64 array whose columns are the R(1), R(2), R(3) vectors as
        printed in the file (no unit conversion is applied).
    strten_in : numpy.ndarray
        The six sigma components in the order (1 1), (2 2), (3 3), (3 2),
        (3 1), (2 1), as printed in the file.
    z_struc : numpy.ndarray
        Chemical symbol of each atom, following ``typat``.

    Raises
    ------
    IndexError
        If one of the expected entries is not found in the file.
    KeyError
        If a ``znucl`` value has no matching entry in ``trans``.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(path_to_file) as fl:
        data = fl.read()

    # Only the first occurrence of each entry is used.
    nat = int(re.findall(r'natom\s+([0-9]+)', data)[0])

    # Real lists (not ``map`` objects) so that indexing also works on Python 3.
    # NOTE(review): '(.+)' stops at the end of the line, so values that
    # continue on a following line are not read -- confirm for large cells.
    typat = [int(tok) for tok in re.findall(r'\s+typat\s+(.+)', data)[0].split()]
    znucl = [int(float(tok)) for tok in re.findall(r'\s+znucl\s+(.+)', data)[0].split()]

    mass = []

    # Z -> chemical symbol; typat is 1-based into znucl.
    trans_inv = dict((z_number, symbol) for symbol, z_number in trans.items())
    z_struc = np.array([trans_inv[znucl[i - 1]] for i in typat])

    # The three lattice vectors share one pattern; only the index changes.
    vec_pattern = r'R.%d.=\s*(.\d+...\d+\s+.\d+...\d+\s+.\d+...\d+)'
    vectors = [[float(tok) for tok in re.findall(vec_pattern % k, data)[0].split()]
               for k in (1, 2, 3)]
    # Transpose so that each lattice vector is a column.
    latvec_in = np.array(vectors, dtype='float64').T

    # Same idea for the six stress components.
    sigma_pattern = r'sigma.%d\s+%d.=(\s+.\d+.\d+..\d+)'
    components = ((1, 1), (2, 2), (3, 3), (3, 2), (3, 1), (2, 1))
    strten_in = np.array([
        np.float64(re.findall(sigma_pattern % ij, data)[0].strip())
        for ij in components
    ])
    return nat, mass, latvec_in, strten_in, z_struc

def get_xred_fcart(path_to_file, nat):
    """Parse reduced coordinates, cartesian forces and total energy.

    The file is scanned line by line; when a section appears more than once
    the last occurrence wins.

    Parameters
    ----------
    path_to_file : str
        Path to the ABINIT output file.
    nat : int
        Number of atoms, i.e. how many lines are read after each header.

    Returns
    -------
    xred : numpy.ndarray
        Reduced coordinates, transposed so that each column is one atom.
    fcart : numpy.ndarray
        Cartesian forces as printed (header says hartree/bohr), with the
        leading atom-index column dropped and transposed so that each column
        is one atom.
    ener : numpy.float64
        Value printed after ``Etotal=`` (Hartree, per the original comment).

    Raises
    ------
    ValueError
        If the xred block, the force block or the Etotal line is missing.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(path_to_file) as fl:
        data = fl.readlines()

    xred = None
    fcart = None
    ener = None
    for n, line in enumerate(data):
        if re.findall(r'reduced\s+coordinates\s+.array\s+xred', line):
            rows = data[n + 1:n + 1 + nat]
            # Lists of floats (not ``map`` objects) so numpy builds a numeric
            # array on Python 3 as well.
            xred = np.array([[float(tok) for tok in row.split()] for row in rows]).T
        elif re.findall(r'cartesian\s+forces\s+.hartree.bohr', line):
            rows = data[n + 1:n + 1 + nat]
            table = np.array([[float(tok) for tok in row.split()] for row in rows])
            # First column is the atom index; keep only the force components.
            fcart = table[:, 1:].T
        elif re.findall(r'>>>>>>>>>\s+Etotal=\s+.\d+', line):
            value = re.findall(r'>>>>>>>>>\s+Etotal=(\s+.\d+.\d+..\d+)', line)
            ener = np.float64(value[0].strip())

    # Fail with a clear message instead of an UnboundLocalError at return.
    missing = [label for label, found in (('xred', xred),
                                          ('cartesian forces', fcart),
                                          ('Etotal', ener))
               if found is None]
    if missing:
        raise ValueError('%s: section(s) not found: %s'
                         % (path_to_file, ', '.join(missing)))
    return xred, fcart, ener