In [106]:
import os
import sys
import glob
import re
import pprint
import numpy as np

In [6]:
%load_ext autoreload
%autoreload 2

In [7]:
# Location of the raw target data and the data set to be processed.
# (An alternative raw-data directory used before: './F-termi-ti3cn-data')
target_raw = '../data/target_raw'
dataset = 'bcc_npt_langevin_300K'

# directory holding the trajectory files of the selected data set
trajdir = f'{target_raw}/{dataset}'

In [8]:
# Make the statmechlib directory importable. The membership guard keeps the
# cell idempotent: re-running it does not append duplicate entries to sys.path.
statmechlib_path = '../../statmechlib'
if statmechlib_path not in sys.path:
    sys.path.append(statmechlib_path)

In [65]:
from statmechlib.read_write import read_poscar, read_xdatcar

In [149]:
# Parse the XDATCAR trajectory when the file exists in the trajectory directory;
# otherwise only report that it is missing (traj is left untouched in that case).
xdatcar = os.path.join(trajdir, 'XDATCAR')
if not os.path.isfile(xdatcar):
    print('XDATCAR not present')
else:
    traj = read_xdatcar(xdatcar)

In [151]:
# show the 'atom_num' entry of the trajectory read from XDATCAR
traj['atom_num']

[128]

In [69]:
# Parse the POSCAR structure when the file exists in the trajectory directory;
# otherwise only report that it is missing.
# NOTE: on success this rebinds `traj`, replacing any previously loaded data.
poscar = os.path.join(trajdir, 'POSCAR')
if not os.path.isfile(poscar):
    print('POSCAR not present')
else:
    traj = read_poscar(poscar)

In [155]:
def read_outcar(filename):
    """Parse a VASP OUTCAR file into a trajectory dictionary.

    The file is scanned line by line; every matched record (cell, positions
    and forces, energies, temperature) appends one entry to the corresponding
    per-frame list.

    Parameters
    ----------
    filename : str
        Path to the OUTCAR file.

    Returns
    -------
    dict
        Trajectory data with the keys

        * ``'box'``          - list of (3, 3) arrays, one row per line of the
          lattice-vector table (rows are the direct lattice vectors in
          standard VASP output)
        * ``'xyz'``          - list of (nat, 3) arrays of atomic positions
          converted from Cartesian coordinates to lattice (fractional) units
        * ``'forces'``       - list of (nat, 3) arrays of forces [eV/A]
        * ``'energy'``       - list of energies without entropy (sigma->0)
        * ``'free_energy'``  - list of free energies (TOTEN)
        * ``'total_energy'`` - list of total energies (ETOTAL)
        * ``'temp'``         - list of instantaneous temperatures [K]
        * ``'atom_num'``     - list with the number of atoms of each type

    Raises
    ------
    ValueError
        If a required header line ('number of ions', 'ions per type') is
        missing, or an energy/temperature/cell/position record does not have
        the expected format.
    AssertionError
        If the numbers of cell, coordinate and energy records are inconsistent.

    Notes
    -----
    Fractional coordinates are computed as ``cart @ inv(box)`` because the
    rows of ``box`` are the lattice vectors (``cart = frac @ box``).  The
    earlier form ``inv(box) @ cart.T`` is only correct for symmetric cell
    matrices (e.g. orthorhombic cells).
    """

    # per-frame accumulators
    boxs, xyzs, forces = [], [], []
    enes, enes_free, enes_tot, temps = [], [], [], []

    # header quantities; they stay None until the corresponding line is seen
    nat = None
    atom_num = None

    with open(filename, 'r') as f:

        # iter(f.readline, '') lets the branches below consume the lines that
        # follow a matched header with additional f.readline() calls
        for line in iter(f.readline, ''):

            # number of ions/atoms (last token on the line)
            if 'number of ions' in line:
                nat = int(line.split()[-1])

            # number of atoms of each type (all integers after the '=' sign)
            elif 'ions per type' in line:
                atom_num = [int(n) for n in line.split('=')[-1].split()]

            # box shape and dimensions
            elif 'VOLUME and BASIS-vectors are now' in line:
                # skip the four lines between the header and the vector table
                for _ in range(4):
                    f.readline()

                # first three columns of the next three lines form the cell
                # matrix; remaining columns are ignored
                rows = [[float(x) for x in f.readline().split()[:3]] for _ in range(3)]
                box = np.array(rows, dtype=np.float64)
                if box.shape != (3, 3):
                    raise ValueError('Could not read a 3x3 box matrix.')
                boxs.append(box)
                # The lines that follow (blank, 'length of vectors', lengths)
                # match no record below and are skipped by the main loop.

            # atom cartesian coordinates [A] and forces [eV/A]
            elif re.search(r'POSITION.*TOTAL-FORCE', line):
                if nat is None:
                    raise ValueError("Found atomic positions before the 'number of ions' line.")

                # skip the separator line under the header
                f.readline()

                # read coordinate and force data for all nat atoms
                data = np.array(
                    [[float(x) for x in f.readline().split()] for _ in range(nat)],
                    dtype=np.float64,
                )
                if data.ndim != 2 or data.shape[1] < 6:
                    raise ValueError('Could not read positions and forces for all atoms.')

                assert len(xyzs) + 1 == len(boxs), f'lengths of xyzs {len(xyzs)+1} and boxs {len(boxs)} do not match'

                # convert cartesian coordinates into lattice units:
                # cart = frac @ box  =>  frac = cart @ inv(box)
                xyz = np.matmul(data[:, 0:3], np.linalg.inv(boxs[-1]))

                xyzs.append(xyz)
                forces.append(data[:, 3:6].copy())

            # energies of the ion-electron system
            elif re.search(r'FREE ENERG.*\s+OF\s+THE\s+ION.ELECTRON\s+SYSTEM\s+\(eV\)', line):

                # check if the format agrees with the current assumptions
                if not re.search('------------', f.readline()):
                    raise ValueError('Could not find a separator line (----).')

                # read free energy (without kinetics); value precedes the 'eV' unit
                line = f.readline()
                if not re.search(r'free\s+energy\s+TOTEN\s+=.+eV', line):
                    raise ValueError('Could not find a line with free energy (TOTEN).')
                enes_free.append(float(line.split()[-2]))

                # skip the line between the two energy records
                f.readline()

                # read energy without entropy for sigma->0 (last token)
                line = f.readline()
                if not re.search(r'energy\s+without\s+entropy.+sigma', line):
                    raise ValueError('Could not find a line with energy without entropy (sigma->0).')
                enes.append(float(line.split()[-1]))

            # Total energy including thermostat
            elif 'ETOTAL' in line:
                enes_tot.append(float(line.split()[-2]))

            # Instantaneous temperature
            elif 'EKIN_LAT' in line:
                mo = re.search(r'\(temperature\s+(\d+\.?\d*)\s+K\)', line)
                if mo is None:
                    raise ValueError('Could not find temperature on the EKIN_LAT line.')
                temps.append(float(mo.group(1)))

    if atom_num is None:
        raise ValueError(f"{filename}: could not find the 'ions per type' line.")

    # check if the lengths of trajectory lists match
    assert len(enes) == len(xyzs), f'{filename} energy and XYZ lengths do not match: {len(enes)}, {len(xyzs)}'

    # combine trajectory data in a dictionary
    traj = {'box': boxs, 'xyz': xyzs, 'energy': enes, 'forces': forces, 'temp': temps}
    traj.update({'free_energy': enes_free, 'total_energy': enes_tot, 'atom_num': atom_num})

    return traj

In [156]:
# path of the OUTCAR file inside the trajectory directory
filename = os.path.join(trajdir, 'OUTCAR')

In [157]:
# parse the OUTCAR file; this rebinds `traj`, replacing data loaded in earlier cells
traj = read_outcar(filename)

1496 1496 1496 1496 1496
1496 1496


In [158]:
# show the 'atom_num' entry (number of atoms of each type) parsed from OUTCAR
traj['atom_num']

[128]

In [40]:
# read coordinate/forces files of a trajectory
coor_files = glob.glob('data-1/coor*')
fnum = lambda fname: int(re.findall('\d+$', fname)[0])
coor_sorted = sorted(coor_files, key=fnum)