In [23]:
import numpy as np
import pandas as pd
import ase.io as io

class Xyz:
    '''Parse an evaluated extended XYZ file and summarise energy/force deviations.

    Intended call order: read_eval() -> get_errors() -> get_stats(idt).

    Attributes:
        path: location of the extended XYZ file (set in __init__).
        data: DataFrame with one row per configuration (set by read_eval,
            extended in place by get_errors).
    '''

    def __init__(self, path:str):
        '''Remember the file location; nothing is read until read_eval() is called.'''
        self.path = path

    def read_eval(self, desc:str=''):
        '''Read and parse an extended XYZ evaluation file into self.data.

        For every frame this reads 'Energy' and 'NequIP_energy' from the frame's
        info dict and 'forces' / 'NequIP_forces' from the dict returned by
        atom.todict(). Presumably the un-prefixed values are the reference data
        and the NequIP_* values the model predictions - confirm against the
        evaluation script.

        Args:
            desc: free-text label stored in the 'Type' column of every row.

        Raises:
            ValueError: if the file contains no configurations.
            KeyError: if a frame lacks one of the keys listed above.
        '''
        atoms_lst = io.read(
            self.path,
            index=':',
            format='extxyz'
        )
        data_lst = []

        for i, atom in enumerate(atoms_lst):
            entry_dict = atom.todict()
            symbols = atom.get_chemical_symbols()
            # Array-valued cells are wrapped in a one-element list so that each
            # array is stored as a single object cell of the one-row frame.
            entry = pd.DataFrame({
                    'Type': desc,
                    'Name': ''.join(np.unique(symbols)),
                    'Lattice': [entry_dict['cell']],
                    'Configuration': i+1,
                    'Config. size': len(symbols),
                    'Energy': entry_dict['info']['Energy'],
                    'Energy_NequIP': entry_dict['info']['NequIP_energy'],
                    'Atom': [symbols],
                    'Position': [entry_dict['positions']],
                    'Force': [entry_dict['forces']],
                    'Force_NequIP': [entry_dict['NequIP_forces']],
                    })
            data_lst.append(entry)

        if not data_lst:
            # pd.concat([]) would raise a ValueError as well, but without naming the file.
            raise ValueError(f'No configurations found in {self.path}')

        # ignore_index gives every configuration its own row label instead of
        # the label 0 that each one-row frame carries.
        self.data = pd.concat(data_lst, ignore_index=True)

    def get_errors(self):
        '''Add per-configuration error columns to self.data.

        Columns written:
            'Delta E':     |Energy - Energy_NequIP| / Config. size
            'Delta F':     element-wise |Force - Force_NequIP| (one array per row)
            'Delta F sum': sum of all entries of 'Delta F' / Config. size
        '''
        data = self.data
        data['Delta E'] = abs(data['Energy'] - data['Energy_NequIP'])/data['Config. size']
        data['Delta F'] = abs(data['Force'] - data['Force_NequIP'])
        # Build the whole column in one pass as floats; filling an integer
        # column row by row through a boolean mask is quadratic and relies on
        # implicit dtype upcasting.
        data['Delta F sum'] = np.array(
            [
                float(np.sum(delta_f))/config_size
                for delta_f, config_size in zip(data['Delta F'], data['Config. size'])
            ],
            dtype=float,
        )

    def get_stats(self, idt:str):
        '''Return the per-'Name' mean of every numeric column, tagged with an ID.

        Args:
            idt: identifier stored in the 'ID' column of every returned row.

        Returns:
            DataFrame indexed by 'Name' holding the mean of each numeric column
            of self.data (including 'Delta E' and 'Delta F sum' once
            get_errors() has run) plus the constant 'ID' column.
        '''
        # The first positional parameter of GroupBy.mean is numeric_only, so it
        # is spelled out here; array-valued (object) columns are skipped.
        stats=self.data.groupby(by='Name').mean(numeric_only=True)
        stats['ID']=idt
        return stats

In [29]:
#retrieve paths
import os

# NOTE: machine-specific absolute path; adjust it when running elsewhere.
folder_path='/Users/dominicwelti/Documents/Master_Thesis_Data_Set/hea/results_nequip/hea'


def find_xyz_files(root):
    '''Return the sorted paths of all files below root whose name ends in '.xyz'.

    The suffix is tested on the file name only, so files such as 'a.xyz.bak'
    or files inside a directory whose name merely contains '.xyz' are not
    picked up. Paths are built as '<dirpath>/<filename>'. A non-existent root
    yields an empty list.
    '''
    found = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for filename in filenames:
            if filename.endswith('.xyz'):
                found.append(f'{dirpath}/{filename}')
    # os.walk order depends on the file system; sort for reproducible runs.
    return sorted(found)


files = find_xyz_files(folder_path)

In [30]:
# Keep only the paths that contain 'test' (substring match on the whole path).
files_sub = [path for path in files if 'test' in path]

In [31]:
# Parse each selected file, compute its errors and collect the per-'Name'
# summary; the name of the file's parent directory is used as the summary ID.
stats_lst=[]
for file in files_sub:
    path_parts = file.split("/")
    print(f'Parsing {path_parts[-1]}')
    xyz=Xyz(file)
    xyz.read_eval()
    xyz.get_errors()
    file_stats = xyz.get_stats(idt=path_parts[-2])
    stats_lst.append(file_stats)
# One table holding the summaries of all parsed files.
stats=pd.concat(stats_lst)

Parsing test_8.xyz
Parsing test_9.xyz
Parsing test_4.xyz
Parsing test_5.xyz
Parsing test_7.xyz
Parsing test_6.xyz
Parsing test_2.xyz
Parsing test_3.xyz
Parsing test_1.xyz
Parsing test_0.xyz
Parsing test_8.xyz
Parsing test_9.xyz
Parsing test_4.xyz
Parsing test_5.xyz
Parsing test_7.xyz
Parsing test_6.xyz
Parsing test_2.xyz
Parsing test_3.xyz
Parsing test_1.xyz
Parsing test_0.xyz
Parsing test_8.xyz
Parsing test_9.xyz
Parsing test_4.xyz
Parsing test_5.xyz
Parsing test_7.xyz
Parsing test_6.xyz
Parsing test_2.xyz
Parsing test_3.xyz
Parsing test_1.xyz
Parsing test_0.xyz


In [32]:
# Show the concatenated per-file summaries (indexed by 'Name', tagged with 'ID').
stats

Unnamed: 0_level_0,Configuration,Config. size,Energy,Energy_NequIP,Delta E,Delta F sum,ID
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
CrTa,70.761905,17.706349,-187.682097,-187.691275,0.002654,0.051962,standard
CrTaV,685.000000,38.400000,-385.217510,-385.041083,0.003453,0.045919,standard
CrTaVW,710.808642,106.814815,-1115.658343,-1115.881954,0.006395,0.257745,standard
CrTaW,1030.653846,41.846154,-473.548236,-473.403682,0.002286,0.040133,standard
CrV,245.834254,13.005525,-119.980938,-119.899714,0.002230,0.027154,standard
...,...,...,...,...,...,...,...
CrTaVW,59.500000,47.322034,-509.311784,-509.284279,0.007940,0.043362,4comp
CrTaVW,59.500000,41.762712,-449.230934,-449.117062,0.006271,0.040118,4comp
CrTaVW,59.500000,36.135593,-389.627880,-389.565354,0.002721,0.039234,4comp
CrTaVW,59.500000,42.237288,-454.331659,-454.186848,0.006505,0.042006,4comp


In [33]:
# Average the per-file summaries within each ('Name', 'ID') pair.
results = stats.groupby(by=['Name', 'ID']).mean()

In [34]:
# Column-wise mean over all ('Name', 'ID') groups.
results.mean(axis=0)

Configuration    564.937220
Config. size      41.952683
Energy          -445.018288
Energy_NequIP   -445.023323
Delta E            0.003774
Delta F sum        0.082087
dtype: float64