In [23]:
import numpy as np
import pandas as pd
import ase.io as io

class Xyz:
    def __init__(self, path:str):
        self.path = path

    def read_eval(self, desc:str=''):
        '''Read and parse data from extended XYZ file output from MACE evaluation. Resulting dataframe saved under self.data.'''
        atoms_lst = io.read(
            self.path,
            index=':',
            format='extxyz'
        )
        data_lst = []

        for i, atom in enumerate(atoms_lst):
            entry_dict = atom.todict()
            entry = pd.DataFrame({
                    'Type': desc,
                    'Name': ''.join(np.unique(atom.get_chemical_symbols())),
                    'Lattice': [entry_dict['cell']],
                    'Configuration': i+1,
                    'Config. size': len(atom.get_positions()),
                    'Energy': entry_dict['info']['Energy'],
                    'Energy_MACE': entry_dict['info']['MACE_energy'],
                    'Atom': [atom.get_chemical_symbols()],
                    'Position': [entry_dict['positions']],
                    'Force': [entry_dict['forces']],
                    'Force_MACE': [entry_dict['MACE_forces']],
                    })
            data_lst.append(entry)
            
        self.data = pd.concat(data_lst)

    def get_errors(self):
        '''Get mean force error (sum over absolute difference in all directions) per configuration.'''
        self.data['Delta E'] = abs(self.data['Energy'] - self.data['Energy_MACE'])/self.data['Config. size']
        self.data['Delta F'] = abs(self.data['Force'] - self.data['Force_MACE'])
        self.data['Delta F sum'] = 0
        for run, config, delta_fs, config_size in zip(self.data['Name'],self.data['Configuration'],self.data['Delta F'],self.data['Config. size']):
            sums=[]
            for delta_f in delta_fs:
                sums.append(delta_f.sum()) # delta_f.sum() = sum of forces deviation per atom
            #print(sums)
            self.data.loc[(self.data['Name']==run) & (self.data['Configuration']==config),['Delta F sum']]=np.sum(sums)/config_size

    def get_stats(self, idt:str):
        '''Return dataframe containing summary of errors.'''
        stats=self.data.groupby(by='Name').mean(['Delta F sum', 'Delta E'])
        stats['ID']=idt
        return stats

In [75]:
#retrieve paths
import os
folder_path='/Users/dominicwelti/Library/CloudStorage/Dropbox/Master_Thesis/data_npj/mace/results/eval_deformed'

files=[]
for (dirpath, dirnames, filenames) in os.walk(folder_path):
    for filename in filenames:
        files.append(f'{dirpath}/{filename}')

f=filter(lambda x: '.xyz' in x, files)
files=list(f)

In [76]:
files_sub=list(filter(lambda x: 'deformed_' in x, files))

In [77]:
stats_lst=[]
for file in files_sub:
    print(f'Parsing {file.split("/")[-1]}')
    xyz=Xyz(file)
    xyz.read_eval()
    xyz.get_errors()
    stats_lst.append(xyz.get_stats(idt=file.split("/")[-1]))
stats=pd.concat(stats_lst)

Parsing deformed_8.xyz
Parsing deformed_9.xyz
Parsing deformed_7.xyz
Parsing deformed_6.xyz
Parsing deformed_4.xyz
Parsing deformed_5.xyz
Parsing deformed_1.xyz
Parsing deformed_0.xyz
Parsing deformed_2.xyz
Parsing deformed_3.xyz


In [78]:
stats

Unnamed: 0_level_0,Configuration,Config. size,Energy,Energy_MACE,Delta E,Delta F sum,ID
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
CrTa,2.0,432.0,-4618.564247,-4617.936523,0.001453,0.000991,deformed_8.xyz
CrV,6.0,432.0,-3971.51625,-3972.751465,0.002859,0.00295,deformed_8.xyz
CrW,5.0,432.0,-4836.233374,-4836.81543,0.001347,0.000713,deformed_8.xyz
TaV,3.0,432.0,-4465.183401,-4467.385742,0.005098,0.003715,deformed_8.xyz
TaW,1.0,432.0,-5358.871179,-5359.116211,0.000567,0.004233,deformed_8.xyz
VW,4.0,432.0,-4764.441315,-4764.874023,0.001002,0.000624,deformed_8.xyz
CrTa,2.0,432.0,-4618.564247,-4618.85791,0.00068,0.000991,deformed_9.xyz
CrV,6.0,432.0,-3971.51625,-3975.483887,0.009184,0.002957,deformed_9.xyz
CrW,5.0,432.0,-4836.233374,-4837.269043,0.002397,0.000679,deformed_9.xyz
TaV,3.0,432.0,-4465.183401,-4466.146484,0.002229,0.003713,deformed_9.xyz


In [79]:
results=stats.groupby('Name').mean()

  results=stats.groupby('Name').mean()


In [81]:
results.mean()

Configuration       3.500000
Config. size      432.000000
Energy          -4669.134961
Energy_MACE     -4670.048698
Delta E             0.002870
Delta F sum         0.002202
dtype: float64