In [23]:
import numpy as np
import pandas as pd
import ase.io as io

class Xyz:
    def __init__(self, path:str):
        self.path = path

    def read_eval(self, desc:str=''):
        '''Read and parse data from extended XYZ file output from MACE evaluation. Resulting dataframe saved under self.data.'''
        atoms_lst = io.read(
            self.path,
            index=':',
            format='extxyz'
        )
        data_lst = []

        for i, atom in enumerate(atoms_lst):
            entry_dict = atom.todict()
            entry = pd.DataFrame({
                    'Type': desc,
                    'Name': ''.join(np.unique(atom.get_chemical_symbols())),
                    'Lattice': [entry_dict['cell']],
                    'Configuration': i+1,
                    'Config. size': len(atom.get_positions()),
                    'Energy': entry_dict['info']['Energy'],
                    'Energy_MACE': entry_dict['info']['MACE_energy'],
                    'Atom': [atom.get_chemical_symbols()],
                    'Position': [entry_dict['positions']],
                    'Force': [entry_dict['forces']],
                    'Force_MACE': [entry_dict['MACE_forces']],
                    })
            data_lst.append(entry)
            
        self.data = pd.concat(data_lst)

    def get_errors(self):
        '''Get mean force error (sum over absolute difference in all directions) per configuration.'''
        self.data['Delta E'] = abs(self.data['Energy'] - self.data['Energy_MACE'])/self.data['Config. size']
        self.data['Delta F'] = abs(self.data['Force'] - self.data['Force_MACE'])
        self.data['Delta F sum'] = 0
        for run, config, delta_fs, config_size in zip(self.data['Name'],self.data['Configuration'],self.data['Delta F'],self.data['Config. size']):
            sums=[]
            for delta_f in delta_fs:
                sums.append(delta_f.sum()) # delta_f.sum() = sum of forces deviation per atom
            #print(sums)
            self.data.loc[(self.data['Name']==run) & (self.data['Configuration']==config),['Delta F sum']]=np.sum(sums)/config_size

    def get_stats(self, idt:str):
        '''Return dataframe containing summary of errors.'''
        stats=self.data.groupby(by='Name').mean(['Delta F sum', 'Delta E'])
        stats['ID']=idt
        return stats

In [6]:
#retrieve paths
import os
folder_path='/Users/dominicwelti/Library/CloudStorage/Dropbox/Master_Thesis/data_npj/mace/results/eval_valid'

files=[]
for (dirpath, dirnames, filenames) in os.walk(folder_path):
    for filename in filenames:
        files.append(f'{dirpath}/{filename}')

f=filter(lambda x: '.xyz' in x, files)
files=list(f)

In [7]:
files_sub=list(filter(lambda x: 'eval_valid_' in x, files))

In [24]:
stats_lst=[]
for file in files_sub:
    print(f'Parsing {file.split("/")[-1]}')
    xyz=Xyz(file)
    xyz.read_eval()
    xyz.get_errors()
    stats_lst.append(xyz.get_stats(idt=file.split("/")[-1]))
stats=pd.concat(stats_lst)

Parsing eval_valid_8.xyz
Parsing eval_valid_9.xyz
Parsing eval_valid_1.xyz
Parsing eval_valid_0.xyz
Parsing eval_valid_2.xyz
Parsing eval_valid_3.xyz
Parsing eval_valid_7.xyz
Parsing eval_valid_6.xyz
Parsing eval_valid_4.xyz
Parsing eval_valid_5.xyz


In [29]:
results=stats.groupby('Name').mean()

  results=stats.groupby('Name').mean()
