In [26]:
import numpy as np
import pandas as pd
import ase.io as io

class Xyz:
    '''Parse an extended-XYZ evaluation file and summarise energy/force errors.

    Intended call order: ``read_eval`` -> ``get_errors`` -> ``get_stats``.
    All intermediate results live in the dataframe ``self.data`` (one row per
    configuration read from the file).
    '''

    # Column order of ``self.data`` as produced by ``read_eval``.
    _COLUMNS = (
        'Type', 'Name', 'Lattice', 'Configuration', 'Config. size',
        'Energy', 'Energy_Inference', 'Atom', 'Position', 'Force',
        'Force_Inference',
    )
    # Columns whose cells hold a whole array/list instead of a scalar.
    _ARRAY_COLUMNS = ('Lattice', 'Atom', 'Position', 'Force', 'Force_Inference')

    def __init__(self, path:str):
        '''Store the file location; nothing is read until ``read_eval`` is called.'''
        self.path = path

    @staticmethod
    def _object_column(items):
        '''Pack arbitrary per-row objects into a 1-D object array, one object per cell.'''
        # Filling element by element stops numpy from stacking equally shaped
        # arrays into one higher-dimensional array.
        column = np.empty(len(items), dtype=object)
        for position, item in enumerate(items):
            column[position] = item
        return column

    def read_eval(self, desc:str='', energy_key:str='NequIP_energy', force_key:str='NequIP_forces'):
        '''Read all configurations of the extended XYZ file at ``self.path`` into ``self.data``.

        Parameters
        ----------
        desc : str
            Free-text label written to the 'Type' column of every row.
        energy_key : str
            Key inside each configuration's ``info`` dict that holds the
            predicted (inferred) energy, e.g. 'NequIP_energy' or 'MACE_energy'.
        force_key : str
            Key of the per-atom array that holds the predicted forces,
            e.g. 'NequIP_forces' or 'MACE_forces'.

        Raises
        ------
        KeyError
            If a configuration lacks 'Energy', ``energy_key``, 'forces' or
            ``force_key``.

        Notes
        -----
        The reference energy is read from ``info['Energy']`` and the reference
        forces from the 'forces' array. Rows are numbered by a plain
        ``RangeIndex``; 'Configuration' is the 1-based position in the file.
        A file without configurations yields an empty dataframe.
        '''
        frames = io.read(
            self.path,
            index=':',
            format='extxyz'
        )

        rows = []
        for number, atoms in enumerate(frames, start=1):
            raw = atoms.todict()
            symbols = atoms.get_chemical_symbols()
            rows.append({
                'Type': desc,
                # Sorted, de-duplicated element symbols, e.g. 'CrTaV'.
                'Name': ''.join(np.unique(symbols)),
                'Lattice': raw['cell'],
                'Configuration': number,
                'Config. size': len(atoms.get_positions()),
                'Energy': raw['info']['Energy'],
                'Energy_Inference': raw['info'][energy_key],
                'Atom': symbols,
                'Position': raw['positions'],
                'Force': raw['forces'],
                'Force_Inference': raw[force_key],
            })

        # Build the frame once instead of concatenating one-row frames.
        columns = {}
        for key in self._COLUMNS:
            values = [row[key] for row in rows]
            columns[key] = self._object_column(values) if key in self._ARRAY_COLUMNS else values
        self.data = pd.DataFrame(columns)

    def get_errors(self):
        '''Add per-configuration error columns to ``self.data``.

        Columns written:
        * 'Delta E'     - absolute energy error divided by the number of atoms.
        * 'Delta F'     - array of absolute force errors (same shape as 'Force').
        * 'Delta F sum' - sum of all entries of 'Delta F' divided by the number
                          of atoms, i.e. the mean per-atom force error summed
                          over all directions.
        '''
        sizes = self.data['Config. size']
        self.data['Delta E'] = (self.data['Energy'] - self.data['Energy_Inference']).abs() / sizes

        deltas = [
            np.abs(np.asarray(reference) - np.asarray(predicted))
            for reference, predicted in zip(self.data['Force'], self.data['Force_Inference'])
        ]
        self.data['Delta F'] = self._object_column(deltas)
        # Computed row by row in one pass; the column is float from the start
        # (no integer placeholder that is later overwritten with floats).
        self.data['Delta F sum'] = [
            float(np.sum(delta)) / size for delta, size in zip(deltas, sizes)
        ]

    def get_stats(self, idt:str, splt):
        '''Return the mean of every numeric column of ``self.data`` per 'Name'.

        Parameters
        ----------
        idt : str
            Identifier stored in the 'ID' column of the result.
        splt
            Split number; converted with ``int`` and stored in 'Split'.

        Returns
        -------
        pandas.DataFrame
            Indexed by 'Name'; contains the averaged numeric columns
            (including 'Delta E' and 'Delta F sum' once ``get_errors`` ran)
            plus 'ID' and 'Split'.
        '''
        # The first positional argument of GroupBy.mean is ``numeric_only``;
        # state it explicitly instead of passing a (truthy) list of columns.
        stats = self.data.groupby(by='Name').mean(numeric_only=True)
        stats['ID'] = idt
        stats['Split'] = int(splt)
        return stats

In [43]:
#%reset -f

In [38]:
# Retrieve the paths of all evaluation files below folder_path.
import os
folder_path='/Users/dominicwelti/Documents/Master_Thesis_Data_Set/hea/results_mace'
# Keys under which the model wrote its predictions into the extended XYZ files.
energy_key='MACE_energy'
force_key='MACE_forces'

# Walk the tree once and keep only files whose *name* ends in '.xyz'
# (a substring test on the full path would also match directory names or
# names such as 'a.xyz.bak'). Sorting makes the processing order reproducible,
# since os.walk yields entries in arbitrary order.
files = sorted(
    f'{dirpath}/{filename}'
    for dirpath, _dirnames, filenames in os.walk(folder_path)
    for filename in filenames
    if filename.endswith('.xyz')
)

In [39]:
# Keep only the paths that do not contain the substring 'deformed'.
files_sub = [path for path in files if 'deformed' not in path]

In [40]:
# folder/file designations for this to work correctly:
# <prepath>/<folder for specific model e.g. 'results_nequip'>/<type of configurations e.g. 'standard'>/<designation>_<split number>.xyz

stats_lst=[]
for file in files_sub:
    # Last two path components: configuration-type folder and file name.
    folder_name, file_name = file.split("/")[-2:]
    print(f'Parsing {folder_name} {file_name}')
    # Split number = everything between the last '_' and the extension, so
    # multi-digit splits (e.g. 'eval_10.xyz') are parsed correctly.
    split_number = file_name.rsplit('.', 1)[0].rsplit('_', 1)[-1]
    xyz=Xyz(file)
    xyz.read_eval(energy_key=energy_key, force_key=force_key)
    xyz.get_errors()
    stats_lst.append(xyz.get_stats(idt=folder_name, splt=split_number))
# One summary row per (file, Name) combination.
stats=pd.concat(stats_lst)

Parsing standard eval_valid_8.xyz
Parsing standard eval_valid_9.xyz
Parsing standard eval_valid_1.xyz
Parsing standard eval_valid_0.xyz
Parsing standard eval_valid_2.xyz
Parsing standard eval_valid_3.xyz
Parsing standard eval_valid_7.xyz
Parsing standard eval_valid_6.xyz
Parsing standard eval_valid_4.xyz
Parsing standard eval_valid_5.xyz
Parsing 2500k eval_9.xyz
Parsing 2500k eval_8.xyz
Parsing 2500k eval_5.xyz
Parsing 2500k eval_4.xyz
Parsing 2500k eval_6.xyz
Parsing 2500k eval_7.xyz
Parsing 2500k eval_3.xyz
Parsing 2500k eval_2.xyz
Parsing 2500k eval_0.xyz
Parsing 2500k eval_1.xyz
Parsing 4comp eval_9.xyz
Parsing 4comp eval_8.xyz
Parsing 4comp eval_5.xyz
Parsing 4comp eval_4.xyz
Parsing 4comp eval_6.xyz
Parsing 4comp eval_7.xyz
Parsing 4comp eval_3.xyz
Parsing 4comp eval_2.xyz
Parsing 4comp eval_0.xyz
Parsing 4comp eval_1.xyz


In [41]:
# Tag every summary row with the model that produced the evaluated files.
stats = stats.assign(Model='MACE')

In [42]:
# Start an empty collection the first time this cell runs in a kernel;
# afterwards keep appending. Note that every execution of this cell appends
# the current `stats` again.
try:
    stats_collection
except NameError:
    stats_collection = pd.DataFrame()

stats_collection = pd.concat([stats_collection, stats])

In [43]:
# Persist the accumulated statistics as a pickle file.
pd.to_pickle(
    stats_collection,
    '/Users/dominicwelti/Documents/Master_Thesis_Data_Set/hea/stats_results.pkl',
)

In [46]:
# Average over the splits for every (Name, ID) pair. `stats` also carries the
# string column 'Model', so restrict the mean to numeric columns explicitly;
# without numeric_only=True older pandas warns and pandas >= 2.0 raises.
results = stats.groupby(['Name', 'ID']).mean(numeric_only=True)

  results=stats.groupby(['Name', 'ID']).mean()


In [47]:
# Display the per-(Name, ID) means computed from `stats`.
results

Unnamed: 0_level_0,Unnamed: 1_level_0,Configuration,Config. size,Energy,Energy_Inference,Delta E,Delta F sum,Split
Name,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CrTa,standard,353.234164,18.030397,-190.587377,-190.620884,0.001616,0.033317,4.5
CrTaV,standard,941.4,44.224,-443.432165,-443.44282,0.00144,0.041791,4.5
CrTaVW,2500k,103.5,140.543689,-1459.00167,-1459.003611,0.004038,0.424055,4.5
CrTaVW,4comp,59.5,43.986441,-473.313606,-473.309603,0.001958,0.033334,4.5
CrTaVW,standard,654.190741,105.377778,-1100.016511,-1100.016283,0.00328,0.281755,4.5
CrTaW,standard,807.187058,44.856468,-510.387237,-510.366916,0.001663,0.035264,4.5
CrV,standard,632.964165,12.708827,-117.659581,-117.704908,0.001183,0.019207,4.5
CrVW,standard,296.097972,43.144631,-453.368236,-453.346177,0.001308,0.030311,4.5
CrW,standard,811.172874,11.90225,-131.528369,-131.531671,0.00112,0.023476,4.5
TaV,standard,971.736769,13.929809,-144.727538,-144.730614,0.001,0.022807,4.5


In [48]:
# Show `results` without the ('CrTaVW', 'standard') row.
# NOTE(review): the returned frame is only displayed, not assigned, so
# `results` itself still contains that row - confirm this is intended.
results.drop(index=('CrTaVW','standard'))

Unnamed: 0_level_0,Unnamed: 1_level_0,Configuration,Config. size,Energy,Energy_Inference,Delta E,Delta F sum,Split
Name,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CrTa,standard,353.234164,18.030397,-190.587377,-190.620884,0.001616,0.033317,4.5
CrTaV,standard,941.4,44.224,-443.432165,-443.44282,0.00144,0.041791,4.5
CrTaVW,2500k,103.5,140.543689,-1459.00167,-1459.003611,0.004038,0.424055,4.5
CrTaVW,4comp,59.5,43.986441,-473.313606,-473.309603,0.001958,0.033334,4.5
CrTaW,standard,807.187058,44.856468,-510.387237,-510.366916,0.001663,0.035264,4.5
CrV,standard,632.964165,12.708827,-117.659581,-117.704908,0.001183,0.019207,4.5
CrVW,standard,296.097972,43.144631,-453.368236,-453.346177,0.001308,0.030311,4.5
CrW,standard,811.172874,11.90225,-131.528369,-131.531671,0.00112,0.023476,4.5
TaV,standard,971.736769,13.929809,-144.727538,-144.730614,0.001,0.022807,4.5
TaVW,standard,431.872765,43.291058,-486.889356,-486.865777,0.001156,0.030002,4.5


In [49]:
# Column-wise mean over all rows currently in `results`.
results.mean()

Configuration       564.937220
Config. size         41.952683
Energy             -445.018288
Energy_Inference   -445.021962
Delta E               0.001701
Delta F sum           0.078128
Split                 4.500000
dtype: float64