In [24]:
import numpy as np
import pandas as pd
import ase.io as io

class Xyz:
    def __init__(self, path:str):
        self.path = path

    def read_eval(self, desc:str='', energy_key:str='NequIP_energy', force_key:str='NequIP_forces'):
        '''Read and parse data from extended XYZ file output from MACE evaluation. Resulting dataframe saved under self.data.'''
        atoms_lst = io.read(
            self.path,
            index=':',
            format='extxyz'
        )
        data_lst = []

        for i, atom in enumerate(atoms_lst):
            entry_dict = atom.todict()
            entry = pd.DataFrame({
                    'Type': desc,
                    'Name': ''.join(np.unique(atom.get_chemical_symbols())),
                    'Lattice': [entry_dict['cell']],
                    'Configuration': i+1,
                    'Config. size': len(atom.get_positions()),
                    'Energy': entry_dict['info']['Energy'],
                    'Energy_Inference': entry_dict['info'][energy_key],
                    'Atom': [atom.get_chemical_symbols()],
                    'Position': [entry_dict['positions']],
                    'Force': [entry_dict['forces']],
                    'Force_Inference': [entry_dict[force_key]],
                    })
            data_lst.append(entry)
            
        self.data = pd.concat(data_lst)

    def get_errors(self):
        '''Get mean force error (sum over absolute difference in all directions) per configuration.'''
        self.data['Delta E'] = abs(self.data['Energy'] - self.data['Energy_Inference'])/self.data['Config. size']
        self.data['Delta F'] = abs(self.data['Force'] - self.data['Force_Inference'])
        self.data['Delta F sum'] = 0
        for run, config, delta_fs, config_size in zip(self.data['Name'],self.data['Configuration'],self.data['Delta F'],self.data['Config. size']):
            sums=[]
            for delta_f in delta_fs:
                sums.append(delta_f.sum()) # delta_f.sum() = sum of forces deviation per atom
            #print(sums)
            self.data.loc[(self.data['Name']==run) & (self.data['Configuration']==config),['Delta F sum']]=np.sum(sums)/config_size

    def get_stats(self, idt:str, splt):
        '''Return dataframe containing summary of errors.'''
        stats=self.data.groupby(by='Name').mean(['Delta F sum', 'Delta E'])
        stats['ID']=idt
        stats['Split']=int(splt)
        return stats

In [18]:
# %reset -f

In [19]:
#retrieve paths
import os
folder_path='/Users/dominicwelti/Documents/Master_Thesis_Data_Set/hea/results_uf3'
energy_key='UF3_energy'
force_key='UF3_forces'

files=[]
for (dirpath, dirnames, filenames) in os.walk(folder_path):
    for filename in filenames:
        files.append(f'{dirpath}/{filename}')

f=filter(lambda x: '.xyz' in x, files)
files=list(f)

In [20]:
#files_sub=list(filter(lambda x: not 'deformed' in x, files))
files_sub=list(filter(lambda x: 'deformed' in x, files))

In [23]:
files_sub

['/Users/dominicwelti/Documents/Master_Thesis_Data_Set/hea/results_uf3/deformed/deformed_8.xyz',
 '/Users/dominicwelti/Documents/Master_Thesis_Data_Set/hea/results_uf3/deformed/deformed_9.xyz',
 '/Users/dominicwelti/Documents/Master_Thesis_Data_Set/hea/results_uf3/deformed/deformed_7.xyz',
 '/Users/dominicwelti/Documents/Master_Thesis_Data_Set/hea/results_uf3/deformed/deformed_6.xyz',
 '/Users/dominicwelti/Documents/Master_Thesis_Data_Set/hea/results_uf3/deformed/deformed_4.xyz',
 '/Users/dominicwelti/Documents/Master_Thesis_Data_Set/hea/results_uf3/deformed/deformed_5.xyz',
 '/Users/dominicwelti/Documents/Master_Thesis_Data_Set/hea/results_uf3/deformed/deformed_1.xyz',
 '/Users/dominicwelti/Documents/Master_Thesis_Data_Set/hea/results_uf3/deformed/deformed_0.xyz',
 '/Users/dominicwelti/Documents/Master_Thesis_Data_Set/hea/results_uf3/deformed/deformed_2.xyz',
 '/Users/dominicwelti/Documents/Master_Thesis_Data_Set/hea/results_uf3/deformed/deformed_3.xyz']

In [25]:
# folder/file designations for this to work correctly: 
# <prepath>/<folder for specific model e.g. 'results_nequip'>/<type of configurations e.g. 'standard'>/<designation>_<split number>.xyz

stats_lst=[]
for file in files_sub:
    print(f'Parsing {file.split("/")[-2]} {file.split("/")[-1]}')
    xyz=Xyz(file)
    xyz.read_eval(energy_key=energy_key, force_key=force_key)
    xyz.get_errors()
    stats_lst.append(xyz.get_stats(idt=file.split("/")[-2],splt=file.split("/")[-1][-5]))
stats=pd.concat(stats_lst)

Parsing deformed deformed_8.xyz
Parsing deformed deformed_9.xyz
Parsing deformed deformed_7.xyz
Parsing deformed deformed_6.xyz
Parsing deformed deformed_4.xyz
Parsing deformed deformed_5.xyz
Parsing deformed deformed_1.xyz
Parsing deformed deformed_0.xyz
Parsing deformed deformed_2.xyz
Parsing deformed deformed_3.xyz


In [26]:
stats['Model']='UF3'

In [42]:
if not 'stats_collection' in globals():
    stats_collection=pd.DataFrame()
    
stats_collection=pd.concat([stats_collection,stats])

In [16]:
# stats_collection=pd.read_pickle('/Users/dominicwelti/Documents/Master_Thesis_Data_Set/hea/stats_results.pkl')
# stats_collection=pd.concat([stats_collection,stats])
# stats_collection.to_pickle('/Users/dominicwelti/Documents/Master_Thesis_Data_Set/hea/stats_results.pkl')

In [27]:
results=stats.groupby(['Name', 'ID']).mean()

  results=stats.groupby(['Name', 'ID']).mean()


In [28]:
results

Unnamed: 0_level_0,Unnamed: 1_level_0,Configuration,Config. size,Energy,Energy_Inference,Delta E,Delta F sum,Split
Name,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CrTa,deformed,2.0,432.0,-4618.564247,-4620.580251,0.004667,0.000999,4.5
CrV,deformed,6.0,432.0,-3971.51625,-3966.132279,0.012463,0.002952,4.5
CrW,deformed,5.0,432.0,-4836.233374,-4835.615723,0.00143,0.000669,4.5
TaV,deformed,3.0,432.0,-4465.183401,-4463.836301,0.003118,0.003727,4.5
TaW,deformed,1.0,432.0,-5358.871179,-5363.389913,0.01046,0.004225,4.5
VW,deformed,4.0,432.0,-4764.441315,-4764.835097,0.000912,0.000618,4.5


In [10]:
results.drop(index=('CrTaVW','standard'))

Unnamed: 0_level_0,Unnamed: 1_level_0,Configuration,Config. size,Energy,Energy_Inference,Delta E,Delta F sum,Split
Name,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CrTa,standard,353.234164,18.030397,-190.587377,-190.625581,0.00482,0.165249,4.5
CrTaV,standard,941.4,44.224,-443.432165,-443.429352,0.002163,0.166414,4.5
CrTaVW,2500k,103.5,140.543689,-1459.00167,-1459.082194,0.006838,0.881383,4.5
CrTaVW,4comp,59.5,43.986441,-473.313606,-473.317383,0.003205,0.168753,4.5
CrTaW,standard,807.187058,44.856468,-510.387237,-510.441664,0.002544,0.193296,4.5
CrV,standard,632.964165,12.708827,-117.659581,-117.703363,0.001772,0.093714,4.5
CrVW,standard,296.097972,43.144631,-453.368236,-453.300661,0.002143,0.131081,4.5
CrW,standard,811.172874,11.90225,-131.528369,-131.532513,0.002228,0.107483,4.5
TaV,standard,971.736769,13.929809,-144.727538,-144.719689,0.00188,0.1504,4.5
TaVW,standard,431.872765,43.291058,-486.889356,-486.952256,0.00201,0.140519,4.5


In [29]:
results.mean()

Configuration          3.500000
Config. size         432.000000
Energy             -4669.134961
Energy_Inference   -4669.064927
Delta E                0.005508
Delta F sum            0.002198
Split                  4.500000
dtype: float64