In [14]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import os
import py4vasp as pv

In [2]:
data_mace=pd.read_pickle('/Users/dominicwelti/Documents/Master_Thesis_Data_Set/Graphite/MACE/results.pkl')

In [3]:
dft_folder='/Users/dominicwelti/Documents/Master_Thesis_Data_Set/Graphite/DFT'
files=list(filter(lambda x:'.h5' in x, os.listdir(dft_folder)))

data_dft=pd.DataFrame({
    'Calculation type': 'DFT',
    'Model': '',
    'c Factor': [0]*len(files),
    'c Parameter': 0,
    'Energy': 0
})

In [4]:
for i, row in data_dft.iterrows():
    data_dft.loc[i, 'Model'] = files[i].split('_')[0]
    data_dft.loc[i, 'c Factor'] = files[i].split('_')[1].split('.h')[0]

    calc=pv.Calculation.from_file(f'{dft_folder}/{files[i]}')
    atoms=calc.structure.to_ase()
    data_dft.loc[i, 'c Parameter'] = atoms.get_cell()[2,2]
    data_dft.loc[i, 'Energy'] = calc.energy.read()['energy(sigma->0)']


  data_dft.loc[i, 'c Factor'] = files[i].split('_')[1].split('.h')[0]
  data_dft.loc[i, 'c Parameter'] = atoms.get_cell()[2,2]
  data_dft.loc[i, 'Energy'] = calc.energy.read()['energy(sigma->0)']


In [5]:
def normalize(df:pd.DataFrame):
    '''Normalize energy values (delta to lowest value of model)'''
    df['Relative energy']=0
    df['Energy difference [eV]']=0
    for model in df['Model'].unique():
        min_e, max_e = np.min(df.loc[df['Model']==model, 'Energy']), np.max(df.loc[df['Model']==model, 'Energy'])
        df.loc[df['Model']==model, 'Relative energy'] = (-df.loc[df['Model']==model, 'Energy'] + min_e) / (-max_e+min_e)
        df.loc[df['Model']==model, 'Energy difference [eV]'] = df.loc[df['Model']==model, 'Energy'] - min_e
    return df

In [6]:
data_dft=normalize(data_dft)
data_mace=normalize(data_mace)

  0.10293684  0.1135347   0.08934213  0.07245805  0.03102748  0.13182945
  0.12158932 -0.          0.01037473  0.20042502]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df.loc[df['Model']==model, 'Relative energy'] = (-df.loc[df['Model']==model, 'Energy'] + min_e) / (-max_e+min_e)
 0.07380562 0.08140427 0.06405822 0.05195235 0.02224668 0.09452159
 0.08717943 0.         0.00743867 0.14370455]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df.loc[df['Model']==model, 'Energy difference [eV]'] = df.loc[df['Model']==model, 'Energy'] - min_e
  0.28031527  0.26371959  0.88177412  0.42380183  0.24942478  0.30591864
  0.25187252  0.30386253  0.81534244 -0.        ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df.loc[df['Model']==model, 'Relative energy'] = (-df.loc[df['Model']==model, 'Energy'] + min_e) / (-max_e+min_e)
 2.79589844 2.63037109 8.79492188 4.22705078 

In [7]:
df=pd.concat([data_mace, data_dft])
df['c Factor']=df['c Factor'].astype(float)
df=df.sort_values('c Factor').reset_index(drop=True)

In [36]:
plot=px.line(df, x='c Factor', y='Relative energy', color='Model', markers=True, template='ggplot2', 
             title='Graphite: relative energy in relation to c factor (multiplier of relaxed c lattice parameter determined with DFT-PBE-D3BF). Model ANI-1ccx: MACE model trained on ANI-1ccx dataset (organic molecules CCSD(T) level). Model s66x8: MACE model trained on s66x8 dataset (organic dimer molecules CCSD(T) level). Model ANI-1ccx+s66x8: trained on both.')

In [37]:
plot.show()

In [18]:
plot2=px.line(df, x='c Factor', y='Energy difference [eV]', color='Model', markers=True, template='ggplot2', 
             title='Graphite: energy diffence in relation to c factor (multiplier of relaxed c lattice parameter determined with DFT-PBE-D3BF). Model ANI-1ccx: MACE model trained on ANI-1ccx dataset (organic molecules CCSD(T) level). Model s66x8: MACE model trained on s66x8 dataset (organic dimer molecules CCSD(T) level). Model ANI-1ccx+s66x8: trained on both.')

In [19]:
plot2.show()

In [38]:
plot.write_html('/Users/dominicwelti/Library/CloudStorage/Dropbox/Master_Thesis/Graphs/graphite_cRange.html')

In [10]:
# prepare plots for use in thesis report
# rename columns
df.loc[df['Model']=='LDA', 'Model']='DFT LDA'
df.loc[df['Model']=='PBE-D3BF', 'Model']='DFT PBE-D3BJ'
df.loc[df['Model']=='PBE', 'Model']='DFT PBE'
df.loc[df['Model']=='ANI-1ccx', 'Model']='MACE ANI-1ccx'
df.loc[df['Model']=='ANI-1ccx+s66x8', 'Model']='MACE ANI-1ccx+s66x8'
df.loc[df['Model']=='s66x8', 'Model']='MACE s66x8'

df=df.rename(columns={'Model': 'Method'})

In [118]:
df_fp = df.query('`Calculation type`=="DFT"')

In [41]:
plot=px.line(df_fp, x='c Factor', y='Relative energy', color='Method', markers=True, template='ggplot2')
plot.update_layout(font_family="Serif",font_size=14,
                   xaxis=dict(
                       range=[0.75,1.5]
                   ),
                   yaxis=dict(
                       range=[-0.2,1]
                   ),
                   width=1000,
                   height=350)

In [122]:
df_fp['Colour'] = ''
df_fp.loc[df['Method']=='DFT PBE-D3BJ','Colour'] = 'red'
df_fp.loc[df['Method']=='DFT LDA','Colour'] = 'turquoise'
df_fp.loc[df['Method']=='DFT PBE','Colour'] = 'grey'



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [126]:
from plotly.subplots import make_subplots

plot_combined = make_subplots(rows=2, cols=1, shared_xaxes=True)

for method in df_fp['Method'].unique():
    print(method)
    subset=df_fp.query('Method==@method')
    plot_combined.add_trace(go.Scatter(
        x=subset['c Factor'],
        y=subset['Energy difference [eV]']/4,
        mode='lines+markers',
        name=method,
        legendgroup=method,
        #line_color=subset['Colour'].tail(1)
        line={'color':subset['Colour'].unique()[0]}
        ),
    row=1, col=1
    ) 

    plot_combined.add_trace(go.Scatter(
        x=subset['c Factor'],
        y=subset['Relative energy'],
        mode='lines+markers',
        name=method,
        legendgroup=method,
        showlegend=False,
        #line_color=subset['Colour'].tail(1)
        line={'color':subset['Colour'].unique()[0]}
        ),
    row=2, col=1 
    )

DFT LDA
DFT PBE-D3BJ
DFT PBE


In [133]:
#fig_combined.update_xaxes(title_text='<i>c</i> Factor')
plot_combined.update_layout(yaxis1={'title':'Energy difference per atom [eV]'}, #'range':[-0.9,4]
                yaxis2={'title':'Relative energy'},
                xaxis2={'title':'<i>c</i> Factor','range':[0.75,1.5]},
                grid_yaxes=['y1'],
                legend_title_text='Method',
                font_family="Serif",font_size=18,
                template='ggplot2',
                width=1000,
                height=600) #,'position':0

plot_combined.show()

In [134]:
plot_combined.write_image('/Users/dominicwelti/Library/CloudStorage/Dropbox/Apps/Overleaf/Master Thesis - MLIP evaluation/graphs/results/graphite_ab_initio.png',format='png',scale=5)

In [11]:
df_mace=df.query('`Method`.str.contains("MACE") or `Method`.str.contains("PBE-D3BJ")')

In [44]:
plot=px.line(df_mace, x='c Factor', y='Relative energy', color='Method', markers=True, template='ggplot2')
plot.update_layout(font_family="Serif",font_size=14,
                   xaxis=dict(
                       range=[0.75,1.5]
                   ),
                   yaxis=dict(
                       range=[-0.2,1]
                   ),
                   width=1000,
                   height=350)

In [12]:
df_mace

Unnamed: 0,Calculation type,Method,c Factor,c Parameter,Energy,Relative energy,Energy difference [eV]
1,DFT,DFT PBE-D3BJ,0.75,4.998263,-36.602311,1.000000,0.870202
3,MACE,MACE ANI-1ccx,0.75,4.998263,-4137.178711,0.881774,8.794922
4,MACE,MACE s66x8,0.75,4.998263,-3968.296631,1.000000,163.705811
5,MACE,MACE ANI-1ccx+s66x8,0.75,4.998263,-4139.689453,1.000000,1.554688
6,DFT,DFT PBE-D3BJ,0.80,5.331480,-37.040437,0.496523,0.432075
...,...,...,...,...,...,...,...
88,DFT,DFT PBE-D3BJ,1.45,9.663308,-37.333884,0.159306,0.138628
90,MACE,MACE ANI-1ccx,1.50,9.996526,-4143.489258,0.249082,2.484375
91,DFT,DFT PBE-D3BJ,1.50,9.996526,-37.323587,0.171139,0.148925
92,MACE,MACE s66x8,1.50,9.996526,-4132.002441,-0.000000,0.000000


In [None]:
df.loc[df['Model']==model, 'Relative energy'] =

In [76]:
df_mace['Colour'] = ''
df_mace.loc[df['Method']=='DFT PBE-D3BJ','Colour'] = 'red'
df_mace.loc[df['Method']=='MACE ANI-1ccx','Colour'] = 'green'
df_mace.loc[df['Method']=='MACE s66x8','Colour'] = 'purple'
df_mace.loc[df['Method']=='MACE ANI-1ccx+s66x8','Colour'] = 'orange'



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [69]:
df_mace['Colour'].unique()

array(['lightslategrey', 'limegreen', 'aqua', 'salmon'], dtype=object)

In [130]:
from plotly.subplots import make_subplots

fig_combined = make_subplots(rows=2, cols=1, shared_xaxes=True)

for method in df_mace['Method'].unique():
    print(method)
    subset=df_mace.query('Method==@method')
    fig_combined.add_trace(go.Scatter(
        x=subset['c Factor'],
        y=subset['Energy difference [eV]']/4,
        mode='lines+markers',
        name=method,
        legendgroup=method,
        #line_color=subset['Colour'].tail(1)
        line={'color':subset['Colour'].unique()[0]}
        ),
    row=1, col=1
    ) 

    fig_combined.add_trace(go.Scatter(
        x=subset['c Factor'],
        y=subset['Relative energy'],
        mode='lines+markers',
        name=method,
        legendgroup=method,
        showlegend=False,
        #line_color=subset['Colour'].tail(1)
        line={'color':subset['Colour'].unique()[0]}
        ),
    row=2, col=1 
    )

DFT PBE-D3BJ
MACE ANI-1ccx
MACE s66x8
MACE ANI-1ccx+s66x8


In [135]:
#fig_combined.update_xaxes(title_text='<i>c</i> Factor')
fig_combined.update_layout(yaxis1={'title':'Energy difference per atom [eV]', 'range':[-0.9,4]},
                yaxis2={'title':'Relative energy'},
                xaxis2={'title':'<i>c</i> Factor','range':[0.75,1.5]},
                grid_yaxes=['y1'],
                legend_title_text='Method',
                font_family="Serif",font_size=18,
                template='ggplot2',
                width=1000,
                height=600) #,'position':0

fig_combined.show()

In [136]:
fig_combined.write_image('/Users/dominicwelti/Library/CloudStorage/Dropbox/Apps/Overleaf/Master Thesis - MLIP evaluation/graphs/results/graphite_mace.png',format='png',scale=5)