## Description

This notebook is to investigate how different convergence properties related to each other. The goal is to find the best properties defined that are materials independent and can be used to predict the convergence of a PP. 

The properties that are investigated are:
- For pressure, compare the complex defined SSSP v1 residue volume and the vannila hydrostatic pressure
- For EOS metrics, compare nu wrt AE and nu with ref 200Ry. (Check and assure the guess that delta' and nu are correlated)
- Compare pressure and EOS metrics (nu ref 200Ry)
- Other pair see if those are correlated or not

What I think is, if I tuning the criteria of properties, there will be a cross from A > B to B > A. The different between if A, B are correlated or not is whether their will be a state where A, B are highly linearly correlated. 

The testing data is generated by running full convergence test in the grid of [20:5:200] Ry for all different properties calculation method, then can extract and construct the properties date from the output.
The tested PPs are Hg, Ga, N, Cs, Mn from gbrv, dojo, psl-paw-high and jth, in order to cover PPs from different generated code sources and different type of elements.

This notebook is to compare that selected properties is or not dependent on the structure of the material.
The AiiDA data is stored at group `SI/convergence-properties-compare/<conf>`. conf is BCC/FCC/SC/DC 

In [1]:
from aiida import load_profile
import typing as t

load_profile("2023-08-07")

from aiida import orm

In [2]:
from aiida_sssp_workflow.workflows.convergence.pressure import helper_get_volume_from_pressure_birch_murnaghan
from aiida_sssp_workflow.calculations.calculate_metric import rel_errors_vec_length, _calcDelta
from aiida_sssp_workflow.calculations.calculate_bands_distance import get_bands_distance

paper_scan_list = [30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 90, 100, 120, 150, 200]

def extract_data_scan_list1(node):
    pp_label = node.inputs.label.value.split(' ')[-1]
    real_scan_list = []
    for wf in node.called:
        if wf.process_label == 'ConvergenceCohesiveEnergyWorkChain':
            lst = []
            for wf2 in wf.called:

                if wf2.process_label == 'helper_cohesive_energy_difference':
                    lst.append(wf2)
                if wf2.process_label == 'convergence_analysis':
                    break
            
            real_scan_list = wf.outputs.output_parameters_wfc_test.get_dict()['ecutwfc']
        
        else:
            # parse_pseudo_info or _CachingConvergenceWorkChain
            continue
        
    expected_scan_list = list(range(20, 201, 5))
    # find what is in expected but not in real
    missing = list(set(expected_scan_list) - set(real_scan_list))
    if missing:
        # raise a warning
        print(f"Warning - the following cutoffs are missing from node {node.pk}: {missing}")
        scan_list = real_scan_list
    else:
        scan_list = expected_scan_list

    data = {}
    
    for i, wf in enumerate(lst):
        cutoff = scan_list[i]
        #if cutoff not in paper_scan_list:
        #    continue

        i_para = wf.inputs.input_parameters.get_dict()
        r_para = wf.inputs.ref_parameters.get_dict()
    
        # Get the data
        # Ref_200_nu: the nu value w.r.t. the 200 Ry reference
        # Ref_200_deltap: the delta_p value w.r.t. the 200 Ry reference
        res = get_conv_data1(i_para, r_para)
        data[cutoff] = res

    return pp_label, data, scan_list

def get_conv_data1(i_para, r_para) -> float:
    res_energy = i_para["cohesive_energy_per_atom"]
    ref_energy = r_para["cohesive_energy_per_atom"]
    absolute_diff = abs(res_energy - ref_energy) * 1000.0 # in meV
    relative_diff = abs((res_energy - ref_energy) / ref_energy) * 100

    return absolute_diff

In [3]:
# from aiida_sssp_workflow.workflows.convergence.pressure import ConvergencePressureWorkChain
# from aiida_sssp_workflow.workflows.verifications import VerificationWorkChain

# qb = orm.QueryBuilder().append(
#     orm.Group,
#     filters={"label": {"==": "SI/convergence-properties-compare/SC"}},
#     tag="group",
# ).append(
#     VerificationWorkChain,
#     with_group="group",
#     tag="vc",
# ).append(
#     ConvergencePressureWorkChain,
#     with_incoming="vc",
#     tag="wc",
# ).append(
#     orm.CalcFunctionNode,
#     filters={
#         "attributes.function_name": "helper_pressure_difference",
#     },
#     tag="cf",
#     with_incoming="wc",
# ).append(
#     orm.Data,
#     with_outgoing="cf",
#     project=["*"],
# ).append(
#     orm.Dict,
#     with_incoming="wc",
#     project=["attributes.ecutwfc"],
# )

# qb.order_by({orm.CalcFunctionNode: {"ctime": "asc"}}).all()

In [4]:
g = 'SI/convergence-properties-compare/SC'
gs_nodes = []
gs_nodes.extend(orm.Group.collection.get(label=g).nodes)
    
SC_all_data1 = {}
for node in gs_nodes:
    # give a node and the tuple of criteria
    # return the deducted cutoffs of A and B
    try:
        pp_label1, data1, scan_list1 = extract_data_scan_list1(node)
        SC_all_data1[node.pk] = {
            "pp_label": pp_label1,
            "data": data1,
            "scan_list": scan_list1    
        }
    except Exception as e:
        #print(f"Error: {e}")
        #continue
        print(node.pk)
        raise e



In [5]:
g = 'SI/convergence-properties-compare/DC'
gs_nodes = []
gs_nodes.extend(orm.Group.collection.get(label=g).nodes)
    
DC_all_data1 = {}
for node in gs_nodes:
    # give a node and the tuple of criteria
    # return the deducted cutoffs of A and B
    try:
        pp_label1, data1, scan_list1 = extract_data_scan_list1(node)
        DC_all_data1[node.pk] = {
            "pp_label": pp_label1,
            "data": data1,
            "scan_list": scan_list1    
        }
    except Exception as e:
        #print(f"Error: {e}")
        #continue
        print(node.pk)
        raise e



In [6]:
g = 'SI/convergence-properties-compare/BCC'
gs_nodes = []
gs_nodes.extend(orm.Group.collection.get(label=g).nodes)
    
BCC_all_data1 = {}
for node in gs_nodes:
    # give a node and the tuple of criteria
    # return the deducted cutoffs of A and B
    try:
        pp_label1, data1, scan_list1 = extract_data_scan_list1(node)
        BCC_all_data1[node.pk] = {
            "pp_label": pp_label1,
            "data": data1,
            "scan_list": scan_list1    
        }
    except Exception as e:
        #print(f"Error: {e}")
        #continue
        print(node.pk)
        raise e



In [7]:
def extract_cutoff(data, scan_list, criteria):
    """Extract the cutoff for pA and pB from a verification workchain

    Args:
        data (dict): the data extracted from the verification workchain
        scan_list (list): the list of cutoffs used in the verification workchain
        criteria (tuple): first element is the criteria for pA, second element is the criteria for pB

    Returns:
        tuple: the cutoff for pA and pB.
    """
    # Get the cutoff of pA and pB
    cut = 200
    for cutoff in reversed(scan_list):
        try: 
            p = data[cutoff]
        except:
            continue
        
        if p > criteria:
            break

        cut = cutoff

    return cut

In [14]:
from operator import itemgetter

def compute_cutoff(data12_tuple, criteria):
    cut_A_lst = []
    cut_B_lst = []

    all_data1 = data12_tuple[0]
    all_data2 = data12_tuple[1]

    ## data1
    lst_label = []
    for node_pk in all_data1:
        pp_label = all_data1[node_pk]['pp_label']
        lst_label.append(pp_label)


    idx_sorted, pp_label_sorted = zip(*sorted(enumerate(lst_label), key=itemgetter(1)))
    sorted_pk_lst = [list(all_data1.keys())[i] for i in idx_sorted]

    for node_pk in sorted_pk_lst:
        data = all_data1[node_pk]['data']
        scan_list = all_data1[node_pk]['scan_list']
        cut_A = extract_cutoff(data, scan_list, criteria)
        cut_A_lst.append(cut_A)
    
    ## data2
    lst_label = []
    for node_pk in all_data2:
        pp_label = all_data2[node_pk]['pp_label']
        lst_label.append(pp_label)

    idx_sorted, pp_label_sorted = zip(*sorted(enumerate(lst_label), key=itemgetter(1)))
    sorted_pk_lst = [list(all_data2.keys())[i] for i in idx_sorted]

    for node_pk in sorted_pk_lst:
        data = all_data2[node_pk]['data']
        scan_list = all_data2[node_pk]['scan_list']
        cut_B = extract_cutoff(data, scan_list, criteria)
        cut_B_lst.append(cut_B)

    print(pp_label_sorted)
    print(cut_A_lst)
    print(cut_B_lst)
        
    return cut_A_lst, cut_B_lst

In [15]:
# Get data for plotting (complex pressure)
SC_cut_A_lst, DC_cut_A_lst = compute_cutoff(data12_tuple=(SC_all_data1, DC_all_data1), criteria=0.1)
import ipywidgets as ipw
import plotly.graph_objects as go

trace_corr_scatter = go.Scatter(x=SC_cut_A_lst, y=DC_cut_A_lst, mode='markers', name='cutoff correlation')
trace_xy_line = go.Scatter(x=[0, 200], y=[0, 200], name='x=y')
g = go.FigureWidget(data=[trace_corr_scatter, trace_xy_line])
g.layout.xaxis.title = 'SC'
g.layout.yaxis.title = 'DC'
g.layout.title = 'Cutoff correlation SC vs DC'
factor = 1.3
g.update_layout(width=480*factor, height=400*factor)

pA_slider = ipw.FloatSlider(value=1.1, min=0.5, max=4.0, step=0.1, description='pA')

def response(change):
    SC_cut_A_lst, DC_cut_A_lst = compute_cutoff(data12_tuple=(SC_all_data1, DC_all_data1), criteria=pA_slider.value)
    with g.batch_update():
        g.data[0].x = SC_cut_A_lst
        g.data[0].y = DC_cut_A_lst
        
pA_slider.observe(response, names="value")

slider_widgets = ipw.HBox([pA_slider])
app = ipw.VBox([slider_widgets, g])
app

('Cs.nc.z_9.oncvpsp3.dojo.v0.4.1-std', 'Cs.paw.z_9.atompaw.jth.v1.1-std', 'Cs.paw.z_9.ld1.psl.v1.0.0-high', 'Cs.us.z_9.uspp.gbrv.v1', 'Ga.nc.z_13.oncvpsp3.dojo.v0.4.1-std', 'Ga.paw.z_13.atompaw.jth.v1.1-std', 'Ga.paw.z_13.ld1.psl.v1.0.0-high', 'Ga.us.z_19.uspp.gbrv.v1.4', 'Hg.nc.z_20.oncvpsp3.dojo.v0.4.1-std', 'Hg.paw.z_12.atompaw.jth.v1.1-std', 'Hg.paw.z_20.ld1.psl.v1.0.0-high', 'Hg.us.z_12.uspp.gbrv.v1', 'Mn.nc.z_15.oncvpsp3.dojo.v0.4.1-std', 'Mn.paw.z_15.atompaw.jth.v1.1-std', 'Mn.paw.z_15.ld1.psl.v1.0.0-high', 'Mn.us.z_15.uspp.gbrv.v1.5', 'N.nc.z_5.oncvpsp3.dojo.v0.4.1-std', 'N.paw.z_5.atompaw.jth.v1.1-std', 'N.paw.z_5.ld1.psl.v1.0.0-high', 'N.us.z_5.uspp.gbrv.v1.2')
[35, 195, 40, 30, 135, 135, 125, 170, 60, 45, 200, 65, 125, 125, 190, 200, 80, 200, 75, 100]
[35, 195, 20, 25, 135, 135, 125, 170, 60, 75, 200, 65, 125, 125, 190, 200, 80, 200, 75, 140]


VBox(children=(HBox(children=(FloatSlider(value=1.1, description='pA', max=4.0, min=0.5),)), FigureWidget({
  …

In [13]:
import numpy as np

def compute_corr(criteria, C1_all_data, C2_all_data):
    C1_lst, C2_lst = compute_cutoff(data12_tuple=(C1_all_data, C2_all_data), criteria=criteria)
    arr_A = np.array(C1_lst)
    arr_B = np.array(C2_lst)
    
    N = len(arr_A)
    abs_corr = np.sum(np.abs(arr_A - arr_B) / N)
    
    # only take when element in A > element in B
    pos_corr = np.sum((arr_A - arr_B) * (arr_A > arr_B) / N )
    neg_corr = - np.sum((arr_A - arr_B) * (arr_A < arr_B) / N )
    
    return abs_corr, pos_corr, neg_corr

x = np.linspace(1.0, 2.0, 100)
y_abs = np.array([compute_corr(c, DC_all_data1, SC_all_data1)[0] for c in x])
y_pos = np.array([compute_corr(c, DC_all_data1, SC_all_data1)[1] for c in x])
y_neg = np.array([compute_corr(c, DC_all_data1, SC_all_data1)[2] for c in x])

trace_abs = go.Scatter(x=x, y=y_abs, mode='lines', name='|corr|')
trace_pos = go.Scatter(x=x, y=y_pos, mode='lines', name='DC > SC')
trace_neg = go.Scatter(x=x, y=y_neg, mode='lines', name='SC > DC')
g = go.FigureWidget(data=[trace_abs, trace_pos, trace_neg])
g.layout.xaxis.title = 'criteria (meV/atom)'
g.layout.yaxis.title = 'corr (cohesive energy)'
g.layout.title = 'Correlation SC vs DC (+/- corr)'
g.update_layout(width=480, height=400)
g

FigureWidget({
    'data': [{'mode': 'lines',
              'name': '|corr|',
              'type': 'scatter',
              'uid': 'a9e5aa05-daf6-474b-979f-f785947a3e47',
              'x': array([1.        , 1.01010101, 1.02020202, 1.03030303, 1.04040404, 1.05050505,
                          1.06060606, 1.07070707, 1.08080808, 1.09090909, 1.1010101 , 1.11111111,
                          1.12121212, 1.13131313, 1.14141414, 1.15151515, 1.16161616, 1.17171717,
                          1.18181818, 1.19191919, 1.2020202 , 1.21212121, 1.22222222, 1.23232323,
                          1.24242424, 1.25252525, 1.26262626, 1.27272727, 1.28282828, 1.29292929,
                          1.3030303 , 1.31313131, 1.32323232, 1.33333333, 1.34343434, 1.35353535,
                          1.36363636, 1.37373737, 1.38383838, 1.39393939, 1.4040404 , 1.41414141,
                          1.42424242, 1.43434343, 1.44444444, 1.45454545, 1.46464646, 1.47474747,
                          1.48484848, 1.4949

In [11]:
# Get data for plotting (complex pressure)
SC_cut_A_lst, BCC_cut_A_lst = compute_cutoff(data12_tuple=(SC_all_data1, BCC_all_data1), criteria=0.1)
import ipywidgets as ipw
import plotly.graph_objects as go

trace_corr_scatter = go.Scatter(x=SC_cut_A_lst, y=BCC_cut_A_lst, mode='markers', name='cutoff correlation')
trace_xy_line = go.Scatter(x=[0, 200], y=[0, 200], name='x=y')
g = go.FigureWidget(data=[trace_corr_scatter, trace_xy_line])
g.layout.xaxis.title = 'cutoff pA'
g.layout.yaxis.title = 'cutoff pB'

pA_slider = ipw.FloatSlider(value=0.1, min=0.00, max=4.0, step=0.01, description='pA')

def response(change):
    SC_cut_A_lst, BCC_cut_A_lst = compute_cutoff(data12_tuple=(SC_all_data1, BCC_all_data1), criteria=pA_slider.value)
    with g.batch_update():
        g.data[0].x = SC_cut_A_lst
        g.data[0].y = BCC_cut_A_lst
        
pA_slider.observe(response, names="value")

slider_widgets = ipw.HBox([pA_slider])
app = ipw.VBox([slider_widgets, g])
app

VBox(children=(HBox(children=(FloatSlider(value=0.1, description='pA', max=4.0, step=0.01),)), FigureWidget({
…

In [12]:
# Get data for plotting (complex pressure)
BCC_cut_A_lst, DC_cut_A_lst = compute_cutoff(data12_tuple=(BCC_all_data1, DC_all_data1), criteria=0.1)
import ipywidgets as ipw
import plotly.graph_objects as go

trace_corr_scatter = go.Scatter(x=BCC_cut_A_lst, y=DC_cut_A_lst, mode='markers', name='cutoff correlation')
trace_xy_line = go.Scatter(x=[0, 200], y=[0, 200], name='x=y')
g = go.FigureWidget(data=[trace_corr_scatter, trace_xy_line])
g.layout.xaxis.title = 'cutoff pA'
g.layout.yaxis.title = 'cutoff pB'

pA_slider = ipw.FloatSlider(value=0.1, min=0.00, max=4.0, step=0.01, description='pA')

def response(change):
    BCC_cut_A_lst, DC_cut_A_lst = compute_cutoff(data12_tuple=(BCC_all_data1, DC_all_data1), criteria=pA_slider.value)
    with g.batch_update():
        g.data[0].x = BCC_cut_A_lst
        g.data[0].y = DC_cut_A_lst
        
pA_slider.observe(response, names="value")

slider_widgets = ipw.HBox([pA_slider])
app = ipw.VBox([slider_widgets, g])
app

VBox(children=(HBox(children=(FloatSlider(value=0.1, description='pA', max=4.0, step=0.01),)), FigureWidget({
…