In [1]:
from aiida import load_profile
import typing as t
import json

# Load the AiiDA profile named "2023-08-07" for this kernel session.
# NOTE(review): presumably this must run before any ORM query in the cells
# below can reach the database — confirm.
load_profile("2023-08-07")

from aiida import orm

In [2]:
def computer_score(node: orm.WorkChainNode) -> t.Tuple[float, float, dict]:
    """Derive the precision and efficiency scores of a workchain node.

    Both scores are penalties: a structure contributes only the amount by
    which its nu factor exceeds a baseline, so a lower score is better.

    :param node: workchain whose ``inputs.measure`` carries
        ``wavefunction_cutoff`` and ``charge_density_cutoff`` and whose
        ``outputs.measure.precision`` maps structure names to results holding
        an ``output_parameters`` entry.
    :return: ``(precision_score, efficiency_score, extras)``; ``extras``
        records the node pk and both cutoffs.
    """
    wfc_cutoff = node.inputs.measure.wavefunction_cutoff.value
    rho_cutoff = node.inputs.measure.charge_density_cutoff.value

    # Baselines: only the part of a nu factor above these is penalised.
    precision_base_nu = 0.1
    efficiency_base_nu = 0.33

    # Collect one nu factor per structure, rounded to three decimals.
    nu_by_structure = {}
    for name, result in node.outputs.measure.precision.items():
        # The top-level "output_parameters" entry holds the overall nu factor,
        # it is not a structure.
        if name != "output_parameters":
            try:
                nu_value = round(
                    result["output_parameters"].get_dict()["rel_errors_vec_length"], 3
                )
            except KeyError:
                # No nu factor for this structure, usually because the
                # calculation failed with exit code 811: use a large penalty.
                nu_value = 100.0
            nu_by_structure[name] = nu_value

    def penalty_above(baseline):
        """Add up, in structure order, how far each nu factor exceeds ``baseline``."""
        # A plain accumulation loop (not ``sum``) keeps the float result
        # identical on every Python version.
        total = 0.0
        for nu_value in nu_by_structure.values():
            if nu_value > baseline:
                total += nu_value - baseline
        return total

    # Precision: rounded penalty divided by the 10 structures.
    precision_score = round(penalty_above(precision_base_nu), 2) / 10

    # Efficiency: the wavefunction cutoff is added with a weight of 0.01,
    # since nu values are usually around 0.0-2.0 while the cutoff ranges
    # from 0 to 200.
    efficiency_score = round(
        penalty_above(efficiency_base_nu) + wfc_cutoff / 100.0, 2
    )

    return (
        precision_score,
        efficiency_score,
        {
            "pk": node.pk,
            "wavefunction_cutoff": wfc_cutoff,
            "charge_density_cutoff": rho_cutoff,
        },
    )

In [3]:
# Elements to process, laid out by periodic-table row.
# The lanthanide and actinide rows are currently left out.
pd_elements_lst = [
    "H", "He",
    "Li", "Be", "B", "C", "N", "O", "F", "Ne",
    "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar",
    "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", "Br", "Kr",
    "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I", "Xe",
    "Cs", "Ba", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn",
    "Fr", "Ra",
    # "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu",
    # "Th", "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm", "Md", "No", "Lr",
]

# element -> label -> {"precision_score", "efficiency_score", "uuid", "extras"}
data = {}

for element in pd_elements_lst:
    group = orm.Group.collection.get(label=f"nc-curated/element/{element}")
    scores_by_label = data[element] = {}

    for wc_node in group.nodes:
        # The "label" extra is space-separated; its last token is used as the key.
        short_label = wc_node.base.extras.all["label"].split(" ")[-1]
        precision, efficiency, extras = computer_score(wc_node)

        scores_by_label[short_label] = dict(
            precision_score=precision,
            efficiency_score=efficiency,
            uuid=wc_node.uuid,
            extras=extras,
        )


In [4]:
import pandas as pd

# Flatten the nested element -> label -> scores mapping into one row per label.
table = [
    [entry["extras"]["pk"], label, entry["precision_score"], entry["efficiency_score"]]
    for per_label in data.values()
    for label, entry in per_label.items()
]

df = pd.DataFrame(
    table,
    columns=["pk", "label", "precision score", "efficiency score"],
)

In [5]:
# Print the whole score table: row/column truncation is lifted and the
# display width widened only for the duration of this block.
with pd.option_context(
    "display.max_rows", None,
    "display.max_columns", None,
    "display.width", 1000,
):
    print(df)

          pk                                label  precision score  efficiency score
0    5993666    H.nc.z_1.oncvpsp3.dojo.v0.5.0-std            0.106              0.86
1    6907100            H.nc.z_1.oncvpsp4.sg15.v0            0.065              0.83
2    7062992            H.nc.z_1.oncvpsp4.spms.v1            0.117              0.87
3    7442079              H.nc.z_1.ld1.psl.v1.0.0            0.072              1.67
4    5993831   He.nc.z_2.oncvpsp3.dojo.v0.5.0-std            0.320              2.62
5    6907243           He.nc.z_2.oncvpsp4.sg15.v0            0.426              3.26
6    7063152           He.nc.z_2.oncvpsp4.spms.v1            0.313              2.45
7    7442300             He.nc.z_2.ld1.psl.v1.0.0            0.404              4.19
8    5993954   Li.nc.z_3.oncvpsp3.dojo.v0.5.0-std            0.023              0.75
9    6908176           Li.nc.z_3.oncvpsp4.sg15.v0            0.103              0.91
10   7064285           Li.nc.z_3.oncvpsp4.spms.v1            0.25

In [6]:
def curated_library(data: dict, mode: str = "precision") -> t.Dict[str, dict]:
    """Pick, for every element, the entry with the lowest ``{mode}_score``.

    A lower score is better. When several entries of an element share the
    lowest score, the one that comes first in that element's mapping wins.

    :param data: ``{element: {label: entry}}``; every entry provides
        ``"precision_score"``, ``"efficiency_score"``, ``"uuid"`` and an
        ``"extras"`` dict with ``"wavefunction_cutoff"`` and
        ``"charge_density_cutoff"``.
    :param mode: score to rank by; the key ``f"{mode}_score"`` is read from
        every entry (``"precision"`` or ``"efficiency"``).
    :return: ``{label: {"wavefunction_cutoff": ..., "charge_density_cutoff": ...,
        "uuid": ...}}`` with one item per element that has at least one entry.
    :raises KeyError: if an entry lacks ``f"{mode}_score"``, or the selected
        entry lacks ``"uuid"`` or one of the cutoff keys.
    """
    import warnings

    score_key = f"{mode}_score"

    curated_data = {}
    for element, candidates in data.items():
        if not candidates:
            # An element without any entry has no "best" one; report it
            # instead of failing with an IndexError.
            warnings.warn(
                f"No entries found for element {element!r}; skipping it.",
                stacklevel=2,
            )
            continue

        # min() returns the first lowest-scoring item, the same entry a
        # stable ascending sort would place first.
        label, best = min(candidates.items(), key=lambda item: item[1][score_key])

        curated_data[f"{label}"] = {
            "wavefunction_cutoff": best["extras"]["wavefunction_cutoff"],
            "charge_density_cutoff": best["extras"]["charge_density_cutoff"],
            "uuid": best["uuid"],
        }

    return curated_data

In [7]:
# Keep, per element, the entry with the lowest precision score.
lib_curated_precision = curated_library(data, "precision")

# Write the selection to disk as JSON indented by four spaces.
with open("lib_nc_curated_precision_v1.json", mode="w") as json_file:
    serialized = json.dumps(lib_curated_precision, indent=4)
    json_file.write(serialized)

In [8]:
# Keep, per element, the entry with the lowest efficiency score.
lib_curated_efficiency = curated_library(data, "efficiency")

# Write the selection to disk as JSON indented by four spaces.
with open("lib_nc_curated_efficiency_v1.json", mode="w") as json_file:
    serialized = json.dumps(lib_curated_efficiency, indent=4)
    json_file.write(serialized)