In [1]:
from aiida import load_profile
import typing as t
import json

# Activate the AiiDA profile named "2023-08-07" (hard-coded) before `orm` is imported below.
load_profile("2023-08-07")

from aiida import orm

In [2]:
def computer_score(node: orm.WorkChainNode) -> t.Tuple[float, float, dict]:
    """Score one work chain node for precision and for efficiency.

    For every structure under ``node.outputs.measure.precision`` the
    ``rel_errors_vec_length`` value (nu) is read, clamped to an upper bound,
    and the amount by which it exceeds a baseline is accumulated. The
    efficiency score additionally includes the wavefunction cutoff.
    Lower scores are better.

    :param node: node exposing ``inputs.measure.wavefunction_cutoff``,
        ``inputs.measure.charge_density_cutoff`` and
        ``outputs.measure.precision``.
    :return: ``(precision_score, efficiency_score, extras)``; ``extras``
        holds the node pk and both cutoffs.
    """
    nu_cap = 2.0  # any nu above this is treated as exactly this value
    nu_when_missing = 100.0  # placeholder nu for a structure without a result
    precision_baseline = 0.1
    efficiency_baseline = 0.33

    measure_inputs = node.inputs.measure
    wfc_cutoff = measure_inputs.wavefunction_cutoff.value
    rho_cutoff = measure_inputs.charge_density_cutoff.value

    # Gather nu for every structure.
    nu_by_structure = {}
    for name, result in node.outputs.measure.precision.items():
        # The top-level "output_parameters" entry carries the overall nu
        # factor and is not a structure, so it is left out.
        if name != "output_parameters":
            try:
                raw_nu = result['output_parameters'].get_dict()["rel_errors_vec_length"]
                nu_by_structure[name] = round(raw_nu, 3)
            except KeyError:
                # No nu available for this structure, usually because the
                # calculation failed with exit code 811.
                nu_by_structure[name] = nu_when_missing

    def excess_above(baseline):
        """Sum, over all structures, how far the clamped nu exceeds ``baseline``."""
        total = 0.0
        for nu in nu_by_structure.values():
            clamped = min(nu, nu_cap)
            if clamped > baseline:
                total += clamped - baseline
        return total

    # Precision: rounded excess divided by 10 (presumably the expected
    # number of structures -- TODO confirm).
    precision_score = round(excess_above(precision_baseline), 2) / 10

    # Efficiency: the cutoff enters with weight 0.01, since nu usually lies
    # between 0.0 and 2.0 while the wavefunction cutoff ranges from 0 to 200.
    efficiency_score = round(excess_above(efficiency_baseline) + wfc_cutoff / 100.0, 2)

    extras = {
        "pk": node.pk,
        "wavefunction_cutoff": wfc_cutoff,
        "charge_density_cutoff": rho_cutoff,
    }

    return precision_score, efficiency_score, extras

In [3]:
# Build the nested score table: data[element][label] -> scores, uuid and extras.
# One row per period below; the lanthanides and actinides are left out on purpose:
#    'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb','Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu',
#    'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf','Es', 'Fm', 'Md', 'No', 'Lr',
pd_elements_lst = [
    'H', 'He',
    'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne',
    'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar',
    'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co',
    'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr',
    'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh',
    'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe',
    'Cs', 'Ba', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir',
    'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn',
    'Fr', 'Ra',
]

data = {}

for element in pd_elements_lst:
    # Each element has its own group holding the nodes to be scored.
    ele_group = orm.Group.collection.get(label=f"sssp-curated/element/{element}")
    scores_by_label = data[element] = {}

    for node in ele_group.nodes:
        # Only the last space-separated token of the 'label' extra is used as key.
        label = node.base.extras.all['label'].rsplit(" ", 1)[-1]
        p_score, e_score, extras = computer_score(node)

        scores_by_label[label] = dict(
            precision_score=p_score,
            efficiency_score=e_score,
            uuid=node.uuid,
            extras=extras,
        )


In [4]:
import pandas as pd

# Flatten the nested score dict into one row per (element, label) entry.
table = [
    [
        scores["extras"]["pk"],
        label,
        scores["precision_score"],
        scores["efficiency_score"],
        scores["extras"]["wavefunction_cutoff"],
    ]
    for scores_of_element in data.values()
    for label, scores in scores_of_element.items()
]

df = pd.DataFrame(
    table,
    columns=["pk", "label", "precision score", "efficiency score", "wfc_cutoff"],
)

In [5]:
# Lift the row/column limits (and widen the line) only for this one printout.
with pd.option_context(
    'display.max_rows', None,
    'display.max_columns', None,
    'display.width', 1000,
):
    print(df)

          pk                                  label  precision score  efficiency score  wfc_cutoff
0    7229341                H.us.z_1.uspp.gbrv.v1.4            0.003              0.30        30.0
1    2244780      H.nc.z_1.oncvpsp3.dojo.v0.5.0-std            0.022              0.65        65.0
2    7451461                H.nc.z_1.ld1.psl.v1.0.0            0.000              1.50       150.0
3    6921826              H.nc.z_1.oncvpsp4.sg15.v0            0.000              0.70        70.0
4    7107445              H.nc.z_1.oncvpsp4.spms.v1            0.030              0.65        65.0
5    7693477                 H.paw.z_1.ld1.psl.v0.1            0.000              0.45        45.0
6    7699267          H.paw.z_1.ld1.psl.v1.0.0-high            0.000              0.45        45.0
7    7873907                  H.us.z_1.ld1.psl.v0.1            0.005              0.45        45.0
8    7891603           H.us.z_1.ld1.psl.v1.0.0-high            0.005              0.45        45.0
9    81742

In [6]:
def curated_library(data: dict, mode: str = "precision") -> dict:
    """Pick, for every element, the single entry with the lowest score.

    Entries are ranked by ``f"{mode}_score"``; ties are broken by
    ``"precision_score"``. The lower the score, the better the entry. When
    several entries are fully tied, the one that comes first in the
    element's dict wins.

    :param data: mapping ``element -> {label -> entry}`` where every entry
        provides ``"precision_score"``, ``f"{mode}_score"``, ``"uuid"`` and
        ``"extras"`` (with ``"wavefunction_cutoff"`` and
        ``"charge_density_cutoff"``).
    :param mode: which score to rank by, e.g. ``"precision"`` or
        ``"efficiency"``.
    :return: mapping ``label -> {"wavefunction_cutoff", "charge_density_cutoff",
        "uuid"}`` containing one item per element that has at least one entry.
    :raises KeyError: if an entry lacks the requested score or one of the
        fields listed above.
    """
    score_key = f"{mode}_score"

    curated_data = {}
    for candidates in data.values():
        if not candidates:
            # Nothing was scored for this element, so there is no winner to
            # record; skip it instead of failing on an empty ranking.
            continue

        # min() returns the first minimal item, which is exactly what taking
        # element 0 of a (stable) full sort would give, without sorting.
        label, best = min(
            candidates.items(),
            key=lambda item: (item[1][score_key], item[1]["precision_score"]),
        )

        curated_data[str(label)] = {
            "wavefunction_cutoff": best["extras"]["wavefunction_cutoff"],
            "charge_density_cutoff": best["extras"]["charge_density_cutoff"],
            "uuid": best["uuid"],
        }

    return curated_data

In [7]:
# Keep the best entry per element, ranked by precision score.
lib_curated_precision = curated_library(data, mode="precision")

# Persist the selection as JSON with a 4-space indent.
with open("lib_sssp_curated_precision_v0.json", "w") as json_file:
    json.dump(lib_curated_precision, json_file, indent=4)

In [8]:
# Keep the best entry per element, ranked by efficiency score.
lib_curated_efficiency = curated_library(data, mode="efficiency")

# Persist the selection as JSON with a 4-space indent.
with open("lib_sssp_curated_efficiency_v0.json", "w") as json_file:
    json.dump(lib_curated_efficiency, json_file, indent=4)

In [9]:
# Read the precision selection back from its JSON file.
with open("lib_sssp_curated_precision_v0.json", "r") as f:
    lib_curated_precision = json.load(f)

# Fetch the group "lib-SSSP-curated-precision-v24-0000/measure/precision", creating it if needed.
# `collection` is used instead of the `objects` accessor (deprecated in AiiDA 2.x) so that
# the lookup is spelled the same way as the element-group lookup earlier in the notebook.
group, _ = orm.Group.collection.get_or_create(label="lib-SSSP-curated-precision-v24-0000/measure/precision")

# Add every selected node to the group, echoing label and uuid as a record.
for label, dd in lib_curated_precision.items():
    print(label, dd["uuid"])
    group.add_nodes(orm.load_node(uuid=dd["uuid"]))


  group, _ = orm.Group.objects.get_or_create(label="lib-SSSP-curated-precision-v24-0000/measure/precision")


H.nc.z_1.ld1.psl.v1.0.0 ac7b2d13-b9c4-445d-ab78-55253b621700
He.paw.z_2.ld1.psl.v1.0.0-high df5035c4-6d1b-4f59-9ab6-b097bcd21468
Li.us.z_3.uspp.gbrv.v1.4 1b0a9c69-08da-456e-970b-5be8da285eb5
Be.paw.z_4.ld1.psl.v1.0.0-high 6fbb4579-5d14-49c9-98a0-fa064903facd
B.paw.z_3.ld1.psl.v0.1 5af93ba9-9939-459a-807c-c4d9fa5c4d1a
C.paw.z_4.ld1.psl.v0.1 7b069dac-593f-45df-b8f5-8e3a4b44effb
N.paw.z_5.ld1.psl.v0.1 28ef07b3-30ff-4217-a4cb-7a6e673e1fca
O.paw.z_6.ld1.psl.v0.1 600b6872-ab6c-4c6f-b283-bfbd41bf0dc6
F.paw.z_7.ld1.psl.v0.1 e5d15d38-dfe5-415a-b189-ea402badd3e3
Ne.paw.z_8.ld1.psl.v1.0.0-high 0ea8ddac-58bd-47bf-9338-898a643da29f
Na.paw.z_9.ld1.psl.v1.0.0-low 3b4f2bd0-bf23-4291-a76b-7da1ee02e69b
Mg.us.z_10.uspp.gbrv.v1.4 a0f7275f-fc32-49b7-af14-f0b5aa9cf751
Al.paw.z_3.ld1.psl.v1.0.0-low 23f6080f-adfc-44ea-a609-e54f9a00830c
Si.paw.z_4.ld1.psl.v1.0.0-low e1366102-5cb2-41e2-a59c-6b152f040040
P.us.z_5.uspp.gbrv.v1.5 5fd7c896-b18f-47d8-923e-57660c9f3d77
S.nc.z_6.oncvpsp4.dojo.v0.5.0-std 76dafe19-d373-

In [10]:
# Read the efficiency selection back from its JSON file.
# It is stored under its own name so the precision selection is not overwritten
# with efficiency data.
with open("lib_sssp_curated_efficiency_v0.json", "r") as f:
    lib_curated_efficiency = json.load(f)

# Fetch the group "lib-SSSP-curated-efficiency-v24-0000/measure/precision", creating it if needed.
# `collection` is used instead of the `objects` accessor (deprecated in AiiDA 2.x) so that
# the lookup is spelled the same way as the element-group lookup earlier in the notebook.
group, _ = orm.Group.collection.get_or_create(label="lib-SSSP-curated-efficiency-v24-0000/measure/precision")

# Add every selected node to the group, echoing label and uuid as a record.
for label, dd in lib_curated_efficiency.items():
    print(label, dd["uuid"])
    group.add_nodes(orm.load_node(uuid=dd["uuid"]))

  group, _ = orm.Group.objects.get_or_create(label="lib-SSSP-curated-efficiency-v24-0000/measure/precision")


H.us.z_1.uspp.gbrv.v1.4 be2962a3-037a-41cc-b61b-1afc7b0dd369
He.us.z_2.ld1.psl.v1.0.0-high d304afe5-78ec-4379-a56d-916a958a5c9c
Li.us.z_3.uspp.gbrv.v1.4 1b0a9c69-08da-456e-970b-5be8da285eb5
Be.us.z_2.ld1.psl.v1.0.0-low.n 0a821f91-e6ca-4165-93ab-cd36cacceec0
B.paw.z_3.atompaw.jth.v1.1-std 12826c57-9dba-4d3e-8e7c-f64234166e89
C.paw.z_4.ld1.psl.v0.1 7b069dac-593f-45df-b8f5-8e3a4b44effb
N.paw.z_5.ld1.psl.v0.1 28ef07b3-30ff-4217-a4cb-7a6e673e1fca
O.paw.z_6.ld1.psl.v0.1 600b6872-ab6c-4c6f-b283-bfbd41bf0dc6
F.us.z_7.uspp.gbrv.v1.4 2f5a4e2a-95f2-4764-bae6-16da88c3d266
Ne.paw.z_8.ld1.psl.v1.0.0-high 0ea8ddac-58bd-47bf-9338-898a643da29f
Na.paw.z_9.ld1.psl.v0.2 54447136-edd8-48db-b4fd-c448ece5b28c
Mg.us.z_10.uspp.gbrv.v1.4 a0f7275f-fc32-49b7-af14-f0b5aa9cf751
Al.paw.z_3.ld1.psl.v1.0.0-low 23f6080f-adfc-44ea-a609-e54f9a00830c
Si.paw.z_4.ld1.psl.v1.0.0-high 7861da17-c496-447b-9b85-9f61ed76ace1
P.us.z_5.uspp.gbrv.v1.5 5fd7c896-b18f-47d8-923e-57660c9f3d77
S.us.z_6.uspp.gbrv.v1.4 1fec0ef0-32ef-4a7e-a8