# Evaluating reference data


## Loading packages


In [None]:
import numpy as np
import os.path as osp
from gnnepcsaft.data.graphdataset import ThermoMLDataset, Ramirez, Esper
from gnnepcsaft.train.utils import rho_single, vp_single
from gnnepcsaft.epcsaft.utils import parameters_gc_pcsaft
from gnnepcsaft.demo.utils import  params_fn, binary_test, es_para
import polars as pl
import torch
from gnnepcsaft.train.models import GNNePCSAFTL

In [None]:
# Test set: the ThermoML dataset stored under gnnepcsaft/data/thermoml.
path = osp.join("gnnepcsaft/data", "thermoml")
test_loader = ThermoMLDataset(path)

# Checkpoint files of the two trained networks used in the cells below.
main_ckpt = "./gnnepcsaft/train/checkpoints/pna_msigmae_1.0-epoch=21249-mape_den.ckpt"
assoc_ckpt = (
    "./gnnepcsaft/train/checkpoints/pna_assoc_1.0-epoch=6249-train_mape=0.0260.ckpt"
)

# Both checkpoints are restored with "cpu" as the second argument and put in
# evaluation mode right away.
model = GNNePCSAFTL.load_from_checkpoint(main_ckpt, "cpu").eval()
model_assoc = GNNePCSAFTL.load_from_checkpoint(assoc_ckpt, "cpu").eval()

In [None]:
def test(para_data):
    """Score a set of parameters against every graph of ``test_loader``.

    Graphs whose InChI is not a key of ``para_data`` are skipped. For the
    remaining ones the density and vapour-pressure predictions are compared
    with the last column of ``gh.rho`` / ``gh.vp``.

    Args:
        para_data: mapping from InChI to the parameter list handed to
            ``rho_single`` and ``vp_single``.

    Returns:
        A dict of three equally long lists: ``"inchis"``, ``"mden"`` and
        ``"mvp"``. The error entries are mean absolute relative deviations
        (as fractions), or ``np.nan`` when the graph has no rows for that
        property.
    """

    def _mean_rel_error(predicted, reference):
        # Mean of |(predicted - reference) / reference|.
        return np.mean(np.abs((predicted - reference) / reference))

    inchi_col, den_col, vp_col = [], [], []
    for gh in test_loader:
        inchi = gh.InChI
        if inchi not in para_data:
            continue
        params = para_data[inchi]

        # The reference values sit in the last column of each table.
        ref_rho = gh.rho[:, -1]
        ref_vp = gh.vp[:, -1]

        mape_den = (
            _mean_rel_error(rho_single((params, gh.rho)), ref_rho)
            if ref_rho.shape[0] > 0
            else np.nan
        )
        mape_vp = (
            _mean_rel_error(vp_single((params, gh.vp)), ref_vp)
            if ref_vp.shape[0] > 0
            else np.nan
        )

        inchi_col.append(inchi)
        den_col.append(mape_den)
        vp_col.append(mape_vp)

    return {"inchis": inchi_col, "mden": den_col, "mvp": vp_col}

## Evaluating


In [None]:
# Reference parameters from the Ramirez dataset, scored on the test set.
train_loader = Ramirez("gnnepcsaft/data/ramirez2022")

# Each parameter list is extended with five zeros.
# NOTE(review): presumably these fill the association/polar slots that
# rho_single / vp_single expect -- confirm against those functions.
para_data = {
    graph.InChI: graph.para.tolist() + [0] * 5 for graph in train_loader
}

test_data = pl.DataFrame(test(para_data))
test_data.write_csv("../reframirez.csv")

In [None]:
# Reference parameters from the Esper dataset, scored on the test set.
train_loader = Esper("gnnepcsaft/data/esper2023")
para_data = {}
for graph in train_loader:
    # 10 ** (assoc * [-1, 1]): the first association entry has its sign
    # flipped before exponentiation.
    # NOTE(review): looks like assoc is stored as log10 values -- confirm.
    assoc = 10 ** (graph.assoc * torch.tensor([-1.0, 1.0]))
    merged = torch.hstack([graph.para, assoc, graph.munanb])
    para_data[graph.InChI] = merged.squeeze().tolist()

test_data = pl.DataFrame(test(para_data))
test_data.write_csv("../refesper.csv")

In [None]:
# Parameters computed by parameters_gc_pcsaft from each test molecule's
# SMILES, scored on the test set. Molecules for which the computation fails
# are skipped (best effort), but the failures are recorded and reported
# instead of being silently dropped.
para_data = {}
gc_failures = {}  # InChI -> repr of the error raised for that molecule
for graph in test_loader:
    try:
        para_data[graph.InChI] = list(parameters_gc_pcsaft(graph.smiles))
    except (KeyboardInterrupt, SystemExit):
        # Never swallow a user interrupt or an interpreter shutdown.
        raise
    except BaseException as err:  # pylint: disable=broad-exception-caught
        # Kept as broad as the original handler so a failure for one molecule
        # still does not abort the run.
        # NOTE(review): narrow to Exception if parameters_gc_pcsaft never
        # raises anything outside the Exception hierarchy -- confirm.
        gc_failures[graph.InChI] = repr(err)

if gc_failures:
    print(
        f"parameters_gc_pcsaft failed for {len(gc_failures)} molecule(s); "
        "they are excluded from refgc_pcsaft.csv"
    )

test_data = test(para_data)
test_data = pl.DataFrame(test_data)
test_data.write_csv("../refgc_pcsaft.csv")

In [None]:
# Parameters predicted by the two trained models for every test-set graph.
train_loader = Esper("gnnepcsaft/data/esper2023")

# First row of each Esper graph's munanb, keyed by InChI. It is not read
# again inside this cell.
munanb_ref = {gh.InChI: gh.munanb.tolist()[0] for gh in train_loader}

para_data = {
    graph.InChI: params_fn(model.model, graph, model_assoc.model).tolist()
    for graph in test_loader
}

test_data = pl.DataFrame(test(para_data))
test_data.write_csv("../model1.csv")

In [None]:

# Density error of the model predictions for binary mixtures.
binary_data = binary_test(model.model, model_assoc.model)

first_inchis, second_inchis, density_errors = [], [], []
first_in_train, second_in_train = [], []

for inchis, rho_data in binary_data:
    rho = np.asarray(rho_data)
    # Column 0 is used as the prediction, column 1 as the reference.
    pred_rho, ref_rho = rho[:, 0], rho[:, 1]
    mape_den = np.mean(np.abs(pred_rho - ref_rho) / ref_rho)

    inchi_a, inchi_b = inchis[0], inchis[1]
    first_inchis.append(inchi_a)
    second_inchis.append(inchi_b)
    density_errors.append(mape_den)
    # Flag whether each component is a key of es_para.
    first_in_train.append(inchi_a in es_para)
    second_in_train.append(inchi_b in es_para)

data = pl.DataFrame(
    {
        "inchis1": first_inchis,
        "inchis2": second_inchis,
        "mden": density_errors,
        "inchi1_in_train": first_in_train,
        "inchi2_in_train": second_in_train,
    }
)
data.write_csv("../binary_test.csv")
