# Modeling with GNN-ePC-SAFT

A model that combines a graph neural network with ePC-SAFT.


## Starting point

In [None]:
import torch
import wandb
wandb.login()
from torchmetrics import MeanAbsolutePercentageError
from torch.nn import HuberLoss
from torch_geometric.loader import DataLoader
import os.path as osp, pickle
from data.graphdataset import ThermoMLDataset, ramirez, ThermoMLpara
from epcsaft import epcsaft_cython

In [None]:
# All tensors and metric modules in this notebook are placed on the CPU.
device = torch.device("cpu")

In [None]:
# Error measures shared by every evaluation run below, both moved to `device`:
# mean absolute percentage error and a mean-reduced Huber loss.
mape = MeanAbsolutePercentageError().to(device)
HLoss = HuberLoss(reduction="mean").to(device)

In [None]:
# Dataset holding the experimental points the reference parameters are
# evaluated against.
path = osp.join("./data", "thermoml")
test_loader = ThermoMLDataset(path)

# Callables exposed by epcsaft_cython; presumably the vapor-pressure (`vp`)
# and density (`den`) calculations -- confirm against the epcsaft package.
pcsaft_vp = epcsaft_cython.PCSAFT_vp.apply
pcsaft_den = epcsaft_cython.PCSAFT_den.apply

In [None]:
# Map each compound's InChI to its parameter tensor from the ThermoML
# parameter dataset.
train_loader = ThermoMLpara(path)
para_data = {entry.InChI: entry.para for entry in train_loader}

In [None]:
# Number of test graphs whose InChI has an entry in `para_data`.
n = sum(1 for sample in test_loader if sample.InChI in para_data)
print(n)

In [None]:
def _log_property_errors(para_data, attr, model, suffix):
    """Log per-compound MAPE and Huber loss of one property to wandb.

    Args:
        para_data: Mapping from InChI to a parameter tensor.
        attr: Name of the attribute on each test graph that holds the
            data points (``"vp"`` or ``"rho"``).
        model: Callable invoked as ``model(parameters, datapoints)`` that
            returns the predictions.
        suffix: Suffix of the logged metric keys (``mape_<suffix>`` and
            ``huber_<suffix>``).
    """
    for gh in test_loader:
        if gh.InChI not in para_data:
            continue
        datapoints = getattr(gh, attr).to(device, torch.float64)
        # An all-zero tensor is skipped; presumably it is a placeholder for
        # "no data for this property" -- confirm against the dataset code.
        if torch.all(datapoints == 0):
            continue
        ref_para = para_data[gh.InChI].to(device, torch.float64)
        pred = model(ref_para, datapoints)
        # The last column of the data points is used as the target value.
        target = datapoints[:, -1]
        # Drop NaN predictions so a single failed point does not turn the
        # whole compound's metric into NaN.
        valid = ~torch.isnan(pred)
        loss_mape = mape(pred[valid], target[valid])
        loss_huber = HLoss(pred[valid], target[valid])
        wandb.log(
            {
                f"mape_{suffix}": loss_mape.item(),
                f"huber_{suffix}": loss_huber.item(),
                "inchi": gh.InChI,
            },
        )


def test(para_data):
    """Evaluate reference parameters on the test set and log errors to wandb.

    Every graph in ``test_loader`` whose InChI is a key of ``para_data`` is
    evaluated, first for all ``vp`` data (via ``pcsaft_vp``) and then for all
    ``rho`` data (via ``pcsaft_den``). One wandb record is logged per compound
    and property, containing the MAPE, the Huber loss and the InChI.

    Args:
        para_data: Mapping from InChI to a parameter tensor.
    """
    _log_property_errors(para_data, "vp", pcsaft_vp, "vp")
    _log_property_errors(para_data, "rho", pcsaft_den, "den")

In [None]:
# Evaluate the ThermoML parameters inside a wandb run. Using the run as a
# context manager finishes it even when `test` raises, so a failed evaluation
# does not leave the run open.
with wandb.init(
    # Set the project where this run will be logged
    project="gnn-pc-saft",
    config={
        "eval": "thermoml"
    },
) as run:
    test(para_data)

In [16]:
# Load the pickled fitted parameters.
# NOTE(review): pickle can execute arbitrary code on load; only use this with
# a trusted, locally generated file.
with open("./data/thermoml/raw/para3_fitted.pkl", "rb") as pkl_file:
    fitted_para = pickle.load(pkl_file)

In [35]:
fitted_para_tensor = {}
# Collect element [1] of every entry (presumably the fitting error -- confirm
# against the code that wrote the pickle), dropping entries above 0.05, then
# scale the retained values by 100.
errors = [
    fitted_para[key][1]
    for key in fitted_para
    if not fitted_para[key][1] > 0.05
]
errors = torch.tensor(errors) * 100

In [36]:
# Summary of the retained errors: min, mean, median, max, standard deviation
# and the tensor shape (i.e. how many entries passed the filter).
errors.min(), errors.mean(), errors.median(), errors.max(), errors.std(), errors.shape

(tensor(1.1342e-14, dtype=torch.float64),
 tensor(0.2596, dtype=torch.float64),
 tensor(0.0892, dtype=torch.float64),
 tensor(4.8581, dtype=torch.float64),
 tensor(0.5060, dtype=torch.float64),
 torch.Size([1774]))

In [None]:
# Rebuild the InChI -> parameter mapping, this time from the ramirez dataset.
train_loader = ramirez('./data/ramirez2022')
para_data = {entry.InChI: entry.para for entry in train_loader}

In [None]:
# Evaluate the ramirez parameters inside a wandb run. Using the run as a
# context manager finishes it even when `test` raises, so a failed evaluation
# does not leave the run open.
with wandb.init(
    # Set the project where this run will be logged
    project="gnn-pc-saft",
    config={
        "eval": "ramirez"
    },
) as run:
    test(para_data)