# Modeling with GNN-ePC-SAFT

A model combining a graph neural network (GNN) with the ePC-SAFT equation of state.


## Starting point

In [None]:
import torch
import wandb
wandb.login()
from torchmetrics import MeanAbsolutePercentageError
from torch.nn import HuberLoss
from torch_geometric.loader import DataLoader
import os.path as osp, pickle
from data.graphdataset import ThermoMLDataset, ThermoML_padded, ramirez
from epcsaft import epcsaft_cython

In [None]:
# Every tensor, loss and metric created in this notebook is placed on this device.
device_type = "cpu"
device = torch.device(device_type)

In [None]:
# Error measures shared by the evaluation cells below.
# Huber loss averaged over all elements ("mean" reduction).
HLoss = HuberLoss(reduction="mean")
HLoss = HLoss.to(device)
# Mean absolute percentage error metric from torchmetrics.
mape = MeanAbsolutePercentageError()
mape = mape.to(device)

In [None]:
# ThermoML dataset, served one graph per batch and in a fixed order.
path = osp.join("./data", "thermoml")
train_dataset = ThermoMLDataset(path)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False)

# Ramirez dataset, also one graph per batch and unshuffled.
ra_dataset = ramirez("./data/ramirez2022")
ra_loader = DataLoader(ra_dataset, batch_size=1, shuffle=False)

# Callable entry points of the ePC-SAFT density and vapor-pressure routines
# (presumably torch.autograd.Function subclasses, hence `.apply` -- confirm
# against epcsaft_cython).
pcsaft_vp = epcsaft_cython.PCSAFT_vp.apply
pcsaft_den = epcsaft_cython.PCSAFT_den.apply

In [None]:
# Map each compound's InChI to its parameter tensor from the Ramirez loader.
# Batches hold a single graph, so InChI[0] is that graph's identifier; if an
# InChI occurs more than once, the last occurrence wins.
ra_data = {batch.InChI[0]: batch.para for batch in ra_loader}

In [None]:
# Count the ThermoML graphs whose InChI also has an entry in ra_data.
n = sum(1 for batch in train_loader if batch.InChI[0] in ra_data)
print(n)

In [None]:
def _log_property_errors(para_data, attr, model, mape_key, huber_key):
    """Log the per-compound MAPE and Huber loss of ``model`` for one property.

    Args:
        para_data: Mapping from InChI string to a parameter tensor.
        attr: Name of the graph attribute holding the data points ("vp" or "rho").
        model: Callable ``model(parameters, datapoints)`` returning predictions.
        mape_key: Key under which the MAPE is logged to wandb.
        huber_key: Key under which the Huber loss is logged to wandb.
    """
    for gh in train_loader:
        inchi = gh.InChI[0]
        if inchi not in para_data:
            continue
        datapoints = getattr(gh, attr).to(device, torch.float64)
        # An all-zero tensor is skipped (presumably a placeholder for
        # "no data for this property" -- confirm against the dataset).
        if torch.all(datapoints == 0):
            continue
        ref_para = para_data[inchi].to(device, torch.float64)
        pred = model(ref_para, datapoints)
        # The last column of the data points holds the target value.
        target = datapoints[:, -1]
        # Drop NaN predictions so that a single failed state point does not
        # turn the whole compound's metrics into NaN.
        valid = ~torch.isnan(pred)
        if not torch.any(valid):
            # Nothing left to score; metrics on empty tensors would be NaN.
            continue
        loss_mape = mape(pred[valid], target[valid])
        loss_huber = HLoss(pred[valid], target[valid])
        wandb.log(
            {
                mape_key: loss_mape.item(),
                huber_key: loss_huber.item(),
            },
        )


def test(para_data):
    """Score a set of parameters against the ThermoML data and log to wandb.

    For every graph in ``train_loader`` whose InChI is a key of ``para_data``,
    the vapor pressure (``pcsaft_vp``) and then the density (``pcsaft_den``)
    are computed from the stored parameters and compared with the targets.
    All vapor-pressure results are logged first ("mape_vp", "huber_vp"),
    followed by all density results ("mape_den", "huber_den"); one
    ``wandb.log`` call is made per compound and property.

    NaN predictions are excluded from both properties, and a compound is
    skipped for a property when it has no data or no valid prediction.

    Args:
        para_data: Mapping from InChI string to a parameter tensor.

    Note:
        Relies on the notebook-level ``train_loader``, ``device``, ``mape``,
        ``HLoss``, ``pcsaft_vp`` and ``pcsaft_den``, and expects an active
        wandb run (``wandb.init`` already called).
    """
    _log_property_errors(para_data, "vp", pcsaft_vp, "mape_vp", "huber_vp")
    _log_property_errors(para_data, "rho", pcsaft_den, "mape_den", "huber_den")

In [None]:
# Evaluate the reference parameters collected in ra_data in their own W&B run.
# The run is used as a context manager so that it is always finished -- and
# marked as failed -- if test() raises, instead of being left open.
with wandb.init(
    # Set the project where this run will be logged
    project="gnn-pc-saft"
) as run:
    test(ra_data)

In [None]:
# Load the fitted parameters stored next to the processed ThermoML data.
# The cells below index each entry as [0] (parameter values) and [1] (an
# error value), keyed by InChI.
fitted_para_file = "./data/thermoml/processed/para3_fitted.pkl"
with open(fitted_para_file, "rb") as file:
    fitted_para = pickle.load(file)

In [None]:
# Absolute value of each fitted parameter set, as a tensor keyed by InChI.
fitted_para_tensor = {
    key: torch.tensor(fitted_para[key][0]).abs() for key in fitted_para
}
# Second element of every entry, gathered in the same iteration order.
errors = torch.tensor([fitted_para[key][1] for key in fitted_para])

In [None]:
# Average of the stored error values (shown as the cell output).
torch.mean(errors)

In [None]:
# Evaluate the fitted parameters (fitted_para_tensor) in their own W&B run.
# The run is used as a context manager so that it is always finished -- and
# marked as failed -- if test() raises, instead of being left open.
with wandb.init(
    # Set the project where this run will be logged
    project="gnn-pc-saft"
) as run:
    test(fitted_para_tensor)