## Load libraries and model


First step: Load libraries

In [1]:
import os.path as osp, os

# Hide every CUDA device from this process; this is set before `import torch` on the next line.
os.environ["CUDA_VISIBLE_DEVICES"] = ""
import torch, polars as pl
from gnnepcsaft.data.graphdataset import ThermoMLDataset, Ramirez, Esper
from gnnepcsaft.train.utils import create_model
from gnnepcsaft.train.utils import calc_deg, LogAssoc, TransformParameters
from gnnepcsaft.train.models import PNApcsaftL
from gnnepcsaft.data.graph import from_InChI, assoc_number
from gnnepcsaft.configs.default import get_config
from gnnepcsaft.epcsaft.utils import pure_den_feos, parameters_gc_pcsaft
import pickle, numpy as np
from rdkit import Chem
from gnnepcsaft.demo.utils import plotdata, loadckp, model_para_fn, datacsv, plotparams, rhovp_data
from feos.eos import EquationOfState, PhaseEquilibrium, State
from feos.pcsaft import PcSaftParameters, PcSaftRecord
from si_units import KELVIN, METER, MOL, PASCAL

# Sanity check: with CUDA_VISIBLE_DEVICES set to an empty string above, this is expected to display False.
torch.cuda.is_available()

False

Second step: Load data

In [2]:
# Build InChI-keyed lookups for the three datasets. Comprehensions are used so
# that no loop temporaries (graph / inchi / para) are left behind in the
# notebook namespace, where later cells could pick them up by accident.

# Ramirez dataset: each graph's `para` tensor is viewed as rows of three values,
# rounded to two decimals, and the first row is stored as a plain Python list.
ra_loader = Ramirez("gnnepcsaft/data/ramirez2022")
ra_para = {
    graph.InChI: graph.para.view(-1, 3).round(decimals=2).tolist()[0]
    for graph in ra_loader
}

# Esper dataset: the whole graph object is stored per InChI (a later cell reads
# its `.para` attribute), not just a parameter list.
es_loader = Esper("gnnepcsaft/data/esper2023")
es_para = {graph.InChI: graph for graph in es_loader}

# ThermoML dataset, indexed by InChI.
testloader = ThermoMLDataset("gnnepcsaft/data/thermoml")
tml_dict = {graph.InChI: graph for graph in testloader}

device = torch.device("cpu")

In [None]:
# Walk the Esper graphs and count those whose `munanb` entries after the first
# are all strictly positive. For each such graph, print the tensor built from
# `assoc_number(inchi)`, the dataset values, and the InChI so they can be
# compared by eye. The total is printed at the end.
count = 0
for graph in es_loader:
    inchi = graph.InChI
    nanb = torch.tensor(assoc_number(inchi))
    es_nanb = graph.munanb[1:]
    if not all(es_nanb > 0):
        continue
    count += 1
    print(nanb, es_nanb, inchi)
print(count)

Third step: Define and load models

In [2]:
# All checkpoints live in the same directory.
CKPT_DIR = "gnnepcsaft/train/checkpoints"

# Every checkpoint is mapped onto the CPU explicitly (by keyword), so all four
# models are loaded the same way regardless of the device they were saved from.
model1 = PNApcsaftL.load_from_checkpoint(
    f"{CKPT_DIR}/esper_msigmae_5.2-epoch=72499-train_mape=0.0121.ckpt",
    map_location="cpu",
)
model2 = PNApcsaftL.load_from_checkpoint(
    f"{CKPT_DIR}/esper_msigmae_5.1-epoch=58749-train_mape=0.0073.ckpt",
    map_location="cpu",
)
model3 = PNApcsaftL.load_from_checkpoint(
    f"{CKPT_DIR}/esper_assoc_7-epoch=99999-train_mape=0.0059.ckpt",
    map_location="cpu",
)
model4 = PNApcsaftL.load_from_checkpoint(
    f"{CKPT_DIR}/esper_assoc_7.1-epoch=99999-train_mape=0.0098.ckpt",
    map_location="cpu",
)

## Evaluations


Run model testing on data

In [None]:
# Every network handed to `model_para_fn` is switched to eval mode first, so
# inference never runs with training-mode layers active. `.eval()` returns the
# module itself, so repeating the call on `model3.model` is harmless.
# NOTE(review): the roles of the two arguments are defined in
# gnnepcsaft.demo.utils and are not visible here - the first is always the
# network from the "esper_assoc_7" checkpoint, the second the "msigmae" one.
model1_para, model1_array = model_para_fn(model3.model.eval(), model1.model.eval())
model2_para, model2_array = model_para_fn(model3.model.eval(), model2.model.eval())

In [9]:
# For each compound where the value at index 2 of model 2's entry is NaN and
# an Esper reference entry exists, print on separate lines: model 1's entry
# from index 1 onward, model 1's value at index 0, the Esper `para` values and
# the InChI, followed by one blank line.
for inchi in model1_para:
    if not np.isnan(model2_para[inchi][2]):
        continue
    if inchi not in es_para:
        continue
    print(
        model1_para[inchi][1:],
        model1_para[inchi][0],
        es_para[inchi].para.tolist(),
        inchi,
        sep="\n",
        end="\n\n",
    )

Save test results to csv file

In [10]:
# Convert each model's per-compound results with `datacsv`, wrap them in polars
# DataFrames and write one CSV per model into the parent directory.
data2 = datacsv(model2_para)
data1 = datacsv(model1_para)
model1data, model2data = pl.DataFrame(data1), pl.DataFrame(data2)
for frame, csv_path in (
    (model1data, "../model1.csv"),
    (model2data, "../model2.csv"),
):
    frame.write_csv(csv_path)

## Plotting


In [None]:
# Ask for the compound interactively. Pasted identifiers often carry stray
# leading/trailing whitespace, so strip it before the string is used as an InChI.
inchi = input("InChI: ").strip()
molecule_name = "UNKNOWN"
# All three networks are put in eval mode before being passed to `plotdata`.
plotdata(
    inchi,
    molecule_name,
    [model3.model.eval(), model2.model.eval()],
    model1.model.eval(),
)

In [None]:
# Parse the current `inchi` into an RDKit molecule (removeHs=False, sanitize=True).
mol = Chem.MolFromInchi(inchi, removeHs=False, sanitize=True)
if mol is None:
    # RDKit signals an unparsable InChI by returning None rather than raising;
    # fail here with a clear message instead of an AttributeError further down.
    raise ValueError(f"RDKit could not parse InChI: {inchi!r}")
# NOTE(review): despite its name, this counts substructure matches of the
# SMILES pattern "O", and the value is not used in this cell.
n_pyrimidine = len(mol.GetSubstructMatches(Chem.MolFromSmiles("O")))
print(Chem.MolToSmiles(mol, isomericSmiles=True))

# Last expression: display the molecule as the cell output.
mol

In [None]:
# Build FeOs PC-SAFT parameters for the current `mol` from its non-isomeric
# SMILES and the two sauer2014 JSON files. The call is the cell's last
# expression, so its result is displayed.
smarts_json = "./gnnepcsaft/data/gc_pcsaft/sauer2014_smarts.json"
homo_json = "./gnnepcsaft/data/gc_pcsaft/sauer2014_homo.json"
plain_smiles = Chem.MolToSmiles(mol, isomericSmiles=False)
PcSaftParameters.from_json_smiles([plain_smiles], smarts_json, homo_json)

In [14]:

# SMILES series: a chain of 1..99 "C" atoms followed by "(=O)OCC".
# NOTE: other cells in this notebook reassign `smiles`; whichever ran last wins.
smiles = [f"{'C' * n_carbons}(=O)OCC" for n_carbons in range(1, 100)]

In [16]:
# SMILES series: plain chains of 1..99 "C" atoms.
# NOTE: other cells in this notebook reassign `smiles`; whichever ran last wins.
smiles = ["".join("C" for _ in range(n_carbons)) for n_carbons in range(1, 100)]

In [18]:
# SMILES series: a chain of 1..99 "C" atoms terminated by "O".
# NOTE: other cells in this notebook reassign `smiles`; whichever ran last wins.
smiles = [f"{'C' * n_carbons}O" for n_carbons in range(1, 100)]

In [None]:
# Hand the current `smiles` series and the first two loaded models to
# `plotparams`. NOTE(review): the third argument is presumably a label - confirm
# against gnnepcsaft.demo.utils.
compared_models = [model1, model2]
plotparams(smiles, compared_models, "UNKNOWN")

In [None]:
# InChIs present in the ThermoML dataset (kept as a list, in dataset order).
test_inchis = [gh.InChI for gh in testloader]
# Set copy for constant-time membership checks in the filter below.
known_inchis = set(test_inchis)

# Convert every SMILES of the current series to an InChI and keep those found in
# the ThermoML dataset, preserving the order of `smiles`. Comprehension-local
# names are used on purpose: a plain loop here would overwrite the notebook-level
# `mol` and `inchi` that the molecule-inspection cells rely on.
inchis = [
    candidate
    for candidate in (Chem.MolToInchi(Chem.MolFromSmiles(smile)) for smile in smiles)
    if candidate in known_inchis
]

In [None]:
# Display the collected `inchis` list as the cell output.
inchis