In [None]:
import os.path as osp, os
os.environ["CUDA_VISIBLE_DEVICES"] = ''
import torch, numpy as np, polars as pl
from data.graphdataset import ThermoMLDataset, ramirez, ThermoMLpara
from train.train import create_model
from train.models import PNAPCSAFT
from train.model_deg import calc_deg
from data.graph import from_InChI
from train.parametrisation import MAPE, rhovp_data
import matplotlib.pyplot as plt

In [None]:
# Sanity check: CUDA_VISIBLE_DEVICES is set to '' in the import cell before torch
# is imported, so this is expected to display False (CPU-only run).
torch.cuda.is_available()

In [None]:
# Build a lookup InChI -> parameter triple from the dataset returned by `ramirez`.
ra_loader = ramirez("./data/ramirez2022")
ra_para = {}
for graph in ra_loader:
    inchi = graph.InChI
    # Reshape to rows of three values and round to two decimals.
    para = graph.para.view(-1, 3).round(decimals=2)
    # Only the first row is stored, as a plain Python list.
    ra_para[inchi] = para[0].tolist()

In [None]:
# Build a lookup InChI -> parameter triple from the dataset returned by `ThermoMLpara`.
tml_loader = ThermoMLpara("./data/thermoml")
tml_para = {}
for graph in tml_loader:
    inchi = graph.InChI
    # Reshape to rows of three values and round to two decimals.
    para = graph.para.view(-1, 3).round(decimals=2)
    # Only the first row is stored, as a plain Python list.
    tml_para[inchi] = para[0].tolist()

In [None]:
# Evaluation data: a ThermoMLDataset rooted at data/thermoml. `testloader` is read
# as a global by model_para_fn and plotdata further down.
path = osp.join("data", "thermoml")
testloader = ThermoMLDataset(path)
# All inference in this notebook runs on the CPU.
device = torch.device("cpu")

In [None]:
# Load the project's default configuration; some of its fields are overridden
# in the following cell before the models are created.
from configs.default import get_config
config = get_config()

In [None]:
# Architecture settings handed to create_model through `config`.
# NOTE(review): presumably these have to match the architecture the checkpoints
# loaded below were trained with — confirm before changing any value.
config.propagation_depth=4
config.hidden_dim=128
config.num_mlp_layers=2
config.pre_layers=1
config.post_layers=3

In [None]:
# Floating-point dtype used for the model weights and for the graph features
# (x, edge_attr) that are cast before every forward pass.
model_dtype = torch.float64

In [None]:
# Create and initialize network "model 2".
# calc_deg("thermoml", './') provides the second argument of create_model
# (presumably a degree statistic of the ThermoML graphs — TODO confirm); the
# resulting network is moved to `device` and cast to `model_dtype`.
deg_model2 = calc_deg("thermoml", './')
model2 = create_model(config, deg_model2).to(device, model_dtype)

In [None]:
# Create and initialize network "model 1".
# Same construction as model 2, but calc_deg is called for the "ramirez" dataset
# (presumably a degree statistic of those graphs — TODO confirm).
deg_model1 = calc_deg("ramirez", './')
model1 = create_model(config, deg_model1).to(device, model_dtype)

In [None]:
# Set up checkpointing of the model.
def loadckp(ckp_path: str, model: "PNAPCSAFT"):
    """Restore model weights from a checkpoint file, if the file exists.

    Args:
        ckp_path: Path of a checkpoint holding a dict with the keys
            ``"model_state_dict"`` and ``"step"``.
        model: Network whose ``load_state_dict`` receives the stored weights.

    Returns:
        None. When ``ckp_path`` does not exist the model is left untouched and
        a warning line is printed, so the omission is visible in the output.
    """
    if not osp.exists(ckp_path):
        # Staying silent here would let the notebook evaluate a model that
        # still carries its freshly initialised weights.
        print(f"checkpoint not found: {ckp_path} (model weights left unchanged)")
        return
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    checkpoint = torch.load(ckp_path, map_location=torch.device("cpu"))
    model.load_state_dict(checkpoint["model_state_dict"])
    print(f"model checkpoint step {checkpoint['step']}")
        
# Restore the trained weights of both models. `ckp_path` is reused, so after this
# cell it holds the path of the model 1 checkpoint.
# NOTE: loadckp does not raise when a file is missing; check the printed output
# to confirm that both checkpoints were actually loaded.
ckp_path = "./train/checkpoints/model2-5_21e6.pth"
loadckp(ckp_path, model2)
ckp_path = "./train/checkpoints/model1-3_2e6.pth"
loadckp(ckp_path, model1)

In [None]:
def model_para_fn(model: PNAPCSAFT):
    """Run `model` over every graph in `testloader` and collect the results.

    Reads the notebook globals `testloader`, `model_dtype` and `device`.

    Returns:
        A pair of dicts keyed by InChI:
        * the first maps to ``(parameters, mden, mvp)`` — the predicted
          parameters as a list and the means of the two arrays returned by
          ``MAPE`` (presumably density and vapour-pressure errors — confirm);
        * the second maps to those two unreduced arrays.
    """
    summary_by_inchi = {}
    errors_by_inchi = {}
    model.eval()
    with torch.no_grad():
        for graphs in testloader:
            # Cast the inputs to what the network expects, then move them.
            graphs.x = graphs.x.to(model_dtype)
            graphs.edge_attr = graphs.edge_attr.to(model_dtype)
            graphs.edge_index = graphs.edge_index.to(torch.int64)
            graphs = graphs.to(device)

            parameters = model(graphs)
            # MAPE works on float64 NumPy arrays; data come in rows of five.
            params = parameters.squeeze().to(torch.float64).numpy()
            rho = graphs.rho.view(-1, 5).to(torch.float64).numpy()
            vp = graphs.vp.view(-1, 5).to(torch.float64).numpy()
            mden_array, mvp_array = MAPE(params, rho, vp, False)

            key = graphs.InChI
            summary_by_inchi[key] = (
                parameters.tolist()[0],
                mden_array.mean(),
                mvp_array.mean(),
            )
            errors_by_inchi[key] = (mden_array, mvp_array)
    return summary_by_inchi, errors_by_inchi

# Evaluate both models on the whole of `testloader`; the *_para dicts hold the
# per-compound summary, the *_array dicts the per-point error arrays.
model1_para, model1_array = model_para_fn(model1)
model2_para, model2_array = model_para_fn(model2)

In [None]:
def datacsv(model_para):
    """Turn the per-compound summary into column lists.

    Args:
        model_para: Mapping whose values are indexable, with the entries at
            positions 1 and 2 exported as "mden" and "mvp".

    Returns:
        Dict with the keys "inchis", "mden" and "mvp"; every value is a list
        in the iteration order of `model_para`.
    """
    keys = list(model_para)
    return {
        "inchis": keys,
        "mden": [model_para[key][1] for key in keys],
        "mvp": [model_para[key][2] for key in keys],
    }

# Column lists (InChI, mden, mvp) for model 2 only.
data = datacsv(model2_para)

In [None]:
# Polars frame with one row per compound, built from the column lists in `data`.
modelx = pl.DataFrame(data)

In [None]:
# Export the frame to model2.csv in the current working directory.
modelx.write_csv('model2.csv')

In [None]:
import pickle, numpy as np
# Read the stored fitted parameters. The file is opened read-only ("rb"); nothing
# is created or written here. compare_model later indexes `para_fitted` by InChI
# and unpacks each entry into three values.
with open("./data/thermoml/raw/para3_fitted.pkl", "rb") as file:
        # SECURITY: pickle.load can execute arbitrary code — only load trusted files.
        para_fitted = pickle.load( file)

In [None]:
# Compound inspected by the cells below; compare_model reads this global.
# NOTE(review): the formula layer of this InChI is C5H6O2, which looks like
# furfuryl alcohol, while the plot calls at the end of the notebook use the
# name "furfural" — confirm which compound is intended.
inchi = "InChI=1S/C5H6O2/c6-4-5-2-1-3-7-5/h1-3,6H,4H2"
def compare_model(model_para, model_array, target_inchi=None):
    """Print reference, predicted and fitted parameters for one compound.

    Reads the notebook globals `ra_para` and `para_fitted`.

    Args:
        model_para: Mapping InChI -> (parameters, mden, mvp), as produced by
            `model_para_fn`.
        model_array: Mapping InChI -> per-point error arrays. Accepted for
            backward compatibility; the printed summary does not use it.
        target_inchi: InChI of the compound to report. When omitted, the
            notebook-level `inchi` variable is used (the original behaviour).

    Raises:
        KeyError: If the compound is missing from `model_para`, `ra_para` or
            `para_fitted`; the message names the table that lacks it.
    """
    key = inchi if target_inchi is None else target_inchi
    tables = (
        ("model_para", model_para),
        ("ra_para", ra_para),
        ("para_fitted", para_fitted),
    )
    for label, table in tables:
        if key not in table:
            raise KeyError(f"{key!r} not found in {label}")

    ml, mden, mvp = model_para[key]
    ra = ra_para[key]
    fit, mden_fit, mvp_fit = para_fitted[key]

    # One row per parameter: (reference value, model prediction, fitted value).
    for row in zip(ra, ml, fit):
        print(row)

    # Errors of the model first, then the errors stored with the fitted parameters.
    print(mden, mvp)
    print(mden_fit, mvp_fit)

# Print the comparison for the compound stored in the global `inchi`,
# first for model 2 and then for model 1.
compare_model(model2_para, model2_array)
compare_model(model1_para, model1_array)

In [None]:
# Run model 2 on a graph built directly from the InChI string (rather than on a
# graph taken from `testloader`) and display the predicted parameters.
with torch.no_grad(): 
    graphs = from_InChI(inchi, with_hydrogen=True)
    # Same input casts as in model_para_fn.
    graphs.x = graphs.x.to(model_dtype)
    graphs.edge_attr = graphs.edge_attr.to(model_dtype)
    graphs.edge_index = graphs.edge_index.to(torch.int64)

    graphs = graphs.to(device)
    parameters = model2(graphs)
    # .detach() is redundant inside torch.no_grad() but harmless.
    params = parameters.squeeze().to(torch.float64).detach().numpy()
# Bare expression: shown as the cell output.
params

In [None]:
def plotdata(inchi: str, model_name: str, molecule_name: str, model: PNAPCSAFT):
    """Plot data against model and reference predictions for one compound.

    Two figures are drawn, saved under ``images/`` and shown: vapour pressure
    versus temperature and density versus temperature. Each figure holds the
    data points from `testloader` (scatter), a line computed from the
    parameters predicted by `model`, and a line computed from the reference
    parameters stored in the global `ra_para`.

    Args:
        inchi: InChI of the compound; it must occur in `testloader` and in
            `ra_para`.
        model_name: Legend label of the model. Stripped of surrounding
            whitespace, it is also part of the output file names.
        molecule_name: Compound name used in the output file names.
        model: Network that predicts the parameters.

    Raises:
        ValueError: If no graph in `testloader` carries the requested InChI.
        KeyError: If `ra_para` has no entry for the InChI.
    """
    # The vapour-pressure axis is labelled in bar; values are divided by this factor.
    pa_per_bar = 100000
    # Density points are kept only when column 1 lies within `pressure_window`
    # of `ref_pressure` (presumably a pressure in Pa around 1 atm — confirm).
    ref_pressure = 101325
    pressure_window = 10000

    def pltline(x, y):
        return plt.plot(x, y, linewidth=0.5)

    def pltscatter(x, y):
        return plt.scatter(x, y, marker="x", c="black", s=10)

    def plterr(x, y, m):
        # Annotate points whose error exceeds 1 %, skipping any point that is
        # within 2 x-units of the previously annotated one to avoid clutter.
        tb = 0
        for i, mape in enumerate(np.round(m, decimals=1)):
            ta = x[i]
            if (mape > 1) & (ta - tb > 2):
                tb = ta
                plt.text(x[i], y[i], f"{mape} %", ha="center", va="center", fontsize=4)

    model.eval()
    with torch.no_grad():
        for graphs in testloader:
            if inchi == graphs.InChI:
                break
        else:
            # Without this branch the last graph of the loader would be
            # plotted silently under the requested compound's name.
            raise ValueError(f"InChI not found in testloader: {inchi}")

        graphs.x = graphs.x.to(model_dtype)
        graphs.edge_attr = graphs.edge_attr.to(model_dtype)
        graphs.edge_index = graphs.edge_index.to(torch.int64)

        graphs = graphs.to(device)
        parameters = model(graphs)
        params = parameters.squeeze().to(torch.float64).numpy()
        rho = graphs.rho.view(-1, 5).to(torch.float64).numpy()
        vp = graphs.vp.view(-1, 5).to(torch.float64).numpy()
        pred_den, pred_vp = rhovp_data(params, rho, vp)
        ra_params = np.asarray(ra_para[inchi])
        ra_den, ra_vp = rhovp_data(ra_params, rho, vp)

        # savefig does not create missing directories.
        os.makedirs("images", exist_ok=True)

        # ---- Vapour pressure figure ----
        # Keep only points where both calculations returned a non-zero value
        # (presumably 0 marks a point rhovp_data could not compute — confirm).
        idx_vp = (pred_vp != 0) & (ra_vp != 0)
        vp = vp[idx_vp]
        ra_vp = ra_vp[idx_vp]
        pred_vp = pred_vp[idx_vp]

        # Sort by column 0 (the x axis, labelled "T (K)") so the lines are monotone in x.
        idx = np.argsort(vp[:, 0], 0)
        x = vp[idx, 0]
        y = vp[idx, -1] / pa_per_bar
        pltscatter(x, y)

        y = pred_vp[idx] / pa_per_bar
        pltline(x, y)
        mvp_model = 100 * np.abs(vp[idx, -1] - pred_vp[idx]) / vp[idx, -1]
        # Labels are nudged slightly below / above the lines so they do not overlap.
        plterr(x, y * 0.99, mvp_model)

        y = ra_vp[idx] / pa_per_bar
        pltline(x, y)
        mvp_ra = 100 * np.abs(vp[idx, -1] - ra_vp[idx]) / vp[idx, -1]
        plterr(x, y * 1.01, mvp_ra)

        # Customize the plot appearance
        pltcustom(model_name, mvp_model, mvp_ra, "linear", "Pressão de vapor (Bar)")

        # Save the plot as a high-quality image file
        path = osp.join(
            "images", "vp_" + model_name.strip() + "_" + molecule_name + ".png"
        )
        plt.savefig(path, dpi=300)
        plt.show()

        # ---- Density figure ----
        idx_p = abs(rho[:, 1] - ref_pressure) < pressure_window
        rho = rho[idx_p]
        pred_den = pred_den[idx_p]
        ra_den = ra_den[idx_p]
        idx = np.argsort(rho[:, 0], 0)

        x = rho[idx, 0]
        y = rho[idx, -1]
        pltscatter(x, y)

        y = pred_den[idx]
        pltline(x, y)
        mden_model = 100 * np.abs(rho[idx, -1] - pred_den[idx]) / rho[idx, -1]
        plterr(x, y, mden_model)

        y = ra_den[idx]
        pltline(x, y)
        mden_ra = 100 * np.abs(rho[idx, -1] - ra_den[idx]) / rho[idx, -1]
        plterr(x, y, mden_ra)

        # Customize the plot appearance
        pltcustom(model_name, mden_model, mden_ra, "linear", "Densidade (mol / m³)")
        path = osp.join("images", "den_" + model_name.strip()+ "_" + molecule_name + ".png")
        plt.savefig(path, dpi=300)
        plt.show()


def pltcustom(modelname, m_model, m_ra, scale="linear", ylabel=""):
    """Apply axis labels, legend and scale to the current matplotlib figure.

    Args:
        modelname: Legend label of the model curve.
        m_model: Array of percentage errors of the model; its mean, rounded
            to two decimals, is appended to the model's legend entry.
        m_ra: Array of percentage errors of the reference parameters, shown
            the same way in the reference legend entry.
        scale: Value passed to ``plt.yscale``.
        ylabel: Label of the y axis.
    """
    plt.xlabel("T (K)")
    plt.ylabel(ylabel)
    plt.title("")

    # Legend entries, in order: data points, model curve, reference curve.
    model_mean = m_model.mean().round(decimals=2)
    ra_mean = m_ra.mean().round(decimals=2)
    legend_entries = [
        "ThermoML",
        modelname + f" ({model_mean} %)",
        f"Ramírez-Vélez et al. ({ra_mean} %)",
    ]
    plt.legend(legend_entries)

    plt.grid(False)
    plt.yscale(scale)

In [None]:
# Plot the compound stored in the notebook-level `inchi` with both models.
# NOTE(review): the formula layer of that InChI is C5H6O2, which looks like
# furfuryl alcohol rather than furfural — confirm the name used in the file names.
plotdata(inchi, "Modelo 2", "furfural", model2)
plotdata(inchi, "Modelo 1", "furfural", model1)