## load libraries and model

In [None]:
import os.path as osp, os
os.environ["CUDA_VISIBLE_DEVICES"] = ''
import torch, numpy as np, polars as pl
from data.graphdataset import ThermoMLDataset, ramirez, ThermoMLpara
from train.train import create_model
from train.models import PNAPCSAFT
from train.model_deg import calc_deg
from data.graph import from_InChI
import matplotlib.pyplot as plt
from configs.default import get_config
import pickle, numpy as np
from rdkit import Chem
from rdkit.Chem import Draw
from demo.utils import plotdata, loadckp, model_para_fn, datacsv, plotparams

# Quick sanity check of the CUDA state.  CUDA_VISIBLE_DEVICES is set to ''
# before torch is imported, so this presumably reports False — confirm.
torch.cuda.is_available()

In [None]:
# Lookup table built from the `ramirez` loader: InChI -> first row of the
# graph's `para` tensor viewed as (-1, 3) and rounded to two decimals.
ra_loader = ramirez("./data/ramirez2022")
ra_para = {}
for graph in ra_loader:
    inchi = graph.InChI
    para = graph.para.view(-1, 3).round(decimals=2)
    ra_para[inchi] = para[0].tolist()

# Same lookup table, built from the `ThermoMLpara` loader.
tml_loader = ThermoMLpara("./data/thermoml")
tml_para = {}
for graph in tml_loader:
    inchi = graph.InChI
    para = graph.para.view(-1, 3).round(decimals=2)
    tml_para[inchi] = para[0].tolist()

# Dataset iterated by the later cells, and the device everything runs on.
path = osp.join("data", "thermoml")
testloader = ThermoMLDataset(path)
device = torch.device("cpu")

In [None]:
# Gather the `rho` tensor of every test graph whose `rho` is not all zeros.
# The pieces are collected first and concatenated once, instead of re-copying
# the growing result on every iteration.
# The empty seed tensor keeps `data` defined (as an empty tensor) when no
# graph qualifies, and keeps the dtype promotion of the concatenation the
# same as when starting from `torch.tensor([])`.
rho_chunks = [torch.tensor([])]
for graph in testloader:
    # Equivalent to "not all entries are zero".
    if torch.any(graph.rho != 0):
        rho_chunks.append(graph.rho)
data = torch.concat(rho_chunks)

In [None]:
# Summary statistics of the collected `rho` values, each reduced over dim 0.
# With a dim argument, `max`, `min` and `median` return (values, indices).
(
    data.mean(0),
    data.max(0),
    data.min(0),
    data.median(0),
)

In [None]:
# Start from the project defaults and override the architecture settings.
# NOTE(review): these presumably have to match the checkpoints loaded later
# in this notebook — confirm before changing any of them.
config = get_config()
config.propagation_depth = 4
config.hidden_dim = 128
config.num_mlp_layers = 2
config.pre_layers = 1
config.post_layers = 3

In [None]:
# Both networks are moved to `device` and cast to double precision.
model_dtype = torch.float64

# model2: built from the `calc_deg` result for the "thermoml" dataset.
deg_model2 = calc_deg("thermoml", './')
model2 = create_model(config, deg_model2)
model2 = model2.to(device, model_dtype)

# model1: same config, built from the `calc_deg` result for "ramirez".
deg_model1 = calc_deg("ramirez", './')
model1 = create_model(config, deg_model1)
model1 = model1.to(device, model_dtype)

# Load one checkpoint per model through `loadckp`.  `ckp_path` is reused, so
# after this cell it holds the model1 checkpoint path.
ckp_path = "./train/checkpoints/model2-13_62e6.pth"
loadckp(ckp_path, model2)
ckp_path = "./train/checkpoints/model1-9_20e6.pth"
loadckp(ckp_path, model1)

## evaluations

In [None]:
# `model_para_fn` returns a pair per model.  Both halves are indexed by InChI
# by `compare_model` further down in this notebook.
model1_para, model1_array = model_para_fn(model1)
model2_para, model2_array = model_para_fn(model2)

In [None]:
# Pass each model's `*_para` result through `datacsv`, wrap it in a polars
# DataFrame and write it out as CSV.
# NOTE(review): the files go to the parent directory ('../'), unlike the
# './'-relative paths used elsewhere in this notebook — confirm this is intended.
data2 = datacsv(model2_para)
data1 = datacsv(model1_para)
model1data = pl.DataFrame(data1)
model2data = pl.DataFrame(data2)
model1data.write_csv('../model1.csv')
model2data.write_csv('../model2.csv')

In [None]:
# Read `para_fitted` from the existing pickle file (opened read-only).
# NOTE: unpickling can execute arbitrary code; only load this file from a
# trusted source.
with open("./data/thermoml/raw/para3_fitted.pkl", "rb") as file:
    para_fitted = pickle.load(file)

In [None]:
# Molecule under inspection.  `compare_model` below does not take the InChI
# as an argument; it reads this module-level name when it is called.
inchi = "InChI=1S/C8H15N2.BF4/c1-3-4-5-10-7-6-9(2)8-10;2-1(3,4)5/h6-8H,3-5H2,1-2H3;/q+1;-1"
def compare_model(model_para, model_array):
    """Print one model's results for the global ``inchi`` next to the references.

    Reads the module-level names ``inchi``, ``ra_para`` and ``para_fitted``.
    If the molecule is missing from ``ra_para`` or ``para_fitted``, zeros are
    printed in place of the missing reference values.

    Args:
        model_para: Mapping from InChI to a ``(ml, mden, mvp)`` triple, where
            ``ml`` is iterable.
        model_array: Mapping from InChI to a pair.  The pair is only unpacked
            here, so a missing key or a wrong length still raises.

    Returns:
        None.  Everything is written to stdout.
    """
    ml, mden, mvp = model_para[inchi]
    # Unpacked but never displayed; kept so a bad entry fails loudly.
    _mden_array, _mvp_array = model_array[inchi]

    ra = ra_para[inchi] if inchi in ra_para else [0, 0, 0]

    if inchi not in para_fitted:
        fit, mden_fit, mvp_fit = [0, 0, 0], 0, 0
    else:
        fit, mden_fit, mvp_fit = para_fitted[inchi]

    # One printed tuple per parameter: (ra value, model value, fitted value).
    print("#### (ra, ml, fit) ####")
    for triple in zip(ra, ml, fit):
        print(triple)

    # First line: this model's values; second line: the fitted reference.
    print("#### mden, mvp ####")
    print(mden, mvp)
    print(mden_fit, mvp_fit)

# Print the comparison for the current global `inchi`: model2 first, then model1.
compare_model(model2_para, model2_array)
compare_model(model1_para, model1_array)

In [None]:
# Run model2 on the single molecule stored in `inchi`, without gradient
# tracking, and show the result as a NumPy array.
with torch.no_grad():
    graphs = from_InChI(inchi, with_hydrogen=True)

    # Cast the graph tensors to the model dtype (indices to int64), then
    # move the whole graph to the target device.
    graphs.x = graphs.x.to(model_dtype)
    graphs.edge_attr = graphs.edge_attr.to(model_dtype)
    graphs.edge_index = graphs.edge_index.to(torch.int64)
    graphs = graphs.to(device)

    parameters = model2(graphs)
    params = (
        parameters.squeeze()
        .to(torch.float64)
        .detach()
        .numpy()
    )
params

## plotting

In [None]:
"""
InChI=1S/H2O/h1H2
InChI=1S/C5H6O2/c6-4-5-2-1-3-7-5/h1-3,6H,4H2
InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3
InChI=1S/C8H15N2.C2F6NO4S2/c1-3-4-5-10-7-6-9(2)8-10;3-1(4,5)14(10,11)9-15(12,13)2(6,7)8/h6-8H,3-5H2,1-2H3;/q+1;-1
InChI=1S/C8H15N2.BF4/c1-3-4-5-10-7-6-9(2)8-10;2-1(3,4)5/h6-8H,3-5H2,1-2H3;/q+1;-1

"""


In [None]:
# Molecule to plot.  This rebinds the module-level `inchi` that
# `compare_model` also reads (currently to the same value).
inchi ="InChI=1S/C8H15N2.BF4/c1-3-4-5-10-7-6-9(2)8-10;2-1(3,4)5/h6-8H,3-5H2,1-2H3;/q+1;-1"
# Label passed to `plotdata` together with the InChI and both models.
molecule_name = "bmim-bf4"
plotdata(inchi, molecule_name, [model1, model2])

In [None]:
# SMILES of ethyl esters: a chain of 1..99 carbons followed by "(=O)OCC".
smiles = [f"{'C' * n_carbons}(=O)OCC" for n_carbons in range(1, 100)]

In [None]:
# SMILES of linear alkanes with 1..99 carbon atoms.
smiles = ["C" * n_carbons for n_carbons in range(1, 100)]

In [None]:
# SMILES of linear alcohols: 1..99 carbons followed by a terminal oxygen.
smiles = [f"{'C' * n_carbons}O" for n_carbons in range(1, 100)]

In [None]:
# Call `plotparams` for whichever `smiles` list was defined last, with both
# models; the third argument is passed through unchanged.
plotparams(smiles, [model1, model2], 'outra cosa')

In [None]:
# InChI of every graph in the test set.  Kept as a list so that other cells
# can still treat it as a sequence.
test_inchis = [gh.InChI for gh in testloader]

# Set copy for the membership test below: one hash lookup per candidate
# instead of a scan of the whole list.
known_inchis = set(test_inchis)

# Keep, in `smiles` order, the InChI of every generated SMILES that also
# occurs in the test set.  The loop uses its own name (`candidate_inchi`) so
# it does not overwrite the module-level `inchi` that `compare_model` reads.
inchis = []
for smile in smiles:
    mol = Chem.MolFromSmiles(smile)
    candidate_inchi = Chem.MolToInchi(mol)
    if candidate_inchi in known_inchis:
        inchis.append(candidate_inchi)

In [None]:
# Show the InChIs of the generated SMILES that are present in the test set.
inchis