In [None]:
import os.path as osp, os
os.environ["CUDA_VISIBLE_DEVICES"] = ''
import torch, numpy as np, polars as pl
from data.graphdataset import ThermoMLDataset, ramirez, ThermoMLpara
from train.train import create_model
from train.model_deg import calc_deg
from data.graph import from_InChI
from train.parametrisation import MAPE, APE

In [None]:
# Report whether torch can see a GPU. CUDA_VISIBLE_DEVICES is set to '' in the
# import cell, so this is presumably expected to show False (CPU-only run).
torch.cuda.is_available()

In [None]:
# Collect the parameter sets stored in the Ramirez dataset, keyed by InChI.
ra_loader = ramirez("./data/ramirez2022")
ra_para = {}
for graph in ra_loader:
    inchi = graph.InChI
    # View the stored parameters as rows of three values, rounded to two
    # decimals; only the first row is kept for each molecule.
    para = graph.para.view(-1, 3).round(decimals=2)
    ra_para[inchi] = para[0].tolist()

In [None]:
# Collect the parameter sets stored in the ThermoMLpara dataset, keyed by InChI.
tml_loader = ThermoMLpara("./data/thermoml")
tml_para = {}
for graph in tml_loader:
    inchi = graph.InChI
    # Same layout as for the Ramirez data: rows of three values rounded to
    # two decimals, first row kept.
    para = graph.para.view(-1, 3).round(decimals=2)
    tml_para[inchi] = para[0].tolist()

In [None]:
# Evaluation runs on CPU over the ThermoML dataset stored under data/thermoml.
device = torch.device("cpu")
path = osp.join("data", "thermoml")
testloader = ThermoMLDataset(path)

In [None]:
# Load the project's default configuration; selected fields are overridden in
# the following cell.
from configs.default import get_config
config = get_config()

In [None]:
# Override selected architecture settings on the default config.
# NOTE(review): presumably these must match the settings the checkpoint loaded
# further down was trained with -- confirm.
config.propagation_depth = 4
config.hidden_dim = 128
config.num_mlp_layers = 2
config.pre_layers = 1
config.post_layers = 3

In [None]:
# Floating-point dtype used both for the model (model.to(device, model_dtype))
# and for the graph features (x, edge_attr) that are fed into it.
model_dtype = torch.float64

In [None]:
# Build the network from the config and the value returned by calc_deg for the
# "thermoml" dataset, then move it to the target device in model_dtype.
deg = calc_deg("thermoml", './')
model = create_model(config, deg)
model = model.to(device, model_dtype)

In [None]:
# Restore the trained weights from the checkpoint file.
# A missing checkpoint is treated as an error: without it the model would keep
# its freshly initialised weights, and every metric and output file produced
# by the later cells would silently come from an untrained network.
ckp_path = "./train/checkpoints/model2-2_67e6.pth"
if not osp.exists(ckp_path):
    raise FileNotFoundError(f"model checkpoint not found: {ckp_path}")
# map_location loads all stored tensors onto the CPU.
checkpoint = torch.load(ckp_path, map_location=torch.device("cpu"))
model.load_state_dict(checkpoint["model_state_dict"])
print(f"model checkpoint step {checkpoint['step']}")
# Drop the reference so the loaded checkpoint dict can be garbage-collected.
del checkpoint


In [None]:
# Run the model over every graph in testloader and record, per InChI, the
# predicted parameters plus the error metrics returned by MAPE and APE.
model_para = {}   # InChI -> (predicted parameters, mden, mvp)
model_array = {}  # InChI -> (mden_array, mvp_array)
model.eval()
with torch.no_grad():
    for graphs in testloader:
        # Cast the graph tensors to the dtypes used for the model inputs.
        graphs.x = graphs.x.to(model_dtype)
        graphs.edge_attr = graphs.edge_attr.to(model_dtype)
        graphs.edge_index = graphs.edge_index.to(torch.int64)
        graphs = graphs.to(device)

        parameters = model(graphs)
        params = parameters.detach().squeeze().to(torch.float64).numpy()

        # rho / vp are viewed as rows of five columns before being handed to
        # the error functions.
        rho = graphs.rho.view(-1, 5).to(torch.float64).numpy()
        vp = graphs.vp.view(-1, 5).to(torch.float64).numpy()

        mden, mvp = MAPE(params, rho, vp)
        mden_array, mvp_array = APE(params, rho, vp)

        # Keep only the first row of the prediction as a plain Python list.
        parameters = parameters[0].tolist()
        model_para[graphs.InChI] = (parameters, mden, mvp)
        model_array[graphs.InChI] = (mden_array, mvp_array)

In [None]:
# For every molecule present in both ra_para and model_para, print the ones
# whose mden or mvp exceeds 0.5 (shown multiplied by 100), together with the
# reference and predicted parameters side by side.
for inchi in ra_para:
    if inchi not in model_para:
        continue

    ml, mden, mvp = model_para[inchi]
    ra = np.array(ra_para[inchi])
    ml = np.array(ml)
    # Element-wise percentage deviation of the prediction from the reference.
    mape = np.abs(ra - ml) / ra * 100

    if mden > 0.5 or mvp > 0.5:
        print(inchi)
        print(f"###########---{mape}---##########")
        print(f"###########---{mden*100, mvp*100}---##########")
        for row in zip(ra, ml):
            print(row)

In [None]:
# Flatten model_para into column lists: one entry per InChI with the second
# and third tuple elements (mden, mvp) stored alongside it.
data = {"inchis": [], "mden": [], "mvp": []}
for inchi, entry in model_para.items():
    data["inchis"].append(inchi)
    data["mden"].append(entry[1])
    data["mvp"].append(entry[2])

In [None]:
# Build a polars DataFrame with the columns "inchis", "mden" and "mvp".
modelx = pl.DataFrame(data)

In [None]:
# Write the per-molecule error table to model2.csv (relative to the current
# working directory).
modelx.write_csv('model2.csv')

In [None]:
import pickle

# Persist model_para (predicted parameters and their error metrics).
# Mode "wb" creates the file, or overwrites it if it already exists.
with open("./data/thermoml/processed/para3.pkl", "wb") as file:
    pickle.dump(model_para, file)

In [None]:
import pickle

# Write model_para to the "fitted" pickle under raw/. Mode "wb" overwrites an
# existing file.
# NOTE(review): this same path is read back as para_fitted further down; if the
# file holds separately fitted parameters, running this cell replaces them with
# the model predictions -- confirm this is intended.
with open("./data/thermoml/raw/para3_fitted.pkl", "wb") as file:
    pickle.dump(model_para, file)

In [None]:
# Tally the graphs in testloader by which of rho / vp is all zeros, and count
# how many of them are absent from ra_para.
n_rho_vp = n_rho = n_vp = 0
ntrain = ntrain_rhovp = 0
size_vp = size_rho = 0
for graph in testloader:
    not_in_ra = graph.InChI not in ra_para

    if torch.all(graph.rho == 0):
        # rho is entirely zero -> counted under n_vp.
        n_vp += 1
    elif torch.all(graph.vp == 0):
        # vp is entirely zero -> counted under n_rho.
        n_rho += 1
    else:
        # Both tensors contain non-zero entries.
        n_rho_vp += 1
        if not_in_ra:
            ntrain_rhovp += 1

    if not_in_ra:
        ntrain += 1


In [None]:
# Display the tallies computed in the previous cell.
n_rho_vp, n_rho, n_vp, ntrain, ntrain_rhovp

In [None]:
import pickle
import numpy as np

# Read the pickled parameter dictionary back from disk (nothing is written).
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load pickles produced by this project.
with open("./data/thermoml/raw/para3_fitted.pkl", "rb") as file:
    para_fitted = pickle.load(file)

In [None]:
# Number of entries (InChI keys) in the loaded dictionary.
len(para_fitted)

In [None]:
# Walk through para_fitted: print every entry whose mden or mvp is above 0.05
# and count them (n); separately count the entries that also appear in
# ra_para (nra). The summary lines below are derived from those two counters.
n = 0
nra = 0
for inchi, (ml, mden, mvp) in para_fitted.items():
    if (mden > 0.05) or (mvp > 0.05):
        print(inchi)
        print(f"###########---{mden, mvp}---##########")
        print(ml)
        n += 1
    if inchi in ra_para:
        nra += 1

print(f"number of test set left: {n}")
print(f"number of train set: {len(para_fitted) - n}")
print(f"number of val set: {nra}")

In [45]:
# Inspect a single molecule (InChI for H2O): print the ra_para, model_para and
# para_fitted parameters side by side, then the corresponding error metrics.
inchi = "InChI=1S/H2O/h1H2"

ml, mden, mvp = model_para[inchi]
mden_array, mvp_array = model_array[inchi]
ra = ra_para[inchi]
fit, mden_fit, mvp_fit = para_fitted[inchi]

# One printed tuple per parameter: (ra value, model value, fitted value).
for row in zip(ra, ml, fit):
    print(row)

print(mden, mvp, mden_fit, mvp_fit)

(3.2799999713897705, 1.8596061168889244, 2.6351581871134684)
(1.9600000381469727, 2.18422885014196, 2.1185829815421897)
(298.20001220703125, 341.5310241437917, 337.4404198471014)
0.2468140099559228 1000000.0 0.008767997939916464 0.007596301684148007


In [42]:
# Re-run the model on a graph built directly from the InChI string (with
# hydrogens) rather than on the entry taken from testloader.
with torch.no_grad():
    graphs = from_InChI(inchi, with_hydrogen=True)

    # Same dtype casts as in the evaluation loop.
    graphs.x = graphs.x.to(model_dtype)
    graphs.edge_attr = graphs.edge_attr.to(model_dtype)
    graphs.edge_index = graphs.edge_index.to(torch.int64)
    graphs = graphs.to(device)

    parameters = model(graphs)
    # NOTE(review): float16 here versus float64 in the evaluation loop --
    # presumably only for display; confirm.
    params = parameters.detach().squeeze().to(torch.float16).numpy()
parameters, params

(tensor([[  1.8596,   2.1842, 341.5310]], dtype=torch.float64),
 array([  1.859,   2.184, 341.5  ], dtype=float16))

In [43]:
# Locate the testloader entry whose InChI matches the molecule selected above.
# The for/else raises when nothing matches, instead of silently leaving `gh`
# bound to the last graph in the dataset (which would then be inspected as if
# it were the requested molecule).
for gh in testloader:
    if gh.InChI == inchi:
        break
else:
    raise LookupError(f"{inchi} not found in testloader")
gh

Data(x=[3, 9], edge_index=[2, 4], edge_attr=[4, 3], InChI='InChI=1S/H2O/h1H2', vp=[553, 5], rho=[1651, 5])

In [46]:
# Show this molecule's vp tensor next to mvp_array (the second array returned
# by APE for the same InChI).
gh.vp, mvp_array

(tensor([[3.2315e+02, 1.2235e+04, 1.0000e+00, 3.0000e+00, 1.2235e+04],
         [3.3315e+02, 1.9821e+04, 1.0000e+00, 3.0000e+00, 1.9821e+04],
         [3.4315e+02, 3.0815e+04, 1.0000e+00, 3.0000e+00, 3.0815e+04],
         ...,
         [3.7281e+02, 1.0000e+05, 1.0000e+00, 3.0000e+00, 1.0000e+05],
         [3.7284e+02, 1.0000e+05, 1.0000e+00, 3.0000e+00, 1.0000e+05],
         [3.7290e+02, 1.0100e+05, 1.0000e+00, 3.0000e+00, 1.0100e+05]],
        dtype=torch.float64),
 array([1000000., 1000000., 1000000., 1000000., 1000000., 1000000.,
        1000000., 1000000., 1000000., 1000000., 1000000., 1000000.,
        1000000., 1000000., 1000000., 1000000., 1000000., 1000000.,
        1000000., 1000000., 1000000., 1000000., 1000000., 1000000.,
        1000000., 1000000., 1000000., 1000000., 1000000., 1000000.,
        1000000., 1000000., 1000000., 1000000., 1000000., 1000000.,
        1000000., 1000000., 1000000., 1000000., 1000000., 1000000.,
        1000000., 1000000., 1000000., 1000000., 10000