In [None]:
import os.path as osp, os
os.environ["CUDA_VISIBLE_DEVICES"] = ''
import torch, numpy as np
from torch_geometric.loader import DataLoader
from data.graphdataset import ThermoMLDataset, ramirez
from train.train import create_model
from train.model_deg import calc_deg
from data.graph import from_InChI

In [None]:
# Sanity check of CUDA visibility. CUDA_VISIBLE_DEVICES is set to '' in the
# import cell, so this is expected to display False — TODO confirm on your setup.
torch.cuda.is_available()

In [None]:
# Build a lookup from InChI string to the reference parameter triple stored in
# the "ramirez" dataset.
train_dataset = ramirez("./data/ramirez2022")
train_loader = DataLoader(train_dataset, shuffle=False, batch_size=1)
ra_para = {}
for graph in train_loader:
    # graph.para is reshaped into rows of three values; each row is paired with
    # the InChI at the same position in the batch.
    triples = graph.para.view(-1, 3).tolist()
    ra_para.update(zip(graph.InChI, triples))

In [None]:

# All computation in this notebook is placed on the CPU.
device = torch.device("cpu")

# ThermoML dataset, iterated one graph at a time in a fixed (unshuffled) order.
path = osp.join("data", "thermoml")
dataset = ThermoMLDataset(path)
testloader = DataLoader(dataset, shuffle=False, batch_size=1)

In [None]:
# Obtain the configuration object returned by configs.default.get_config();
# individual fields are overridden in the following cell.
from configs.default import get_config
config = get_config()

In [None]:
# Override selected configuration fields for this run. setattr(obj, name, v)
# is equivalent to the attribute assignment `obj.name = v`.
overrides = {
    "num_train_steps": 300_000,
    "learning_rate": 0.001,
    "propagation_depth": 4,
    "hidden_dim": 128,
    "num_mlp_layers": 1,
    "pre_layers": 1,
    "post_layers": 2,
}
for field, value in overrides.items():
    setattr(config, field, value)

In [None]:
# Floating-point dtype the model is cast to when it is moved to the device.
model_dtype = torch.float32

In [None]:
# Instantiate the network from the configuration and the output of calc_deg,
# then move it to the target device and cast it to the chosen dtype.
deg = calc_deg("ramirez", './')
model = create_model(config, deg)
model = model.to(device=device, dtype=model_dtype)

In [None]:
# Restore the trained weights from the last checkpoint, if one exists.
import warnings

ckp_path = "./train/checkpoints/ra_last_checkpoint.pth"
if osp.exists(ckp_path):
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    # Tensors are mapped onto the same device the model was moved to.
    checkpoint = torch.load(ckp_path, map_location=device)
    model.load_state_dict(checkpoint["model_state_dict"])
else:
    # Do not fail silently: without the checkpoint the model keeps the weights
    # it was created with, and every prediction made (and saved) by the
    # following cells comes from that unrestored model.
    warnings.warn(
        f"Checkpoint not found at {ckp_path!r}; "
        "continuing with the model's initial (unrestored) weights."
    )


In [None]:
# Run the model over every graph in testloader and collect its output per InChI.
# Each entry is a tuple (predicted parameter list, float("inf")); the second
# slot is a placeholder here — presumably replaced by a fitting error
# elsewhere, TODO confirm.
model_para = {}
model.eval()
# Pure inference: disable gradient tracking so no autograd graph is built for
# each forward pass. The numerical outputs are unchanged.
with torch.no_grad():
    for graph in testloader:
        graph = graph.to(device)
        parameters = model(graph)
        # The loader yields one graph per batch, so take the single output row.
        parameters = parameters.tolist()[0]
        model_para[graph.InChI[0]] = (parameters, float("inf"))

In [None]:
# Compare the reference parameters (ra_para) with the model predictions
# (model_para) for every InChI present in both, and print the entries whose
# mean absolute percentage error exceeds 2 %.
for inchi in ra_para:
    if inchi in model_para:
        ra = np.array(ra_para[inchi])
        ml = np.array(model_para[inchi][0])
        # Normalise by |ra| so that a negative reference value cannot produce a
        # negative "error" that would cancel out in the mean or slip under the
        # threshold.
        mape = np.abs(ra - ml) / np.abs(ra) * 100
        mape = mape.mean()
        if mape > 2.0:
            print(inchi)
            print(f"###########---{mape}---##########")
            # One (reference, predicted) pair per parameter.
            for row in zip(ra, ml):
                print(row)

In [None]:
import pickle

# Serialise the predictions to disk. Opening with "wb" creates the file or
# overwrites an existing one.
with open("./data/thermoml/processed/para3.pkl", "wb") as file:
    pickle.Pickler(file).dump(model_para)

In [None]:
import pickle
# NOTE(review): this writes the same model_para dictionary as the previous cell
# (with float("inf") in the second tuple slot) to raw/para3_fitted.pkl, the
# file that a later cell reads back and whose second slot it treats as an
# error value. Running this cell overwrites any existing file at that path —
# confirm this is intended before executing.
with open("./data/thermoml/raw/para3_fitted.pkl", "wb") as file:
        # "wb" creates the file, or truncates it if it already exists.
        pickle.dump(model_para, file)

In [None]:
# Tally the graphs in testloader by which targets are non-zero:
#   n_vp      - graph.rho is all zeros
#   n_rho     - graph.rho has non-zero entries but graph.vp is all zeros
#   n_rho_vp  - neither of the above
# ntrain counts graphs whose InChI is NOT a key of ra_para; ntrain_rhovp counts
# the same restricted to the n_rho_vp group.
n_rho_vp = n_rho = n_vp = 0
ntrain = ntrain_rhovp = 0
size_vp = size_rho = 0
for graph in testloader:
    missing_from_ra = graph.InChI[0] not in ra_para

    if (graph.rho == 0).all():
        n_vp += 1
    elif (graph.vp == 0).all():
        n_rho += 1
    else:
        n_rho_vp += 1
        ntrain_rhovp += int(missing_from_ra)

    ntrain += int(missing_from_ra)


In [None]:
# Display the counters computed in the previous cell as a tuple, in the order
# (n_rho_vp, n_rho, n_vp, ntrain, ntrain_rhovp).
n_rho_vp, n_rho, n_vp, ntrain, ntrain_rhovp

In [None]:
import pickle
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open("./data/thermoml/raw/para3_fitted.pkl", "rb") as file:
        # Read the pickled dictionary back from disk ("rb" opens for reading,
        # no file is created). This rebinds model_para to the loaded content.
        model_para = pickle.load( file)

In [None]:
# Report every entry of model_para whose stored second value, multiplied by 100,
# is a real number above 5.0, and count how many such entries there are.
n = 0
for inchi, entry in model_para.items():
    ml = entry[0]
    mape = entry[1] * 100
    if mape > 5.0 and not np.isnan(mape):
        print(inchi)
        print(f"###########---{mape}---##########")
        print(ml)
        n += 1
print(f"number of test set left: {n}")