In [1]:
import os.path as osp, os
os.environ["CUDA_VISIBLE_DEVICES"] = ''
import torch, numpy as np
from torch_geometric.loader import DataLoader
from data.graphdataset import ThermoMLDataset, ramirez
from train.train import create_model
from train.model_deg import calc_deg
from data.graph import from_InChI

In [2]:
# Sanity check: CUDA_VISIBLE_DEVICES was set to '' before torch was imported,
# so no GPU is expected to be reported here.
torch.cuda.is_available()

False

In [3]:
# Build a lookup from InChI string to the `para` values stored in the
# ramirez dataset (three numbers per molecule, as fixed by the view below).
train_dataset = ramirez("./data/ramirez2022")
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False)
ra_para = {}
for graph in train_loader:
    # Reshape to one row of three values per molecule and convert the whole
    # batch to plain Python lists in one go.
    para_rows = graph.para.view(-1, 3).tolist()
    for inchi, para in zip(graph.InChI, para_rows):
        ra_para[inchi] = para

In [4]:

# Everything in this notebook runs on the CPU.
device = torch.device("cpu")

# Load the ThermoML dataset and iterate it one graph at a time, in order.
path = osp.join("data", "thermoml")
dataset = ThermoMLDataset(path)
testloader = DataLoader(dataset, batch_size=1, shuffle=False)

In [5]:
# Start from the project's default configuration object; individual fields
# are overridden in the next cell.
from configs.default import get_config
config = get_config()

In [6]:
# Override selected fields of the default config for this run.
# NOTE(review): these presumably have to match the settings the checkpoint
# loaded below was trained with - confirm.

# Training schedule / logging.
config.num_train_steps = 200_000
config.log_every_steps = 500
config.checkpoint_every_steps = 2000
config.warmup_steps = 500

# Optimisation.
config.optimizer = "adam"
config.learning_rate = 0.001
config.batch_size = 512

# Model shape.
config.num_para = 3
config.propagation_depth = 4
config.hidden_dim = 512
config.num_mlp_layers = 1
config.pre_layers = 2
config.post_layers = 1

In [7]:
# Floating-point dtype the model is cast to when it is moved to `device`.
model_dtype = torch.float32

In [8]:
# Build the network from the config and the `deg` value returned by calc_deg,
# then move it to the target device and cast it to the chosen dtype.
deg = calc_deg("ramirez", './')
model = create_model(config, deg)
model = model.to(device, model_dtype)

In [9]:
# Restore the trained weights from the checkpoint file, if it is present.
ckp_path = "./train/checkpoints/model 1-4.pth"
if osp.exists(ckp_path):
    # map_location forces every stored tensor onto the CPU.
    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # from a trusted source.
    checkpoint = torch.load(ckp_path, map_location=torch.device("cpu"))
    model.load_state_dict(checkpoint["model_state_dict"])
else:
    # Make the fallback visible: without the checkpoint every prediction made
    # below comes from a model that still has its initial weights.
    import warnings

    warnings.warn(
        f"Checkpoint not found at {ckp_path!r}; the model keeps its initial weights."
    )


In [10]:
# Run the model over every graph in the ThermoML loader and store the
# predicted parameters keyed by InChI.
model_para = {}
model.eval()
# Pure inference: disable autograd so no computation graph is built or kept.
with torch.no_grad():
    for graph in testloader:
        graph = graph.to(device)
        parameters = model(graph)
        # The loader yields one graph per batch, so keep the first (only) row.
        parameters = parameters.tolist()[0]
        # The second tuple slot is initialised to +inf; its meaning is not
        # visible in this notebook.
        model_para[graph.InChI[0]] = (parameters, float("inf"))

In [15]:
# Compare the stored ramirez parameters with the model's predictions for every
# molecule present in both dictionaries, and print those whose mean absolute
# percentage error exceeds 1 %.
for inchi, ra_values in ra_para.items():
    if inchi not in model_para:
        continue
    ra = np.array(ra_values)
    ml = np.array(model_para[inchi][0])
    # Normalise by |ra| so each term is a true absolute percentage error even
    # if a reference value happens to be negative.
    ape = np.abs(ra - ml) / np.abs(ra) * 100
    mape = ape.mean()
    if mape > 1:
        print(mape, inchi, ra, ml)

40.176379992208275 InChI=1S/H3N/h1H3 [  2.62910008   2.31529999 204.8999939 ] [  1.02191615   3.4710598  185.47505188]
1.4577517744946809 InChI=1S/C10H18/c1-2-6-10-8-4-3-7-9(10)5-1/h9-10H,1-8H2 [  3.07509995   4.1645999  318.        ] [  2.98500729   4.19004488 320.64743042]
24.16259712031324 InChI=1S/H2S/h1H2 [  1.68550003   3.02769995 227.5       ] [  1.02186298   3.47111416 185.48248291]
61.27577116379567 InChI=1S/H2O/h1H2 [  3.27859998   1.95889997 298.20001221] [  1.02190351   3.4710691  185.47622681]


In [None]:
# Persist the model_para dictionary to disk as a pickle file.
import pickle
with open("./data/thermoml/processed/para3.pkl", "wb") as handle:
    # "wb" creates the file, or truncates it if it already exists.
    pickle.dump(model_para, handle)

In [None]:
# Tally the ThermoML graphs by which of the `rho` / `vp` tensors hold non-zero
# data, and count how many of the graphs with both are absent from ra_para.
n_rho_vp = 0  # graphs where neither rho nor vp is all zeros
n_rho = 0     # graphs whose vp is all zeros (but rho is not)
n_vp = 0      # graphs whose rho is all zeros
ntrain = 0    # graphs counted in n_rho_vp whose InChI is not a key of ra_para
# NOTE(review): size_vp / size_rho are never updated in this cell.
size_vp = 0
size_rho = 0
# Iterate the ThermoML loader defined above (one graph per batch) and look
# molecules up in ra_para: the names `loader` and `ra_data` previously used
# here are not defined anywhere in this notebook, so the cell raised NameError
# on a fresh kernel.
for graph in testloader:
    if torch.all(graph.rho == 0):
        n_vp += 1
    elif torch.all(graph.vp == 0):
        n_rho += 1
    else:
        n_rho_vp += 1
        if graph.InChI[0] not in ra_para:
            ntrain += 1

In [None]:
# Display the four tallies computed in the previous cell.
n_rho_vp, n_rho, n_vp, ntrain