In [None]:
import os.path as osp, os
# Hide every CUDA device from this process so the notebook runs on CPU only.
# The variable is set before `import torch` on the next line.
os.environ["CUDA_VISIBLE_DEVICES"] = ''
import torch, numpy as np, polars as pl
# Project-local modules: datasets, model factory, degree statistics and the
# error metric used for the evaluation further down.
from data.graphdataset import ThermoMLDataset, ramirez
from train.train import create_model
from train.model_deg import calc_deg
from train.parametrisation import MAPE

In [None]:
# Sanity check: displays whether torch can see a CUDA device. With
# CUDA_VISIBLE_DEVICES set to '' in the import cell this is expected to be False.
torch.cuda.is_available()

In [None]:
# Reference parameters from the Ramirez data set, keyed by InChI.
# Each graph's `para` tensor is reshaped to rows of three values, rounded to
# two decimals, and only the first row is kept as a plain Python list.
ra_loader = ramirez("./data/ramirez2022")
ra_para = {
    graph.InChI: graph.para.view(-1, 3).round(decimals=2).tolist()[0]
    for graph in ra_loader
}

In [None]:
# All inference in this notebook is done on the CPU.
device = torch.device("cpu")

# ThermoML data set rooted at data/thermoml (relative to the working directory).
path = osp.join("data", "thermoml")
testloader = ThermoMLDataset(path)

In [None]:
# Start from the project's default configuration object; individual fields
# are overridden in the following cell.
from configs.default import get_config
config = get_config()

In [None]:
# Architecture hyper-parameters overriding the defaults.
# NOTE(review): presumably these must match the settings the checkpoint loaded
# below was trained with, otherwise load_state_dict will fail -- confirm.
config.propagation_depth = 4
config.hidden_dim = 128
config.num_mlp_layers = 2
config.pre_layers = 1
config.post_layers = 3

In [None]:
# Floating-point precision used for the model weights and for the node/edge
# features that are fed to it.
model_dtype = torch.float64

In [None]:
# Build the network from the configuration and the degree statistics computed
# by calc_deg for the "ramirez" data set, then move it to the target device
# and precision.
deg = calc_deg("ramirez", "./")
model = create_model(config, deg)
model = model.to(device, model_dtype)

In [None]:
# Restore the trained weights from the checkpoint.
# This notebook only evaluates the model, so a missing checkpoint is treated
# as an error: continuing would score (and later pickle) the output of a
# randomly initialised network without any warning.
ckp_path = "./train/checkpoints/model3-4_35e6.pth"
if not osp.exists(ckp_path):
    raise FileNotFoundError(f"model checkpoint not found: {ckp_path}")

# Map the stored tensors onto the same device the model lives on.
checkpoint = torch.load(ckp_path, map_location=device)
model.load_state_dict(checkpoint["model_state_dict"])
print(f"model checkpoint step {checkpoint['step']}")
# The checkpoint dict is no longer needed once the weights are copied in.
del checkpoint


In [None]:
# Run the model over every ThermoML graph and score its predicted parameters.
#   model_para[InChI]  -> (predicted parameter list, mden, mvp)
#   model_array[InChI] -> (mden_array, mvp_array)
# where mden_array / mvp_array are the two arrays returned by MAPE and
# mden / mvp are their means (presumably the density and vapour-pressure
# percentage errors -- confirm against train.parametrisation.MAPE).

# Graphs with fewer rows than this across rho and vp combined are skipped.
MIN_DATAPOINTS = 10

model_para = {}
model_array = {}
model.eval()
with torch.no_grad():
    for graphs in testloader:
        # Cast the inputs to the dtypes the model was created with.
        graphs.x = graphs.x.to(model_dtype)
        graphs.edge_attr = graphs.edge_attr.to(model_dtype)
        graphs.edge_index = graphs.edge_index.to(torch.int64)
        graphs = graphs.to(device)

        # Experimental data reshaped to rows of five columns.
        rho = graphs.rho.view(-1, 5).to(torch.float64).numpy()
        vp = graphs.vp.view(-1, 5).to(torch.float64).numpy()

        # Decide whether to skip *before* the forward pass so that no model
        # evaluation is wasted on graphs that are discarded anyway.
        n_datapoints = rho.shape[0] + vp.shape[0]
        if n_datapoints < MIN_DATAPOINTS:
            continue

        parameters = model(graphs)
        params = parameters.squeeze().to(torch.float64).numpy()

        mden_array, mvp_array = MAPE(params, rho, vp, False)
        # Graphs without any vp result are not recorded.
        if mvp_array.size == 0:
            continue
        # NOTE(review): mden_array may still be empty here, in which case
        # .mean() yields nan -- confirm this is the intended behaviour.
        mden, mvp = mden_array.mean(), mvp_array.mean()

        model_para[graphs.InChI] = (parameters.tolist()[0], mden, mvp)
        model_array[graphs.InChI] = (mden_array, mvp_array)

In [None]:
# Keep only the molecules that are NOT part of the Ramirez reference set.
para3 = {
    inchi: entry
    for inchi, entry in model_para.items()
    if inchi not in ra_para
}

In [None]:
# Sizes of: Ramirez reference set, evaluated ThermoML set, and the filtered set.
tuple(map(len, (ra_para, model_para, para3)))

In [None]:
import pickle

# Persist the filtered predictions; "wb" creates the file or overwrites an
# existing one.
with open("./data/thermoml/processed/para3.pkl", "wb") as para3_file:
    pickle.dump(para3, para3_file)

In [None]:
import pickle

# Write the same `para3` dictionary to the raw data folder under a second name.
# NOTE(review): this is the identical object that is pickled to
# processed/para3.pkl -- confirm that the duplicate output is intentional.
with open("./data/thermoml/raw/para3_fitted.pkl", "wb") as fitted_file:
    pickle.dump(para3, fitted_file)