In [1]:
import os.path as osp, os
os.environ["CUDA_VISIBLE_DEVICES"] = ''
import torch, numpy as np
from data.graphdataset import ThermoMLDataset, ramirez, ThermoMLpara
from train.train import create_model
from train.model_deg import calc_deg
from data.graph import from_InChI
from train.parametrisation import MAPE

In [2]:
# Sanity check: CUDA_VISIBLE_DEVICES was set to '' in the first cell, so this
# should report that no GPU is available.
torch.cuda.is_available()

False

In [3]:
# Map each InChI in the ramirez dataset to the parameters stored on its graph.
train_loader = ramirez("./data/ramirez2022")
ra_para = {}
for graph in train_loader:
    # view(-1, 3) lays the stored parameters out in rows of three values;
    # only the first row is kept, as a plain Python list.
    first_row = graph.para.view(-1, 3)[0]
    ra_para[graph.InChI] = first_row.tolist()

In [4]:
# Map each InChI in the ThermoMLpara dataset to the parameters stored on its
# graph. Note that this rebinds `train_loader`.
train_loader = ThermoMLpara("./data/thermoml")
tml_para = {}
for graph in train_loader:
    # view(-1, 3) lays the stored parameters out in rows of three values;
    # only the first row is kept, as a plain Python list.
    first_row = graph.para.view(-1, 3)[0]
    tml_para[graph.InChI] = first_row.tolist()

In [5]:

# Build the ThermoMLDataset that later cells iterate over directly, and pin
# all computation to the CPU.
path = osp.join("data", "thermoml")
testloader = ThermoMLDataset(path)
device = torch.device("cpu")

In [6]:
# Start from the project's default configuration object; individual fields
# are overridden afterwards before the model is created.
from configs.default import get_config
config = get_config()

In [7]:
# Override model hyperparameters on the default config.
# NOTE(review): these presumably have to match the architecture the loaded
# checkpoint was trained with, otherwise load_state_dict will fail — confirm.
config.propagation_depth=4
config.hidden_dim=128
config.num_mlp_layers=2
config.pre_layers=1
config.post_layers=3

In [8]:
# Floating-point dtype the model is cast to when it is created.
model_dtype = torch.float64

In [11]:
# Create and initialize the network.
# `deg` is whatever calc_deg returns for the "thermoml" dataset rooted at './'
# (presumably node-degree statistics required by the model — TODO confirm).
deg = calc_deg("thermoml", './')
# Move the freshly built model to the CPU device and cast it to model_dtype.
model = create_model(config, deg).to(device, model_dtype)

In [12]:
# Restore the trained weights from the checkpoint file.
ckp_path = "./train/checkpoints/model5-240k.pth"
if not osp.exists(ckp_path):
    # Fail loudly instead of silently skipping the load: without the
    # checkpoint the model keeps whatever weights create_model produced, and
    # the cells below would evaluate it and write the results to disk.
    raise FileNotFoundError(f"model checkpoint not found: {ckp_path}")
# map_location keeps every tensor on the CPU regardless of where it was saved.
checkpoint = torch.load(ckp_path, map_location=torch.device("cpu"))
model.load_state_dict(checkpoint["model_state_dict"])
print(f"model checkpoint step {checkpoint['step']}")
# Drop the checkpoint dict so it does not stay alive in the kernel.
del checkpoint


model checkpoint step 240000


In [None]:
# Run the restored model over every graph in `testloader` and store, per
# InChI, the predicted parameters together with the two values returned by
# MAPE (presumably fractional errors on density and vapour pressure — confirm).
model.eval()
model_para = {}
with torch.no_grad():
    for graphs in testloader:
        # Cast node/edge features to float64 and edge indices to int64
        # before moving the graph to the target device.
        graphs.x = graphs.x.to(torch.float64)
        graphs.edge_attr = graphs.edge_attr.to(torch.float64)
        graphs.edge_index = graphs.edge_index.to(torch.int64)
        graphs = graphs.to(device)

        parameters = model(graphs)

        # Reference data reshaped into rows of five columns, as NumPy arrays.
        rho = graphs.rho.view(-1, 5).to(torch.float64).numpy()
        vp = graphs.vp.view(-1, 5).to(torch.float64).numpy()
        params = parameters.squeeze().to(torch.float64).detach().numpy()

        mden, mvp = MAPE(params, rho, vp)

        # Keep only the first row of the prediction as a plain list.
        parameters = parameters.tolist()[0]
        model_para[graphs.InChI] = (parameters, mden, mvp)

In [None]:
# For every InChI present in both ra_para and model_para, record the model's
# two error values and print a detailed comparison when either exceeds 1.0.
data = {"inchis": [], "mden": [], "mvp": []}
for inchi in ra_para:
    if inchi not in model_para:
        continue

    ml, mden, mvp = model_para[inchi]
    ra = np.array(ra_para[inchi])
    ml = np.array(ml)
    # Per-parameter percentage deviation of the model from the ra_para values.
    mape = np.abs(ra - ml) / ra * 100

    data["inchis"].append(inchi)
    data["mden"].append(mden)
    data["mvp"].append(mvp)

    # 1.0 is the same threshold the original spelled as `100 / 100`.
    if mden > 1.0 or mvp > 1.0:
        print(inchi)
        print(f"###########---{mape}---##########")
        print(f"###########---{mden*100, mvp*100}---##########")
        # One (ra_para value, model value) pair per parameter.
        for row in zip(ra, ml):
            print(row)

In [None]:
# Convert the collected columns into a polars DataFrame.
# NOTE: this rebinds `data` from a dict of lists to a DataFrame, so the cell
# that builds the dict has to be re-run before the dict form is available again.
import polars as pl
data = pl.DataFrame(data)

In [None]:
# Persist the per-InChI error table as CSV in the current working directory.
data.write_csv("modelx.csv")

In [None]:
# Serialize model_para (InChI -> (parameters, mden, mvp)) into the processed
# data directory.
import pickle
with open("./data/thermoml/processed/para3.pkl", "wb") as file:
        # Opening in "wb" mode creates the file, or overwrites an existing one.
        pickle.dump(model_para, file)

In [None]:
# Serialize the same model_para dict a second time, into the raw data
# directory under the name para3_fitted.pkl.
import pickle
with open("./data/thermoml/raw/para3_fitted.pkl", "wb") as file:
        # Opening in "wb" mode creates the file, or overwrites an existing one.
        pickle.dump(model_para, file)

In [None]:
# Tally the graphs in `testloader` by which reference data they carry and by
# whether their InChI is absent from ra_para.
n_rho_vp = n_rho = n_vp = 0
ntrain = ntrain_rhovp = 0
size_vp = size_rho = 0
for graph in testloader:
    not_in_ra = graph.InChI not in ra_para

    if (graph.rho == 0).all():
        # rho is entirely zero: counted under n_vp.
        n_vp += 1
    elif (graph.vp == 0).all():
        # vp is entirely zero (and rho is not): counted under n_rho.
        n_rho += 1
    else:
        # Both rho and vp contain non-zero entries.
        n_rho_vp += 1
        if not_in_ra:
            ntrain_rhovp += 1

    if not_in_ra:
        ntrain += 1


In [None]:
# Display the tallies computed in the counting loop over `testloader`.
n_rho_vp, n_rho, n_vp, ntrain, ntrain_rhovp

In [None]:
# Read the pickled parameter dict back from the raw data directory.
# NOTE: pickle.load can execute arbitrary code; only load files produced by
# this project.
import pickle, numpy as np
with open("./data/thermoml/raw/para3_fitted.pkl", "rb") as file:
        # Opened read-only ("rb"); nothing is created or modified here.
        para_fitted = pickle.load( file)

In [None]:
# Number of InChI entries loaded from the pickle file.
len(para_fitted)

In [None]:
# Print every entry whose mden or mvp exceeds 0.05 and summarise the counts.
n = 0
nra = 0
for inchi, (ml, mden, mvp) in para_fitted.items():
    exceeds_threshold = (mden > 0.05) or (mvp > 0.05)
    if exceeds_threshold:
        print(inchi)
        print(f"###########---{mden, mvp}---##########")
        print(ml)
        n += 1
    # Separately count the entries that also appear in ra_para.
    if inchi in ra_para:
        nra += 1

print(f"number of test set left: {n}")
print(f"number of train set: {len(para_fitted) - n}")
print(f"number of val set: {nra}")