In [None]:
import os.path as osp, os
os.environ["CUDA_VISIBLE_DEVICES"] = ''
import torch, numpy as np
from data.graphdataset import ThermoMLDataset, ramirez
from train.train import create_model
from train.model_deg import calc_deg
from data.graph import from_InChI
from train.parametrisation import MAPE

In [None]:
# Sanity check: CUDA_VISIBLE_DEVICES is set to an empty string in the import
# cell before torch is imported, so this is presumably expected to show False.
torch.cuda.is_available()

In [None]:
# Reference parameters keyed by InChI: each graph's `para` tensor is viewed as
# rows of three values and only the first row is kept (as a plain list).
train_loader = ramirez("./data/ramirez2022")
ra_para = {
    mol.InChI: mol.para.view(-1, 3).tolist()[0]
    for mol in train_loader
}

In [None]:

# Everything in this notebook runs on the CPU.
device = torch.device("cpu")

# Dataset object built from the data/thermoml directory.
path = osp.join("data", "thermoml")
testloader = ThermoMLDataset(path)

In [None]:
# Obtain the configuration object from the project's configs.default module;
# the following cell overrides several of its fields.
from configs.default import get_config
config = get_config()

In [None]:
# Field overrides applied on top of the loaded config, in this order.
config_overrides = {
    "num_train_steps": 300_000,
    "learning_rate": 0.001,
    "propagation_depth": 4,
    "hidden_dim": 128,
    "num_mlp_layers": 1,
    "pre_layers": 1,
    "post_layers": 2,
}
for field, value in config_overrides.items():
    setattr(config, field, value)

In [None]:
# Floating-point dtype passed to `.to(...)` when the model is created.
model_dtype = torch.float32

In [None]:
# Build the network from the config and the output of calc_deg, then place it
# on the chosen device with the chosen dtype.
deg = calc_deg("ramirez", './')
model = create_model(config, deg)
model = model.to(device=device, dtype=model_dtype)

In [None]:
# Restore model weights from a saved checkpoint file, if it exists.
# NOTE(review): torch.load unpickles the file, so only point this at
# checkpoints from a trusted source.
ckp_path = "./train/checkpoints/model3-612k.pth"
if osp.exists(ckp_path):
    # map_location forces all stored tensors onto the CPU.
    checkpoint = torch.load(ckp_path, map_location=torch.device("cpu"))
    model.load_state_dict(checkpoint["model_state_dict"])
    print(f"model checkpoint step {checkpoint['step']}")
    # Drop the reference so the whole checkpoint dict is not kept alive.
    del checkpoint
else:
    # Previously a missing file was skipped silently, so every later cell
    # would run on the weights the model was created with and give no hint.
    print(f"WARNING: checkpoint {ckp_path!r} not found; model keeps its initial weights")


In [None]:
# Run the model on every graph of the test set and score the predicted
# parameters with MAPE against that graph's rho and vp data.
# model_para maps InChI -> (first row of predicted parameters as a list, mden, mvp).
model_para = {}
model.eval()
# Inference only: with autograd disabled no graph is recorded for the forward
# passes; the numerical results are unchanged.
with torch.no_grad():
    for graph in testloader:
        graph = graph.to(device)
        parameters = model(graph)
        # MAPE receives float64 NumPy arrays; rho and vp are viewed as rows of five values.
        params = parameters.squeeze().to(torch.float64).detach().numpy()
        rho = graph.rho.view(-1, 5).to(torch.float64).numpy()
        vp = graph.vp.view(-1, 5).to(torch.float64).numpy()
        mden, mvp = MAPE(params, rho, vp)
        parameters = parameters.tolist()[0]
        model_para[graph.InChI] = (parameters, mden, mvp)

In [None]:
# For every InChI present in both ra_para and model_para, compute the mean
# absolute percentage deviation between the reference parameters and the
# model's parameters, and print the cases where it exceeds 2 %.
for inchi in ra_para:
    if inchi in model_para:
        ra = np.array(ra_para[inchi])
        ml = np.array(model_para[inchi][0])
        # The denominator is taken in absolute value so each term stays
        # non-negative even if a reference parameter is negative; for
        # positive references the result is identical to dividing by ra.
        mape = np.abs(ra - ml) / np.abs(ra) * 100
        mape = mape.mean()
        if mape > 2:
            print(inchi)
            print(f"###########---{mape}---##########")
            # Side-by-side (reference, model) value for each parameter.
            for row in zip(ra, ml):
                print(row)

In [None]:
import pickle

# Serialise model_para to disk; mode "wb" creates the file or overwrites an
# existing one.
with open("./data/thermoml/processed/para3.pkl", "wb") as out_file:
    pickle.dump(model_para, out_file)

In [None]:
import pickle

# Write model_para to the raw/ folder; mode "wb" creates the file or
# overwrites an existing one.
# NOTE(review): this pickles the same model_para object that the cell above
# writes to processed/para3.pkl, here under a "fitted" name; confirm that
# this is intended.
with open("./data/thermoml/raw/para3_fitted.pkl", "wb") as out_file:
    pickle.dump(model_para, out_file)

In [None]:
# Tally the test-set graphs:
#   n_vp         - rho tensor is entirely zero
#   n_rho        - rho is not all zero, but the vp tensor is
#   n_rho_vp     - neither tensor is entirely zero
#   ntrain       - InChI is absent from ra_para
#   ntrain_rhovp - counted in n_rho_vp and absent from ra_para
# (presumably an all-zero tensor marks missing data; confirm against the dataset code)
n_rho_vp = n_rho = n_vp = 0
ntrain = ntrain_rhovp = 0
size_vp = size_rho = 0
for graph in testloader:
    absent_from_ra = graph.InChI not in ra_para
    if bool((graph.rho == 0).all()):
        n_vp += 1
    elif bool((graph.vp == 0).all()):
        n_rho += 1
    else:
        n_rho_vp += 1
        ntrain_rhovp += int(absent_from_ra)
    ntrain += int(absent_from_ra)


In [None]:
# Display the tallies computed in the previous cell as one tuple.
n_rho_vp, n_rho, n_vp, ntrain, ntrain_rhovp

In [35]:
import pickle, numpy as np

# Read the pickled parameter dictionary back from disk (mode "rb": nothing is
# created or modified here).
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only open files that this project produced itself.
with open("./data/thermoml/raw/para3_fitted.pkl", "rb") as file:
    para_fitted = pickle.load(file)

In [36]:
# Number of entries in the loaded dictionary.
len(para_fitted)

2197

In [None]:
# Go through the loaded entries, printing every one whose mden or mvp exceeds
# 0.05, and count how many InChIs also occur in ra_para.
# NOTE(review): 0.05 is compared directly with the values returned by MAPE;
# confirm whether those are fractions or percentages.
n = 0
nra = 0
for inchi, (ml, mden, mvp) in para_fitted.items():
    above_threshold = mden > 0.05 or mvp > 0.05
    if above_threshold:
        print(inchi)
        print(f"###########---{mden, mvp}---##########")
        print(ml)
        n += 1
    nra += int(inchi in ra_para)
print(f"number of test set left: {n}")
print(f"number of train set: {len(para_fitted) - n}")
print(f"number of val set: {nra}")

In [None]:
# Column-wise view of para_fitted: the InChI keys plus the second (mden) and
# third (mvp) element of every value; the first element is not exported.
data = {
    "inchis": list(para_fitted),
    "mden": [mden for _, mden, _ in para_fitted.values()],
    "mvp": [mvp for _, _, mvp in para_fitted.values()],
}

In [None]:
import polars as pl
# Turn the column dict into a polars DataFrame.
# NOTE(review): `data` is rebound from a dict to a DataFrame here, so
# re-running this cell on its own passes a DataFrame back into the constructor.
data = pl.DataFrame(data)

In [None]:
# Show the (rows, columns) shape of the DataFrame.
data.shape

In [None]:
# Export the table as CSV; the relative path resolves against the notebook's working directory.
data.write_csv('parametrisation.csv')