In [None]:
# Module-level key/value store used by set_value(), get_value() and get_keys().
# A `global` statement has no effect at module scope, so the name is simply
# bound here.
global_dict = {}
# Notebook heading carried over from the export:
# loading data with 'ef' and 'eg' labels by default.


def set_value(name, value):
    """Store ``value`` under ``name`` in the module-level ``global_dict``.

    An entry that already exists under ``name`` is overwritten.
    """
    global_dict.update({name: value})


def get_value(name, defValue=None):
    """Look up ``name`` in the module-level ``global_dict``.

    args:
        name: key to look up.
        defValue: value handed back when nothing is stored under ``name``.
    return:
        the stored value, or ``defValue`` when the key is missing.
    """
    return global_dict.get(name, defValue)

def get_keys():
    """Return the keys currently held in the module-level ``global_dict``.

    The previous implementation tested for a module global called
    ``'_global_dict'``; the store in this file is named ``global_dict``, so
    the test never succeeded and ``None`` was always returned. The lookup
    now uses the real name.

    return:
        a ``dict_keys`` view of the store, or ``None`` when no
        ``global_dict`` exists in this module's namespace.
    """
    store = globals().get('global_dict')
    if store is None:
        return None
    return store.keys()


In [3]:

# Record the task name in the shared store before the project modules below
# are imported.
# NOTE(review): presumably the trainer modules read 'task' while being
# imported, which would explain why this call precedes the imports - confirm.
set_value('task', 'ef')  # loading dataset with 'ef' and 'eg'.
import matplotlib.pyplot as plt
import torch
from utils.registry import registry, setup_imports
from trainer_heanet import load_mp_data, transform
from sklearn.metrics import r2_score, mean_absolute_error
import seaborn as sns
import os
from scipy import stats
from trainer_heanet_mtl import validate_model, evaluate


# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


def load_model(model_name, hidden_channels=128, n_filters=64, n_interactions=3,
               n_gaussians=50, cutoff=10, num_tasks=2, tower_h1=128,
               tower_h2=64):
    """
    Build a 'heanet' model from the registry and restore its trained weights.

    The hyper parameters passed here have to correspond to the ones the
    checkpoint was trained with.

    args:
        model_name: str. path of the trained model file.
            For example: './saved_models/ms_type0_300.pt'
        hidden_channels, cutoff, num_tasks, tower_h1, tower_h2: forwarded
            unchanged to the model constructor.
        n_filters: forwarded to the constructor as ``num_filters``.
        n_interactions: forwarded to the constructor as ``num_interactions``.
        n_gaussians: forwarded to the constructor as ``num_gaussians``.
    return:
        the model with the saved state loaded, moved to CUDA when it is
        available and to the CPU otherwise.
    """
    # Constructor arguments: caller-supplied hyper parameters plus the fixed
    # settings this notebook always uses.
    net_kwargs = dict(
        hidden_channels=hidden_channels,
        num_filters=n_filters,
        num_interactions=n_interactions,
        num_gaussians=n_gaussians,
        cutoff=cutoff,
        readout='add',
        dipole=False,
        mean=None,
        std=None,
        atomref=None,
        num_tasks=num_tasks,
        tower_h1=tower_h1,
        tower_h2=tower_h2,
    )

    # setup_imports() runs before the registry is queried for 'heanet'.
    setup_imports()
    target = torch.device(device='cuda' if torch.cuda.is_available() else 'cpu')
    model_cls = registry.get_model('heanet')
    net = model_cls(**net_kwargs)

    # Restore the trained parameters, mapping the tensors onto the chosen device.
    weights = torch.load(model_name, map_location=target)
    net.load_state_dict(weights)
    net.to(target)
    return net


# Create the train / validation / test loaders once at module level.
# `test_loader` is the one read by the prediction code further down.
train_loader, validate_loader, test_loader = load_mp_data(is_validate=True)



ModuleNotFoundError: No module named 'HEA_project'

In [None]:
class ModelPrediction:
    """Bundle a checkpoint path with the settings used to evaluate it.

    When the tasks come from a multi-target learning model, ``tasks`` should
    be a list of str such as ['ef', 'eg'], and ``transforms`` should be the
    matching list, e.g. ['scaling', 'scaling'].
    """

    def __init__(self, model_name, tasks=None, transforms=None,
                 hidden_channels=128, n_filters=64, n_interactions=3,
                 n_gaussians=50, cutoff=10):
        self.model_name = model_name
        # A new list is created for each instance whenever a default is used.
        self.tasks = ['ef', 'eg'] if tasks is None else tasks
        self.transforms = (['scaling', 'scaling'] if transforms is None
                           else transforms)
        # Network hyper parameters, handed to load_model() later on.
        self.hidden_channels = hidden_channels
        self.n_filters = n_filters
        self.n_interactions = n_interactions
        self.n_gaussians = n_gaussians
        self.cutoff = cutoff

    def obtain_predictions_from_mtl(self):
        """Load the checkpoint, run it over ``test_loader`` and print the score.

        return:
            the pair (out_true, out_pred) as produced by validate_model(),
            in that order.
        """
        net_settings = {
            'hidden_channels': self.hidden_channels,
            'n_filters': self.n_filters,
            'n_interactions': self.n_interactions,
            'n_gaussians': self.n_gaussians,
            'cutoff': self.cutoff,
        }
        model = load_model(self.model_name, **net_settings)
        predicted, expected = validate_model(model, test_loader,
                                             tasks=self.tasks,
                                             transforms=self.transforms)
        score = evaluate(predicted, expected)
        message = 'mae in the test set is {}'.format(score)
        print(message)
        return expected, predicted


def obtain_predictions(model_name, task='ef'):
    """
    Run a trained model over ``test_loader`` and collect labels and predictions.

    This function is similar to the test function. The model is first built
    and its parameters loaded through load_model() (with that function's
    default hyper parameters), switched to evaluation mode, and then applied
    to every test batch with gradient tracking disabled. The R2 score and the
    mean absolute error are printed before returning.

    :param
        model_name: 'str', the ML model name, like  './saved_models_lin_256/k_mp_log_500.pt'.
        task: 'str'. The target prediction; also the key used to read the
            label from each batch.
    :return:
        y_true: ndarray.
        y_pred: ndarray.
    """
    # Tasks whose labels are compared in log form, and tasks whose model
    # output is mapped back with the inverse 'scaling' transform.
    log_tasks = ('k', 'g')
    scaled_tasks = ('ef', 'eg')

    model = load_model(model_name)
    model.eval()  # freeze the dropout and BN layer
    y_pred = []
    y_true = []
    with torch.no_grad():
        for batch in test_loader:
            # NOTE(review): the return value is discarded, so this relies on
            # the batch object being moved in place - confirm for the batch type.
            batch.to(device)
            out = model(batch.atomic_numbers.long(), batch.pos, batch=batch)

            # Labels are left unchanged by default; for 'k' and 'g' the
            # log-form data is compared directly, to match a previous paper.
            y_label = batch[task]
            if task in log_tasks:
                y_label = transform('log', y_label, forward=True)
            y_true.append(y_label)

            # For 'ef' and 'eg' the prediction is scaled back into the raw
            # range, because scaling enlarges the original data.
            if task in scaled_tasks:
                out = transform('scaling', out, forward=False)
            y_pred.append(out)

    y_true = torch.cat(y_true, dim=0).detach().cpu().numpy()
    y_pred = torch.cat(y_pred, dim=0).detach().cpu().numpy()

    print(y_true.shape, y_pred.shape)
    print(y_true, y_pred)
    mae_s = mean_absolute_error(y_true, y_pred)
    r2_s = r2_score(y_true, y_pred)
    print(r2_s, mae_s)
    return y_true, y_pred

# model_name = './saved_models_mtl/mtl_2_mp_ef_eg_128_64_400_best.pt'

# y_true, y_pred = obtain_predictions('../saved_models_lin_256/k_mp_log_500_best_256.pt', task='k')


def plot_error_count(y_true, y_pred, save_fig=True, fig_path='./fig'):
    """
    plot the error count density figure.

    The relative error ``(y_true - y_pred) / y_true`` is drawn as a kernel
    density estimate on a new figure.

    The earlier call passed the axis-label strings as ``x=`` / ``y=`` together
    with ``data=<ndarray>``; seaborn interprets those as variable names to
    look up in ``data``, which an ndarray does not have. The array is now
    plotted directly and the strings are applied as axis labels.

    :param y_true: ndarray
    :param y_pred: ndarray
    :param save_fig: bool. Save the figure as 'fig2_error.png' when True
        (the default, matching the previous hard-coded behaviour).
    :param fig_path: str. Directory the figure is written to; it is created
        if it does not exist yet.
    :return:
        nothing
    """
    fig = plt.figure()
    # NOTE(review): entries where y_true is 0 make this division non-finite.
    percent = (y_true - y_pred) / y_true

    # the density is obtained from a kernel density estimation with Gaussian kernel
    # non-Gaussian kernels are not supported since seaborn version 0.11.0
    ax = sns.kdeplot(data=percent)
    ax.set_xlabel('ef [eV/atom]')
    ax.set_ylabel('count density [%]')

    if save_fig:
        os.makedirs(fig_path, exist_ok=True)
        fig.savefig(os.path.join(fig_path, 'fig2_error.png'), format='png',
                    bbox_inches='tight')

In [None]:
def main():
    """Evaluate the saved multi-task checkpoint on the test loader.

    ModelPrediction is created with its default tasks (['ef', 'eg']) and
    transforms; obtain_predictions_from_mtl() prints the score and hands
    back the true and predicted values.
    """
    mp_mtl = ModelPrediction(model_name='../saved_models_mtl/mtl_2_mp_ef_eg_128_64_400_best.pt')
    # The two results are only bound locally in the lines visible here.
    y_true, y_pred = mp_mtl.obtain_predictions_from_mtl()
