In [1]:
from pathlib import Path
import torch
torch.set_printoptions(precision=10)

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from models.ranked_transformer import HsqcRankedTransformer

from models.optional_input_ranked_transformer import OptionalInputRankedTransformer
from datasets.optional_2d_folder_dataset import OptionalInputDataModule
import yaml
from pytorch_lightning.loggers import TensorBoardLogger
torch.set_float32_matmul_precision('medium')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from datasets.dataset_utils import specific_radius_mfp_loader


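Here we compare the models' performance across seven input combinations: all inputs, HSQC + H NMR, HSQC + C NMR, HSQC only, 1D NMR only, H NMR only, and C NMR only.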


In [3]:
# Locate the trained run, its checkpoint, and its hyperparameter file, then
# configure the fingerprint loader to match the settings the run was trained with.

model_path = Path("/root/MorganFP_prediction/reproduce_previous_works/weird_H_and_tautomer_cleaned/flexible_models_best_FP/r0_r2_FP_trial_2/")
# Alternative run directory:
# Path("/root/MorganFP_prediction/reproduce_previous_works/weird_H_and_tautomer_cleaned/train_on_all_data_possible/only_hsqc_trial_1/")

checkpoint_path = model_path / "checkpoints/epoch=42-all_inputs.ckpt"
# Alternative checkpoint:
# model_path / "checkpoints/epoch=21-step=37708.ckpt"

hyperpaerameters_path = model_path / "hparams.yaml"

with open(hyperpaerameters_path, 'r') as file:
    hparams = yaml.safe_load(file)

# The fingerprint building type is the last "_"-separated token of the stored value.
FP_building_type = hparams['FP_building_type'].split("_")[-1]
# 1D NMR is only used when 'use_oneD_NMR_no_solvent' is truthy; otherwise run 2D-only.
only_2d = not hparams['use_oneD_NMR_no_solvent']
specific_radius_mfp_loader.setup(only_2d=only_2d, FP_building_type=FP_building_type)
# The max radius is parsed from the last "_"-separated token of FP_choice:
# its first character is dropped and the remainder is read as an integer.
specific_radius_mfp_loader.set_max_radius(int(hparams['FP_choice'].split("_")[-1][1:]), only_2d=only_2d)

# checkpoint_path is passed explicitly when loading the model, so remove it from
# hparams to prevent a double definition. pop() with a default (instead of del)
# keeps this cell working when the YAML file has no such key.
hparams.pop('checkpoint_path', None)
hparams['use_peak_values'] = False




In [4]:
# Restore the model from the checkpoint, passing the loaded hparams as keyword arguments.
model = HsqcRankedTransformer.load_from_checkpoint(checkpoint_path, **hparams)
model.change_ranker_for_testing()
# Alternative: point the ranker at an explicit test ranking set.
# model.change_ranker_for_testing(test_ranking_set_path = "/workspace/ranking_sets_cleaned_by_inchi/SMILES_R0_to_R4_reduced_FP_ranking_sets_only_all_info_molecules/test/rankingset.pt")
# Use the GPU when one is present; fall back to CPU so the cell does not fail
# on a host without CUDA.
model = model.to("cuda" if torch.cuda.is_available() else "cpu")

Using jaccard:  False


Initialized SignCoordinateEncoder[384] with dims [180, 180, 24] and 2 positional encoders. 24 bits are reserved for encoding the final bit


HsqcRankedTransformer saving args


In [5]:
# Build the data module from the loaded hyperparameters and prepare its test split.
datamodule = OptionalInputDataModule(
    dir="/workspace/SMILES_dataset",
    FP_choice=hparams["FP_choice"],
    input_src=["HSQC", "oneD_NMR"],
    batch_size=hparams['bs'],
    parser_args=hparams,
)
datamodule.setup("test")

# test_dataloader() is unpacked into seven loaders, one per input combination.
(
    loader_all_inputs,
    loader_HSQC_H_NMR,
    loader_HSQC_C_NMR,
    loader_only_hsqc,
    loader_only_1d,
    loader_only_H_NMR,
    loader_only_C_NMR,
) = datamodule.test_dataloader()
    




In [6]:
# Trainer with default settings; used for every test run in this notebook.
trainer = Trainer()

# Metric suffixes that are pulled out of the test results and written to the report.
metric_to_focus = ['rank_1', 'rank_5', "mean_rank", 'cos', 'f1']
def show_model_performance(model, out_file ="temp_model_show.txt"):
    """Test `model` on each of the seven test loaders and write a metric report.

    Relies on the notebook-level `trainer`, `metric_to_focus` and `loader_*`
    variables. For every loader, `trainer.test` is run and an entry is written
    to `out_file`: a newline, the loader name, then each metric in
    `metric_to_focus` read from the first result dict under the key
    'test/mean_<metric>' and formatted with four decimals.

    Args:
        model: the model handed to `trainer.test`.
        out_file: path of the report file; it is opened in "w" mode, so any
            existing content is overwritten.
    """
    # Insertion order fixes the order of the report entries.
    # To evaluate a subset (e.g. only "only_hsqc"), trim this mapping.
    loaders_by_name = {
        "all_inputs": loader_all_inputs,
        "HSQC_H_NMR": loader_HSQC_H_NMR,
        "HSQC_C_NMR": loader_HSQC_C_NMR,
        "only_hsqc": loader_only_hsqc,
        "only_1d": loader_only_1d,
        "only_H_NMR": loader_only_H_NMR,
        "only_C_NMR": loader_only_C_NMR,
    }
    with open(out_file, "w") as report:
        for input_name, test_loader in loaders_by_name.items():
            outcome = trainer.test(model, dataloaders=test_loader)
            report.write(f"\n{input_name:10}: ")
            for metric in metric_to_focus:
                value = outcome[0]['test/mean_' + metric]
                report.write(f"{metric}: {value:.4f}, ")


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [7]:
# Evaluate the loaded model on every test loader; the metrics go to the report file.
# NOTE(review): the recorded output of this cell ends with
# "TypeError: cm() got an unexpected keyword argument 'use_Jaccard'".
show_model_performance(model, out_file="temp_model_show.txt");

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0:   0%|          | 0/61 [00:00<?, ?it/s]

  output = torch._nested_tensor_from_mask(output, src_key_padding_mask.logical_not(), mask_check=False)


TypeError: cm() got an unexpected keyword argument 'use_Jaccard'

In [None]:
# Spectra-flexible model.
# Usually not needed, because this model is tested right away after training.

# Load the model.
# NOTE: this cell rebinds model_path, hyperpaerameters_path, checkpoint_path,
# hparams and model, replacing the values set by the earlier loading cells.

model_path = Path("/root/MorganFP_prediction/reproduce_previous_works/average_3/model_sizes/flexible_384_trail1")
hyperpaerameters_path = model_path / "hparams.yaml"

checkpoint_path = model_path / "checkpoints/epoch=41-step=35994.ckpt"
with open(hyperpaerameters_path, 'r') as file:
    hparams = yaml.safe_load(file)
# checkpoint_path is passed explicitly below, so remove it from hparams to
# prevent a double definition. pop() with a default (instead of del) keeps this
# cell working when the YAML file has no such key.
hparams.pop('checkpoint_path', None)
hparams['use_peak_values'] = False

model = OptionalInputRankedTransformer.load_from_checkpoint(checkpoint_path, **hparams)
model.change_ranker_for_testing()

# NOTE(review): `trainer` and `datamodule` come from earlier cells; `datamodule`
# was constructed from the previously loaded hparams - confirm that is intended.
flexible_model_result = trainer.test(model, datamodule)


Using jaccard:  False


AssertionError: /workspace/ranking_sets_cleaned_by_inchi/SMILES_R0_to_R4_reduced_FP_ranking_sets_only_all_info_molecules/val/rankingset.pt does not exist

In [None]:

# Write the flexible model's focus metrics, one entry per input combination.
# Each value is read from the first result dict under 'test_mean_<metric>_<name>'.
# NOTE(review): this opens the same file name that show_model_performance uses
# by default, in "w" mode, so that report is overwritten.
names = [
    "all_inputs",
    "HSQC_H_NMR",
    "HSQC_C_NMR",
    "only_hsqc",
    "only_1d",
    "only_H_NMR",
    "only_C_NMR",
]
out_file = "temp_model_show.txt"
with open(out_file, "w") as report:
    for input_name in names:
        report.write(f"\n{input_name:10}: ")
        for metric in metric_to_focus:
            key = "test_mean_" + metric + "_" + input_name
            value = flexible_model_result[0][key]
            report.write(f"{metric}: {value:.4f}, ")


In [None]:
# Show which metric keys the first result dict of the flexible model contains.
flexible_model_result[0].keys()


dict_keys(['test_mean_ce_loss_all_inputs', 'test_mean_pos_loss_all_inputs', 'test_mean_neg_loss_all_inputs', 'test_mean_pos_neg_loss_all_inputs', 'test_mean_cos_all_inputs', 'test_mean_active_bits_all_inputs', 'test_mean_f1_all_inputs', 'test_mean_precision_all_inputs', 'test_mean_recall_all_inputs', 'test_mean_accuracy_all_inputs', 'test_mean_mean_rank_all_inputs', 'test_mean_rank_1_all_inputs', 'test_mean_rank_5_all_inputs', 'test_mean_rank_10_all_inputs', 'test_mean_ce_loss_HSQC_H_NMR', 'test_mean_pos_loss_HSQC_H_NMR', 'test_mean_neg_loss_HSQC_H_NMR', 'test_mean_pos_neg_loss_HSQC_H_NMR', 'test_mean_cos_HSQC_H_NMR', 'test_mean_active_bits_HSQC_H_NMR', 'test_mean_f1_HSQC_H_NMR', 'test_mean_precision_HSQC_H_NMR', 'test_mean_recall_HSQC_H_NMR', 'test_mean_accuracy_HSQC_H_NMR', 'test_mean_mean_rank_HSQC_H_NMR', 'test_mean_rank_1_HSQC_H_NMR', 'test_mean_rank_5_HSQC_H_NMR', 'test_mean_rank_10_HSQC_H_NMR', 'test_mean_ce_loss_HSQC_C_NMR', 'test_mean_pos_loss_HSQC_C_NMR', 'test_mean_neg_loss_