In [None]:
# libraries
import os

import h5py
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
from captum.attr import LayerGradientXActivation
from torch.utils.data import DataLoader
from tqdm import tqdm

from bilstm_utils import LSTM_Captum, LSTMPred # custom dataset and trainer
# suppress warnings
# NOTE: the "ignore" filter below applies to every warning category for the
# rest of the kernel session, so deprecation/runtime warnings are hidden too.
import warnings
warnings.filterwarnings("ignore")

# Use CUDA when PyTorch reports an available GPU, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Folder holding the serialized test samples; the attribution loop reads
# 'sample_<n>.pt' files from it.
feature_folder = 'USeqPlusRW/test/'

# Test-set table; its 'transcript' column is used to size the run and to label
# each saved attribution file.
exp_dat = pd.read_csv('data/test_OnlyLiver_Cov_0.3_NZ_20_PercNan_0.05.csv')

# Checkpoint of the trained BiLSTM (the file name records the training setup).
save_loc = 'saved_models/LSTM DS: Liver [0.3, 20, 0.05, BS 1, D 0.3 E 50 LR 0.0001 Seed: 1] F: embedding/epoch=19-step=111820.ckpt'

# Hyperparameters forwarded to the model constructors when the checkpoint is
# restored. They mirror the values recorded in the checkpoint name above
# (D 0.3, E 50, BS 1, LR 0.0001).
dropout_val = 0.3
num_epochs = 50
bs = 1
lr = 1e-4  # was 1e-3, which contradicted the "LR 0.0001" in the checkpoint name

# Two wrappers are restored from the same checkpoint:
#   * LSTM_Captum - the forward function handed to Captum for attributions
#   * LSTMPred    - used to produce the prediction stored next to them
model = LSTM_Captum.load_from_checkpoint(save_loc, dropout_val=dropout_val, num_epochs=num_epochs, bs=bs, lr=lr)
model_pred = LSTMPred.load_from_checkpoint(save_loc, dropout_val=dropout_val, num_epochs=num_epochs, bs=bs, lr=lr)

# Every sample is moved to `device` before it is fed to the models, so place
# both models there explicitly instead of relying on where the checkpoint
# loader happened to put them.
model = model.to(device)
model_pred = model_pred.to(device)

In [None]:
# Index of the dataset part handled by this run. The selection cell further
# down splits the test set into 32 parts and derives start/stop from `part`.
part = 0

In [None]:
# Select the contiguous slice of test samples processed by this run.
# The test set is divided into `num_parts` parts and `part` picks one of them.
num_parts = 32
if not 0 <= part < num_parts:
    raise ValueError(f"part must be in [0, {num_parts}), got {part}")

# Materialise the transcript column once and reuse it below.
all_transcripts = list(exp_dat['transcript'])
total_num_sample = len(all_transcripts)

# start (inclusive) / stop (exclusive) sample indices of the selected part
start = int(part * total_num_sample / num_parts)
stop = int((part + 1) * total_num_sample / num_parts)

print("start: ", start)
print("stop: ", stop)

print("total samples: ", total_num_sample)

# Transcripts of the selected part; index 0 corresponds to sample `start`.
transcripts_list = all_transcripts[start:stop]

part_length = len(transcripts_list)

# Destination folder for the per-sample .npz files. Create it up front so the
# attribution loop cannot fail at save time after the expensive work is done.
out_folder_path = 'final_bilstm_int/'
os.makedirs(out_folder_path, exist_ok=True)

In [None]:
# Captum attribution object: gradient x activation taken at the model's
# embedding layer, with the restored LSTM_Captum model as forward function.
lxg = LayerGradientXActivation(forward_func=model, layer=model.embedding)

In [None]:
# The prediction model only runs forward passes, so switch it to eval mode:
# otherwise dropout stays active and the stored y_pred changes from run to run.
model_pred.eval()

# NOTE(review): the attribution model (`model`) is intentionally left in
# whatever mode it was loaded in; the original cell carried a "remove model
# eval" note, presumably because cuDNN RNN backward requires training mode.
# Confirm whether dropout noise in the attributions is acceptable.

for sample_number in tqdm(range(start, stop)):
    file_name = feature_folder + 'sample_' + str(sample_number) + '.pt'

    out_dict = {}

    # load the sample (once; it was previously read from disk twice)
    data = torch.load(file_name)
    # codon ids as a long tensor, the input format fed to both models
    data.x = torch.tensor([int(k) for k in data.x['codon_seq']], dtype=torch.long)
    # normalise the target by its NaN-ignoring sum
    data.y = data.y / torch.nansum(data.y)
    data = data.to(device)

    # One attribution row per position i: the position index is handed to the
    # model as an additional forward argument.
    attr_matrix = []
    for i in tqdm(range(len(data.x))):
        index_val = torch.tensor([i])

        attr_ind = lxg.attribute(inputs=data.x, additional_forward_args=index_val)

        # collapse dim 1 of the layer attribution
        # (presumably the embedding dimension - TODO confirm)
        attr_ind = torch.sum(attr_ind, dim=1)

        attr_ind = attr_ind.cpu().detach().numpy()

        attr_matrix.append(attr_ind)

    attr_matrix = np.array(attr_matrix)

    # collect the attributions together with the inputs and targets
    out_dict['attributions'] = attr_matrix
    out_dict['sample_number'] = sample_number
    out_dict['x_input'] = data.x.detach().cpu().numpy()
    out_dict['y_true'] = data.y.detach().cpu().numpy()

    # Prediction needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        y_pred = model_pred(data.x)

    out_dict['y_pred'] = y_pred.detach().cpu().numpy()
    # add the transcript (transcripts_list is offset by `start`)
    out_dict['transcript'] = transcripts_list[sample_number-start]

    # Save the out_dict. It is passed positionally, so NumPy stores it as a
    # pickled object array under the key 'arr_0'; read it back with
    # np.load(path, allow_pickle=True)['arr_0'].item(). Kept as-is so existing
    # readers of these files keep working.
    out_file_name = out_folder_path + 'sample_' + str(sample_number) + '.npz'
    np.savez_compressed(out_file_name, out_dict)

    
