This notebook generates pharmacophore features from pretrained model (Used for Docking Score prediction - Alpha)

In [1]:
import sys
import os
import torch
from tqdm import tqdm

sys.path.append("..")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from src.scoring import PrecalculationModule

In [3]:
MODEL_PATH = '../model_weights/model.tar'
HEAD_PATH = '../model_weights/base_100_per_pocket.pth'

split_file = torch.load('../dataset/pocket_to_ligands.pt')

In [4]:
predictor = PrecalculationModule(MODEL_PATH, HEAD_PATH, 'cuda')

In [5]:
failed_pairs = []
i = 0
for pdb_id, lig_path in tqdm(split_file.items()):
    try:
        lig_path = os.path.join('../dataset/crossdocked_pocket10/', lig_path)
        rec_path = os.path.join('../dataset/crossdock/', pdb_id + '_rec.pdb')
        out_path = rec_path.replace('crossdock', 'docking_pharmacophores').replace('.pdb', '.pt')

        cache = predictor.run(rec_path, ref_ligand_path=lig_path)
        torch.save(cache, out_path)
    except Exception as e:
        failed_pairs.append((rec_path, lig_path))
        print(e)

  0%|          | 35/15307 [00:22<2:26:58,  1.73it/s]




  0%|          | 63/15307 [00:38<2:29:28,  1.70it/s]




  1%|          | 163/15307 [01:46<1:59:49,  2.11it/s]




  1%|▏         | 206/15307 [02:12<2:08:28,  1.96it/s]




  2%|▏         | 265/15307 [02:47<2:19:38,  1.80it/s]




  3%|▎         | 414/15307 [04:19<2:03:44,  2.01it/s]




  5%|▍         | 695/15307 [07:05<1:48:28,  2.25it/s]




  6%|▋         | 964/15307 [09:43<1:31:36,  2.61it/s]




  6%|▋         | 973/15307 [09:48<1:58:26,  2.02it/s]




  7%|▋         | 1067/15307 [10:38<1:56:58,  2.03it/s]




  7%|▋         | 1103/15307 [10:59<1:46:28,  2.22it/s]




 12%|█▏        | 1784/15307 [17:40<2:13:49,  1.68it/s]




 12%|█▏        | 1880/15307 [18:35<1:33:48,  2.39it/s]




There may be erroneous addition spaces or
the file may contains Atom Lists, which are ignored

 15%|█▌        | 2309/15307 [22:41<1:38:27,  2.20it/s]




 16%|█▌        | 2453/15307 [24:03<1:38:08,  2.18it/s]




 21%|██        | 3165/15307 [31:06<1:32:28,  2.19it/s]




 23%|██▎       | 3482/15307 [34:13<2:00:22,  1.64it/s]




 23%|██▎       | 3511/15307 [34:30<1:18:30,  2.50it/s]




 25%|██▌       | 3890/15307 [38:06<2:05:04,  1.52it/s]




 26%|██▋       | 4020/15307 [39:20<1:37:17,  1.93it/s]




 33%|███▎      | 5103/15307 [50:17<1:37:31,  1.74it/s]




 36%|███▌      | 5450/15307 [53:45<1:32:50,  1.77it/s]




 36%|███▋      | 5550/15307 [54:40<1:23:20,  1.95it/s]




 38%|███▊      | 5749/15307 [56:44<1:30:39,  1.76it/s]




 38%|███▊      | 5787/15307 [57:06<1:22:01,  1.93it/s]




 39%|███▉      | 6010/15307 [59:27<2:12:49,  1.17it/s]




 47%|████▋     | 7216/15307 [1:11:26<1:02:10,  2.17it/s]




 48%|████▊     | 7357/15307 [1:12:50<1:09:23,  1.91it/s]




 49%|████▉     | 7538/15307 [1:14:35<53:22,  2.43it/s]  




 49%|████▉     | 7570/15307 [1:14:53<1:06:27,  1.94it/s]




 51%|█████     | 7784/15307 [1:17:10<1:13:42,  1.70it/s]




 51%|█████▏    | 7868/15307 [1:17:59<1:09:45,  1.78it/s]




 52%|█████▏    | 7915/15307 [1:18:29<1:04:37,  1.91it/s]




 52%|█████▏    | 7921/15307 [1:18:34<1:43:54,  1.18it/s]




 52%|█████▏    | 8021/15307 [1:19:37<1:25:03,  1.43it/s]




 53%|█████▎    | 8109/15307 [1:20:28<1:16:37,  1.57it/s]




 55%|█████▌    | 8450/15307 [1:23:58<42:29,  2.69it/s]  




 56%|█████▌    | 8509/15307 [1:24:34<1:06:49,  1.70it/s]




 58%|█████▊    | 8865/15307 [1:28:11<59:38,  1.80it/s]  




 58%|█████▊    | 8907/15307 [1:28:35<1:00:05,  1.78it/s]




 59%|█████▊    | 8983/15307 [1:29:22<1:15:07,  1.40it/s]




 59%|█████▉    | 9036/15307 [1:29:55<1:07:45,  1.54it/s]




 65%|██████▍   | 9940/15307 [1:39:06<51:59,  1.72it/s]  




 67%|██████▋   | 10275/15307 [1:42:36<48:29,  1.73it/s]  




 69%|██████▊   | 10513/15307 [1:45:07<29:25,  2.72it/s]  




 71%|███████   | 10853/15307 [1:48:38<46:58,  1.58it/s]  




 74%|███████▎  | 11279/15307 [1:53:01<33:36,  2.00it/s]  




 79%|███████▊  | 12044/15307 [2:00:56<46:50,  1.16it/s]  




 80%|███████▉  | 12236/15307 [2:02:58<36:03,  1.42it/s]




 82%|████████▏ | 12626/15307 [2:06:56<28:05,  1.59it/s]




 85%|████████▌ | 13012/15307 [2:10:57<19:10,  2.00it/s]




 88%|████████▊ | 13468/15307 [2:15:56<11:08,  2.75it/s]




 91%|█████████ | 13937/15307 [2:20:23<13:21,  1.71it/s]




 94%|█████████▍| 14358/15307 [2:24:04<08:20,  1.90it/s]




 96%|█████████▌| 14719/15307 [2:27:15<04:51,  2.02it/s]




There may be erroneous addition spaces or
the file may contains Atom Lists, which are ignored

100%|██████████| 15307/15307 [2:32:29<00:00,  1.67it/s]


Check if all files are saved correctly (not corrupted)

In [14]:
for pdb_id, lig_path in tqdm(split_file.items()):
    lig_path = os.path.join('../dataset/crossdocked_pocket10/', lig_path)
    rec_path = os.path.join('../dataset/crossdock/', pdb_id + '_rec.pdb')
    out_path = rec_path.replace('crossdock', 'docking_pharmacophores').replace('.pdb', '.pt')
    
    if os.path.exists(out_path):
        try:
            torch.load(out_path)
        except:
            cache = predictor.run(rec_path, ref_ligand_path=lig_path)
            print('recomputed', out_path)
            torch.save(cache, out_path)

  4%|▍         | 605/15307 [00:00<00:16, 902.36it/s]

recomputed ../dataset/docking_pharmacophores/3mb6_A_rec.pt
recomputed ../dataset/docking_pharmacophores/5due_A_rec.pt
recomputed ../dataset/docking_pharmacophores/3rvh_A_rec.pt
recomputed ../dataset/docking_pharmacophores/3n86_A_rec.pt
recomputed ../dataset/docking_pharmacophores/2e8a_A_rec.pt
recomputed ../dataset/docking_pharmacophores/2r2w_U_rec.pt
recomputed ../dataset/docking_pharmacophores/4xew_B_rec.pt
recomputed ../dataset/docking_pharmacophores/3aun_A_rec.pt
recomputed ../dataset/docking_pharmacophores/5k01_A_rec.pt
recomputed ../dataset/docking_pharmacophores/1qf6_A_rec.pt
recomputed ../dataset/docking_pharmacophores/1os2_C_rec.pt
recomputed ../dataset/docking_pharmacophores/1akc_A_rec.pt
recomputed ../dataset/docking_pharmacophores/4y6u_A_rec.pt
recomputed ../dataset/docking_pharmacophores/2fr6_B_rec.pt
recomputed ../dataset/docking_pharmacophores/4qx5_A_rec.pt
recomputed ../dataset/docking_pharmacophores/5ap3_A_rec.pt
recomputed ../dataset/docking_pharmacophores/5b0x_A_rec.

  4%|▍         | 628/15307 [00:12<06:55, 35.34it/s] 

recomputed ../dataset/docking_pharmacophores/3o57_A_rec.pt


  4%|▍         | 629/15307 [00:13<07:29, 32.65it/s]

recomputed ../dataset/docking_pharmacophores/3qpn_A_rec.pt
recomputed ../dataset/docking_pharmacophores/1ros_A_rec.pt
recomputed ../dataset/docking_pharmacophores/3nbw_A_rec.pt
recomputed ../dataset/docking_pharmacophores/5j82_A_rec.pt
recomputed ../dataset/docking_pharmacophores/1ogx_B_rec.pt
recomputed ../dataset/docking_pharmacophores/4fv4_A_rec.pt
recomputed ../dataset/docking_pharmacophores/4ymq_A_rec.pt
recomputed ../dataset/docking_pharmacophores/4lm0_A_rec.pt
recomputed ../dataset/docking_pharmacophores/3b65_A_rec.pt
recomputed ../dataset/docking_pharmacophores/4kbn_A_rec.pt
recomputed ../dataset/docking_pharmacophores/4uba_A_rec.pt
recomputed ../dataset/docking_pharmacophores/5mxk_A_rec.pt
recomputed ../dataset/docking_pharmacophores/4elv_A_rec.pt
recomputed ../dataset/docking_pharmacophores/5ly1_D_rec.pt
recomputed ../dataset/docking_pharmacophores/4rse_A_rec.pt
recomputed ../dataset/docking_pharmacophores/1bwb_A_rec.pt
recomputed ../dataset/docking_pharmacophores/4rrx_A_rec.

  6%|▌         | 937/15307 [00:27<07:38, 31.32it/s]

recomputed ../dataset/docking_pharmacophores/4jev_A_rec.pt


100%|██████████| 15307/15307 [00:28<00:00, 530.07it/s]


In [3]:
import torch
pdb_id = '3g1h_A'

torch.load(f"../dataset/docking_pharmacophores/{pdb_id}_rec.pt")

EOFError: Ran out of input