This notebook generates pharmacophore features from a pretrained model (used for docking score prediction - Alpha).

In [1]:
import sys
import os
import torch
from tqdm import tqdm

sys.path.append("..")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from src.scoring import PrecalculationModule

In [3]:
# Weight files handed to PrecalculationModule when the predictor is built.
# Both paths are relative, so they resolve against the notebook's working directory.
MODEL_PATH = '../model_weights/model.tar'
HEAD_PATH = '../model_weights/20240117_500.pth'

# Mapping iterated with .items() further down; each entry is unpacked as
# (pdb_id, lig_path), where lig_path is joined onto the crossdocked_pocket10 directory.
split_file = torch.load('../dataset/pocket_to_ligands.pt')

In [4]:
# Build the predictor from the two weight files. The third argument is presumably
# the torch device string ('cuda' would then require a GPU) — TODO confirm against
# src.scoring.PrecalculationModule.
predictor = PrecalculationModule(MODEL_PATH, HEAD_PATH, 'cuda')

In [5]:
# Precompute the predictor output for every (receptor, reference ligand) pair in
# split_file and cache it on disk as one .pt file per receptor.
#
# The loop is resumable: receptors whose cache file already exists are skipped, so
# an interrupted run can simply be re-executed.
# Failures are best-effort: the offending pair is recorded in failed_pairs and the
# loop moves on to the next entry.
failed_pairs = []  # (rec_path, lig_path) tuples whose computation or save raised
for pdb_id, lig_path in tqdm(split_file.items()):
    lig_path = os.path.join('../dataset/crossdocked_pocket10/', lig_path)
    rec_path = os.path.join('../dataset/crossdock/', pdb_id + '_rec.pdb')
    # Mirror the receptor path into the output directory, swapping the extension.
    out_path = rec_path.replace('crossdock', 'docking_pharmacophores').replace('.pdb', '.pt')

    # Check the out_path variable itself; checking the literal string 'out_path'
    # would never match a real file and would disable the skip entirely.
    if os.path.exists(out_path):
        continue
    try:
        # Create the target directory before the expensive call so that a missing
        # directory does not throw away a finished computation at save time.
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        cache = predictor.run(rec_path, ref_ligand_path=lig_path)
        torch.save(cache, out_path)
    except Exception as e:
        failed_pairs.append((rec_path, lig_path))
        # Include the path and the exception repr: a bare print(e) yields an empty
        # line for exceptions that carry no message.
        print(f'Failed on {rec_path}: {e!r}')

  0%|          | 35/15307 [00:19<2:12:14,  1.92it/s]




  0%|          | 63/15307 [00:33<2:06:57,  2.00it/s]




  1%|          | 162/15307 [01:28<2:16:55,  1.84it/s]




  1%|▏         | 206/15307 [01:52<2:07:45,  1.97it/s]




  2%|▏         | 265/15307 [02:26<2:10:30,  1.92it/s]




  3%|▎         | 413/15307 [03:54<2:25:51,  1.70it/s]




  5%|▍         | 695/15307 [06:31<1:41:37,  2.40it/s]




  6%|▋         | 964/15307 [09:03<1:30:57,  2.63it/s]




  6%|▋         | 974/15307 [09:07<1:27:57,  2.72it/s]




  7%|▋         | 1067/15307 [09:55<1:44:37,  2.27it/s]




  7%|▋         | 1103/15307 [10:13<1:37:25,  2.43it/s]




 12%|█▏        | 1784/15307 [16:24<2:01:27,  1.86it/s]




 12%|█▏        | 1879/15307 [17:17<1:55:51,  1.93it/s]




There may be erroneous addition spaces or
the file may contains Atom Lists, which are ignored

 15%|█▌        | 2309/15307 [21:10<1:32:40,  2.34it/s]




 16%|█▌        | 2453/15307 [22:27<1:26:25,  2.48it/s]




 21%|██        | 3165/15307 [29:27<1:29:07,  2.27it/s]




 23%|██▎       | 3482/15307 [32:29<1:45:55,  1.86it/s]




 23%|██▎       | 3510/15307 [32:43<1:32:04,  2.14it/s]




 25%|██▌       | 3890/15307 [36:22<1:51:07,  1.71it/s]




 26%|██▋       | 4020/15307 [37:41<1:35:01,  1.98it/s]




 33%|███▎      | 5103/15307 [47:51<1:30:37,  1.88it/s]




 36%|███▌      | 5450/15307 [50:59<1:30:11,  1.82it/s]




 36%|███▋      | 5550/15307 [51:49<1:10:26,  2.31it/s]




 38%|███▊      | 5748/15307 [53:43<1:57:40,  1.35it/s]




 38%|███▊      | 5787/15307 [54:07<1:32:20,  1.72it/s]




 39%|███▉      | 6010/15307 [56:17<1:54:31,  1.35it/s]




 47%|████▋     | 7216/15307 [1:07:58<54:17,  2.48it/s]  




 48%|████▊     | 7357/15307 [1:09:53<2:12:08,  1.00it/s]




 49%|████▉     | 7538/15307 [1:11:38<44:37,  2.90it/s]  




 49%|████▉     | 7570/15307 [1:11:54<1:03:29,  2.03it/s]




 51%|█████     | 7783/15307 [1:14:09<1:23:59,  1.49it/s]




 51%|█████▏    | 7869/15307 [1:14:53<45:35,  2.72it/s]  




 52%|█████▏    | 7915/15307 [1:15:21<1:00:57,  2.02it/s]




 52%|█████▏    | 7921/15307 [1:15:25<1:30:09,  1.37it/s]




 52%|█████▏    | 8021/15307 [1:16:20<1:10:07,  1.73it/s]




 53%|█████▎    | 8109/15307 [1:17:06<1:05:20,  1.84it/s]




 55%|█████▌    | 8449/15307 [1:21:15<50:02,  2.28it/s]  




 56%|█████▌    | 8509/15307 [1:21:46<56:36,  2.00it/s]  




 58%|█████▊    | 8866/15307 [1:25:05<39:39,  2.71it/s]  




 58%|█████▊    | 8907/15307 [1:25:26<52:24,  2.04it/s]  




 59%|█████▊    | 8983/15307 [1:26:09<57:17,  1.84it/s]  




 59%|█████▉    | 9036/15307 [1:26:41<1:11:21,  1.46it/s]




 65%|██████▍   | 9940/15307 [1:35:03<42:34,  2.10it/s]  




 67%|██████▋   | 10274/15307 [1:38:09<55:35,  1.51it/s]  




 69%|██████▊   | 10512/15307 [1:40:19<32:12,  2.48it/s]  




 71%|███████   | 10853/15307 [1:43:20<41:16,  1.80it/s]  




 74%|███████▎  | 11278/15307 [1:47:15<41:48,  1.61it/s]  




 79%|███████▊  | 12044/15307 [1:54:06<43:13,  1.26it/s]




 80%|███████▉  | 12237/15307 [1:55:59<23:13,  2.20it/s]




 82%|████████▏ | 12626/15307 [1:59:27<25:14,  1.77it/s]




 85%|████████▌ | 13011/15307 [2:03:04<23:09,  1.65it/s]




 88%|████████▊ | 13467/15307 [2:07:27<12:17,  2.49it/s]




 91%|█████████ | 13937/15307 [2:11:40<13:32,  1.69it/s]




 94%|█████████▍| 14358/15307 [2:15:34<08:40,  1.82it/s]




 96%|█████████▌| 14719/15307 [2:18:53<04:44,  2.06it/s]




There may be erroneous addition spaces or
the file may contains Atom Lists, which are ignored

100%|██████████| 15307/15307 [2:24:20<00:00,  1.77it/s]


Check that every saved file loads correctly (i.e. is not corrupted); files that fail to load are recomputed.

In [6]:
# Integrity pass over the cached files: try to deserialise each existing .pt file
# and, if loading fails, recompute it with the predictor and overwrite it.
# Receptors that have no cache file at all are skipped here, not recomputed.
for pdb_id, lig_path in tqdm(split_file.items()):
    lig_path = os.path.join('../dataset/crossdocked_pocket10/', lig_path)
    rec_path = os.path.join('../dataset/crossdock/', pdb_id + '_rec.pdb')
    out_path = rec_path.replace('crossdock', 'docking_pharmacophores').replace('.pdb', '.pt')

    if not os.path.exists(out_path):
        continue
    try:
        # The loaded object is discarded; map to CPU so that checking thousands of
        # files does not depend on, or allocate memory on, the original device.
        torch.load(out_path, map_location='cpu')
    except Exception:
        # Catch Exception rather than using a bare except, so that interrupting the
        # kernel (KeyboardInterrupt) stops the loop instead of triggering a recompute.
        cache = predictor.run(rec_path, ref_ligand_path=lig_path)
        torch.save(cache, out_path)
        # Report only after the replacement file has actually been written.
        print('recomputed', out_path)

100%|██████████| 15307/15307 [00:10<00:00, 1401.92it/s]
