In [1]:
# parameters cell
# Default values for the notebook's inputs; a later "# Parameters" cell may
# override any of them.

# --- step 1: optional pdbqt -> sdf conversion ---
convert_files = False      # the conversion step is skipped unless this is True
input_dir_pdbqt = None     # directory searched for *.pdbqt files
output_dir_sdf = None      # directory the converted .sdf files are written to

# --- step 2: PLIF scoring inputs ---
pdb_file = ""              # protein file handed to PlifScore
reference_file = ""        # reference ligand file handed to PlifScore
ligands_file = ""          # single file of query ligands
ligands_files_dir = None   # directory searched for *.sdf query-ligand files

# --- step 3: scoring output ---
write_scores = True        # forwarded as `write=` to score_conformers
score_col = "PLIF_SCORE"   # forwarded as `score_col=` to score_conformers

In [2]:
# Parameters
# Values supplied for this particular run; they replace the defaults assigned
# in the parameters cell.

# step 1 (pdbqt -> sdf conversion) inputs, left empty for this run
input_dir_pdbqt = ""
output_dir_sdf = ""

# PLIF scoring inputs
pdb_file = "/data/docking/XX02KALRNA-x1376_1/surprise_set/XX02KALRNA-x1376_1_prepared.pdb"
reference_file = "/data/GDP_frags/GDP.sdf"
ligands_files_dir = "/data/docking/XX02KALRNA-x1376_1/docked/sdf"
ligands_file = ""

# scoring output
score_col = "PLIF_SCORE_GDP"
write_scores = True


In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Standard library
import glob
import os
import sys

# Must run before the imports below: presumably the scoring/misc/ui modules
# live under this directory -- TODO confirm.
sys.path.append('/code/tools')

from scoring.plif import PlifScore
from misc import vina_pdbqt_to_sdf, is_interactive
from ui.file_widgets import DockButtons, PathSelector, InputBoxSet

import scrapbook as sb

# Scoring vina results: PLIF (protein-ligand interaction fingerprint) against reference

---

### 1. Convert pdbqt files to sdf files 

---

**input directory**: the directory containing all of the pdbqt files from a docking run  
**output directory**: the directory to write the converted sdf files to  
The conversion only runs when `convert_files` is set to `True`.

In [5]:
# Interactive sessions only: render an InputBoxSet labelled 'input dir:'.
# `iconv` stays in the notebook namespace because a later cell reads the
# entered value from `iconv.inbox.value`.
if is_interactive():
    iconv = InputBoxSet(tlabel='input dir:')
    display(iconv.render())


In [6]:
# Interactive sessions only: render an InputBoxSet labelled 'output dir:'.
# `oconv` stays in the notebook namespace because a later cell reads the
# entered value from `oconv.inbox.value`.
if is_interactive():
    oconv = InputBoxSet(tlabel='output dir:')
    display(oconv.render())

In [7]:
# In an interactive session the two directories come from the 'input dir:' and
# 'output dir:' boxes; otherwise the values from the parameters cell are used.
if is_interactive():
    input_dir_pdbqt = iconv.inbox.value
    output_dir_sdf = oconv.inbox.value

if convert_files:
    # Fail early with a readable message instead of a TypeError from
    # os.path.join(None, ...) or a FileNotFoundError from os.makedirs('').
    if not input_dir_pdbqt or not output_dir_sdf:
        raise ValueError(
            'convert_files is True but input_dir_pdbqt and/or output_dir_sdf is not set'
        )

    # Sorted so the conversion order is reproducible (glob order is arbitrary).
    infiles = sorted(glob.glob(os.path.join(input_dir_pdbqt, '*.pdbqt')))

    # Create the output directory if it is missing; an existing one is reused.
    os.makedirs(output_dir_sdf, exist_ok=True)

    # Convert every pdbqt file whether or not the output directory already
    # existed. The conversion used to be nested under the "directory is
    # missing" branch, so a run into an existing directory converted nothing.
    for pdbqt_path in infiles:
        # Same base name as the input, with the extension swapped to .sdf.
        sdf_name = os.path.splitext(os.path.basename(pdbqt_path))[0] + '.sdf'
        vina_pdbqt_to_sdf(ifile=pdbqt_path,
                          output=os.path.join(output_dir_sdf, sdf_name))

---
### 2. Select some input files for the PLIF scoring
---

#### 2.1.  Select the protein and reference ligand: 
*(N.B. run the next 2 cells to show the file dialogues)*
1. click on the arrow in the dropdown box at the top
2. select the relevant file in the dialogue 
3. click the button next to either protein, query ligands or reference to set the file

---

#### 2.2.  Select the query molecules to score against the reference ligand:

**There are two options here:** 

1. Use one file to specify which ligands to score: use the buttons as described above
2. Use a directory of sdf files to score multiple ligand files: type the directory to search into the 'multiple dir' input box, and click 'set'


In [8]:
# Interactive sessions only: show the path selector and the buttons used to
# assign the selected path to the protein, query ligands or reference.
if is_interactive():
    # Selector starts in the process's current working directory.
    p = PathSelector(start_dir=os.getcwd())
    display(p.accord)

    # Buttons are bound to the selector above. `b` stays in the notebook
    # namespace because the next cell reads the chosen paths from the
    # pdb/lig/ref button descriptions.
    b = DockButtons(path_selector=p)
    display(b.render_buttons())

In [9]:
# Interactive sessions only: copy the paths chosen with the buttons into the
# variables the scoring cell consumes.
if is_interactive():
    pdb_file = b.pdb_button.description
    ligands_file = b.lig_button.description
    # PlifScore is constructed from `reference_file`, so the selection has to be
    # stored under that name; assigning only `ref_file` left the scoring cell
    # using the default reference path.
    reference_file = b.ref_button.description
    # Kept as an alias in case other cells still read the old name.
    ref_file = reference_file

In [10]:
# Interactive sessions only: render an InputBoxSet labelled 'multiple dir:'.
# `inp` stays in the notebook namespace because the scoring cell reads the
# entered directory from `inp.inbox.value`.
if is_interactive():
    inp = InputBoxSet(tlabel='multiple dir:')
    display(inp.render())

---
### 3. Run the PLIF scoring
---

This returns: 

**For one file:** a dataframe containing the scores  
**For multiple files:** a list of dataframes, each containing the scores for one file

By default the scores are also written back to the input sdf files; to skip this, change ```write_scores=True``` to ```write_scores=False``` below

In [11]:
# Interactive sessions take the ligand directory from the 'multiple dir:' box;
# otherwise the values from the parameters cell are used.
if is_interactive():
    ligands_files_dir = inp.inbox.value
    write_scores = True

# Decide which ligand files to score. An existing directory takes precedence
# over the single ligands file. The truthiness check comes first because
# os.path.isdir(None) raises TypeError and ligands_files_dir defaults to None.
if ligands_files_dir and os.path.isdir(ligands_files_dir):
    # Sorted so df_list has a reproducible order (glob order is arbitrary).
    # A directory without any .sdf file yields an empty df_list below.
    files = sorted(glob.glob(os.path.join(ligands_files_dir, '*.sdf')))
elif ligands_file:
    files = [ligands_file]
else:
    # Neither input was provided: stop here with a readable message rather
    # than passing an empty path on to the scorer.
    raise ValueError(
        'No ligands to score: set ligands_file or point ligands_files_dir at a directory of sdf files'
    )

plif = PlifScore(protein_pdb=pdb_file,
                 reference_file=reference_file)

if len(files) == 1:
    # Score the file that was actually selected. A directory holding exactly
    # one sdf file used to fall through to `ligands_file`, which may be empty.
    df = plif.score_conformers(file=files[0], write=write_scores, score_col=score_col)

else:
    from joblib import Parallel, delayed
    import multiprocessing
    from tqdm import tqdm
    import pandas as pd  # not used in this cell; kept in case other cells rely on `pd`

    # One worker per CPU; each job scores one sdf file. The tqdm bar advances
    # as the generator is consumed by Parallel.
    num_cores = multiprocessing.cpu_count()
    df_list = Parallel(n_jobs=num_cores)(
        delayed(plif.score_conformers)(file=f, write=write_scores, score_col=score_col)
        for f in tqdm(files)
    )

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 1/250 [00:00<01:27,  2.84it/s]

  3%|▎         | 8/250 [00:49<09:26,  2.34s/it]

  4%|▎         | 9/250 [00:49<06:54,  1.72s/it]

  4%|▍         | 10/250 [00:50<06:35,  1.65s/it]

  4%|▍         | 11/250 [00:53<07:23,  1.86s/it]

  5%|▍         | 12/250 [01:32<51:58, 13.10s/it]

  5%|▌         | 13/250 [01:35<39:28, 10.00s/it]

  6%|▌         | 14/250 [01:38<31:42,  8.06s/it]

  6%|▌         | 15/250 [01:40<23:32,  6.01s/it]

  6%|▋         | 16/250 [02:16<59:23, 15.23s/it]

  7%|▋         | 17/250 [02:17<41:48, 10.76s/it]

  7%|▋         | 18/250 [02:24<38:05,  9.85s/it]

  8%|▊         | 19/250 [02:25<26:50,  6.97s/it]

  8%|▊         | 20/250 [03:01<1:00:59, 15.91s/it]

  8%|▊         | 21/250 [03:03<44:20, 11.62s/it]  

  9%|▉         | 22/250 [03:10<38:46, 10.21s/it]

  9%|▉         | 23/250 [03:15<32:57,  8.71s/it]

 10%|▉         | 24/250 [03:48<59:44, 15.86s/it]

 10%|█         | 25/250 [03:50<43:41, 11.65s/it]

 10%|█         | 26/250 [03:59<40:54, 10.96s/it]

 11%|█         | 27/250 [04:04<34:42,  9.34s/it]

 11%|█         | 28/250 [04:34<56:35, 15.30s/it]

 12%|█▏        | 29/250 [04:39<45:15, 12.29s/it]

 12%|█▏        | 30/250 [04:45<38:42, 10.56s/it]

 12%|█▏        | 31/250 [04:56<38:07, 10.45s/it]

 13%|█▎        | 32/250 [05:22<55:22, 15.24s/it]

 13%|█▎        | 33/250 [05:31<48:35, 13.43s/it]

 14%|█▎        | 34/250 [05:33<35:46,  9.94s/it]

 14%|█▍        | 35/250 [05:43<35:52, 10.01s/it]

 14%|█▍        | 36/250 [06:01<44:16, 12.41s/it]

 15%|█▍        | 37/250 [06:17<47:45, 13.45s/it]

 15%|█▌        | 38/250 [06:21<37:04, 10.49s/it]

 16%|█▌        | 39/250 [06:31<36:57, 10.51s/it]

 16%|█▌        | 40/250 [06:52<47:01, 13.44s/it]

 16%|█▋        | 41/250 [07:02<43:28, 12.48s/it]

 17%|█▋        | 42/250 [07:02<30:36,  8.83s/it]

 17%|█▋        | 43/250 [07:17<36:13, 10.50s/it]

 18%|█▊        | 44/250 [07:36<45:11, 13.16s/it]

 18%|█▊        | 45/250 [07:47<42:43, 12.51s/it]

 18%|█▊        | 46/250 [07:51<34:15, 10.08s/it]

 19%|█▉        | 47/250 [08:07<39:46, 11.76s/it]

 19%|█▉        | 48/250 [08:13<33:33,  9.97s/it]

 20%|█▉        | 49/250 [08:34<44:54, 13.41s/it]

 20%|██        | 50/250 [08:39<36:28, 10.94s/it]

 20%|██        | 51/250 [08:55<41:02, 12.37s/it]

 21%|██        | 52/250 [09:06<39:03, 11.83s/it]

 21%|██        | 53/250 [09:23<44:45, 13.63s/it]

 22%|██▏       | 54/250 [09:26<33:37, 10.29s/it]

 22%|██▏       | 55/250 [09:41<37:50, 11.64s/it]

 22%|██▏       | 56/250 [09:52<36:59, 11.44s/it]

 23%|██▎       | 57/250 [10:14<47:12, 14.68s/it]

 23%|██▎       | 58/250 [10:17<35:39, 11.14s/it]

 24%|██▎       | 59/250 [10:30<37:10, 11.68s/it]

 24%|██▍       | 60/250 [10:35<31:03,  9.81s/it]

 24%|██▍       | 61/250 [10:55<40:04, 12.72s/it]

 25%|██▍       | 62/250 [11:03<35:10, 11.22s/it]

 26%|██▌       | 64/250 [11:16<30:37,  9.88s/it]

 26%|██▌       | 65/250 [11:29<33:28, 10.86s/it]

 26%|██▋       | 66/250 [11:50<42:54, 13.99s/it]

 27%|██▋       | 67/250 [12:00<38:55, 12.76s/it]

 27%|██▋       | 68/250 [12:11<36:27, 12.02s/it]

 28%|██▊       | 69/250 [12:23<36:53, 12.23s/it]

 28%|██▊       | 70/250 [12:48<47:59, 16.00s/it]

 28%|██▊       | 71/250 [12:52<36:31, 12.24s/it]

 29%|██▉       | 72/250 [12:56<29:15,  9.86s/it]

 29%|██▉       | 73/250 [13:07<29:48, 10.10s/it]

 30%|██▉       | 74/250 [13:33<43:49, 14.94s/it]

 30%|███       | 75/250 [13:36<33:09, 11.37s/it]

 30%|███       | 76/250 [13:39<26:07,  9.01s/it]

 31%|███       | 77/250 [13:50<27:44,  9.62s/it]

 31%|███       | 78/250 [14:20<44:59, 15.69s/it]

 32%|███▏      | 79/250 [14:27<37:22, 13.12s/it]

 32%|███▏      | 80/250 [14:34<31:54, 11.26s/it]

 32%|███▏      | 81/250 [14:41<28:05,  9.97s/it]

 33%|███▎      | 82/250 [15:09<42:31, 15.19s/it]

 33%|███▎      | 83/250 [15:14<34:25, 12.37s/it]

 34%|███▎      | 84/250 [15:22<29:58, 10.84s/it]

 34%|███▍      | 85/250 [15:26<24:24,  8.88s/it]

 34%|███▍      | 86/250 [16:01<45:29, 16.65s/it]

 35%|███▍      | 87/250 [16:06<35:39, 13.13s/it]

 35%|███▌      | 88/250 [16:12<29:40, 10.99s/it]

 36%|███▌      | 89/250 [16:20<27:14, 10.15s/it]

 36%|███▌      | 90/250 [16:55<47:05, 17.66s/it]

 36%|███▋      | 91/250 [16:57<34:15, 12.93s/it]

 37%|███▋      | 92/250 [17:10<33:52, 12.87s/it]

 37%|███▋      | 93/250 [17:21<32:36, 12.46s/it]

 38%|███▊      | 94/250 [17:47<42:45, 16.45s/it]

 38%|███▊      | 95/250 [17:48<30:20, 11.74s/it]

 38%|███▊      | 96/250 [18:06<35:25, 13.80s/it]

 39%|███▉      | 97/250 [18:14<30:27, 11.94s/it]

 39%|███▉      | 98/250 [18:29<32:35, 12.87s/it]

 40%|███▉      | 99/250 [18:41<31:42, 12.60s/it]

 40%|████      | 100/250 [19:00<36:22, 14.55s/it]

 40%|████      | 101/250 [19:01<26:10, 10.54s/it]

 41%|████      | 102/250 [19:20<31:50, 12.91s/it]

 41%|████      | 103/250 [19:32<31:05, 12.69s/it]

 42%|████▏     | 104/250 [19:37<25:18, 10.40s/it]

 42%|████▏     | 105/250 [19:52<28:34, 11.83s/it]

 42%|████▏     | 106/250 [20:09<31:53, 13.29s/it]

 43%|████▎     | 107/250 [20:24<32:45, 13.75s/it]

 43%|████▎     | 108/250 [20:28<26:16, 11.10s/it]

 44%|████▎     | 109/250 [20:37<24:35, 10.46s/it]

 44%|████▍     | 110/250 [20:58<31:23, 13.45s/it]

 44%|████▍     | 111/250 [21:12<31:33, 13.62s/it]

 45%|████▍     | 112/250 [21:17<25:12, 10.96s/it]

 45%|████▌     | 113/250 [21:27<24:58, 10.93s/it]

 46%|████▌     | 114/250 [21:47<30:37, 13.51s/it]

 46%|████▌     | 115/250 [22:08<35:38, 15.84s/it]

 46%|████▋     | 116/250 [22:09<25:24, 11.38s/it]

 47%|████▋     | 117/250 [22:23<26:55, 12.14s/it]

 47%|████▋     | 118/250 [22:38<28:36, 13.01s/it]

 48%|████▊     | 119/250 [22:50<27:28, 12.58s/it]

 48%|████▊     | 120/250 [22:53<21:19,  9.84s/it]

 48%|████▊     | 121/250 [23:07<23:24, 10.89s/it]

 49%|████▉     | 122/250 [23:23<26:51, 12.59s/it]

 49%|████▉     | 123/250 [23:38<28:03, 13.26s/it]

 50%|████▉     | 124/250 [23:39<20:01,  9.53s/it]

 50%|█████     | 125/250 [23:55<23:47, 11.42s/it]

 50%|█████     | 126/250 [23:55<16:36,  8.03s/it]

 51%|█████     | 127/250 [24:14<23:14, 11.33s/it]

 51%|█████     | 128/250 [24:32<27:04, 13.32s/it]

 52%|█████▏    | 129/250 [24:33<19:23,  9.61s/it]

 52%|█████▏    | 130/250 [24:52<25:16, 12.64s/it]

 52%|█████▏    | 131/250 [25:12<29:16, 14.76s/it]

 53%|█████▎    | 132/250 [25:23<26:47, 13.62s/it]

 53%|█████▎    | 133/250 [25:33<24:37, 12.63s/it]

 54%|█████▎    | 134/250 [25:42<22:19, 11.54s/it]

 54%|█████▍    | 135/250 [26:05<28:17, 14.76s/it]

 54%|█████▍    | 136/250 [26:27<32:11, 16.94s/it]

 55%|█████▍    | 137/250 [26:33<26:07, 13.87s/it]

 55%|█████▌    | 138/250 [26:35<19:04, 10.22s/it]

 56%|█████▌    | 139/250 [26:35<13:18,  7.20s/it]

 56%|█████▌    | 140/250 [26:56<20:50, 11.37s/it]

 56%|█████▋    | 141/250 [26:56<14:31,  8.00s/it]

 57%|█████▋    | 142/250 [27:12<18:35, 10.32s/it]

 57%|█████▋    | 143/250 [27:21<17:47,  9.97s/it]

 58%|█████▊    | 144/250 [27:29<16:17,  9.22s/it]

 58%|█████▊    | 145/250 [27:53<23:47, 13.60s/it]

 58%|█████▊    | 146/250 [28:03<21:40, 12.50s/it]

 59%|█████▉    | 147/250 [28:12<20:00, 11.66s/it]

 59%|█████▉    | 148/250 [28:24<19:36, 11.54s/it]

 60%|█████▉    | 149/250 [28:46<24:59, 14.85s/it]

 60%|██████    | 150/250 [28:48<18:23, 11.04s/it]

 60%|██████    | 151/250 [29:03<19:49, 12.01s/it]

 61%|██████    | 152/250 [29:13<18:44, 11.47s/it]

 61%|██████    | 153/250 [29:13<13:02,  8.06s/it]

 62%|██████▏   | 154/250 [29:28<16:26, 10.28s/it]

 62%|██████▏   | 155/250 [29:28<11:26,  7.23s/it]

 62%|██████▏   | 156/250 [29:37<11:47,  7.52s/it]

 63%|██████▎   | 157/250 [29:52<15:24,  9.94s/it]

 63%|██████▎   | 158/250 [30:01<14:45,  9.62s/it]

 64%|██████▎   | 159/250 [30:18<17:42, 11.68s/it]

 64%|██████▍   | 160/250 [30:25<15:43, 10.49s/it]

 64%|██████▍   | 161/250 [30:38<16:33, 11.16s/it]

 65%|██████▍   | 162/250 [30:51<17:13, 11.74s/it]

 65%|██████▌   | 163/250 [31:08<19:04, 13.16s/it]

 66%|██████▌   | 164/250 [31:10<14:05,  9.83s/it]

 66%|██████▌   | 165/250 [31:10<09:48,  6.92s/it]

 66%|██████▋   | 166/250 [31:19<10:47,  7.70s/it]

 67%|██████▋   | 167/250 [31:36<14:34, 10.54s/it]

 67%|██████▋   | 168/250 [31:54<17:24, 12.74s/it]

 68%|██████▊   | 169/250 [32:00<14:26, 10.70s/it]

 68%|██████▊   | 170/250 [32:13<15:16, 11.45s/it]

 68%|██████▊   | 171/250 [32:25<15:12, 11.55s/it]

 69%|██████▉   | 172/250 [32:30<12:29,  9.61s/it]

 69%|██████▉   | 173/250 [32:46<14:47, 11.53s/it]

 70%|██████▉   | 174/250 [33:00<15:25, 12.18s/it]

 70%|███████   | 175/250 [33:17<17:00, 13.61s/it]

 70%|███████   | 176/250 [33:21<13:09, 10.67s/it]

 71%|███████   | 177/250 [33:37<15:01, 12.34s/it]

 71%|███████   | 178/250 [33:57<17:33, 14.63s/it]

 72%|███████▏  | 179/250 [34:20<20:18, 17.16s/it]

 72%|███████▏  | 180/250 [34:32<18:14, 15.63s/it]

 72%|███████▏  | 181/250 [34:41<15:44, 13.69s/it]

 73%|███████▎  | 182/250 [35:09<20:20, 17.95s/it]

 73%|███████▎  | 183/250 [35:20<17:29, 15.66s/it]

 74%|███████▎  | 184/250 [35:31<15:41, 14.27s/it]

 74%|███████▍  | 185/250 [35:53<18:06, 16.72s/it]

 74%|███████▍  | 186/250 [36:18<20:37, 19.34s/it]

 75%|███████▍  | 187/250 [36:20<14:40, 13.97s/it]

 75%|███████▌  | 188/250 [36:27<12:11, 11.80s/it]

 76%|███████▌  | 189/250 [36:27<08:25,  8.29s/it]

 76%|███████▌  | 190/250 [36:27<05:54,  5.91s/it]

 76%|███████▋  | 191/250 [37:05<15:07, 15.39s/it]

 77%|███████▋  | 192/250 [37:08<11:23, 11.78s/it]

 77%|███████▋  | 193/250 [37:17<10:28, 11.02s/it]

 78%|███████▊  | 194/250 [37:18<07:31,  8.07s/it]

 78%|███████▊  | 195/250 [37:56<15:26, 16.84s/it]

 78%|███████▊  | 196/250 [37:57<11:03, 12.29s/it]

 79%|███████▉  | 197/250 [38:00<08:25,  9.54s/it]

 79%|███████▉  | 198/250 [38:01<05:52,  6.79s/it]

 80%|███████▉  | 199/250 [38:45<15:15, 17.95s/it]

 80%|████████  | 200/250 [38:48<11:22, 13.66s/it]

 80%|████████  | 201/250 [38:49<07:50,  9.60s/it]

 81%|████████  | 202/250 [38:51<05:56,  7.42s/it]

 81%|████████  | 203/250 [38:52<04:18,  5.51s/it]

 82%|████████▏ | 204/250 [39:43<14:36, 19.05s/it]

 82%|████████▏ | 205/250 [39:45<10:25, 13.90s/it]

 82%|████████▏ | 206/250 [39:48<07:54, 10.77s/it]

 83%|████████▎ | 207/250 [39:53<06:31,  9.11s/it]

 83%|████████▎ | 208/250 [40:23<10:46, 15.40s/it]

 84%|████████▎ | 209/250 [40:37<10:06, 14.80s/it]

 84%|████████▍ | 210/250 [40:38<07:15, 10.88s/it]

 84%|████████▍ | 211/250 [40:41<05:31,  8.51s/it]

 85%|████████▍ | 212/250 [41:15<10:07, 15.99s/it]

 85%|████████▌ | 213/250 [41:28<09:18, 15.09s/it]

 86%|████████▌ | 214/250 [41:28<06:21, 10.61s/it]

 86%|████████▌ | 215/250 [41:31<04:49,  8.26s/it]

 86%|████████▋ | 216/250 [41:31<03:17,  5.82s/it]

 87%|████████▋ | 217/250 [41:45<04:37,  8.41s/it]

 87%|████████▋ | 218/250 [42:05<06:21, 11.91s/it]

 88%|████████▊ | 219/250 [42:14<05:36, 10.87s/it]

 88%|████████▊ | 220/250 [42:20<04:40,  9.33s/it]

 88%|████████▊ | 221/250 [42:42<06:21, 13.14s/it]

 89%|████████▉ | 222/250 [42:42<04:19,  9.26s/it]

 89%|████████▉ | 223/250 [42:53<04:25,  9.82s/it]

 90%|████████▉ | 224/250 [43:06<04:39, 10.73s/it]

 90%|█████████ | 225/250 [43:10<03:40,  8.82s/it]

 90%|█████████ | 226/250 [43:44<06:28, 16.17s/it]

 91%|█████████ | 227/250 [43:52<05:19, 13.88s/it]

 91%|█████████ | 228/250 [44:03<04:47, 13.06s/it]

 92%|█████████▏| 229/250 [44:05<03:22,  9.63s/it]

 92%|█████████▏| 230/250 [44:44<06:10, 18.53s/it]

 92%|█████████▏| 231/250 [44:48<04:26, 14.00s/it]

 93%|█████████▎| 232/250 [45:02<04:17, 14.28s/it]

 93%|█████████▎| 233/250 [45:08<03:17, 11.60s/it]

 94%|█████████▎| 234/250 [45:25<03:31, 13.25s/it]

 94%|█████████▍| 235/250 [45:41<03:29, 13.97s/it]

 94%|█████████▍| 236/250 [46:05<03:59, 17.14s/it]

 95%|█████████▍| 237/250 [46:05<02:36, 12.03s/it]

 95%|█████████▌| 238/250 [46:09<01:54,  9.54s/it]

 96%|█████████▌| 239/250 [46:26<02:09, 11.77s/it]

 96%|█████████▌| 240/250 [46:40<02:03, 12.38s/it]

 96%|█████████▋| 241/250 [47:06<02:29, 16.66s/it]

 97%|█████████▋| 242/250 [47:07<01:35, 11.96s/it]

 97%|█████████▋| 243/250 [47:46<02:20, 20.06s/it]

 98%|█████████▊| 244/250 [47:47<01:25, 14.24s/it]

 98%|█████████▊| 245/250 [47:51<00:55, 11.15s/it]

 98%|█████████▊| 246/250 [48:25<01:11, 17.99s/it]

 99%|█████████▉| 247/250 [48:47<00:57, 19.11s/it]

 99%|█████████▉| 248/250 [49:03<00:36, 18.34s/it]

100%|█████████▉| 249/250 [49:12<00:15, 15.47s/it]

100%|██████████| 250/250 [49:38<00:00, 18.63s/it]


