In [1]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
import sys, os, glob
# Extend the module search path before the imports below. Order matters:
# presumably plif, misc and ui are resolved from these directories
# (TODO confirm against the /code/tools layout).
sys.path.append('/code/tools')
sys.path.append('/code/tools/scoring')
from plif import PlifScore  # scorer used in the PLIF scoring cell
from misc import vina_pdbqt_to_sdf  # pdbqt -> sdf converter used in section 1
from ui.file_widgets import DockButtons, PathSelector, InputBoxSet  # notebook widgets

# Scoring vina results: PLIF (protein-ligand interaction fingerprint) against reference

---

### 1. Convert pdbqt files to sdf files 

---

input directory = the directory containing all of the pdbqt files from a docking run  
output directory = the directory to write the converted sdf files to

In [4]:
# Input box for the directory holding the pdbqt files; its value is read by
# the conversion cell further down via iconv.inbox.value.
iconv = InputBoxSet(tlabel='input dir:')
iconv.render()


A Jupyter Widget

In [5]:
# Input box for the directory the sdf files are written to; its value is read
# by the conversion cell further down via oconv.inbox.value.
oconv = InputBoxSet(tlabel='output dir:')
oconv.render()

A Jupyter Widget

In [6]:
# Read the directories typed into the 'input dir:' and 'output dir:' boxes.
input_dir = iconv.inbox.value
output_dir = oconv.inbox.value

# Every pdbqt file directly inside the input directory.
infiles = glob.glob(os.path.join(input_dir, '*.pdbqt'))

# Convert each file to <stem>.sdf in the output directory.
# os.path.basename/splitext replace the manual split('/') and str.replace:
# the former is not portable across path separators and the latter rewrote
# '.pdbqt' wherever it occurred in the file name, not just the extension.
# A plain loop is used because the converter is called for its side effect.
for pdbqt_path in infiles:
    stem = os.path.splitext(os.path.basename(pdbqt_path))[0]
    vina_pdbqt_to_sdf(ifile=pdbqt_path,
                      output=os.path.join(output_dir, stem + '.sdf'))

[]

---
### 2. Select some input files for the PLIF scoring
---

#### 2.1.  Select the protein and reference ligand: 
*(N.B. run the next 2 cells to show the file dialogues)*
1. click on the arrow in the dropdown box at the top
2. select the relevant file in the dialogue 
3. click the button next to either protein, query ligands or reference to set the file

---

#### 2.2.  Select the query molecules to score against the reference ligand:

**There are two options here:** 

1. Use one file to specify which ligands to score: use the buttons as described above
2. Score multiple files: type the path of a directory containing the ligand sdf files into the 'multiple dir' input box, and click 'set'


In [7]:
# File browser widget, starting in the current working directory.
p = PathSelector(start_dir=os.getcwd())
display(p.accord)

# Buttons bound to the browser above; the next cell reads the chosen paths
# from b.pdb_button, b.lig_button and b.ref_button.
b = DockButtons(path_selector=p)
b.render_buttons()

A Jupyter Widget

A Jupyter Widget

In [8]:
# Read the three selections off the buttons; the description text of each
# button is used as the file path (see the pdb_file output below).
pdb_file, ligands_file, ref_file = (
    b.pdb_button.description,
    b.lig_button.description,
    b.ref_button.description,
)

In [9]:
# Display the selected protein path as a sanity check before scoring.
pdb_file

'/data/docking/XX02KALRNA-x1376_1/XX02KALRNA-x1376_1_prepared.pdb'

In [10]:
# Example directory to enter in the box below:
# /data/docking/XX02KALRNA-x1376_1/surprise_set/docked/sdf
# The scoring cell reads the entered value via inp.inbox.value.
inp = InputBoxSet(tlabel='multiple dir:')
inp.render()

A Jupyter Widget

/data/docking/XX02KALRNA-x1376_1/surprise_set/docked/sdf


---
### 3. run the PLIF scoring
---

This returns: 

**For one file:** a dataframe containing the scores  
**For multiple files:** a list of dataframes containing the scores

To avoid writing the scores back into the input sdf files, change ```write=True``` to ```write=False``` in each ```plif.score_conformers()``` call below.

In [11]:
# Choose the ligand files to score: every sdf in the 'multiple dir:' box when
# it names an existing directory, otherwise the single file set with the buttons.
if os.path.isdir(inp.inbox.value):
    files = glob.glob(os.path.join(inp.inbox.value, '*.sdf'))
else:
    files = [ligands_file]

# One scorer is shared by all files: same protein, same reference ligand.
plif = PlifScore(protein_pdb=pdb_file,
                 reference_file=ref_file)

# Score each entry of `files` itself and always build df_list, so that:
#   * a directory holding exactly one sdf scores that file (previously
#     ligands_file was scored instead), and
#   * df_list exists in the single-file case too, so the cells below that
#     iterate over it no longer fail with a NameError.
# df_list[k] holds the scores for files[k]. `df` is kept for backward
# compatibility: it is the frame of the single file, or of the last file scored.
df_list = []
for f in files:
    df = plif.score_conformers(file=f, write=True, score_col='PLIF_SCORE')
    df_list.append(df)

In [12]:
# A frame counts as a hit when its highest PLIF_SCORE exceeds this cutoff.
PLIF_SCORE_CUTOFF = 0.5

best_list = []     # positions in df_list whose best score beats the cutoff
skipped_list = []  # positions in df_list that could not be evaluated

# enumerate replaces the hand-maintained counter that had to be incremented
# on both the normal and the error path.
for i, d in enumerate(df_list):
    try:
        m = max(d['PLIF_SCORE'])
        if float(m) > PLIF_SCORE_CUTOFF:
            best_list.append(i)
    except Exception:
        # Still best-effort: a frame without usable scores is skipped, but
        # unlike a bare `except:` this does not swallow KeyboardInterrupt or
        # SystemExit, and the skipped position is recorded for inspection.
        skipped_list.append(i)

In [13]:
# Display the positions in df_list that passed the score threshold.
best_list

[4, 32, 43, 71, 75, 206]

In [26]:
# Inspect the scores of one hit. NOTE(review): index 4 is hard-coded from the
# recorded best_list output; pick an index from best_list when re-running.
df_list[4]

Unnamed: 0,ID,MODEL,REMARK,RMSD_LB,RMSD_UB,ROMol,SCORE,TORSDO,PLIF_SCORE
0,EN300-36796,1,VINA RESULT: -5.5 0.000 0.000\n Name = EN300-36796\n 3 active torsions:\n status: ('A' for Active; 'I' for Inactive)\n 1 A between atoms: N_3 and C_4\n 2 A between atoms: C_7 and C_8\n 3 A between atoms: N_12 and C_13\n x y z vdW Elec q Type\n _______ _______ _______ _____ _____ ______ ____,0.0,0.0,,-5.5,F 3,0.67
1,EN300-36796,2,VINA RESULT: -5.4 0.424 1.295\n Name = EN300-36796\n 3 active torsions:\n status: ('A' for Active; 'I' for Inactive)\n 1 A between atoms: N_3 and C_4\n 2 A between atoms: C_7 and C_8\n 3 A between atoms: N_12 and C_13\n x y z vdW Elec q Type\n _______ _______ _______ _____ _____ ______ ____,0.424,1.295,,-5.4,F 3,0.4
2,EN300-36796,3,VINA RESULT: -5.2 1.308 2.015\n Name = EN300-36796\n 3 active torsions:\n status: ('A' for Active; 'I' for Inactive)\n 1 A between atoms: N_3 and C_4\n 2 A between atoms: C_7 and C_8\n 3 A between atoms: N_12 and C_13\n x y z vdW Elec q Type\n _______ _______ _______ _____ _____ ______ ____,1.308,2.015,,-5.2,F 3,0.25
3,EN300-36796,4,VINA RESULT: -5.1 3.235 3.986\n Name = EN300-36796\n 3 active torsions:\n status: ('A' for Active; 'I' for Inactive)\n 1 A between atoms: N_3 and C_4\n 2 A between atoms: C_7 and C_8\n 3 A between atoms: N_12 and C_13\n x y z vdW Elec q Type\n _______ _______ _______ _____ _____ ______ ____,3.235,3.986,,-5.1,F 3,0.12
4,EN300-36796,5,VINA RESULT: -5.0 1.088 1.272\n Name = EN300-36796\n 3 active torsions:\n status: ('A' for Active; 'I' for Inactive)\n 1 A between atoms: N_3 and C_4\n 2 A between atoms: C_7 and C_8\n 3 A between atoms: N_12 and C_13\n x y z vdW Elec q Type\n _______ _______ _______ _____ _____ ______ ____,1.088,1.272,,-5.0,F 3,0.67
5,EN300-36796,6,VINA RESULT: -5.0 3.280 3.983\n Name = EN300-36796\n 3 active torsions:\n status: ('A' for Active; 'I' for Inactive)\n 1 A between atoms: N_3 and C_4\n 2 A between atoms: C_7 and C_8\n 3 A between atoms: N_12 and C_13\n x y z vdW Elec q Type\n _______ _______ _______ _____ _____ ______ ____,3.28,3.983,,-5.0,F 3,0.14
6,EN300-36796,7,VINA RESULT: -4.8 2.678 2.818\n Name = EN300-36796\n 3 active torsions:\n status: ('A' for Active; 'I' for Inactive)\n 1 A between atoms: N_3 and C_4\n 2 A between atoms: C_7 and C_8\n 3 A between atoms: N_12 and C_13\n x y z vdW Elec q Type\n _______ _______ _______ _____ _____ ______ ____,2.678,2.818,,-4.8,F 3,0.12
7,EN300-36796,8,VINA RESULT: -4.8 3.638 4.012\n Name = EN300-36796\n 3 active torsions:\n status: ('A' for Active; 'I' for Inactive)\n 1 A between atoms: N_3 and C_4\n 2 A between atoms: C_7 and C_8\n 3 A between atoms: N_12 and C_13\n x y z vdW Elec q Type\n _______ _______ _______ _____ _____ ______ ____,3.638,4.012,,-4.8,F 3,0.2
8,EN300-36796,9,VINA RESULT: -4.7 4.453 5.501\n Name = EN300-36796\n 3 active torsions:\n status: ('A' for Active; 'I' for Inactive)\n 1 A between atoms: N_3 and C_4\n 2 A between atoms: C_7 and C_8\n 3 A between atoms: N_12 and C_13\n x y z vdW Elec q Type\n _______ _______ _______ _____ _____ ______ ____,4.453,5.501,,-4.7,F 3,0.0
9,EN300-36796,10,VINA RESULT: -4.7 3.942 4.871\n Name = EN300-36796\n 3 active torsions:\n status: ('A' for Active; 'I' for Inactive)\n 1 A between atoms: N_3 and C_4\n 2 A between atoms: C_7 and C_8\n 3 A between atoms: N_12 and C_13\n x y z vdW Elec q Type\n _______ _______ _______ _____ _____ ______ ____,3.942,4.871,,-4.7,F 3,0.2


In [27]:
# Path of the sdf file at the same position as df_list[4].
# NOTE(review): index 4 is hard-coded from the recorded best_list output.
files[4]

'/data/docking/XX02KALRNA-x1376_1/surprise_set/docked/sdf/250_surprise_sucos13_prepared_docked.sdf'