# Plipify

## Aim of this notebook

- Create fingerprints for individual structures of the same protein
- Create a frequency fingerprint across multiple structures of the same protein
- Visualize the frequency fingerpint

### Next to-dos:
- check variable names
- wrap code in functions and move to .py file
- path parameters

## Data Import and Preparation
### Import Packages

In [26]:
import csv
import os
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from plip.modules.preparation import PDBComplex
from plip.modules.report import BindingSiteReport
from IPython.display import display, Markdown

### Pre-defined residues


In [27]:
with open('./data/MPro_residues.csv') as file:
    residue_reader = csv.reader(file, delimiter=',')
    residues = residue_reader.next()
residues = map(int,residues)

# consider other formats?

In [28]:
interaction_types = ["hydrophobic", "hbond", "waterbridge", "saltbridge", "pistacking", "pication", "halogen", "metal"] 
fp_length = len(residues)*len(interaction_types) # fp length based on number of pre-defined residues
fp = [0]*fp_length
fp_length

544

Create Index dictionaries for fingerprints

In [29]:
# split list l into lists of size n
def divide_list(l, n): 
    for i in range(0, len(l), n):  
        yield l[i:i + n] 
        
# residue index dictionary
residue_i = range(0,fp_length)
residue_i = list(divide_list(residue_i,len(interaction_types)))
residue_dict = dict(zip(residues, residue_i))

# interaction_type index dictionary
interaction_i = range(0,len(interaction_types))
interaction_dict = dict(zip(interaction_types, interaction_i))

### Get PLIP data
From C-plip-fingerprints prototype

In [30]:
data = "./data/diamond_xchem_screen_mpro_all_pdbs"
with open(os.path.join(data, "diamond_xchem_screen_mpro_non_covalent_pdbs.dat")) as f:
    non_covalent_filenames = [l.strip() for l in f if l.strip()]

In [31]:
def analyze_interactions(pdbfile):
    protlig = PDBComplex()
    protlig.load_pdb(pdbfile)  # load the pdb
    for ligand in protlig.ligands:
        protlig.characterize_complex(ligand)  # find ligands and analyze interactions
    sites = {}
    for key, site in sorted(protlig.interaction_sets.items()):
        binding_site = BindingSiteReport(site)  # collect data about interactions
        # tuples of *_features and *_info will be converted to pandas df
        keys = "hydrophobic", "hbond", "waterbridge", "saltbridge", "pistacking", "pication", "halogen", "metal"
        interactions = {k: [getattr(binding_site, k+"_features")] + getattr(binding_site, k+"_info") for k in keys}
        sites[key] = interactions
    return sites

# for displaying data
def site_to_dataframes(site):
    keys = ["hydrophobic", "hbond", "waterbridge", "saltbridge", "pistacking", "pication", "halogen", "metal"] 
    dfs = {}
    for key in keys:
        records = site[key][1:]
        if not records:
            dfs[key] = None
        else:
            dfs[key] = pd.DataFrame.from_records(records, columns=site[key][0])
    return dfs

In [32]:
interactions = {}
for filename in tqdm(non_covalent_filenames):
    full_filename = os.path.join(data, filename)
    if not os.path.isfile(full_filename):
        print("File", full_filename, "does not exist?")
        continue
    interactions[filename] = analyze_interactions(full_filename)

HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value=u'')))

('File', './data/diamond_xchem_screen_mpro_all_pdbs/Mpro-x0397.pdb', 'does not exist?')
('File', './data/diamond_xchem_screen_mpro_all_pdbs/Mpro-x0426.pdb', 'does not exist?')
('File', './data/diamond_xchem_screen_mpro_all_pdbs/Mpro-x0395.pdb', 'does not exist?')



In [33]:
for structure, sites in interactions.items():
    display(Markdown("# Structure {}".format(structure)))
    for site_name, site_interactions in sites.items():
        if not site_name.startswith('LIG'):
            continue  # fragments are labeled as LIG; other "sites" detected by PLIP are XRC artefacts
        display(Markdown("## Site {}".format(site_name)))
        for interaction_type, dataframe in site_to_dataframes(site_interactions).items():
            if dataframe is not None:
                display(Markdown("### {}".format(interaction_type)))
                display(dataframe)

# Structure Mpro-x0946.pdb

## Site LIG:A:1101

### waterbridge

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST_A-W,DIST_D-W,DON_ANGLE,WATER_ANGLE,PROTISDON,DONOR_IDX,DONORTYPE,ACCEPTOR_IDX,ACCEPTORTYPE,WATER_IDX,LIGCOO,PROTCOO,WATERCOO
0,166,GLU,A,1101,LIG,A,4.03,3.11,173.92,93.79,True,1288,Nam,2383,N3,2434,"(9.554, 5.104, 22.977)","(9.904, 2.716, 18.584)","(9.307, 1.422, 21.348)"


### hbond

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,SIDECHAIN,DIST_H-A,DIST_D-A,DON_ANGLE,PROTISDON,DONORIDX,DONORTYPE,ACCEPTORIDX,ACCEPTORTYPE,LIGCOO,PROTCOO
0,166,GLU,A,1101,LIG,A,False,1.82,2.83,174.51,False,2383,N3,1291,O2,"(9.554, 5.104, 22.977)","(10.336, 4.819, 20.27)"


### hydrophobic

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST,LIGCARBONIDX,PROTCARBONIDX,LIGCOO,PROTCOO
0,165,MET,A,1101,LIG,A,3.78,2390,1284,"(13.336, 2.198, 22.754)","(12.359, 1.056, 19.282)"
1,189,GLN,A,1101,LIG,A,3.91,2387,1469,"(10.861, 2.392, 24.027)","(11.848, 3.317, 27.693)"


# Structure Mpro-x0107.pdb

## Site LIG:A:1101

### hbond

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,SIDECHAIN,DIST_H-A,DIST_D-A,DON_ANGLE,PROTISDON,DONORIDX,DONORTYPE,ACCEPTORIDX,ACCEPTORTYPE,LIGCOO,PROTCOO
0,166,GLU,A,1101,LIG,A,False,1.97,2.93,165.31,True,1288,Nam,2393,O2,"(9.346, 0.96, 20.955)","(9.663, 2.532, 18.498)"


### hydrophobic

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST,LIGCARBONIDX,PROTCARBONIDX,LIGCOO,PROTCOO
0,166,GLU,A,1101,LIG,A,3.96,2388,1292,"(4.96, 1.058, 19.367)","(7.648, 3.931, 18.879)"


# Structure Mpro-x0354.pdb

## Site LIG:A:701

### pistacking

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,CENTDIST,ANGLE,OFFSET,TYPE,LIG_IDX_LIST,LIGCOO,PROTCOO
0,41,HIS,A,701,LIG,A,4.82,75.36,0.64,T,251025112513251425212522,"(11.218166666666667, -1.441, 24.432)","(11.959999999999999, -4.989599999999999, 21.2556)"


### hydrophobic

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST,LIGCARBONIDX,PROTCARBONIDX,LIGCOO,PROTCOO
0,41,HIS,A,701,LIG,A,3.86,2510,319,"(11.769, -2.569, 23.891)","(13.888, -5.653, 22.94)"


# Structure Mpro-x0072.pdb

## Site LIG:A:1101

### hbond

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,SIDECHAIN,DIST_H-A,DIST_D-A,DON_ANGLE,PROTISDON,DONORIDX,DONORTYPE,ACCEPTORIDX,ACCEPTORTYPE,LIGCOO,PROTCOO
0,25,THR,A,1101,LIG,A,True,3.27,3.78,115.0,True,178,O3,2393,O2,"(9.235, -5.584, 26.122)","(8.019, -9.051, 27.017)"


### pistacking

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,CENTDIST,ANGLE,OFFSET,TYPE,LIG_IDX_LIST,LIGCOO,PROTCOO
0,41,HIS,A,1101,LIG,A,4.8,79.18,1.16,T,238623872388238923902391,"(12.043333333333335, -0.7759999999999999, 23.4...","(11.8164, -5.1314, 21.399800000000003)"


### hydrophobic

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST,LIGCARBONIDX,PROTCARBONIDX,LIGCOO,PROTCOO
0,189,GLN,A,1101,LIG,A,3.86,2388,1468,"(12.358, 0.271, 24.254)","(12.928, 2.477, 27.368)"


# Structure Mpro-x0161.pdb

## Site LIG:A:1101

### waterbridge

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST_A-W,DIST_D-W,DON_ANGLE,WATER_ANGLE,PROTISDON,DONOR_IDX,DONORTYPE,ACCEPTOR_IDX,ACCEPTORTYPE,WATER_IDX,LIGCOO,PROTCOO,WATERCOO
0,166,GLU,A,1101,LIG,A,4.01,2.87,172.42,89.78,True,1288,Nam,2391,N3,2433,"(9.311, 5.041, 22.782)","(9.967, 2.685, 18.385)","(9.274, 1.499, 20.904)"


### saltbridge

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST,PROTISPOS,LIG_GROUP,LIG_IDX_LIST,LIGCOO,PROTCOO
0,41,HIS,A,1101,LIG,A,5.18,True,Carboxylate,23922393,"(13.9435, -0.877, 23.3495)","(11.7745, -4.8115000000000006, 20.7695)"


### hbond

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,SIDECHAIN,DIST_H-A,DIST_D-A,DON_ANGLE,PROTISDON,DONORIDX,DONORTYPE,ACCEPTORIDX,ACCEPTORTYPE,LIGCOO,PROTCOO
0,189,GLN,A,1101,LIG,A,True,3.08,3.67,120.0,True,1472,Nam,2395,O2,"(10.485, 5.379, 25.107)","(10.139, 4.011, 28.5)"
1,166,GLU,A,1101,LIG,A,False,2.01,2.99,161.2,False,2391,N3,1291,O2,"(9.311, 5.041, 22.782)","(10.444, 4.777, 20.029)"


### hydrophobic

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST,LIGCARBONIDX,PROTCARBONIDX,LIGCOO,PROTCOO
0,165,MET,A,1101,LIG,A,3.96,2390,1284,"(13.459, 2.162, 22.819)","(12.458, 1.105, 19.137)"
1,189,GLN,A,1101,LIG,A,3.75,2386,1468,"(11.707, 1.106, 24.084)","(13.298, 2.52, 27.177)"
2,189,GLN,A,1101,LIG,A,3.68,2387,1469,"(11.044, 2.322, 24.179)","(12.243, 3.597, 27.411)"


# Structure Mpro-x0104.pdb

## Site LIG:A:1101

### hbond

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,SIDECHAIN,DIST_H-A,DIST_D-A,DON_ANGLE,PROTISDON,DONORIDX,DONORTYPE,ACCEPTORIDX,ACCEPTORTYPE,LIGCOO,PROTCOO
0,190,THR,A,1101,LIG,A,False,3.16,3.81,124.31,False,2396,Nam,1476,O2,"(12.067, 5.468, 23.761)","(15.692, 6.519, 24.25)"
1,189,GLN,A,1101,LIG,A,True,3.51,3.96,110.23,False,2397,Nar,1471,O2,"(10.416, 1.465, 24.34)","(10.199, 1.734, 28.289)"


### pistacking

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,CENTDIST,ANGLE,OFFSET,TYPE,LIG_IDX_LIST,LIGCOO,PROTCOO
0,41,HIS,A,1101,LIG,A,5.03,77.98,1.74,T,238923902391239223932394,"(11.979, -0.32616666666666666, 22.96666666666667)","(11.515, -5.099600000000001, 21.4428)"


### hydrophobic

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST,LIGCARBONIDX,PROTCARBONIDX,LIGCOO,PROTCOO
0,189,GLN,A,1101,LIG,A,3.91,2387,1468,"(12.151, 2.44, 23.396)","(12.716, 2.386, 27.269)"
1,165,MET,A,1101,LIG,A,3.37,2393,1284,"(13.096, 0.246, 22.356)","(12.168, 1.081, 19.222)"


# Structure Mpro-x1093.pdb

## Site LIG:A:1101

### waterbridge

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST_A-W,DIST_D-W,DON_ANGLE,WATER_ANGLE,PROTISDON,DONOR_IDX,DONORTYPE,ACCEPTOR_IDX,ACCEPTORTYPE,WATER_IDX,LIGCOO,PROTCOO,WATERCOO
0,189,GLN,A,1101,LIG,A,4.0,3.46,159.02,89.58,True,1486,Nam,2447,O2,2636,"(9.135, 1.338, 21.148)","(9.617, 2.175, 26.575)","(7.034, 1.086, 24.544)"


### hbond

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,SIDECHAIN,DIST_H-A,DIST_D-A,DON_ANGLE,PROTISDON,DONORIDX,DONORTYPE,ACCEPTORIDX,ACCEPTORTYPE,LIGCOO,PROTCOO
0,166,GLU,A,1101,LIG,A,False,2.03,3.01,171.69,True,1302,Nam,2447,O2,"(9.135, 1.338, 21.148)","(9.719, 2.676, 18.521)"


# Structure Mpro-x1077.pdb

## Site LIG:A:1101

### hbond

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,SIDECHAIN,DIST_H-A,DIST_D-A,DON_ANGLE,PROTISDON,DONORIDX,DONORTYPE,ACCEPTORIDX,ACCEPTORTYPE,LIGCOO,PROTCOO
0,166,GLU,A,1101,LIG,A,False,2.07,3.03,164.49,True,1288,Nam,2383,N1,"(9.978, 1.48, 21.333)","(9.961, 2.675, 18.544)"


### pistacking

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,CENTDIST,ANGLE,OFFSET,TYPE,LIG_IDX_LIST,LIGCOO,PROTCOO
0,41,HIS,A,1101,LIG,A,4.77,89.86,1.34,T,238623882389239023912396,"(9.587666666666665, -1.7124999999999997, 23.5445)","(11.974799999999998, -5.0318, 21.091)"


### hydrophobic

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST,LIGCARBONIDX,PROTCARBONIDX,LIGCOO,PROTCOO
0,25,THR,A,1101,LIG,A,3.38,2393,179,"(8.975, -5.856, 23.587)","(7.829, -8.893, 24.538)"


# Structure Mpro-x0967.pdb

## Site LIG:A:1101

### hbond

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,SIDECHAIN,DIST_H-A,DIST_D-A,DON_ANGLE,PROTISDON,DONORIDX,DONORTYPE,ACCEPTORIDX,ACCEPTORTYPE,LIGCOO,PROTCOO
0,166,GLU,A,1101,LIG,A,False,2.45,3.38,159.01,True,1288,Nam,2401,O2,"(9.866, 1.017, 21.611)","(9.749, 2.544, 18.594)"
1,144,SER,A,1101,LIG,A,True,3.17,3.5,102.2,True,1121,O3,2400,O3,"(5.702, 0.784, 16.93)","(4.607, -2.264, 15.598)"
2,163,HIS,A,1101,LIG,A,True,2.22,3.05,147.85,False,2400,O3,1269,Nar,"(5.702, 0.784, 16.93)","(8.069, -0.282, 15.326)"
3,166,GLU,A,1101,LIG,A,False,2.6,3.48,148.07,False,2397,Nam,1291,O2,"(7.991, 2.594, 22.44)","(9.994, 4.559, 20.389)"


### hydrophobic

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST,LIGCARBONIDX,PROTCARBONIDX,LIGCOO,PROTCOO
0,165,MET,A,1101,LIG,A,3.99,2387,1284,"(11.351, -1.592, 22.376)","(12.152, 0.87, 19.344)"
1,166,GLU,A,1101,LIG,A,3.49,2394,1292,"(5.174, 1.599, 19.108)","(7.726, 3.899, 18.494)"


# Structure Mpro-x0995.pdb

## Site LIG:A:1101

### waterbridge

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST_A-W,DIST_D-W,DON_ANGLE,WATER_ANGLE,PROTISDON,DONOR_IDX,DONORTYPE,ACCEPTOR_IDX,ACCEPTORTYPE,WATER_IDX,LIGCOO,PROTCOO,WATERCOO
0,142,ASN,A,1101,LIG,A,3.99,3.24,170.69,116.26,True,1104,Nam,2383,Npl,2634,"(3.78, 2.082, 19.566)","(1.806, -1.075, 19.652)","(0.07, 1.382, 20.847)"


### hbond

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,SIDECHAIN,DIST_H-A,DIST_D-A,DON_ANGLE,PROTISDON,DONORIDX,DONORTYPE,ACCEPTORIDX,ACCEPTORTYPE,LIGCOO,PROTCOO
0,142,ASN,A,1101,LIG,A,True,2.73,3.53,139.58,False,2383,Npl,1110,O2,"(3.78, 2.082, 19.566)","(3.045, 0.014, 22.335)"


# Structure Mpro-x0678.pdb

## Site LIG:A:1101

### hbond

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,SIDECHAIN,DIST_H-A,DIST_D-A,DON_ANGLE,PROTISDON,DONORIDX,DONORTYPE,ACCEPTORIDX,ACCEPTORTYPE,LIGCOO,PROTCOO
0,166,GLU,A,1101,LIG,A,False,2.0,2.97,168.31,True,1288,Nam,2398,O2,"(10.0, 0.972, 20.694)","(10.388, 2.544, 18.208)"


### hydrophobic

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST,LIGCARBONIDX,PROTCARBONIDX,LIGCOO,PROTCOO
0,166,GLU,A,1101,LIG,A,3.82,2393,1292,"(5.662, 1.106, 18.846)","(8.292, 3.838, 18.353)"
1,41,HIS,A,1101,LIG,A,3.93,2387,307,"(13.446, -1.874, 22.213)","(14.064, -5.733, 22.67)"
2,189,GLN,A,1101,LIG,A,3.82,2389,1468,"(13.497, -0.339, 24.265)","(13.447, 2.536, 26.776)"
3,49,MET,A,1101,LIG,A,3.79,2389,368,"(13.497, -0.339, 24.265)","(12.993, -1.667, 27.783)"


# Structure Mpro-x0991.pdb

## Site LIG:A:1101

### hbond

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,SIDECHAIN,DIST_H-A,DIST_D-A,DON_ANGLE,PROTISDON,DONORIDX,DONORTYPE,ACCEPTORIDX,ACCEPTORTYPE,LIGCOO,PROTCOO
0,25,THR,A,1101,LIG,A,True,3.53,3.91,106.27,True,178,O3,2387,Ng+,"(8.523, -4.964, 26.462)","(7.873, -8.788, 26.952)"
1,45,THR,A,1101,LIG,A,False,3.2,4.09,150.78,False,2387,Ng+,337,O2,"(8.523, -4.964, 26.462)","(11.093, -4.54, 29.613)"
2,41,HIS,A,1101,LIG,A,False,2.29,2.71,104.83,False,2383,Ng+,306,O2,"(10.203, -5.727, 25.115)","(12.032, -7.656, 24.576)"


### hydrophobic

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST,LIGCARBONIDX,PROTCARBONIDX,LIGCOO,PROTCOO
0,25,THR,A,1101,LIG,A,3.96,2385,179,"(8.148, -4.725, 24.034)","(7.746, -8.624, 24.575)"


# Structure Mpro-x0305.pdb

## Site LIG:A:1101

### waterbridge

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST_A-W,DIST_D-W,DON_ANGLE,WATER_ANGLE,PROTISDON,DONOR_IDX,DONORTYPE,ACCEPTOR_IDX,ACCEPTORTYPE,WATER_IDX,LIGCOO,PROTCOO,WATERCOO
0,164,HIS,A,1101,LIG,A,4.02,2.88,142.75,116.95,True,1276,Nar,2392,N1,2400,"(14.122, -1.82, 20.508)","(15.081, -4.706, 17.493)","(15.605, -5.539, 20.201)"


### hbond

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,SIDECHAIN,DIST_H-A,DIST_D-A,DON_ANGLE,PROTISDON,DONORIDX,DONORTYPE,ACCEPTORIDX,ACCEPTORTYPE,LIGCOO,PROTCOO
0,41,HIS,A,1101,LIG,A,True,2.48,3.11,121.14,True,309,Nar,2392,N1,"(14.122, -1.82, 20.508)","(12.877, -4.624, 21.017)"
1,189,GLN,A,1101,LIG,A,True,2.37,3.16,136.75,False,2391,Npl,1471,O2,"(10.162, 0.163, 25.71)","(10.373, 1.776, 28.414)"


### pistacking

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,CENTDIST,ANGLE,OFFSET,TYPE,LIG_IDX_LIST,LIGCOO,PROTCOO
0,41,HIS,A,1101,LIG,A,4.88,77.2,0.62,T,238523862387238823902393,"(11.750833333333334, -0.6871666666666666, 23.5...","(11.877, -5.0280000000000005, 21.3542)"


# Structure Mpro-x0540.pdb

## Site LIG:A:1101

### waterbridge

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST_A-W,DIST_D-W,DON_ANGLE,WATER_ANGLE,PROTISDON,DONOR_IDX,DONORTYPE,ACCEPTOR_IDX,ACCEPTORTYPE,WATER_IDX,LIGCOO,PROTCOO,WATERCOO
0,166,GLU,A,1101,LIG,A,3.5,3.83,141.78,102.95,False,2399,Nam,1295,O3,2556,"(2.294, 3.164, 23.244)","(4.854, 4.81, 19.435)","(1.389, 4.396, 19.734)"


### hbond

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,SIDECHAIN,DIST_H-A,DIST_D-A,DON_ANGLE,PROTISDON,DONORIDX,DONORTYPE,ACCEPTORIDX,ACCEPTORTYPE,LIGCOO,PROTCOO
0,166,GLU,A,1101,LIG,A,True,3.46,3.98,117.82,True,1295,O3,2397,Nam,"(4.501, 2.341, 22.538)","(4.854, 4.81, 19.435)"
1,142,ASN,A,1101,LIG,A,True,2.57,3.04,108.86,True,1111,Nam,2400,O2,"(3.33, 1.358, 24.276)","(2.686, -1.609, 24.166)"
2,166,GLU,A,1101,LIG,A,True,3.02,3.98,166.54,False,2397,Nam,1295,O3,"(4.501, 2.341, 22.538)","(4.854, 4.81, 19.435)"


### hydrophobic

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST,LIGCARBONIDX,PROTCARBONIDX,LIGCOO,PROTCOO
0,166,GLU,A,1101,LIG,A,3.9,2390,1292,"(5.02, 1.202, 19.424)","(7.898, 3.809, 19.075)"


# Structure Mpro-x0195.pdb

## Site LIG:A:801

### hbond

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,SIDECHAIN,DIST_H-A,DIST_D-A,DON_ANGLE,PROTISDON,DONORIDX,DONORTYPE,ACCEPTORIDX,ACCEPTORTYPE,LIGCOO,PROTCOO
0,190,THR,A,801,LIG,A,False,2.97,3.85,144.47,False,2394,N3,1476,O2,"(12.368, 6.143, 23.209)","(15.917, 7.208, 24.257)"


### hydrophobic

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST,LIGCARBONIDX,PROTCARBONIDX,LIGCOO,PROTCOO
0,189,GLN,A,801,LIG,A,3.71,2391,1468,"(11.761, 2.061, 23.574)","(13.71, 2.3, 26.72)"
1,165,MET,A,801,LIG,A,3.76,2387,1284,"(13.7, 0.904, 22.706)","(12.46, 0.984, 19.16)"


# Structure Mpro-x0434.pdb

## Site LIG:A:1101

### hbond

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,SIDECHAIN,DIST_H-A,DIST_D-A,DON_ANGLE,PROTISDON,DONORIDX,DONORTYPE,ACCEPTORIDX,ACCEPTORTYPE,LIGCOO,PROTCOO
0,166,GLU,A,1101,LIG,A,False,2.06,3.0,160.71,True,1288,Nam,2398,O2,"(9.192, 0.679, 20.85)","(9.816, 2.578, 18.607)"
1,142,ASN,A,1101,LIG,A,True,3.22,3.82,121.61,False,2396,Nam,1110,O2,"(7.33, -0.662, 20.931)","(4.21, -1.088, 23.102)"


### pistacking

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,CENTDIST,ANGLE,OFFSET,TYPE,LIG_IDX_LIST,LIGCOO,PROTCOO
0,41,HIS,A,1101,LIG,A,5.2,71.19,1.6,T,238423852386238723882389,"(11.763666666666667, 0.01999999999999998, 23.2...","(11.7332, -4.842599999999999, 21.3876)"


### hydrophobic

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST,LIGCARBONIDX,PROTCARBONIDX,LIGCOO,PROTCOO
0,166,GLU,A,1101,LIG,A,3.99,2392,1292,"(4.815, 1.312, 19.109)","(7.757, 3.972, 18.67)"
1,189,GLN,A,1101,LIG,A,3.59,2386,1468,"(11.811, 0.848, 24.344)","(12.84, 2.528, 27.347)"


# Structure Mpro-x0387.pdb

## Site LIG:A:1101

### pistacking

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,CENTDIST,ANGLE,OFFSET,TYPE,LIG_IDX_LIST,LIGCOO,PROTCOO
0,41,HIS,A,1101,LIG,A,4.6,72.9,1.44,T,23872388238923902395,"(12.3836, -0.9454, 23.1674)","(11.9174, -5.1104, 21.279199999999996)"


# Structure Mpro-x0874.pdb

## Site LIG:A:1101

### hbond

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,SIDECHAIN,DIST_H-A,DIST_D-A,DON_ANGLE,PROTISDON,DONORIDX,DONORTYPE,ACCEPTORIDX,ACCEPTORTYPE,LIGCOO,PROTCOO
0,166,GLU,A,1101,LIG,A,False,2.01,2.99,177.99,True,1288,Nam,2394,O2,"(9.329, 1.303, 21.126)","(9.997, 2.593, 18.511)"
1,166,GLU,A,1101,LIG,A,False,1.75,2.62,145.86,False,2393,Nam,1291,O2,"(10.468, 3.035, 22.181)","(10.354, 4.748, 20.197)"


### pistacking

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,CENTDIST,ANGLE,OFFSET,TYPE,LIG_IDX_LIST,LIGCOO,PROTCOO
0,41,HIS,A,1101,LIG,A,4.72,81.0,1.68,T,23892390239123922395,"(12.423599999999999, -0.53, 22.5898)","(11.9448, -5.0318000000000005, 21.2454)"


# Structure Mpro-x1249.pdb

## Site LIG:A:1101

### waterbridge

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST_A-W,DIST_D-W,DON_ANGLE,WATER_ANGLE,PROTISDON,DONOR_IDX,DONORTYPE,ACCEPTOR_IDX,ACCEPTORTYPE,WATER_IDX,LIGCOO,PROTCOO,WATERCOO
0,164,HIS,A,1101,LIG,A,4.1,2.87,138.43,120.06,True,1290,Nar,2444,N1,2453,"(14.061, -1.841, 20.54)","(15.054, -4.686, 17.535)","(15.594, -5.624, 20.196)"


### hbond

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,SIDECHAIN,DIST_H-A,DIST_D-A,DON_ANGLE,PROTISDON,DONORIDX,DONORTYPE,ACCEPTORIDX,ACCEPTORTYPE,LIGCOO,PROTCOO
0,41,HIS,A,1101,LIG,A,True,2.44,3.08,122.6,True,309,Nar,2444,N1,"(14.061, -1.841, 20.54)","(12.794, -4.607, 21.042)"


### pistacking

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,CENTDIST,ANGLE,OFFSET,TYPE,LIG_IDX_LIST,LIGCOO,PROTCOO
0,41,HIS,A,1101,LIG,A,5.14,87.37,1.26,T,243624372438243924412442,"(11.690833333333332, -0.2608333333333333, 23.2...","(11.795599999999999, -5.0338, 21.373)"


### hydrophobic

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST,LIGCARBONIDX,PROTCARBONIDX,LIGCOO,PROTCOO
0,189,GLN,A,1101,LIG,A,3.85,2437,1482,"(12.014, 0.979, 23.834)","(12.876, 2.521, 27.25)"


## Create Fingerprint
### Individual fingerprints
- create one count fp for each structure
- length of fp = length of pre-defined residues * number of interaction types

<img src = "./data/ifps.png">

In [34]:
# function to create one indivual fingerprint for all interactions of one structure
def create_fingerprint(residue_dictionary,interaction_dict,residue_list,interaction_type):
    fp_length = len(residue_dictionary)*len(interaction_dict) 
    fp = [0]*fp_length    
    for residue in residue_list:
        fp_index = residue_dictionary[residue][interaction_dict[interaction_type]]
        fp[fp_index] = fp[fp_index]+1
    return fp

# create fingerprints for all structures
fp_list = []
for structure, sites in interactions.items():
    print structure
    for site_name, site_interactions in sites.items():
        if not site_name.startswith('LIG'):
            continue  # fragments are labeled as LIG; other "sites" detected by PLIP are XRC artefacts
            
        print site_name
        for interaction_type, dataframe in site_to_dataframes(site_interactions).items():
            if dataframe is not None:
                residue_nos = dataframe['RESNR'].tolist()
                fp = create_fingerprint(residue_dict,interaction_dict,residue_nos,interaction_type)
                fp_list.append(fp)
        print fp
        

Mpro-x0946.pdb
LIG:A:1101
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

### Overall Frequency fingerprints

In [35]:
len(fp_list)

48

In [36]:
# count fingerprint
count_fp = [sum(i) for i in zip(*fp_list)]
count_fp

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 2,
 2,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 2,
 3,
 0,
 1,
 9,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [37]:
# frequency fingerprint
frequency_fp = [float(i)/len(fp_list) for i in count_fp]
frequency_fp

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.041666666666666664,
 0.041666666666666664,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.041666666666666664,
 0.0625,
 0.0,
 0.020833333333333332,
 0.1875,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.020833333333333332,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.

## 2D Visualization
### Tabular 

### Wave 