In [1]:
import numpy as np
import pandas as pd
from ppi_CBv2 import ppi_CB
from ppi_CAv2 import ppi_CA
from score_function.scoreSBv2 import scoreSB
from score_function.scoreMDSv2 import scoreMDS
from score_function.scoreMDWv2 import scoreMDW
from score_function.scoreKPv2 import scoreKP
from score_function.scoreRRPv2 import scoreRRP
from score_function.scoreMNJv2 import scoreMNJ
from score_function.scoreEEv2 import scoreEE
from score_function.scoreECv2 import scoreEC
from score_function.scoreSB_bb_nbv2 import scoreSB_BB_NB
from score_function.scoreMDS_bb_nbv2 import scoreMDS_BB_NB
from score_function.scoreMDW_bb_nbv2 import scoreMDW_BB_NB
from score_function.scoreKP_bb_nbv2 import scoreKP_BB_NB
from score_function.scoreRRP_bb_nbv2 import scoreRRP_BB_NB
from score_function.scoreMNJ_bb_nbv2 import scoreMNJ_BB_NB
from score_function.scoreEE_bb_nbv2 import scoreEE_BB_NB
from score_function.scoreEC_bb_nbv2 import scoreEC_BB_NB
from score_function.scoreSB_bb_bv2 import scoreSB_BB_B
from score_function.scoreRRP_bb_bv2 import scoreRRP_BB_B
from math import sqrt
from ipywidgets import IntProgress
from IPython.display import display
from ipywidgets import FileUpload
from IPython.display import display, clear_output
from ipywidgets import BoundedFloatText, widgets, VBox, HBox
import plotly.express as px

## Proffinity Featurization Module

In [2]:
# Bordered output area; the upload handler displays the uploaded table here.
out = widgets.Output(
    layout=dict(border='1px solid black', width='400px'),
)
#out

In [3]:
# Shared state written by the upload handler: the uploaded file's name and the
# tag used to build the names of the generated CSV files.
content = dict(file="", outname="")

# Upload control and the status label shown next to it.
uploader = widgets.FileUpload()
label = widgets.Label(value="")

def on_upload_change(change):
    """Record the uploaded file's name and preview its table in ``out``.

    Observer for the ``value`` trait of ``uploader``. The file is read from
    the working directory by its name (not from the uploaded bytes), so a
    file with that name must exist on disk.

    Side effects:
        * ``content["file"]`` is set to the uploaded file's name.
        * ``content["outname"]`` is set to the second ``_``-separated token
          of the name before the first ``.`` (``"set_demo.csv"`` ->
          ``"demo"``); when the name contains no underscore the whole stem
          is used instead of raising ``IndexError``.
        * ``label`` shows ``"Loaded"`` on success or a failure message.

    ``content`` is only updated once the file has been read successfully, so
    a failed upload never leaves a half-updated state behind.

    Args:
        change: traitlets change notification (unused; the current value is
            read from ``uploader`` directly).
    """
    uploaded = uploader.value
    if not uploaded:
        # The value is empty when nothing is selected; nothing to load.
        return

    filename = uploaded[0]['name']
    stem = filename.split('.')[0]
    tokens = stem.split('_')
    outname = tokens[1] if len(tokens) > 1 else stem

    try:
        df = pd.read_csv(filename, header=None)
    except Exception as err:
        # Surface the problem in the UI instead of failing silently inside
        # the widget callback.
        label.value = "Failed to load " + filename + ": " + str(err)
        return

    content["file"] = filename
    content["outname"] = outname
    label.value = "Loaded"
    with out:
        display(df)

# Re-run the upload handler whenever the uploader's value changes.
uploader.observe(handler=on_upload_change, names='value')

#display(uploader, label)



In [4]:
# Radius passed to ppi_CB / ppi_CA during featurization (bounded to 5-10,
# default 7).
# NOTE(review): this widget is not part of the displayed scene, so the default
# is what gets used unless it is displayed separately - confirm this is intended.
radius = BoundedFloatText(
    value=7,
    min=5,
    max=10,
    step=0.1,
    description='radius',
)

In [5]:
#radius

In [6]:
# Column names of the feature CSV, in output order. The featurization callback
# writes this list as the header row and emits one value per name for every
# protein.
list_feature_name = [
    # identifiers
    "protein_id", "kd",
    # Cb-Cb interfacial energy
    "scoreSB_CB", "scoreMDS_CB", "scoreMDW_CB", "scoreKP_CB", "scoreRRP_CB",
    "scoreMNJ_Etot_CB", "scoreMNJ_Elj_CB", "scoreMNJ_Ee_CB", "scoreMNJ_Ehb_CB",
    "scoreEE_CB", "scoreEC_CB",
    # Ca-Ca interfacial energy
    "scoreSB_CA", "scoreMDS_CA", "scoreMDW_CA", "scoreKP_CA", "scoreRRP_CA",
    "scoreMNJ_Etot_CA", "scoreMNJ_Elj_CA", "scoreMNJ_Ee_CA", "scoreMNJ_Ehb_CA",
    "scoreEE_CA", "scoreEC_CA",
    # Cb-Cb folded energy
    "scoreSB_BB_NB_CB", "scoreMDS_BB_NB_CB", "scoreMDW_BB_NB_CB",
    "scoreKP_BB_NB_CB", "scoreRRP_BB_NB_CB",
    "scoreMNJ_BB_NB_Etot_CB", "scoreMNJ_BB_NB_Elj_CB",
    "scoreMNJ_BB_NB_Ee_CB", "scoreMNJ_BB_NB_Ehb_CB",
    "scoreEE_BB_NB_CB", "scoreEC_BB_NB_CB",
    # Ca-Ca folded energy
    "scoreSB_BB_Bt_CA", "scoreSB_BB_B_CA", "scoreSB_BB_NB_CA",
    "scoreMDS_BB_NB_CA", "scoreMDW_BB_NB_CA", "scoreKP_BB_NB_CA",
    "scoreRRP_BB_B_CA", "scoreRRP_BB_NB_CA",
    "scoreMNJ_BB_NB_Etot_CA", "scoreMNJ_BB_NB_Elj_CA",
    "scoreMNJ_BB_NB_Ee_CA", "scoreMNJ_BB_NB_Ehb_CA",
    "scoreEE_BB_NB_CA", "scoreEC_BB_NB_CA",
]


def _score_plan():
    """Return the scoring calls as ``(scorer, atom, feature_names)`` tuples.

    The list is in execution order (Cb-Cb interfacial, Cb-Cb folded, Ca-Ca
    interfacial, Ca-Ca folded). ``feature_names`` holds one name per value the
    scorer is unpacked into. The plan is built inside a function so the scorer
    names are looked up when featurization runs.
    """
    return [
        # Cb-Cb interfacial energy
        (scoreSB, 'CB', ("scoreSB_CB",)),
        (scoreMDS, 'CB', ("scoreMDS_CB",)),
        (scoreMDW, 'CB', ("scoreMDW_CB",)),
        (scoreKP, 'CB', ("scoreKP_CB",)),
        (scoreRRP, 'CB', ("scoreRRP_CB",)),
        (scoreMNJ, 'CB', ("scoreMNJ_Etot_CB", "scoreMNJ_Elj_CB",
                          "scoreMNJ_Ee_CB", "scoreMNJ_Ehb_CB")),
        (scoreEE, 'CB', ("scoreEE_CB",)),
        (scoreEC, 'CB', ("scoreEC_CB",)),
        # Cb-Cb folded energy
        (scoreSB_BB_NB, 'CB', ("scoreSB_BB_NB_CB",)),
        (scoreMDS_BB_NB, 'CB', ("scoreMDS_BB_NB_CB",)),
        (scoreMDW_BB_NB, 'CB', ("scoreMDW_BB_NB_CB",)),
        (scoreKP_BB_NB, 'CB', ("scoreKP_BB_NB_CB",)),
        (scoreRRP_BB_NB, 'CB', ("scoreRRP_BB_NB_CB",)),
        (scoreMNJ_BB_NB, 'CB', ("scoreMNJ_BB_NB_Etot_CB", "scoreMNJ_BB_NB_Elj_CB",
                                "scoreMNJ_BB_NB_Ee_CB", "scoreMNJ_BB_NB_Ehb_CB")),
        (scoreEE_BB_NB, 'CB', ("scoreEE_BB_NB_CB",)),
        (scoreEC_BB_NB, 'CB', ("scoreEC_BB_NB_CB",)),
        # Ca-Ca interfacial energy
        (scoreSB, 'CA', ("scoreSB_CA",)),
        (scoreMDS, 'CA', ("scoreMDS_CA",)),
        (scoreMDW, 'CA', ("scoreMDW_CA",)),
        (scoreKP, 'CA', ("scoreKP_CA",)),
        (scoreRRP, 'CA', ("scoreRRP_CA",)),
        (scoreMNJ, 'CA', ("scoreMNJ_Etot_CA", "scoreMNJ_Elj_CA",
                          "scoreMNJ_Ee_CA", "scoreMNJ_Ehb_CA")),
        (scoreEE, 'CA', ("scoreEE_CA",)),
        (scoreEC, 'CA', ("scoreEC_CA",)),
        # Ca-Ca folded energy
        (scoreSB_BB_B, 'CA', ("scoreSB_BB_Bt_CA", "scoreSB_BB_B_CA")),
        (scoreSB_BB_NB, 'CA', ("scoreSB_BB_NB_CA",)),
        (scoreMDS_BB_NB, 'CA', ("scoreMDS_BB_NB_CA",)),
        (scoreMDW_BB_NB, 'CA', ("scoreMDW_BB_NB_CA",)),
        (scoreKP_BB_NB, 'CA', ("scoreKP_BB_NB_CA",)),
        (scoreRRP_BB_B, 'CA', ("scoreRRP_BB_B_CA",)),
        (scoreRRP_BB_NB, 'CA', ("scoreRRP_BB_NB_CA",)),
        (scoreMNJ_BB_NB, 'CA', ("scoreMNJ_BB_NB_Etot_CA", "scoreMNJ_BB_NB_Elj_CA",
                                "scoreMNJ_BB_NB_Ee_CA", "scoreMNJ_BB_NB_Ehb_CA")),
        (scoreEE_BB_NB, 'CA', ("scoreEE_BB_NB_CA",)),
        (scoreEC_BB_NB, 'CA', ("scoreEC_BB_NB_CA",)),
    ]


def _evaluate_scorer(scorer, protein_id, atom, names):
    """Call one scorer and map its result onto ``names``; NaN for all on failure.

    A single name receives the scorer's return value as-is. Several names
    require the scorer to return exactly that many values; any exception
    (including a wrong number of values) yields NaN for every name, so a
    failure can never leave a feature unset or carrying a previous row's value.
    """
    try:
        result = scorer(protein_id, atom)
        if len(names) == 1:
            return {names[0]: result}
        values = tuple(result)
        if len(values) != len(names):
            raise ValueError("expected %d values, got %d" % (len(names), len(values)))
        return dict(zip(names, values))
    except Exception:
        return dict.fromkeys(names, float("nan"))


def on_button_clicked(b):
    """Featurize every protein of the uploaded list and append rows to the CSV.

    Click handler of the "Featurize" button. Reads the uploaded two-column
    table (protein id, Kd) named by ``content["file"]``, runs ``ppi_CB`` and
    ``ppi_CA`` with the current ``radius`` value for each protein, evaluates
    every scorer, and appends one line per protein to
    ``./ppi_index_extract_<outname>.csv`` in ``list_feature_name`` order.
    Progress is shown in ``out1``.

    Behaviour notes:
        * The header row is written only when the output file is missing or
          empty, so pressing the button again no longer inserts a second
          header in the middle of the file (rows are still appended).
        * A scorer that raises contributes NaN for all of its features.
        * A protein whose ``ppi_CB`` / ``ppi_CA`` step raises is skipped; the
          skipped ids are listed in ``out1`` when the run finishes.
        * Protein ids are read as strings so ids such as ``1e96`` are not
          parsed as floating-point numbers.

    Args:
        b: the clicked button (unused).
    """
    import os

    if not content["file"]:
        with out1:
            clear_output()
            print("No input file loaded; upload a file before featurizing.")
        return

    out_path = "./ppi_index_extract_"+content["outname"]+".csv"

    # Validate the input before touching the output file.
    df = pd.read_csv(content["file"], header=None, dtype={0: str})
    rows, columns = df.shape
    if columns < 2:
        with out1:
            clear_output()
            print("Input file must contain two columns: protein id and Kd.")
        return

    if not os.path.exists(out_path) or os.path.getsize(out_path) == 0:
        with open(out_path, 'a') as f:
            print(*list_feature_name, sep=",", file=f)

    plan = _score_plan()
    skipped = []

    for position, (_, row) in enumerate(df.iterrows(), start=1):
        protein_id = row.iloc[0]
        kd = row.iloc[1]

        with out1:
            clear_output()
            print("protein ID:", protein_id)
            print("Kd:", kd)
            print("radius", radius.value)
            print(position, " out of ", rows, " processed.")

        # Cb-Cb and Ca-Ca interactions; without them the protein is skipped.
        try:
            ppi_CB(protein_id, radius.value)
            ppi_CA(protein_id, radius.value)
        except Exception:
            skipped.append(protein_id)
            continue

        features = {"protein_id": protein_id, "kd": kd}
        for scorer, atom, names in plan:
            features.update(_evaluate_scorer(scorer, protein_id, atom, names))

        # Generate the feature row in header order.
        list_feature = [features.get(name, float("nan")) for name in list_feature_name]

        # Append row by row so results survive an interrupted run.
        with open(out_path, 'a') as f:
            print(*list_feature, sep=",", file=f)

    with out1:
        print("Extraction completed.")
        if skipped:
            print("Skipped (interface extraction failed):", *skipped)

In [7]:
# Bordered output area showing featurization progress messages.
out1 = widgets.Output(
    layout=dict(border='1px solid black', width='400px'),
)
#out

In [8]:
#from IPython.display import display
# "Featurize" button: each click runs the feature extraction callback.
button = widgets.Button(
    description="Featurize",
)
button.on_click(callback=on_button_clicked)
#button

In [9]:
# Wide, bordered output area that hosts the feature heat map.
out2 = widgets.Output(
    layout=dict(border='1px solid black', width='1000px'),
)
#out2

In [10]:
# Bordered output area that hosts the feature selector.
out3 = widgets.Output(
    layout=dict(border='1px solid black', width='400px'),
)
#out3

In [11]:
# Module-level placeholder for the filtered feature table.
filtered_df = None

# Status label displayed next to the "Save Features" button.
label2 = widgets.Label(value="")

In [12]:
def filter_dataframe(widget):
    """Show a min-max normalised heat map of the selected feature columns.

    Observer for the feature selector's ``value`` trait. Reloads
    ``./ppi_index_extract_<outname>.csv``, keeps the selected columns, scales
    each column to [0, 1] and renders the result in ``out2`` together with a
    "Save Features" button. The button writes the selected (un-normalised)
    columns to ``./ppi_index_extract_<outname>_filter.csv``.

    Args:
        widget: change notification; ``widget['new']`` is the iterable of
            selected column names.
    """
    selection = list(widget['new'])

    # A "saved" message left over from an earlier selection would wrongly
    # suggest the new selection has been written to disk.
    label2.value = ""

    if not selection:
        # Nothing selected: clear the panel rather than plotting an empty table.
        with out2:
            clear_output()
        return

    df = pd.read_csv("./ppi_index_extract_"+content["outname"]+".csv", header=0, index_col=0)
    filtered_df = df[selection]

    # Column-wise min-max scaling; a constant column divides 0 by 0 and is
    # therefore NaN in the heat map.
    column_min = filtered_df.min()
    column_range = filtered_df.max() - column_min
    normalized_filtered_df = (filtered_df - column_min) / column_range

    filtered_fig = px.imshow(normalized_filtered_df,
                             x=normalized_filtered_df.columns,
                             y=normalized_filtered_df.index,
                             width=1000, height=1000)

    sav = widgets.Button(description = 'Save Features')

    def save(*args):
        """Write the currently selected columns to the ``_filter`` CSV."""
        filtered_df.to_csv("./ppi_index_extract_"+content["outname"]+"_filter.csv", index=True)
        label2.value="saved"

    sav.on_click(save)

    with out2:
        clear_output()
        display(filtered_fig)
        display(sav, label2)

In [13]:
def on_button2_clicked(b):
    """Show the feature selector for the extracted feature table in ``out3``.

    Click handler of the "Select Features" button. Reads the header of
    ``./ppi_index_extract_<outname>.csv`` and offers every column (except the
    index column) in a multi-select list; changes of the selection are
    forwarded to ``filter_dataframe``. A "Select all" button selects every
    option at once.

    The column names are taken directly from the table. The table is not
    normalised here, so a non-numeric column can no longer prevent the
    selector from appearing. When the feature table does not exist yet, a
    hint is printed in ``out3`` instead of failing inside the callback.

    Args:
        b: the clicked button (unused).
    """
    import os

    csv_path = "./ppi_index_extract_"+content["outname"]+".csv"
    if not os.path.isfile(csv_path):
        with out3:
            clear_output()
            print("No feature table found at", csv_path, "- run Featurize first.")
        return

    df = pd.read_csv(csv_path, header=0, index_col=0)

    dropdown = widgets.SelectMultiple(
                        options=tuple(df.columns),
                        description='features',
                        disabled=False,
                        layout={'height':'100px', 'width':'100%'})
    dropdown.observe(filter_dataframe, names='value')
    but = widgets.Button(description = 'Select all')

    def select_all(*args):
        """Select every available feature."""
        dropdown.value = dropdown.options

    but.on_click(select_all)

    with out3:
        clear_output()
        display(dropdown)
        display(but)

In [14]:
#dropdown.observe(filter_dataframe, names='value')
# "Select Features" button: each click (re)builds the feature selector.
button2 = widgets.Button(
    description="Select Features",
)
button2.on_click(callback=on_button2_clicked)
#button2

In [15]:
#scene = HBox([o1,o2,VBox([o3, o4])])

In [16]:
# Three-part layout: upload/featurize controls on the left, the heat map in the
# centre, feature selection on the right.
scene = HBox(children=[
    VBox(children=[out, uploader, label, out1, button]),
    out2,
    VBox(children=[out3, button2]),
])

In [17]:
# Render the assembled widget layout as this cell's output.
display(scene)

HBox(children=(VBox(children=(Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid bla…

### Instruction
step1. Upload: upload an input file containing two columns.
- The first column contains the ID of the complex (for a file name.pdb, the ID is name). The corresponding PDB files should be in the "model" folder.
- The second column should contain the binding affinity value in -log10 units, or "nan" if not available.
- Note that neither column should have a header.
<br>
step2. Featurize: once pressed, will extract features from each PDB file according to the input list. 
<br>
step3. Select Features: choose "Select all" or a partial set of features to visualize on the heatmap in the central section.  
<br>
##### Note: if the feature heatmap does not show, check the "ppi_index_extract..." file and make sure it contains no duplicate tables/headers within a single CSV file.