In [29]:
from IPython.display import clear_output, display
from ipyfilechooser import FileChooser
import nglview as nv
import ipywidgets as widgets

import numpy as np

import pandas as pd
import json
import matplotlib.pyplot as plt
import argparse
from pathlib import Path
from tqdm import tqdm


In [6]:
# NOTE(review): a `!` shell escape runs in its own subshell, so this `cd` presumably does
# not change the kernel's working directory for the following cells. If the data files
# are expected to be read from `examples/`, `%cd examples` is the magic that persists —
# confirm the intent before changing it.
!cd examples

In [2]:
def read_json(file):
    """Parse a JSON file and return its content.

    Parameters
    ----------
    file : str or path-like
        Location of the JSON file to read.

    Returns
    -------
    object
        The Python object produced by ``json.load`` for the file content.
    """
    # The context manager closes the handle even when parsing fails; the previous
    # version left the file open until garbage collection.
    with open(file, encoding="utf-8") as handle:
        return json.load(handle)

def create_and_fill_matrix(data):
    """Build the square residue-by-residue matrix from the flat lists of a parsed JSON.

    Parameters
    ----------
    data : sequence
        Parsed JSON content. Only ``data[0]`` is read; it must provide the keys
        ``"residue1"``, ``"residue2"`` (1-based indices) and ``"distance"`` as
        three lists of the same length.

    Returns
    -------
    numpy.ndarray
        ``float32`` matrix of shape ``(n, n)`` with ``n = int(sqrt(len(residue1)))``,
        where ``matrix[residue1 - 1, residue2 - 1] = distance``. Cells that are not
        listed stay at 0.

    Raises
    ------
    ValueError
        If the three lists do not have the same length.
    """
    record = data[0]
    # -1 to be 0 based
    rows = np.asarray(record["residue1"], dtype=np.intp) - 1
    cols = np.asarray(record["residue2"], dtype=np.intp) - 1
    values = np.asarray(record["distance"], dtype=np.float32)

    if not (len(rows) == len(cols) == len(values)):
        raise ValueError(
            "residue1, residue2 and distance must have the same length "
            f"(got {len(rows)}, {len(cols)} and {len(values)})"
        )

    numres = int(np.sqrt(len(rows)))
    matrix = np.zeros((numres, numres), dtype=np.float32)

    # One vectorised assignment instead of a Python-level loop over every pair
    # (the number of pairs grows with the square of the number of residues).
    matrix[rows, cols] = values
    return matrix

def show_matrix(matrix):
    """Draw `matrix` as a heatmap and show the resulting figure.

    NOTE(review): this relies on a global `sns` (presumably seaborn imported as
    `sns`), which the visible import cell does not provide — confirm that it is
    imported before this function is called.
    """
    sns.heatmap(data=matrix)
    plt.show()


def search_inter_complex(matrix, separation, distance):
    """List the pairs that cross the chain boundary with a value of at most `distance`.

    Parameters
    ----------
    matrix : numpy.ndarray
        Square matrix indexed by 0-based residue position.
    separation : int
        1-based index of the last residue of the first block; the first index of
        a kept pair lies at or before it, the second one strictly after it.
    distance : float
        Upper bound (inclusive) on the matrix value of a kept pair.

    Returns
    -------
    list of tuple
        ``(residue1, residue2, value)`` with 1-based residue indices, in the
        row-major order produced by ``np.argwhere``.
    """
    boundary = separation - 1  # last 0-based index of the first block
    candidates = np.argwhere(matrix <= distance)

    # +1 converts the kept indices back to 1-based numbering
    return [
        (first + 1, second + 1, matrix[first, second])
        for first, second in candidates
        if first <= boundary < second
    ]


def save_data(data, matrix, output, separation):
    """Write a heatmap PNG and a contact CSV, both named after the stem of `output`.

    Both files are written to the current working directory as ``<stem>.png`` and
    ``<stem>.csv`` (``;``-separated, without the index column).

    Parameters
    ----------
    data : iterable of (residue A, residue B, PAE) rows
        Contact rows, e.g. the list returned by ``search_inter_complex``.
    matrix : array-like
        Matrix drawn as a heatmap.
    output : str or path-like
        Only its stem is used to name the two output files.
    separation : int
        Number of residues in the first chain; residue B indices above it are
        shifted down by this amount to obtain a chain-local index.

    NOTE(review): this relies on a global `sns` (presumably seaborn imported as
    `sns`), which the visible import cell does not provide — confirm that it is
    imported before this function is called.
    """
    basename = Path(output).stem

    # Basic figure
    fig, ax = plt.subplots(figsize=(11, 8))
    heatmap = sns.heatmap(matrix, ax=ax)
    heatmap.set(title=basename)
    fig.savefig(f"{basename}.png", dpi=300)

    # Data in csv format.
    df = pd.DataFrame(data, columns=["Residue A", "Residue B", "PAE"])

    # Reset the residue B index so that it is local to the second chain.
    # A plain `% separation` maps residue 2*separation (the last residue of an equally
    # long second chain) to 0 and wraps around for a longer second chain; subtracting
    # the first chain length is correct in both cases. Indices that already lie
    # within the first chain are left untouched.
    residue_b = df["Residue B"]
    df["Residue B (index corrected)"] = residue_b.where(
        residue_b <= separation, residue_b - separation
    )
    df.to_csv(f"{basename}.csv", index=False, sep=";")

In [19]:
    # Chain A spans residues 1..CHAIN_A_LENGTH of the complex; chain B follows it directly.
    CHAIN_A_LENGTH = 334
    # Only residue pairs whose PAE is at most this value are kept.
    PAE_CUTOFF = 10

    data = read_json("rank_1_model_3_ptm_seed_0_pae.json")
    matrix = create_and_fill_matrix(data)
    crossContact = search_inter_complex(matrix, CHAIN_A_LENGTH, PAE_CUTOFF)
    df = pd.DataFrame(crossContact, columns=["Residue A","Residue B", "PAE"])
    # Chain-local index of residue B. Every kept pair has "Residue B" > CHAIN_A_LENGTH,
    # so a subtraction is the exact conversion; the former `% 334` returned 0 for
    # residue 2 * 334 and would wrap around for a chain B longer than chain A.
    df["Residue B_chainB"] = df["Residue B"] - CHAIN_A_LENGTH

In [20]:
# Display the inter-chain contact table built in the previous cell.
df

Unnamed: 0,Residue A,Residue B,PAE,Residue B_chainB
0,120,428,10.0,94
1,120,432,9.8,98
2,122,425,9.5,91
3,122,426,8.3,92
4,122,427,9.0,93
...,...,...,...,...
2161,238,428,9.1,94
2162,238,429,9.7,95
2163,238,430,8.5,96
2164,238,432,9.9,98


In [31]:
from Bio import PDB

structureFile = "rank_1_model_3_ptm_seed_0_unrelaxed.pdb"
MAX_CONTACTS = 100  # only the first rows of `df` are drawn, to keep the widget responsive

# Parse the predicted model (the former `structure == None` guard was always true
# because `structure` was reset to None right before it).
parser = PDB.PDBParser()
structure = parser.get_structure(id='struct', file=structureFile)

displayMol = nv.show_biopython(structure)
shape = displayMol.shape
chain = structure[0].child_list[0].id

# `df` stores 1-based positions in the PAE matrix, which runs over the whole complex.
# Residues are therefore looked up by their position in the model instead of by residue
# number on the first chain, which would fail for a residue that belongs to another chain.
# NOTE(review): assumes the PDB lists exactly the residues of the PAE matrix, in the same
# order — confirm for this model.
model_residues = list(structure[0].get_residues())

# Component 0 of the widget is the structure itself;
# every shape added below becomes an additional component.
contacts = df.head(MAX_CONTACTS)  # also safe when `df` has fewer than MAX_CONTACTS rows
for _, row in tqdm(contacts.iterrows(), total=len(contacts)):
    res1 = int(row["Residue A"])
    res2 = int(row["Residue B"])
    r1 = model_residues[res1 - 1]['CA'].get_coord()
    # The end point must be residue B; it used to be read from `res1` as well, which
    # produced zero-length cylinders.
    r2 = model_residues[res2 - 1]['CA'].get_coord()
    shape.add_cylinder(r1, r2, [1, 0, 0], 0.2)

display(displayMol)


#displayMol = visualizeur()

100%|██████████| 100/100 [00:03<00:00, 32.41it/s]


NGLWidget()

In [25]:
# Peek at the first contact row; as a single row it is shown as one float64 Series.
df.iloc[0,]

Residue A           120.0
Residue B           428.0
PAE                  10.0
Residue B_chainB     94.0
Name: 0, dtype: float64

In [None]:
# Folder picker used to choose the directory that holds the structure files.
# NOTE(review): both paths are absolute and specific to one user's machine; presumably
# they should come from a configurable base directory — confirm before sharing.
structureFolder_widget = FileChooser("/Users/thibault/Documents/WORK/peprmint/databases/cath/domains/DEV/")
structureFolder_widget.title="Structure folder"
# Directory the chooser points at by default.
structureFolder_widget.default_path = "/Users/thibault/Documents/WORK/peprmint/databases/cath/domains/DEV/structures"
# Restrict the listing to directories, since a folder (not a file) is being selected.
structureFolder_widget.show_only_dirs = True