In [1]:
import kinodata
from kinodata.data import KinodataDocked, Filtered
from kinodata.data.data_module import create_dataset
from kinodata.data.grouped_split import KinodataKFoldSplit
from kinodata.transform import TransformToComplexGraph, FilterDockingRMSD
from kinodata.types import *
from kinodata.data.utils.dataset_key import KinodataChemblKey


import json
from pathlib import Path
from typing import Any

import torch

import kinodata.configuration as cfg
from kinodata.model import ComplexTransformer, DTIModel, RegressionModel
from kinodata.model.complex_transformer import make_model as make_complex_transformer
from kinodata.model.dti import make_model as make_dti_baseline
from kinodata.data.data_module import make_kinodata_module
from kinodata.transform import TransformToComplexGraph

import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
import numpy as np
import tqdm

!wandb disabled

W&B disabled.


### Grab some example complex data object

In [2]:
dataset = KinodataDocked()

In [3]:
demo_data = dataset[2]

In [4]:
demo_data['ident']

tensor([20])

### Get pocket mol2 file for data object and read it

In [5]:
# for access to mol2 file via ident 
df = pd.read_csv('../data/raw/kinodata3d_meta.csv', index_col='activities.activity_id')

In [6]:
key = KinodataChemblKey(dataset)

In [7]:
ident = 1405151

In [8]:
dataset[key[ident]]

HeteroData(
  y=[1],
  docking_score=[1],
  posit_prob=[1],
  predicted_rmsd=[1],
  pocket_sequence='KTL______EVKLVAIKIINVETEIEILKKLNPCIIKIKNFYIVLELMEGGELFDKVVGYLHENGIIHRDLKPENVLLITDFGHS',
  scaffold='C1CCC2CC(CC3CCC(C4CC5CCCCC5C4)CC3)CCC2C1',
  activity_type='pIC50',
  ident=[1],
  smiles='NC(=O)c1ccc2nc(-c3ccc(Oc4ccc5ccccc5c4)cc3)[nH]c2c1',
  ligand={
    z=[29],
    x=[29, 12],
    pos=[29, 3],
  },
  pocket={
    z=[623],
    x=[623, 12],
    pos=[623, 3],
  },
  pocket_residue={ x=[85, 23] },
  (ligand, bond, ligand)={
    edge_index=[2, 66],
    edge_attr=[66, 4],
  },
  (pocket, bond, pocket)={
    edge_index=[2, 1250],
    edge_attr=[1250, 4],
  }
)

In [9]:
pocketfile = lambda ident: f'../data/raw/mol2/pocket/{df.loc[ident]['similar.klifs_structure_id']}_pocket.mol2'

In [10]:
from kinodata.data.io.read_klifs_mol2 import read_klifs_mol2
pocket_df = read_klifs_mol2(pocketfile(ident), with_bonds=False)
pocket_df

Unnamed: 0,atom.id,atom.name,atom.x,atom.y,atom.z,atom.type,residue.subst_id,residue.subst_name,atom.charge,atom.status_bit
0,1,N,6.7728,15.690800,52.700100,N.3,1,LYS224,0.0,BACKBONE
1,2,H,6.1055,16.180099,53.279301,H,1,LYS224,0.0,BACKBONE
2,3,CA,6.4066,14.381400,52.165298,C.3,1,LYS224,0.0,BACKBONE
3,4,HA,7.2576,13.729800,52.363300,H,1,LYS224,0.0,BACKBONE
4,5,C,6.1940,14.357400,50.654598,C.2,1,LYS224,0.0,BACKBONE
...,...,...,...,...,...,...,...,...,...,...
1282,1283,CB,2.4372,21.989599,26.203699,C.3,79,SER372,0.0,
1283,1284,HB2,1.9747,22.759001,25.585400,H,79,SER372,0.0,
1284,1285,HB3,3.3477,22.384501,26.654600,H,79,SER372,0.0,
1285,1286,OG,1.5357,21.607401,27.227501,O.3,79,SER372,0.0,


#### must remove hydrogens to match complex representation

In [11]:
non_hydrogen = pocket_df["atom.type"] != "H"

#### try matching

In [12]:
pocket_df[non_hydrogen][["atom.x", "atom.y", "atom.z"]]

Unnamed: 0,atom.x,atom.y,atom.z
0,6.7728,15.690800,52.700100
2,6.4066,14.381400,52.165298
4,6.1940,14.357400,50.654598
5,5.8572,15.369800,50.045700
6,5.1464,13.867500,52.867401
...,...,...,...
1278,2.7867,20.786501,25.333599
1280,3.5411,21.229700,24.080999
1281,4.7697,21.115700,23.984400
1282,2.4372,21.989599,26.203699


In [13]:
demo_data["pocket"].pos.shape

torch.Size([652, 3])

In [14]:
demo_data["pocket"].pos

tensor([[ 9.5601, 17.7450, 49.1304],
        [ 9.4738, 16.4139, 48.5480],
        [ 9.0894, 16.4427, 47.0702],
        ...,
        [ 6.0524, 22.1294, 26.0840],
        [ 5.8970, 23.3319, 25.8465],
        [ 4.0712, 22.3602, 27.5954]])

In [15]:
# match?
np.all(pocket_df[non_hydrogen][["atom.x", "atom.y", "atom.z"]].values == demo_data["pocket"].pos.numpy())

ValueError: operands could not be broadcast together with shapes (623,3) (652,3) 

In [None]:
pd.read_csv(f'../data/interaction_analysis/{demo_data.ident.item()}.csv')