# Subsetting fingerprints

In [1]:
import numpy as np

from kissim.encoding import FingerprintGenerator
from kissim.comparison import FingerprintDistanceGenerator



## `kissim` pipeline with full fingerprints

In [2]:
# Build the full fingerprints for the two structures with KLIFS IDs 12347 and 3825.
fps = FingerprintGenerator.from_structure_klifs_ids([12347, 3825])

In [3]:
# Pairwise comparison of all fingerprints in `fps`; the progress bars below
# report a distance pass and a coverage pass.
d = FingerprintDistanceGenerator.from_fingerprint_generator(fps)

Calculate pairwise fingerprint distance:   0%|          | 0/1 [00:00<?, ?it/s]

Calculate pairwise fingerprint coverage:   0%|          | 0/1 [00:00<?, ?it/s]

In [4]:
# One row per structure pair: structure IDs, kinase names, distance and bit coverage.
d.data

Unnamed: 0,structure.1,structure.2,kinase.1,kinase.2,distance,bit_coverage
0,12347,3825,BRAF,CASK,0.272455,0.92


## `kissim` pipeline with subset fingerprints

### Define subset

In [5]:
# The selection is written as 1-based positions (10, 30, 70) and shifted by one
# so that it can be used directly as 0-based indices into the feature arrays.
selected_residue_ixs = [position - 1 for position in (10, 30, 70)]

In [6]:
def subset_fingerprint_generator(structure_klifs_ids, residue_ixs=None):
    """
    Generate fingerprints and restrict their per-residue features to a residue subset.

    Parameters
    ----------
    structure_klifs_ids : list of int
        Structure KLIFS IDs passed on to
        `FingerprintGenerator.from_structure_klifs_ids`.
    residue_ixs : iterable of int or None
        0-based positions to keep in every per-residue feature vector.
        If None (default), the notebook-level `selected_residue_ixs` is used,
        which reproduces the behaviour of calling the function with one argument.

    Returns
    -------
    kissim.encoding.FingerprintGenerator
        The generator whose fingerprints were modified in place: each
        fingerprint's `values_dict`, `residue_ids` and `residue_ixs` are reduced
        to the selected positions.

    Notes
    -----
    Only the "physicochemical" features and the "distances" entries of the other
    feature groups are subset. Every other entry (presumably the per-subpocket
    moments - TODO confirm against the kissim fingerprint layout) is copied
    unchanged as a NumPy array, i.e. it is NOT recomputed from the subset.
    """

    if residue_ixs is None:
        # Backward-compatible default: fall back to the selection defined in the
        # "Define subset" cell instead of forcing callers to pass it.
        residue_ixs = selected_residue_ixs
    # A list (not a tuple) is required so that NumPy treats it as a set of
    # positions along the first axis rather than as a multi-dimensional index.
    residue_ixs = list(residue_ixs)

    def _take(values):
        """Return the entries of `values` at the selected positions as a plain list."""
        return np.array(values)[residue_ixs].tolist()

    fps_subset = FingerprintGenerator.from_structure_klifs_ids(structure_klifs_ids)

    # Fingerprints are mutated in place, so they do not need to be written back
    # into `fps_subset.data`.
    for fp in fps_subset.data.values():
        fp_dict = {}
        for feature_name1, features1 in fp.values_dict.items():
            if feature_name1 == "physicochemical":
                # Flat mapping: feature name -> per-residue values.
                fp_dict[feature_name1] = {
                    feature_name2: _take(features2)
                    for feature_name2, features2 in features1.items()
                }
            else:
                # Nested mapping: sub-feature -> name -> values.
                fp_dict[feature_name1] = {}
                for feature_name2, features2 in features1.items():
                    if feature_name2 == "distances":
                        fp_dict[feature_name1][feature_name2] = {
                            feature_name3: _take(features3)
                            for feature_name3, features3 in features2.items()
                        }
                    else:
                        # Not indexed by residue here: keep all values as they are.
                        fp_dict[feature_name1][feature_name2] = {
                            feature_name3: np.array(features3)
                            for feature_name3, features3 in features2.items()
                        }

        fp.values_dict = fp_dict
        fp.residue_ids = _take(fp.residue_ids)
        fp.residue_ixs = _take(fp.residue_ixs)

    return fps_subset

In [7]:
# Same two structure KLIFS IDs as used for the full fingerprints, so that the
# full and subset results can be compared directly.
structure_klifs_ids = [12347, 3825]
fps_subset = subset_fingerprint_generator(structure_klifs_ids)

In [8]:
# Same pairwise comparison as for the full fingerprints, now on the subset fingerprints.
d_subset = FingerprintDistanceGenerator.from_fingerprint_generator(fps_subset)

Calculate pairwise fingerprint distance:   0%|          | 0/1 [00:00<?, ?it/s]

Calculate pairwise fingerprint coverage:   0%|          | 0/1 [00:00<?, ?it/s]

In [9]:
# Distance table for the subset fingerprints; compare `distance` and
# `bit_coverage` with the full-fingerprint table shown for `d.data`.
d_subset.data

Unnamed: 0,structure.1,structure.2,kinase.1,kinase.2,distance,bit_coverage
0,12347,3825,BRAF,CASK,0.325399,1.0
