In [1]:
import copy
import sqlite3
import pandas as pd

In [2]:
def call_my_query(db_file, my_query):
    """Run one SQL statement against a SQLite file and return every row.

    Parameters
    ----------
    db_file : path handed to ``sqlite3.connect``.
    my_query : SQL text, executed as-is (no parameter binding).

    Returns
    -------
    list of tuple
        All rows produced by the statement; an empty list when there are none.

    Raises
    ------
    sqlite3.Error
        Propagated from the driver; the connection is closed before it escapes.
    """
    ## connect to the SQLite database
    my_connection = sqlite3.connect(db_file)
    try:
        ## create a cursor object and execute the query
        my_cursor = my_connection.cursor()
        my_cursor.execute(my_query)

        ## fetch all the rows (fetchall already hands back a new list)
        return my_cursor.fetchall()
    finally:
        ## always release the connection, even when the query fails
        my_connection.close()

def extract_tables(db_file, table_name):
    """Read one table of a SQLite file into a nested dict.

    The result maps the value in each row's first column to a
    ``{column name: cell value}`` dict for that row. If two rows share the
    same first-column value, the later one replaces the earlier one.
    """
    ## column metadata: the first two fields of each PRAGMA row are used as
    ## (position in the row tuple, column name)
    schema_rows = call_my_query(db_file, f"PRAGMA table_info({table_name})")

    ## full table content, one tuple per row
    table_rows = call_my_query(db_file, f"SELECT * FROM {table_name}")

    ## key every row by its first column
    return {
        record[0]: {schema[1]: record[schema[0]] for schema in schema_rows}
        for record in table_rows
    }

In [3]:
db_file = './results/hERG_All_1956_2024Jun14.mmpdb'

## tables read from the database file, in loading order
table_names = ["pair", "compound", "compound_property", "constant_smiles", "rule", "rule_smiles", "rule_environment", "rule_environment_statistics", "environment_fingerprint"]

## one DataFrame per table, keyed by table name
## NOTE(review): from_dict(...).T builds one column per table row and then transposes;
## the resulting column dtypes may not match the SQLite column types -- confirm before relying on them
dataTable_tables = {}
for table_name in table_names:
    dataTable_tables[table_name] = pd.DataFrame.from_dict(extract_tables(db_file, table_name)).T

In [4]:
## rule_environment: print the raw shape, then display the rows that remain
## after de-duplicating on the four non-id columns
rule_env_dedup_cols = ['rule_id', 'environment_fingerprint_id', 'radius', 'num_pairs']
print(dataTable_tables["rule_environment"].shape)
dataTable_tables["rule_environment"].drop_duplicates(subset=rule_env_dedup_cols)

(50311, 5)


Unnamed: 0,id,rule_id,environment_fingerprint_id,radius,num_pairs
1,1,1,1,0,19
2,2,1,2,1,1
3,3,1,3,2,1
4,4,1,4,3,1
5,5,1,5,4,1
...,...,...,...,...,...
50307,50307,6508,2013,1,2
50308,50308,6508,2734,2,2
50309,50309,6508,2738,3,2
50310,50310,6508,2739,4,2


In [5]:
## pair: print the raw shape, then display the rows that remain
## after de-duplicating on the four non-id columns
pair_dedup_cols = ['rule_environment_id', 'compound1_id', 'compound2_id', 'constant_id']
print(dataTable_tables["pair"].shape)
dataTable_tables["pair"].drop_duplicates(subset=pair_dedup_cols)

(82680, 5)


Unnamed: 0,id,rule_environment_id,compound1_id,compound2_id,constant_id
1,1,1,1,2,1
2,2,2,1,2,1
3,3,3,1,2,1
4,4,4,1,2,1
5,5,5,1,2,1
...,...,...,...,...,...
82676,82676,50307,173,1009,2054
82677,82677,50308,173,1009,2054
82678,82678,50309,173,1009,2054
82679,82679,50310,173,1009,2054


#### compound pair table

In [6]:
##############################################################################
## start from a copy of the raw pair table so dataTable_tables["pair"] is left untouched
dataTable_allpairs = dataTable_tables["pair"].copy().rename(columns={'id':'pair_id'})
print(dataTable_allpairs.shape)

##############################################################################
## lookup tables to join onto the pairs; the first listed column is the join key
table_sele = {"compound": ['id', 'public_id', 'input_smiles'], 
              "compound_property": ['compound_id', 'value'], 
              "constant_smiles": ['id', 'smiles']}

for table_name, cols_sele in table_sele.items():
    dataTable_sele = dataTable_tables[table_name][cols_sele]
    key_col = cols_sele[0]

    ## ---------------- compound information ----------------
    ## compound-related tables are joined twice: once per side of the pair
    keyword = 'compound'
    if keyword in table_name:
        for comp_id in (f'{keyword}1', f'{keyword}2'):
            col_rename = {col: f'{comp_id}-{col}' for col in cols_sele}
            joined = dataTable_allpairs.merge(right=dataTable_sele, left_on=f'{comp_id}_id', right_on=key_col, how='left')
            ## the lookup key duplicates '<comp_id>_id', so it is dropped before prefixing
            dataTable_allpairs = joined.drop(columns=[key_col]).rename(columns=col_rename)

    ## ---------------- constant piece info ----------------
    keyword = 'constant'
    if keyword in table_name:
        col_rename = {col: f'{keyword}-{col}' for col in cols_sele}
        joined = dataTable_allpairs.merge(right=dataTable_sele, left_on=f'{keyword}_id', right_on=key_col, how='left')
        ## both id columns are dropped here; only the prefixed payload columns are kept
        dataTable_allpairs = joined.drop(columns=[key_col, 'constant_id']).rename(columns=col_rename)

##############################################################################
## column order: pair_id first, everything after the first two columns sorted by name,
## rule_environment_id last
cols_sorted = sorted(list(dataTable_allpairs.columns)[2:])
dataTable_allpairs = dataTable_allpairs[['pair_id'] + cols_sorted + ['rule_environment_id']]
print(dataTable_allpairs.shape)
dataTable_allpairs.head(3)

(82680, 5)
(82680, 11)


Unnamed: 0,pair_id,compound1-input_smiles,compound1-public_id,compound1-value,compound1_id,compound2-input_smiles,compound2-public_id,compound2-value,compound2_id,constant-smiles,rule_environment_id
0,1,C(#CCN1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C@...,KT-0034676,5.09092,1,C(#CCOC1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C...,KT-0034677,5.219632,2,[*:1]C#Cc1ccc2c(c1)n(C)c(=O)n2C1CCC(=O)NC1=O.[...,1
1,2,C(#CCN1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C@...,KT-0034676,5.09092,1,C(#CCOC1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C...,KT-0034677,5.219632,2,[*:1]C#Cc1ccc2c(c1)n(C)c(=O)n2C1CCC(=O)NC1=O.[...,2
2,3,C(#CCN1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C@...,KT-0034676,5.09092,1,C(#CCOC1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C...,KT-0034677,5.219632,2,[*:1]C#Cc1ccc2c(c1)n(C)c(=O)n2C1CCC(=O)NC1=O.[...,3


#### rule env tables

In [7]:
## start the rule-environment table from a copy of the raw table, with prefixed column names
rule_env_renames = {'id':'rule_env_id', 'radius': 'rule_env-radius', 'num_pairs': 'rule_env-num_pairs'}
dataTable_allrulenvs = dataTable_tables["rule_environment"].copy().rename(columns=rule_env_renames)
print(dataTable_allrulenvs.shape)
dataTable_allrulenvs.head(3)

(50311, 5)


Unnamed: 0,rule_env_id,rule_id,environment_fingerprint_id,rule_env-radius,rule_env-num_pairs
1,1,1,1,0,19
2,2,1,2,1,1
3,3,1,3,2,1


In [8]:
## match rule information
dataTable_sele = dataTable_tables["rule"]
dataTable_allrulenvs = dataTable_allrulenvs.merge(right=dataTable_sele, left_on='rule_id', right_on='id').drop(columns=['id'])

dataTable_sele = dataTable_tables["rule_smiles"].drop(columns=["num_heavies"])
dataTable_allrulenvs = dataTable_allrulenvs.merge(right=dataTable_sele, left_on='from_smiles_id', right_on='id').drop(columns=['from_smiles_id', 'id']).rename(columns={'smiles':'rule-from_smiles'})
dataTable_allrulenvs = dataTable_allrulenvs.merge(right=dataTable_sele, left_on='to_smiles_id', right_on='id').drop(columns=['to_smiles_id', 'id']).rename(columns={'smiles':'rule-to_smiles'})
print(dataTable_allrulenvs.shape)
dataTable_allrulenvs.head(3)

(50311, 7)


Unnamed: 0,rule_env_id,rule_id,environment_fingerprint_id,rule_env-radius,rule_env-num_pairs,rule-from_smiles,rule-to_smiles
0,1,1,1,0,19,[*:1]CN1CCN([*:2])CC1,[*:1]COC1CCN([*:2])CC1
1,2,1,2,1,1,[*:1]CN1CCN([*:2])CC1,[*:1]COC1CCN([*:2])CC1
2,3,1,3,2,1,[*:1]CN1CCN([*:2])CC1,[*:1]COC1CCN([*:2])CC1


In [9]:
## add rule env stats info
dataTable_sele = dataTable_tables["rule_environment_statistics"]
dataTable_sele = dataTable_sele.rename(columns={col: f'rule_env-{col}' for col in list(dataTable_sele.columns)[2:]})

dataTable_allrulenvs = dataTable_allrulenvs.merge(right=dataTable_sele, left_on='rule_env_id', right_on='rule_environment_id').drop(columns=['id', 'rule_environment_id'])
print(dataTable_allrulenvs.shape)
dataTable_allrulenvs.head(3)

(50311, 20)


Unnamed: 0,rule_env_id,rule_id,environment_fingerprint_id,rule_env-radius,rule_env-num_pairs,rule-from_smiles,rule-to_smiles,rule_env-property_name_id,rule_env-count,rule_env-avg,rule_env-std,rule_env-kurtosis,rule_env-skewness,rule_env-min,rule_env-q1,rule_env-median,rule_env-q3,rule_env-max,rule_env-paired_t,rule_env-p_value
0,1,1,1,0,19,[*:1]CN1CCN([*:2])CC1,[*:1]COC1CCN([*:2])CC1,0.0,19.0,0.796581,0.526069,-0.085431,0.334598,-0.054515,0.37519,0.80742,1.085982,2.010149,6.60031,3e-06
1,2,1,2,1,1,[*:1]CN1CCN([*:2])CC1,[*:1]COC1CCN([*:2])CC1,0.0,1.0,0.128712,,,,0.128712,0.128712,0.128712,0.128712,0.128712,,
2,3,1,3,2,1,[*:1]CN1CCN([*:2])CC1,[*:1]COC1CCN([*:2])CC1,0.0,1.0,0.128712,,,,0.128712,0.128712,0.128712,0.128712,0.128712,,


In [10]:
## environment fingerprints: every column after the first gets an 'env_fp-' prefix
env_fp_raw = dataTable_tables["environment_fingerprint"]
env_fp_cols = list(env_fp_raw.columns)[1:]
dataTable_sele = env_fp_raw.rename(columns={col: f'env_fp-{col}' for col in env_fp_cols})

## join on the fingerprint id, then drop both id columns
with_env_fp = dataTable_allrulenvs.merge(right=dataTable_sele, left_on='environment_fingerprint_id', right_on='id')
dataTable_allrulenvs = with_env_fp.drop(columns=['environment_fingerprint_id', 'id'])
print(dataTable_allrulenvs.shape)
dataTable_allrulenvs.head(3)

(50311, 22)


Unnamed: 0,rule_env_id,rule_id,rule_env-radius,rule_env-num_pairs,rule-from_smiles,rule-to_smiles,rule_env-property_name_id,rule_env-count,rule_env-avg,rule_env-std,...,rule_env-min,rule_env-q1,rule_env-median,rule_env-q3,rule_env-max,rule_env-paired_t,rule_env-p_value,env_fp-smarts,env_fp-pseudosmiles,env_fp-parent_smarts
0,1,1,0,19,[*:1]CN1CCN([*:2])CC1,[*:1]COC1CCN([*:2])CC1,0.0,19.0,0.796581,0.526069,...,-0.054515,0.37519,0.80742,1.085982,2.010149,6.60031,3e-06,[#0;X1;H0;+0;!R:1].[#0;X1;H0;+0;!R:2],[*:1](~*).[*:2](~*),
1,1251,205,0,1,[*:1]CN1CCN(C[*:2])CC1,[*:1]COC1CCN([*:2])CC1,0.0,1.0,0.809452,,...,0.809452,0.809452,0.809452,0.809452,0.809452,,,[#0;X1;H0;+0;!R:1].[#0;X1;H0;+0;!R:2],[*:1](~*).[*:2](~*),
2,1061,176,0,1,[*:1]CCCO[*:2],[*:1]COC1CCN([*:2])CC1,0.0,1.0,0.960904,,...,0.960904,0.960904,0.960904,0.960904,0.960904,,,[#0;X1;H0;+0;!R:1].[#0;X1;H0;+0;!R:2],[*:1](~*).[*:2](~*),


#### merge molecular pairs and rule envs

In [11]:
## left join keeps every pair row; both join key columns are dropped afterwards
pairs_with_rulenvs = dataTable_allpairs.merge(
    right=dataTable_allrulenvs,
    left_on='rule_environment_id',
    right_on='rule_env_id',
    how='left',
)
dataTable_all = pairs_with_rulenvs.drop(columns=['rule_environment_id', 'rule_env_id'])
print(dataTable_all.shape)
dataTable_all.head(3)

(82680, 31)


Unnamed: 0,pair_id,compound1-input_smiles,compound1-public_id,compound1-value,compound1_id,compound2-input_smiles,compound2-public_id,compound2-value,compound2_id,constant-smiles,...,rule_env-min,rule_env-q1,rule_env-median,rule_env-q3,rule_env-max,rule_env-paired_t,rule_env-p_value,env_fp-smarts,env_fp-pseudosmiles,env_fp-parent_smarts
0,1,C(#CCN1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C@...,KT-0034676,5.09092,1,C(#CCOC1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C...,KT-0034677,5.219632,2,[*:1]C#Cc1ccc2c(c1)n(C)c(=O)n2C1CCC(=O)NC1=O.[...,...,-0.054515,0.37519,0.80742,1.085982,2.010149,6.60031,3e-06,[#0;X1;H0;+0;!R:1].[#0;X1;H0;+0;!R:2],[*:1](~*).[*:2](~*),
1,2,C(#CCN1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C@...,KT-0034676,5.09092,1,C(#CCOC1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C...,KT-0034677,5.219632,2,[*:1]C#Cc1ccc2c(c1)n(C)c(=O)n2C1CCC(=O)NC1=O.[...,...,0.128712,0.128712,0.128712,0.128712,0.128712,,,[#0;X1;H0;+0;!R:1]-[C;X2;H0;+0;!R].[#0;X1;H0;+...,[*:1]-[C](~*).[*:2]-[#6](~*)(~*),[#0;X1;H0;+0;!R:1].[#0;X1;H0;+0;!R:2]
2,3,C(#CCN1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C@...,KT-0034676,5.09092,1,C(#CCOC1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C...,KT-0034677,5.219632,2,[*:1]C#Cc1ccc2c(c1)n(C)c(=O)n2C1CCC(=O)NC1=O.[...,...,0.128712,0.128712,0.128712,0.128712,0.128712,,,[#0;X1;H0;+0;!R:1]-[C;X2;H0;+0;!R]#[C;X2;H0;+0...,[*:1]-[C]#[C](~*).[*:2]-[c](:[cH](~*)):[n](~*),[#0;X1;H0;+0;!R:1]-[C;X2;H0;+0;!R].[#0;X1;H0;+...


In [12]:
## display the rows whose compound1_id is not null; the result is not assigned,
## so dataTable_all itself is left unchanged by this cell
dataTable_all.dropna(subset=['compound1_id'])

Unnamed: 0,pair_id,compound1-input_smiles,compound1-public_id,compound1-value,compound1_id,compound2-input_smiles,compound2-public_id,compound2-value,compound2_id,constant-smiles,...,rule_env-min,rule_env-q1,rule_env-median,rule_env-q3,rule_env-max,rule_env-paired_t,rule_env-p_value,env_fp-smarts,env_fp-pseudosmiles,env_fp-parent_smarts
0,1,C(#CCN1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C@...,KT-0034676,5.090920,1,C(#CCOC1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C...,KT-0034677,5.219632,2,[*:1]C#Cc1ccc2c(c1)n(C)c(=O)n2C1CCC(=O)NC1=O.[...,...,-0.054515,0.375190,0.807420,1.085982,2.010149,6.600310,0.000003,[#0;X1;H0;+0;!R:1].[#0;X1;H0;+0;!R:2],[*:1](~*).[*:2](~*),
1,2,C(#CCN1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C@...,KT-0034676,5.090920,1,C(#CCOC1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C...,KT-0034677,5.219632,2,[*:1]C#Cc1ccc2c(c1)n(C)c(=O)n2C1CCC(=O)NC1=O.[...,...,0.128712,0.128712,0.128712,0.128712,0.128712,,,[#0;X1;H0;+0;!R:1]-[C;X2;H0;+0;!R].[#0;X1;H0;+...,[*:1]-[C](~*).[*:2]-[#6](~*)(~*),[#0;X1;H0;+0;!R:1].[#0;X1;H0;+0;!R:2]
2,3,C(#CCN1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C@...,KT-0034676,5.090920,1,C(#CCOC1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C...,KT-0034677,5.219632,2,[*:1]C#Cc1ccc2c(c1)n(C)c(=O)n2C1CCC(=O)NC1=O.[...,...,0.128712,0.128712,0.128712,0.128712,0.128712,,,[#0;X1;H0;+0;!R:1]-[C;X2;H0;+0;!R]#[C;X2;H0;+0...,[*:1]-[C]#[C](~*).[*:2]-[c](:[cH](~*)):[n](~*),[#0;X1;H0;+0;!R:1]-[C;X2;H0;+0;!R].[#0;X1;H0;+...
3,4,C(#CCN1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C@...,KT-0034676,5.090920,1,C(#CCOC1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C...,KT-0034677,5.219632,2,[*:1]C#Cc1ccc2c(c1)n(C)c(=O)n2C1CCC(=O)NC1=O.[...,...,0.128712,0.128712,0.128712,0.128712,0.128712,,,[#0;X1;H0;+0;!R:1]-[C;X2;H0;+0;!R]#[C;X2;H0;+0...,[*:1]-[C]#[C]-[#6](~*)(~*).[*:2]-[c](:[cH]:[cH...,[#0;X1;H0;+0;!R:1]-[C;X2;H0;+0;!R]#[C;X2;H0;+0...
4,5,C(#CCN1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C@...,KT-0034676,5.090920,1,C(#CCOC1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C...,KT-0034677,5.219632,2,[*:1]C#Cc1ccc2c(c1)n(C)c(=O)n2C1CCC(=O)NC1=O.[...,...,0.128712,0.128712,0.128712,0.128712,0.128712,,,[#0;X1;H0;+0;!R:1]-[C;X2;H0;+0;!R]#[C;X2;H0;+0...,[*:1]-[C]#[C]-[c](:[cH](~*)):[cH](~*).[*:2]-[c...,[#0;X1;H0;+0;!R:1]-[C;X2;H0;+0;!R]#[C;X2;H0;+0...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82675,82676,N1C(C(N2CC3=C(C=NN3C)C2)=O)=CC2=C1C(F)=C(C1=CC...,KT-0190566,5.730254,173,C1N(C(C2=CC3=C(C(F)=C(C4=CCCN(C(=O)C)C4)C=C3C3...,KT-0191766,6.007446,1009,[*:1]n1ncc2c1CN(C(=O)c1cc3c(-c4cnccc4OC)cc(C4=...,...,0.277193,0.277193,0.606337,0.935481,0.935481,1.842163,0.316610,[#0;X1;H0;+0;!R:1]-[#7;X3;H0;+0;R],[*:1]-[#7](~*)(~*),[#0;X1;H0;+0;!R:1]
82676,82677,N1C(C(N2CC3=C(C=NN3C)C2)=O)=CC2=C1C(F)=C(C1=CC...,KT-0190566,5.730254,173,C1N(C(C2=CC3=C(C(F)=C(C4=CCCN(C(=O)C)C4)C=C3C3...,KT-0191766,6.007446,1009,[*:1]n1ncc2c1CN(C(=O)c1cc3c(-c4cnccc4OC)cc(C4=...,...,0.277193,0.277193,0.606337,0.935481,0.935481,1.842163,0.316610,[#0;X1;H0;+0;!R:1]-[#7;X3;H0;+0;R](:[#6;X3;H0;...,[*:1]-[n](:[c](~*)(~*)):[n](~*),[#0;X1;H0;+0;!R:1]-[#7;X3;H0;+0;R]
82677,82678,N1C(C(N2CC3=C(C=NN3C)C2)=O)=CC2=C1C(F)=C(C1=CC...,KT-0190566,5.730254,173,C1N(C(C2=CC3=C(C(F)=C(C4=CCCN(C(=O)C)C4)C=C3C3...,KT-0191766,6.007446,1009,[*:1]n1ncc2c1CN(C(=O)c1cc3c(-c4cnccc4OC)cc(C4=...,...,0.277193,0.277193,0.606337,0.935481,0.935481,1.842163,0.316610,[#0;X1;H0;+0;!R:1]-[#7;X3;H0;+0;R](:[#7;X2;H0;...,[*:1]-[n](:[n]:[cH](~*)):[c](:[c](~*)(~*))-[CH...,[#0;X1;H0;+0;!R:1]-[#7;X3;H0;+0;R](:[#6;X3;H0;...
82678,82679,N1C(C(N2CC3=C(C=NN3C)C2)=O)=CC2=C1C(F)=C(C1=CC...,KT-0190566,5.730254,173,C1N(C(C2=CC3=C(C(F)=C(C4=CCCN(C(=O)C)C4)C=C3C3...,KT-0191766,6.007446,1009,[*:1]n1ncc2c1CN(C(=O)c1cc3c(-c4cnccc4OC)cc(C4=...,...,0.277193,0.277193,0.606337,0.935481,0.935481,1.842163,0.316610,[#0;X1;H0;+0;!R:1]-[#7;X3;H0;+0;R]1:[#7;X2;H0;...,[*:1]-[n]1:[n]:[cH]:[c](:[c]:1-[CH2]-[#7](~*)(...,[#0;X1;H0;+0;!R:1]-[#7;X3;H0;+0;R](:[#7;X2;H0;...


In [13]:
def matchPairs2Mols(row):
    """Build two labels for a compound pair from its public ids.

    ``row`` is indexed with 'compound1-public_id' and 'compound2-public_id'.
    Returns ``(ids sorted ascending as a list, 'id1 => id2' string)``; the
    string keeps the original compound1 -> compound2 direction.
    """
    first_id = row['compound1-public_id']
    second_id = row['compound2-public_id']

    ## order-independent label for the pair
    ordered_ids = [first_id, second_id]
    ordered_ids.sort()

    return ordered_ids, f'{first_id} => {second_id}'
## label every pair in a single pass: matchPairs2Mols returns both labels at once,
## so it is called once per row instead of once per output column
id_cols = ['compound1-public_id', 'compound2-public_id']
matched = [
    matchPairs2Mols(dict(zip(id_cols, ids)))
    for ids in dataTable_all[id_cols].itertuples(index=False, name=None)
]
## lists are assigned positionally, in the same row order they were computed
dataTable_all['Cmpd_pairs'] = [pairs for pairs, _ in matched]
dataTable_all['Cmpd_transform'] = [trans for _, trans in matched]
dataTable_all.head(3)

Unnamed: 0,pair_id,compound1-input_smiles,compound1-public_id,compound1-value,compound1_id,compound2-input_smiles,compound2-public_id,compound2-value,compound2_id,constant-smiles,...,rule_env-median,rule_env-q3,rule_env-max,rule_env-paired_t,rule_env-p_value,env_fp-smarts,env_fp-pseudosmiles,env_fp-parent_smarts,Cmpd_pairs,Cmpd_transform
0,1,C(#CCN1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C@...,KT-0034676,5.09092,1,C(#CCOC1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C...,KT-0034677,5.219632,2,[*:1]C#Cc1ccc2c(c1)n(C)c(=O)n2C1CCC(=O)NC1=O.[...,...,0.80742,1.085982,2.010149,6.60031,3e-06,[#0;X1;H0;+0;!R:1].[#0;X1;H0;+0;!R:2],[*:1](~*).[*:2](~*),,"[KT-0034676, KT-0034677]",KT-0034676 => KT-0034677
1,2,C(#CCN1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C@...,KT-0034676,5.09092,1,C(#CCOC1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C...,KT-0034677,5.219632,2,[*:1]C#Cc1ccc2c(c1)n(C)c(=O)n2C1CCC(=O)NC1=O.[...,...,0.128712,0.128712,0.128712,,,[#0;X1;H0;+0;!R:1]-[C;X2;H0;+0;!R].[#0;X1;H0;+...,[*:1]-[C](~*).[*:2]-[#6](~*)(~*),[#0;X1;H0;+0;!R:1].[#0;X1;H0;+0;!R:2],"[KT-0034676, KT-0034677]",KT-0034676 => KT-0034677
2,3,C(#CCN1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C@...,KT-0034676,5.09092,1,C(#CCOC1CCN(C2=NC3=C(C=C2)C2=C(C=C3)SC3=C2NC[C...,KT-0034677,5.219632,2,[*:1]C#Cc1ccc2c(c1)n(C)c(=O)n2C1CCC(=O)NC1=O.[...,...,0.128712,0.128712,0.128712,,,[#0;X1;H0;+0;!R:1]-[C;X2;H0;+0;!R]#[C;X2;H0;+0...,[*:1]-[C]#[C](~*).[*:2]-[c](:[cH](~*)):[n](~*),[#0;X1;H0;+0;!R:1]-[C;X2;H0;+0;!R].[#0;X1;H0;+...,"[KT-0034676, KT-0034677]",KT-0034676 => KT-0034677


In [14]:
## order rows by transformation label, then rule id, then environment radius (all ascending)
dataTable_all = dataTable_all.sort_values(by=['Cmpd_transform', 'rule_id', 'rule_env-radius'], ascending=True)

In [15]:
## export copy of the merged table without the two SMARTS columns
## (env_fp-pseudosmiles is kept); dataTable_all itself is not modified
dataTable_clean = dataTable_all.drop(columns=['env_fp-smarts', 'env_fp-parent_smarts'])

In [16]:
## write the cleaned table to disk; the DataFrame index is not exported
dataTable_clean.to_csv('./results/mmps_clean.csv', index=False)

In [None]:
# dataTable_all.to_csv(f'./results/mmps_all.csv', index=False)

In [None]:
## show every column name of the merged table as a plain list
dataTable_all.columns.tolist()

In [None]:
## NOTE(review): both operands are string literals, so this tests whether ']C.' occurs in the
## text 'constant-smiles' itself rather than in any DataFrame column; the expression is a
## constant -- confirm what check was intended (possibly against the 'constant-smiles' column)
']C.' not in 'constant-smiles'