In [7]:
# conda env: pyg (Python3.9.16)
import shutil
import argparse

from datacat4ml.const import *
from datacat4ml.Scripts.data_prep.data_curate.utils.curate_dataset_type import curate_datasets_and_get_stats

In [8]:
def main():
    """
    Run the curation process for every task in ``Tasks`` on:

    - categorized datasets for ORs
    - heterogeneous datasets for ORs
    - heterogeneous datasets for GPCRs

    and get the stats for each dataset type.

    For each task, the ``<task>`` sub-directory of ``CURA_HET_DATASETS_DIR`` and
    of ``CURA_GPCR_DATASETS_DIR`` is deleted (if present) right before the
    corresponding heterogeneous datasets are curated. All curation work is
    delegated to ``curate_datasets_and_get_stats``; this function returns nothing.
    """
    # Imported locally so this cell does not rely on `os` leaking into the
    # notebook namespace through `from datacat4ml.const import *`.
    import os

    # (effect, assay, std_types) combinations curated for the categorized OR
    # datasets, in the order they are processed.
    cat_specs = (
        # Binding affinity
        ('bind', 'RBA', ("Ki", 'IC50')),
        # Agonism
        ('agon', 'G_GTP', ("EC50",)),
        ('agon', 'G_Ca', ("EC50",)),
        ('agon', 'G_cAMP', ("IC50", "EC50")),
        ('agon', 'B_arrest', ("EC50",)),
        # Antagonism
        ('antag', 'G_GTP', ("IC50", "Ki", "Kb", "Ke")),
        ('antag', 'B_arrest', ("IC50",)),
    )
    # Standard types curated for both heterogeneous dataset collections.
    het_std_types = ("Ki", "IC50", 'EC50')

    def _remove_task_dir(base_dir, task):
        """Delete ``<base_dir>/<task>`` and its contents if it exists."""
        task_dir = os.path.join(base_dir, task)
        if os.path.exists(task_dir):
            shutil.rmtree(task_dir)

    for task in Tasks:
        # keep the 'task' in the argument to ensure the stats are generated only once
        print(f'----------->Task is {task}\n')

        ## ==== categorized data for ORs ====
        print('Processing categorized datasets of ORs...')
        # NOTE(review): unlike the het/GPCR outputs below, the curated 'cat'
        # output directory is NOT cleared before curation (the clearing code was
        # commented out in the original) -- confirm this is intended.
        for effect, assay, std_types in cat_specs:
            # A fresh list is passed on every call, as in the original, in case
            # the callee mutates its `std_types` argument.
            curate_datasets_and_get_stats(dataset_type='cat', task=task, target_list=OR_names,
                                          effect=effect, assay=assay, std_types=list(std_types),
                                          input_path=CAT_DATASETS_DIR, output_path=CURA_CAT_DATASETS_DIR)

        ## ==== het data for ORs ====
        print('Processing heterogeneous data of ORs...')
        _remove_task_dir(CURA_HET_DATASETS_DIR, task)
        curate_datasets_and_get_stats(dataset_type='het', task=task, target_list=OR_names,
                                      effect=None, assay=None, std_types=list(het_std_types),
                                      input_path=HET_DATASETS_DIR, output_path=CURA_HET_DATASETS_DIR)

        ## ==== het data for GPCRs ====
        print('Processing heterogeneous data of GPCRs...') 
        _remove_task_dir(CURA_GPCR_DATASETS_DIR, task)
        # Every sub-directory name of HET_GPCR_DIR is used as a target identifier.
        GPCR_chembl_ids = [entry for entry in os.listdir(HET_GPCR_DIR)
                           if os.path.isdir(os.path.join(HET_GPCR_DIR, entry))]
        curate_datasets_and_get_stats(dataset_type='het', task=task, target_list=GPCR_chembl_ids,
                                      effect=None, assay=None, std_types=list(het_std_types),
                                      input_path=HET_GPCR_DIR, output_path=CURA_GPCR_DATASETS_DIR)

In [9]:
# Run the full curation pipeline for every task; per-dataset progress and
# failure messages are printed to the cell output below.
main()

----------->Task is cls

Processing categorized datasets of ORs...
Processing mor_bind_RBA_Ki...
The length of the raw dataset is 5557
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (5403, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (5206, 34)
After removing the mols with multiple values, the shape of the df:(4523, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 154
After standardizing the SMILES, the shape of the df: (154, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (144, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(131, 35)
Done curation.

Processing mor_bind_RBA_IC50...
The length of the raw dataset is 687
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (665, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (630, 34)
After removing the mols w

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (47, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (2, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing mor_antag_G_GTP_IC50...
The length of the raw dataset is 377
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (301, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (294, 34)
After removing the mols with multiple values, the shape of the df:(268, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 76
After standardizing the SMILES, the shape of the df: (76, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (76, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(71, 35)
Done curation.

Processing mor_antag_G_GTP_Ki...
The length of the raw dataset is 63
Curating dataset
start s

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL4523255_None_None_Ki...
No dataset for CHEMBL4523255-Ki
Processing CHEMBL4523255_None_None_IC50...
No dataset for CHEMBL4523255-IC50
Processing CHEMBL4523255_None_None_EC50...
The length of the raw dataset is 2
Curating dataset
start standardizing with value


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL3785_None_None_Ki...
The length of the raw dataset is 47
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (47, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (47, 34)
After removing the mols with multiple values, the shape of the df:(25, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL3785_None_None_IC50...
The length of the raw dataset is 335
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (330, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (330, 34)
After removing the mols with multiple values, the shape of th

Failed curating the dataset due to index 0 is out of bounds for axis 0 with size 0


After standardizing the SMILES, the shape of the df: (6, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(0, 35)
Processing CHEMBL1075144_None_None_EC50...
The length of the raw dataset is 159
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (135, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (13, 34)
After removing the mols with multiple values, the shape of the df:(11, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 24
After standardizing the SMILES, the shape of the df: (24, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (24, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(24, 35)
Done curation.


Processing CHEMBL3371_None_None_Ki...
The length of the raw dataset is 4721
Curating dataset
start standardizing with 

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (55, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL1075178_None_None_Ki...
The length of the raw dataset is 4
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (4, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (4, 34)
After removing the mols with multiple values, the shape of the df:(4, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL1075178_None_None_IC50...
The length of the raw dataset is 55
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (49, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (2, 34)
After removing the mols with multiple values, the shape of the

Failed curating the dataset due to index 0 is out of bounds for axis 0 with size 0


After standardizing the SMILES, the shape of the df: (6, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(0, 35)
Processing CHEMBL1075178_None_None_EC50...
The length of the raw dataset is 125
Curating dataset
start standardizing with value


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (125, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL234_None_None_Ki...
The length of the raw dataset is 6650
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (6465, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (6418, 34)
After removing the mols with multiple values, the shape of the df:(5097, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 185
After standardizing the SMILES, the shape of the df: (185, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (185, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(173, 35)
Done curation.

Processing CHEMBL234_None_None_IC50...
The length of the raw dataset is 383
Curati

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (53, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL4523923_None_None_Ki...
No dataset for CHEMBL4523923-Ki
Processing CHEMBL4523923_None_None_IC50...
No dataset for CHEMBL4523923-IC50
Processing CHEMBL4523923_None_None_EC50...
The length of the raw dataset is 33
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (33, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (33, 34)
After removing the mols with multiple values, the shape of the df:(32, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.


Processing CHEMBL5861_None_None_Ki...
No dataset for CHEMBL5861-Ki
Processing CHEMBL5861_None_None_IC50...
The length of the raw dataset is 16
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (16, 34)
After dropping the mols with MW >

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL4523248_None_None_Ki...
No dataset for CHEMBL4523248-Ki
Processing CHEMBL4523248_None_None_IC50...
No dataset for CHEMBL4523248-IC50
Processing CHEMBL4523248_None_None_EC50...
The length of the raw dataset is 14
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (12, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (12, 34)
After removing the mols with multiple values, the shape of the df:(12, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 2
After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (2, 34)
After dropping the duplicate combinations of (smile

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (4, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL3232682_None_None_Ki...
No dataset for CHEMBL3232682-Ki
Processing CHEMBL3232682_None_None_IC50...
The length of the raw dataset is 6
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (5, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (5, 34)
After removing the mols with multiple values, the shape of the df:(5, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 1
After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(1, 35)
Done curation.

Processing CHEMBL3232682_None_None_EC50...
No dataset for CHEMBL3232682-EC50

Processing CHEMBL3191_None_None_Ki...
No dataset for CHEMBL3191-Ki
P

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL1965_None_None_EC50...
The length of the raw dataset is 2
Curating dataset
start standardizing with value


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (0, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL1293316_None_None_Ki...
The length of the raw dataset is 5
Curating dataset
start standardizing with value


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (5, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL1293316_None_None_IC50...
No dataset for CHEMBL1293316-IC50
Processing CHEMBL1293316_None_None_EC50...
The length of the raw dataset is 131
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (123, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (64, 34)
After removing the mols with multiple values, the shape of the df:(57, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 8
After standardizing the SMILES, the shape of the df: (8, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (7, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(7, 35)
Done curation.


Processing CHEMBL264_None_None_Ki...
The length of the raw dataset is 5019
Curating dataset
start standardizing with value
After standar

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (165, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL1832_None_None_EC50...
The length of the raw dataset is 239
Curating dataset
start standardizing with value


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (237, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL2049_None_None_Ki...
The length of the raw dataset is 821
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (807, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (685, 34)
After removing the mols with multiple values, the shape of the df:(598, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 10
After standardizing the SMILES, the shape of the df: (10, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (3, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(3, 35)
Done curation.

Processing CHEMBL2049_None_None_IC50...
The length of the raw dataset is 137
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (131, 34)
After dropping the mols 

Failed curating the dataset due to index 0 is out of bounds for axis 0 with size 0


After standardizing the SMILES, the shape of the df: (3, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(0, 35)

Processing CHEMBL2024_None_None_Ki...
No dataset for CHEMBL2024-Ki
Processing CHEMBL2024_None_None_IC50...
The length of the raw dataset is 69
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (60, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (57, 34)
After removing the mols with multiple values, the shape of the df:(52, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 9
After standardizing the SMILES, the shape of the df: (9, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (4, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(4, 35)
Done curation.

Processing CHEMBL2024_None_None_EC50...
The length of the ra

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (281, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL217_None_None_Ki...
The length of the raw dataset is 11229
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (10931, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (10745, 34)
After removing the mols with multiple values, the shape of the df:(8002, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 298
After standardizing the SMILES, the shape of the df: (298, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (298, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(276, 35)
Done curation.

Processing CHEMBL217_None_None_IC50...
The length of the raw dataset is 1292
Cu

Failed curating the dataset due to index 0 is out of bounds for axis 0 with size 0


After standardizing the SMILES, the shape of the df: (5, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(0, 35)
Processing CHEMBL2028_None_None_EC50...
The length of the raw dataset is 143
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (135, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (122, 34)
After removing the mols with multiple values, the shape of the df:(120, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 8
After standardizing the SMILES, the shape of the df: (8, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (8, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(7, 35)
Done curation.


Processing CHEMBL5849_None_None_Ki...
No dataset for CHEMBL5849-Ki
Processing CHEMBL5849_None_None_IC50...
The length of th

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL2056_None_None_Ki...
The length of the raw dataset is 1836
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (1664, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1664, 34)
After removing the mols with multiple values, the shape of the df:(1383, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 172
After standardizing the SMILES, the shape of the df: (172, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (172, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(170, 35)
Done curation.

Processing CHEMBL2056_None_None_IC50...
The length of the raw dataset is 208
Curati

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL1905_None_None_EC50...
No dataset for CHEMBL1905-EC50

Processing CHEMBL5700_None_None_Ki...
The length of the raw dataset is 1
Curating dataset
start standardizing with value


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL5700_None_None_IC50...
The length of the raw dataset is 103
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (96, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (96, 34)
After removing the mols with multiple values, the shape of the df:(93, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 7
After standardizing the SMILES, the shape of the df: (7, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (7, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(7, 35)
Done curation.

Processing CHEMBL5700_None_None_EC50...
The length of the raw dataset is 40
Curating dataset
star

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL3764_None_None_Ki...
The length of the raw dataset is 353
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (352, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (253, 34)
After removing the mols with multiple values, the shape of the df:(240, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 1
After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(1, 35)
Done curation.

Processing CHEMBL3764_None_None_IC50...
The length of the raw dataset is 286
Curating dataset
s

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL2955_None_None_IC50...
The length of the raw dataset is 680
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (678, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (678, 34)
After removing the mols with multiple values, the shape of the df:(301, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 2
After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (2, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(2, 35)
Done curation.

Processing CHEMBL2955_None_None_EC50...
The length of the raw dataset is 188
Curating dataset


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL298_None_None_Ki...
The length of the raw dataset is 632
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (624, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (539, 34)
After removing the mols with multiple values, the shape of the df:(447, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 6
After standardizing the SMILES, the shape of the df: (6, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (2, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(2, 35)
Done curation.

Processing CHEMBL298_None_None_IC50...
The length of the raw dataset is 1552
Curating dataset
st

Failed curating the dataset due to index 0 is out of bounds for axis 0 with size 0


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(0, 35)

Processing CHEMBL5649_None_None_Ki...
The length of the raw dataset is 3
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (3, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (3, 34)
After removing the mols with multiple values, the shape of the df:(3, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL5649_None_None_IC50...
The length of the raw dataset is 165
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (165, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (165, 34)
After removing the mols with multiple values, the shape of the df:(161, 35)
start applying thre

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL288_None_None_Ki...
The length of the raw dataset is 155
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (155, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (155, 34)
After removing the mols with multiple values, the shape of the df:(82, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL288_None_None_IC50...
The length of the raw dataset is 1657
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (1524, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1522, 34)
After removing the mols with multiple values, the shape of the df:(1240, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 133
After standardizing th

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL4105988_None_None_EC50...
The length of the raw dataset is 1
Curating dataset
start standardizing with value


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL4523360_None_None_Ki...
No dataset for CHEMBL4523360-Ki
Processing CHEMBL4523360_None_None_IC50...
No dataset for CHEMBL4523360-IC50
Processing CHEMBL4523360_None_None_EC50...
The length of the raw dataset is 1
Curating dataset
start standardizing with value


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL4308_None_None_Ki...
The length of the raw dataset is 824
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (818, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (795, 34)
After removing the mols with multiple values, the shape of the df:(730, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 6
After standardizing the SMILES, the shape of the df: (6, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (4, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(4, 35)
Done curation.

Processing CHEMBL4308_None_None_IC50...
The length of the raw dataset is 495
Curating dataset
s

Failed curating the dataset due to index 0 is out of bounds for axis 0 with size 0


After standardizing the SMILES, the shape of the df: (3, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(0, 35)
Processing CHEMBL1917_None_None_EC50...
The length of the raw dataset is 21
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (21, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (17, 34)
After removing the mols with multiple values, the shape of the df:(15, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.


Processing CHEMBL4843_None_None_Ki...
The length of the raw dataset is 51
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (51, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (51, 34)
After removing the mols with multiple values, the shape of the df:(51, 35)
start applying thre

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL4029_None_None_Ki...
The length of the raw dataset is 120
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (104, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (104, 34)
After removing the mols with multiple values, the shape of the df:(102, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 16
After standardizing the SMILES, the shape of the df: (16, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (16, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(16, 35)
Done curation.

Processing CHEMBL4029_None_None_IC50...
The length of the raw dataset is 334
Curating datas

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL259_None_None_Ki...
The length of the raw dataset is 2430
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (2413, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1709, 34)
After removing the mols with multiple values, the shape of the df:(1516, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 17
After standardizing the SMILES, the shape of the df: (17, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (14, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(12, 35)
Done curation.

Processing CHEMBL259_None_None_IC50...
The length of the raw dataset is 1065
Curating da

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL2107_None_None_Ki...
The length of the raw dataset is 21
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (21, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (21, 34)
After removing the mols with multiple values, the shape of the df:(15, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL2107_None_None_IC50...
The length of the raw dataset is 1366
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (1290, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1111, 34)
After removing the mols with multiple values, the shape of

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL2064_None_None_IC50...
The length of the raw dataset is 1
Curating dataset
start standardizing with value


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL2064_None_None_EC50...
The length of the raw dataset is 7
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (6, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (6, 34)
After removing the mols with multiple values, the shape of the df:(6, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 1
After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(1, 35)
Done curation.


Processing CHEMBL3559688_None_None_Ki...
No dataset for CHEMBL3559688-Ki
Processing CHEMBL3559688_Non

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (5, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL3559688_None_None_EC50...
No dataset for CHEMBL3559688-EC50

Processing CHEMBL1800_None_None_Ki...
The length of the raw dataset is 1642
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (1611, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1536, 34)
After removing the mols with multiple values, the shape of the df:(1416, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 31
After standardizing the SMILES, the shape of the df: (31, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (30, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(29, 35)
Done curation.

Processing CHEMBL1800_None_None_IC50...
The length of the raw dataset is 814
Curating dataset
start standardizing with value
After 

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL5850_None_None_Ki...
No dataset for CHEMBL5850-Ki
Processing CHEMBL5850_None_None_IC50...
The length of the raw dataset is 50
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (46, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (46, 34)
After removing the mols with multiple values, the shape of the df:(32, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 4
After standardizing the SMILES, the shape of the df: (4, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (4, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(4, 35)
Done curation.

Processing CHEMBL5850_None_Non

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL5162_None_None_IC50...
The length of the raw dataset is 155
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (141, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (141, 34)
After removing the mols with multiple values, the shape of the df:(72, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 14
After standardizing the SMILES, the shape of the df: (14, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (14, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(9, 35)
Done curation.

Processing CHEMBL5162_None_None_EC50...
The length of the raw dataset is 2
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL3250_None_None_Ki...
The length of the raw dataset is 69
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (67, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (67, 34)
After removing the mols with multiple values, the shape of the df:(67, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 2
After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (2, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(2, 35)
Done curation.

Processing CHEMBL3250_None_None_IC50...
The length of the raw dataset is 141
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (130, 34)
After dropping the mols with MW 

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL4523454_None_None_EC50...
The length of the raw dataset is 23
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (23, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (22, 34)
After removing the mols with multiple values, the shape of the df:(22, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.


Processing CHEMBL4523866_None_None_Ki...
No dataset for CHEMBL4523866-Ki
Processing CHEMBL4523866_None_None_IC50...
No dataset for CHEMBL4523866-IC50
Processing CHEMBL4523866_None_None_EC50...
The length of the raw dataset is 2
Curating dataset
start standardizing with value


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL252_None_None_Ki...
The length of the raw dataset is 262
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (262, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (259, 34)
After removing the mols with multiple values, the shape of the df:(233, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL252_None_None_IC50...
The length of the raw dataset is 1641
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (1638, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1514, 34)
After removing the mols with multiple values, the shape of the df:(1284, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 3
After standardizing the

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL2346493_None_None_EC50...
The length of the raw dataset is 2
Curating dataset
start standardizing with value


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL4616_None_None_Ki...
The length of the raw dataset is 752
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (727, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (708, 34)
After removing the mols with multiple values, the shape of the df:(637, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 25
After standardizing the SMILES, the shape of the df: (25, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (25, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(25, 35)
Done curation.

Processing CHEMBL4616_None_None_IC50...
The length of the raw dataset is 1395
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (1356, 34)
After dropping the mol

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL3559686_None_None_EC50...
The length of the raw dataset is 9
Curating dataset
start standardizing with value


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL2327_None_None_Ki...
The length of the raw dataset is 873
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (872, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (842, 34)
After removing the mols with multiple values, the shape of the df:(794, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 1
After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(1, 35)
Done curation.

Processing CHEMBL2327_None_None_IC50...
The length of the raw dataset is 290
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (286, 34)
After dropping the mols with

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (5, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL1628472_None_None_Ki...
The length of the raw dataset is 33
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (33, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (5, 34)
After removing the mols with multiple values, the shape of the df:(4, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL1628472_None_None_IC50...
The length of the raw dataset is 40
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (32, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (27, 34)
After removing the mols with multiple values, the shape of the df:(23, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 8
After standardizing the SMILE

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL4523478_None_None_IC50...
No dataset for CHEMBL4523478-IC50
Processing CHEMBL4523478_None_None_EC50...
The length of the raw dataset is 2
Curating dataset
start standardizing with value


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL344_None_None_Ki...
The length of the raw dataset is 1705
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (1701, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1700, 34)
After removing the mols with multiple values, the shape of the df:(1472, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 4
After standardizing the SMILES, the shape of the df: (4, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (4, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(4, 35)
Done curation.

Processing CHEMBL344_None_None_IC50...
The length of the raw dataset is 3063
Curating datase

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (2, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL1804_None_None_Ki...
The length of the raw dataset is 531
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (531, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (337, 34)
After removing the mols with multiple values, the shape of the df:(324, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL1804_None_None_IC50...
The length of the raw dataset is 563
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (558, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (114, 34)
After removing the mols with multiple values, the shape o

Failed curating the dataset due to index 0 is out of bounds for axis 0 with size 0


After standardizing the SMILES, the shape of the df: (5, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(0, 35)
Processing CHEMBL1804_None_None_EC50...
The length of the raw dataset is 246
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (246, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (231, 34)
After removing the mols with multiple values, the shape of the df:(125, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.


Processing CHEMBL4315_None_None_Ki...
The length of the raw dataset is 522
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (508, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (508, 34)
After removing the mols with multiple values, the shape of the df:(445, 35)
start apply

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL5201_None_None_EC50...
The length of the raw dataset is 120
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (120, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (120, 34)
After removing the mols with multiple values, the shape of the df:(81, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.


Processing CHEMBL1628473_None_None_Ki...
The length of the raw dataset is 33
Curating dataset
start standardizing with value


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (32, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL1628473_None_None_IC50...
The length of the raw dataset is 3
Curating dataset
start standardizing with value


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (3, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL1628473_None_None_EC50...
The length of the raw dataset is 55
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (55, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (13, 34)
After removing the mols with multiple values, the shape of the df:(11, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.


Processing CHEMBL5963_None_None_Ki...
The length of the raw dataset is 3
Curating dataset
start standardizing with value


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (3, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL5963_None_None_IC50...
The length of the raw dataset is 95
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (95, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (83, 34)
After removing the mols with multiple values, the shape of the df:(31, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL5963_None_None_EC50...
The length of the raw dataset is 223
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (199, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (178, 34)
After removing the mols with multiple values, the shape of t

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL5144_None_None_IC50...
The length of the raw dataset is 18
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (17, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (2, 34)
After removing the mols with multiple values, the shape of the df:(2, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 1


Failed curating the dataset due to index 0 is out of bounds for axis 0 with size 0


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(0, 35)
Processing CHEMBL5144_None_None_EC50...
The length of the raw dataset is 38
Curating dataset
start standardizing with value


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (37, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL3321651_None_None_Ki...
No dataset for CHEMBL3321651-Ki
Processing CHEMBL3321651_None_None_IC50...
The length of the raw dataset is 6
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (6, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (5, 34)
After removing the mols with multiple values, the shape of the df:(5, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL3321651_None_None_EC50...
No dataset for CHEMBL3321651-EC50

Processing CHEMBL2731_None_None_Ki...
The length of the raw dataset is 27
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (27, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (27, 34)
After removing the mols with multi

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (9, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL5844_None_None_EC50...
The length of the raw dataset is 105
Curating dataset
start standardizing with value


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (105, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL1985_None_None_Ki...
The length of the raw dataset is 311
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (311, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (311, 34)
After removing the mols with multiple values, the shape of the df:(304, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL1985_None_None_IC50...
The length of the raw dataset is 1453
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (1380, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1331, 34)
After removing the mols with multiple values, the shape of the df:(784, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 73
After standardizing

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (158, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL5952_None_None_Ki...
The length of the raw dataset is 115
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (112, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (101, 34)
After removing the mols with multiple values, the shape of the df:(96, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 3
After standardizing the SMILES, the shape of the df: (3, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (3, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(3, 35)
Done curation.

Processing CHEMBL5952_None_None_IC50...
The length of the raw dataset is 8
Curating dataset
st

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (53, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL233_None_None_Ki...
The length of the raw dataset is 5682
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (5511, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (5314, 34)
After removing the mols with multiple values, the shape of the df:(4587, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 171
After standardizing the SMILES, the shape of the df: (171, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (161, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(148, 35)
Done curation.

Processing CHEMBL233_None_None_IC50...
The length of the raw dataset is 1366
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (1258, 34)
After dropping 

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (236, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL4069_None_None_Ki...
The length of the raw dataset is 13
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (10, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (5, 34)
After removing the mols with multiple values, the shape of the df:(5, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 3
After standardizing the SMILES, the shape of the df: (3, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (3, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(3, 35)
Done curation.

Processing CHEMBL4069_None_None_IC50...
The length of the raw dataset is 65
Curating dataset
start 

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (6, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL1075133_None_None_Ki...
No dataset for CHEMBL1075133-Ki
Processing CHEMBL1075133_None_None_IC50...
The length of the raw dataset is 18
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (18, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (18, 34)
After removing the mols with multiple values, the shape of the df:(16, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL1075133_None_None_EC50...
No dataset for CHEMBL1075133-EC50

Processing CHEMBL4894_None_None_Ki...
The length of the raw dataset is 45
Curating dataset
start standardizing with value


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (45, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL4894_None_None_IC50...
The length of the raw dataset is 86
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (65, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (61, 34)
After removing the mols with multiple values, the shape of the df:(61, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 21
After standardizing the SMILES, the shape of the df: (21, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (21, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(21, 35)
Done curation.

Processing CHEMBL4894_None_None_EC50...
No dataset for CHEMBL4894-EC50

Processing CHEMBL1792

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (3, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (2, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL4867_None_None_EC50...
The length of the raw dataset is 51
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (42, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (37, 34)
After removing the mols with multiple values, the shape of the df:(25, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 9
After standardizing the SMILES, the shape of the df: (9, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (5, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(4, 35)
Done curation.


Processing CHEMBL1293267_None_None_Ki...
The length of the raw dataset is 74
Curating dataset
sta

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (43, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL3176_None_None_IC50...
The length of the raw dataset is 50
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (42, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (37, 34)
After removing the mols with multiple values, the shape of the df:(37, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 8
After standardizing the SMILES, the shape of the df: (8, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (8, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(8, 35)
Done curation.

Processing CHEMBL3176_None_None_EC50...
No dataset for CHEMBL3176-EC50

Processing CHEMBL251_None_None_Ki...
The length of the raw dataset is 7277
Curating dataset
start standardizing with value
After standardizing the

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL3746_None_None_Ki...
The length of the raw dataset is 3
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (3, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (3, 34)
After removing the mols with multiple values, the shape of the df:(3, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL3746_None_None_IC50...
The length of the raw dataset is 631
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (554, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (554, 34)
After removing the mols with multiple values, the shape of the df:(518, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 77
After standardizing the SMILES, 

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL2150838_None_None_Ki...
No dataset for CHEMBL2150838-Ki
Processing CHEMBL2150838_None_None_IC50...
The length of the raw dataset is 66
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (66, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (66, 34)
After removing the mols with multiple values, the shape of the df:(66, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL2150838_None_None_EC50...
No dataset for CHEMBL2150838-EC50

Processing CHEMBL4422_None_None_Ki...
The length of the raw dataset is 185
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape o

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL5607_None_None_IC50...
The length of the raw dataset is 87
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (87, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (87, 34)
After removing the mols with multiple values, the shape of the df:(85, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL5607_None_None_EC50...
No dataset for CHEMBL5607-EC50

Processing CHEMBL4877_None_None_Ki...
The length of the raw dataset is 351
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (337, 34)
After dropping the mols with MW > 900.0 , the shape of the df: 

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL2032_None_None_EC50...
No dataset for CHEMBL2032-EC50

Processing CHEMBL5892_None_None_Ki...
No dataset for CHEMBL5892-Ki
Processing CHEMBL5892_None_None_IC50...
The length of the raw dataset is 1
Curating dataset
start standardizing with value


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL5892_None_None_EC50...
No dataset for CHEMBL5892-EC50

Processing CHEMBL5517_None_None_Ki...
No dataset for CHEMBL5517-Ki
Processing CHEMBL5517_None_None_IC50...
The length of the raw dataset is 71
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (71, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (71, 34)
After removing the mols with multiple values, the shape of the df:(67, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL5517_None_None_EC50...
The length of the raw dataset is 45
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (45, 

Failed curating the dataset due to index 0 is out of bounds for axis 0 with size 0


After standardizing the SMILES, the shape of the df: (3, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(0, 35)
Processing CHEMBL1853_None_None_EC50...
The length of the raw dataset is 86
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (86, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (86, 34)
After removing the mols with multiple values, the shape of the df:(81, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.


Processing CHEMBL1983_None_None_Ki...
The length of the raw dataset is 1340
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (1282, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1282, 34)
After removing the mols with multiple values, the shape of the df:(1118, 35)
start apply

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL1916_None_None_Ki...
The length of the raw dataset is 901
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (840, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (840, 34)
After removing the mols with multiple values, the shape of the df:(636, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 61
After standardizing the SMILES, the shape of the df: (61, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (61, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(56, 35)
Done curation.

Processing CHEMBL1916_None_None_IC50...
The length of the raw dataset is 75
Curating datase

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (102, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL5399_None_None_Ki...
The length of the raw dataset is 16
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (16, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (15, 34)
After removing the mols with multiple values, the shape of the df:(15, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL5399_None_None_IC50...
The length of the raw dataset is 94
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (93, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (5, 34)
After removing the mols with multiple values, the shape of the df:(5, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 1


Failed curating the dataset due to index 0 is out of bounds for axis 0 with size 0


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(0, 35)
Processing CHEMBL5399_None_None_EC50...
The length of the raw dataset is 84
Curating dataset
start standardizing with value


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (71, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL3724_None_None_Ki...
The length of the raw dataset is 13
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (7, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (7, 34)
After removing the mols with multiple values, the shape of the df:(7, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 6
After standardizing the SMILES, the shape of the df: (6, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (6, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(6, 35)
Done curation.

Processing CHEMBL3724_None_None_IC50...
The length of the raw dataset is 163
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (103, 34)
After dropping the mols with MW > 

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (15, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL4295740_None_None_EC50...
No dataset for CHEMBL4295740-EC50

Processing CHEMBL3988599_None_None_Ki...
No dataset for CHEMBL3988599-Ki
Processing CHEMBL3988599_None_None_IC50...
The length of the raw dataset is 371
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (371, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (371, 34)
After removing the mols with multiple values, the shape of the df:(321, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL3988599_None_None_EC50...
The length of the raw dataset is 1
Curating dataset
start standardizing with value


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL1987_None_None_Ki...
The length of the raw dataset is 125
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (103, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (103, 34)
After removing the mols with multiple values, the shape of the df:(96, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 22
After standardizing the SMILES, the shape of the df: (22, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (22, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(22, 35)
Done curation.

Processing CHEMBL1987_None_None_IC50...
The length of the raw dataset is 147
Curating datase

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (68, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 

Processing CHEMBL1854_None_None_Ki...
The length of the raw dataset is 1
Curating dataset
start standardizing with value


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
start applying thresholds
Applying thresholds 
Processing CHEMBL1854_None_None_IC50...
The length of the raw dataset is 50
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (50, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (50, 34)
After removing the mols with multiple values, the shape of the df:(37, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 0
Done curation.

Processing CHEMBL1854_None_None_EC50...
The length of the raw dataset is 45
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (33, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (23, 34)
After removing the mols with multiple values, the shape of the 

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL1925_None_None_IC50...
No dataset for CHEMBL1925-IC50
Processing CHEMBL1925_None_None_EC50...
The length of the raw dataset is 2
Curating dataset
start standardizing with value


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL1821_None_None_Ki...
The length of the raw dataset is 967
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (906, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (781, 34)
After removing the mols with multiple values, the shape of the df:(676, 35)
start applying thresholds
Applying thresholds 
The length of df_novalue is 61
After standardizing the SMILES, the shape of the df: (61, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (61, 34)
After dropping the duplicate combinations of (smiles, value) , the shape of the df:(55, 35)
Done curation.

Processing CHEMBL1821_None_None_IC50...
The length of the raw dataset is 408
Curating dataset
start standardizing with value
After standardizing the SMILES, the shape of the df: (400, 34)
After dropping the mols 

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (47, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (2, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing mor_antag_G_GTP_IC50...
The length of the raw dataset is 377
Curating dataset
After standardizing the SMILES, the shape of the df: (301, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (294, 34)
After removing the mols with multiple values, the shape of the df:(268, 35)
Applying thresholds 
Done curation.

Processing mor_antag_G_GTP_Ki...
The length of the raw dataset is 63
Curating dataset
After standardizing the SMILES, the shape of the df: (63, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (63, 34)
After removing the mols with multiple values, the shape of the df:(61, 35)
Applying thresholds 
Done curation.

Processing mor_antag_G_GTP_Kb...
Empty dataset for mor-antag-G_GTP-Kb
Processing mor_antag_G_GTP_Ke..

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL4523255_None_None_Ki...
No dataset for CHEMBL4523255-Ki
Processing CHEMBL4523255_None_None_IC50...
No dataset for CHEMBL4523255-IC50
Processing CHEMBL4523255_None_None_EC50...
The length of the raw dataset is 2
Curating dataset


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL3785_None_None_Ki...
The length of the raw dataset is 47
Curating dataset
After standardizing the SMILES, the shape of the df: (47, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (47, 34)
After removing the mols with multiple values, the shape of the df:(25, 35)
Applying thresholds 
Done curation.

Processing CHEMBL3785_None_None_IC50...
The length of the raw dataset is 335
Curating dataset
After standardizing the SMILES, the shape of the df: (330, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (330, 34)
After removing the mols with multiple values, the shape of the df:(291, 35)
Applying thresholds 
Done curation.

Processing CHEMBL3785_None_None_EC50...
The length of the raw dataset is 758
Curating datase

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (55, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL1075178_None_None_Ki...
The length of the raw dataset is 4
Curating dataset
After standardizing the SMILES, the shape of the df: (4, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (4, 34)
After removing the mols with multiple values, the shape of the df:(4, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1075178_None_None_IC50...
The length of the raw dataset is 55
Curating dataset
After standardizing the SMILES, the shape of the df: (49, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (2, 34)
After removing the mols with multiple values, the shape of the df:(2, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1075178_None_None_EC50...
The length of the raw dataset is 125
Curating datase

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (125, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL234_None_None_Ki...
The length of the raw dataset is 6650
Curating dataset
After standardizing the SMILES, the shape of the df: (6465, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (6418, 34)
After removing the mols with multiple values, the shape of the df:(5097, 35)
Applying thresholds 
Done curation.

Processing CHEMBL234_None_None_IC50...
The length of the raw dataset is 383
Curating dataset
After standardizing the SMILES, the shape of the df: (360, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (360, 34)
After removing the mols with multiple values, the shape of the df:(295, 35)
Applying thresholds 
Done curation.

Processing CHEMBL234_None_None_EC50...
The length of the raw dataset is 744
Curating

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (53, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL4523923_None_None_Ki...
No dataset for CHEMBL4523923-Ki
Processing CHEMBL4523923_None_None_IC50...
No dataset for CHEMBL4523923-IC50
Processing CHEMBL4523923_None_None_EC50...
The length of the raw dataset is 33
Curating dataset
After standardizing the SMILES, the shape of the df: (33, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (33, 34)
After removing the mols with multiple values, the shape of the df:(32, 35)
Applying thresholds 
Done curation.


Processing CHEMBL5861_None_None_Ki...
No dataset for CHEMBL5861-Ki
Processing CHEMBL5861_None_None_IC50...
The length of the raw dataset is 16
Curating dataset
After standardizing the SMILES, the shape of the df: (16, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (16, 34)
After removing the mols with multiple values, the shape of the df:(15, 35)
Appl

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL4523248_None_None_Ki...
No dataset for CHEMBL4523248-Ki
Processing CHEMBL4523248_None_None_IC50...
No dataset for CHEMBL4523248-IC50
Processing CHEMBL4523248_None_None_EC50...
The length of the raw dataset is 14
Curating dataset
After standardizing the SMILES, the shape of the df: (12, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (12, 34)
After removing the mols with multiple values, the shape of the df:(12, 35)
Applying thresholds 
Done curation.


Processing CHEMBL4523229_None_None_Ki...
No dataset for CHEMBL4523229-Ki
Processing CHEMBL4523229_None_None_IC50...
No dataset for CHEMBL4523229-IC50
Processing CHEMBL4523229_None_None_EC50...
The length of the raw dataset is 4
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (4, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL3232682_None_None_Ki...
No dataset for CHEMBL3232682-Ki
Processing CHEMBL3232682_None_None_IC50...
The length of the raw dataset is 6
Curating dataset
After standardizing the SMILES, the shape of the df: (5, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (5, 34)
After removing the mols with multiple values, the shape of the df:(5, 35)
Applying thresholds 
Done curation.

Processing CHEMBL3232682_None_None_EC50...
No dataset for CHEMBL3232682-EC50

Processing CHEMBL3191_None_None_Ki...
No dataset for CHEMBL3191-Ki
Processing CHEMBL3191_None_None_IC50...
The length of the raw dataset is 6
Curating dataset
After standardizing the SMILES, the shape of the df: (6, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (6, 34)
After removing the mols with multiple values, the shape of the df:(6, 35)
Applying thre

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL1965_None_None_EC50...
The length of the raw dataset is 2
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (0, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL1293316_None_None_Ki...
The length of the raw dataset is 5
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (5, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL1293316_None_None_IC50...
No dataset for CHEMBL1293316-IC50
Processing CHEMBL1293316_None_None_EC50...
The length of the raw dataset is 131
Curating dataset
After standardizing the SMILES, the shape of the df: (123, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (64, 34)
After removing the mols with multiple values, the shape of the df:(57, 35)
Applying thresholds 
Done curation.


Processing CHEMBL264_None_None_Ki...
The length of the raw dataset is 5019
Curating dataset
After standardizing the SMILES, the shape of the df: (4952, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (4944, 34)
After removing the mols with multiple values, the shape of the df:(3991, 35)
Applying thresholds 
Done curation.

Processing CHEMBL264_None_None_IC50...
The length of the raw dataset is 797
Curating dataset
After sta

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (165, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL1832_None_None_EC50...
The length of the raw dataset is 239
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (237, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL2049_None_None_Ki...
The length of the raw dataset is 821
Curating dataset
After standardizing the SMILES, the shape of the df: (807, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (685, 34)
After removing the mols with multiple values, the shape of the df:(598, 35)
Applying thresholds 
Done curation.

Processing CHEMBL2049_None_None_IC50...
The length of the raw dataset is 137
Curating dataset
After standardizing the SMILES, the shape of the df: (131, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (102, 34)
After removing the mols with multiple values, the shape of the df:(93, 35)
Applying thresholds 
Done curation.

Processing CHEMBL2049_None_None_EC50...
The length of the raw dataset is 492
Curating dataset
After standardizing the SMILES, the shape of the df: (492, 34)
After dropping the mols w

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (281, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL217_None_None_Ki...
The length of the raw dataset is 11229
Curating dataset
After standardizing the SMILES, the shape of the df: (10931, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (10745, 34)
After removing the mols with multiple values, the shape of the df:(8002, 35)
Applying thresholds 
Done curation.

Processing CHEMBL217_None_None_IC50...
The length of the raw dataset is 1292
Curating dataset
After standardizing the SMILES, the shape of the df: (1205, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1175, 34)
After removing the mols with multiple values, the shape of the df:(997, 35)
Applying thresholds 
Done curation.

Processing CHEMBL217_None_None_EC50...
The length of the raw dataset is 1876
C

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL2056_None_None_Ki...
The length of the raw dataset is 1836
Curating dataset
After standardizing the SMILES, the shape of the df: (1664, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1664, 34)
After removing the mols with multiple values, the shape of the df:(1383, 35)
Applying thresholds 
Done curation.

Processing CHEMBL2056_None_None_IC50...
The length of the raw dataset is 208
Curating dataset
After standardizing the SMILES, the shape of the df: (202, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (201, 34)
After removing the mols with multiple values, the shape of the df:(142, 35)
Applying thresholds 
Done curation.

Processing CHEMBL2056_None_None_EC50...
The length of the raw dataset is 538
Curatin

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL1905_None_None_EC50...
No dataset for CHEMBL1905-EC50

Processing CHEMBL5700_None_None_Ki...
The length of the raw dataset is 1
Curating dataset


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL5700_None_None_IC50...
The length of the raw dataset is 103
Curating dataset
After standardizing the SMILES, the shape of the df: (96, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (96, 34)
After removing the mols with multiple values, the shape of the df:(93, 35)
Applying thresholds 
Done curation.

Processing CHEMBL5700_None_None_EC50...
The length of the raw dataset is 40
Curating dataset
After standardizing the SMILES, the shape of the df: (25, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (25, 34)
After removing the mols with multiple values, the shape of the df:(12, 35)
Applying thresholds 
Done curation.


Processing CHEMBL5413_None_None_Ki...
The length of the raw dataset is 113
Curating dataset
A

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL3764_None_None_Ki...
The length of the raw dataset is 353
Curating dataset
After standardizing the SMILES, the shape of the df: (352, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (253, 34)
After removing the mols with multiple values, the shape of the df:(240, 35)
Applying thresholds 
Done curation.

Processing CHEMBL3764_None_None_IC50...
The length of the raw dataset is 286
Curating dataset
After standardizing the SMILES, the shape of the df: (276, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (200, 34)
After removing the mols with multiple values, the shape of the df:(145, 35)
Applying thresholds 
Done curation.

Processing CHEMBL3764_None_None_EC50...
The length of the raw dataset is 279
Curating da

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL2955_None_None_IC50...
The length of the raw dataset is 680
Curating dataset
After standardizing the SMILES, the shape of the df: (678, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (678, 34)
After removing the mols with multiple values, the shape of the df:(301, 35)
Applying thresholds 
Done curation.

Processing CHEMBL2955_None_None_EC50...
The length of the raw dataset is 188
Curating dataset
After standardizing the SMILES, the shape of the df: (108, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (108, 34)
After removing the mols with multiple values, the shape of the df:(91, 35)
Applying thresholds 
Done curation.


Processing CHEMBL218_None_None_Ki...
The length of the raw dataset is 5414
Curating dat

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL298_None_None_Ki...
The length of the raw dataset is 632
Curating dataset
After standardizing the SMILES, the shape of the df: (624, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (539, 34)
After removing the mols with multiple values, the shape of the df:(447, 35)
Applying thresholds 
Done curation.

Processing CHEMBL298_None_None_IC50...
The length of the raw dataset is 1552
Curating dataset
After standardizing the SMILES, the shape of the df: (1542, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1332, 34)
After removing the mols with multiple values, the shape of the df:(1061, 35)
Applying thresholds 
Done curation.

Processing CHEMBL298_None_None_EC50...
The length of the raw dataset is 136
Curating d

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL288_None_None_Ki...
The length of the raw dataset is 155
Curating dataset
After standardizing the SMILES, the shape of the df: (155, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (155, 34)
After removing the mols with multiple values, the shape of the df:(82, 35)
Applying thresholds 
Done curation.

Processing CHEMBL288_None_None_IC50...
The length of the raw dataset is 1657
Curating dataset
After standardizing the SMILES, the shape of the df: (1524, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1522, 34)
After removing the mols with multiple values, the shape of the df:(1240, 35)
Applying thresholds 
Done curation.

Processing CHEMBL288_None_None_EC50...
The length of the raw dataset is 7
Curating dataset
After standardizing the SMILES, the shape of the df: (7, 34)
After dropping the mols with M

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL4105988_None_None_EC50...
The length of the raw dataset is 1
Curating dataset


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL4523360_None_None_Ki...
No dataset for CHEMBL4523360-Ki
Processing CHEMBL4523360_None_None_IC50...
No dataset for CHEMBL4523360-IC50
Processing CHEMBL4523360_None_None_EC50...
The length of the raw dataset is 1
Curating dataset


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL4308_None_None_Ki...
The length of the raw dataset is 824
Curating dataset
After standardizing the SMILES, the shape of the df: (818, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (795, 34)
After removing the mols with multiple values, the shape of the df:(730, 35)
Applying thresholds 
Done curation.

Processing CHEMBL4308_None_None_IC50...
The length of the raw dataset is 495
Curating dataset
After standardizing the SMILES, the shape of the df: (483, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (469, 34)
After removing the mols with multiple values, the shape of the df:(426, 35)
Applying thresholds 
Done curation.

Processing CHEMBL4308_None_None_EC50...
No dataset for CHEMBL4308-EC50

Processing CHEMB

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL4029_None_None_Ki...
The length of the raw dataset is 120
Curating dataset
After standardizing the SMILES, the shape of the df: (104, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (104, 34)
After removing the mols with multiple values, the shape of the df:(102, 35)
Applying thresholds 
Done curation.

Processing CHEMBL4029_None_None_IC50...
The length of the raw dataset is 334
Curating dataset
After standardizing the SMILES, the shape of the df: (334, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (334, 34)
After removing the mols with multiple values, the shape of the df:(282, 35)
Applying thresholds 
Done curation.

Processing CHEMBL4029_None_None_EC50...
The length of the raw dataset is 1
Curating data

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL259_None_None_Ki...
The length of the raw dataset is 2430
Curating dataset
After standardizing the SMILES, the shape of the df: (2413, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1709, 34)
After removing the mols with multiple values, the shape of the df:(1516, 35)
Applying thresholds 
Done curation.

Processing CHEMBL259_None_None_IC50...
The length of the raw dataset is 1065
Curating dataset
After standardizing the SMILES, the shape of the df: (1027, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (686, 34)
After removing the mols with multiple values, the shape of the df:(598, 35)
Applying thresholds 
Done curation.

Processing CHEMBL259_None_None_EC50...
The length of the raw dataset is 1564
Curatin

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL2107_None_None_Ki...
The length of the raw dataset is 21
Curating dataset
After standardizing the SMILES, the shape of the df: (21, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (21, 34)
After removing the mols with multiple values, the shape of the df:(15, 35)
Applying thresholds 
Done curation.

Processing CHEMBL2107_None_None_IC50...
The length of the raw dataset is 1366
Curating dataset
After standardizing the SMILES, the shape of the df: (1290, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1111, 34)
After removing the mols with multiple values, the shape of the df:(934, 35)
Applying thresholds 
Done curation.

Processing CHEMBL2107_None_None_EC50...
The length of the raw dataset is 274
Curating dat

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL2064_None_None_IC50...
The length of the raw dataset is 1
Curating dataset


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL2064_None_None_EC50...
The length of the raw dataset is 7
Curating dataset
After standardizing the SMILES, the shape of the df: (6, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (6, 34)
After removing the mols with multiple values, the shape of the df:(6, 35)
Applying thresholds 
Done curation.


Processing CHEMBL3559688_None_None_Ki...
No dataset for CHEMBL3559688-Ki
Processing CHEMBL3559688_None_None_IC50...
The length of the raw dataset is 5
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (5, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL3559688_None_None_EC50...
No dataset for CHEMBL3559688-EC50

Processing CHEMBL1800_None_None_Ki...
The length of the raw dataset is 1642
Curating dataset
After standardizing the SMILES, the shape of the df: (1611, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1536, 34)
After removing the mols with multiple values, the shape of the df:(1416, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1800_None_None_IC50...
The length of the raw dataset is 814
Curating dataset
After standardizing the SMILES, the shape of the df: (806, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (776, 34)
After removing the mols with multiple values, the shape of the df:(662, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1800_None_None_EC50...
The length of the raw dataset is 108
Curating dataset
After st

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL5850_None_None_Ki...
No dataset for CHEMBL5850-Ki
Processing CHEMBL5850_None_None_IC50...
The length of the raw dataset is 50
Curating dataset
After standardizing the SMILES, the shape of the df: (46, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (46, 34)
After removing the mols with multiple values, the shape of the df:(32, 35)
Applying thresholds 
Done curation.

Processing CHEMBL5850_None_None_EC50...
The length of the raw dataset is 166
Curating dataset
After standardizing the SMILES, the shape of the df: (135, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (128, 34)
After removing the mols with multiple values, the shape of the df:(122, 35)
Applying thresholds 
Done curation.


Processing CHEMBL5162_

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL5162_None_None_IC50...
The length of the raw dataset is 155
Curating dataset
After standardizing the SMILES, the shape of the df: (141, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (141, 34)
After removing the mols with multiple values, the shape of the df:(72, 35)
Applying thresholds 
Done curation.

Processing CHEMBL5162_None_None_EC50...
The length of the raw dataset is 2
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL3250_None_None_Ki...
The length of the raw dataset is 69
Curating dataset
After standardizing the SMILES, the shape of the df: (67, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (67, 34)
After removing the mols with multiple values, the shape of the df:(67, 35)
Applying thresholds 
Done curation.

Processing CHEMBL3250_None_None_IC50...
The length of the raw dataset is 141
Curating dataset
After standardizing the SMILES, the shape of the df: (130, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (130, 34)
After removing the mols with multiple values, the shape of the df:(116, 35)
Applying thresholds 
Done curation.

Processing CHEMBL3250_None_None_EC50...
The length of the raw dataset is 127
Curating dataset
After standardizing the SMILES, the shape of the df: (74, 34)
After dropping the mols with MW

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL4523454_None_None_EC50...
The length of the raw dataset is 23
Curating dataset
After standardizing the SMILES, the shape of the df: (23, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (22, 34)
After removing the mols with multiple values, the shape of the df:(22, 35)
Applying thresholds 
Done curation.


Processing CHEMBL4523866_None_None_Ki...
No dataset for CHEMBL4523866-Ki
Processing CHEMBL4523866_None_None_IC50...
No dataset for CHEMBL4523866-IC50
Processing CHEMBL4523866_None_None_EC50...
The length of the raw dataset is 2
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL252_None_None_Ki...
The length of the raw dataset is 262
Curating dataset
After standardizing the SMILES, the shape of the df: (262, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (259, 34)
After removing the mols with multiple values, the shape of the df:(233, 35)
Applying thresholds 
Done curation.

Processing CHEMBL252_None_None_IC50...
The length of the raw dataset is 1641
Curating dataset
After standardizing the SMILES, the shape of the df: (1638, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1514, 34)
After removing the mols with multiple values, the shape of the df:(1284, 35)
Applying thresholds 
Done curation.

Processing CHEMBL252_None_None_EC50...
The length of the raw dataset is 13
Curating dataset
After standardizing the SMILES, the shape of the df: (13, 34)
After dropping the mols wit

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL2346493_None_None_EC50...
The length of the raw dataset is 2
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL4616_None_None_Ki...
The length of the raw dataset is 752
Curating dataset
After standardizing the SMILES, the shape of the df: (727, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (708, 34)
After removing the mols with multiple values, the shape of the df:(637, 35)
Applying thresholds 
Done curation.

Processing CHEMBL4616_None_None_IC50...
The length of the raw dataset is 1395
Curating dataset
After standardizing the SMILES, the shape of the df: (1356, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1286, 34)
After removing the mols with multiple values, the shape of the df:(893, 35)
Applying thresholds 
Done curation.

Processing CHEMBL4616_None_None_EC50...
The length of the raw dataset is 1079
Curating dataset
After standardizing the SMILES, the shape of the df: (1010, 34)
After dropping the mo

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL3559686_None_None_EC50...
The length of the raw dataset is 9
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL2327_None_None_Ki...
The length of the raw dataset is 873
Curating dataset
After standardizing the SMILES, the shape of the df: (872, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (842, 34)
After removing the mols with multiple values, the shape of the df:(794, 35)
Applying thresholds 
Done curation.

Processing CHEMBL2327_None_None_IC50...
The length of the raw dataset is 290
Curating dataset
After standardizing the SMILES, the shape of the df: (286, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (270, 34)
After removing the mols with multiple values, the shape of the df:(261, 35)
Applying thresholds 
Done curation.

Processing CHEMBL2327_None_None_EC50...
The length of the raw dataset is 5
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (5, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL1628472_None_None_Ki...
The length of the raw dataset is 33
Curating dataset
After standardizing the SMILES, the shape of the df: (33, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (5, 34)
After removing the mols with multiple values, the shape of the df:(4, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1628472_None_None_IC50...
The length of the raw dataset is 40
Curating dataset
After standardizing the SMILES, the shape of the df: (32, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (27, 34)
After removing the mols with multiple values, the shape of the df:(23, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1628472_None_None_EC50...
The length of the raw dataset is 84
Curating dataset
After standardizing the SMILES, the shape of the df: (77, 34)
After dropping the mols with 

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL4523478_None_None_IC50...
No dataset for CHEMBL4523478-IC50
Processing CHEMBL4523478_None_None_EC50...
The length of the raw dataset is 2
Curating dataset


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL344_None_None_Ki...
The length of the raw dataset is 1705
Curating dataset
After standardizing the SMILES, the shape of the df: (1701, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1700, 34)
After removing the mols with multiple values, the shape of the df:(1472, 35)
Applying thresholds 
Done curation.

Processing CHEMBL344_None_None_IC50...
The length of the raw dataset is 3063
Curating dataset
After standardizing the SMILES, the shape of the df: (2971, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (2971, 34)
After removing the mols with multiple values, the shape of the df:(2108, 35)
Applying thresholds 
Done curation.

Processing CHEMBL344_None_None_EC50...
The length of the raw dataset is 83
Curatin

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (2, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL1804_None_None_Ki...
The length of the raw dataset is 531
Curating dataset
After standardizing the SMILES, the shape of the df: (531, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (337, 34)
After removing the mols with multiple values, the shape of the df:(324, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1804_None_None_IC50...
The length of the raw dataset is 563
Curating dataset
After standardizing the SMILES, the shape of the df: (558, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (114, 34)
After removing the mols with multiple values, the shape of the df:(101, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1804_None_None_EC50...
The length of the raw dataset is 246
Curating da

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL5201_None_None_EC50...
The length of the raw dataset is 120
Curating dataset
After standardizing the SMILES, the shape of the df: (120, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (120, 34)
After removing the mols with multiple values, the shape of the df:(81, 35)
Applying thresholds 
Done curation.


Processing CHEMBL1628473_None_None_Ki...
The length of the raw dataset is 33
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (32, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL1628473_None_None_IC50...
The length of the raw dataset is 3
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (3, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL1628473_None_None_EC50...
The length of the raw dataset is 55
Curating dataset
After standardizing the SMILES, the shape of the df: (55, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (13, 34)
After removing the mols with multiple values, the shape of the df:(11, 35)
Applying thresholds 
Done curation.


Processing CHEMBL5963_None_None_Ki...
The length of the raw dataset is 3
Curating dataset


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (3, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL5963_None_None_IC50...
The length of the raw dataset is 95
Curating dataset
After standardizing the SMILES, the shape of the df: (95, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (83, 34)
After removing the mols with multiple values, the shape of the df:(31, 35)
Applying thresholds 
Done curation.

Processing CHEMBL5963_None_None_EC50...
The length of the raw dataset is 223
Curating dataset
After standardizing the SMILES, the shape of the df: (199, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (178, 34)
After removing the mols with multiple values, the shape of the df:(110, 35)
Applying thresholds 
Done curation.


Processing CHEMBL229_None_None_Ki...
The length of the raw dataset is 1962
Curating datase

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL5144_None_None_IC50...
The length of the raw dataset is 18
Curating dataset
After standardizing the SMILES, the shape of the df: (17, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (2, 34)
After removing the mols with multiple values, the shape of the df:(2, 35)
Applying thresholds 
Done curation.

Processing CHEMBL5144_None_None_EC50...
The length of the raw dataset is 38
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (37, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL3321651_None_None_Ki...
No dataset for CHEMBL3321651-Ki
Processing CHEMBL3321651_None_None_IC50...
The length of the raw dataset is 6
Curating dataset
After standardizing the SMILES, the shape of the df: (6, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (5, 34)
After removing the mols with multiple values, the shape of the df:(5, 35)
Applying thresholds 
Done curation.

Processing CHEMBL3321651_None_None_EC50...
No dataset for CHEMBL3321651-EC50

Processing CHEMBL2731_None_None_Ki...
The length of the raw dataset is 27
Curating dataset
After standardizing the SMILES, the shape of the df: (27, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (27, 34)
After removing the mols with multiple values, the shape of the df:(26, 35)
Applying thresholds 
Done curation.

Processing CHEMBL2731_None_None_IC50...


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (9, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL5844_None_None_EC50...
The length of the raw dataset is 105
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (105, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL1985_None_None_Ki...
The length of the raw dataset is 311
Curating dataset
After standardizing the SMILES, the shape of the df: (311, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (311, 34)
After removing the mols with multiple values, the shape of the df:(304, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1985_None_None_IC50...
The length of the raw dataset is 1453
Curating dataset
After standardizing the SMILES, the shape of the df: (1380, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1331, 34)
After removing the mols with multiple values, the shape of the df:(784, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1985_None_None_EC50...
The length of the raw dataset is 170
Curating dataset


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (158, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL5952_None_None_Ki...
The length of the raw dataset is 115
Curating dataset
After standardizing the SMILES, the shape of the df: (112, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (101, 34)
After removing the mols with multiple values, the shape of the df:(96, 35)
Applying thresholds 
Done curation.

Processing CHEMBL5952_None_None_IC50...
The length of the raw dataset is 8
Curating dataset
After standardizing the SMILES, the shape of the df: (8, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (5, 34)
After removing the mols with multiple values, the shape of the df:(5, 35)
Applying thresholds 
Done curation.

Processing CHEMBL5952_None_None_EC50...
The length of the raw dataset is 122
Curating dataset
A

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (53, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL233_None_None_Ki...
The length of the raw dataset is 5682
Curating dataset
After standardizing the SMILES, the shape of the df: (5511, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (5314, 34)
After removing the mols with multiple values, the shape of the df:(4587, 35)
Applying thresholds 
Done curation.

Processing CHEMBL233_None_None_IC50...
The length of the raw dataset is 1366
Curating dataset
After standardizing the SMILES, the shape of the df: (1258, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1202, 34)
After removing the mols with multiple values, the shape of the df:(1013, 35)
Applying thresholds 
Done curation.

Processing CHEMBL233_None_None_EC50...
The length of the raw dataset is 2296
Curating dataset
After standardizing the SMILES, the shape of the df: (2103, 34)
After dropping the

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (236, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL4069_None_None_Ki...
The length of the raw dataset is 13
Curating dataset
After standardizing the SMILES, the shape of the df: (10, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (5, 34)
After removing the mols with multiple values, the shape of the df:(5, 35)
Applying thresholds 
Done curation.

Processing CHEMBL4069_None_None_IC50...
The length of the raw dataset is 65
Curating dataset
After standardizing the SMILES, the shape of the df: (65, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (37, 34)
After removing the mols with multiple values, the shape of the df:(35, 35)
Applying thresholds 
Done curation.

Processing CHEMBL4069_None_None_EC50...
The length of the raw dataset is 6
Curating dataset


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (6, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL1075133_None_None_Ki...
No dataset for CHEMBL1075133-Ki
Processing CHEMBL1075133_None_None_IC50...
The length of the raw dataset is 18
Curating dataset
After standardizing the SMILES, the shape of the df: (18, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (18, 34)
After removing the mols with multiple values, the shape of the df:(16, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1075133_None_None_EC50...
No dataset for CHEMBL1075133-EC50

Processing CHEMBL4894_None_None_Ki...
The length of the raw dataset is 45
Curating dataset


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (45, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL4894_None_None_IC50...
The length of the raw dataset is 86
Curating dataset
After standardizing the SMILES, the shape of the df: (65, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (61, 34)
After removing the mols with multiple values, the shape of the df:(61, 35)
Applying thresholds 
Done curation.

Processing CHEMBL4894_None_None_EC50...
No dataset for CHEMBL4894-EC50

Processing CHEMBL1792_None_None_Ki...
The length of the raw dataset is 388
Curating dataset
After standardizing the SMILES, the shape of the df: (386, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (251, 34)
After removing the mols with multiple values, the shape of the df:(234, 35)
Applying thresholds 
Done curation.

Processing CHEMBL179

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (3, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (2, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL4867_None_None_EC50...
The length of the raw dataset is 51
Curating dataset
After standardizing the SMILES, the shape of the df: (42, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (37, 34)
After removing the mols with multiple values, the shape of the df:(25, 35)
Applying thresholds 
Done curation.


Processing CHEMBL1293267_None_None_Ki...
The length of the raw dataset is 74
Curating dataset
After standardizing the SMILES, the shape of the df: (74, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (74, 34)
After removing the mols with multiple values, the shape of the df:(65, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1293267_None_None_IC50...
The length of the raw dataset is 207
Curating data

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (43, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL3176_None_None_IC50...
The length of the raw dataset is 50
Curating dataset
After standardizing the SMILES, the shape of the df: (42, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (37, 34)
After removing the mols with multiple values, the shape of the df:(37, 35)
Applying thresholds 
Done curation.

Processing CHEMBL3176_None_None_EC50...
No dataset for CHEMBL3176-EC50

Processing CHEMBL251_None_None_Ki...
The length of the raw dataset is 7277
Curating dataset
After standardizing the SMILES, the shape of the df: (7075, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (7054, 34)
After removing the mols with multiple values, the shape of the df:(5800, 35)
Applying thresholds 
Done curation.

Processing CHEMBL251_None_None_IC50...
The length of the raw dataset is 693
Curating dataset
After standardizing

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL3746_None_None_Ki...
The length of the raw dataset is 3
Curating dataset
After standardizing the SMILES, the shape of the df: (3, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (3, 34)
After removing the mols with multiple values, the shape of the df:(3, 35)
Applying thresholds 
Done curation.

Processing CHEMBL3746_None_None_IC50...
The length of the raw dataset is 631
Curating dataset
After standardizing the SMILES, the shape of the df: (554, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (554, 34)
After removing the mols with multiple values, the shape of the df:(518, 35)
Applying thresholds 
Done curation.

Processing CHEMBL3746_None_None_EC50...
The length of the raw dataset is 4
Curating dataset
After standardizing the SMILES, the shape of the df: (4, 34)
After dropping the mols with MW > 900.

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL2150838_None_None_Ki...
No dataset for CHEMBL2150838-Ki
Processing CHEMBL2150838_None_None_IC50...
The length of the raw dataset is 66
Curating dataset
After standardizing the SMILES, the shape of the df: (66, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (66, 34)
After removing the mols with multiple values, the shape of the df:(66, 35)
Applying thresholds 
Done curation.

Processing CHEMBL2150838_None_None_EC50...
No dataset for CHEMBL2150838-EC50

Processing CHEMBL4422_None_None_Ki...
The length of the raw dataset is 185
Curating dataset
After standardizing the SMILES, the shape of the df: (155, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (155, 34)
After removing the mols with multiple values, the s

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL5607_None_None_IC50...
The length of the raw dataset is 87
Curating dataset
After standardizing the SMILES, the shape of the df: (87, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (87, 34)
After removing the mols with multiple values, the shape of the df:(85, 35)
Applying thresholds 
Done curation.

Processing CHEMBL5607_None_None_EC50...
No dataset for CHEMBL5607-EC50

Processing CHEMBL4877_None_None_Ki...
The length of the raw dataset is 351
Curating dataset
After standardizing the SMILES, the shape of the df: (337, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (67, 34)
After removing the mols with multiple values, the shape of the df:(50, 35)
Applying thresholds 
Done curation.

Processing CHEMBL4877_N

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL2032_None_None_EC50...
No dataset for CHEMBL2032-EC50

Processing CHEMBL5892_None_None_Ki...
No dataset for CHEMBL5892-Ki
Processing CHEMBL5892_None_None_IC50...
The length of the raw dataset is 1
Curating dataset


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL5892_None_None_EC50...
No dataset for CHEMBL5892-EC50

Processing CHEMBL5517_None_None_Ki...
No dataset for CHEMBL5517-Ki
Processing CHEMBL5517_None_None_IC50...
The length of the raw dataset is 71
Curating dataset
After standardizing the SMILES, the shape of the df: (71, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (71, 34)
After removing the mols with multiple values, the shape of the df:(67, 35)
Applying thresholds 
Done curation.

Processing CHEMBL5517_None_None_EC50...
The length of the raw dataset is 45
Curating dataset
After standardizing the SMILES, the shape of the df: (45, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (45, 34)
After removing the mols with multiple values, the shape of the df:(4

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL1916_None_None_Ki...
The length of the raw dataset is 901
Curating dataset
After standardizing the SMILES, the shape of the df: (840, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (840, 34)
After removing the mols with multiple values, the shape of the df:(636, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1916_None_None_IC50...
The length of the raw dataset is 75
Curating dataset
After standardizing the SMILES, the shape of the df: (74, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (74, 34)
After removing the mols with multiple values, the shape of the df:(72, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1916_None_None_EC50...
The length of the raw dataset is 193
Curating datase

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (102, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL5399_None_None_Ki...
The length of the raw dataset is 16
Curating dataset
After standardizing the SMILES, the shape of the df: (16, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (15, 34)
After removing the mols with multiple values, the shape of the df:(15, 35)
Applying thresholds 
Done curation.

Processing CHEMBL5399_None_None_IC50...
The length of the raw dataset is 94
Curating dataset
After standardizing the SMILES, the shape of the df: (93, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (5, 34)
After removing the mols with multiple values, the shape of the df:(5, 35)
Applying thresholds 
Done curation.

Processing CHEMBL5399_None_None_EC50...
The length of the raw dataset is 84
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (71, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL3724_None_None_Ki...
The length of the raw dataset is 13
Curating dataset
After standardizing the SMILES, the shape of the df: (7, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (7, 34)
After removing the mols with multiple values, the shape of the df:(7, 35)
Applying thresholds 
Done curation.

Processing CHEMBL3724_None_None_IC50...
The length of the raw dataset is 163
Curating dataset
After standardizing the SMILES, the shape of the df: (103, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (103, 34)
After removing the mols with multiple values, the shape of the df:(100, 35)
Applying thresholds 
Done curation.

Processing CHEMBL3724_None_None_EC50...
The length of the raw dataset is 108
Curating dataset
After standardizing the SMILES, the shape of the df: (75, 34)
After dropping the mols with MW >

Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (15, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL4295740_None_None_EC50...
No dataset for CHEMBL4295740-EC50

Processing CHEMBL3988599_None_None_Ki...
No dataset for CHEMBL3988599-Ki
Processing CHEMBL3988599_None_None_IC50...
The length of the raw dataset is 371
Curating dataset
After standardizing the SMILES, the shape of the df: (371, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (371, 34)
After removing the mols with multiple values, the shape of the df:(321, 35)
Applying thresholds 
Done curation.

Processing CHEMBL3988599_None_None_EC50...
The length of the raw dataset is 1
Curating dataset


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL1987_None_None_Ki...
The length of the raw dataset is 125
Curating dataset
After standardizing the SMILES, the shape of the df: (103, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (103, 34)
After removing the mols with multiple values, the shape of the df:(96, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1987_None_None_IC50...
The length of the raw dataset is 147
Curating dataset
After standardizing the SMILES, the shape of the df: (147, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (147, 34)
After removing the mols with multiple values, the shape of the df:(113, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1987_None_None_EC50...
The length of the raw dataset is 165
Curating dat

Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (68, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 

Processing CHEMBL1854_None_None_Ki...
The length of the raw dataset is 1
Curating dataset


Failed curating the dataset due to cannot set a frame with no defined index and a scalar


After standardizing the SMILES, the shape of the df: (1, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (1, 34)
After removing the mols with multiple values, the shape of the df:(1, 35)
Applying thresholds 
Processing CHEMBL1854_None_None_IC50...
The length of the raw dataset is 50
Curating dataset
After standardizing the SMILES, the shape of the df: (50, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (50, 34)
After removing the mols with multiple values, the shape of the df:(37, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1854_None_None_EC50...
The length of the raw dataset is 45
Curating dataset
After standardizing the SMILES, the shape of the df: (33, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (23, 34)
After removing the mols with multiple values, the shape of the df:(22, 35)
Applying thresholds 
Done curation.


Processing CHEMBL1925_None_None_Ki...
The length of the raw dataset is 2
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)
Processing CHEMBL1925_None_None_IC50...
No dataset for CHEMBL1925-IC50
Processing CHEMBL1925_None_None_EC50...
The length of the raw dataset is 2
Curating dataset


Failed curating the dataset due to Expected a 1D array, got an array with shape (0, 34)


After standardizing the SMILES, the shape of the df: (2, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (0, 34)

Processing CHEMBL1821_None_None_Ki...
The length of the raw dataset is 967
Curating dataset
After standardizing the SMILES, the shape of the df: (906, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (781, 34)
After removing the mols with multiple values, the shape of the df:(676, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1821_None_None_IC50...
The length of the raw dataset is 408
Curating dataset
After standardizing the SMILES, the shape of the df: (400, 34)
After dropping the mols with MW > 900.0 , the shape of the df: (395, 34)
After removing the mols with multiple values, the shape of the df:(268, 35)
Applying thresholds 
Done curation.

Processing CHEMBL1821_None_None_EC50...
The length of the raw dataset is 931
Curating dataset
After standardizing the SMILES, the shape of the df: (894, 34)
After dropping the mols wi