# **CHALLENGE**

In [2]:
! pip install chembl_webresource_client



# Import the necessary libraries

In [3]:
# import panda and necessary libraries
import pandas as pd
from chembl_webresource_client.new_client import new_client

  __version__ = __import__('pkg_resources').get_distribution('chembl_webresource_client').version


# Discover target proteins for the given molecule

In [4]:
# Target search for leutinizing hormone
target_search = new_client.target
target_query = target_search.search('leutinizing hormone')
targets = pd.DataFrame.from_dict(target_query)
targets


Unnamed: 0,cross_references,organism,pref_name,score,species_group_flag,target_chembl_id,target_components,target_type,tax_id
0,[],Homo sapiens,Growth hormone-releasing hormone receptor,12.0,False,CHEMBL2032,"[{'accession': 'Q02643', 'component_descriptio...",SINGLE PROTEIN,9606
1,[],Rattus norvegicus,Growth hormone-releasing hormone receptor,12.0,False,CHEMBL3709,"[{'accession': 'Q02644', 'component_descriptio...",SINGLE PROTEIN,10116
2,[],Bos taurus,Appetite-regulating hormone,12.0,False,CHEMBL5169189,"[{'accession': 'Q9BDJ6', 'component_descriptio...",SINGLE PROTEIN,9913
3,[],Homo sapiens,Parathyroid hormone receptor,11.0,False,CHEMBL1793,"[{'accession': 'Q03431', 'component_descriptio...",SINGLE PROTEIN,9606
4,[],Rattus norvegicus,Luteinizing hormone/Choriogonadotropin receptor,11.0,False,CHEMBL2456,"[{'accession': 'P16235', 'component_descriptio...",SINGLE PROTEIN,10116
...,...,...,...,...,...,...,...,...,...
209,[],Mus musculus,Ribosomal protein S6 kinase beta-1,1.0,False,CHEMBL5429,"[{'accession': 'Q8BSK8', 'component_descriptio...",SINGLE PROTEIN,10090
210,[],Mus musculus,Ribosomal protein S6 kinase alpha-2,1.0,False,CHEMBL3351220,"[{'accession': 'Q9WUT3', 'component_descriptio...",SINGLE PROTEIN,10090
211,[],Homo sapiens,RAC-alpha serine/threonine-protein kinase/Ribo...,1.0,False,CHEMBL3885629,"[{'accession': 'P23443', 'component_descriptio...",PROTEIN FAMILY,9606
212,[],Homo sapiens,Mitochondrial complex I (NADH dehydrogenase),0.0,False,CHEMBL2363065,"[{'accession': 'P03923', 'component_descriptio...",PROTEIN COMPLEX,9606


# Retrieve the target information for leutinizing hormone

In [5]:
select_target = targets.target_chembl_id[4]
select_target

'CHEMBL2456'

# Bioacativity data for leutinizing hormone IC50

In [6]:
activity = new_client.activity
activity_query = activity.filter(target_chembl_id=select_target).filter(standard_type='IC50')

In [7]:
LH_activity = pd.DataFrame.from_dict(activity_query)

In [8]:
LH_activity.head(3)

Unnamed: 0,action_type,activity_comment,activity_id,activity_properties,assay_chembl_id,assay_description,assay_type,assay_variant_accession,assay_variant_mutation,bao_endpoint,...,target_organism,target_pref_name,target_tax_id,text_value,toid,type,units,uo_units,upper_value,value
0,,Not Determined,633070,[],CHEMBL710528,Compound was evaluated for its antagonism in r...,F,,,BAO_0000190,...,Rattus norvegicus,Luteinizing hormone/Choriogonadotropin receptor,10116,,,IC50,nM,UO_0000065,,
1,,,18922547,[],CHEMBL4323796,Antagonist activity at rat luteinizing hormone...,F,,,BAO_0000190,...,Rattus norvegicus,Luteinizing hormone/Choriogonadotropin receptor,10116,,,IC50,nM,UO_0000065,,4890.0
2,,,18922548,[],CHEMBL4323796,Antagonist activity at rat luteinizing hormone...,F,,,BAO_0000190,...,Rattus norvegicus,Luteinizing hormone/Choriogonadotropin receptor,10116,,,IC50,nM,UO_0000065,,23.0


In [9]:
LH_activity.standard_type.unique()

array(['IC50'], dtype=object)

# save to csv

In [10]:
LH_activity.to_csv('LH_activity_data.csv', index=False)

# Create folder for LH activity data

In [12]:
! cd C:\Users\borsm\Desktop\Computational-Biology\CHEMINFORMATICS

In [13]:
! mkdir LH_activity_data_folder

In [14]:
! move LH_activity_data.csv "LH_activity_data_folder"

        1 file(s) moved.


# check for missing data

In [15]:
LH2 = LH_activity[LH_activity.standard_value.notna()]
LH2

Unnamed: 0,action_type,activity_comment,activity_id,activity_properties,assay_chembl_id,assay_description,assay_type,assay_variant_accession,assay_variant_mutation,bao_endpoint,...,target_organism,target_pref_name,target_tax_id,text_value,toid,type,units,uo_units,upper_value,value
1,,,18922547,[],CHEMBL4323796,Antagonist activity at rat luteinizing hormone...,F,,,BAO_0000190,...,Rattus norvegicus,Luteinizing hormone/Choriogonadotropin receptor,10116,,,IC50,nM,UO_0000065,,4890.0
2,,,18922548,[],CHEMBL4323796,Antagonist activity at rat luteinizing hormone...,F,,,BAO_0000190,...,Rattus norvegicus,Luteinizing hormone/Choriogonadotropin receptor,10116,,,IC50,nM,UO_0000065,,23.0
3,,,18922549,[],CHEMBL4323796,Antagonist activity at rat luteinizing hormone...,F,,,BAO_0000190,...,Rattus norvegicus,Luteinizing hormone/Choriogonadotropin receptor,10116,,,IC50,nM,UO_0000065,,46.0
4,,,18922553,[],CHEMBL4323796,Antagonist activity at rat luteinizing hormone...,F,,,BAO_0000190,...,Rattus norvegicus,Luteinizing hormone/Choriogonadotropin receptor,10116,,,IC50,nM,UO_0000065,,412.0
5,,,18922564,[],CHEMBL4323796,Antagonist activity at rat luteinizing hormone...,F,,,BAO_0000190,...,Rattus norvegicus,Luteinizing hormone/Choriogonadotropin receptor,10116,,,IC50,nM,UO_0000065,,212.0


# DATA PREPROCESSING

In [None]:
LH_activity_class = []
for i in LH2.standard_value:
    if float(i) <= 100:
        LH_activity_class.append('low')
    elif float(i) < 500:
        LH_activity_class.append('medium')
    else:
        LH_activity_class.append('high')

In [17]:
selection = ['molecule_chembl_id', 'canonical_smiles', 'standard_value']
LH3 = LH2[selection]
LH3

Unnamed: 0,molecule_chembl_id,canonical_smiles,standard_value
1,CHEMBL4449797,COc1ccc([C@@H]2c3ccsc3CCN2C(=O)Nc2cc(Cl)cc(Cl)...,4890.0
2,CHEMBL4537998,O=C(Nc1ccc(Oc2ccc(F)cc2)cc1)N1CCc2ncccc2[C@@H]...,23.0
3,CHEMBL4458424,O=C(Nc1cnc(Oc2ccc(F)cc2)nc1)N1CCc2ncccc2[C@@H]...,46.0
4,CHEMBL4471621,COc1ccc([C@H]2c3ccsc3CCN2C(=O)Nc2cc(Cl)cc(Cl)c...,412.0
5,CHEMBL4593222,COc1ccc(C2c3ccsc3CCN2C(=O)Nc2cc(Cl)cc(Cl)c2)cc1,212.0


In [None]:
pd.concat([LH3,pd.Series(LH_activity_class)], axis=1)

Unnamed: 0,molecule_chembl_id,canonical_smiles,standard_value,0
1,CHEMBL4449797,COc1ccc([C@@H]2c3ccsc3CCN2C(=O)Nc2cc(Cl)cc(Cl)...,4890.0,low
2,CHEMBL4537998,O=C(Nc1ccc(Oc2ccc(F)cc2)cc1)N1CCc2ncccc2[C@@H]...,23.0,low
3,CHEMBL4458424,O=C(Nc1cnc(Oc2ccc(F)cc2)nc1)N1CCc2ncccc2[C@@H]...,46.0,high
4,CHEMBL4471621,COc1ccc([C@H]2c3ccsc3CCN2C(=O)Nc2cc(Cl)cc(Cl)c...,412.0,high
5,CHEMBL4593222,COc1ccc(C2c3ccsc3CCN2C(=O)Nc2cc(Cl)cc(Cl)c2)cc1,212.0,
0,,,,high


# save to csv

In [27]:
LH3.to_csv('LH_activity_dataprocessed.csv', index=False)

In [28]:
! move LH_activity_dataprocessed.csv "LH_activity_data_folder"

        1 file(s) moved.
