# AChE

In [1]:
# pip install chembl_webresource_client pandas tqdm

from chembl_webresource_client.new_client import new_client
import pandas as pd
from tqdm import tqdm

# Selected target: AChE (acetylcholinesterase), ChEMBL ID CHEMBL220.
target_id = "CHEMBL220"

# Query the activity records for the target, keeping only the IC50 / Ki /
# Potency measurement types and requesting the fields listed in `.only(...)`.
bioactivities = (
    new_client.activity
    .filter(target_chembl_id=target_id)
    .filter(standard_type__in=["IC50", "Ki", "Potency"])
    .only(
        "molecule_chembl_id",
        "canonical_smiles",
        "standard_type",
        "standard_relation",
        "standard_value",
        "standard_units",
        "target_chembl_id",
        "assay_type",
        "target_pref_name",
        "source"
    )
)

# Materialize the query result as a pandas DataFrame.
df = pd.DataFrame(bioactivities)

# Keep only rows that have both an activity value and a SMILES string.
df = df.dropna(subset=["standard_value", "canonical_smiles"])

# Optional export to CSV and progress message (currently disabled).
# df.to_csv("bioactivity_dataset_AChE.csv", index=False)

# print("Готово. Скачано строк:", len(df))
df.head()

Готово. Скачано строк: 9130


Unnamed: 0,assay_type,canonical_smiles,molecule_chembl_id,relation,standard_relation,standard_type,standard_units,standard_value,target_chembl_id,target_pref_name,type,units,value
0,B,CCOc1nn(-c2cccc(OCc3ccccc3)c2)c(=O)o1,CHEMBL133897,=,=,IC50,nM,750.0,CHEMBL220,Acetylcholinesterase,IC50,uM,0.75
1,B,O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC1CC1,CHEMBL336398,=,=,IC50,nM,100.0,CHEMBL220,Acetylcholinesterase,IC50,uM,0.1
2,B,CN(C(=O)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F)c1ccccc1,CHEMBL131588,>,>,IC50,nM,50000.0,CHEMBL220,Acetylcholinesterase,IC50,uM,50.0
3,B,O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F,CHEMBL130628,=,=,IC50,nM,300.0,CHEMBL220,Acetylcholinesterase,IC50,uM,0.3
4,B,CSc1nc(-c2ccc(OC(F)(F)F)cc2)nn1C(=O)N(C)C,CHEMBL130478,=,=,IC50,nM,800.0,CHEMBL220,Acetylcholinesterase,IC50,uM,0.8


In [2]:
# Print a summary of the frame: index range, per-column non-null counts and
# dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9130 entries, 0 to 11349
Data columns (total 13 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   assay_type          9130 non-null   object
 1   canonical_smiles    9130 non-null   object
 2   molecule_chembl_id  9130 non-null   object
 3   relation            9130 non-null   object
 4   standard_relation   9130 non-null   object
 5   standard_type       9130 non-null   object
 6   standard_units      9130 non-null   object
 7   standard_value      9130 non-null   object
 8   target_chembl_id    9130 non-null   object
 9   target_pref_name    9130 non-null   object
 10  type                9130 non-null   object
 11  units               8477 non-null   object
 12  value               9130 non-null   object
dtypes: object(13)
memory usage: 998.6+ KB


In [3]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors

# Read the bioactivity table from the CSV file in the working directory.
file_path = "bioactivity_dataset_AChE.csv"
df = pd.read_csv(file_path)

# The descriptor step below reads the SMILES column, so stop here if the
# loaded table does not have it.
assert "canonical_smiles" in df.columns, "Нет колонки 'canonical_smiles'"

# вычисления дескрипторов
def calc_rdkit_descriptors(smiles):
    """Compute a fixed set of RDKit descriptors for one SMILES string.

    Parameters
    ----------
    smiles : str
        SMILES representation of the molecule.

    Returns
    -------
    dict or None
        Mapping with the keys ``MolWt``, ``LogP``, ``TPSA``, ``NumHDonors``,
        ``NumHAcceptors``, ``NumRotatableBonds``, ``NumAliphaticRings`` and
        ``NumAromaticRings``. ``None`` is returned when ``smiles`` is not a
        string (for example ``None`` or the float NaN pandas uses for an
        empty CSV cell) or when RDKit cannot parse it into a molecule.
    """
    # A missing value would otherwise be passed straight to the parser, which
    # expects a string; treat it like any other unusable SMILES instead.
    if not isinstance(smiles, str):
        return None

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None
    return {
        'MolWt': Descriptors.MolWt(mol),
        'LogP': Descriptors.MolLogP(mol),
        'TPSA': Descriptors.TPSA(mol),
        'NumHDonors': Descriptors.NumHDonors(mol),
        'NumHAcceptors': Descriptors.NumHAcceptors(mol),
        'NumRotatableBonds': Descriptors.NumRotatableBonds(mol),
        'NumAliphaticRings': Descriptors.NumAliphaticRings(mol),
        'NumAromaticRings': Descriptors.NumAromaticRings(mol)
    }

# Compute descriptors once per distinct SMILES. The same compound can occur
# in several rows of `df`; building one descriptor row per input row would
# put duplicate keys on both sides of the merge below and multiply the rows
# (a SMILES present k times would come back k * k times).
desc_rows = []
for smi in df['canonical_smiles'].dropna().unique():
    desc = calc_rdkit_descriptors(smi)
    if desc:
        desc['canonical_smiles'] = smi
        desc_rows.append(desc)

# Descriptor table: exactly one row per parsable SMILES.
desc_df = pd.DataFrame(desc_rows)

# Attach the descriptors to every row of the original table. The inner join
# drops rows whose SMILES is missing or could not be parsed, and
# `validate` makes pandas raise if the descriptor side ever stops being
# unique per SMILES.
merged_df = pd.merge(
    df,
    desc_df,
    on='canonical_smiles',
    how='inner',
    validate='many_to_one',
)

# Optional export of the merged table (currently disabled).
# merged_df.to_csv("AChE_with_descriptors.csv", index=False)
# print("Файл сохранён: AChE_with_descriptors.csv")
