# Imports

In [104]:
# Standard library imports
import os
import math
from pathlib import Path

# Third-party imports
import pandas as pd
from lightning import pytorch as pl
import torch
from chemprop import data, featurizers, models, nn
from chembl_webresource_client.new_client import new_client

# Local imports

# CUDA
# Pin the process to GPU 7 *before* the first CUDA query. The CUDA runtime
# reads CUDA_VISIBLE_DEVICES when it initialises, so assigning it after
# torch.cuda.is_available() has already touched CUDA may have no effect, and
# the availability printed below would not reflect the selected device.
os.environ['CUDA_VISIBLE_DEVICES'] = '7'
print(f"CUDA available: {torch.cuda.is_available()}")

CUDA available: True


# Constants

In [105]:
# Directory
# NOTE: '__file__' is deliberately a quoted string, not the module variable.
# realpath() resolves that relative name against the current working
# directory, and taking the head of the split path drops the fake file name,
# leaving the directory itself.
CUR_DIR = os.path.split(os.path.realpath('__file__'))[0]

# Cytochrome P450 3A4 IDs
CYP3A4_CHEMBL_IDS = [
    'CHEMBL340',
    'CHEMBL2111472',
    'CHEMBL2364675',
    'CHEMBL4523986',
]

# ChEMBL Database

In [None]:
# Get all available items in new_client
# (Not used by the download loop below; kept because other cells may rely on it.)
available_resources = [resource for resource in dir(new_client) if not resource.startswith('_')]

# Download every record linked to the CYP3A4 target IDs, one ChEMBL resource at
# a time. `all_data` maps each resource name to a single-element list holding
# one DataFrame of all fetched records, so `pd.DataFrame(all_data)` below has
# exactly one row with one column per resource.
all_data = {}
for resource in ['activity', 'document']:
    print(f'Resource: {resource}...')

    # Get caller for resource
    caller = getattr(new_client, resource)

    # Fetch data for each target ID. Best effort: a failure for one ID is
    # reported and the remaining IDs are still fetched.
    # NOTE(review): whether every resource listed above accepts a
    # `target_chembl_id` filter is not visible from this cell - confirm.
    caller_data = []
    for chembl_id in CYP3A4_CHEMBL_IDS:
        try:
            caller_data.extend(caller.filter(target_chembl_id=chembl_id))
        except Exception as e:
            print(f"Error fetching data from {resource} for ID {chembl_id}: {e}")

    # `all_data` starts empty, so each resource is assigned exactly once.
    all_data[resource] = [pd.DataFrame(caller_data)]

# Make sure the output directory exists so the download is not lost to a
# missing-directory error at write time.
os.makedirs(f'{CUR_DIR}/data', exist_ok=True)
pd.DataFrame(all_data).to_json(f'{CUR_DIR}/data/cyp3a4_data.json', orient='records')

Resource: activity...
Resource: document...
