In [10]:
import requests
import time

def get_uniprot_ids(chembl_id, session=None, timeout=30):
    """Fetch the UniProt cross-reference IDs for a given ChEMBL target ID.

    Requests the ChEMBL target record for ``chembl_id`` and collects the
    ``xref_id`` of every entry in each component's ``target_component_xrefs``
    whose ``xref_src_db`` equals ``'UniProt'``.

    Args:
        chembl_id: ChEMBL target identifier; interpolated into the request URL.
        session: Optional object with a ``requests``-compatible
            ``get(url, timeout=...)`` method (for example a
            ``requests.Session`` to reuse one connection across many calls).
            When omitted, the module-level ``requests`` API is used.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block a long batch forever.

    Returns:
        list: The UniProt IDs in the order they appear in the response.
        An empty list is returned when the request fails, the response is not
        valid JSON, the payload has an unexpected shape, or the target has no
        UniProt cross-references.
    """
    url = f"https://www.ebi.ac.uk/chembl/api/data/target/{chembl_id}.json"
    http = requests if session is None else session

    try:
        response = http.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()

        uniprot_ids = []
        # `.get(...) or []` tolerates both a missing key and an explicit null.
        for component in data.get('target_components') or []:
            for xref in component.get('target_component_xrefs') or []:
                # Skip incomplete xrefs instead of discarding the whole target.
                if xref.get('xref_src_db') == 'UniProt' and 'xref_id' in xref:
                    uniprot_ids.append(xref['xref_id'])
        return uniprot_ids
    except (requests.RequestException, ValueError, AttributeError, TypeError) as exc:
        # Best-effort lookup: report the failure and carry on with an empty
        # result. KeyboardInterrupt and SystemExit are deliberately NOT caught,
        # so a long-running batch can still be stopped by the user.
        import warnings
        warnings.warn(f"Could not fetch UniProt IDs for {chembl_id}: {exc!r}")
        return []


In [7]:
# Load the ChEMBL IDs listed in xfp_chembls.txt (one per line, blank lines skipped)
with open('xfp_chembls.txt', 'r') as f:
    chembl_ids = [entry for entry in map(str.strip, f) if entry]

# Load the ChEMBL IDs listed in ecfp_chembls.txt the same way
with open('ecfp_chembls.txt', 'r') as f:
    chembl_ids2 = [entry for entry in map(str.strip, f) if entry]

# Combine both lists: xfp entries first, then ecfp entries (duplicates are kept)
chembl_ids = [*chembl_ids, *chembl_ids2]


In [12]:
# Load the ChEMBL IDs from chembl_chembls.txt, skipping blank lines.
# Note: this rebinds `chembl_ids`, replacing any list loaded by an earlier cell.
with open('chembl_chembls.txt', 'r') as f:
    chembl_ids = [entry for entry in (raw.strip() for raw in f) if entry]

In [13]:
# Collect the UniProt IDs of every ChEMBL ID (one API request per ID)
all_uniprot_ids = []
for chembl_id in chembl_ids:
    uniprot_ids = get_uniprot_ids(chembl_id)
    all_uniprot_ids.extend(uniprot_ids)
    time.sleep(0.1)  # Be nice to the API

# Remove duplicates and write to output file.
# The IDs are sorted so the file content is identical from run to run;
# iterating a set of strings directly gives an order that can change between
# interpreter sessions.
unique_uniprot_ids = sorted(set(all_uniprot_ids))
output_path = 'uniprot_ids_ppb2.txt'
with open(output_path, 'w') as f:
    for uniprot_id in unique_uniprot_ids:
        f.write(f"{uniprot_id}\n")

print(f"Processed {len(chembl_ids)} ChEMBL IDs")
print(f"Found {len(unique_uniprot_ids)} unique UniProt IDs")
print(f"Output written to {output_path}")

Processed 707 ChEMBL IDs
Found 3514 unique UniProt IDs
Output written to uniprot_ids_ppb2.txt


In [37]:
# Load the ChEMBL IDs from xfp_chembls.txt: strip each line and drop empty ones
with open('xfp_chembls.txt', 'r') as f:
    chembl_ids = list(filter(None, map(str.strip, f)))


In [32]:
# Look up the UniProt IDs of each ChEMBL ID, keeping one result list per ID
# (same order as chembl_ids). An explicit loop is used so the requests can be
# throttled, and the loop variable no longer shadows the builtin `id`.
uniprot_ids = []
for chembl_id in chembl_ids:
    uniprot_ids.append(get_uniprot_ids(chembl_id))
    time.sleep(0.1)  # Be nice to the API

In [33]:
# Display the per-ID results: one inner list of UniProt IDs per entry of chembl_ids
uniprot_ids

[['P11511', 'Q16731', 'Q3B764', 'Q58FA0', 'Q8IYJ7'],
 ['O95569', 'P18054', 'Q6ISF8', 'Q9UQM4'],
 ['A8K2P4', 'B7ZA11', 'P16050', 'Q8N6R7', 'Q99657'],
 ['B2R6R3', 'B7Z5H3', 'D3DWC3', 'P27338', 'Q7Z6S2'],
 ['P37136'],
 ['A8K8K5',
  'G3V5M5',
  'O60608',
  'O60685',
  'O60702',
  'O60703',
  'O75583',
  'O75584',
  'Q0MWT5',
  'Q0MWT6',
  'Q86Z31',
  'Q92731',
  'Q9UEV6',
  'Q9UHD3',
  'Q9UQK9'],
 ['Q13511',
  'Q14276',
  'Q5T5H7',
  'Q6MZQ9',
  'Q9NU51',
  'Q9UDZ7',
  'Q9UIS7',
  'P03372'],
 ['P42345', 'Q4LE76', 'Q5TER1', 'Q6LE87', 'Q96QG3', 'Q9Y4I3'],
 ['A5Z1R8', 'B2R4S3', 'Q2V4Y5', 'Q6FHV0', 'P14174'],
 ['B2RA44', 'P21728', 'Q4QRJ0'],
 ['B4DF46', 'P21397', 'Q16426'],
 ['B2R7G8', 'Q6FI12', 'Q96ET9', 'P00918'],
 ['A4D0Q6', 'Q8IV23', 'Q9BZC8', 'P48736'],
 ['P06211'],
 ['P25099'],
 ['P18901', 'P21669'],
 ['D3DNF0', 'P42338', 'Q24JU2'],
 ['Q14CW1', 'Q99762', 'P42336'],
 ['P78527',
  'P78528',
  'Q13327',
  'Q13337',
  'Q14175',
  'Q59H99',
  'Q7Z611',
  'Q96SE6',
  'Q9UME3'],
 ['P14416', 'Q9