In [6]:
import requests

def chebi_to_cid(chebi_id):
    print(f"Fetching CID for ChEBI ID: {chebi_id}")
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{chebi_id}/cids/JSON"
    response = requests.get(url)
    print(f"Response Status Code: {response.status_code}")
    
    if response.status_code == 200:
        data = response.json()
        print("Received data:", data)  # Debug print
        if 'IdentifierList' in data:
            return data['IdentifierList']['CID'][0]
        else:
            print("No CID found for this ChEBI ID.")
            return None
    else:
        print(f"Failed to retrieve CID. Status code: {response.status_code}")
        return None

def get_pubchem_description(cid):
    print(f"Fetching description for CID: {cid}")
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/description/JSON"
    response = requests.get(url)
    print(f"Response Status Code: {response.status_code}")
    
    if response.status_code == 200:
        data = response.json()
        print("Response Data:", data)  # Print the entire response for debugging
        if 'InformationList' in data:
            for info in data['InformationList']['Information']:
                if 'Description' in info:
                    return info['Description']
            print("Description key is missing in the response.")
            return None
        else:
            print("InformationList key is missing in the response.")
            return None
    else:
        print(f"Failed to retrieve description. Status code: {response.status_code}")
        return None

chebi_id = "CHEBI:58245"
cid = chebi_to_cid(chebi_id)

if cid:
    description = get_pubchem_description(cid)
    if description:
        print(f"Description: {description}")
    else:
        print("Description not available.")
else:
    print("No CID found; cannot fetch description.")


Fetching CID for ChEBI ID: CHEBI:58245
Response Status Code: 200
Received data: {'IdentifierList': {'CID': [22848660]}}
Fetching description for CID: 22848660
Response Status Code: 200
Response Data: {'InformationList': {'Information': [{'CID': 22848660, 'Title': 'Deoxyadenosine-phosphate'}, {'CID': 22848660, 'Description': "2'-deoxyadenosine 5'-monophosphate(2-) is a 2'-deoxyribonucleoside 5'-monophosphate(2-) obtained by deprotonation of the phosphate OH groups of 2'-deoxyadenosine 5'-monophosphate (dAMP). It has a role as a human metabolite and a fundamental metabolite. It is a 2'-deoxynucleoside 5'-monophosphate(2-) and a purine 2'-deoxyribonucleoside 5'-phosphate(2-). It is a conjugate base of a 2'-deoxyadenosine 5'-monophosphate.", 'DescriptionSourceName': 'ChEBI', 'DescriptionURL': 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:58245'}]}}
Description: 2'-deoxyadenosine 5'-monophosphate(2-) is a 2'-deoxyribonucleoside 5'-monophosphate(2-) obtained by deprotonation of the 

In [None]:
import pandas as pd
import requests

def chebi_to_cid(chebi_id):
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{chebi_id}/cids/JSON"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        if 'IdentifierList' in data:
            return data['IdentifierList']['CID'][0]
        else:
            return None
    else:
        return None

def get_pubchem_description(cid):
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/description/JSON"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        if 'InformationList' in data:
            for info in data['InformationList']['Information']:
                if 'Description' in info:
                    return info['Description']
        return None
    else:
        return None

def process_chebi_ids(csv_file, output_file):
    # Load the CSV file
    df = pd.read_csv(csv_file)
    
    # Create a dictionary to store results
    results = {}
    
    # Iterate over each ChEBI ID in the DataFrame
    for index, row in df.iterrows():
        chebi_id = row['ChEBI ID']
        print(f"Processing {chebi_id}...")
        
        # Get the CID
        cid = chebi_to_cid(chebi_id)
        
        # Get the description if a CID was found
        if cid:
            description = get_pubchem_description(cid)
        else:
            description = None
        
        # Store the result in the dictionary
        results[chebi_id] = description
    
    # Convert the results dictionary to a DataFrame
    result_df = pd.DataFrame(list(results.items()), columns=['ChEBI_ID', 'Description'])
    
    # Save the DataFrame to a new CSV file
    result_df.to_csv(output_file, index=False)
    print(f"Results saved to {output_file}")

# Usage example
input_csv_file = '/data/servilla/DT_HGNN/data/ChEBI_ID_183.csv'  # Replace with your input CSV file
output_csv_file = '/data/servilla/DT_HGNN/data/ChEBI_desriptions_183.csv'  # The output CSV file

process_chebi_ids(input_csv_file, output_csv_file)


In [None]:
import requests
import pandas as pd

def chebi_to_cid(chebi_id):
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{chebi_id}/cids/JSON"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        if 'IdentifierList' in data:
            return data['IdentifierList']['CID'][0]
        else:
            return None
    else:
        return None

def get_pubchem_description(cid):
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/description/JSON"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        if 'InformationList' in data:
            for info in data['InformationList']['Information']:
                if 'Description' in info:
                    return info['Description']
        return None
    else:
        return None

def get_chebi_info(chebi_id):
    url = f"https://www.ebi.ac.uk/chebi/ws/rest/chebiId/{chebi_id}"
    headers = {"Accept": "application/json"}
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        data = response.json()
        name = data.get('chebiAsciiName', None)
        definition = data.get('definition', None)
        return name, definition
    else:
        return None, None

def process_chebi_ids(csv_file, output_file):
    df = pd.read_csv(csv_file)
    results = []

    for index, row in df.iterrows():
        chebi_id = row['ChEBI ID']
        print(f"Processing {chebi_id}...")

        name, chebi_definition = get_chebi_info(chebi_id)
        cid = chebi_to_cid(chebi_id)
        pubchem_description = get_pubchem_description(cid) if cid else None
        
        combined_description = ""
        if pubchem_description:
            combined_description += f"PubChem Description: {pubchem_description}"
        if chebi_definition:
            if combined_description:
                combined_description += " | "
            combined_description += f"ChEBI Definition: {chebi_definition}"

        results.append({
            'ChEBI ID': chebi_id,
            'Name': name,
            'Combined Description': combined_description
        })

    result_df = pd.DataFrame(results)
    result_df.to_csv(output_file, index=False)
    print(f"Results saved to {output_file}")

# Usage example
input_csv_file = '/data/servilla/DT_HGNN/data/ChEBI_ID_183.csv'  # Replace with your input CSV file
output_csv_file = '/data/servilla/DT_HGNN/data/ChEBI_name_definition.csv'  # The output CSV file

process_chebi_ids(input_csv_file, output_csv_file)


In [29]:
import pandas as pd

df1 = pd.read_csv('/data/servilla/DT_HGNN/data/chebi_name_smiles_definition.csv')
df2 = pd.read_csv('/data/servilla/DT_HGNN/data/ChEBI_desriptions_183.csv')

df3 = df1[['ChEBI ID', 'SMILES', 'Name']]
df3['Description'] = df2['Description']
df3.to_csv('/data/servilla/DT_HGNN/data/ChEBI_name_smiles_description_combined.csv', index=False)


In [26]:
df3_filtered = df3[df3.index.isin(df2.index)]


In [28]:
df3_filtered.to_csv('/data/servilla/DT_HGNN/data/ChEBI_name_smiles_description_combined.csv', index=False)

In [30]:
df1_filtered = df1[df1['ChEBI ID'].isin(df2['ChEBI ID'])]

In [31]:
df1_filtered.to_csv('/data/servilla/DT_HGNN/data/chebi_name_smiles_definition_filtered1.csv', index=False)

In [32]:
# Check if the indexes are the same
indexes_match = df2.index.equals(df3_filtered.index)

if indexes_match:
    print("The indexes of df1 and df2 are exactly the same.")
else:
    print("The indexes of df1 and df2 are not the same.")
    
# Optionally, you can also print the differences, if any
if not indexes_match:
    print("Indexes in df1 but not in df2:", df1.index.difference(df2.index))
    print("Indexes in df2 but not in df1:", df2.index.difference(df1.index))


The indexes of df1 and df2 are exactly the same.


In [1]:
import xgboost as xgb

# Check for GPU support by trying to set up a parameter that uses GPU
try:
    params = {"tree_method": "gpu_hist"}
    dtrain = xgb.DMatrix([[1, 2], [3, 4]], label=[0, 1])
    xgb.train(params, dtrain, num_boost_round=1)
    print("GPU support is enabled in this XGBoost version.")
except xgb.core.XGBoostError:
    print("No GPU support detected in this XGBoost version.")



    E.g. tree_method = "hist", device = "cuda"



GPU support is enabled in this XGBoost version.
