In [None]:
import requests
import pandas as pd

# Download the Harmonizome helper module so that it can be imported in the next cell.
# NOTE(review): this fetches Python source over the network and later imports it;
# consider pinning or vendoring a reviewed copy instead of downloading on every run.
api = "https://maayanlab.cloud/Harmonizome/static/harmonizomeapi.py"
r = requests.get(api, timeout=60)
# Fail loudly on 4xx/5xx so an error page is never saved as a .py module.
r.raise_for_status()

with open("harmonizomeapi.py", "wb") as f:
    f.write(r.content)

In [None]:
from harmonizomeapi import Harmonizome
from urllib.parse import quote_plus
from tqdm import tqdm

# Harvest every gene-disease association of one Harmonizome dataset into `df`
# (one row per association) and save it as gene_disease_scores.csv.
dataset_name = "DISEASES Experimental Gene-Disease Association Evidence Scores 2025"
dataset = Harmonizome.get("dataset", name=dataset_name)

rows = []
skipped_diseases = []  # diseases whose gene set could not be retrieved or parsed

# Gene-set names have the form "<disease>/<dataset>".
gene_set_suffix = f"/{dataset_name}"

# A single session reuses the HTTP connection across the per-disease requests.
with requests.Session() as session:
    for gs in tqdm(dataset["geneSets"], desc="Getting genes for all the diseases"):
        gene_set_name = gs["name"]
        if gene_set_name.endswith(gene_set_suffix):
            # Strip the known dataset suffix so that a disease name which itself
            # contains "/" is kept intact.
            disease_name = gene_set_name[: -len(gene_set_suffix)]
        else:
            disease_name = gene_set_name.split("/")[0]

        url = f"https://maayanlab.cloud/Harmonizome/api/1.0/gene_set/{quote_plus(disease_name)}/{quote_plus(dataset_name)}"
        try:
            resp = session.get(url, timeout=60)
            resp.raise_for_status()
            geneset = resp.json()
        except (requests.RequestException, ValueError) as e:
            # Network failure, HTTP error status, or a body that is not valid JSON.
            print("Err::", disease_name, e)
            skipped_diseases.append(disease_name)
            continue

        if "associations" not in geneset:
            # Report the skip instead of dropping the disease silently.
            print("Err::", disease_name, "no 'associations' in response")
            skipped_diseases.append(disease_name)
            continue

        for assoc in geneset["associations"]:
            rows.append({
                "gene": assoc["gene"]["symbol"],
                "gene_href": assoc["gene"]["href"],
                "disease": geneset["attribute"]["name"],
                "disease_href": geneset["attribute"]["href"],
                "dataset": geneset["dataset"]["name"],
                "dataset_href": geneset["dataset"]["href"],
                "threshold": assoc.get("thresholdValue"),
                "score": assoc.get("standardizedValue"),
            })

df = pd.DataFrame(rows)
print("Number of rows:", len(df))
if skipped_diseases:
    print("Skipped diseases:", len(skipped_diseases), skipped_diseases)
df.to_csv("gene_disease_scores.csv", index=False)

# Number of distinct values per column, shown as the cell output.
unique_counts = df.nunique()
unique_counts

Getting genes for all the diseases: 100%|██████████| 557/557 [05:55<00:00,  1.56it/s]


Number of rows: 171854


Unnamed: 0,0
gene,10636
gene_href,10636
disease,557
disease_href,557
dataset,1
dataset_href,1
threshold,1
score,1202


In [None]:
# Show the gene-disease association table built above (pandas truncates the rendering of large frames).
df

Unnamed: 0,gene,gene_href,disease,disease_href,dataset,dataset_href,threshold,score
0,ZBTB20,/api/1.0/gene/ZBTB20,disease of mental health,/api/1.0/attribute/disease+of+mental+health,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,1.032
1,HDX,/api/1.0/gene/HDX,disease of mental health,/api/1.0/attribute/disease+of+mental+health,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,1.440
2,RETNLB,/api/1.0/gene/RETNLB,disease of mental health,/api/1.0/attribute/disease+of+mental+health,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,0.526
3,BEND4,/api/1.0/gene/BEND4,disease of mental health,/api/1.0/attribute/disease+of+mental+health,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,1.812
4,FIGN,/api/1.0/gene/FIGN,disease of mental health,/api/1.0/attribute/disease+of+mental+health,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,0.618
...,...,...,...,...,...,...,...,...
171849,HLA-B,/api/1.0/gene/HLA-B,proliferative diabetic retinopathy,/api/1.0/attribute/proliferative+diabetic+reti...,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,0.986
171850,GOLIM4,/api/1.0/gene/GOLIM4,proliferative diabetic retinopathy,/api/1.0/attribute/proliferative+diabetic+reti...,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,0.530
171851,TRPM3,/api/1.0/gene/TRPM3,DOID:14313,/api/1.0/attribute/DOID%3A14313,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,1.117
171852,MAGI1,/api/1.0/gene/MAGI1,Ileocolitis,/api/1.0/attribute/Ileocolitis,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,1.095


In [None]:
# Fetch the metadata of every distinct gene in `df` from the Harmonizome gene
# endpoint and collect it in `genes` (one row per gene that could be retrieved).
base_url = "https://maayanlab.cloud/Harmonizome/api/1.0"

unique_genes = df["gene"].unique()

rows = []
failed_genes = []  # symbols whose lookup failed; their associations will lack metadata

# A single session reuses the HTTP connection across the per-gene requests.
with requests.Session() as session:
    for g in tqdm(unique_genes, desc="Getting genes info"):
        # Percent-encode the symbol, as the disease cells do for disease names.
        gene_url = f"{base_url}/gene/{quote_plus(str(g))}"

        try:
            resp = session.get(gene_url, timeout=60)
        except requests.RequestException as e:
            # One network failure must not abort the whole (long) loop.
            print("Request error on:", gene_url, e)
            failed_genes.append(g)
            continue

        if resp.status_code != 200:
            print("HTTP error:", resp.status_code, "on", gene_url)
            failed_genes.append(g)
            continue

        try:
            gene_info = resp.json()
        except ValueError as e:
            print("JSON parsing error on:", gene_url, e)
            failed_genes.append(g)
            continue

        # Missing or empty lists are stored as None (via `or None` below).
        proteins = gene_info.get("proteins") or []
        families = gene_info.get("hgncRootFamilies") or []

        rows.append({
            # Fall back to the requested symbol so the row keeps a usable join key.
            "gene": gene_info.get("symbol") or g,
            "synonyms": gene_info.get("synonyms"),
            "name": gene_info.get("name"),
            "description": gene_info.get("description"),
            "ncbiEntrezGeneId": gene_info.get("ncbiEntrezGeneId"),
            "ncbiEntrezGeneUrl": gene_info.get("ncbiEntrezGeneUrl"),
            "proteins": [p.get("symbol") for p in proteins] or None,
            "proteins_href": [p.get("href") for p in proteins] or None,
            "hgncRootFamilies": [fam.get("name") for fam in families] or None,
            "hgncRootFamilies_href": [fam.get("href") for fam in families] or None,
        })

genes = pd.DataFrame(rows)
if failed_genes:
    print("Genes without info:", len(failed_genes), list(failed_genes))
len(genes)


Getting genes info:  19%|█▊        | 1984/10636 [06:47<31:42,  4.55it/s]

HTTP error: 500 on https://maayanlab.cloud/Harmonizome/api/1.0/gene/HSPA1B


Getting genes info:  21%|██        | 2209/10636 [07:33<30:05,  4.67it/s]

HTTP error: 500 on https://maayanlab.cloud/Harmonizome/api/1.0/gene/HSPA1A


Getting genes info: 100%|██████████| 10636/10636 [36:12<00:00,  4.90it/s]


10634

In [None]:
# Distinct values per column of the gene table. Cells are cast to str first
# because several columns hold Python lists, which cannot be hashed for counting.
genes_as_text = genes.astype(str)
unique_counts2 = genes_as_text.nunique()
unique_counts2

Unnamed: 0,0
gene,10634
synonyms,8935
name,10634
description,9848
ncbiEntrezGeneId,10634
ncbiEntrezGeneUrl,10634
proteins,10131
proteins_href,10131
hgncRootFamilies,738
hgncRootFamilies_href,738


In [None]:
# Show the gene metadata table (one row per gene whose lookup succeeded).
genes

Unnamed: 0,gene,synonyms,name,description,ncbiEntrezGeneId,ncbiEntrezGeneUrl,proteins,proteins_href,hgncRootFamilies,hgncRootFamilies_href
0,ZBTB20,"[PRIMS, DPZF, ZNF288, HOF, ODA-8S]",zinc finger and BTB domain containing 20,"This gene, which was initially designated as d...",26137,http://www.ncbi.nlm.nih.gov/gene/26137,[ZBT20_HUMAN],[/api/1.0/protein/ZBT20_HUMAN],"[Zinc fingers, BTB (POZ) domain containing (BT...","[/api/1.0/gene_family/Zinc+fingers, /api/1.0/g..."
1,HDX,"[CXORF43, D030011N01RIK]",highly divergent homeobox,Predicted to enable DNA-binding transcription ...,139324,http://www.ncbi.nlm.nih.gov/gene/139324,[HDX_HUMAN],[/api/1.0/protein/HDX_HUMAN],[Homeoboxes],[/api/1.0/gene_family/Homeoboxes]
2,RETNLB,"[RELMB, XCP2, RELM-BETA, RELMBETA, FIZZ1, FIZZ...",resistin like beta,Predicted to enable hormone activity. Involved...,84666,http://www.ncbi.nlm.nih.gov/gene/84666,[RETNB_HUMAN],[/api/1.0/protein/RETNB_HUMAN],,
3,BEND4,[CCDC4],BEN domain containing 4,Predicted to enable DNA binding activity. [pro...,389206,http://www.ncbi.nlm.nih.gov/gene/389206,[BEND4_HUMAN],[/api/1.0/protein/BEND4_HUMAN],[BEN domain containing (BEND)],[/api/1.0/gene_family/BEN+domain+containing+%2...
4,FIGN,[],fidgetin,Predicted to enable ATP hydrolysis activity an...,55137,http://www.ncbi.nlm.nih.gov/gene/55137,[FIGN_HUMAN],[/api/1.0/protein/FIGN_HUMAN],[ATPases],[/api/1.0/gene_family/ATPases]
...,...,...,...,...,...,...,...,...,...,...
10629,CST9L,"[CTES7B, BA218C14.1]",cystatin 9-like,The cystatin superfamily encompasses proteins ...,128821,http://www.ncbi.nlm.nih.gov/gene/128821,[CST9L_HUMAN],[/api/1.0/protein/CST9L_HUMAN],,
10630,CST3,"[HEL-S-2, ARMD11]",cystatin C,The cystatin superfamily encompasses proteins ...,1471,http://www.ncbi.nlm.nih.gov/gene/1471,[CYTC_HUMAN],[/api/1.0/protein/CYTC_HUMAN],,
10631,CST9,[CTES7A],cystatin 9 (testatin),The cystatin superfamily encompasses proteins ...,128822,http://www.ncbi.nlm.nih.gov/gene/128822,[CST9_HUMAN],[/api/1.0/protein/CST9_HUMAN],,
10632,CST4,[],cystatin S,The cystatin superfamily encompasses proteins ...,1472,http://www.ncbi.nlm.nih.gov/gene/1472,[CYTS_HUMAN],[/api/1.0/protein/CYTS_HUMAN],,


In [None]:
# Persist the gene metadata without the index column.
# NOTE(review): list-valued columns (e.g. proteins, hgncRootFamilies) are presumably
# written as their string representation and would need parsing when read back.
genes.to_csv("gene_info.csv", index=False)

In [None]:
# Fetch the metadata of every distinct disease in `df` from the Harmonizome
# attribute endpoint and collect it in `diseases` (one row per retrieved disease).
unique_diseases = df["disease"].unique()

rows = []
failed_diseases = []  # names whose lookup failed; their associations will lack metadata

# A single session reuses the HTTP connection across the per-disease requests.
with requests.Session() as session:
    for disease_name in tqdm(unique_diseases, desc="Getting diseases info"):
        url = f"https://maayanlab.cloud/Harmonizome/api/1.0/attribute/{quote_plus(disease_name)}"

        try:
            resp = session.get(url, timeout=60)
        except requests.RequestException as e:
            # One network failure must not abort the whole loop.
            print("Request error on:", disease_name, e)
            failed_diseases.append(disease_name)
            continue

        if resp.status_code != 200:
            print("HTTP error:", resp.status_code, "on", disease_name)
            failed_diseases.append(disease_name)
            continue

        try:
            attr = resp.json()
        except ValueError as e:
            print("JSON parsing error on:", disease_name, e)
            failed_diseases.append(disease_name)
            continue

        # Keep only the gene-set names; None when the field is absent or not a list.
        gene_sets = None
        if isinstance(attr.get("geneSets"), list):
            gene_sets = [g.get("name") for g in attr["geneSets"]]

        # A missing or empty naming authority yields None for both derived columns.
        naming_authority = attr.get("namingAuthority") or {}

        rows.append({
            "disease": attr.get("name"),
            "nameFromNamingAuthority": attr.get("nameFromNamingAuthority"),
            "nameFromDataset": attr.get("nameFromDataset"),
            "idFromNamingAuthority": attr.get("idFromNamingAuthority"),
            "descriptionFromNamingAuthority": attr.get("descriptionFromNamingAuthority"),
            "urlFromNamingAuthority": attr.get("urlFromNamingAuthority"),
            "namingAuthority_name": naming_authority.get("name"),
            "namingAuthority_href": naming_authority.get("href"),
            "geneSets": gene_sets,
        })

diseases = pd.DataFrame(rows)
if failed_diseases:
    print("Diseases without info:", len(failed_diseases), list(failed_diseases))
print("Totale malattie arricchite:", len(diseases))

Getting diseases info: 100%|██████████| 557/557 [01:53<00:00,  4.90it/s]

Totale malattie arricchite: 557





In [None]:
# Show the disease metadata table (one row per disease whose lookup succeeded).
diseases

Unnamed: 0,disease,nameFromNamingAuthority,nameFromDataset,idFromNamingAuthority,descriptionFromNamingAuthority,urlFromNamingAuthority,namingAuthority_name,namingAuthority_href,geneSets
0,disease of mental health,disease of mental health,disease of mental health,DOID_150,A disease that involves a psychological or beh...,http://purl.obolibrary.org/obo/DOID_150,Human Disease Ontology,/api/1.0/naming_authority/Human+Disease+Ontology,[disease of mental health/DISEASES Experimenta...
1,duodenal ulcer,duodenal ulcer,duodenal ulcer,DOID_1724,Adding UMLS CUI for Curling Ulcer C0013295.,http://purl.obolibrary.org/obo/DOID_1724,Human Disease Ontology,/api/1.0/naming_authority/Human+Disease+Ontology,"[duodenal ulcer/HPO Gene-Disease Associations,..."
2,disease,disease,disease,EFO_0000408,A disease is a disposition that describes stat...,http://www.ebi.ac.uk/efo/EFO_0000408,Experimental Factor Ontology,/api/1.0/naming_authority/Experimental+Factor+...,[Disease/HuGE Navigator Gene-Phenotype Associa...
3,sleep disorder,sleep disorder,sleep disorder,DOID_535,A disease of mental health that involves disru...,http://purl.obolibrary.org/obo/DOID_535,Human Disease Ontology,/api/1.0/naming_authority/Human+Disease+Ontology,[Sleep disorder/DISEASES Experimental Gene-Dis...
4,disease of anatomical entity,disease of anatomical entity,disease of anatomical entity,DOID_7,A disease that manifests in a defined anatomic...,http://purl.obolibrary.org/obo/DOID_7,Human Disease Ontology,/api/1.0/naming_authority/Human+Disease+Ontology,[disease of anatomical entity/DISEASES Experim...
...,...,...,...,...,...,...,...,...,...
552,childhood type dermatomyositis,childhood type dermatomyositis,childhood type dermatomyositis,DOID_14203,,http://purl.obolibrary.org/obo/DOID_14203,Human Disease Ontology,/api/1.0/naming_authority/Human+Disease+Ontology,[Childhood type dermatomyositis/DISEASES Text-...
553,proliferative diabetic retinopathy,proliferative diabetic retinopathy,proliferative diabetic retinopathy,DOID_13207,,http://purl.obolibrary.org/obo/DOID_13207,Human Disease Ontology,/api/1.0/naming_authority/Human+Disease+Ontology,[proliferative diabetic retinopathy/DISEASES T...
554,DOID:14313,DOID:14313,DOID:14313,DOID:14313,,,Human Disease Ontology,/api/1.0/naming_authority/Human+Disease+Ontology,[DOID:14313/DISEASES Experimental Gene-Disease...
555,Ileocolitis,Ileocolitis,Ileocolitis,DOID:0060190,,,Human Disease Ontology,/api/1.0/naming_authority/Human+Disease+Ontology,[Ileocolitis/DISEASES Text-mining Gene-Disease...


In [None]:
# Distinct values per column of the disease table. Cells are cast to str first
# because the geneSets column holds Python lists, which cannot be hashed for counting.
# Note: this rebinds `unique_counts2`, which an earlier cell used for the gene table.
diseases_as_text = diseases.astype(str)
unique_counts2 = diseases_as_text.nunique()
unique_counts2

Unnamed: 0,0
disease,557
nameFromNamingAuthority,557
nameFromDataset,557
idFromNamingAuthority,556
descriptionFromNamingAuthority,385
urlFromNamingAuthority,521
namingAuthority_name,10
namingAuthority_href,10
geneSets,557


In [None]:
# Persist the disease metadata without the index column.
# NOTE(review): the list-valued geneSets column is presumably written as its string
# representation and would need parsing when read back.
diseases.to_csv("disease_info.csv", index=False)

In [None]:
# Prefix the metadata columns so their origin stays clear after merging; the join
# keys ("gene" / "disease") keep their names.
genes_renamed = genes.rename(columns={c: f"gene_{c}" for c in genes.columns if c != "gene"})
diseases_renamed = diseases.rename(columns={c: f"disease_{c}" for c in diseases.columns if c != "disease"})

# Inner joins keep only associations whose gene AND disease metadata were retrieved.
final_df = (
    df.merge(genes_renamed, on="gene", how="inner")
      .merge(diseases_renamed, on="disease", how="inner")
)

# Make the data loss of the inner joins visible instead of silent.
missing_genes = sorted(set(df["gene"]) - set(genes["gene"]))
missing_diseases = sorted(set(df["disease"]) - set(diseases["disease"]))
if len(final_df) != len(df):
    print("Rows before merge:", len(df), "- after merge:", len(final_df))
if missing_genes:
    print("Genes without metadata (associations dropped):", missing_genes)
if missing_diseases:
    print("Diseases without metadata (associations dropped):", missing_diseases)

In [None]:
# Show the merged table: associations enriched with gene_* and disease_* metadata columns.
final_df

Unnamed: 0,gene,gene_href,disease,disease_href,dataset,dataset_href,threshold,score,gene_synonyms,gene_name,...,gene_hgncRootFamilies,gene_hgncRootFamilies_href,disease_nameFromNamingAuthority,disease_nameFromDataset,disease_idFromNamingAuthority,disease_descriptionFromNamingAuthority,disease_urlFromNamingAuthority,disease_namingAuthority_name,disease_namingAuthority_href,disease_geneSets
0,ZBTB20,/api/1.0/gene/ZBTB20,disease of mental health,/api/1.0/attribute/disease+of+mental+health,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,1.032,"[PRIMS, DPZF, ZNF288, HOF, ODA-8S]",zinc finger and BTB domain containing 20,...,"[Zinc fingers, BTB (POZ) domain containing (BT...","[/api/1.0/gene_family/Zinc+fingers, /api/1.0/g...",disease of mental health,disease of mental health,DOID_150,A disease that involves a psychological or beh...,http://purl.obolibrary.org/obo/DOID_150,Human Disease Ontology,/api/1.0/naming_authority/Human+Disease+Ontology,[disease of mental health/DISEASES Experimenta...
1,HDX,/api/1.0/gene/HDX,disease of mental health,/api/1.0/attribute/disease+of+mental+health,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,1.440,"[CXORF43, D030011N01RIK]",highly divergent homeobox,...,[Homeoboxes],[/api/1.0/gene_family/Homeoboxes],disease of mental health,disease of mental health,DOID_150,A disease that involves a psychological or beh...,http://purl.obolibrary.org/obo/DOID_150,Human Disease Ontology,/api/1.0/naming_authority/Human+Disease+Ontology,[disease of mental health/DISEASES Experimenta...
2,RETNLB,/api/1.0/gene/RETNLB,disease of mental health,/api/1.0/attribute/disease+of+mental+health,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,0.526,"[RELMB, XCP2, RELM-BETA, RELMBETA, FIZZ1, FIZZ...",resistin like beta,...,,,disease of mental health,disease of mental health,DOID_150,A disease that involves a psychological or beh...,http://purl.obolibrary.org/obo/DOID_150,Human Disease Ontology,/api/1.0/naming_authority/Human+Disease+Ontology,[disease of mental health/DISEASES Experimenta...
3,BEND4,/api/1.0/gene/BEND4,disease of mental health,/api/1.0/attribute/disease+of+mental+health,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,1.812,[CCDC4],BEN domain containing 4,...,[BEN domain containing (BEND)],[/api/1.0/gene_family/BEN+domain+containing+%2...,disease of mental health,disease of mental health,DOID_150,A disease that involves a psychological or beh...,http://purl.obolibrary.org/obo/DOID_150,Human Disease Ontology,/api/1.0/naming_authority/Human+Disease+Ontology,[disease of mental health/DISEASES Experimenta...
4,FIGN,/api/1.0/gene/FIGN,disease of mental health,/api/1.0/attribute/disease+of+mental+health,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,0.618,[],fidgetin,...,[ATPases],[/api/1.0/gene_family/ATPases],disease of mental health,disease of mental health,DOID_150,A disease that involves a psychological or beh...,http://purl.obolibrary.org/obo/DOID_150,Human Disease Ontology,/api/1.0/naming_authority/Human+Disease+Ontology,[disease of mental health/DISEASES Experimenta...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171703,HLA-B,/api/1.0/gene/HLA-B,proliferative diabetic retinopathy,/api/1.0/attribute/proliferative+diabetic+reti...,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,0.986,"[AS, SPDA1, HLAB, B-4901]","major histocompatibility complex, class I, B",...,"[Immunoglobulin superfamily domain containing,...",[/api/1.0/gene_family/Immunoglobulin+superfami...,proliferative diabetic retinopathy,proliferative diabetic retinopathy,DOID_13207,,http://purl.obolibrary.org/obo/DOID_13207,Human Disease Ontology,/api/1.0/naming_authority/Human+Disease+Ontology,[proliferative diabetic retinopathy/DISEASES T...
171704,GOLIM4,/api/1.0/gene/GOLIM4,proliferative diabetic retinopathy,/api/1.0/attribute/proliferative+diabetic+reti...,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,0.530,"[GOLPH4, GPP130, P138, GIMPC]",golgi integral membrane protein 4,...,,,proliferative diabetic retinopathy,proliferative diabetic retinopathy,DOID_13207,,http://purl.obolibrary.org/obo/DOID_13207,Human Disease Ontology,/api/1.0/naming_authority/Human+Disease+Ontology,[proliferative diabetic retinopathy/DISEASES T...
171705,TRPM3,/api/1.0/gene/TRPM3,DOID:14313,/api/1.0/attribute/DOID%3A14313,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,1.117,"[LTRPC3, GON-2, MLSN2]","transient receptor potential cation channel, s...",...,[Ion channels],[/api/1.0/gene_family/Ion+channels],DOID:14313,DOID:14313,DOID:14313,,,Human Disease Ontology,/api/1.0/naming_authority/Human+Disease+Ontology,[DOID:14313/DISEASES Experimental Gene-Disease...
171706,MAGI1,/api/1.0/gene/MAGI1,Ileocolitis,/api/1.0/attribute/Ileocolitis,DISEASES Experimental Gene-Disease Association...,/api/1.0/dataset/DISEASES+Experimental+Gene-Di...,1.0,1.095,"[MAGI-1, AIP-3, AIP3, TNRC19, WWP3, BAIAP1, MA...","membrane associated guanylate kinase, WW and P...",...,"[Membrane-associated guanylate kinases, Trinuc...",[/api/1.0/gene_family/Membrane-associated+guan...,Ileocolitis,Ileocolitis,DOID:0060190,,,Human Disease Ontology,/api/1.0/naming_authority/Human+Disease+Ontology,[Ileocolitis/DISEASES Text-mining Gene-Disease...


In [None]:
# Persist the merged association table without the index column.
# NOTE(review): list-valued metadata columns are presumably written as their string
# representation and would need parsing when read back.
final_df.to_csv("final_dataset.csv", index=False)