# In [1]:
# 🧬 Ensembl Trait and LD Variant Explorer

import requests
import json
import emoji
import re

# Optional: Pandas for displaying tables
import pandas as pd

class EnsemblExplorer:
    """Thin client for the Ensembl REST API phenotype and LD endpoints.

    The ``get_*`` methods each issue one HTTP GET and return plain Python
    lists; the ``explore_*`` methods aggregate those results (into pandas
    DataFrames where tabular output makes sense).

    Failures (network errors, non-200 responses, unparsable bodies) are
    reported with a printed warning and surface to the caller as an empty
    list, so a single bad lookup never aborts a batch exploration.
    """

    ENSEMBL_API = "https://rest.ensembl.org"

    # Seconds to wait for the server before giving up. Without a timeout
    # ``requests`` may block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    # Fixed column schemas so that empty results still yield DataFrames
    # with the expected columns (avoids KeyError in downstream code).
    _GENE_PHENOTYPE_COLUMNS = ["Gene", "Phenotype"]
    _LD_COLUMNS = ["Query Variant", "Linked Variant", "D'", "r²", "Population"]

    def _fetch_records(self, path, label):
        """GET ``ENSEMBL_API/<path>`` and return the JSON payload as a list.

        Args:
            path: Endpoint path relative to ``ENSEMBL_API`` (no leading slash).
            label: Human-readable description of what is being fetched,
                used in warning messages (e.g. ``"phenotypes for TP53"``).

        Returns:
            The decoded JSON list, or ``[]`` if the request failed, the
            status code was not 200, the body was not valid JSON, or the
            payload was not a list.
        """
        url = f"{self.ENSEMBL_API}/{path}?content-type=application/json"
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Connection errors, timeouts, etc. are reported like HTTP errors.
            print(emoji.emojize(":warning: "), f"Error fetching {label}: {exc}")
            return []

        if response.status_code != 200:
            print(emoji.emojize(":warning: "), f"Error fetching {label}: {response.status_code}")
            return []

        try:
            payload = response.json()
        except ValueError as exc:
            print(emoji.emojize(":warning: "), f"Error fetching {label}: invalid JSON ({exc})")
            return []

        # Anything other than a list of records is treated as "no data".
        return payload if isinstance(payload, list) else []

    @staticmethod
    def _genes_from_entries(entries):
        """Collect the distinct ``associated_gene`` values from phenotype records.

        The value is looked up at the top level of each record first and
        then inside its ``"attributes"`` mapping.
        NOTE(review): the Ensembl ``phenotype/term`` endpoint appears to nest
        ``associated_gene`` under ``attributes`` - confirm against the API docs.

        Returns:
            A sorted list of unique, non-empty gene values.
        """
        genes = set()
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            gene = entry.get("associated_gene")
            if not gene:
                attributes = entry.get("attributes")
                if isinstance(attributes, dict):
                    gene = attributes.get("associated_gene")
            if gene:
                genes.add(gene)
        # Sorted so repeated calls return a stable order.
        return sorted(genes, key=str)

    def get_phenotypes(self, gene_symbol, species):
        """Return the phenotype descriptions reported for a gene.

        Args:
            gene_symbol: Gene symbol placed in the request path (e.g. ``"TP53"``).
            species: Species name placed in the request path
                (e.g. ``"homo_sapiens"``).

        Returns:
            A list with the ``"description"`` value of every returned record
            that has one (duplicates are kept); ``[]`` on any failure.
        """
        records = self._fetch_records(
            f"phenotype/gene/{species}/{gene_symbol}",
            f"phenotypes for {gene_symbol}",
        )
        return [
            entry["description"]
            for entry in records
            if isinstance(entry, dict) and "description" in entry
        ]

    def get_genes(self, phenotype_term, species):
        """Return the distinct genes associated with a phenotype term.

        Args:
            phenotype_term: Phenotype term placed in the request path.
            species: Species name placed in the request path.

        Returns:
            A sorted list of unique ``associated_gene`` values; ``[]`` on
            any failure or when no record carries one.
        """
        records = self._fetch_records(
            f"phenotype/term/{species}/{phenotype_term}",
            f"genes for {phenotype_term}",
        )
        return self._genes_from_entries(records)

    def get_ld_variants(self, species, variant_id, population):
        """Return the raw LD records for a variant in a population.

        Args:
            species: Species name placed in the request path.
            variant_id: Variant identifier placed in the request path
                (e.g. ``"rs1042779"``).
            population: Population name placed in the request path
                (e.g. ``"1000GENOMES:phase_3:KHV"``).

        Returns:
            The list of records returned by the service; ``[]`` on any failure.
        """
        return self._fetch_records(
            f"ld/{species}/{variant_id}/{population}",
            f"LD variants for {variant_id}",
        )

    def clean_input(self, text):
        """Strip surrounding whitespace, then one leading/trailing double quote."""
        return re.sub(r'^\"|\"$', '', text.strip())

    def explore_gene_phenotypes(self, species, gene_list):
        """Tabulate phenotypes for several genes.

        Args:
            species: Species name passed to :meth:`get_phenotypes`.
            gene_list: Iterable of gene symbols.

        Returns:
            A DataFrame with columns ``Gene`` and ``Phenotype``, one row per
            (gene, phenotype) pair. Genes with no phenotypes get a single
            row whose ``Phenotype`` is ``"None found"``. The columns are
            present even when ``gene_list`` is empty.
        """
        results = []
        for gene in gene_list:
            # Fall back to a placeholder row so every queried gene appears.
            phenotypes = self.get_phenotypes(gene, species) or ["None found"]
            results.extend({"Gene": gene, "Phenotype": pheno} for pheno in phenotypes)
        return pd.DataFrame(results, columns=self._GENE_PHENOTYPE_COLUMNS)

    def explore_phenotype_genes(self, species, phenotype_term):
        """Return the genes for a phenotype term (delegates to :meth:`get_genes`)."""
        return self.get_genes(phenotype_term, species)

    def explore_ld_for_variants(self, species, variant_list, population):
        """Tabulate LD records for several query variants.

        Args:
            species: Species name passed to :meth:`get_ld_variants`.
            variant_list: Iterable of variant identifiers.
            population: Population name passed to :meth:`get_ld_variants`.

        Returns:
            A DataFrame with columns ``Query Variant``, ``Linked Variant``,
            ``D'``, ``r²`` and ``Population``, one row per LD record. The
            columns are present even when there are no records.
        """
        ld_records = []
        for variant in variant_list:
            for item in self.get_ld_variants(species, variant, population):
                if not isinstance(item, dict):
                    # Skip malformed entries rather than failing the batch.
                    continue
                ld_records.append({
                    "Query Variant": item.get("variation1"),
                    "Linked Variant": item.get("variation2"),
                    "D'": item.get("d_prime"),
                    "r²": item.get("r2"),
                    "Population": item.get("population_name"),
                })
        return pd.DataFrame(ld_records, columns=self._LD_COLUMNS)

# Example usage in Jupyter:
# explorer = EnsemblExplorer()
# df = explorer.explore_gene_phenotypes("homo_sapiens", ["ITIH1", "TP53"])
# df_ld = explorer.explore_ld_for_variants("homo_sapiens", ["rs1042779"], "1000GENOMES:phase_3:KHV")
# display(df)
# display(df_ld)
