<a href="https://colab.research.google.com/github/ufbfung/cpic/blob/main/PGx_CDS_Tool_0_0_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
from prettytable import PrettyTable
from tabulate import tabulate

# Function that retrieves the lookupkeys (aka the phenotypes) for a gene of interest.
# Note that lookupkeys may not always be the same, depending on the gene.
# Plan will be to implement only a subset of genes so we will have more control over this.

def get_lookupkeys(gene_symbol):
    url = f"https://api.cpicpgx.org/v1/diplotype?genesymbol=eq.{gene_symbol}"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        unique_lookupkeys = []
        for item in data:
            lookupkey = item['lookupkey']
            if lookupkey not in unique_lookupkeys:
                unique_lookupkeys.append(lookupkey)
        return unique_lookupkeys
    return None

# Function that will display the lookupkeys in a nice table format

def print_table(headers, rows):
    table = PrettyTable(headers)
    for row in rows:
        table.add_row(row)
    print(table)

In [None]:
# This will allow a user to enter a gene symbol (aka gene of interest) and call the get_lookupkeys function
# to retrieve a table of lookupkeys

gene_symbol = input("Enter gene symbol: ")
lookupkeys = get_lookupkeys(gene_symbol)

if lookupkeys:
    headers = ['Lookupkeys']
    rows = [[lookupkey] for lookupkey in lookupkeys]
    print_table(headers, rows)
else:
    print(f"No lookupkeys found for {gene_symbol}.")

Enter gene symbol: CYP2C19
+------------------------------------------------+
|                   Lookupkeys                   |
+------------------------------------------------+
|     {'CYP2C19': 'Ultrarapid Metabolizer'}      |
|        {'CYP2C19': 'Rapid Metabolizer'}        |
|       {'CYP2C19': 'Normal Metabolizer'}        |
| {'CYP2C19': 'Likely Intermediate Metabolizer'} |
|    {'CYP2C19': 'Intermediate Metabolizer'}     |
|     {'CYP2C19': 'Likely Poor Metabolizer'}     |
|        {'CYP2C19': 'Poor Metabolizer'}         |
|          {'CYP2C19': 'Indeterminate'}          |
+------------------------------------------------+


In [None]:
import requests
from tabulate import tabulate

# Function that will retrieve the CPIC recommendation given a drugid and lookupkey as inputs

def get_recommendation(drugid, lookupkey):
    url = f"https://api.cpicpgx.org/v1/recommendation?select=drug(name), guideline(name),* &drugid=eq.{drugid}&lookupkey=cs.{lookupkey}"
    response = requests.get(url)
    if response.ok:
        data = response.json()
        recommendations = [{ 'Drug': item['drug']['name'], 'Guideline': item['guideline']['name'], 'Recommendation': item['drugrecommendation'] } for item in data]
        return recommendations
    return None

# Displays the drug, guideline, and recommendation using the get_recommendation function.

# This function will need to be modified to
# 1) use a drug name instead of drugid AND
# 2) Link the lookupkey from the previous section and hardcode it into this section.
# 3) Modify the columns and potentially the output so it's not scrolling.

def main():
    drugid = 'RxNorm:36437'
    lookupkey = '{"CYP2C19": "Ultrarapid Metabolizer"}'
    recommendations = get_recommendation(drugid, lookupkey)
    if recommendations:
        headers = {'Drug': 'Drug', 'Guideline': 'Guideline', 'Recommendation': 'Recommendation'}
        print(tabulate(recommendations, headers=headers))
    else:
        print("No recommendations found.")

if __name__ == '__main__':
    main()


Drug        Guideline                                                    Recommendation
----------  -----------------------------------------------------------  -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
sertraline  CYP2D6, CYP2C19 and Selective Serotonin Reuptake Inhibitors  Initiate therapy with recommended starting dose. If patient does not respond to recommended maintenance dosing, consider alternative drug not predominantly metabolized by CYP2C19.


In [None]:
# WORK IN PROGRESS
# Feature get list of CPIC gene symbols

def get_genesymbols():
    url = "https://api.cpicpgx.org/v1/gene"
    response = requests.get(url)
    if response.ok:
        data = response.json()
        return [item['symbol'] for item in data]
    return None

# Feature that links each gene symbol to a number

def main():
    genesymbols = get_genesymbols()
    if genesymbols:
        print("Available gene symbols:")
        for i, symbol in enumerate(genesymbols):
            print(f"{i+1}. {symbol}")
        choice = input("Enter the number corresponding to the gene symbol of interest: ")
        try:
            choice = int(choice)
            if 1 <= choice <= len(genesymbols):
                gene_symbol = genesymbols[choice-1]
                lookupkeys = get_lookupkeys(gene_symbol)
                if lookupkeys:
                    headers = ['Lookupkeys']
                    rows = [[lookupkey] for lookupkey in lookupkeys]
                    print(tabulate(rows, headers=headers))
                else:
                    print(f"No lookupkeys found for gene symbol {gene_symbol}")
            else:
                print("Invalid choice.")
        except ValueError:
            print("Invalid input. Please enter a number.")
    else:
        print("Unable to retrieve available gene symbols.")

if __name__ == '__main__':
    main()

In [None]:
pip install pgmpy

In [17]:
# Exploring whether we can leverage the frequency information about each gene to build a Bayes Net
# to predict the phenotypes that a patient is likely to have

import pandas as pd
import numpy as np
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD

# Define the data
data_list = [
    {
        'Ethnicity': 'Asian',
        'frequency': {'Normal metabolizer': 0.75, 'Intermediate metabolizer': 0.22, 'Poor metabolizer': 0.03}
    },
    {
        'Ethnicity': 'Caucasian',
        'frequency': {'Normal metabolizer': 0.74, 'Intermediate metabolizer': 0.24, 'Poor metabolizer': 0.02}
    },
    {
        'Ethnicity': 'African',
        'frequency': {'Normal metabolizer': 0.72, 'Intermediate metabolizer': 0.26, 'Poor metabolizer': 0.02}
    }
]

values = {
    'Ethnicity': ['Asian', 'Caucasian', 'African'],
    'CYP2C19': ['Normal metabolizer', 'Intermediate metabolizer', 'Poor metabolizer'],
}

# Create the Bayesian network
cpds = []
model = BayesianNetwork()

# Define the CYP2C19 variable
cpd_cyp2c19 = TabularCPD(
    variable='CYP2C19',
    variable_card=len(values['CYP2C19']),
    values=np.zeros((len(values['CYP2C19']), 1)),
    evidence=['Ethnicity'],
    evidence_card=[len(values['Ethnicity'])],
    state_names={f'CYP2C19_{i}': value for i, value in enumerate(values['CYP2C19'])},
)
for ethnic_group in values['Ethnicity']:
    frequencies = [data['frequency'][ethnic_group] for data in data_list]
    cpd_cyp2c19.values = np.array([frequencies]).T

cpds.append(cpd_cyp2c19)

# Define the Ethnicity variable
cpd_ethnicity = TabularCPD(
    variable='Ethnicity',
    variable_card=len(values['Ethnicity']),
    values=[1 / len(values['Ethnicity'])] * len(values['Ethnicity']),
    state_names={f'Ethnicity_{i}': value for i, value in enumerate(values['Ethnicity'])},
)
cpds.append(cpd_ethnicity)

model.add_cpds(*cpds)  # Add the CPDs to the model

print(model.check_model())  # Check if the model is valid
print(model.get_cpds())  # Print the CPDs of the model


ValueError: ignored