In [1]:
import json
import numpy as np
import pandas as pd
from nlbayes import ORNOR
from nlbayes.utils import read_network_json, get_evidence_dict

In [2]:
# download files: 
#   - This differential expression table was generated with the GEO2R tool by contrasting the E2F3-treated
#     samples against the control samples. In GEO2R, select the `Gene.ID` column, which contains
#     Entrez (NCBI) gene IDs.
#     url: https://umbibio.math.umb.edu/nlbayes/assets/data/experiments/GSE3151.E2F3.top.table.tsv
#   - Since the experiment above was performed on mammary epithelial cell cultures, we can use a
#     breast-tissue-specific network.
#     url: https://umbibio.math.umb.edu/nlbayes/assets/data/networks/gtex_chip/homo_sapiens/tissue_specific/breast.rels.json

In [3]:
# Load the breast-tissue-specific network from its JSON file. The path is
# relative, so the file is presumably expected in the working directory.
network = read_network_json("breast.rels.json")

In [4]:
# Load the GEO2R differential-expression table (tab-separated file).
de_table = pd.read_csv("GSE3151.E2F3.top.table.tsv", sep='\t')

# Reduce the table to the evidence passed to the model. The raw table keeps
# its own name so that `evidence` only ever refers to the final result.
# Per the log printed by this cell, the p-value column picked up is `adj.P.Val`.
evidence = get_evidence_dict(
    de_table,
    logfc_threshold=1,      # limit DE genes by log2 fold-change (logFC)
    pval_threshold=0.001,   # limit DE genes by p-value
    network=network,        # optional. If provided, genes will be selected
                            # only if present in network's set of targets
)
print(f"\nSelected {len(evidence)} DE genes")

Using column `Gene.ID` as gene
Using column `adj.P.Val` as pval
Using column `logFC` as logfc

Selected 255 DE genes


In [5]:
# Build the ORNOR model from the network and the selected DE evidence.
model = ORNOR(
    network,
    evidence,
    n_graphs=5,           # NOTE(review): presumably the number of parallel chains — confirm
    uniform_prior=False,
)


In [6]:
# Run the sampler. The log printed by this cell shows a burn-in phase first,
# then sampling until the max Gelman-Rubin statistic drops below the target.
model.fit(
    n_samples=2000,     # NOTE(review): looks like an upper bound; the logged run stopped at 140 — confirm
    gelman_rubin=1.2,   # convergence target reported in the log ("target was 1.2")
    burnin=True,        # enables the burn-in phase seen at the start of the log
)


Initializing model burn-in ...



100%|██████████| 20/20 [00:29<00:00,  1.47s/it]


Converged after 20 samples
Max Gelman-Rubin statistic is 1.8285694420354446 (target was 5.0 )
Burn-in complete ...



100%|██████████| 140/140 [03:55<00:00,  1.69s/it]

Converged after 140 samples
Max Gelman-Rubin statistic is 1.1951619587694196 (target was 1.2 )





<nlbayes.ornor.ORNOR at 0x7fc2bf1bb9e0>

In [7]:
# Fetch the fitted model's results table and display it. In the output below
# it is indexed by `rank` and has the columns `TF_id`, `X` and `T`.
df = model.get_results()
df

Unnamed: 0_level_0,TF_id,X,T
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,3169,1.000000,0.813403
2,1869,1.000000,0.824464
3,5914,0.991071,0.791339
4,6256,0.987500,0.730360
5,367,0.975000,0.768256
...,...,...,...
745,1499,0.001786,0.491821
746,79577,0.001786,0.474731
747,26523,0.001786,0.494423
748,55290,0.000000,0.477047


In [8]:
# Goal: translate the NCBI (Entrez) gene ids in the results into gene symbols
# by querying Ensembl BioMart.
from biomart import BiomartServer

# Attributes to retrieve for every matching gene
attributes = ["entrezgene_id", "external_gene_name"]

# Filters to apply: only the gene ids listed in the results table
filters = dict(entrezgene_id=df['TF_id'].to_list())

# Connect to the BioMart server and pick the dataset to query
server = BiomartServer("http://www.ensembl.org/biomart")
dataset = server.datasets["hsapiens_gene_ensembl"]


In [9]:

# Perform the query: one tab-separated row per match, in `attributes` order.
response = dataset.search({'attributes': attributes, 'filters': filters})

# Build a gene-id -> gene-symbol lookup from the response rows.
annotation = {}
for raw_line in response.iter_lines():
    fields = raw_line.decode("utf-8").strip().split("\t")
    gene_id = fields[0]
    if not gene_id:
        # Blank line: there is no id to record, so skip it instead of
        # creating a spurious '' key.
        continue
    # After strip(), a row without a symbol collapses to a single field.
    symbol = fields[1] if len(fields) == 2 else ''
    # The same id may appear on several rows. A row without a symbol must not
    # erase a symbol already recorded for that id; among rows that do carry a
    # symbol, the last one still wins.
    if symbol or gene_id not in annotation:
        annotation[gene_id] = symbol

In [10]:

# Attach the gene symbol for each TF id; ids missing from `annotation` map to NaN.
df['TF_symbol'] = df['TF_id'].map(annotation)

# Show the top 50 rows with the symbol next to its id. Columns are selected by
# name rather than by position, so the view does not depend on column order.
df[['TF_id', 'TF_symbol', 'X', 'T']].head(50)


Unnamed: 0_level_0,TF_id,TF_symbol,X,T
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,3169,FOXA1,1.0,0.813403
2,1869,E2F1,1.0,0.824464
3,5914,RARA,0.991071,0.791339
4,6256,RXRA,0.9875,0.73036
5,367,AR,0.975,0.768256
6,4613,MYCN,0.944643,0.773096
7,8320,EOMES,0.901786,0.744966
8,429,ASCL1,0.792857,0.667199
9,2625,GATA3,0.783929,0.705112
10,11091,WDR5,0.657143,0.628203
