# TF Selection

In [76]:
# Import standard libraries
from importlib import reload
import csv
import pandas as pd
import numpy as np
import scipy.stats as ss
import matplotlib.pyplot as plt
import seaborn as sns
import gseapy
from gseapy.plot import barplot, dotplot
from tqdm import tqdm
import time

In [49]:
import warnings

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas SettingWithCopy / deprecation warnings.
warnings.simplefilter('ignore')

In [2]:
# Project directories: raw inputs, saved intermediate results, and the
# figure folder for this notebook (located inside the save directory).
data_dir = "/home/braunger/masterthesis/data/"
save_dir = "/home/braunger/masterthesis/save/"
fig_dir = save_dir + "figures/TF_selection/"

In [14]:
# Load TFs from design 2 and keep those that occur exactly 3 times in the
# 'TF' column (presumably once per network of the design -- TODO confirm).
incl_TFs = pd.read_csv(save_dir + 'pcst/incl_TFs_design2.csv')

# Filter the counts Series directly: the column name produced by
# value_counts().to_frame() differs between pandas versions ('TF' before 2.0,
# 'count' from 2.0 on), so indexing that frame by 'TF' is not portable.
tf_counts = incl_TFs['TF'].value_counts()
shared_TFs = tf_counts[tf_counts == 3].index.tolist()

## a) MEFISTO weights

## b) GSEA on all pathways

In [86]:
# Load TF target interactions with annotation and keep only the interactions
# whose 'expressed' flag is True.
tf_targets = pd.read_csv(save_dir + 'TF_targets/TF_targets_anno.csv', sep = ',')
tf_targets = tf_targets[tf_targets['expressed'] == True]
# Optional: additionally restrict to differentially expressed targets.
#tf_targets = tf_targets[tf_targets['DE'] == True]

# For every shared TF, collect up to five significant (adjusted p < 0.05)
# GO Biological Process terms enriched among its targets.
ranking_columns = ['TF', 'Term', 'Adjusted P-value', 'Overlap']
top_terms_per_TF = []

for TF in tqdm(shared_TFs):
    # Targets of this TF among the interactions kept above
    # (expressed targets; the DE filter is currently disabled).
    targets = tf_targets.loc[tf_targets['TF'] == TF, 'target']

    # Enrichr query against the GO Biological Process library
    enr = gseapy.enrichr(gene_list=targets,
                         gene_sets='GO_Biological_Process_2021',
                         description='', format='png',
                         verbose=False)

    results = enr.results
    if results is None or len(results) == 0:
        # No terms returned for this TF: nothing to rank.
        continue

    significant = results[results['Adjusted P-value'] < 0.05]
    # .assign returns a new frame, so no write into a filtered slice
    # (avoids pandas' SettingWithCopy problem).
    # The first five rows are kept in the order Enrichr returned them
    # (assumed to be by increasing p-value -- TODO confirm).
    top_terms = significant.assign(TF=TF)[ranking_columns].iloc[0:5, :]
    if not top_terms.empty:
        top_terms_per_TF.append(top_terms)

# Concatenate once instead of growing the frame inside the loop.
if top_terms_per_TF:
    TF_ranking = pd.concat(top_terms_per_TF, ignore_index = True)
else:
    TF_ranking = pd.DataFrame(columns=ranking_columns)
                           

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [02:01<00:00,  6.37s/it]


In [96]:
# Show the full 'Term' string stored at row label 12 of the ranking table.
TF_ranking.loc[12, 'Term']

'ubiquitin-dependent protein catabolic process (GO:0006511)'

## c) GSEA on DE targets with GTEx Aging signatures

In [66]:
# For every shared TF, record the adjusted p-value of the first term returned
# by Enrichr for the GTEx aging signatures library.
gtex_records = []

for TF in shared_TFs:
    # Select targets of the TF
    targets = tf_targets.loc[tf_targets['TF'] == TF, 'target']

    # Enrichr query against the GTEx aging signatures
    enr = gseapy.enrichr(gene_list=targets,
                         gene_sets='GTEx_Aging_Signatures_2021',
                         description='', format='png',
                         verbose=False)

    results = enr.results
    if results is None or len(results) == 0:
        # No terms returned for this TF: skip it instead of failing on the
        # first-row lookup below.
        continue

    # Positional first row; assumes Enrichr lists the most significant term
    # first -- TODO confirm.
    gtex_records.append({'TF': TF,
                         'adj_p_value': results['Adjusted P-value'].iloc[0]})

# Build the table once instead of concatenating inside the loop.
TF_ranking = pd.DataFrame(gtex_records, columns=['TF', 'adj_p_value'])
                           



In [67]:
# Order the TFs by ascending adjusted p-value and print the table.
TF_ranking = TF_ranking.sort_values('adj_p_value')
print(TF_ranking)

        TF  adj_p_value
9    STAT3     0.653138
5    NR2C2     0.824809
2    FOXO3     0.941196
14  TCF7L2     0.961369
8    HIF1A     0.987603
12     AHR     0.999764
11    MAFF     0.999974
15    LYL1     0.999985
13   BACH2     0.999987
3     FLI1     0.999994
17     MAZ     0.999994
16  NOTCH1     0.999994
0    GTF2B     0.999995
1     ATF1     0.999996
4    TEAD4     0.999996
10     SRC     0.999996
6    GATA4     0.999996
18   STAT1     0.999996
7     KAT5     0.999996


## d) DEMAGALHAES Aging Gene Set

In [68]:
# For every shared TF, test its targets against the local
# DEMAGALHAES_AGING_UP GMT file and record the adjusted p-value of the first
# result row. TFs for which the enrichment returns no rows are left out.
aging_up_records = []

for TF in shared_TFs:
    # Select targets of the TF
    targets = tf_targets.loc[tf_targets['TF'] == TF, 'target']

    # Enrichment against the local gene-set file
    enr = gseapy.enrichr(gene_list=targets,
                         gene_sets= data_dir+'de_data/DEMAGALHAES_AGING_UP.v7.5.1.gmt',
                         background = 'hsapiens_gene_ensembl',
                         description='', format='png',
                         verbose=False)

    results = enr.results
    if results is None or len(results) == 0:
        # No hits for this TF. This is the case the former bare
        # `except: pass` skipped; any other error now surfaces instead of
        # being silently swallowed.
        continue

    aging_up_records.append({'TF': TF,
                             #'pathway': results['Term'].iloc[0],
                             'adj_p_value': results['Adjusted P-value'].iloc[0]})

# Build the table once instead of concatenating inside the loop.
TF_ranking = pd.DataFrame(aging_up_records, columns=['TF', 'adj_p_value'])



In [69]:
# TFs whose adjusted p-value is below 0.05
TF_ranking.loc[TF_ranking['adj_p_value'].lt(0.05)]

Unnamed: 0,TF,adj_p_value
11,BACH2,0.011004


In [70]:
# For every shared TF, test its targets against the local
# DEMAGALHAES_AGING_DN GMT file and record the adjusted p-value of the first
# result row. TFs for which the enrichment returns no rows are left out.
aging_dn_records = []

for TF in shared_TFs:
    # Select targets of the TF (no DE filter is applied in this cell)
    targets = tf_targets.loc[tf_targets['TF'] == TF, 'target']

    # Enrichment against the local gene-set file
    enr = gseapy.enrichr(gene_list=targets,
                         gene_sets= data_dir+'de_data/DEMAGALHAES_AGING_DN.v7.5.1.gmt',
                         background = 'hsapiens_gene_ensembl',
                         description='', format='png',
                         verbose=False)

    results = enr.results
    if results is None or len(results) == 0:
        # No hits for this TF. This is the case the former bare
        # `except: pass` skipped; any other error now surfaces instead of
        # being silently swallowed.
        continue

    aging_dn_records.append({'TF': TF,
                             #'pathway': results['Term'].iloc[0],
                             'adj_p_value': results['Adjusted P-value'].iloc[0]})

# Build the table once instead of concatenating inside the loop.
TF_ranking = pd.DataFrame(aging_dn_records, columns=['TF', 'adj_p_value'])



In [71]:
# TFs whose adjusted p-value is below 0.05
TF_ranking.loc[TF_ranking['adj_p_value'].lt(0.05)]

Unnamed: 0,TF,adj_p_value
5,HIF1A,0.033048
