In [1]:
import pandas as pd
import numpy as np
from statsmodels.stats.multitest import multipletests
from scipy.stats import fisher_exact, ttest_ind
from sklearn.metrics import precision_score, recall_score, f1_score

In [2]:
def load_expression():
    # Define parameters for the negative binomial distribution
    n = 10  # Number of successes
    p = 0.5  # Probability of success
    
    # Generate data
    time_points = [f't{i}' for i in range(1, 8)]
    genes = [f'g{i}' for i in range(1, 21)] + [f'tf{i}' for i in range(1, 8)]
    samples = [f'p{j}' for j in range(1, 11)]

    data_frames = []

    for time_point in time_points:
        # Generate data for p1-p4 columns
        p1_p4_data = np.random.negative_binomial(n, p, (27, 4))
        
        # Generate data for p5-p10 columns such that p1 is the average
        p5_p10_data = []
        for row in p1_p4_data:
            p1_value = row[0]  # Take p1 value
            remaining_columns = np.random.randint(0, 7, size=(1, 6))  # Small random values to avoid zeros
            adjustment = (p1_value - remaining_columns.mean(axis=1)).astype(int)
            p5_p10_row = remaining_columns + adjustment[:, None]
            p5_p10_data.append(p5_p10_row.flatten())
        
        p5_p10_data = np.array(p5_p10_data)
        
        # Combine all columns
        data = np.column_stack((p1_p4_data, p5_p10_data))
        
        df = pd.DataFrame(data, index=genes, columns=samples)
        df['time'] = time_point
        data_frames.append(df)

    # Concatenate all data frames
    expression_data = pd.concat(data_frames)

    return expression_data

def load_tf_network():
    # Set random seed for reproducibility
    np.random.seed(42)

    # Define genes and TFs
    genes = [f'g{i}' for i in range(1, 21)]
    tfs = [f'tf{j}' for j in range(1, 8)]

    # Generate gene list with specific occurrence requirements
    gene_occurrences = genes + np.random.choice(genes, 4, replace=False).tolist() + [np.random.choice(genes)]
    np.random.shuffle(gene_occurrences)

    # Ensure the lengths match the requirement
    assert len(gene_occurrences) == 25

    # Generate random TF-target pairs
    tf_targets = {
        'TF': np.random.choice(tfs, len(gene_occurrences)),
        'Target': gene_occurrences
    }

    # Create DataFrame
    tf_network_df = pd.DataFrame(tf_targets)
    tf_network = tf_network_df.assign(Target_numeric=tf_network_df['Target'].str.extract(r'(\d+)').astype(int)).sort_values(by=['TF', 'Target_numeric']).drop(columns=['Target_numeric']).reset_index(drop=True)
    
    return tf_network

def load_target_genes():
    target_genes = ['g2','g4','g6','g8']    
    return target_genes

expression = load_expression()
tf_network = load_tf_network()
target_genes = load_target_genes()

In [3]:
expression

Unnamed: 0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,time
g1,12,7,15,14,10,13,9,12,14,10,t1
g2,3,7,9,11,3,3,1,6,5,3,t1
g3,17,4,5,7,16,14,16,19,16,17,t1
g4,8,17,11,11,9,7,9,7,7,8,t1
g5,8,19,15,15,6,7,10,9,7,9,t1
...,...,...,...,...,...,...,...,...,...,...,...
tf3,9,9,6,21,7,10,10,7,6,9,t7
tf4,20,14,18,23,22,18,20,20,19,17,t7
tf5,15,10,10,4,16,16,11,15,14,15,t7
tf6,8,7,5,12,7,6,7,6,12,8,t7


In [4]:
tf_network

Unnamed: 0,TF,Target
0,tf1,g2
1,tf1,g5
2,tf1,g16
3,tf2,g1
4,tf2,g4
5,tf2,g8
6,tf2,g11
7,tf2,g12
8,tf2,g13
9,tf3,g10


In [5]:
target_genes

['g2', 'g4', 'g6', 'g8']

In [6]:
def identify_degs(expression_data):
    time_points = expression['time'].unique()
    initial_time_point = expression[expression['time'] == 't1']

    deg_sets = {}

    for time_point in time_points:
        if time_point == 't1':
            continue
        current_time_point = expression[expression['time'] == time_point]
        p_values = []
        deg_list = []
        for gene in expression.index.unique():
            initial_values = initial_time_point.loc[gene].values[:-1] 
            current_values = current_time_point.loc[gene].values[:-1]  
            t_stat, p_val = ttest_ind(initial_values, current_values)
            p_values.append(p_val)        
            if p_val < 0.05:
                deg_list.append(gene)
                
            # Multiple testing correction
        corrected_p_values = multipletests(p_values, alpha=0.05, method='fdr_bh')[1]
        
        deg_sets[time_point] = [expression.index.unique()[i] for i in range(len(corrected_p_values)) if corrected_p_values[i] < 0.05]
    return deg_sets

deg_sets = identify_degs(expression)

In [7]:
deg_sets

{'t2': ['g2',
  'g4',
  'g6',
  'g8',
  'g9',
  'g12',
  'g13',
  'g14',
  'g15',
  'g16',
  'g18',
  'g19',
  'tf2',
  'tf3',
  'tf5',
  'tf6',
  'tf7'],
 't3': ['g1',
  'g2',
  'g6',
  'g7',
  'g13',
  'g15',
  'g17',
  'g18',
  'tf1',
  'tf3',
  'tf5',
  'tf6'],
 't4': ['g2',
  'g6',
  'g7',
  'g8',
  'g10',
  'g11',
  'g14',
  'g15',
  'g16',
  'g17',
  'g18',
  'g19',
  'tf1',
  'tf3',
  'tf5',
  'tf6'],
 't5': ['g1',
  'g3',
  'g4',
  'g7',
  'g8',
  'g10',
  'g12',
  'g13',
  'g14',
  'g15',
  'g17',
  'g18',
  'g20',
  'tf2',
  'tf4',
  'tf5'],
 't6': ['g2',
  'g3',
  'g4',
  'g6',
  'g7',
  'g8',
  'g10',
  'g11',
  'g14',
  'g15',
  'g17',
  'g20',
  'tf1',
  'tf5'],
 't7': ['g1',
  'g2',
  'g5',
  'g6',
  'g9',
  'g11',
  'g12',
  'g13',
  'g14',
  'g15',
  'g16',
  'tf4',
  'tf5']}

In [8]:
# Construct time-specific networks
def construct_time_specific_networks(tf_network, deg_sets, target_genes):
    time_specific_networks = {}
    
    for time_point, deg_set in deg_sets.items():
        # Intersected gene set
        vj = (set(deg_set) & set(target_genes)) | set(tf_network['TF'])
        
        # Construct network
        edges = []
        for _, row in tf_network.iterrows():
            if row['TF'] in vj and row['Target'] in vj:
                edges.append((row['TF'], row['Target']))
        
        time_specific_networks[time_point] = {
            'nodes': list(vj),
            'edges': edges
        }
    
    return time_specific_networks

time_specific_networks = construct_time_specific_networks(tf_network, deg_sets, target_genes)

In [9]:
time_specific_networks

{'t2': {'nodes': ['g2',
   'tf6',
   'tf2',
   'g6',
   'tf5',
   'tf4',
   'tf3',
   'tf1',
   'g8',
   'g4',
   'tf7'],
  'edges': [('tf1', 'g2'),
   ('tf2', 'g4'),
   ('tf2', 'g8'),
   ('tf4', 'g2'),
   ('tf4', 'g6'),
   ('tf6', 'g2')]},
 't3': {'nodes': ['g2', 'tf6', 'tf2', 'g6', 'tf5', 'tf4', 'tf3', 'tf1', 'tf7'],
  'edges': [('tf1', 'g2'), ('tf4', 'g2'), ('tf4', 'g6'), ('tf6', 'g2')]},
 't4': {'nodes': ['g2',
   'tf6',
   'tf2',
   'g6',
   'tf5',
   'tf4',
   'tf3',
   'tf1',
   'g8',
   'tf7'],
  'edges': [('tf1', 'g2'),
   ('tf2', 'g8'),
   ('tf4', 'g2'),
   ('tf4', 'g6'),
   ('tf6', 'g2')]},
 't5': {'nodes': ['tf6', 'tf2', 'tf5', 'tf4', 'tf3', 'tf1', 'g8', 'g4', 'tf7'],
  'edges': [('tf2', 'g4'), ('tf2', 'g8')]},
 't6': {'nodes': ['g2',
   'tf6',
   'tf2',
   'g6',
   'tf5',
   'tf4',
   'tf3',
   'tf1',
   'g8',
   'g4',
   'tf7'],
  'edges': [('tf1', 'g2'),
   ('tf2', 'g4'),
   ('tf2', 'g8'),
   ('tf4', 'g2'),
   ('tf4', 'g6'),
   ('tf6', 'g2')]},
 't7': {'nodes': ['g2', 't

In [10]:
# Main pipeline function
def propa_net_pipeline():
    expression = load_expression()
    tf_network = load_tf_network()
    target_genes = load_target_genes()
    deg_sets = identify_degs(expression)
    time_specific_networks = construct_time_specific_networks(tf_network, deg_sets, target_genes)
    
    # Populate the dictionaries
    time_specific_nodes = {}
    time_specific_graphs = {}
    
    for key, value in time_specific_networks.items():
        time_specific_nodes[key] = value['nodes']
    
        # Create DataFrame for edges
        df_edges = pd.DataFrame(value['edges'], columns=['node1', 'node2'])
        time_specific_graphs[key] = df_edges
        
    return time_specific_nodes, time_specific_graphs

time_specific_nodes, time_specific_graphs = propa_net_pipeline()

In [11]:
time_specific_nodes

{'t2': ['g2',
  'tf6',
  'tf2',
  'g6',
  'tf5',
  'tf4',
  'tf3',
  'tf1',
  'g8',
  'g4',
  'tf7'],
 't3': ['g2', 'tf6', 'tf2', 'g6', 'tf5', 'tf4', 'tf3', 'tf1', 'tf7'],
 't4': ['g2', 'tf6', 'tf2', 'g6', 'tf5', 'tf4', 'tf3', 'tf1', 'g8', 'tf7'],
 't5': ['tf6', 'tf2', 'tf5', 'tf4', 'tf3', 'tf1', 'g8', 'g4', 'tf7'],
 't6': ['g2',
  'tf6',
  'tf2',
  'g6',
  'tf5',
  'tf4',
  'tf3',
  'tf1',
  'g8',
  'g4',
  'tf7'],
 't7': ['g2', 'tf6', 'tf2', 'g6', 'tf5', 'tf4', 'tf3', 'tf1', 'tf7']}

In [12]:
time_specific_graphs

{'t2':   node1 node2
 0   tf1    g2
 1   tf2    g4
 2   tf2    g8
 3   tf4    g2
 4   tf4    g6
 5   tf6    g2,
 't3':   node1 node2
 0   tf1    g2
 1   tf4    g2
 2   tf4    g6
 3   tf6    g2,
 't4':   node1 node2
 0   tf1    g2
 1   tf2    g8
 2   tf4    g2
 3   tf4    g6
 4   tf6    g2,
 't5':   node1 node2
 0   tf2    g4
 1   tf2    g8,
 't6':   node1 node2
 0   tf1    g2
 1   tf2    g4
 2   tf2    g8
 3   tf4    g2
 4   tf4    g6
 5   tf6    g2,
 't7':   node1 node2
 0   tf1    g2
 1   tf4    g2
 2   tf4    g6
 3   tf6    g2}

In [13]:
# Function to calculate influence
def calculate_influence(time_specific_networks, tf_network, deg_sets, target_genes, rounds=100):
    influence_results = {}
    
    for time_point, network in time_specific_networks.items():
        nodes = network['nodes']
        edges = network['edges']
        vj = list(set(nodes) & set(target_genes) & set(deg_sets[time_point]))
        
        if not vj:
            continue
        
        tf_set = list(set(tf_network['TF'].unique()) & set(nodes))
        
        # Initialize DE(s) and IL(t)
        de = {node: abs(np.random.randn()) for node in nodes}  # Using random values for DE(s)
        il = {tf: 0 for tf in tf_set}
        
        for _ in range(rounds):
            g_prime_edges = [(u, v) for u, v in edges if np.random.rand() > 0.5]
            g_prime = {node: [] for node in nodes}
            
            for u, v in g_prime_edges:
                g_prime[u].append(v)
            
            for tf in tf_set:
                reachable = set()
                queue = [tf]
                
                while queue:
                    current = queue.pop(0)
                    for neighbor in g_prime[current]:
                        if neighbor not in reachable and neighbor not in tf_set:
                            reachable.add(neighbor)
                            queue.append(neighbor)
                
                if reachable:
                    il[tf] += sum(de[node] for node in reachable) / len(reachable)
        
        for tf in il:
            il[tf] /= rounds
        
        influence_results[time_point] = il
    
    return influence_results

influence_results = calculate_influence(time_specific_networks, tf_network, deg_sets, target_genes)

In [14]:
influence_results

{'t2': {'tf6': 0.7027490449494657,
  'tf2': 0.7635642961695567,
  'tf5': 0.0,
  'tf4': 0.4748264715382994,
  'tf3': 0.0,
  'tf1': 0.7165284379876905,
  'tf7': 0.0},
 't3': {'tf6': 1.2217646419851387,
  'tf2': 0.0,
  'tf5': 0.0,
  'tf4': 1.9350372858187732,
  'tf3': 0.0,
  'tf1': 1.434245449286902,
  'tf7': 0.0},
 't4': {'tf6': 0.4166727077002777,
  'tf2': 0.2984318032054628,
  'tf5': 0.0,
  'tf4': 0.9166101758545078,
  'tf3': 0.0,
  'tf1': 0.42453445690216973,
  'tf7': 0.0},
 't5': {'tf6': 0.0,
  'tf2': 0.5579100117201744,
  'tf5': 0.0,
  'tf4': 0.0,
  'tf3': 0.0,
  'tf1': 0.0,
  'tf7': 0.0},
 't6': {'tf6': 0.12032426844012253,
  'tf2': 0.8278748278573331,
  'tf5': 0.0,
  'tf4': 1.1288900038675553,
  'tf3': 0.0,
  'tf1': 0.1422014081565084,
  'tf7': 0.0},
 't7': {'tf6': 0.6688486243949332,
  'tf2': 0.0,
  'tf5': 0.0,
  'tf4': 0.5439017871366648,
  'tf3': 0.0,
  'tf1': 0.6242587161019375,
  'tf7': 0.0}}

In [15]:
def network_propagation(time_specific_networks, tf_network, rounds=10, alpha=0.85):
    major_tfs = {}
    
    for time_point, network in time_specific_networks.items():
        nodes = network['nodes']
        edges = network['edges']
        
        # Initialize p0(v) with random values for demonstration
        p0 = {node: np.random.rand() for node in nodes}
        p = p0.copy()
        
        # Adjacency list
        adjacency_list = {node: [] for node in nodes}
        for u, v in edges:
            adjacency_list[u].append(v)
            adjacency_list[v].append(u)  # Assuming undirected graph for simplicity

        for _ in range(rounds):
            p_new = p.copy()
            for node in nodes:
                sum_neighbors = sum(p[neighbor] for neighbor in adjacency_list[node])
                p_new[node] = alpha * sum_neighbors / len(adjacency_list[node]) if adjacency_list[node] else 0
            p = p_new
        
        # Extract TFs and rank by propagated values
        tf_set = list(set(tf_network['TF'].unique()) & set(nodes))
        tf_ranking = {tf: p[tf] for tf in tf_set if tf in p}
        sorted_tfs = sorted(tf_ranking, key=tf_ranking.get, reverse=True)
        
        major_tfs[time_point] = sorted_tfs
    
    return major_tfs

major_tfs = network_propagation(time_specific_networks, tf_network)

In [16]:
major_tfs

{'t2': ['tf2', 'tf4', 'tf6', 'tf1', 'tf5', 'tf3', 'tf7'],
 't3': ['tf6', 'tf1', 'tf4', 'tf2', 'tf5', 'tf3', 'tf7'],
 't4': ['tf2', 'tf6', 'tf1', 'tf4', 'tf5', 'tf3', 'tf7'],
 't5': ['tf2', 'tf6', 'tf5', 'tf4', 'tf3', 'tf1', 'tf7'],
 't6': ['tf6', 'tf1', 'tf4', 'tf2', 'tf5', 'tf3', 'tf7'],
 't7': ['tf6', 'tf1', 'tf4', 'tf2', 'tf5', 'tf3', 'tf7']}