In [43]:
import networkx as nx
import random
import pandas as pd
import random
import numpy as np


In [85]:


def create_labeled_directed_graph(n, m):
    """Build a random directed graph whose nodes carry an integer ``label``.

    Nodes are numbered ``0 .. n-1`` and each one receives a label drawn
    uniformly from ``{1, ..., m}``.  Random edges (never self-loops) are then
    added until every node has in-degree and out-degree of at least 2.

    Parameters
    ----------
    n : int
        Number of nodes.  ``n <= 0`` yields an empty graph.  Otherwise at
        least 3 nodes are needed: without self-loops a node can only reach
        degree 2 when there are two *other* nodes to connect to.
    m : int
        Number of distinct labels; must be at least 1 when ``n > 0``.

    Returns
    -------
    networkx.DiGraph
        The generated graph; each node has a ``label`` attribute.

    Raises
    ------
    ValueError
        If ``n`` is 1 or 2 (the degree requirement cannot be met), or if
        ``m < 1`` while nodes have to be labeled.
    """
    # Validate first: n == 1 used to crash on random.choice([]) and n == 2
    # made the final loop spin forever.
    if 0 < n < 3:
        raise ValueError(
            f"n must be 0 or at least 3 to give every node in/out-degree >= 2, got {n}"
        )
    if n > 0 and m < 1:
        raise ValueError(f"m must be at least 1, got {m}")

    # Initialize a directed graph
    G = nx.DiGraph()

    # Add nodes with random labels from {1, ..., m}
    for i in range(n):
        label = random.randint(1, m)
        G.add_node(i, label=label)

    # First pass: give every node at least one outgoing and one incoming edge.
    for i in range(n):
        # Ensure at least one outgoing edge for node i
        if G.out_degree(i) == 0:
            target = random.choice([j for j in range(n) if j != i])
            G.add_edge(i, target)

        # Ensure at least one incoming edge for node i
        if G.in_degree(i) == 0:
            source = random.choice([j for j in range(n) if j != i])
            G.add_edge(source, i)

    # Second pass: keep adding random edges between two distinct nodes until
    # every node has in-degree >= 2 and out-degree >= 2.
    while any(G.in_degree(i) < 2 or G.out_degree(i) < 2 for i in range(n)):
        source, target = random.sample(range(n), 2)
        G.add_edge(source, target)

    return G






In [118]:
# Example usage
SEED = 42             # fix both RNGs so the random graph and I/O table are reproducible
random.seed(SEED)     # drives graph construction and label removal
np.random.seed(SEED)  # drives the random flows in the I/O table

n = 50  # Number of nodes
m = 3   # Number of unique labels
G = create_labeled_directed_graph(n, m)

In [87]:
# Inspect the attribute dict of node 0 (it holds the node's 'label').
G.nodes[0]

{'label': 3}

In [88]:
# Distinct label values currently attached to the nodes of G.
list({G.nodes[idx]['label'] for idx in range(len(G)) if 'label' in G.nodes[idx]})

[1, 2, 3]

In [48]:
# # Print node labels and degrees
# for node in graph.nodes(data=True):
#     print(f"Node {node[0]} has label {node[1]['label']}")
# print("Graph edges:", list(graph.edges()))

In [89]:

def create_fake_io_table(graph):
    """Aggregate a graph's edges into a synthetic sector-by-sector I/O table.

    Every distinct non-``None`` node ``label`` is treated as a sector.  Each
    edge whose two endpoints are both labeled adds one random integer from
    ``np.random.randint(1, 10)`` (i.e. 1..9) to the cell
    ``[source sector, target sector]``.

    Returns
    -------
    (numpy.ndarray, list)
        The square integer table and the sorted list of sector labels;
        row/column ``i`` of the table belongs to ``sectors[i]``.
    """
    # One pass over the nodes gives both the label lookup and the sector set.
    node_labels = {node: lbl for node, lbl in graph.nodes(data="label")}
    sectors = sorted({lbl for lbl in node_labels.values() if lbl is not None})
    position = {lbl: pos for pos, lbl in enumerate(sectors)}

    size = len(sectors)
    io_table = np.zeros((size, size), dtype=int)

    for src, dst in graph.edges():
        row_label = node_labels.get(src)
        col_label = node_labels.get(dst)
        if row_label is None or col_label is None:
            continue  # edges touching an unlabeled node contribute nothing
        io_table[position[row_label], position[col_label]] += np.random.randint(1, 10)

    return io_table, sectors

# Example usage
# io_table, sectors = create_fake_io_table(graph)
# print("I/O Table:\n", io_table)
# print("Sectors:", sectors)


In [90]:
# Create a fake I/O table from the graph.  Keep the sector list too:
# row/column i of the table corresponds to sectors[i].  (Unpacking into `_`
# would also overwrite IPython's "last output" variable.)
io_table, sectors = create_fake_io_table(G)

# Display the resulting I/O table
print(io_table)

[[ 59 201 237]
 [157 256 344]
 [197 402 498]]


In [91]:

# Assume 'io_table' is the I/O table generated as a NumPy array
# Example for testing:
# io_table = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# 1. Row-normalized I/O table: every entry divided by its row sum.
row_sums = io_table.sum(axis=1, keepdims=True)  # shape (k, 1)
# A sector without any flow has a zero sum; leave that row as zeros instead
# of producing NaN/inf and a RuntimeWarning.
io_row_normalized = np.divide(
    io_table,
    row_sums,
    out=np.zeros(io_table.shape, dtype=float),
    where=row_sums != 0,
)

# 2. Column-normalized I/O table: every entry divided by its column sum.
column_sums = io_table.sum(axis=0, keepdims=True)  # shape (1, k)
io_column_normalized = np.divide(
    io_table,
    column_sums,
    out=np.zeros(io_table.shape, dtype=float),
    where=column_sums != 0,
)

# Display the normalized tables
print("Row-normalized I/O Table:\n", io_row_normalized)
print("\nColumn-normalized I/O Table:\n", io_column_normalized)


Row-normalized I/O Table:
 [[0.11871227 0.40442656 0.47686117]
 [0.20739762 0.33817701 0.45442536]
 [0.17958067 0.36645397 0.45396536]]

Column-normalized I/O Table:
 [[0.14285714 0.23399302 0.21964782]
 [0.38014528 0.29802095 0.31881372]
 [0.47699758 0.46798603 0.46153846]]


In [92]:

def remove_labels(graph, k):
    """Blank out the ``label`` attribute of ``k`` randomly chosen nodes.

    The graph is modified in place: each chosen node keeps its ``label`` key,
    but the value becomes ``None``.  The very same graph object is returned.
    Selection is done with ``random.sample`` over all nodes of the graph.
    """
    for victim in random.sample(list(graph.nodes), k):
        graph.nodes[victim]['label'] = None
    return graph

# Example usage
#n = 10  # Number of nodes
#m = 5   # Number of unique labels
k = 10   # Number of nodes to remove labels from

# Create the initial graph
#graph = create_labeled_directed_graph(n, m)

# Remove labels from k nodes.  remove_labels mutates the graph it is given,
# so hand it a copy: G stays fully labeled (cells that read labels from G can
# be re-run safely) while `graph` is the partially unlabeled version with the
# same nodes and edges.
graph = remove_labels(G.copy(), k)

# Print node labels after removal
# for node in graph.nodes(data=True):
#     print(f"Node {node[0]} has label {node[1]['label']}")
# print("Graph edges:", list(graph.edges()))


In [93]:
# Ids of all nodes whose label has been set to None.
unlabeledNodes = list(
    filter(lambda idx: graph.nodes[idx]['label'] is None, range(len(graph)))
)

In [94]:
# Show the ids of the unlabeled nodes.
unlabeledNodes

[13, 18, 19, 22, 29, 30, 39, 42, 43, 48]

In [104]:
# Nodes that node 13 points to (on a DiGraph, neighbors() follows outgoing edges).
# NOTE(review): 13 is hard-coded; it is one of the unlabeled nodes in the recorded
# run, but a different random draw may not leave it unlabeled.
list(G.neighbors(13))

[4, 12, 28, 30, 14, 37, 22, 38, 34, 41, 48, 47]

In [154]:
# For every unlabeled node, count its neighbours that still carry a label
# (successors and predecessors are counted separately and then added).
dikt = {}
for node in unlabeledNodes:
    labeledOutNghbrs = list(filter(lambda v: v not in unlabeledNodes, G.successors(node)))
    labeledInNghbrs = list(filter(lambda u: u not in unlabeledNodes, G.predecessors(node)))

    dikt[node] = len(labeledInNghbrs) + len(labeledOutNghbrs)

In [158]:
# Order the unlabeled nodes by their number of labeled neighbours, largest
# first; nodes with equal counts keep their original relative order.
sorted_dict = {
    node_id: dikt[node_id]
    for node_id in sorted(dikt, key=dikt.get, reverse=True)
}

In [159]:
# Show node id -> number of labeled neighbours, largest count first.
sorted_dict

{29: 9, 22: 7, 39: 7, 43: 7, 18: 6, 42: 6, 13: 5, 48: 5, 19: 4, 30: 4}

In [None]:
''' For unlabeled nodes, create a probability table.
For an unlabeled node with labeled neighbours: given that a neighbour belongs to a certain sector, what is the probability
that the unlabeled node belongs to each of the possible sectors? This probability comes from
the normalized I/O tables (row-normalized for out-neighbours, column-normalized for in-neighbours).
'''

In [119]:
# Sector labels present in G, in ascending order.  Nodes whose label is None
# (or missing) are skipped so that every entry can be used as a 1-based index
# into the normalized I/O tables.
sctrs = sorted({
    G.nodes[i]['label']
    for i in range(len(G))
    if G.nodes[i].get('label') is not None
})

In [120]:
# Show the list of sector labels.
sctrs

[1, 2, 3]

In [144]:
# Single-node version of the sector-probability estimate.
# NOTE: this cell reads labeledOutNghbrs / labeledInNghbrs as left behind by
# the last iteration of the neighbour-counting loop in an earlier cell.
# Labels are used as 1-based indices (label - 1) into the normalized tables.
# NOTE(review): an edge node -> nghbr is stored as
# io_table[node sector, nghbr sector]; confirm that pairing successors with
# the row-normalized table (and predecessors with the column-normalized one)
# is the intended direction.
p = []
for nghbr in labeledOutNghbrs:
    j = graph.nodes[nghbr]['label']
    p.append([io_row_normalized[j - 1][sctr - 1] for sctr in sctrs])

for nghbr in labeledInNghbrs:
    j = graph.nodes[nghbr]['label']
    # Column-normalized here (was row-normalized): column j sums to 1 over
    # sctr, and this matches the loop that fills probDict.
    p.append([io_column_normalized[sctr - 1][j - 1] for sctr in sctrs])

# Average the per-neighbour rows into one vector with an entry per sector.
p = np.array(p)
p_sums = p.sum(axis=0, keepdims=True)[0]
Prob = p_sums / len(p)
    
    

In [149]:
# For every unlabeled node, estimate a probability per sector by averaging
# one table row/column per labeled neighbour.
# Labels are used as 1-based indices (label - 1) into the normalized tables.
# NOTE(review): an edge node -> nghbr is stored as
# io_table[node sector, nghbr sector]; confirm that pairing successors with
# the row-normalized table (and predecessors with the column-normalized one)
# is the intended direction.
probDict = {}
unlabeled_lookup = set(unlabeledNodes)  # constant-time membership tests

for node in unlabeledNodes:
    labeledOutNghbrs = [nghbr for nghbr in G.successors(node) if nghbr not in unlabeled_lookup]
    labeledInNghbrs = [nghbr for nghbr in G.predecessors(node) if nghbr not in unlabeled_lookup]

    p = []

    for nghbr in labeledOutNghbrs:
        j = graph.nodes[nghbr]['label']
        p.append([io_row_normalized[j - 1][sctr - 1] for sctr in sctrs])

    for nghbr in labeledInNghbrs:
        j = graph.nodes[nghbr]['label']
        p.append([io_column_normalized[sctr - 1][j - 1] for sctr in sctrs])

    if p:
        p = np.array(p)
        p_sums = p.sum(axis=0)
        Prob = p_sums / len(p)
    else:
        # No labeled neighbour: nothing to average.  Store an explicit NaN
        # vector of the usual length instead of dividing by zero.
        Prob = np.full(len(sctrs), np.nan)

    probDict[node] = Prob
        
        
    

In [150]:
# Show the estimated sector probabilities for each unlabeled node.
probDict

{13: array([0.19382622, 0.34448074, 0.46169305]),
 18: array([0.20699044, 0.33421731, 0.45879225]),
 19: array([0.19432505, 0.34382826, 0.46184669]),
 22: array([0.20294998, 0.34072377, 0.45632625]),
 29: array([0.19391219, 0.34287085, 0.46321696]),
 30: array([0.21321733, 0.32552559, 0.46125708]),
 39: array([0.18450984, 0.3527147 , 0.46277545]),
 42: array([0.18981868, 0.34872436, 0.46145696]),
 43: array([0.18900828, 0.34810433, 0.46288739]),
 48: array([0.17112337, 0.35948531, 0.46939131])}

In [161]:
'''
sorted_dict: dict of the unlabeled nodes
in descending order of #labeled
neighbours'''

# The loop body was missing (a syntax error); the recorded output shows one
# node id per line, so print each node in sorted_dict order.
for node in sorted_dict:
    print(node)

29
22
39
43
18
42
13
48
19
30
