In [1]:
import os
from collections import defaultdict
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [2]:
import warnings
# Silence every warning for the rest of the session. No category is given, so
# this also hides numeric RuntimeWarnings (e.g. log of zero, invalid value) and
# library deprecation notices -- comment it out when debugging odd results.
warnings.filterwarnings('ignore')

In [9]:
# Load the k-shell decomposition table. The first CSV column is read as the
# index and then immediately moved back into an ordinary column, leaving a
# fresh positional index on the frame.
df_ks = pd.read_csv(
    "k_shell_decomposition_result_wlvl.csv",
    index_col=0,
).reset_index()
df_ks.head()

Unnamed: 0,node,weekday_in_core,weekday_out_core,weekend_in_core,weekend_out_core,weekday_in_core_lvl,weekday_out_core_lvl,weekend_in_core_lvl,weekend_out_core_lvl
0,ADMIRALTY,8845,8436,5350,5185,0,0,0,0
1,AIRPORT ROAD,2153,2070,515,500,0,0,0,0
2,ALEXANDRA HILL,14278,14023,8195,8136,1,1,1,1
3,ALEXANDRA NORTH,5717,6379,2838,2951,0,0,0,0
4,ALJUNIED,14278,14023,8195,8377,1,1,1,1


In [4]:
def conn_entropy(dg, df, col, direction="out", edge_weight="weight"):
    """Score every node by the normalized entropy of its neighbours' categories.

    For each node of ``dg`` the edges in the requested direction are gathered,
    every neighbour is mapped to its category (``df[col]``, looked up through
    ``df["node"]``) and the edge weights are converted into per-category
    shares of the node's total weight. The Shannon entropy of those shares is
    divided by ``log(k)``, with ``k`` the number of distinct categories found
    among the neighbours, so a defined score lies between 0 and 1.

    Parameters
    ----------
    dg : graph
        Object exposing ``nodes()``, ``out_edges(nbunch=..., data=True)`` and
        ``in_edges(nbunch=..., data=True)`` (a networkx directed graph in this
        notebook).
    df : table
        Must provide a ``"node"`` column and the ``col`` column.
    col : str
        Name of the column of ``df`` holding each node's category.
    direction : str, default "out"
        ``"out"`` uses outgoing edges; any other value uses incoming edges.
    edge_weight : str, default "weight"
        Key of the edge-data entry used as the edge weight.

    Returns
    -------
    dict
        Maps each node to one of:

        * ``-1``  -- entropy is undefined: the node has at most one edge in
          the chosen direction, or the weights of its edges sum to zero;
        * ``0.0`` -- all neighbours belong to the same category;
        * the normalized entropy otherwise.

    Raises
    ------
    KeyError
        If a neighbour is absent from ``df["node"]`` or an edge has no
        ``edge_weight`` entry.
    """
    # Later rows win if a node is listed more than once in the table.
    node_category = dict(zip(df["node"].tolist(), df[col].tolist()))
    use_out = direction == "out"

    entropy_dic = {}
    for n in dg.nodes():
        # (neighbour, weight) pairs for the requested direction.
        if use_out:
            nb = [(v, d[edge_weight]) for _, v, d in dg.out_edges(nbunch=n, data=True)]
        else:
            nb = [(u, d[edge_weight]) for u, _, d in dg.in_edges(nbunch=n, data=True)]

        if len(nb) <= 1:
            entropy_dic[n] = -1
            continue

        tot_weight = sum(w for _, w in nb)
        if tot_weight == 0:
            # No weight at all: shares cannot be formed (this used to raise
            # ZeroDivisionError), so report the "undefined" sentinel.
            entropy_dic[n] = -1
            continue

        cats = [node_category[c] for c, _ in nb]
        shares = dict.fromkeys(set(cats), 0.)
        if len(shares) <= 1:
            entropy_dic[n] = 0.
            continue

        for cat, (_, w) in zip(cats, nb):
            shares[cat] += float(w) / tot_weight

        # By convention 0 * log(0) = 0: a category reached only through
        # zero-weight edges contributes nothing instead of turning the whole
        # score into NaN. Only exact zeros are skipped, so invalid (negative)
        # shares still surface as NaN rather than being hidden.
        raw = -sum(p * np.log(p) for p in shares.values() if p != 0)
        entropy_dic[n] = raw / np.log(len(shares))
    return entropy_dic

In [5]:
# Directory containing the input files, given relative to the notebook's
# working directory.
data_dir = "../data"

In [6]:
# Weekday network: build the Pajek file path, read it, and keep the result as
# a plain directed graph.
# NOTE(review): per the networkx docs read_pajek returns a multigraph, and
# converting to DiGraph keeps a single edge per ordered node pair -- confirm
# the input file has no parallel edges whose weights should be combined.
day = "weekday"  # the other available value is "weekend"
fp = os.path.join(data_dir, "net_202001_{}.net".format(day))
dg1 = nx.DiGraph(nx.read_pajek(fp))
(dg1.number_of_nodes(), dg1.number_of_edges())

(303, 30043)

In [7]:
# Weekend network, loaded the same way as the weekday one. This rebinds the
# notebook-level name `day`.
# NOTE(review): per the networkx docs read_pajek returns a multigraph, and
# converting to DiGraph keeps a single edge per ordered node pair -- confirm
# the input file has no parallel edges whose weights should be combined.
day = "weekend"
fp2 = os.path.join(data_dir, "net_202001_{}.net".format(day))
dg2 = nx.DiGraph(nx.read_pajek(fp2))
(dg2.number_of_nodes(), dg2.number_of_edges())

(303, 30043)

In [10]:
# Neighbour-category entropy for both networks and both edge directions.
# Each call pairs a graph with the k-shell level column of the same day type
# and the same direction (dg1 = weekday, dg2 = weekend).
ks_entropy_1_in = conn_entropy(
    dg1, df_ks, col="weekday_in_core_lvl", direction="in", edge_weight="weight"
)
ks_entropy_1_out = conn_entropy(
    dg1, df_ks, col="weekday_out_core_lvl", direction="out", edge_weight="weight"
)
ks_entropy_2_in = conn_entropy(
    dg2, df_ks, col="weekend_in_core_lvl", direction="in", edge_weight="weight"
)
ks_entropy_2_out = conn_entropy(
    dg2, df_ks, col="weekend_out_core_lvl", direction="out", edge_weight="weight"
)

In [11]:
# Collect the four node -> entropy dictionaries into one table: one column per
# dictionary, with the dictionary keys (node names) as the row index.
df_entropy = pd.DataFrame(
    {
        "weekday_ksc_ent_in": ks_entropy_1_in,
        "weekday_ksc_ent_out": ks_entropy_1_out,
        "weekend_ksc_ent_in": ks_entropy_2_in,
        "weekend_ksc_ent_out": ks_entropy_2_out,
    }
)
df_entropy.head()

Unnamed: 0,weekday_ksc_ent_in,weekday_ksc_ent_out,weekend_ksc_ent_in,weekend_ksc_ent_out
ADMIRALTY,0.997714,0.771458,0.999947,0.779104
AIRPORT ROAD,0.665975,0.10055,0.683295,0.32445
ALEXANDRA HILL,0.679349,0.530146,0.883398,0.521277
ALEXANDRA NORTH,0.986702,0.551673,0.927997,0.682645
ALJUNIED,0.524651,0.625371,0.437139,0.536421


In [13]:
# Column-wise minimum of the entropy scores. conn_entropy stores -1 for nodes
# it cannot score, so a minimum below 0 would reveal such nodes.
df_entropy.min()

weekday_ksc_ent_in     0.0
weekday_ksc_ent_out    0.0
weekend_ksc_ent_in     0.0
weekend_ksc_ent_out    0.0
dtype: float64

In [14]:
# Column-wise maximum of the entropy scores; the entropy is normalized by the
# log of the number of neighbour categories, so it should not exceed 1.
df_entropy.max()

weekday_ksc_ent_in     0.999998
weekday_ksc_ent_out    0.999873
weekend_ksc_ent_in     0.999994
weekend_ksc_ent_out    0.999996
dtype: float64

In [12]:
# Attach the entropy columns to the k-shell table by matching df_ks["node"]
# against the index of df_entropy. No `how` is given, so pandas' default
# (inner) join applies: nodes missing from either side are dropped.
# The suffixes only take effect if both frames share a column name.
ks_res = df_ks.merge(
    df_entropy,
    left_on="node",
    right_index=True,
    suffixes=["_com", "entropy"],
)
ks_res.head()

Unnamed: 0,node,weekday_in_core,weekday_out_core,weekend_in_core,weekend_out_core,weekday_in_core_lvl,weekday_out_core_lvl,weekend_in_core_lvl,weekend_out_core_lvl,weekday_ksc_ent_in,weekday_ksc_ent_out,weekend_ksc_ent_in,weekend_ksc_ent_out
0,ADMIRALTY,8845,8436,5350,5185,0,0,0,0,0.997714,0.771458,0.999947,0.779104
1,AIRPORT ROAD,2153,2070,515,500,0,0,0,0,0.665975,0.10055,0.683295,0.32445
2,ALEXANDRA HILL,14278,14023,8195,8136,1,1,1,1,0.679349,0.530146,0.883398,0.521277
3,ALEXANDRA NORTH,5717,6379,2838,2951,0,0,0,0,0.986702,0.551673,0.927997,0.682645
4,ALJUNIED,14278,14023,8195,8377,1,1,1,1,0.524651,0.625371,0.437139,0.536421


In [15]:
# Persist the merged table next to the notebook; the frame's index is written
# as the first CSV column under the header "ind".
ks_res.to_csv("k_shell_decomposition_result_entropy.csv", index_label="ind")