In [1]:
import os
from collections import defaultdict
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [2]:
import warnings

# Silence every warning category for the remainder of the session.
# NOTE(review): this blanket filter also hides numerical RuntimeWarnings
# (e.g. log of zero); consider narrowing it to specific categories.
warnings.simplefilter("ignore")

In [3]:
# Load the k-shell decomposition table; the first CSV column becomes the row index.
df_ks = pd.read_csv(
    filepath_or_buffer="results/k_shell_decomposition_result_wlvl.csv",
    index_col=0,
)
df_ks.head(n=5)

Unnamed: 0_level_0,node,weekday_in_core,weekday_out_core,weekend_in_core,weekend_out_core,weekday_in_core_lvl,weekday_out_core_lvl,weekend_in_core_lvl,weekend_out_core_lvl
ind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,ADMIRALTY,2797,2668,2747,2593,0,0,0,0
1,AIRPORT ROAD,681,654,258,250,0,0,0,0
2,ALEXANDRA HILL,5641,5374,5250,5005,1,1,1,1
3,ALEXANDRA NORTH,1808,2017,1419,1476,0,0,0,0
4,ALJUNIED,7385,7318,6252,6239,1,1,1,1


In [4]:
def conn_entropy(dg, df, col, direction="out", edge_weight="weight"):
    """Weighted Shannon entropy (in bits) of the categories of each node's neighbours.

    For every node of ``dg`` the edges in the requested direction are grouped
    by the category of the node at their far end (looked up in ``df[col]``).
    Each category's share is the fraction of the node's total edge weight that
    goes to it, and the entropy of those shares is computed with log base 2.

    Parameters
    ----------
    dg : graph-like
        Object exposing ``nodes()``, ``out_edges(nbunch=..., data=True)`` and
        ``in_edges(nbunch=..., data=True)`` yielding ``(u, v, attr_dict)``.
    df : table-like
        Must provide ``df["node"]`` and ``df[col]``, both with ``.tolist()``;
        they are zipped into a node -> category mapping (last duplicate wins).
    col : str
        Name of the category column in ``df``.
    direction : str
        ``"out"`` uses outgoing edges; any other value uses incoming edges.
    edge_weight : str
        Key of the edge attribute holding the weight.

    Returns
    -------
    dict
        Maps each node to its entropy. ``-1`` marks nodes for which the
        entropy is undefined (at most one edge in the requested direction, or
        a total edge weight of zero). ``0.`` is returned when all of the
        weight goes to a single category.

    Raises
    ------
    KeyError
        If a neighbour is missing from ``df["node"]`` or an edge has no
        ``edge_weight`` attribute.
    """
    node_category = dict(zip(df["node"].tolist(), df[col].tolist()))
    use_out_edges = direction == "out"
    log_two = np.log(2)

    entropy_dic = {}
    for n in dg.nodes():
        if use_out_edges:
            nb = [(v, d[edge_weight]) for _, v, d in dg.out_edges(nbunch=n, data=True)]
        else:
            nb = [(u, d[edge_weight]) for u, _, d in dg.in_edges(nbunch=n, data=True)]

        # With zero or one edge there is no distribution to measure.
        if len(nb) <= 1:
            entropy_dic[n] = -1
            continue

        tot_weight = sum(w for _, w in nb)
        # All-zero weights would otherwise divide by zero; the shares are
        # undefined, so reuse the "undefined" sentinel.
        if tot_weight == 0:
            entropy_dic[n] = -1
            continue

        # Accumulate each category's share of the total weight. Insertion
        # order keeps the later summation order deterministic.
        proportions = {}
        for neighbour, w in nb:
            cat = node_category[neighbour]
            proportions[cat] = proportions.get(cat, 0.) + float(w) / tot_weight

        # By convention 0 * log(0) = 0: categories without any weight must not
        # contribute (evaluating them literally would yield NaN).
        shares = [p for p in proportions.values() if p > 0]
        if len(shares) <= 1:
            entropy_dic[n] = 0.
            continue

        entropy_dic[n] = -sum(p * np.log(p) for p in shares) / log_two
    return entropy_dic

In [5]:
# Directory holding the input network files, given relative to the working directory.
data_dir = "../../data"

In [6]:
# Weekday network: parse the Pajek file and convert the result to a DiGraph.
day = "weekday"  # the other day type used in this notebook is "weekend"
fp = os.path.join(data_dir, "net_202001_{}b.net".format(day))
dg1 = nx.DiGraph(nx.read_pajek(fp))
# Displayed as (node count, edge count).
(dg1.number_of_nodes(), dg1.number_of_edges())

(303, 30043)

In [7]:
# Weekend network: parse the Pajek file and convert the result to a DiGraph.
day = "weekend"
fp2 = os.path.join(data_dir, "net_202001_{}b.net".format(day))
dg2 = nx.DiGraph(nx.read_pajek(fp2))
# Displayed as (node count, edge count).
(dg2.number_of_nodes(), dg2.number_of_edges())

(303, 30043)

In [8]:
# Neighbour-category entropy per node, using each network's matching core-level
# column: graph 1 = weekday, graph 2 = weekend; "in"/"out" selects the edge direction.
# The edge attribute defaults to "weight" in conn_entropy.
ks_entropy_1_in, ks_entropy_1_out = (
    conn_entropy(dg1, df_ks, "weekday_in_core_lvl", direction="in"),
    conn_entropy(dg1, df_ks, "weekday_out_core_lvl", direction="out"),
)
ks_entropy_2_in, ks_entropy_2_out = (
    conn_entropy(dg2, df_ks, "weekend_in_core_lvl", direction="in"),
    conn_entropy(dg2, df_ks, "weekend_out_core_lvl", direction="out"),
)

In [9]:
# One column per (day type, direction); rows are indexed by the node keys of the dicts.
df_entropy = pd.DataFrame(
    {
        "weekday_ksc_ent_in": ks_entropy_1_in,
        "weekday_ksc_ent_out": ks_entropy_1_out,
        "weekend_ksc_ent_in": ks_entropy_2_in,
        "weekend_ksc_ent_out": ks_entropy_2_out,
    }
)
df_entropy.head(n=5)

Unnamed: 0,weekday_ksc_ent_in,weekday_ksc_ent_out,weekend_ksc_ent_in,weekend_ksc_ent_out
ADMIRALTY,0.99089,0.771334,0.999949,0.783552
AIRPORT ROAD,0.665975,0.10055,0.683295,0.32445
ALEXANDRA HILL,0.75116,0.617682,0.778628,0.785826
ALEXANDRA NORTH,0.962353,0.8965,0.996685,0.961128
ALJUNIED,0.320833,0.406824,0.261917,0.46884


In [10]:
# Per-column minimum. conn_entropy stores -1 for nodes with at most one edge in
# the requested direction, so a -1 here would reveal such nodes.
df_entropy.min()

weekday_ksc_ent_in     0.000000
weekday_ksc_ent_out    0.000000
weekend_ksc_ent_in     0.000000
weekend_ksc_ent_out    0.006011
dtype: float64

In [11]:
# Per-column maximum; conn_entropy reports entropy in bits (log base 2).
df_entropy.max()

weekday_ksc_ent_in     0.999996
weekday_ksc_ent_out    0.999988
weekend_ksc_ent_in     0.999994
weekend_ksc_ent_out    0.999996
dtype: float64

In [12]:
# Attach the entropy columns to the k-shell table by matching df_ks["node"]
# against the node labels that index df_entropy.
ks_res = df_ks.merge(
    right=df_entropy,
    left_on="node",
    right_index=True,
    suffixes=["_com", "entropy"],
)
ks_res.head(n=5)

Unnamed: 0_level_0,node,weekday_in_core,weekday_out_core,weekend_in_core,weekend_out_core,weekday_in_core_lvl,weekday_out_core_lvl,weekend_in_core_lvl,weekend_out_core_lvl,weekday_ksc_ent_in,weekday_ksc_ent_out,weekend_ksc_ent_in,weekend_ksc_ent_out
ind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,ADMIRALTY,2797,2668,2747,2593,0,0,0,0,0.99089,0.771334,0.999949,0.783552
1,AIRPORT ROAD,681,654,258,250,0,0,0,0,0.665975,0.10055,0.683295,0.32445
2,ALEXANDRA HILL,5641,5374,5250,5005,1,1,1,1,0.75116,0.617682,0.778628,0.785826
3,ALEXANDRA NORTH,1808,2017,1419,1476,0,0,0,0,0.962353,0.8965,0.996685,0.961128
4,ALJUNIED,7385,7318,6252,6239,1,1,1,1,0.320833,0.406824,0.261917,0.46884


In [13]:
# Write the merged table to disk; the index column is labelled "ind" in the CSV header.
ks_res.to_csv("results/k_shell_decomposition_result_entropy.csv", index_label="ind")