In [1]:
import os
from collections import defaultdict
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [2]:
# Base directory, two levels up from the working directory; the result CSVs
# loaded below live in its "mapequation_gravity" subfolder.
root_dir = "../../"

In [3]:
# Weekday result table from the "mapequation_gravity" folder; the first CSV
# column is used as the row index.
f = "net_202001_weekday_G.csv"
df_res1 = pd.read_csv(
    os.path.join(root_dir, "mapequation_gravity", f),
    index_col=0,
)
df_res1.head()

Unnamed: 0_level_0,clusters,flow,node,ori_index,level_1,level_2
ind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1:1,0.011348,NEWTON CIRCUS,205,1,1_1
1,1:2,0.011091,BOULEVARD,79,1,1_2
2,1:3,0.009469,CHINATOWN,5,1,1_3
3,1:4,0.009312,MOULMEIN,128,1,1_4
4,1:5,0.008589,CITY HALL,206,1,1_5


In [4]:
# Weekend result table from the "mapequation_gravity" folder; the first CSV
# column is used as the row index.
f = "net_202001_weekend_G.csv"
df_res2 = pd.read_csv(
    os.path.join(root_dir, "mapequation_gravity", f),
    index_col=0,
)
df_res2.head()

Unnamed: 0_level_0,clusters,flow,node,ori_index,level_1,level_2
ind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1:1,0.015323,BOULEVARD,79,1,1_1
1,1:2,0.011581,NEWTON CIRCUS,205,1,1_2
2,1:3,0.011476,FARRER PARK,188,1,1_3
3,1:4,0.010261,VICTORIA,185,1,1_4
4,1:5,0.009789,CITY HALL,206,1,1_5


In [5]:
# Put the weekday and weekend level_1 labels side by side, matched on node
# name. The join is an inner join (pandas default), so only nodes present in
# both tables are kept.
df_merge = df_res1[["node", "level_1"]].merge(
    df_res2[["node", "level_1"]],
    on="node",
    suffixes=("_weekday", "_weekend"),
)
df_merge.head()

Unnamed: 0,node,level_1_weekday,level_1_weekend
0,NEWTON CIRCUS,1,1
1,BOULEVARD,1,1
2,CHINATOWN,1,1
3,MOULMEIN,1,1
4,CITY HALL,1,1


In [6]:
# Persist the per-node weekday/weekend community labels; a later cell reads
# this file back. Create the output folder first so that a fresh checkout
# (no "results/" directory yet) does not fail on the write.
os.makedirs("results", exist_ok=True)
df_merge.to_csv("results/mapequation_com_G.csv", index_label="ind")

In [7]:
# Directory containing the Pajek network files (net_202001_<day>b.net) that
# the following cells load.
data_dir = "../../data"

In [8]:
# Weekday network: parse the Pajek file and convert the parsed graph straight
# into a plain directed graph.
day = "weekday"  # alternative value: "weekend"
fp = os.path.join(data_dir, "net_202001_{}b.net".format(day))
dg1 = nx.DiGraph(nx.read_pajek(fp))
# Quick size check: (node count, edge count).
(dg1.number_of_nodes(), dg1.number_of_edges())

(303, 30043)

In [9]:
# Weekend network: parse the Pajek file and convert the parsed graph straight
# into a plain directed graph.
day = "weekend"
fp2 = os.path.join(data_dir, "net_202001_{}b.net".format(day))
dg2 = nx.DiGraph(nx.read_pajek(fp2))
# Quick size check: (node count, edge count).
(dg2.number_of_nodes(), dg2.number_of_edges())

(303, 30043)

In [10]:
def conn_entropy(dg, df, col, direction="out", edge_weight="weight"):
    """Normalised community entropy of every node's weighted connections.

    For each node, the weights of its outgoing (or incoming) edges are turned
    into shares of the node's total weight, the shares are summed per
    community of the neighbouring node, and the Shannon entropy of that
    per-community distribution is divided by ``log(K)``, where ``K`` is the
    number of distinct values in ``df[col]``.

    Parameters
    ----------
    dg : graph-like
        Must provide ``nodes()``, ``out_edges(nbunch=..., data=True)`` and
        ``in_edges(nbunch=..., data=True)`` yielding ``(u, v, data)`` triples.
    df : table-like
        ``df["node"]`` and ``df[col]`` must support ``.tolist()``; they give
        the node -> community mapping. If a node name is repeated, the last
        row wins.
    col : str
        Name of the column holding the community label.
    direction : str, default "out"
        ``"out"`` uses outgoing edges; any other value uses incoming edges.
    edge_weight : str, default "weight"
        Key of the weight inside each edge's data dict.

    Returns
    -------
    dict
        Maps each node of ``dg`` to:

        * ``-1`` when the entropy is undefined: the node has at most one edge
          in the chosen direction, or the weights of those edges sum to 0;
        * ``0.0`` when all neighbours belong to a single community;
        * the normalised entropy otherwise.

    Raises
    ------
    KeyError
        If a neighbouring node is missing from ``df["node"]`` or an edge has
        no ``edge_weight`` entry.
    """
    node_dic = dict(zip(df["node"].tolist(), df[col].tolist()))
    number_of_all_community = len(set(df[col].tolist()))
    # Loop-invariant normaliser. It is only used when a node touches at least
    # two communities, which implies number_of_all_community >= 2.
    log_all_communities = (
        np.log(number_of_all_community) if number_of_all_community > 1 else None
    )

    entropy_dic = {}
    for n in dg.nodes():
        if direction == "out":
            nb = [(v, d[edge_weight]) for u, v, d in dg.out_edges(nbunch=n, data=True)]
        else:
            nb = [(u, d[edge_weight]) for u, v, d in dg.in_edges(nbunch=n, data=True)]

        # Fewer than two connections: no distribution to measure.
        if len(nb) <= 1:
            entropy_dic[n] = -1
            continue

        tot_weight = sum(w for c, w in nb)
        # All weights are zero: shares are undefined, so use the same
        # "undefined" marker instead of dividing by zero.
        if tot_weight == 0:
            entropy_dic[n] = -1
            continue

        partial = [float(w) / tot_weight for c, w in nb]
        cats = [node_dic[c] for c, w in nb]
        cat_set = set(cats)
        if len(cat_set) <= 1:
            entropy_dic[n] = 0.
            continue

        proportions = {cat: 0. for cat in cat_set}
        for cat, prop in zip(cats, partial):
            proportions[cat] += prop

        # Convention 0 * log(0) = 0: a community reached only through
        # zero-weight edges contributes nothing (otherwise the result is NaN).
        ent = -sum([p * np.log(p) for p in proportions.values() if p != 0]) / log_all_communities
        entropy_dic[n] = ent
    return entropy_dic

In [11]:
# Reload the saved weekday/weekend community table, using the first CSV
# column as the row index.
df_com = pd.read_csv(
    filepath_or_buffer="results/mapequation_com_G.csv",
    index_col=0,
)
df_com.head()

Unnamed: 0_level_0,node,level_1_weekday,level_1_weekend
ind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,NEWTON CIRCUS,1,1
1,BOULEVARD,1,1
2,CHINATOWN,1,1
3,MOULMEIN,1,1
4,CITY HALL,1,1


In [12]:
# Community entropy per node for each network, first over incoming and then
# over outgoing edges (dg1 with the weekday labels, dg2 with the weekend ones).
com_entropy_1_in, com_entropy_1_out = (
    conn_entropy(dg1, df_com, "level_1_weekday", direction=side, edge_weight="weight")
    for side in ("in", "out")
)
com_entropy_2_in, com_entropy_2_out = (
    conn_entropy(dg2, df_com, "level_1_weekend", direction=side, edge_weight="weight")
    for side in ("in", "out")
)
# Leftover spot-check note: look up ["THE WHARVES"] in any of the dicts above.

In [13]:
# One row per node, one column per (day type, edge direction) entropy dict.
df_entropy = pd.DataFrame(
    {
        "weekday_com_ent_in": com_entropy_1_in,
        "weekday_com_ent_out": com_entropy_1_out,
        "weekend_com_ent_in": com_entropy_2_in,
        "weekend_com_ent_out": com_entropy_2_out,
    }
)
df_entropy.head()

Unnamed: 0,weekday_com_ent_in,weekday_com_ent_out,weekend_com_ent_in,weekend_com_ent_out
ADMIRALTY,0.314996,0.344542,0.330212,0.357016
AIRPORT ROAD,0.407923,0.244388,0.421046,0.190578
ALEXANDRA HILL,0.554596,0.527702,0.566378,0.575657
ALEXANDRA NORTH,0.464098,0.351009,0.458953,0.351353
ALJUNIED,0.624921,0.649594,0.573044,0.602839


In [14]:
# Attach the entropy columns to the community table: df_com's "node" column
# is matched against df_entropy's index (inner join, pandas default).
com_res = df_com.merge(
    df_entropy,
    left_on="node",
    right_index=True,
    suffixes=("_com", "entropy"),
)

In [15]:
# Preview the merged community + entropy table.
com_res.head()

Unnamed: 0_level_0,node,level_1_weekday,level_1_weekend,weekday_com_ent_in,weekday_com_ent_out,weekend_com_ent_in,weekend_com_ent_out
ind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,NEWTON CIRCUS,1,1,0.660688,0.618028,0.649526,0.595412
1,BOULEVARD,1,1,0.778886,0.751121,0.780433,0.728454
2,CHINATOWN,1,1,0.74653,0.754246,0.714408,0.728074
3,MOULMEIN,1,1,0.74409,0.732001,0.710449,0.652052
4,CITY HALL,1,1,0.759932,0.761807,0.763656,0.770766


In [16]:
# Save the combined community + entropy table; the index column is written
# under the header "ind".
com_res.to_csv(
    path_or_buf="results/mapequation_com_entropy_G.csv",
    index_label="ind",
)