# <center>Creating dictionaries with information about each user, for each dataset</center>

In [2]:
import os
import util
import networkx as nx
from time import time
from networkx.algorithms.centrality import closeness_centrality, betweenness_centrality, katz_centrality

Choose dataset.

In [22]:
# Dataset to process, and whether the "cascade" variant is requested.
dataset = 'test_rtid'
cascade = False

data_path, RTU, truegraph = util.load_data(dataset)

# Safety net against a careless setting above: cascade is forced off whenever RTU is truthy.
if RTU:
    cascade = False

# Results go to ../PsiResults/<dataset>/, or ../PsiResults/<dataset>_cascade/ in cascade mode.
out_path = "../PsiResults/{}{}/".format(dataset, "_cascade" if cascade else "")

Author dict creation.

In [23]:
# Load the author lookup for this dataset (presumably RTid -> author; confirm in util.get_authors).
Author = util.get_authors(data_path)

## 1. Get $\lambda, \mu, \nu$
**Important:** if we don't know the author of some RTid, the reposting user is assumed to be the author. In that case, it is the reposting user's $\mu$ that we increase.

In [24]:
# Per-user activity values (each is indexed by user below) plus the total time returned by util.get_activity.
# NOTE(review): divide_by_time=True presumably turns raw counts into rates over total_time — confirm in util.
Lambda, Mu, Nu, total_time = util.get_activity(data_path, RTU, cascade, divide_by_time=True, retweeted=True, Author=Author)

Create the main dictionary `MainDict` and add $\lambda, \mu, \nu$ to it.

In [25]:
# One record per user found in Lambda, holding that user's three activity values.
MainDict = {
    user: {'lambda': Lambda[user], 'mu': Mu[user], 'nu': Nu[user]}
    for user in Lambda
}

Delete `Lambda`, `Mu` and `Nu` to save memory (they're not useful anymore).

In [26]:
# Drop the three per-user dicts now that their values have been copied into MainDict.
# Gotcha: after this, the cell that builds MainDict cannot be re-run without first
# re-running the util.get_activity cell.
del Lambda, Mu, Nu

## 2. Create user graph

In [27]:
# Build the user graph. It is read below through in_degree/out_degree, so it is
# expected to be a directed networkx graph.
G = util.get_nx_graph(data_path, RTU, cascade, truegraph, Author)

Add to `MainDict`:
- in and out degrees
- closeness, betweenness and Katz centrality

In [34]:
# Graph-wide centrality measures, each computed once for every node of G.
# closeness_centrality is called a single time for the whole graph (it returns
# a dict keyed by node): asking for one node at a time makes networkx repeat
# its per-call setup (it reverses a directed graph on each call) once per
# node, only to produce the same values.
closeness = closeness_centrality(G)
betweenness = betweenness_centrality(G)
katz = katz_centrality(G)

# Attach the graph features to each user's record.
# Only nodes of G are updated: a user present in MainDict but absent from G
# keeps just its 'lambda'/'mu'/'nu' entries. Conversely, every node of G must
# already be a key of MainDict, otherwise the lookup below raises KeyError.
for u in G.nodes:
    MainDict[u].update({
        'in_degree': G.in_degree[u],
        'out_degree': G.out_degree[u],
        'closeness_centrality': closeness[u],
        'betweenness_centrality': betweenness[u],
        'katz_centrality': katz[u],
    })

In [35]:
# Inspect the result. This displays every user's record, which is fine for the
# tiny 'test_rtid' dataset; on a large dataset display only a slice.
MainDict

{3: {'lambda': 0.125,
  'mu': 0.0,
  'nu': 0.375,
  'in_degree': 0,
  'out_degree': 3,
  'closeness_centrality': 0.0,
  'betweenness_centrality': 0.0,
  'katz_centrality': 0.4130961923860138},
 0: {'lambda': 0.0,
  'mu': 0.125,
  'nu': 0.0,
  'in_degree': 1,
  'out_degree': 0,
  'closeness_centrality': 0.25,
  'betweenness_centrality': 0.0,
  'katz_centrality': 0.45440581162461524},
 1: {'lambda': 0.0,
  'mu': 0.375,
  'nu': 0.0,
  'in_degree': 2,
  'out_degree': 0,
  'closeness_centrality': 0.5,
  'betweenness_centrality': 0.0,
  'katz_centrality': 0.49571543086321657},
 5: {'lambda': 0.125,
  'mu': 0.0,
  'nu': 0.125,
  'in_degree': 0,
  'out_degree': 1,
  'closeness_centrality': 0.0,
  'betweenness_centrality': 0.0,
  'katz_centrality': 0.4130961923860138},
 4: {'lambda': 0.0, 'mu': 0.125, 'nu': 0.0},
 2: {'lambda': 0.0,
  'mu': 0.25,
  'nu': 0.0,
  'in_degree': 1,
  'out_degree': 0,
  'closeness_centrality': 0.25,
  'betweenness_centrality': 0.0,
  'katz_centrality': 0.454405811624

In [32]:
# Debugging aid: list the graph's nodes. Users missing from this view receive
# no graph features in MainDict, since the feature loop iterates over G.nodes.
G.nodes

NodeView((3, 0, 1, 5, 2))

In [33]:
# Debugging aid: display MainDict again.
# NOTE(review): this duplicates the earlier inspection cell; consider removing one of them.
MainDict

{3: {'lambda': 0.125,
  'mu': 0.0,
  'nu': 0.375,
  'in_degree': 0,
  'out_degree': 3,
  'closeness_centrality': 0.0,
  'betweenness_centrality': 0.0,
  'katz_centrality': 0.4130961923860138},
 0: {'lambda': 0.0,
  'mu': 0.125,
  'nu': 0.0,
  'in_degree': 1,
  'out_degree': 0,
  'closeness_centrality': 0.25,
  'betweenness_centrality': 0.0,
  'katz_centrality': 0.45440581162461524},
 1: {'lambda': 0.0,
  'mu': 0.375,
  'nu': 0.0,
  'in_degree': 2,
  'out_degree': 0,
  'closeness_centrality': 0.5,
  'betweenness_centrality': 0.0,
  'katz_centrality': 0.49571543086321657},
 5: {'lambda': 0.125,
  'mu': 0.0,
  'nu': 0.125,
  'in_degree': 0,
  'out_degree': 1,
  'closeness_centrality': 0.0,
  'betweenness_centrality': 0.0,
  'katz_centrality': 0.4130961923860138},
 4: {'lambda': 0.0, 'mu': 0.125, 'nu': 0.0},
 2: {'lambda': 0.0, 'mu': 0.25, 'nu': 0.0}}