## For peer review only. A customisable version is in preparation. 

### 0. Imports and functions

In [None]:
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib import colors
%matplotlib inline
import math,collections

In [None]:
# return a copy of a dict with one key removed
def removekey(d, key):
    """Return a shallow copy of ``d`` without ``key``.

    The input mapping is left untouched. Raises ``KeyError`` when ``key`` is
    not present.
    """
    trimmed = dict(d)
    # pop() without a default raises KeyError for a missing key, like ``del``
    trimmed.pop(key)
    return trimmed

In [None]:
# binomial coefficient "n choose r"
def nCr(n,r):
    """Return the number of ways to choose ``r`` items out of ``n``.

    Computed as n! / (r! * (n-r)!) with exact integer arithmetic.
    ``math.factorial`` raises ``ValueError`` for negative arguments, so
    ``r > n`` is rejected rather than returning 0.
    """
    numerator = math.factorial(n)
    divisor = math.factorial(r) * math.factorial(n - r)
    return numerator // divisor

### 1. WNGF applied to EUREGIO (macro data)

#### EUREGIO data from https://dataportaal.pbl.nl/downloads/PBL_Euregio/

#### 1.1 Prepare the data

In [None]:
# input files
ZPath = 'Z2010.csv' # transaction matrix
headerPath = 'header.csv' # header with names of the sectors

# read the header and build one '-'-joined label per header row
myHeader = pd.read_csv(headerPath, header=None)
cols = list(myHeader.columns)
myHeader['combined'] = myHeader[cols].apply(lambda row: '-'.join(row.values.astype(str)), axis=1)

# read the transaction matrix and replace NaN cells by 0
Z = np.genfromtxt(ZPath, delimiter=',')
Z2 = np.nan_to_num(Z)

# Collapse all sectors of the same region (header column 1) into one node.
# Original note: 14 sectors per region, giving a 266 by 266 matrix.
DF1 = pd.DataFrame(Z2, index=myHeader[1])
# step 1: sum the rows belonging to the same region, then transpose
DF2 = DF1.groupby(level=0, sort=False).sum().T
# step 2: label the (former) columns by region, sum them and transpose back
DF2.index = myHeader[1]
DF3 = DF2.groupby(level=0, sort=False).sum().T

nodeNames = DF3.index.tolist()

# number of regions per country (country = first two characters of the region name)
ctryNames = [name[:2] for name in nodeNames]
ctryCount = dict(collections.Counter(ctryNames))

#### 1.2 Define normalisation denominators

In [None]:
# coordinator normalisation denominator
def normCo(node,ctryCount):
    """Return the coordinator denominator for ``node``.

    This is the number of ordered pairs of distinct regions in the node's own
    country, excluding the node itself: (n-1)*(n-2) for a country with ``n``
    regions. Countries with fewer than three regions give 1, so the caller
    can always divide by the result.
    """
    peers = ctryCount[node[:2]] - 1  # same-country regions other than the node
    # peers*(peers-1) equals 2 * C(n-1, 2)
    return peers * (peers - 1) if peers > 1 else 1

# gatekeeper and representative normalisation denominator
def normGaRe(node,ctryCount):
    """Return the gatekeeper/representative denominator for ``node``.

    This is the number of (foreign region, same-country region) pairs the
    node could broker: (number of regions in other countries) * (n-1), where
    ``n`` is the number of regions in the node's own country.

    Returns 1 whenever no such pair exists (``n <= 1`` or no other country),
    so the caller can always divide by the result. ``ctryCount`` is not
    modified. Raises ``KeyError`` if the node's country is not in ``ctryCount``.
    """
    ctry = node[:2]
    n = ctryCount[ctry]
    if n <= 1:
        return 1
    foreign = sum(count for key, count in ctryCount.items() if key != ctry)
    m = foreign * (n - 1)
    # with a single country present m is 0; fall back to 1 like the n <= 1 case
    return m if m > 0 else 1

# itinerant normalisation denominator
def normIt(node,ctryCount):
    """Return the itinerant denominator for ``node``.

    This is the number of ordered pairs of distinct regions that share a
    country other than the node's own: the sum of k*(k-1) over every other
    country with ``k`` regions.

    Returns 1 when no such pair exists, mirroring ``normCo`` and ``normGaRe``,
    so the caller can always divide by the result. ``ctryCount`` is not
    modified. Raises ``KeyError`` if the node's country is not in ``ctryCount``.
    """
    ctry = node[:2]
    if ctry not in ctryCount:
        raise KeyError(ctry)
    # k*(k-1) equals 2 * C(k, 2) and is 0 for countries with a single region
    m = sum(k * (k - 1) for key, k in ctryCount.items() if key != ctry)
    return m if m > 0 else 1

# liaison normalisation denominator
def normLi(node,ctryCount):
    """Return the liaison denominator for ``node``.

    This is the number of ordered (region, region) pairs whose two regions
    lie in two different countries, both different from the node's own:
    the sum of v1*v2 over ordered pairs of distinct other countries.

    Returns 1 when no such pair exists (fewer than two other countries),
    mirroring ``normCo`` and ``normGaRe``, so the caller can always divide by
    the result. ``ctryCount`` is not modified. Raises ``KeyError`` if the
    node's country is not in ``ctryCount``.
    """
    ctry = node[:2]
    if ctry not in ctryCount:
        raise KeyError(ctry)
    others = [count for key, count in ctryCount.items() if key != ctry]
    # sum over i != j of v_i*v_j  ==  (sum v)^2 - sum v^2
    total = sum(others)
    m = total * total - sum(v * v for v in others)
    return m if m > 0 else 1

#### 1.3 Construct the network

In [None]:
# Build the directed, weighted network of flows between regions.
# Self-loops are left out: only ordered pairs of distinct regions become edges.

G = nx.DiGraph()
G.add_weighted_edges_from(
    (src, dst, DF3.loc[src, dst])
    for src in nodeNames
    for dst in nodeNames
    if src != dst
)

# the first two letters are distinct across countries
nx.set_node_attributes(G, {name: name[:2] for name in G.nodes}, 'country')

### 2. WNGF applied to R&D information flow (micro data)

#### R&D information flow data from https://toreopsahl.com/datasets/#Cross_Parker

#### 2.1 Prepare the data

In [None]:
# data path
MatrixPath = 'Cross_Parker-Manufacturing_info (differentiated in terms of advice).xlsx'
attributePath = 'Cross_Parker-Manufacturing_ATTR.xlsx'

# load data (first row = header, first column = node labels)
matrix = pd.read_excel(MatrixPath, header=0, index_col=0)
attribute = pd.read_excel(attributePath, header=0, index_col=0)

nodeNames = list(attribute.index)

# one boolean mask per location code ...
conditions = [
    (attribute['location'] == 1),
    (attribute['location'] == 2),
    (attribute['location'] == 3),
    (attribute['location'] == 4)
    ]

# ... and the city label assigned where the matching mask is True
# (the 'Warsaw' label previously carried a stray leading space)
values = ['Paris', 'Frankfurt', 'Warsaw', 'Geneva']
# An explicit string default is required: recent NumPy refuses to combine the
# string choices with np.select's implicit integer default 0. '0' is the label
# that older NumPy versions produced for rows matching no condition.
attribute['city'] = np.select(conditions, values, default='0')

# count the number of people in each location
Count = dict(collections.Counter(attribute.city))

#### 2.2 Define normalisation denominators

In [None]:
# coordinator normalisation denominator
def normCo(node,Count):
    """Return the coordinator denominator for ``node``.

    The node's city is read from the module-level ``attribute`` table. The
    result is the number of ordered pairs of distinct people in that city,
    excluding the node itself: (n-1)*(n-2) for a city with ``n`` people.
    Cities with fewer than three people give 1, so the caller can always
    divide by the result.
    """
    peers = Count[attribute.loc[node].city] - 1  # same-city people other than the node
    # peers*(peers-1) equals 2 * C(n-1, 2)
    return peers * (peers - 1) if peers > 1 else 1

# gatekeeper and representative normalisation denominator
def normGaRe(node,Count):
    """Return the gatekeeper/representative denominator for ``node``.

    The node's city is read from the module-level ``attribute`` table. The
    result is the number of (person in another city, person in the same city)
    pairs the node could broker: (people in other cities) * (n-1), where
    ``n`` is the number of people in the node's own city.

    Returns 1 whenever no such pair exists (``n <= 1`` or no other city), so
    the caller can always divide by the result. ``Count`` is not modified.
    Raises ``KeyError`` if the node's city is not in ``Count``.
    """
    city = attribute.loc[node].city
    n = Count[city]
    if n <= 1:
        return 1
    outsiders = sum(count for key, count in Count.items() if key != city)
    m = outsiders * (n - 1)
    # with a single city present m is 0; fall back to 1 like the n <= 1 case
    return m if m > 0 else 1

# itinerant normalisation denominator
def normIt(node,Count):
    """Return the itinerant denominator for ``node``.

    The node's city is read from the module-level ``attribute`` table. The
    result is the number of ordered pairs of distinct people who share a city
    other than the node's own: the sum of k*(k-1) over every other city with
    ``k`` people.

    Returns 1 when no such pair exists, mirroring ``normCo`` and ``normGaRe``,
    so the caller can always divide by the result. ``Count`` is not modified.
    Raises ``KeyError`` if the node's city is not in ``Count``.
    """
    city = attribute.loc[node].city
    if city not in Count:
        raise KeyError(city)
    # k*(k-1) equals 2 * C(k, 2) and is 0 for cities with a single person
    m = sum(k * (k - 1) for key, k in Count.items() if key != city)
    return m if m > 0 else 1

# liaison normalisation denominator
def normLi(node,Count):
    """Return the liaison denominator for ``node``.

    The node's city is read from the module-level ``attribute`` table. The
    result is the number of ordered (person, person) pairs whose two people
    are in two different cities, both different from the node's own: the sum
    of v1*v2 over ordered pairs of distinct other cities.

    Returns 1 when no such pair exists (fewer than two other cities),
    mirroring ``normCo`` and ``normGaRe``, so the caller can always divide by
    the result. ``Count`` is not modified. Raises ``KeyError`` if the node's
    city is not in ``Count``.
    """
    city = attribute.loc[node].city
    if city not in Count:
        raise KeyError(city)
    others = [count for key, count in Count.items() if key != city]
    # sum over i != j of v_i*v_j  ==  (sum v)^2 - sum v^2
    total = sum(others)
    m = total * total - sum(v * v for v in others)
    return m if m > 0 else 1

#### 2.3 Construct the network

In [None]:
# Build the directed, weighted information-flow network.
# Only ordered pairs of distinct nodes become edges (no self-loops).

G = nx.DiGraph()
G.add_weighted_edges_from(
    (src, dst, matrix.loc[src, dst])
    for src in nodeNames
    for dst in nodeNames
    if src != dst
)

# attach each node's city label from the attribute table
nx.set_node_attributes(G, {name: attribute.loc[name].city for name in G.nodes}, 'city')

### 3. WNGF calculation

In [None]:
# For every node, count the two-step paths up -> node -> down that the node brokers,
# split by brokerage role. A path is counted when both of its legs have positive
# weight and either there is no direct up -> down tie (weight 0) or the detour via
# the node is shorter than the direct tie, with distance taken as 1/weight.

# role name -> key of the list that stores that role's [up, down] pairs
rolePairKey = {'coordinator':'coPairs', 'itinerant':'itPairs', 'gatekeeper':'gaPairs',
               'representative':'rePairs', 'liaison':'liPairs'}

GF = {}
for node in G.nodes:
    tally = {'coordinator':0, 'itinerant':0, 'gatekeeper':0, 'representative':0, 'liaison':0, 'total':0,
             'coPairs':[], 'itPairs':[], 'gaPairs':[], 'rePairs':[], 'liPairs':[], 'toPairs':[]}
    for up in G.predecessors(node):
        for down in G.successors(node):
            if up == down:
                continue

            w_updown = G[up][down]['weight']
            w_upn = G[up][node]['weight']
            w_ndown = G[node][down]['weight']

            bothLegs = w_upn > 0 and w_ndown > 0
            noDirectTie = w_updown == 0 and bothLegs
            detourShorter = w_updown > 0 and bothLegs and 1/w_updown > 1/w_upn + 1/w_ndown
            if not (noDirectTie or detourShorter):
                continue

            upCity = G.nodes[up]['city']
            nodeCity = G.nodes[node]['city']
            downCity = G.nodes[down]['city']
            upInside = upCity == nodeCity        # sender shares the broker's group
            downInside = nodeCity == downCity    # receiver shares the broker's group
            endsMatch = upCity == downCity       # sender and receiver share a group

            if upInside and downInside:
                role = 'coordinator'
            elif endsMatch and not downInside:
                role = 'itinerant'
            elif downInside and not endsMatch:
                role = 'gatekeeper'
            elif upInside and not downInside:
                role = 'representative'
            elif not upInside and not downInside and not endsMatch:
                role = 'liaison'
            else:
                continue

            tally[role] += 1
            tally[rolePairKey[role]].append([up,down])
            tally['total'] += 1
            tally['toPairs'].append([up,down])

    # All five role counts are normalised here; 'total' stays a raw count.
    tally['coordinator'] /= normCo(node,Count)
    tally['itinerant'] /= normIt(node,Count)
    tally['gatekeeper'] /= normGaRe(node,Count)
    tally['representative'] /= normGaRe(node,Count)
    tally['liaison'] /= normLi(node,Count)
    GF[node] = tally

### 4. Collect the results

In [None]:
# Reorganise the results: one list of {'node', score} records per role,
# in the order of nodeNames

coRank = [{'node':name, 'coordinator':GF[name]['coordinator']} for name in nodeNames]
itRank = [{'node':name, 'itinerant':GF[name]['itinerant']} for name in nodeNames]
gaRank = [{'node':name, 'gatekeeper':GF[name]['gatekeeper']} for name in nodeNames]
reRank = [{'node':name, 'representative':GF[name]['representative']} for name in nodeNames]
liRank = [{'node':name, 'liaison':GF[name]['liaison']} for name in nodeNames]
toRank = [{'node':name, 'total':GF[name]['total']} for name in nodeNames]

In [None]:
# Turn each list of records into a DataFrame, then put the role columns side by side

coRankDF = pd.DataFrame(coRank)
itRankDF = pd.DataFrame(itRank)
gaRankDF = pd.DataFrame(gaRank)
reRankDF = pd.DataFrame(reRank)
liRankDF = pd.DataFrame(liRank)
toRankDF = pd.DataFrame(toRank)

# coRankDF contributes the 'node' column as well as its own score column
gf_freq = pd.concat(
    [
        coRankDF,
        itRankDF['itinerant'],
        gaRankDF['gatekeeper'],
        reRankDF['representative'],
        liRankDF['liaison'],
        toRankDF['total'],
    ],
    axis=1,
    sort=False,
)

In [None]:
# Inspect one role at a time, for example gatekeeper (lowest scores first).
# To look at another role, change the column name passed to sort_values.

gf_freq.sort_values('gatekeeper')