In [18]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.cm as cmx
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.patches as mpatches

In [19]:
import warnings
# Silence every warning for the rest of the session (blanket filter, no category restriction).
warnings.filterwarnings(action='ignore')

# Lift pandas' display truncation so frames render with all columns and all rows.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

#### Importing Data

In [20]:
# Location of the Excel workbook, relative to the notebook's working directory.
PATH = "data/IRA Data/IRADATA.xlsx"
# Open the workbook once; later cells read individual sheets from this handle.
xls = pd.ExcelFile(PATH)

In [21]:
# Pull the three sheets used for the Period 2 analysis out of the workbook.
codebook = pd.read_excel(xls, sheet_name='Codebook')
p2_attr = pd.read_excel(xls, sheet_name='Period2_Attribute')
p2_netw = pd.read_excel(xls, sheet_name='Period2_Network')

# Keep the ID plus the six brigade indicator columns, discard every row that
# contains the missing-value code 99999, renumber the rows from 0, and rename
# "ID" to "Node Label" to make graph construction easier.
p2_data = (
    p2_attr[['ID', 'Antrim Brigade', 'Armagh Brigade', 'Down Brigade',
             'Derry Brigade', 'Tyrone Brigade', 'Fermanagh Brigade']]
    .loc[lambda frame: ~frame.isin([99999]).any(axis=1)]
    .reset_index(drop=True)
    .rename(columns={"ID" : "Node Label"})
)
p2_data.head(3)

Unnamed: 0,Node Label,Antrim Brigade,Armagh Brigade,Down Brigade,Derry Brigade,Tyrone Brigade,Fermanagh Brigade
0,ZF1001,0,0,0,1,0,0
1,ZF1007,0,0,0,1,0,0
2,ZF1025,0,0,0,1,0,0


#### Constructing / Visualizing a Network based on Brigade Membership

In [22]:
def is_duplicate(new_edge, edges_list):
    """Tell whether an edge is already listed, in either direction.

    Args:
        new_edge: The edge to look for, as a reversible sequence of endpoints.
        edges_list: Container of existing edges to search.

    Returns:
        True if ``new_edge`` or its reversed tuple is in ``edges_list``,
        otherwise False.
    """
    mirrored_edge = tuple(reversed(new_edge))
    return any(candidate in edges_list for candidate in (new_edge, mirrored_edge))

In [23]:
def get_brigade_membership(node_id:int, df:pd.DataFrame=None):
    """Return the name of the first column whose value equals 1 for a given row.

    Args:
        node_id: Positional (0-based) row index into ``df``.
        df: Frame to inspect. When omitted, the notebook-level ``p2_data``
            frame is used.

    Returns:
        The label of the first column (in column order) whose value in that
        row equals 1, or ``False`` when no column matches.

    Raises:
        IndexError: If ``node_id`` is outside the positional range of ``df``.
    """
    # `not df` raises ValueError for a DataFrame (its truth value is ambiguous),
    # so the "no frame supplied" case must be detected with an identity check.
    if df is None:
        df = p2_data

    for col in df.columns:
        # Non-indicator columns (e.g. string labels) simply never compare equal to 1.
        if df[col].iloc[node_id] == 1:
            return col
    return False

In [24]:
def get_node_connections(node_id, df, larger_edges_lst, delete_duplicates = False):
    """List edges from one row to every other row with the same brigade result.

    Args:
        node_id: Positional row index of the source node in ``df``.
        df: Frame passed through to ``get_brigade_membership``.
        larger_edges_lst: Existing edges; only consulted when
            ``delete_duplicates`` is true.
        delete_duplicates: When true, skip any edge that ``is_duplicate``
            reports as already present in ``larger_edges_lst``.

    Returns:
        A list of ``(node_id, other_row)`` tuples in ascending ``other_row``
        order.
    """
    own_brigade = get_brigade_membership(node_id, df=df)

    def _keep(edge):
        # The duplicate lookup is only performed when filtering was requested.
        return not (delete_duplicates and is_duplicate(edge, larger_edges_lst))

    same_brigade_rows = (
        row
        for row in range(df.shape[0])
        if row != node_id and get_brigade_membership(row, df=df) == own_brigade
    )
    return [(node_id, row) for row in same_brigade_rows if _keep((node_id, row))]

#### Constructing graph with the given network data + coloring based on brigade membership

In [25]:
# Use the 'Unnamed: 0' column as the row index so the frame can be read as a
# labelled adjacency matrix (presumably this is the sheet's unlabelled first
# column holding the node IDs; confirm against the workbook).
p2_adj = p2_netw.set_index('Unnamed: 0')
# Build the graph from the adjacency matrix.
G_from_netw = nx.from_pandas_adjacency(p2_adj)

In [47]:
# Drop from the adjacency matrix every node that has no row in p2_data.
# NOTE(review): G_from_netw was already built from p2_adj before this filter runs,
# so removing rows/columns here does not change that graph. Confirm this is intended.
p2_data_nodes = list(p2_data['Node Label'])
netw_nodes = p2_adj.index
to_remove = [node for node in netw_nodes if node not in p2_data_nodes]

# Remove the unmatched labels from both axes so the matrix stays square.
p2_adj.drop(index=to_remove, inplace=True)
p2_adj.drop(columns=to_remove, inplace=True)

In [27]:
# Look up the brigade for every positional row of p2_data (get_brigade_membership
# falls back to p2_data when no frame is passed), then turn its "no match" result
# (False) into the string "None".
cat_df = (
    pd.DataFrame({"Node ID" : p2_data.index})
    .assign(Brigade=lambda frame: frame['Node ID'].apply(get_brigade_membership))
    .replace({False : "None"})
)
# Swap the positional IDs for the actual node labels.
cat_df['Node ID'] = p2_data['Node Label']

In [28]:
# Line the brigade labels up with the graph's node order and make them categorical.
# Graph nodes that have no row in cat_df come out of the reindex as NaN.
cat_df = cat_df.set_index('Node ID').reindex(G_from_netw.nodes())
cat_df['Brigade'] = pd.Categorical(cat_df['Brigade'])

# Flat copy with the node label as a regular column and a fresh 0..n-1 index.
nodes_to_brigades = cat_df.assign(**{'Node ID': cat_df.index}).reset_index(drop=True)
# Keep the brigade categories under their own name, since cat_df is reassigned later.
cat_brigades = cat_df.copy()

In [29]:
# nx.draw(G_from_netw, node_size=25, font_size=3, node_color=cat_df['Brigade'].cat.codes)

#### Coloring based on age at recruitment

In [30]:
# Keep ID and recruitment age, discard rows carrying the missing-value code 99999,
# and renumber the remaining rows from 0.
p2_age_data = (
    p2_attr[['ID', 'Age at Recruitment']]
    .loc[lambda frame: ~frame.isin([99999]).any(axis=1)]
    .reset_index(drop=True)
)

In [31]:
# NOTE: this is not the last expression in the cell and its result is neither
# assigned nor displayed, so the frequency table is computed and then discarded.
p2_age_data['Age at Recruitment'].value_counts()
def get_age_band(age):
    """Map a numeric age to a human-readable age-band label.

    Args:
        age: Age as a number.

    Returns:
        One of "Under 17", "Between 17 and 20", "Between 20 and 25",
        "Between 25 and 35", "Between 35 and 50", "Over 50", or "Unknown"
        when the value cannot be ordered (e.g. NaN).
    """
    # Ages below the first band previously fell through to "Over 50".
    if age < 17:
        return "Under 17"
    if 17 <= age <= 20:
        return "Between 17 and 20"
    if 20 < age <= 25:
        return "Between 20 and 25"
    if 25 < age <= 35:
        return "Between 25 and 35"
    if 35 < age <= 50:
        return "Between 35 and 50"
    if age > 50:
        return "Over 50"
    # Every comparison is False for NaN, so report it explicitly instead of
    # filing it under "Over 50".
    return "Unknown"

In [32]:
# Re-read the Period 2 adjacency matrix and drop every node that has no row in
# p2_age_data (i.e. no usable recruitment age).
p2_adj = pd.read_excel(xls, sheet_name='Period2_Network').set_index('Unnamed: 0')
p2_age_data_nodes = list(p2_age_data['ID'])
netw_nodes = p2_adj.index
to_remove = [node for node in netw_nodes if node not in p2_age_data_nodes]

# Remove the unmatched labels from both axes so the matrix stays square.
p2_adj.drop(index=to_remove, inplace=True)
p2_adj.drop(columns=to_remove, inplace=True)

In [33]:
# Pair each node ID with the age band derived from its recruitment age.
cat_df = pd.DataFrame({
    "ID" : p2_age_data['ID'],
    "Age Band" : p2_age_data['Age at Recruitment'].apply(get_age_band),
})
# Keep the age bands under their own name, since cat_df is re-indexed afterwards.
cat_ages = cat_df.copy()
cat_df.head(2)

Unnamed: 0,ID,Age Band
0,ZF1001,Between 35 and 50
1,ZF1007,Between 20 and 25


In [34]:
# Line the age bands up with the graph's node order and make them categorical.
# Graph nodes that have no row in cat_df come out of the reindex as NaN.
cat_df = cat_df.set_index('ID').reindex(G_from_netw.nodes())
cat_df['Age Band'] = pd.Categorical(cat_df['Age Band'])
# nx.draw(G_from_netw, node_size=25, font_size=3, with_labels=True, node_color=cat_df['Age Band'].cat.codes)