# Networking

In [82]:
import numpy as np
import pandas as pd
import networkx as nx
import pickle
from tqdm import tqdm

from geopy.distance import vincenty, great_circle


In [2]:
# Load the conflict events table from a local pickle file.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only run this against a file you produced yourself or otherwise trust.
with open('conflict.pickle', 'rb') as data_source:
    conflict_df = pickle.load(data_source)

In [3]:
# Load the refugee table from a local pickle file.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only run this against a file you produced yourself or otherwise trust.
with open('refugee.pickle', 'rb') as data_source:
    refugee_df = pickle.load(data_source)

In [4]:
# Preview the first row of each table to check which columns are available.
display(conflict_df.head(1), refugee_df.head(1))

Unnamed: 0,id,year,type_of_violence,conflict_new_id,conflict_name,side_a_new_id,gwnoa,side_a,gwnob,side_b_new_id,...,longitude,geom_wkt,country,country_id,date_start,deaths_a,deaths_b,deaths_civilians,deaths_unknown,best
0,4,2010,1,230,Yemen (North Yemen):Government,123,678.0,Government of Yemen (North Yemen),,881,...,44.206667,POINT (44.206667 15.354722),Yemen (North Yemen),678,2010,2,0,0,0,2


Unnamed: 0,year,origin,refugee,asylum,internally_displaced,stateless,others,total
0,1989.0,Dem. Rep. of the Congo,100786.0,0.0,0.0,0.0,0.0,100786.0


In [5]:
# Index the events by their 'id' column so that the event ids can be used
# directly as graph node ids later on.
# The guard keeps this cell idempotent: once 'id' has become the index it is no
# longer a column, so calling set_index('id') a second time (e.g. when the cell
# is re-run) would raise a KeyError.
if 'id' in conflict_df.columns:
    conflict_df = conflict_df.set_index('id')
display(conflict_df.head(2))

Unnamed: 0_level_0,year,type_of_violence,conflict_new_id,conflict_name,side_a_new_id,gwnoa,side_a,gwnob,side_b_new_id,side_b,...,longitude,geom_wkt,country,country_id,date_start,deaths_a,deaths_b,deaths_civilians,deaths_unknown,best
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4,2010,1,230,Yemen (North Yemen):Government,123,678.0,Government of Yemen (North Yemen),,881,AQAP,...,44.206667,POINT (44.206667 15.354722),Yemen (North Yemen),678,2010,2,0,0,0,2
5,2011,3,715,Government of Yemen (North Yemen) - Civilians,123,678.0,Government of Yemen (North Yemen),,1,Civilians,...,45.036667,POINT (45.036667 12.779444),Yemen (North Yemen),678,2011,0,0,0,0,0


In [83]:
def distance_between_nodes(graph, node_1_id, node_2_id):
    """Get distance between two nodes by using their latitude and longitude property.

    Args:
        graph: graph whose nodes carry "latitude" and "longitude" attributes.
        node_1_id: id of the first node.
        node_2_id: id of the second node.

    Returns:
        The result of ``vincenty`` for the two (latitude, longitude) pairs. If
        that call fails, the result of ``great_circle`` instead. If both fail,
        a diagnostic is printed and ``None`` is returned.
        NOTE(review): presumably these are geopy distance objects rather than
        plain floats -- confirm before using them as numeric edge weights.
    """
    # Graph.node was removed in NetworkX 2.4. Prefer it when it exists (older
    # releases, where Graph.nodes is not subscriptable) and fall back to
    # Graph.nodes otherwise, so the lookup works on both.
    node_attributes = getattr(graph, "node", None)
    if node_attributes is None:
        node_attributes = graph.nodes

    pos_1 = (node_attributes[node_1_id]["latitude"], node_attributes[node_1_id]["longitude"])
    pos_2 = (node_attributes[node_2_id]["latitude"], node_attributes[node_2_id]["longitude"])

    # Sometimes vincenty doesn't converge; fall back to the great circle distance.
    # Catch Exception rather than using a bare except so that KeyboardInterrupt
    # and SystemExit still stop the long-running loops that call this function.
    try:
        distance = vincenty(pos_1, pos_2)
    except Exception:
        try:
            distance = great_circle(pos_1, pos_2)
        except Exception:
            # Both formulas failed: report the offending nodes and use None as weight.
            print("Error: failed to get distance between node {} and node {}".format(node_1_id, node_2_id))
            print("Node 1 positions: {}".format(pos_1))
            print("Node 2 positions: {}".format(pos_2))
            distance = None
    return distance
    

In [78]:
def get_conflict_lat_long(conflict_id):
    """Return the mean position of all events belonging to one conflict.

    Averages the ``latitude`` and ``longitude`` columns over the rows of the
    module-level ``conflict_df`` whose ``conflict_new_id`` equals ``conflict_id``.

    Returns:
        A ``(latitude, longitude)`` tuple of the two column means.
    """
    conflict_events = conflict_df[conflict_df.conflict_new_id == conflict_id]
    mean_longitude = conflict_events.longitude.mean()
    mean_latitude = conflict_events.latitude.mean()
    return (mean_latitude, mean_longitude)
    

In [79]:
G = nx.Graph()

# Create a node for each conflict event, keyed by the event id (the index of
# conflict_df), carrying its longitude and latitude.
# The attributes are passed to add_node directly instead of being assigned
# through G.node afterwards: G.node was removed in NetworkX 2.4, whereas
# add_node(node, **attributes) is available on every release. Walking the
# columns once also avoids one conflict_df.loc lookup per attribute per event.
event_rows = zip(conflict_df.index.values,
                 conflict_df["longitude"].values,
                 conflict_df["latitude"].values)
for index, longitude, latitude in event_rows:
    G.add_node(index, nature="event", longitude=longitude, latitude=latitude)

# Create a node for each unique conflict, use a special node id to avoid conflict with events
for conflict_id in conflict_df.conflict_new_id.unique():
    conflict_node_name = "conflict_{}".format(conflict_id)

    # Position the conflict node at the average position of its events
    latitude, longitude = get_conflict_lat_long(conflict_id)
    G.add_node(conflict_node_name, nature="conflict",
               longitude=longitude, latitude=latitude)

In [80]:
# Link every event node to the node of the conflict it belongs to; the edge
# weight is the distance between the event and the conflict's average position.
for conflict_id in tqdm(conflict_df.conflict_new_id.unique()):
    conflict_node_name = "conflict_{}".format(conflict_id)
    belongs_to_conflict = conflict_df.conflict_new_id == conflict_id
    for event_id in conflict_df.loc[belongs_to_conflict].index.values:
        edge_weight = distance_between_nodes(G, conflict_node_name, event_id)
        G.add_edge(conflict_node_name, event_id, weight=edge_weight)
        


  0%|          | 0/997 [00:00<?, ?it/s][A
  1%|          | 6/997 [00:00<00:23, 42.04it/s][A
  4%|▎         | 36/997 [00:00<00:06, 148.00it/s][A
 11%|█         | 111/997 [00:00<00:02, 321.86it/s][A
 14%|█▍        | 141/997 [00:00<00:02, 308.33it/s][A
 19%|█▉        | 191/997 [00:00<00:02, 341.19it/s][A
 23%|██▎       | 226/997 [00:00<00:02, 326.90it/s][A
 32%|███▏      | 323/997 [00:00<00:01, 394.15it/s][A
 37%|███▋      | 369/997 [00:00<00:01, 384.52it/s][A
 41%|████      | 411/997 [00:01<00:01, 387.77it/s][A
 52%|█████▏    | 515/997 [00:01<00:01, 436.06it/s][A
Exception in thread Thread-11:
Traceback (most recent call last):
  File "/home/lal/Programs/anaconda3/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/home/lal/Programs/anaconda3/lib/python3.6/site-packages/tqdm/_tqdm.py", line 144, in run
    for instance in self.tqdm_cls._instances:
  File "/home/lal/Programs/anaconda3/lib/python3.6/_weakrefset.py", line 60, in __iter__
    for it

In [84]:
# Connect every pair of distinct conflicts with an edge weighted by the
# distance between their average positions.
# Computed once up front: the list of conflict ids does not change in the loop.
conflict_ids = conflict_df.conflict_new_id.unique()

for position, conflict_id_1 in enumerate(tqdm(conflict_ids)):
    conflict_node_name_1 = "conflict_{}".format(conflict_id_1)

    # G is undirected, so each unordered pair only needs to be visited once.
    # Starting the inner loop after the current position also guarantees that
    # no self loop is created (the previous `pass` did not skip the iteration,
    # so every conflict node ended up with an edge to itself).
    for conflict_id_2 in conflict_ids[position + 1:]:
        conflict_node_name_2 = "conflict_{}".format(conflict_id_2)

        G.add_edge(conflict_node_name_1, conflict_node_name_2,
                   weight=distance_between_nodes(G, conflict_node_name_1, conflict_node_name_2))

100%|██████████| 997/997 [00:25<00:00, 38.40it/s]


In [85]:
# Persist the graph G (nodes, edges and their attributes) to a pickle file.
# NOTE(review): loading it back requires trusting the file, as with any pickle.
with open('distance_nx.pickle', 'wb') as out:
    pickle.dump(G, out)