In [93]:
import pandas as pd

%matplotlib inline

In [94]:
# Load the event table and store the low-cardinality code columns as
# categoricals.
events = pd.read_csv('ged171.csv').astype({
    'type_of_violence': 'category',
    'where_prec': 'category',
    'date_prec': 'category',
})

# Replace the raw type_of_violence codes with readable labels.
# rename_categories returns a new categorical: its `inplace` argument was
# deprecated and later removed from pandas, so the result is assigned back.
# The labels are applied positionally to the (sorted) categories --
# presumably the codes 1, 2, 3; TODO confirm against the dataset codebook.
events['type_of_violence'] = events['type_of_violence'].cat.rename_categories(
    ['state-based', 'non-state', 'one-sided']
)

# Index the events by their `id` column.
events = events.set_index('id')

In [95]:
# List the columns available in the event table.
events.columns

Index(['year', 'active_year', 'type_of_violence', 'conflict_new_id',
       'conflict_name', 'dyad_new_id', 'dyad_name', 'side_a_new_id', 'gwnoa',
       'side_a', 'side_b_new_id', 'gwnob', 'side_b', 'number_of_sources',
       'source_article', 'source_office', 'source_date', 'source_headline',
       'source_original', 'where_prec', 'where_coordinates', 'adm_1', 'adm_2',
       'latitude', 'longitude', 'geom_wkt', 'priogrid_gid', 'country',
       'country_id', 'region', 'event_clarity', 'date_prec', 'date_start',
       'date_end', 'deaths_a', 'deaths_b', 'deaths_civilians',
       'deaths_unknown', 'best', 'low', 'high'],
      dtype='object')

In [96]:
# geom_wkt encodes the same point as the longitude/latitude columns,
# so one of the two representations can be dropped.
events[['geom_wkt', 'longitude', 'latitude']].head()

Unnamed: 0_level_0,geom_wkt,longitude,latitude
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4,POINT (44.206667 15.354722),44.206667,15.354722
5,POINT (45.036667 12.779444),45.036667,12.779444
6,POINT (46.141765 13.786202),46.141765,13.786202
7,POINT (46.141765 13.786202),46.141765,13.786202
10,POINT (45.300000 15.616667),45.3,15.616667


## DF to Map

Adding one marker per event is apparently too slow; this needs batch processing.

In [97]:
def add_markers_from_df(df, m, marker_generator, lat_col='latitude', lon_col='longitude'):
    """Add one marker per row of ``df`` to the provided folium map.

    Keyword arguments:
    df -- dataframe containing a latitude and a longitude column
    m -- the map where markers will be placed
    marker_generator -- used to generate the marker.
                        It has to accept the dataframe index label of the
                        row and the position as a ``(latitude, longitude)``
                        tuple, e.g. ``lambda index, position: ...``, and
                        return an object exposing ``add_to(m)``.
    lat_col -- name of the latitude column (default 'latitude')
    lon_col -- name of the longitude column (default 'longitude')

    Returns the map ``m`` so calls can be chained. An empty dataframe
    leaves the map untouched.
    """
    # Walk the two coordinate columns in lockstep instead of using
    # iterrows(): this avoids building a Series per row and hands the
    # generator a real tuple, as the contract above promises.
    for idx, lat, lon in zip(df.index, df[lat_col], df[lon_col]):
        marker_generator(idx, (lat, lon)).add_to(m)
    return m

### Usage example

In [6]:
def simple_marker_generator(idx, pos, df):
    """Build a folium marker at ``pos`` whose popup is the conflict name.

    Keyword arguments:
    idx -- index label of the event row in ``df``
    pos -- marker position, passed to ``folium.Marker`` as its location
    df -- dataframe with a ``conflict_name`` column

    Note: ``folium`` must already be imported in the notebook namespace.
    """
    # idx is an index *label*, not a row position, so it has to be
    # resolved with .loc; .iloc would pick the wrong row or raise
    # IndexError whenever the labels are not 0..n-1.
    return folium.Marker(pos, popup=df.loc[idx, 'conflict_name'])

In [185]:
# Do not execute, crashes
#m = folium.Map()
#generator = lambda idx, pos: simple_marker_generator(idx, pos, events)
#add_markers_from_df(events, m, generator)

# One dataframe for all conflicts

- `date_start` and `date_end` are combined to obtain a duration for each event; these durations are summed across all events of a conflict.
- `date_start` becomes the earliest start date among the events of a conflict.
- `date_end` similarly becomes the most recent end date.
- The death counts (`deaths_civilians`, `deaths_unknown`, `best`, `low`, `high`) are summed.
- A list of all sides and coordinates, and a set of all countries, is kept for each conflict.

In [108]:
# Re-list the columns. NOTE: the 'duration' column in the saved output comes
# from the date-parsing cell, which appears after this cell but was executed
# before it (In [107] vs. In [108]); a top-to-bottom run will not show it here.
events.columns

Index(['year', 'active_year', 'type_of_violence', 'conflict_new_id',
       'conflict_name', 'dyad_new_id', 'dyad_name', 'side_a_new_id', 'gwnoa',
       'side_a', 'side_b_new_id', 'gwnob', 'side_b', 'number_of_sources',
       'source_article', 'source_office', 'source_date', 'source_headline',
       'source_original', 'where_prec', 'where_coordinates', 'adm_1', 'adm_2',
       'latitude', 'longitude', 'geom_wkt', 'priogrid_gid', 'country',
       'country_id', 'region', 'event_clarity', 'date_prec', 'date_start',
       'date_end', 'deaths_a', 'deaths_b', 'deaths_civilians',
       'deaths_unknown', 'best', 'low', 'high', 'duration'],
      dtype='object')

In [107]:
# Parse the start/end date strings into datetimes and derive each event's
# duration. The pattern lives in DATE_FORMAT rather than in a global named
# `format`, which would shadow the builtin format() for every later cell.
DATE_FORMAT = '%Y-%m-%d'
events['date_start'] = pd.to_datetime(events['date_start'], format=DATE_FORMAT)
events['date_end'] = pd.to_datetime(events['date_end'], format=DATE_FORMAT)
# Timedelta between end and start; an event that starts and ends on the
# same day therefore has a duration of 0 days.
events['duration'] = events['date_end'] - events['date_start']

In [174]:
# Collapse the event table to one row per conflict_new_id.
# Time span: total event duration, earliest start, latest end.
per_conflict_aggs = {'duration': 'sum', 'date_start': 'min', 'date_end': 'max'}
# Death counts and estimates are added up across the conflict's events.
for count_col in ('deaths_civilians', 'deaths_unknown', 'best', 'low', 'high'):
    per_conflict_aggs[count_col] = 'sum'
# Coordinates and sides keep every occurrence (lists); countries are
# de-duplicated (set).
per_conflict_aggs['latitude'] = list
per_conflict_aggs['longitude'] = list
per_conflict_aggs['country'] = set
per_conflict_aggs['side_a'] = list
per_conflict_aggs['side_b'] = list

conflicts = events.groupby('conflict_new_id').agg(per_conflict_aggs)

In [181]:
# One conflict_name per conflict_new_id, indexed by conflict_new_id.
# De-duplicating on the id alone (first name seen wins) keeps the index
# unique even if a conflict appears under more than one name spelling;
# a duplicated index would make index-aligned assignment of `names` fail.
names = events.drop_duplicates(subset='conflict_new_id').set_index('conflict_new_id')['conflict_name']

In [183]:
# The assignment aligns on the index: both `conflicts` and `names` are
# indexed by conflict_new_id.
conflicts['name'] = names

In [186]:
# Preview the first rows of the per-conflict table.
conflicts.head()

Unnamed: 0_level_0,duration,date_start,date_end,deaths_civilians,deaths_unknown,best,low,high,latitude,longitude,country,side_a,side_b,name
conflict_new_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
205,104 days,1990-04-17,2016-09-18,92,56,218,218,543,"[36.246389, 36.155278, 37.32322, 35.73671, 36....","[46.266389, 45.478889, 48.041134, 46.274843, 4...","{Germany, Iraq, Turkey, Iran}","[Government of Iran, Government of Iran, Gover...","[KDPI, KDPI, KDPI, KDPI, KDPI, KDPI, KDPI, KDP...",Iran:Kurdistan
209,3978 days,1989-01-01,2016-08-20,276,221,7786,7735,10479,"[12.782008, 7.780424, 12.75, 8.895, 12.4863, 1...","[123.952024, 123.292649, 124.033333, 125.77527...",{Philippines},"[Government of Philippines, Government of Phil...","[CPP, CPP, CPP, CPP, CPP, CPP, CPP, CPP, CPP, ...",Philippines:Government
218,3917 days,1989-01-01,2016-12-30,187,331,1701,1677,2492,"[34.57173, 34.683056, 34.484389, 33.831266, 34...","[73.89724, 76.645, 75.658389, 74.046931, 73.85...","{Pakistan, India}","[Government of India, Government of India, Gov...","[Government of Pakistan, Government of Pakista...",Government of India-Government of Pakistan
220,0 days,1989-02-03,1989-02-03,0,150,150,17,300,[-25.29389],[-57.611111],{Paraguay},[Government of Paraguay],[Military faction (forces of Andres Rodriguez)],Paraguay:Government
221,3086 days,1989-01-01,2015-09-30,196,1366,4550,4155,6362,"[17.5, 17.5, 17.5, 17.5, 17.5, 17.5, 22.0, 17....","[97.75, 97.75, 97.75, 97.75, 97.75, 97.75, 98....","{Myanmar (Burma), Thailand}","[Government of Myanmar (Burma), Government of ...","[KNU, KNU, KNU, KNU, KNU, DKBA 5, KNU, KNU, KN...",Myanmar (Burma):Karen


In [190]:
# Columns that now hold a collection per conflict get plural names.
singular_cols = ['latitude', 'longitude', 'country', 'side_a', 'side_b']
plural_cols = ['latitudes', 'longitudes', 'countries', 'sides_a', 'sides_b']
conflicts = conflicts.rename(columns=dict(zip(singular_cols, plural_cols)))