In [44]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

In [45]:
# Remote CSV (data.kimetrica.com) that serves as the raw input for this notebook.
url = 'https://data.kimetrica.com/dataset/4dbc3cc7-9474-49f2-bfd4-231e78401caa/resource/7423b71d-ce8c-437c-9fe6-2d9ba58d6155/download/dataset_communal_cnflict_model.csv'
# The first CSV column is used as the row index instead of a data column.
df = pd.read_csv(filepath_or_buffer=url, index_col=0)
# Show the available column names as a plain Python list.
df.columns.tolist()

['admin1',
 'admin2',
 'year',
 'month',
 'fatalities',
 'cc_event_count',
 'actor_state',
 'actor_rebel_groups',
 'actor_political_militias',
 'actor_identity_militias',
 'actor_civilians',
 'actor_others',
 'cc_onset',
 'admin0',
 'inflation_all',
 'inflation_food',
 'inflation_non_food',
 'rainfall',
 'rainfall_lag',
 'temperature mean',
 'btotl',
 'maize_ETB_KG',
 'teff_ETB_KG',
 'wheat_ETB_KG',
 'mean ndvi',
 'ndvi_lag',
 'fs_Crisis',
 'fs_Emergency',
 'fs_Famine',
 'fs_Minimal',
 'fs_Stressed',
 'cluster']

In [46]:
# (rows, columns) of the frame as loaded, before any derived column is added.
df.shape

(15895, 32)

In [47]:
# Assemble a first-of-month timestamp from the separate year / month columns;
# pd.to_datetime accepts a frame holding year, month and day components.
ymd = df[['year', 'month']].assign(Day=1)
df['date'] = pd.to_datetime(ymd)
# Later cells refer to the cluster label under the name "group".
df = df.rename(columns={'cluster': 'group'})
df.shape

(15895, 33)

In [48]:
group = ["admin1", "admin2"] # to assign the groups for the multiple group case
# NOTE(review): `group` above is not used by any cell visible in this section.

# Subset of columns carried into the lagged feature table: the date / group
# keys, the two target-related columns and the candidate predictors.
df_combo = df[[
    'date', 'group', 'cc_onset', 'fatalities',
    'actor_state',
    'actor_rebel_groups',
    'actor_political_militias',
    'actor_identity_militias',
    'actor_civilians',
    'actor_others',
    'inflation_all',
    'rainfall',
    'temperature mean',
    'btotl',
    'maize_ETB_KG',
    'teff_ETB_KG',
    'wheat_ETB_KG',
    'mean ndvi',
    'fs_Crisis',
    'fs_Emergency',
    'fs_Famine',
    'fs_Minimal',
    'fs_Stressed',
]] # many vars, many groups

# Group on the scalar label rather than a one-element list: with a list,
# newer pandas versions report each group key as a 1-tuple, which would then
# be passed to get_group() and written into the "group" column downstream.
grouped_df = df_combo.groupby("group")


In [49]:
def lag_by_group(key, value_df, periods=12):
    """Return one group's rows ordered by date with its columns lagged by ``periods`` rows.

    Parameters
    ----------
    key : scalar
        Group label; written into the ``group`` column of every returned row.
    value_df : pandas.DataFrame
        Rows belonging to a single group. Must contain a ``date`` column.
    periods : int, default 12
        Number of rows to shift by.
        NOTE(review): this corresponds to a 12-month lag only if the group has
        exactly one row per month and no gaps -- confirm against the data.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``date`` in ascending order. Every column except ``group``
        holds the value found ``periods`` rows earlier (missing for the first
        ``periods`` rows); ``group`` equals ``key`` on all rows.
    """
    lagged = (
        value_df.assign(group=key)  # assign returns a copy; the input frame is untouched
        .sort_values(by=["date"], ascending=True)
        .set_index(["date"])
        .shift(periods)
    )
    # shift() moves every column, including the group identifier, which would
    # leave the first `periods` rows without a group label. Re-assign the key
    # so the identifier stays intact (column position is preserved).
    return lagged.assign(group=key)


In [50]:
# Lag each group on its own, then stack the per-group results into one frame.
dflist = []
for g in grouped_df.groups:
    dflist.append(lag_by_group(g, grouped_df.get_group(g)))
# reset_index() turns the "date" index set by lag_by_group back into a column.
df1 = pd.concat(dflist, axis=0).reset_index()
df1.shape

(15895, 23)

In [51]:
# These two columns were shifted by lag_by_group, so label them as lagged
# before the unshifted versions are merged back in.
lag_names = {'cc_onset': 'cc_onset_lag', 'fatalities': 'fatalities_lag'}
df1 = df1.rename(columns=lag_names)

In [52]:
# Both bounds are exclusive: only rows dated strictly after start_date and
# strictly before end_date are kept.
start_date = "2009-12-1"
end_date = "2019-1-1"
dates = df1['date']
mask = (start_date < dates) & (dates < end_date)
df1 = df1[mask]
# Unshifted versions of the two target-related columns, keyed by date and group.
cc_onset = df.loc[:, ['date', 'group', 'cc_onset', 'fatalities']]
cc_onset.shape

(15895, 4)

In [53]:
# Attach the unshifted cc_onset / fatalities to each lagged row; a left join
# keeps every row of the date-filtered lagged frame.
df2 = df1.merge(cc_onset, on=['date', 'group'], how='left')
df2.shape

(5940, 25)

In [54]:
# The columns that came from the merge are the unshifted values; mark them as
# "current" to distinguish them from the *_lag columns.
current_names = {'cc_onset': 'cc_onset_current', 'fatalities': 'fatalities_current'}
df2 = df2.rename(columns=current_names)
df2.columns

Index(['date', 'group', 'cc_onset_lag', 'fatalities_lag', 'actor_state',
       'actor_rebel_groups', 'actor_political_militias',
       'actor_identity_militias', 'actor_civilians', 'actor_others',
       'inflation_all', 'rainfall', 'temperature mean', 'btotl',
       'maize_ETB_KG', 'teff_ETB_KG', 'wheat_ETB_KG', 'mean ndvi', 'fs_Crisis',
       'fs_Emergency', 'fs_Famine', 'fs_Minimal', 'fs_Stressed',
       'cc_onset_current', 'fatalities_current'],
      dtype='object')

In [55]:
# Final column layout: keys first, then current targets, then lagged targets,
# then the remaining lagged predictors.
column_order = [
    'date', 'group',
    'cc_onset_current', 'fatalities_current',
    'cc_onset_lag', 'fatalities_lag',
    'actor_state', 'actor_rebel_groups', 'actor_political_militias',
    'actor_identity_militias', 'actor_civilians', 'actor_others',
    'inflation_all', 'rainfall', 'temperature mean', 'btotl',
    'maize_ETB_KG', 'teff_ETB_KG', 'wheat_ETB_KG', 'mean ndvi',
    'fs_Crisis', 'fs_Emergency', 'fs_Famine', 'fs_Minimal', 'fs_Stressed',
]
df2 = df2[column_order]

In [62]:
# Persist the assembled dataset as CSV (row index included, as by default).
# NOTE(review): this is an absolute, machine-specific path; the cell fails on
# any machine where the directory does not exist -- consider a configurable
# output directory.
output_csv = '/Users/yaredhurisa/Google Drive/Probability of conflict/indicators/eth_cc_final_dataset.csv'
df2.to_csv(output_csv)

In [60]:
# Sanity check: per-date totals of current vs. lagged onset flags.
# Several columns must be selected from a GroupBy with a list; the bare
# `['a', 'b']` form is a tuple key, which newer pandas versions reject.
df2.groupby('date')[['cc_onset_current', 'cc_onset_lag']].sum()

Unnamed: 0_level_0,cc_onset_current,cc_onset_lag
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-01-01,1.0,0.0
2010-02-01,1.0,0.0
2010-03-01,1.0,0.0
2010-04-01,1.0,0.0
2010-05-01,0.0,2.0
2010-06-01,0.0,1.0
2010-07-01,0.0,2.0
2010-08-01,0.0,0.0
2010-09-01,0.0,0.0
2010-10-01,0.0,0.0
