In [29]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Predicting the onset of conflict in the coming 12 months at the admin2 level in Ethiopia
# Data preparation for timeseries analysis 
#### Y= cc_onset
#### X= 'fatalities', 'actor_state', 'actor_rebel_groups', 'actor_political_militias', 'actor_identity_militias', 'actor_civilians', 'actor_others', 'cc_onset', 'inflation_all', 'rainfall', 'rainfall_lag', 'temperature mean', 'btotl', 'maize_ETB_KG', 'teff_ETB_KG', 'wheat_ETB_KG', 'mean ndvi', 'fs_Crisis', 'fs_Emergency', 'fs_Famine', 'fs_Minimal', 'fs_Stressed'

#### Group variables: admin1, admin2, year, month

#### Data structure objective: Yt=X(t-1)+...+X(t-12)

#### where t indexes months, so X(t-k) is the value of X lagged by k months

In [30]:
# Download the communal-conflict modelling table; the first CSV column
# becomes the row index.
url = 'https://data.kimetrica.com/dataset/4dbc3cc7-9474-49f2-bfd4-231e78401caa/resource/7423b71d-ce8c-437c-9fe6-2d9ba58d6155/download/dataset_communal_cnflict_model.csv'
df = pd.read_csv(url, index_col=0)
# Show the available column names as a plain list.
df.columns.tolist()

['admin1',
 'admin2',
 'year',
 'month',
 'fatalities',
 'conflict',
 'cc_onset',
 'actor_state',
 'actor_political_militias',
 'actor_identity_militias',
 'actor_others',
 'actor_rebel_groups',
 'actor_civilians',
 'inflation_all',
 'inflation_food',
 'inflation_non_food',
 'rainfall',
 'temperature mean',
 'btotl',
 'maize_ETB_KG',
 'teff_ETB_KG',
 'wheat_ETB_KG',
 'mean ndvi',
 'ndvi_lag',
 'group',
 'fs_Crisis',
 'fs_Emergency',
 'fs_Famine',
 'fs_Minimal',
 'fs_Stressed']

In [31]:
# Build a first-of-month timestamp from the year/month columns
# (a constant day column of 1 is added just for the conversion).
df['date'] = pd.to_datetime(df[['year', 'month']].assign(Day=1))
# Only has an effect when the frame carries a 'cluster' column.
df = df.rename(columns={'cluster': 'group'})
df.shape

(21312, 31)

In [32]:
group = ['group']  # grouping key name(s); not used by the statements below
# Columns carried into the lagged feature table: the two join keys
# ('date', 'group') followed by the target and every predictor.
df_combo = df[[
    'date', 'group', 'cc_onset', 'fatalities',
    'actor_state', 'actor_rebel_groups', 'actor_political_militias',
    'actor_identity_militias', 'actor_civilians', 'actor_others',
    'inflation_all', 'rainfall', 'temperature mean', 'btotl',
    'maize_ETB_KG', 'teff_ETB_KG', 'wheat_ETB_KG', 'mean ndvi',
    'fs_Crisis', 'fs_Emergency', 'fs_Famine', 'fs_Minimal', 'fs_Stressed',
]]  # many vars, many groups

# Group by a scalar key rather than a one-element list so that
# `grouped_df.groups` keys and `get_group()` arguments are plain labels;
# recent pandas releases deprecate scalar lookups on list-keyed groupbys.
grouped_df = df_combo.groupby("group")

def lag_by_group(key, value_df, periods=1):
    """Return one group's rows, ordered by date, with every feature lagged.

    Parameters
    ----------
    key : label written into the ``group`` column of the result.
    value_df : DataFrame with the rows of a single group; needs a ``date`` column.
    periods : number of rows to shift by (default 1, the lag used by this cell).

    Returns
    -------
    DataFrame indexed by ``date`` (ascending). Each non-key column holds the
    value from ``periods`` rows earlier, so the first ``periods`` rows are NaN.
    The shift is by row position, not by calendar month.
    """
    ordered = value_df.sort_values(by=["date"], ascending=True).set_index(["date"])
    # Write the key after shifting: shifting the ``group`` column as well
    # would blank the join key on the first ``periods`` rows.
    return ordered.shift(periods).assign(group=key)
# Lag each group on its own, then stack the per-group frames.
dflist = [
    lag_by_group(name, grouped_df.get_group(name))
    for name in grouped_df.groups.keys()
]
# Tag every lagged column with the "_1" suffix.
# NOTE(review): 'fs_Emergency' maps to 'df_emergency_1' (not 'fs_...');
# it looks like a typo but the column name is kept unchanged here.
df1 = (
    pd.concat(dflist, axis=0)
    .reset_index()
    .rename(columns={
        'cc_onset': 'cc_onset_1', 'fatalities': 'fatalities_1',
        'actor_state': 'actor_state_1', 'actor_rebel_groups': 'actor_rebel_groups_1',
        'actor_political_militias': 'actor_political_militias_1',
        'actor_identity_militias': 'actor_identity_militias_1',
        'actor_civilians': 'actor_civilians_1', 'actor_others': 'actor_others_1',
        'inflation_all': 'inflation_all_1', 'rainfall': 'rainfall_1',
        'temperature mean': 'temperature_1', 'btotl': 'pop_1',
        'maize_ETB_KG': 'maize_kg_birr_1', 'teff_ETB_KG': 'teff_kg_birr_1',
        'wheat_ETB_KG': 'wheat_kg_birr_1', 'mean ndvi': 'ndvi_1',
        'fs_Crisis': 'fs_crisis_1', 'fs_Emergency': 'df_emergency_1',
        'fs_Famine': 'fs_famine_1', 'fs_Minimal': 'fs_minimal_1',
        'fs_Stressed': 'fs_stressed_1',
    })
)

In [33]:
# Join keys plus the current-month target and fatalities; lagged features are merged onto this.
y = df.loc[:, ['group', 'date', 'cc_onset', 'fatalities']]

In [34]:
# Attach the 1-month lag features to every (group, date) row of y.
df3 = y.merge(df1, how='left', on=['group', 'date'])

In [35]:
group = ['group']  # grouping key name(s); not used by the statements below
# Columns carried into the lagged feature table: the two join keys
# ('date', 'group') followed by the target and every predictor.
df_combo = df[[
    'date', 'group', 'cc_onset', 'fatalities',
    'actor_state', 'actor_rebel_groups', 'actor_political_militias',
    'actor_identity_militias', 'actor_civilians', 'actor_others',
    'inflation_all', 'rainfall', 'temperature mean', 'btotl',
    'maize_ETB_KG', 'teff_ETB_KG', 'wheat_ETB_KG', 'mean ndvi',
    'fs_Crisis', 'fs_Emergency', 'fs_Famine', 'fs_Minimal', 'fs_Stressed',
]]  # many vars, many groups

# Group by a scalar key rather than a one-element list so that
# `grouped_df.groups` keys and `get_group()` arguments are plain labels;
# recent pandas releases deprecate scalar lookups on list-keyed groupbys.
grouped_df = df_combo.groupby("group")

def lag_by_group(key, value_df, periods=2):
    """Return one group's rows, ordered by date, with every feature lagged.

    Parameters
    ----------
    key : label written into the ``group`` column of the result.
    value_df : DataFrame with the rows of a single group; needs a ``date`` column.
    periods : number of rows to shift by (default 2, the lag used by this cell).

    Returns
    -------
    DataFrame indexed by ``date`` (ascending). Each non-key column holds the
    value from ``periods`` rows earlier, so the first ``periods`` rows are NaN.
    The shift is by row position, not by calendar month.
    """
    ordered = value_df.sort_values(by=["date"], ascending=True).set_index(["date"])
    # Write the key after shifting: shifting the ``group`` column as well
    # would blank the join key on the first ``periods`` rows.
    return ordered.shift(periods).assign(group=key)
# Lag each group on its own, then stack the per-group frames.
dflist = [
    lag_by_group(name, grouped_df.get_group(name))
    for name in grouped_df.groups.keys()
]
# Tag every lagged column with the "_2" suffix.
# NOTE(review): 'fs_Emergency' maps to 'df_emergency_2' (not 'fs_...');
# it looks like a typo but the column name is kept unchanged here.
df2 = (
    pd.concat(dflist, axis=0)
    .reset_index()
    .rename(columns={
        'cc_onset': 'cc_onset_2', 'fatalities': 'fatalities_2',
        'actor_state': 'actor_state_2', 'actor_rebel_groups': 'actor_rebel_groups_2',
        'actor_political_militias': 'actor_political_militias_2',
        'actor_identity_militias': 'actor_identity_militias_2',
        'actor_civilians': 'actor_civilians_2', 'actor_others': 'actor_others_2',
        'inflation_all': 'inflation_all_2', 'rainfall': 'rainfall_2',
        'temperature mean': 'temperature_2', 'btotl': 'pop_2',
        'maize_ETB_KG': 'maize_kg_birr_2', 'teff_ETB_KG': 'teff_kg_birr_2',
        'wheat_ETB_KG': 'wheat_kg_birr_2', 'mean ndvi': 'ndvi_2',
        'fs_Crisis': 'fs_crisis_2', 'fs_Emergency': 'df_emergency_2',
        'fs_Famine': 'fs_famine_2', 'fs_Minimal': 'fs_minimal_2',
        'fs_Stressed': 'fs_stressed_2',
    })
)

In [36]:
# Add the 2-month lag features next to the 1-month ones.
df4 = df3.merge(df2, how='left', on=['group', 'date'])

In [37]:
group = ['group']  # grouping key name(s); not used by the statements below
# Columns carried into the lagged feature table: the two join keys
# ('date', 'group') followed by the target and every predictor.
df_combo = df[[
    'date', 'group', 'cc_onset', 'fatalities',
    'actor_state', 'actor_rebel_groups', 'actor_political_militias',
    'actor_identity_militias', 'actor_civilians', 'actor_others',
    'inflation_all', 'rainfall', 'temperature mean', 'btotl',
    'maize_ETB_KG', 'teff_ETB_KG', 'wheat_ETB_KG', 'mean ndvi',
    'fs_Crisis', 'fs_Emergency', 'fs_Famine', 'fs_Minimal', 'fs_Stressed',
]]  # many vars, many groups

# Group by a scalar key rather than a one-element list so that
# `grouped_df.groups` keys and `get_group()` arguments are plain labels;
# recent pandas releases deprecate scalar lookups on list-keyed groupbys.
grouped_df = df_combo.groupby("group")

def lag_by_group(key, value_df, periods=3):
    """Return one group's rows, ordered by date, with every feature lagged.

    Parameters
    ----------
    key : label written into the ``group`` column of the result.
    value_df : DataFrame with the rows of a single group; needs a ``date`` column.
    periods : number of rows to shift by (default 3, the lag used by this cell).

    Returns
    -------
    DataFrame indexed by ``date`` (ascending). Each non-key column holds the
    value from ``periods`` rows earlier, so the first ``periods`` rows are NaN.
    The shift is by row position, not by calendar month.
    """
    ordered = value_df.sort_values(by=["date"], ascending=True).set_index(["date"])
    # Write the key after shifting: shifting the ``group`` column as well
    # would blank the join key on the first ``periods`` rows.
    return ordered.shift(periods).assign(group=key)
# Lag each group on its own, then stack the per-group frames.
dflist = [
    lag_by_group(name, grouped_df.get_group(name))
    for name in grouped_df.groups.keys()
]
# Tag every lagged column with the "_3" suffix.
# NOTE(review): 'fs_Emergency' maps to 'df_emergency_3' (not 'fs_...');
# it looks like a typo but the column name is kept unchanged here.
df5 = (
    pd.concat(dflist, axis=0)
    .reset_index()
    .rename(columns={
        'cc_onset': 'cc_onset_3', 'fatalities': 'fatalities_3',
        'actor_state': 'actor_state_3', 'actor_rebel_groups': 'actor_rebel_groups_3',
        'actor_political_militias': 'actor_political_militias_3',
        'actor_identity_militias': 'actor_identity_militias_3',
        'actor_civilians': 'actor_civilians_3', 'actor_others': 'actor_others_3',
        'inflation_all': 'inflation_all_3', 'rainfall': 'rainfall_3',
        'temperature mean': 'temperature_3', 'btotl': 'pop_3',
        'maize_ETB_KG': 'maize_kg_birr_3', 'teff_ETB_KG': 'teff_kg_birr_3',
        'wheat_ETB_KG': 'wheat_kg_birr_3', 'mean ndvi': 'ndvi_3',
        'fs_Crisis': 'fs_crisis_3', 'fs_Emergency': 'df_emergency_3',
        'fs_Famine': 'fs_famine_3', 'fs_Minimal': 'fs_minimal_3',
        'fs_Stressed': 'fs_stressed_3',
    })
)
# Append the 3-month lag to the table holding lags 1-2.
df6 = df4.merge(df5, how='left', on=['group', 'date'])

In [38]:
group = ['group']  # grouping key name(s); not used by the statements below
# Columns carried into the lagged feature table: the two join keys
# ('date', 'group') followed by the target and every predictor.
df_combo = df[[
    'date', 'group', 'cc_onset', 'fatalities',
    'actor_state', 'actor_rebel_groups', 'actor_political_militias',
    'actor_identity_militias', 'actor_civilians', 'actor_others',
    'inflation_all', 'rainfall', 'temperature mean', 'btotl',
    'maize_ETB_KG', 'teff_ETB_KG', 'wheat_ETB_KG', 'mean ndvi',
    'fs_Crisis', 'fs_Emergency', 'fs_Famine', 'fs_Minimal', 'fs_Stressed',
]]  # many vars, many groups

# Group by a scalar key rather than a one-element list so that
# `grouped_df.groups` keys and `get_group()` arguments are plain labels;
# recent pandas releases deprecate scalar lookups on list-keyed groupbys.
grouped_df = df_combo.groupby("group")

def lag_by_group(key, value_df, periods=4):
    """Return one group's rows, ordered by date, with every feature lagged.

    Parameters
    ----------
    key : label written into the ``group`` column of the result.
    value_df : DataFrame with the rows of a single group; needs a ``date`` column.
    periods : number of rows to shift by (default 4, the lag used by this cell).

    Returns
    -------
    DataFrame indexed by ``date`` (ascending). Each non-key column holds the
    value from ``periods`` rows earlier, so the first ``periods`` rows are NaN.
    The shift is by row position, not by calendar month.
    """
    ordered = value_df.sort_values(by=["date"], ascending=True).set_index(["date"])
    # Write the key after shifting: shifting the ``group`` column as well
    # would blank the join key on the first ``periods`` rows.
    return ordered.shift(periods).assign(group=key)
# Lag each group on its own, then stack the per-group frames.
dflist = [
    lag_by_group(name, grouped_df.get_group(name))
    for name in grouped_df.groups.keys()
]
# Tag every lagged column with the "_4" suffix.
# NOTE(review): 'fs_Emergency' maps to 'df_emergency_4' (not 'fs_...');
# it looks like a typo but the column name is kept unchanged here.
df7 = (
    pd.concat(dflist, axis=0)
    .reset_index()
    .rename(columns={
        'cc_onset': 'cc_onset_4', 'fatalities': 'fatalities_4',
        'actor_state': 'actor_state_4', 'actor_rebel_groups': 'actor_rebel_groups_4',
        'actor_political_militias': 'actor_political_militias_4',
        'actor_identity_militias': 'actor_identity_militias_4',
        'actor_civilians': 'actor_civilians_4', 'actor_others': 'actor_others_4',
        'inflation_all': 'inflation_all_4', 'rainfall': 'rainfall_4',
        'temperature mean': 'temperature_4', 'btotl': 'pop_4',
        'maize_ETB_KG': 'maize_kg_birr_4', 'teff_ETB_KG': 'teff_kg_birr_4',
        'wheat_ETB_KG': 'wheat_kg_birr_4', 'mean ndvi': 'ndvi_4',
        'fs_Crisis': 'fs_crisis_4', 'fs_Emergency': 'df_emergency_4',
        'fs_Famine': 'fs_famine_4', 'fs_Minimal': 'fs_minimal_4',
        'fs_Stressed': 'fs_stressed_4',
    })
)
# Append the 4-month lag to the table holding lags 1-3.
df8 = df6.merge(df7, how='left', on=['group', 'date'])

In [39]:
group = ['group']  # grouping key name(s); not used by the statements below
# Columns carried into the lagged feature table: the two join keys
# ('date', 'group') followed by the target and every predictor.
df_combo = df[[
    'date', 'group', 'cc_onset', 'fatalities',
    'actor_state', 'actor_rebel_groups', 'actor_political_militias',
    'actor_identity_militias', 'actor_civilians', 'actor_others',
    'inflation_all', 'rainfall', 'temperature mean', 'btotl',
    'maize_ETB_KG', 'teff_ETB_KG', 'wheat_ETB_KG', 'mean ndvi',
    'fs_Crisis', 'fs_Emergency', 'fs_Famine', 'fs_Minimal', 'fs_Stressed',
]]  # many vars, many groups

# Group by a scalar key rather than a one-element list so that
# `grouped_df.groups` keys and `get_group()` arguments are plain labels;
# recent pandas releases deprecate scalar lookups on list-keyed groupbys.
grouped_df = df_combo.groupby("group")

def lag_by_group(key, value_df, periods=5):
    """Return one group's rows, ordered by date, with every feature lagged.

    Parameters
    ----------
    key : label written into the ``group`` column of the result.
    value_df : DataFrame with the rows of a single group; needs a ``date`` column.
    periods : number of rows to shift by (default 5, the lag used by this cell).

    Returns
    -------
    DataFrame indexed by ``date`` (ascending). Each non-key column holds the
    value from ``periods`` rows earlier, so the first ``periods`` rows are NaN.
    The shift is by row position, not by calendar month.
    """
    ordered = value_df.sort_values(by=["date"], ascending=True).set_index(["date"])
    # Write the key after shifting: shifting the ``group`` column as well
    # would blank the join key on the first ``periods`` rows.
    return ordered.shift(periods).assign(group=key)
# Lag each group on its own, then stack the per-group frames.
dflist = [
    lag_by_group(name, grouped_df.get_group(name))
    for name in grouped_df.groups.keys()
]
# Tag every lagged column with the "_5" suffix.
# NOTE(review): 'fs_Emergency' maps to 'df_emergency_5' (not 'fs_...');
# it looks like a typo but the column name is kept unchanged here.
df9 = (
    pd.concat(dflist, axis=0)
    .reset_index()
    .rename(columns={
        'cc_onset': 'cc_onset_5', 'fatalities': 'fatalities_5',
        'actor_state': 'actor_state_5', 'actor_rebel_groups': 'actor_rebel_groups_5',
        'actor_political_militias': 'actor_political_militias_5',
        'actor_identity_militias': 'actor_identity_militias_5',
        'actor_civilians': 'actor_civilians_5', 'actor_others': 'actor_others_5',
        'inflation_all': 'inflation_all_5', 'rainfall': 'rainfall_5',
        'temperature mean': 'temperature_5', 'btotl': 'pop_5',
        'maize_ETB_KG': 'maize_kg_birr_5', 'teff_ETB_KG': 'teff_kg_birr_5',
        'wheat_ETB_KG': 'wheat_kg_birr_5', 'mean ndvi': 'ndvi_5',
        'fs_Crisis': 'fs_crisis_5', 'fs_Emergency': 'df_emergency_5',
        'fs_Famine': 'fs_famine_5', 'fs_Minimal': 'fs_minimal_5',
        'fs_Stressed': 'fs_stressed_5',
    })
)
# Append the 5-month lag to the table holding lags 1-4.
df10 = df8.merge(df9, how='left', on=['group', 'date'])

In [40]:
group = ['group']  # grouping key name(s); not used by the statements below
# Columns carried into the lagged feature table: the two join keys
# ('date', 'group') followed by the target and every predictor.
df_combo = df[[
    'date', 'group', 'cc_onset', 'fatalities',
    'actor_state', 'actor_rebel_groups', 'actor_political_militias',
    'actor_identity_militias', 'actor_civilians', 'actor_others',
    'inflation_all', 'rainfall', 'temperature mean', 'btotl',
    'maize_ETB_KG', 'teff_ETB_KG', 'wheat_ETB_KG', 'mean ndvi',
    'fs_Crisis', 'fs_Emergency', 'fs_Famine', 'fs_Minimal', 'fs_Stressed',
]]  # many vars, many groups

# Group by a scalar key rather than a one-element list so that
# `grouped_df.groups` keys and `get_group()` arguments are plain labels;
# recent pandas releases deprecate scalar lookups on list-keyed groupbys.
grouped_df = df_combo.groupby("group")

def lag_by_group(key, value_df, periods=6):
    """Return one group's rows, ordered by date, with every feature lagged.

    Parameters
    ----------
    key : label written into the ``group`` column of the result.
    value_df : DataFrame with the rows of a single group; needs a ``date`` column.
    periods : number of rows to shift by (default 6, the lag used by this cell).

    Returns
    -------
    DataFrame indexed by ``date`` (ascending). Each non-key column holds the
    value from ``periods`` rows earlier, so the first ``periods`` rows are NaN.
    The shift is by row position, not by calendar month.
    """
    ordered = value_df.sort_values(by=["date"], ascending=True).set_index(["date"])
    # Write the key after shifting: shifting the ``group`` column as well
    # would blank the join key on the first ``periods`` rows.
    return ordered.shift(periods).assign(group=key)
# Lag each group on its own, then stack the per-group frames.
dflist = [
    lag_by_group(name, grouped_df.get_group(name))
    for name in grouped_df.groups.keys()
]
# Tag every lagged column with the "_6" suffix.
# NOTE(review): 'fs_Emergency' maps to 'df_emergency_6' (not 'fs_...');
# it looks like a typo but the column name is kept unchanged here.
df11 = (
    pd.concat(dflist, axis=0)
    .reset_index()
    .rename(columns={
        'cc_onset': 'cc_onset_6', 'fatalities': 'fatalities_6',
        'actor_state': 'actor_state_6', 'actor_rebel_groups': 'actor_rebel_groups_6',
        'actor_political_militias': 'actor_political_militias_6',
        'actor_identity_militias': 'actor_identity_militias_6',
        'actor_civilians': 'actor_civilians_6', 'actor_others': 'actor_others_6',
        'inflation_all': 'inflation_all_6', 'rainfall': 'rainfall_6',
        'temperature mean': 'temperature_6', 'btotl': 'pop_6',
        'maize_ETB_KG': 'maize_kg_birr_6', 'teff_ETB_KG': 'teff_kg_birr_6',
        'wheat_ETB_KG': 'wheat_kg_birr_6', 'mean ndvi': 'ndvi_6',
        'fs_Crisis': 'fs_crisis_6', 'fs_Emergency': 'df_emergency_6',
        'fs_Famine': 'fs_famine_6', 'fs_Minimal': 'fs_minimal_6',
        'fs_Stressed': 'fs_stressed_6',
    })
)
# Append the 6-month lag to the table holding lags 1-5.
df12 = df10.merge(df11, how='left', on=['group', 'date'])

In [41]:
group = ['group']  # grouping key name(s); not used by the statements below
# Columns carried into the lagged feature table: the two join keys
# ('date', 'group') followed by the target and every predictor.
df_combo = df[[
    'date', 'group', 'cc_onset', 'fatalities',
    'actor_state', 'actor_rebel_groups', 'actor_political_militias',
    'actor_identity_militias', 'actor_civilians', 'actor_others',
    'inflation_all', 'rainfall', 'temperature mean', 'btotl',
    'maize_ETB_KG', 'teff_ETB_KG', 'wheat_ETB_KG', 'mean ndvi',
    'fs_Crisis', 'fs_Emergency', 'fs_Famine', 'fs_Minimal', 'fs_Stressed',
]]  # many vars, many groups

# Group by a scalar key rather than a one-element list so that
# `grouped_df.groups` keys and `get_group()` arguments are plain labels;
# recent pandas releases deprecate scalar lookups on list-keyed groupbys.
grouped_df = df_combo.groupby("group")

def lag_by_group(key, value_df, periods=7):
    """Return one group's rows, ordered by date, with every feature lagged.

    Parameters
    ----------
    key : label written into the ``group`` column of the result.
    value_df : DataFrame with the rows of a single group; needs a ``date`` column.
    periods : number of rows to shift by (default 7, the lag used by this cell).

    Returns
    -------
    DataFrame indexed by ``date`` (ascending). Each non-key column holds the
    value from ``periods`` rows earlier, so the first ``periods`` rows are NaN.
    The shift is by row position, not by calendar month.
    """
    ordered = value_df.sort_values(by=["date"], ascending=True).set_index(["date"])
    # Write the key after shifting: shifting the ``group`` column as well
    # would blank the join key on the first ``periods`` rows.
    return ordered.shift(periods).assign(group=key)
# Lag each group on its own, then stack the per-group frames.
dflist = [
    lag_by_group(name, grouped_df.get_group(name))
    for name in grouped_df.groups.keys()
]
# Tag every lagged column with the "_7" suffix.
# NOTE(review): 'fs_Emergency' maps to 'df_emergency_7' (not 'fs_...');
# it looks like a typo but the column name is kept unchanged here.
df13 = (
    pd.concat(dflist, axis=0)
    .reset_index()
    .rename(columns={
        'cc_onset': 'cc_onset_7', 'fatalities': 'fatalities_7',
        'actor_state': 'actor_state_7', 'actor_rebel_groups': 'actor_rebel_groups_7',
        'actor_political_militias': 'actor_political_militias_7',
        'actor_identity_militias': 'actor_identity_militias_7',
        'actor_civilians': 'actor_civilians_7', 'actor_others': 'actor_others_7',
        'inflation_all': 'inflation_all_7', 'rainfall': 'rainfall_7',
        'temperature mean': 'temperature_7', 'btotl': 'pop_7',
        'maize_ETB_KG': 'maize_kg_birr_7', 'teff_ETB_KG': 'teff_kg_birr_7',
        'wheat_ETB_KG': 'wheat_kg_birr_7', 'mean ndvi': 'ndvi_7',
        'fs_Crisis': 'fs_crisis_7', 'fs_Emergency': 'df_emergency_7',
        'fs_Famine': 'fs_famine_7', 'fs_Minimal': 'fs_minimal_7',
        'fs_Stressed': 'fs_stressed_7',
    })
)
# Append the 7-month lag to the table holding lags 1-6.
df14 = df12.merge(df13, how='left', on=['group', 'date'])

In [42]:
group = ['group']  # grouping key name(s); not used by the statements below
# Columns carried into the lagged feature table: the two join keys
# ('date', 'group') followed by the target and every predictor.
df_combo = df[[
    'date', 'group', 'cc_onset', 'fatalities',
    'actor_state', 'actor_rebel_groups', 'actor_political_militias',
    'actor_identity_militias', 'actor_civilians', 'actor_others',
    'inflation_all', 'rainfall', 'temperature mean', 'btotl',
    'maize_ETB_KG', 'teff_ETB_KG', 'wheat_ETB_KG', 'mean ndvi',
    'fs_Crisis', 'fs_Emergency', 'fs_Famine', 'fs_Minimal', 'fs_Stressed',
]]  # many vars, many groups

# Group by a scalar key rather than a one-element list so that
# `grouped_df.groups` keys and `get_group()` arguments are plain labels;
# recent pandas releases deprecate scalar lookups on list-keyed groupbys.
grouped_df = df_combo.groupby("group")

def lag_by_group(key, value_df, periods=8):
    """Return one group's rows, ordered by date, with every feature lagged.

    Parameters
    ----------
    key : label written into the ``group`` column of the result.
    value_df : DataFrame with the rows of a single group; needs a ``date`` column.
    periods : number of rows to shift by (default 8, the lag used by this cell).

    Returns
    -------
    DataFrame indexed by ``date`` (ascending). Each non-key column holds the
    value from ``periods`` rows earlier, so the first ``periods`` rows are NaN.
    The shift is by row position, not by calendar month.
    """
    ordered = value_df.sort_values(by=["date"], ascending=True).set_index(["date"])
    # Write the key after shifting: shifting the ``group`` column as well
    # would blank the join key on the first ``periods`` rows.
    return ordered.shift(periods).assign(group=key)
# Lag each group on its own, then stack the per-group frames.
dflist = [
    lag_by_group(name, grouped_df.get_group(name))
    for name in grouped_df.groups.keys()
]
# Tag every lagged column with the "_8" suffix.
# NOTE(review): 'fs_Emergency' maps to 'df_emergency_8' (not 'fs_...');
# it looks like a typo but the column name is kept unchanged here.
df15 = (
    pd.concat(dflist, axis=0)
    .reset_index()
    .rename(columns={
        'cc_onset': 'cc_onset_8', 'fatalities': 'fatalities_8',
        'actor_state': 'actor_state_8', 'actor_rebel_groups': 'actor_rebel_groups_8',
        'actor_political_militias': 'actor_political_militias_8',
        'actor_identity_militias': 'actor_identity_militias_8',
        'actor_civilians': 'actor_civilians_8', 'actor_others': 'actor_others_8',
        'inflation_all': 'inflation_all_8', 'rainfall': 'rainfall_8',
        'temperature mean': 'temperature_8', 'btotl': 'pop_8',
        'maize_ETB_KG': 'maize_kg_birr_8', 'teff_ETB_KG': 'teff_kg_birr_8',
        'wheat_ETB_KG': 'wheat_kg_birr_8', 'mean ndvi': 'ndvi_8',
        'fs_Crisis': 'fs_crisis_8', 'fs_Emergency': 'df_emergency_8',
        'fs_Famine': 'fs_famine_8', 'fs_Minimal': 'fs_minimal_8',
        'fs_Stressed': 'fs_stressed_8',
    })
)
# Append the 8-month lag to the table holding lags 1-7.
df16 = df14.merge(df15, how='left', on=['group', 'date'])

In [43]:
group = ['group']  # grouping key name(s); not used by the statements below
# Columns carried into the lagged feature table: the two join keys
# ('date', 'group') followed by the target and every predictor.
df_combo = df[[
    'date', 'group', 'cc_onset', 'fatalities',
    'actor_state', 'actor_rebel_groups', 'actor_political_militias',
    'actor_identity_militias', 'actor_civilians', 'actor_others',
    'inflation_all', 'rainfall', 'temperature mean', 'btotl',
    'maize_ETB_KG', 'teff_ETB_KG', 'wheat_ETB_KG', 'mean ndvi',
    'fs_Crisis', 'fs_Emergency', 'fs_Famine', 'fs_Minimal', 'fs_Stressed',
]]  # many vars, many groups

# Group by a scalar key rather than a one-element list so that
# `grouped_df.groups` keys and `get_group()` arguments are plain labels;
# recent pandas releases deprecate scalar lookups on list-keyed groupbys.
grouped_df = df_combo.groupby("group")

def lag_by_group(key, value_df, periods=9):
    """Return one group's rows, ordered by date, with every feature lagged.

    Parameters
    ----------
    key : label written into the ``group`` column of the result.
    value_df : DataFrame with the rows of a single group; needs a ``date`` column.
    periods : number of rows to shift by (default 9, the lag used by this cell).

    Returns
    -------
    DataFrame indexed by ``date`` (ascending). Each non-key column holds the
    value from ``periods`` rows earlier, so the first ``periods`` rows are NaN.
    The shift is by row position, not by calendar month.
    """
    ordered = value_df.sort_values(by=["date"], ascending=True).set_index(["date"])
    # Write the key after shifting: shifting the ``group`` column as well
    # would blank the join key on the first ``periods`` rows.
    return ordered.shift(periods).assign(group=key)
# Lag each group on its own, then stack the per-group frames.
dflist = [
    lag_by_group(name, grouped_df.get_group(name))
    for name in grouped_df.groups.keys()
]
# Tag every lagged column with the "_9" suffix.
# NOTE(review): 'fs_Emergency' maps to 'df_emergency_9' (not 'fs_...');
# it looks like a typo but the column name is kept unchanged here.
df17 = (
    pd.concat(dflist, axis=0)
    .reset_index()
    .rename(columns={
        'cc_onset': 'cc_onset_9', 'fatalities': 'fatalities_9',
        'actor_state': 'actor_state_9', 'actor_rebel_groups': 'actor_rebel_groups_9',
        'actor_political_militias': 'actor_political_militias_9',
        'actor_identity_militias': 'actor_identity_militias_9',
        'actor_civilians': 'actor_civilians_9', 'actor_others': 'actor_others_9',
        'inflation_all': 'inflation_all_9', 'rainfall': 'rainfall_9',
        'temperature mean': 'temperature_9', 'btotl': 'pop_9',
        'maize_ETB_KG': 'maize_kg_birr_9', 'teff_ETB_KG': 'teff_kg_birr_9',
        'wheat_ETB_KG': 'wheat_kg_birr_9', 'mean ndvi': 'ndvi_9',
        'fs_Crisis': 'fs_crisis_9', 'fs_Emergency': 'df_emergency_9',
        'fs_Famine': 'fs_famine_9', 'fs_Minimal': 'fs_minimal_9',
        'fs_Stressed': 'fs_stressed_9',
    })
)
# Append the 9-month lag to the table holding lags 1-8.
df18 = df16.merge(df17, how='left', on=['group', 'date'])

In [44]:
group = ['group']  # grouping key name(s); not used by the statements below
# Columns carried into the lagged feature table: the two join keys
# ('date', 'group') followed by the target and every predictor.
df_combo = df[[
    'date', 'group', 'cc_onset', 'fatalities',
    'actor_state', 'actor_rebel_groups', 'actor_political_militias',
    'actor_identity_militias', 'actor_civilians', 'actor_others',
    'inflation_all', 'rainfall', 'temperature mean', 'btotl',
    'maize_ETB_KG', 'teff_ETB_KG', 'wheat_ETB_KG', 'mean ndvi',
    'fs_Crisis', 'fs_Emergency', 'fs_Famine', 'fs_Minimal', 'fs_Stressed',
]]  # many vars, many groups

# Group by a scalar key rather than a one-element list so that
# `grouped_df.groups` keys and `get_group()` arguments are plain labels;
# recent pandas releases deprecate scalar lookups on list-keyed groupbys.
grouped_df = df_combo.groupby("group")

def lag_by_group(key, value_df, periods=10):
    """Return one group's rows, ordered by date, with every feature lagged.

    Parameters
    ----------
    key : label written into the ``group`` column of the result.
    value_df : DataFrame with the rows of a single group; needs a ``date`` column.
    periods : number of rows to shift by (default 10, the lag used by this cell).

    Returns
    -------
    DataFrame indexed by ``date`` (ascending). Each non-key column holds the
    value from ``periods`` rows earlier, so the first ``periods`` rows are NaN.
    The shift is by row position, not by calendar month.
    """
    ordered = value_df.sort_values(by=["date"], ascending=True).set_index(["date"])
    # Write the key after shifting: shifting the ``group`` column as well
    # would blank the join key on the first ``periods`` rows.
    return ordered.shift(periods).assign(group=key)
# Lag each group on its own, then stack the per-group frames.
dflist = [
    lag_by_group(name, grouped_df.get_group(name))
    for name in grouped_df.groups.keys()
]
# Tag every lagged column with the "_10" suffix.
# NOTE(review): 'fs_Emergency' maps to 'df_emergency_10' (not 'fs_...');
# it looks like a typo but the column name is kept unchanged here.
df19 = (
    pd.concat(dflist, axis=0)
    .reset_index()
    .rename(columns={
        'cc_onset': 'cc_onset_10', 'fatalities': 'fatalities_10',
        'actor_state': 'actor_state_10', 'actor_rebel_groups': 'actor_rebel_groups_10',
        'actor_political_militias': 'actor_political_militias_10',
        'actor_identity_militias': 'actor_identity_militias_10',
        'actor_civilians': 'actor_civilians_10', 'actor_others': 'actor_others_10',
        'inflation_all': 'inflation_all_10', 'rainfall': 'rainfall_10',
        'temperature mean': 'temperature_10', 'btotl': 'pop_10',
        'maize_ETB_KG': 'maize_kg_birr_10', 'teff_ETB_KG': 'teff_kg_birr_10',
        'wheat_ETB_KG': 'wheat_kg_birr_10', 'mean ndvi': 'ndvi_10',
        'fs_Crisis': 'fs_crisis_10', 'fs_Emergency': 'df_emergency_10',
        'fs_Famine': 'fs_famine_10', 'fs_Minimal': 'fs_minimal_10',
        'fs_Stressed': 'fs_stressed_10',
    })
)
# Append the 10-month lag to the table holding lags 1-9.
df20 = df18.merge(df19, how='left', on=['group', 'date'])

In [45]:
group = ['group']  # grouping key name(s); not used by the statements below
# Columns carried into the lagged feature table: the two join keys
# ('date', 'group') followed by the target and every predictor.
df_combo = df[[
    'date', 'group', 'cc_onset', 'fatalities',
    'actor_state', 'actor_rebel_groups', 'actor_political_militias',
    'actor_identity_militias', 'actor_civilians', 'actor_others',
    'inflation_all', 'rainfall', 'temperature mean', 'btotl',
    'maize_ETB_KG', 'teff_ETB_KG', 'wheat_ETB_KG', 'mean ndvi',
    'fs_Crisis', 'fs_Emergency', 'fs_Famine', 'fs_Minimal', 'fs_Stressed',
]]  # many vars, many groups

# Group by a scalar key rather than a one-element list so that
# `grouped_df.groups` keys and `get_group()` arguments are plain labels;
# recent pandas releases deprecate scalar lookups on list-keyed groupbys.
grouped_df = df_combo.groupby("group")

def lag_by_group(key, value_df, periods=11):
    """Return one group's rows, ordered by date, with every feature lagged.

    Parameters
    ----------
    key : label written into the ``group`` column of the result.
    value_df : DataFrame with the rows of a single group; needs a ``date`` column.
    periods : number of rows to shift by (default 11, the lag used by this cell).

    Returns
    -------
    DataFrame indexed by ``date`` (ascending). Each non-key column holds the
    value from ``periods`` rows earlier, so the first ``periods`` rows are NaN.
    The shift is by row position, not by calendar month.
    """
    ordered = value_df.sort_values(by=["date"], ascending=True).set_index(["date"])
    # Write the key after shifting: shifting the ``group`` column as well
    # would blank the join key on the first ``periods`` rows.
    return ordered.shift(periods).assign(group=key)
# Lag each group on its own, then stack the per-group frames.
dflist = [
    lag_by_group(name, grouped_df.get_group(name))
    for name in grouped_df.groups.keys()
]
# Tag every lagged column with the "_11" suffix.
# NOTE(review): the name df19 is rebound here; it previously held the
# 10-month lag frame, so re-running the earlier cell out of order changes it.
# NOTE(review): 'fs_Emergency' maps to 'df_emergency_11' (not 'fs_...');
# it looks like a typo but the column name is kept unchanged here.
df19 = (
    pd.concat(dflist, axis=0)
    .reset_index()
    .rename(columns={
        'cc_onset': 'cc_onset_11', 'fatalities': 'fatalities_11',
        'actor_state': 'actor_state_11', 'actor_rebel_groups': 'actor_rebel_groups_11',
        'actor_political_militias': 'actor_political_militias_11',
        'actor_identity_militias': 'actor_identity_militias_11',
        'actor_civilians': 'actor_civilians_11', 'actor_others': 'actor_others_11',
        'inflation_all': 'inflation_all_11', 'rainfall': 'rainfall_11',
        'temperature mean': 'temperature_11', 'btotl': 'pop_11',
        'maize_ETB_KG': 'maize_kg_birr_11', 'teff_ETB_KG': 'teff_kg_birr_11',
        'wheat_ETB_KG': 'wheat_kg_birr_11', 'mean ndvi': 'ndvi_11',
        'fs_Crisis': 'fs_crisis_11', 'fs_Emergency': 'df_emergency_11',
        'fs_Famine': 'fs_famine_11', 'fs_Minimal': 'fs_minimal_11',
        'fs_Stressed': 'fs_stressed_11',
    })
)
# Append the 11-month lag to the table holding lags 1-10.
df21 = df20.merge(df19, how='left', on=['group', 'date'])

In [46]:
group = ['group']  # grouping key name(s); not used by the statements below
# Columns carried into the lagged feature table: the two join keys
# ('date', 'group') followed by the target and every predictor.
df_combo = df[[
    'date', 'group', 'cc_onset', 'fatalities',
    'actor_state', 'actor_rebel_groups', 'actor_political_militias',
    'actor_identity_militias', 'actor_civilians', 'actor_others',
    'inflation_all', 'rainfall', 'temperature mean', 'btotl',
    'maize_ETB_KG', 'teff_ETB_KG', 'wheat_ETB_KG', 'mean ndvi',
    'fs_Crisis', 'fs_Emergency', 'fs_Famine', 'fs_Minimal', 'fs_Stressed',
]]  # many vars, many groups

# Group by a scalar key rather than a one-element list so that
# `grouped_df.groups` keys and `get_group()` arguments are plain labels;
# recent pandas releases deprecate scalar lookups on list-keyed groupbys.
grouped_df = df_combo.groupby("group")

def lag_by_group(key, value_df, periods=12):
    """Return one group's rows, ordered by date, with every feature lagged.

    Parameters
    ----------
    key : label written into the ``group`` column of the result.
    value_df : DataFrame with the rows of a single group; needs a ``date`` column.
    periods : number of rows to shift by (default 12, the lag used by this cell).

    Returns
    -------
    DataFrame indexed by ``date`` (ascending). Each non-key column holds the
    value from ``periods`` rows earlier, so the first ``periods`` rows are NaN.
    The shift is by row position, not by calendar month.
    """
    ordered = value_df.sort_values(by=["date"], ascending=True).set_index(["date"])
    # Write the key after shifting: shifting the ``group`` column as well
    # would blank the join key on the first ``periods`` rows.
    return ordered.shift(periods).assign(group=key)
# Lag each group on its own, then stack the per-group frames.
dflist = [
    lag_by_group(name, grouped_df.get_group(name))
    for name in grouped_df.groups.keys()
]
# Tag every lagged column with the "_12" suffix.
# NOTE(review): the name df20 is rebound here; it previously held the merged
# table for lags 1-10, and a later cell builds df23 from this 12-month frame.
# NOTE(review): 'fs_Emergency' maps to 'df_emergency_12' (not 'fs_...');
# it looks like a typo but the column name is kept unchanged here.
df20 = (
    pd.concat(dflist, axis=0)
    .reset_index()
    .rename(columns={
        'cc_onset': 'cc_onset_12', 'fatalities': 'fatalities_12',
        'actor_state': 'actor_state_12', 'actor_rebel_groups': 'actor_rebel_groups_12',
        'actor_political_militias': 'actor_political_militias_12',
        'actor_identity_militias': 'actor_identity_militias_12',
        'actor_civilians': 'actor_civilians_12', 'actor_others': 'actor_others_12',
        'inflation_all': 'inflation_all_12', 'rainfall': 'rainfall_12',
        'temperature mean': 'temperature_12', 'btotl': 'pop_12',
        'maize_ETB_KG': 'maize_kg_birr_12', 'teff_ETB_KG': 'teff_kg_birr_12',
        'wheat_ETB_KG': 'wheat_kg_birr_12', 'mean ndvi': 'ndvi_12',
        'fs_Crisis': 'fs_crisis_12', 'fs_Emergency': 'df_emergency_12',
        'fs_Famine': 'fs_famine_12', 'fs_Minimal': 'fs_minimal_12',
        'fs_Stressed': 'fs_stressed_12',
    })
)
# Append the 12-month lag to the table holding lags 1-11.
df22 = df21.merge(df20, how='left', on=['group', 'date'])

In [47]:
# Keep only rows where every column (all 12 lags included) is present.
df22 = df22.dropna(axis=0, how='any')

In [48]:
# Preview the first rows only; rendering the whole wide frame adds nothing
# for the reader and bloats the saved notebook.
df22.head(10)

Unnamed: 0,group,date,cc_onset,fatalities,cc_onset_1,fatalities_1,actor_state_1,actor_rebel_groups_1,actor_political_militias_1,actor_identity_militias_1,...,pop_12,maize_kg_birr_12,teff_kg_birr_12,wheat_kg_birr_12,ndvi_12,fs_crisis_12,df_emergency_12,fs_famine_12,fs_minimal_12,fs_stressed_12
1,Addis Ababa_Addis Ababa,1998-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.055693e+06,6.366733,10.491953,10.126039,143.936125,0.0,0.0,0.0,0.0,0.0
2,Addis Ababa_Addis Ababa,1999-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.893393e+06,6.294734,10.519060,10.158023,145.263097,0.0,0.0,0.0,0.0,0.0
3,Addis Ababa_Addis Ababa,2000-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.173030e+06,6.416730,10.470207,10.114585,143.656003,0.0,0.0,0.0,0.0,0.0
4,Addis Ababa_Addis Ababa,2001-01-01,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.460510e+06,6.154499,10.143197,9.958946,144.056314,0.0,0.0,0.0,0.0,0.0
5,Addis Ababa_Addis Ababa,2002-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.517836e+06,6.172404,10.275892,10.010577,143.325276,0.0,0.0,0.0,0.0,0.0
6,Addis Ababa_Addis Ababa,2003-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.578384e+06,6.194241,10.236124,10.027970,145.098807,0.0,0.0,0.0,0.0,0.0
7,Addis Ababa_Addis Ababa,2004-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.642130e+06,6.310951,10.527013,10.495768,132.233711,0.0,0.0,0.0,0.0,0.0
8,Addis Ababa_Addis Ababa,2005-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.708629e+06,6.334656,10.646945,10.555530,131.924545,0.0,0.0,0.0,0.0,0.0
9,Addis Ababa_Addis Ababa,2006-01-01,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.777763e+06,6.379080,10.554230,10.584710,131.393640,0.0,0.0,0.0,0.0,0.0
10,Addis Ababa_Addis Ababa,2007-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.850120e+06,6.409241,10.640195,10.572775,129.556521,0.0,0.0,0.0,0.0,0.0


In [50]:
# Monthly period label derived from the row's date.
df22 = df22.assign(month=pd.to_datetime(df22['date']).dt.to_period('M'))

In [None]:
# Quarterly period label derived from the row's date.
df22 = df22.assign(quarter=pd.PeriodIndex(df22['date'], freq='Q'))

In [None]:
# Half-year label such as "2005BA1" (Jan-Jun) or "2005BA2" (Jul-Dec).
# Both parts are read from df22['month']; the previous version took the month
# from df23, which is not defined at this point and has no 'month' column.
df22['bi_annual'] = (
    df22['month'].dt.year.astype(str)
    + 'BA'
    + np.where(df22['month'].dt.month <= 6, 1, 2).astype(str)
)

In [None]:
# Write the full 12-lag table to CSV (row index included, the to_csv default).
# NOTE(review): the destination is a hard-coded absolute path on one machine;
# this cell fails anywhere that directory does not exist.
df22.to_csv('/Users/yaredhurisa/Google Drive/Probability of conflict/indicators/eth_cc_final_dataset_monthly.csv')

In [None]:
# Pair each (group, date) target row with the 12-month lag only, then drop
# incomplete rows. df20 here is the 12-month lag frame built in the last lag cell.
df23 = y.merge(df20, how='left', on=['group', 'date']).dropna()

In [None]:
# Write the 12-month-lag-only table to CSV (row index included, the to_csv default).
# NOTE(review): hard-coded absolute local path; not portable.
df23.to_csv('/Users/yaredhurisa/Google Drive/Probability of conflict/indicators/eth_cc_final_dataset_12_month.csv')

In [None]:
# Keep only rows of the 1-month-lag table with no missing values.
df3 = df3.dropna(axis=0, how='any')

In [None]:
# Write the 1-month-lag table to CSV (row index included, the to_csv default).
# NOTE(review): hard-coded absolute local path; not portable.
df3.to_csv('/Users/yaredhurisa/Google Drive/Probability of conflict/indicators/eth_cc_final_dataset_1_month_lag.csv')

In [51]:
# Table with lags 1-3: drop incomplete rows, then write to CSV.
# NOTE(review): hard-coded absolute local path; not portable.
df6 = df6.dropna(axis=0, how='any')
df6.to_csv(
    '/Users/yaredhurisa/Google Drive/Probability of conflict/indicators/eth_cc_final_dataset_3_month_lag.csv'
)

In [52]:
# Table with lags 1-6: drop incomplete rows, then write to CSV.
# NOTE(review): hard-coded absolute local path; not portable.
df12 = df12.dropna(axis=0, how='any')
df12.to_csv(
    '/Users/yaredhurisa/Google Drive/Probability of conflict/indicators/eth_cc_final_dataset_6_month_lag.csv'
)