## Imports

In [1]:
import numpy as np
import pandas as pd


# Show up to 100 columns when a DataFrame is rendered
pd.options.display.max_columns = 100

## Create Datetime Features

1. Datetime representation of the dyad start date

2. Datetime representation of the length of the alliance

In [65]:
# Load the cleaned yearly dyad data and build one datetime column from the
# separate start year / month / day columns.
#
# Combining columns through a nested-list `parse_dates` inside `read_csv` is
# deprecated since pandas 2.0, so the date is assembled with `pd.to_datetime`
# after parsing instead. The combined column keeps the name
# ('dyad_st_year_dyad_st_month_dyad_st_day') and the leading position that the
# old `parse_dates` behaviour produced, and the three source columns are
# dropped, so the following cells see the same frame as before.
date_columns = [['dyad_st_year', 'dyad_st_month', 'dyad_st_day']]
start_parts = date_columns[0]

dyads_raw = pd.read_csv('./data/cleaned_dyads_yearly.csv')

# `pd.to_datetime` assembles dates from a frame whose columns are named
# year / month / day, so map the source column names onto those.
dyad_start = pd.to_datetime(
    dyads_raw[start_parts].rename(columns=dict(zip(start_parts, ['year', 'month', 'day'])))
)

dyads_df = dyads_raw.drop(columns=start_parts)
dyads_df.insert(0, '_'.join(start_parts), dyad_start)
dyads_df.head()

Unnamed: 0,dyad_st_year_dyad_st_month_dyad_st_day,ccode1,state_name1,ccode2,state_name2,dyad_end_year,left_censor,right_censor,defense,neutrality,nonaggression,entente,year
0,1816-01-01,200,United Kingdom,235,Portugal,2012,1,1,1,0,1.0,0.0,1816
1,1816-01-01,200,United Kingdom,235,Portugal,2012,1,1,1,0,1.0,0.0,1817
2,1816-01-01,200,United Kingdom,235,Portugal,2012,1,1,1,0,1.0,0.0,1818
3,1816-01-01,200,United Kingdom,235,Portugal,2012,1,1,1,0,1.0,0.0,1819
4,1816-01-01,200,United Kingdom,235,Portugal,2012,1,1,1,0,1.0,0.0,1820


In [66]:
# Give the combined start-date column a shorter, descriptive name
start_date_renames = {'dyad_st_year_dyad_st_month_dyad_st_day': 'dyad_start_date'}
dyads_df = dyads_df.rename(columns=start_date_renames)
dyads_df.head()

Unnamed: 0,dyad_start_date,ccode1,state_name1,ccode2,state_name2,dyad_end_year,left_censor,right_censor,defense,neutrality,nonaggression,entente,year
0,1816-01-01,200,United Kingdom,235,Portugal,2012,1,1,1,0,1.0,0.0,1816
1,1816-01-01,200,United Kingdom,235,Portugal,2012,1,1,1,0,1.0,0.0,1817
2,1816-01-01,200,United Kingdom,235,Portugal,2012,1,1,1,0,1.0,0.0,1818
3,1816-01-01,200,United Kingdom,235,Portugal,2012,1,1,1,0,1.0,0.0,1819
4,1816-01-01,200,United Kingdom,235,Portugal,2012,1,1,1,0,1.0,0.0,1820


In [67]:
# Parse `dyad_end_year` (a bare year) as a datetime using the '%Y' format.
# Only `dyad_end_year` is converted here; `year` is left unchanged.
end_year_as_datetime = pd.to_datetime(dyads_df['dyad_end_year'], format='%Y')
dyads_df['dyad_end_year'] = end_year_as_datetime

In [68]:
# `year` contains 0 placeholders that need to be imputed
dyads_df['year'].to_numpy()

array([1816, 1817, 1818, ...,    0,    0,    0])

In [69]:
# Number of rows whose `year` is the 0 placeholder
int((dyads_df['year'] == 0).sum())

45

In [70]:
# Impute the placeholder years: wherever `year` is 0, take the year from that
# row's `dyad_start_date`.
#
# Rows are selected by value (`year == 0`) rather than by hard-coded index
# labels, so the cell does not depend on the row order or row count of the
# input file. It is also safe to re-run: once the 0s are filled the mask is
# empty and nothing is overwritten.
missing_year = dyads_df['year'] == 0
dyads_df.loc[missing_year, 'year'] = dyads_df.loc[missing_year, 'dyad_start_date'].dt.year

In [71]:
# Check the last five rows after imputing `year`
dyads_df.tail(n=5)

Unnamed: 0,dyad_start_date,ccode1,state_name1,ccode2,state_name2,dyad_end_year,left_censor,right_censor,defense,neutrality,nonaggression,entente,year
74124,1987-11-30,2,United States of America,666,Israel,1991-01-01,0,0,0,0,0.0,1.0,1987
74125,1988-11-30,2,United States of America,666,Israel,1991-01-01,0,0,0,0,0.0,1.0,1988
74126,1989-11-30,2,United States of America,666,Israel,1991-01-01,0,0,0,0,0.0,1.0,1989
74127,1990-11-30,2,United States of America,666,Israel,1991-01-01,0,0,0,0,0.0,1.0,1990
74128,1991-11-30,2,United States of America,666,Israel,1991-01-01,0,0,0,0,0.0,1.0,1991


In [72]:
# Display every column for the rows labelled 74084 through 74128 (inclusive)
dyads_df.loc[74084:74128, :]

Unnamed: 0,dyad_start_date,ccode1,state_name1,ccode2,state_name2,dyad_end_year,left_censor,right_censor,defense,neutrality,nonaggression,entente,year
74084,1979-03-26,651,Egypt,666,Israel,2012-01-01,0,1,0,0,1.0,0.0,1979
74085,1980-03-26,651,Egypt,666,Israel,2012-01-01,0,1,0,0,1.0,0.0,1980
74086,1981-03-26,651,Egypt,666,Israel,2012-01-01,0,1,0,0,1.0,0.0,1981
74087,1982-03-26,651,Egypt,666,Israel,2012-01-01,0,1,0,0,1.0,0.0,1982
74088,1983-03-26,651,Egypt,666,Israel,2012-01-01,0,1,0,0,1.0,0.0,1983
74089,1984-03-26,651,Egypt,666,Israel,2012-01-01,0,1,0,0,1.0,0.0,1984
74090,1985-03-26,651,Egypt,666,Israel,2012-01-01,0,1,0,0,1.0,0.0,1985
74091,1986-03-26,651,Egypt,666,Israel,2012-01-01,0,1,0,0,1.0,0.0,1986
74092,1987-03-26,651,Egypt,666,Israel,2012-01-01,0,1,0,0,1.0,0.0,1987
74093,1988-03-26,651,Egypt,666,Israel,2012-01-01,0,1,0,0,1.0,0.0,1988


In [73]:
# Fix start dates: rows 74085-74117 take the start date of row 74084, so the
# whole range 74084-74117 ends up sharing one `dyad_start_date`.
# NOTE(review): assumes rows 74084-74117 all belong to the same dyad - confirm.
first_range_start = dyads_df.at[74084, 'dyad_start_date']
dyads_df.loc[74085:74117, 'dyad_start_date'] = first_range_start

In [74]:
# Same fix for the second range: rows 74119-74128 take the start date of row 74118.
# NOTE(review): assumes rows 74118-74128 all belong to the same dyad - confirm.
second_range_start = dyads_df.at[74118, 'dyad_start_date']
dyads_df.loc[74119:74128, 'dyad_start_date'] = second_range_start

In [75]:
# Inspect `year` and `dyad_start_date` in the last 100 rows
dyads_df.tail(n=100)

Unnamed: 0,dyad_start_date,ccode1,state_name1,ccode2,state_name2,dyad_end_year,left_censor,right_censor,defense,neutrality,nonaggression,entente,year
74029,2006-12-15,517,Rwanda,540,Angola,2012-01-01,0,1,1,0,1.0,1.0,2012
74030,2006-12-15,517,Rwanda,551,Zambia,2012-01-01,0,1,1,0,1.0,1.0,2006
74031,2006-12-15,517,Rwanda,551,Zambia,2012-01-01,0,1,1,0,1.0,1.0,2007
74032,2006-12-15,517,Rwanda,551,Zambia,2012-01-01,0,1,1,0,1.0,1.0,2008
74033,2006-12-15,517,Rwanda,551,Zambia,2012-01-01,0,1,1,0,1.0,1.0,2009
74034,2006-12-15,517,Rwanda,551,Zambia,2012-01-01,0,1,1,0,1.0,1.0,2010
74035,2006-12-15,517,Rwanda,551,Zambia,2012-01-01,0,1,1,0,1.0,1.0,2011
74036,2006-12-15,517,Rwanda,551,Zambia,2012-01-01,0,1,1,0,1.0,1.0,2012
74037,2006-12-15,517,Rwanda,625,Sudan,2012-01-01,0,1,1,0,1.0,1.0,2006
74038,2006-12-15,517,Rwanda,625,Sudan,2012-01-01,0,1,1,0,1.0,1.0,2007


In [76]:
# Length of the alliance as a timedelta: end date minus start date.
# `dyad_end_year` was parsed from a bare year with format '%Y', so the end
# point used here is a date within that year, not an exact end date.
# NOTE(review): an alliance that starts and ends in the same calendar year may
# therefore get a non-positive duration - confirm whether that matters.
alliance_length = dyads_df['dyad_end_year'].sub(dyads_df['dyad_start_date'])
dyads_df['alliance_time'] = alliance_length
dyads_df.head()

Unnamed: 0,dyad_start_date,ccode1,state_name1,ccode2,state_name2,dyad_end_year,left_censor,right_censor,defense,neutrality,nonaggression,entente,year,alliance_time
0,1816-01-01,200,United Kingdom,235,Portugal,2012-01-01,1,1,1,0,1.0,0.0,1816,71588 days
1,1816-01-01,200,United Kingdom,235,Portugal,2012-01-01,1,1,1,0,1.0,0.0,1817,71588 days
2,1816-01-01,200,United Kingdom,235,Portugal,2012-01-01,1,1,1,0,1.0,0.0,1818,71588 days
3,1816-01-01,200,United Kingdom,235,Portugal,2012-01-01,1,1,1,0,1.0,0.0,1819,71588 days
4,1816-01-01,200,United Kingdom,235,Portugal,2012-01-01,1,1,1,0,1.0,0.0,1820,71588 days


In [77]:
# Persist the engineered frame without the index column.
# The file is written relative to the current working directory, whereas the
# input was read from ./data/.
dyads_df.to_csv(path_or_buf='engineered_dyads_yearly.csv', index=False)