In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier, BaggingClassifier, StackingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelBinarizer
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score

## Preparation

In [2]:
df=pd.read_csv('./data/ssp_public.csv', encoding='ISO-8859-1')

In [3]:
df.shape

(62141, 106)

In [4]:
for col in df.columns:
    df.rename({col: col.upper()}, axis=1, inplace=True)

In [5]:
drop_cols=\
['AID',
 'AMBIG_INI',
 'AMBIG_WGT',
 'ATK_TYPE',
 'CODE_DAY',
 'CODE_MONTH',
 'CODE_YEAR',
 'COWCODE',
 'CTRY_BIAS',
 'DATE_TYP',
 'DAY',
 'DSA_TYPE',
 'EVENT',
 'EVENTID',
 'EV_TYPE',
 'FROM_EID',
 'GP3',
 'GP4',
 'GP7',
 'GP8',
 'INI_IGRP1',
 'INI_PGRP1',
 'INI_SGRP1',
 'JUL_EED',
 'JUL_END_DATE',
 'JUL_LED',
 'JUL_PED',
 'JUL_PSD',
 'JUL_START_DATE',
 'KNOW_INI',
 'LOC_TYPE',
 'MONTH',
 'NEWS_SOURCE',
 'PINPOINT',
 'PROPERTY_OWNER',
 'PROPERTY_DAMAGED',
 'PUB_DATE',
 'PUB_MON',
 'PUB_YEAR',
 'SYM_TYPE',
 'TAR_IGRP1',
 'TAR_PGRP1',
 'TAR_SGRP1',
 'TO_EID',
 'VIC_IGRP1',
 'VIC_PGRP1',
 'VIC_SGRP1',
 'YEAR']

In [6]:
len(drop_cols)

48

In [7]:
df['YEAR'].min()

1946

In [8]:
df['YEAR'].max()

2005

In [9]:
df['NEWS_SOURCE'].unique()

array(['NYT', nan, 'WSJ', 'FBI', 'SWB'], dtype=object)

In [10]:
df_coups = df[(df['COUP_FAILED']==1) | df['COUP']==1]
df_coups=df_coups.sort_values(['YEAR', 'MONTH', 'DAY'])
df_coups=df_coups.drop(drop_cols, axis=1)
df_coups.reset_index(drop=True, inplace=True)
df_coups.fillna(0, inplace=True)

In [11]:
df_coups.shape

(747, 58)

In [12]:
len(df_coups['COUNTRY'].unique())

128

In [13]:
df_coups['COUNTRY'].sort_values().unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina',
       'Azerbaijan', 'Bahrain', 'Bangladesh', 'Benin', 'Bolivia',
       'Brazil', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Burundi',
       'Cambodia', 'Cameroon', 'Central African Repu', 'Chad', 'Chile',
       'China', 'Colombia', 'Comoros', 'Congo', 'Costa Rica',
       "Cote d'Ivoire", 'Cuba', 'Cyprus', 'Czech Republic',
       'Czechoslovakia', 'Dominica', 'Dominican Republic', 'East Timor',
       'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Ethiopia',
       'Fiji', 'Finland', 'Gabon', 'Gambia', 'Georgia',
       'German Federal Repub', 'Ghana', 'Greece', 'Grenada', 'Guatemala',
       'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti', 'Honduras',
       'Hungary', 'India', 'Indonesia', 'Iran', 'Iraq', 'Israel', 'Italy',
       'Jamaica', 'Japan', 'Jordan', 'Kenya', 'Korea, South (Rep.)',
       'Kyrgyzstan', 'Laos', 'Latvia', 'Lebanon', 'Lesotho', 'Liberia',
       'Libya', 'Luxembourg', 'Madagascar', 'Mal

In [14]:
df_coups.drop(df_coups[df_coups['COUNTRY'].isin(['United States', 'United Kingdom', 'Ukraine', 
                                                 'Latvia', 'Finland', 'Japan', 'Israel',
                                                 'German Federal Repub', 'Italy', 'Mexico', 'Luxembourg'])].index, inplace=True)

In [15]:
print('Number of successful coups: {}.'.format(df_coups[df_coups['COUP']==1].shape[0]))

Number of successful coups: 374.


In [16]:
print('Number of failed coups: {}.'.format(df_coups[df_coups['COUP_FAILED']==1].shape[0]))

Number of failed coups: 348.


In [17]:
len(df_coups['COUNTRY'].sort_values().unique())

117

In [18]:
df_coups

Unnamed: 0,COUNTRY,INI_TYPE,NGOV_I1,GOV_I1,TAR_TYPE,HUMAN_T1,GOV_T1,G_LVL_I,G_LVL_T,G_LVL_V,HUMAN_V1,GOV_V1,VIC_TYPE,DAY_SPAN,N_INJURD,TAR_GPOL,GP_TYPE,RECAP,POSTHOC,LINKED,LINK_TYPE,PE_TYPE,AD_VIOL,AD_TACT,EXP_TYPE,STAT_ACT,N_OF_INI_P,N_OF_INI_A,WEAP_GRD,N_KILLED_P,N_KILLED_A,E_LENGTH,N_INJURD_D,DAM_PROP,ARRESTS,PER_ATK_I,PER_ATK_E,PUB_ORDER,RETRIBUTION,POL_EXPRESS,INTANG_REP,MASS_EXPRESS,POL_VIOL,STAT_VIOL,ST_REPRESS,SC_ANIMOSITY,ANTI_GOV_SENTMNTS,CLASS_CONFLICT,POL_DESIRES,RETAIN_POWER,ECO_SCARCITY,PERS_SECURITY,QUASI_EVENT,COUP,COUP_FAILED,REGION,VICTIM_EFFECT,WEAPON
0,Haiti,2.0,0.0,9.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,8.0,0,0,0.0,0,0,1,1.0,0.0,0,0.0,0.0,0.0,3.0,3.0,1.0,0.0,0,1,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,0,0,0,0,0,1,0,4.0,0.0,1
1,Haiti,2.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,31.0,0,0,0.0,0,0,1,1.0,0.0,0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0,1,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,0,0,0,0,0,1,0,4.0,0.0,1
2,Cuba,1.0,37.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0,0.0,0,0,1,1.0,0.0,0,0.0,0.0,0.0,60.0,60.0,1.0,0.0,0,1,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1,0,0,0,0,0,1,4.0,0.0,1
3,Guatemala,1.0,37.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.0,0,0,0.0,0,0,1,1.0,0.0,0,0.0,0.0,0.0,15.0,15.0,1.0,0.0,0,3,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1,0,0,0,0,0,1,4.0,0.0,1
4,Indonesia,1.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,61.0,0,0,0.0,0,0,1,1.0,0.0,0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0,1,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1,0,0,0,0,1,0,2.0,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
741,Equatorial Guinea,1.0,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,31.0,0,0,0.0,0,0,1,1.0,0.0,0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0,1,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,0,0,0,0,0,0,1,1.0,0.0,1
742,Algeria,2.0,0.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,366.0,0,1,5.0,0,0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0,1,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,0,0,0,0,0,1,0,7.0,0.0,1
744,Nepal,2.0,0.0,11.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0,0.0,0,0,0,0.0,0.0,0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0,1,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1,0,0,0,0,1,0,2.0,0.0,1
745,Kyrgyzstan,1.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0,0.0,0,0,0,0.0,0.0,0,0.0,0.0,0.0,1500.0,1500.0,1.0,0.0,0,1,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,0,0,0,0,0,1,0,2.0,0.0,1


## Feature selection

In [19]:
df_counts=df_coups.describe()
df_counts

Unnamed: 0,INI_TYPE,NGOV_I1,GOV_I1,TAR_TYPE,HUMAN_T1,GOV_T1,G_LVL_I,G_LVL_T,G_LVL_V,HUMAN_V1,GOV_V1,VIC_TYPE,DAY_SPAN,N_INJURD,TAR_GPOL,GP_TYPE,RECAP,POSTHOC,LINKED,LINK_TYPE,PE_TYPE,AD_VIOL,AD_TACT,EXP_TYPE,STAT_ACT,N_OF_INI_P,N_OF_INI_A,WEAP_GRD,N_KILLED_P,N_KILLED_A,E_LENGTH,N_INJURD_D,DAM_PROP,ARRESTS,PER_ATK_I,PER_ATK_E,PUB_ORDER,RETRIBUTION,POL_EXPRESS,INTANG_REP,MASS_EXPRESS,POL_VIOL,STAT_VIOL,ST_REPRESS,SC_ANIMOSITY,ANTI_GOV_SENTMNTS,CLASS_CONFLICT,POL_DESIRES,RETAIN_POWER,ECO_SCARCITY,PERS_SECURITY,QUASI_EVENT,COUP,COUP_FAILED,REGION,VICTIM_EFFECT,WEAPON
count,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0,722.0
mean,1.32687,10.193906,4.078947,0.065097,1.33241,0.461219,1.250693,0.108033,0.108033,1.33241,0.461219,0.00831,82.926593,0.0,0.015235,0.069252,0.009695,0.00831,0.415512,0.362881,0.0,0.0,0.023546,0.0,0.0,8456.896,8736.175,1.119114,0.0,0.0,1.191136,0.0,0.0,0.001385,0.0,0.0,0.00554,0.00554,0.0,0.0,0.0,0.0,0.0,0.0,0.060942,1.0,0.054017,0.34626,0.144044,0.001385,0.004155,0.009695,0.518006,0.481994,3.373961,0.160665,4.037396
std,0.811549,15.030291,5.358248,0.368564,6.898534,2.514152,1.482035,0.559341,0.559341,6.898534,2.514152,0.157785,201.060928,0.0,0.122573,0.584755,0.098054,0.090844,0.493152,0.481164,0.0,0.0,0.632674,0.0,0.0,223293.7,223345.0,0.779199,0.0,0.0,0.684303,0.0,0.0,0.037216,0.0,0.0,0.074277,0.074277,0.0,0.0,0.0,0.0,0.0,0.0,0.239389,0.0,0.226207,0.476107,0.351378,0.037216,0.064371,0.098054,0.500022,0.500022,2.316506,1.296192,7.815663
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
25%,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
50%,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3.0,0.0,1.0
75%,2.0,23.0,9.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,31.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,15.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,4.0,0.0,1.0
max,4.0,39.0,24.0,3.0,37.0,20.0,4.0,3.0,3.0,37.0,20.0,3.0,1826.0,0.0,1.0,5.0,1.0,1.0,1.0,1.0,0.0,0.0,17.0,0.0,0.0,6000000.0,6000000.0,5.0,0.0,0.0,4.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,13.0,26.0


In [20]:
df_counts.loc['std'].sort_values(ascending=False).head(10)

N_OF_INI_A    223345.019706
N_OF_INI_P    223293.678437
DAY_SPAN         201.060928
NGOV_I1           15.030291
WEAPON             7.815663
HUMAN_T1           6.898534
HUMAN_V1           6.898534
GOV_I1             5.358248
GOV_T1             2.514152
GOV_V1             2.514152
Name: std, dtype: float64

In [21]:
# List of columns having no data - to remove from features
empty=list(df_counts.loc['mean'].where(df_counts.loc['mean']==0).dropna().index)
empty.sort()

In [22]:
empty

['AD_VIOL',
 'DAM_PROP',
 'EXP_TYPE',
 'INTANG_REP',
 'MASS_EXPRESS',
 'N_INJURD',
 'N_INJURD_D',
 'N_KILLED_A',
 'N_KILLED_P',
 'PER_ATK_E',
 'PER_ATK_I',
 'PE_TYPE',
 'POL_EXPRESS',
 'POL_VIOL',
 'STAT_ACT',
 'STAT_VIOL',
 'ST_REPRESS']

In [23]:
len(empty)

17

In [24]:
df_coups.drop(empty, axis=1, inplace=True)

In [25]:
features=[
     'AD_TACT',
     'ARRESTS',
     'CLASS_CONFLICT',
     'ECO_SCARCITY',
     'E_LENGTH',
     'GOV_I1',
     'GOV_T1',
     'GOV_V1',
     'G_LVL_I',
     'G_LVL_T',
     'G_LVL_V',
     'HUMAN_T1',
     'HUMAN_V1',
     'INI_TYPE',
     'LINKED',
     'LINK_TYPE',
     'NGOV_I1',
     'N_OF_INI_A',
     'N_OF_INI_P',
     'PERS_SECURITY',
     'POL_DESIRES',
     'POSTHOC',
     'PUB_ORDER',
     'RETAIN_POWER',
     'RETRIBUTION',
     'SC_ANIMOSITY',
     'TAR_GPOL',
     'TAR_TYPE',
     'VICTIM_EFFECT',
     'VIC_TYPE',
     'WEAPON',
     'WEAP_GRD'
]

In [26]:
df_features=df_coups[features]
df_features.var().sort_values(ascending=False)

N_OF_INI_A        4.988300e+10
N_OF_INI_P        4.986007e+10
NGOV_I1           2.259096e+02
WEAPON            6.108459e+01
HUMAN_T1          4.758977e+01
HUMAN_V1          4.758977e+01
GOV_I1            2.871082e+01
GOV_T1            6.320963e+00
GOV_V1            6.320963e+00
G_LVL_I           2.196428e+00
VICTIM_EFFECT     1.680115e+00
INI_TYPE          6.586113e-01
WEAP_GRD          6.071515e-01
E_LENGTH          4.682708e-01
AD_TACT           4.002770e-01
G_LVL_V           3.128619e-01
G_LVL_T           3.128619e-01
LINKED            2.431987e-01
LINK_TYPE         2.315190e-01
POL_DESIRES       2.266781e-01
TAR_TYPE          1.358397e-01
RETAIN_POWER      1.234666e-01
SC_ANIMOSITY      5.730729e-02
CLASS_CONFLICT    5.116970e-02
VIC_TYPE          2.489617e-02
TAR_GPOL          1.502415e-02
POSTHOC           8.252619e-03
RETRIBUTION       5.517114e-03
PUB_ORDER         5.517114e-03
PERS_SECURITY     4.143599e-03
ARRESTS           1.385042e-03
ECO_SCARCITY      1.385042e-03
dtype: f

In [27]:
# df_features['N_OF_INI_P'].plot().set_title('Pristine measure of # of initiators')

In [28]:
# sns.kdeplot(df_features['N_OF_INI_A']).set_title('Measure of # of initiators')

In [29]:
# sns.kdeplot(df_features['N_OF_INI_P'])

In [30]:
# sns.kdeplot(df_features['HUMAN_V1'])

In [31]:
df_coups[features]

Unnamed: 0,AD_TACT,ARRESTS,CLASS_CONFLICT,ECO_SCARCITY,E_LENGTH,GOV_I1,GOV_T1,GOV_V1,G_LVL_I,G_LVL_T,G_LVL_V,HUMAN_T1,HUMAN_V1,INI_TYPE,LINKED,LINK_TYPE,NGOV_I1,N_OF_INI_A,N_OF_INI_P,PERS_SECURITY,POL_DESIRES,POSTHOC,PUB_ORDER,RETAIN_POWER,RETRIBUTION,SC_ANIMOSITY,TAR_GPOL,TAR_TYPE,VICTIM_EFFECT,VIC_TYPE,WEAPON,WEAP_GRD
0,0.0,0,0,0,1,9.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,2.0,1,1.0,0.0,3.0,3.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1,1.0
1,0.0,0,0,0,1,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1,1.0,0.0,1.0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1,1.0
2,0.0,0,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1,1.0,37.0,60.0,60.0,0,1,0,0,0,0,0,0,0.0,0.0,0.0,1,1.0
3,0.0,0,0,0,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1,1.0,37.0,15.0,15.0,0,1,0,0,0,0,0,0,0.0,0.0,0.0,1,1.0
4,0.0,0,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1,1.0,32.0,1.0,0.0,0,1,0,0,0,0,0,0,0.0,0.0,0.0,1,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
741,0.0,0,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1,1.0,39.0,1.0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1,1.0
742,0.0,0,0,0,1,3.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,2.0,0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,1,0.0,0.0,3.0,1,1.0
744,0.0,0,0,0,1,11.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,2.0,0,0.0,0.0,1.0,1.0,0,1,0,0,0,0,0,0,0.0,0.0,0.0,1,1.0
745,0.0,0,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0.0,5.0,1500.0,1500.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1,1.0


In [32]:
# fig, axes=plt.subplots(1, len(features), figsize=(15, 5))
# for i in range(len(features)):
#     sns.histplot(data=df_coups[features], ax=axes[i], x=features[i], kde=True)

In [33]:
# numerical=[
# 'N_OF_INI_A',
# 'N_OF_INI_P',
# 'DAY_SPAN'
# ]

In [34]:
# categorical=[x for x in features if x not in numerical]

In [35]:
# predictors=categorical
# predictors

## Model

In [36]:
X=df_coups[features].astype(str)
X_encoded=OneHotEncoder().fit(X).transform(X)
y=df_coups['COUP_FAILED']

In [37]:
n_estimators=10
estimators=[ 
    ('LinearSVC', LinearSVC(max_iter=10000)),
#     ('KNeighborsClassifier', KNeighborsClassifier(n_neighbors=5)),
    ('RandomForest', RandomForestClassifier(n_estimators=n_estimators, random_state=42) ),
    ('AdaBoost', AdaBoostClassifier(n_estimators=n_estimators, random_state=0) ),
    ('Bagging', BaggingClassifier(base_estimator=SVC(), n_estimators=n_estimators, random_state=0))
]

In [38]:
estimators

[('LinearSVC', LinearSVC(max_iter=10000)),
 ('RandomForest', RandomForestClassifier(n_estimators=10, random_state=42)),
 ('AdaBoost', AdaBoostClassifier(n_estimators=10, random_state=0)),
 ('Bagging', BaggingClassifier(base_estimator=SVC(), random_state=0))]

In [39]:
stack=estimators.copy()
stack.append(('Stacking', StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())))

In [40]:
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.8, random_state=42)

In [41]:
scores=[]
for estimator in stack: 
        scores.append((estimator[0], estimator[1].fit(X_train, y_train).score(X_test, y_test)))

In [42]:
pd.DataFrame(scores, columns=['estimator', 'score']).set_index('estimator')

Unnamed: 0_level_0,score
estimator,Unnamed: 1_level_1
LinearSVC,0.645329
RandomForest,0.612457
AdaBoost,0.657439
Bagging,0.636678
Stacking,0.66436


In [43]:
scores=[]
for estimator in stack:
    yhat=estimator[1].fit(X_train, y_train).predict(X_encoded)
    scores.append((estimator[0], accuracy_score(y, yhat)))

In [44]:
pd.DataFrame(scores, columns=['estimator', 'score']).set_index('estimator')

Unnamed: 0_level_0,score
estimator,Unnamed: 1_level_1
LinearSVC,0.685596
RandomForest,0.67313
AdaBoost,0.674515
Bagging,0.65374
Stacking,0.695291
