In [10]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import accuracy_score

# Load the Democracy Index CSV into a data frame, keeping only the
# columns at positions 1 through 9 (position 0 is skipped).
DF = pd.read_csv(
    'DATA/Democracy-Index.csv',
    usecols=range(1, 10),
)

In [11]:
# Keep a full copy of the data (it still holds the 'Democracy' label column)
# and strip 'Democracy' and 'Country' from DF so that only feature columns
# remain for building the model.
#
# The guard makes this cell safe to re-run: once the columns are gone, a
# second run neither raises a KeyError nor replaces df_target with a frame
# that no longer contains the label.
if 'Democracy' in DF.columns:
    df_target = DF.copy()
    # drop() returns a new frame instead of mutating the loaded one in place.
    DF = DF.drop(columns=['Democracy', 'Country'])

In [12]:
# Split the data in two parts: 80% of the rows to train the model and the
# remaining 20% to test it. The fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    DF,
    df_target['Democracy'],
    test_size=0.2,
    random_state=0,
)

In [17]:
# Wrap each split, together with its labels, in the DMatrix container
# used by xgboost's native training and prediction API.
train = xgb.DMatrix(data=X_train, label=y_train)
test = xgb.DMatrix(data=X_test, label=y_test)

In [18]:
# Booster settings: maximum tree depth 10, learning rate (eta) 0.3, and a
# softmax objective over 3 classes.
param = dict(
    max_depth=10,
    eta=0.3,
    objective='multi:softmax',
    num_class=3,
)
# Number of boosting rounds passed to xgb.train.
epochs = 30

In [19]:
# Fit the booster on the training DMatrix for `epochs` boosting rounds.
model = xgb.train(params=param, dtrain=train, num_boost_round=epochs)

In [20]:
# Run the trained model on the held-out test DMatrix.
predictions = model.predict(data=test)

In [21]:
# Accuracy of the model on the test data: the share of test rows whose
# predicted class equals the true label.
# On the recorded run this comes out at roughly 0.88.
accuracy_score(y_true=y_test, y_pred=predictions)

0.8787878787878788

In [22]:
# Lets you choose arbitrary feature values for a hypothetical country and
# check whether the model classifies it as a democracy or not.
# Play with it!
#
# NOTE(review): the keys presumably have to match the feature columns the
# model was trained on (same names, same order) -- verify against the CSV.
data = {
    'Expectancy': [78],
    'PopulationD': [100],
    'Gini': [10],
    'AgeMed': [40],
    'Skyscraper': [350.0],
    'Children': [1],
    'PressF': [20],
}
my_test = pd.DataFrame(data=data)

# No label is attached: there is no known answer for a made-up country, and
# a label is not needed to run a prediction.
my_test_dmatrix = xgb.DMatrix(my_test)

# Stored under its own name so the test-set `predictions` used by the
# accuracy cell are not overwritten (re-running that cell would otherwise
# fail on a length mismatch).
my_prediction = model.predict(my_test_dmatrix)

# The frame holds a single row, so read its one predicted class explicitly
# instead of comparing the whole array.
# NOTE(review): the model is trained with num_class=3, so classes 1 and 2
# both fall into the 'democracy' branch -- confirm this matches the encoding
# of the 'Democracy' column.
if int(my_prediction[0]) == 0:
    print('This country is a dictatorship.')
else:
    print('This country is a democracy.')

This country is a democracy.
