In [1]:
import pandas as pd
import numpy as np 
import os
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.svm import SVR, SVC
from sklearn.metrics import mean_absolute_error, classification_report, accuracy_score
from xgboost import XGBClassifier
from sklearn.grid_search import GridSearchCV



In [2]:
os.chdir("ETL Legislative 2T/data")

In [3]:
ls

[0m[01;32mcdsp_legi1997t2_circ.xlsx[0m*  [01;32mcdsp_legi2012t2_circ.xlsx[0m*    [01;32mdico_variables.csv[0m*
[01;32mcdsp_legi2002t2_circ.xls[0m*   [01;32mdataframe_elections.xlsx[0m*
[01;32mcdsp_legi2007t2_circ.xls[0m*   [01;32mdataset_legislative_2nd.csv[0m*


In [4]:
df = pd.read_csv("dataset_legislative_2nd.csv")
df["elu"] = df["elu"].map(lambda x: 1 if x=='O' else 0)

In [5]:
df.head()

Unnamed: 0,an,Code département,département,circonscription,code,name,first_name,sexe,circo_bloc,circo_leg_meme_nuance,...,top_triangulaire,bloc_opposant_Centre,bloc_opposant_Divers,bloc_opposant_Droite,bloc_opposant_Exdroite,bloc_opposant_Exgauche,bloc_opposant_Gauche,voix_bloc_1nd,p_voix_2nd_lag,p_voix_candidat_2nd_lag
0,2012,1,AIN,1.0,1|1,BRETON,XAVIER,M,Droite,1.0,...,0,0,0,0,0,0,1,0.2306,0.536621,0.536621
1,2012,1,AIN,1.0,1|1,DEBAT,JEAN-FRANCOIS,M,Droite,0.0,...,0,0,0,1,0,0,0,0.2492,0.463379,0.463379
2,2012,1,AIN,2.0,1|2,DE LA VERPILLIERE,CHARLES,M,Droite,1.0,...,1,0,0,0,1,0,1,0.2209,,
3,2012,1,AIN,2.0,1|2,EYRAUD,OLIVIER,M,Droite,0.0,...,1,0,0,1,0,0,1,0.1252,,
4,2012,1,AIN,2.0,1|2,RAYMOND,MICHEL,M,Droite,0.0,...,1,0,0,1,1,0,0,0.2024,,


In [6]:
nd_tour = pd.get_dummies(pd.qcut(df["p_voix_2nd_lag"], 5, labels=False), prefix="2nd_tour" )
nd_tour["2nd_tour_nan"] = nd_tour.apply(lambda x : 0 if np.sum(x)==1 else 1 , axis=1)

nd_tour_candidate = pd.get_dummies(pd.qcut(df["p_voix_candidat_2nd_lag"], 5, labels=False), 
                                   prefix="2nd_tour_candidate" )
nd_tour_candidate["2nd_tour_candidate_nan"] = nd_tour_candidate.apply(lambda x : 0 if np.sum(x)==1 else 1 , axis=1)

nd = pd.concat([nd_tour, nd_tour_candidate], axis=1)

In [7]:
df = pd.concat([df, nd], axis=1)

In [8]:
del df["p_voix_2nd_lag"]
del df["p_voix_candidat_2nd_lag"]
del df["sexe"]
#del df["elu"]

In [9]:
df = pd.get_dummies(df, columns=["bloc"], prefix="bloc")

In [10]:
df.columns

Index(['an', 'Code département', 'département', 'circonscription', 'code',
       'name', 'first_name', 'circo_bloc', 'circo_leg_meme_nuance',
       'circo_pres_meme_nuance', 'score_bloc_pres', 'depute_sortant',
       'ancien_depute', 'au_gouvernement', 'ancien_ministre', 'p_voix',
       'voix_y', 'Exprimés', 'score', 'elu', 'top_triangulaire',
       'bloc_opposant_Centre', 'bloc_opposant_Divers', 'bloc_opposant_Droite',
       'bloc_opposant_Exdroite', 'bloc_opposant_Exgauche',
       'bloc_opposant_Gauche', 'voix_bloc_1nd', '2nd_tour_0.0', '2nd_tour_1.0',
       '2nd_tour_2.0', '2nd_tour_3.0', '2nd_tour_4.0', '2nd_tour_nan',
       '2nd_tour_candidate_0.0', '2nd_tour_candidate_1.0',
       '2nd_tour_candidate_2.0', '2nd_tour_candidate_3.0',
       '2nd_tour_candidate_4.0', '2nd_tour_candidate_nan', 'bloc_Centre',
       'bloc_Divers', 'bloc_Droite', 'bloc_Exdroite', 'bloc_Exgauche',
       'bloc_Gauche'],
      dtype='object')

In [11]:
df.head()

Unnamed: 0,an,Code département,département,circonscription,code,name,first_name,circo_bloc,circo_leg_meme_nuance,circo_pres_meme_nuance,...,2nd_tour_candidate_2.0,2nd_tour_candidate_3.0,2nd_tour_candidate_4.0,2nd_tour_candidate_nan,bloc_Centre,bloc_Divers,bloc_Droite,bloc_Exdroite,bloc_Exgauche,bloc_Gauche
0,2012,1,AIN,1.0,1|1,BRETON,XAVIER,Droite,1.0,1.0,...,1,0,0,0,0,0,1,0,0,0
1,2012,1,AIN,1.0,1|1,DEBAT,JEAN-FRANCOIS,Droite,0.0,0.0,...,0,0,0,0,0,0,0,0,0,1
2,2012,1,AIN,2.0,1|2,DE LA VERPILLIERE,CHARLES,Droite,1.0,1.0,...,0,0,0,1,0,0,1,0,0,0
3,2012,1,AIN,2.0,1|2,EYRAUD,OLIVIER,Droite,0.0,0.0,...,0,0,0,1,0,0,0,1,0,0
4,2012,1,AIN,2.0,1|2,RAYMOND,MICHEL,Droite,0.0,0.0,...,0,0,0,1,0,0,0,0,0,1


In [12]:
df = df[["an",'circo_leg_meme_nuance',"code",
       'circo_pres_meme_nuance', 'score_bloc_pres', 'depute_sortant',
       'ancien_depute', 'au_gouvernement', 'ancien_ministre', 'top_triangulaire',
       'bloc_opposant_Centre', 'bloc_opposant_Divers', 'bloc_opposant_Droite',
       'bloc_opposant_Exdroite', 'bloc_opposant_Exgauche',
       'bloc_opposant_Gauche', 'voix_bloc_1nd', 
         
        '2nd_tour_0.0', '2nd_tour_1.0',
       '2nd_tour_2.0', '2nd_tour_3.0', '2nd_tour_4.0', '2nd_tour_nan',
       '2nd_tour_candidate_0.0', '2nd_tour_candidate_1.0',
       '2nd_tour_candidate_2.0', '2nd_tour_candidate_3.0',
       '2nd_tour_candidate_4.0', '2nd_tour_candidate_nan',
         
        'bloc_Centre', 'bloc_Divers',
       'bloc_Droite', 'bloc_Exdroite', 'bloc_Exgauche', 'bloc_Gauche', "score","elu"]]


In [13]:
df.head()

Unnamed: 0,an,circo_leg_meme_nuance,code,circo_pres_meme_nuance,score_bloc_pres,depute_sortant,ancien_depute,au_gouvernement,ancien_ministre,top_triangulaire,...,2nd_tour_candidate_4.0,2nd_tour_candidate_nan,bloc_Centre,bloc_Divers,bloc_Droite,bloc_Exdroite,bloc_Exgauche,bloc_Gauche,score,elu
0,2012,1.0,1|1,1.0,0.257,1.0,1.0,0.0,0.0,0,...,0,0,0,0,1,0,0,0,0.515715,1
1,2012,0.0,1|1,0.0,0.2147,0.0,0.0,0.0,0.0,0,...,0,0,0,0,0,0,0,1,0.484285,0
2,2012,1.0,1|2,1.0,0.2753,1.0,1.0,0.0,0.0,1,...,0,1,0,0,1,0,0,0,0.443146,1
3,2012,0.0,1|2,0.0,0.1798,0.0,0.0,0.0,0.0,1,...,0,1,0,0,0,1,0,0,0.169303,0
4,2012,0.0,1|2,0.0,0.2027,0.0,0.0,0.0,0.0,1,...,0,1,0,0,0,0,0,1,0.387551,0


In [14]:
X_train = df[(df["an"]!=2012)&(df["an"]!=1997)]
X_test = df[df["an"]==2012]
y_train = df.ix[(df["an"]!=2012)&(df["an"]!=1997), "score"]
y_test = df.ix[df["an"]==2012, "score"]
y_train_cl = df.ix[(df["an"]!=2012)&(df["an"]!=1997), "elu"]
y_test_cl = df.ix[df["an"]==2012, "elu"]

In [15]:
del X_train["score"]
del X_test["score"]
del X_train["elu"]
del X_test["elu"]

In [16]:
del X_train["an"]
del X_test["code"]
del X_train["code"]
del X_test["an"]

In [17]:
X_train.columns

Index(['circo_leg_meme_nuance', 'circo_pres_meme_nuance', 'score_bloc_pres',
       'depute_sortant', 'ancien_depute', 'au_gouvernement', 'ancien_ministre',
       'top_triangulaire', 'bloc_opposant_Centre', 'bloc_opposant_Divers',
       'bloc_opposant_Droite', 'bloc_opposant_Exdroite',
       'bloc_opposant_Exgauche', 'bloc_opposant_Gauche', 'voix_bloc_1nd',
       '2nd_tour_0.0', '2nd_tour_1.0', '2nd_tour_2.0', '2nd_tour_3.0',
       '2nd_tour_4.0', '2nd_tour_nan', '2nd_tour_candidate_0.0',
       '2nd_tour_candidate_1.0', '2nd_tour_candidate_2.0',
       '2nd_tour_candidate_3.0', '2nd_tour_candidate_4.0',
       '2nd_tour_candidate_nan', 'bloc_Centre', 'bloc_Divers', 'bloc_Droite',
       'bloc_Exdroite', 'bloc_Exgauche', 'bloc_Gauche'],
      dtype='object')

In [173]:
rl = LinearRegression()
rl.fit(X_train[['voix_bloc_1nd',  'top_triangulaire']], y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [174]:
rl.score(X_test[['voix_bloc_1nd',  'top_triangulaire']], y_test)

0.39181162649754087

In [175]:
rf = RandomForestRegressor()
rf.fit(X_train, y_train)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [176]:
rf.score(X_test, y_test)

0.46923183125278423

In [177]:
svm = SVR(kernel="rbf",C=15, gamma=0.2)
svm.fit(X_train, y_train)

SVR(C=15, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.2,
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [178]:
svm.score(X_test, y_test)

0.70092089060259188

In [179]:
mean_absolute_error(y_test, svm.predict(X_test))

0.04673964231001073

In [18]:
X_train = df[(df["an"]!=2012)&(df["an"]!=1997)]
X_test = df[df["an"]==2012]
y_train_cl = df.ix[(df["an"]!=2012)&(df["an"]!=1997), "elu"]
y_test_cl = df.ix[df["an"]==2012, "elu"]
del X_train["score"]
del X_test["score"]
del X_train["elu"]
del X_test["elu"]
del X_train["an"]
del X_test["an"]
del X_train["code"]

In [19]:
X_train.columns

Index(['circo_leg_meme_nuance', 'circo_pres_meme_nuance', 'score_bloc_pres',
       'depute_sortant', 'ancien_depute', 'au_gouvernement', 'ancien_ministre',
       'top_triangulaire', 'bloc_opposant_Centre', 'bloc_opposant_Divers',
       'bloc_opposant_Droite', 'bloc_opposant_Exdroite',
       'bloc_opposant_Exgauche', 'bloc_opposant_Gauche', 'voix_bloc_1nd',
       '2nd_tour_0.0', '2nd_tour_1.0', '2nd_tour_2.0', '2nd_tour_3.0',
       '2nd_tour_4.0', '2nd_tour_nan', '2nd_tour_candidate_0.0',
       '2nd_tour_candidate_1.0', '2nd_tour_candidate_2.0',
       '2nd_tour_candidate_3.0', '2nd_tour_candidate_4.0',
       '2nd_tour_candidate_nan', 'bloc_Centre', 'bloc_Divers', 'bloc_Droite',
       'bloc_Exdroite', 'bloc_Exgauche', 'bloc_Gauche'],
      dtype='object')

In [20]:
clf1 = SVC(probability=True)
clf2 = RandomForestClassifier()
clf3= XGBClassifier(min_child_weight=2, n_estimators=400, 
                    learning_rate=0.02, max_depth=3)

clf1.fit(X_train, y_train_cl)
clf2.fit(X_train, y_train_cl)
clf3.fit(X_train, y_train_cl)

XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=1,
       gamma=0, learning_rate=0.02, max_delta_step=0, max_depth=3,
       min_child_weight=2, missing=None, n_estimators=400, nthread=-1,
       objective='binary:logistic', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)

In [21]:
predict_1 = clf1.predict_proba(X_test.drop(["code"], axis=1))
predict_2 = clf2.predict_proba(X_test.drop(["code"], axis=1))
predict_3 = clf3.predict_proba(X_test.drop(["code"], axis=1))

In [22]:
clf1.classes_

array([0, 1])

In [23]:
df_1 = pd.Series( (v[1] for v in predict_1) , name="svc")
df_2 = pd.Series( (v[1] for v in predict_2) , name="rf")
df_3 = pd.Series( (v[1] for v in predict_3) , name="xgb")

In [24]:
X_test = X_test.reset_index(drop=True)

In [25]:
X_test = pd.concat([X_test, df_1, df_2, df_3], axis=1)

In [26]:
X_test

Unnamed: 0,circo_leg_meme_nuance,code,circo_pres_meme_nuance,score_bloc_pres,depute_sortant,ancien_depute,au_gouvernement,ancien_ministre,top_triangulaire,bloc_opposant_Centre,...,2nd_tour_candidate_nan,bloc_Centre,bloc_Divers,bloc_Droite,bloc_Exdroite,bloc_Exgauche,bloc_Gauche,svc,rf,xgb
0,1.0,1|1,1.0,0.2570,1.0,1.0,0.0,0.0,0,0,...,0,0,0,1,0,0,0,0.859699,0.3,0.359416
1,0.0,1|1,0.0,0.2147,0.0,0.0,0.0,0.0,0,0,...,0,0,0,0,0,0,1,0.187253,0.0,0.239322
2,1.0,1|2,1.0,0.2753,1.0,1.0,0.0,0.0,1,0,...,1,0,0,1,0,0,0,0.938988,0.7,0.741685
3,0.0,1|2,0.0,0.1798,0.0,0.0,0.0,0.0,1,0,...,1,0,0,0,1,0,0,0.058118,0.2,0.051547
4,0.0,1|2,0.0,0.2027,0.0,0.0,0.0,0.0,1,0,...,1,0,0,0,0,0,1,0.343329,0.0,0.189207
5,1.0,1|3,1.0,0.2671,1.0,1.0,0.0,0.0,0,0,...,0,0,0,1,0,0,0,0.961083,0.7,0.837806
6,0.0,1|3,0.0,0.2090,0.0,0.0,0.0,0.0,0,0,...,1,0,0,0,0,0,1,0.122216,0.0,0.052451
7,0.0,1|4,0.0,0.1944,0.0,0.0,0.0,0.0,0,0,...,1,0,0,0,0,0,1,0.202043,0.0,0.132394
8,1.0,1|4,1.0,0.2876,1.0,1.0,0.0,0.0,0,0,...,1,0,0,1,0,0,0,0.891173,0.9,0.699940
9,1.0,1|5,1.0,0.2412,0.0,0.0,0.0,0.0,0,0,...,1,0,0,1,0,0,0,0.788604,0.7,0.324771


In [27]:
X_test["mean"] = X_test.apply(lambda x: np.max([x["svc"], x["xgb"], x["rf"]]), axis=1)
X_test["max"] = X_test.apply(lambda x: np.mean([x["svc"], x["xgb"], x["rf"]]), axis=1)

In [28]:
X_test.shape

(1101, 39)

In [29]:
X_test = pd.merge(X_test, X_test.groupby(["code"])["rf"].max().to_frame().reset_index(), how="left", on="code")
X_test = pd.merge(X_test, X_test.groupby(["code"])["svc"].max().to_frame().reset_index(), how="left", on="code")
X_test = pd.merge(X_test, X_test.groupby(["code"])["xgb"].max().to_frame().reset_index(), how="left", on="code")
X_test = pd.merge(X_test, X_test.groupby(["code"])["mean"].max().to_frame().reset_index(), how="left", on="code")
X_test = pd.merge(X_test, X_test.groupby(["code"])["max"].max().to_frame().reset_index(), how="left", on="code")

In [30]:
X_test.head()

Unnamed: 0,circo_leg_meme_nuance,code,circo_pres_meme_nuance,score_bloc_pres,depute_sortant,ancien_depute,au_gouvernement,ancien_ministre,top_triangulaire,bloc_opposant_Centre,...,svc_x,rf_x,xgb_x,mean_x,max_x,rf_y,svc_y,xgb_y,mean_y,max_y
0,1.0,1|1,1.0,0.257,1.0,1.0,0.0,0.0,0,0,...,0.859699,0.3,0.359416,0.859699,0.506372,0.3,0.859699,0.359416,0.859699,0.506372
1,0.0,1|1,0.0,0.2147,0.0,0.0,0.0,0.0,0,0,...,0.187253,0.0,0.239322,0.239322,0.142192,0.3,0.859699,0.359416,0.859699,0.506372
2,1.0,1|2,1.0,0.2753,1.0,1.0,0.0,0.0,1,0,...,0.938988,0.7,0.741685,0.938988,0.793557,0.7,0.938988,0.741685,0.938988,0.793557
3,0.0,1|2,0.0,0.1798,0.0,0.0,0.0,0.0,1,0,...,0.058118,0.2,0.051547,0.2,0.103222,0.7,0.938988,0.741685,0.938988,0.793557
4,0.0,1|2,0.0,0.2027,0.0,0.0,0.0,0.0,1,0,...,0.343329,0.0,0.189207,0.343329,0.177512,0.7,0.938988,0.741685,0.938988,0.793557


In [31]:
X_test["label_svc"] = X_test.apply(lambda x: 0 if x["svc_x"]<x["svc_y"] else 1, axis=1)
X_test["label_rf"] = X_test.apply(lambda x: 0 if x["rf_x"]<x["rf_y"] else 1, axis=1)
X_test["label_xgb"] = X_test.apply(lambda x: 0 if x["xgb_x"]<x["xgb_y"] else 1, axis=1)
X_test["label_mean"] = X_test.apply(lambda x: 0 if x["mean_x"]<x["mean_y"] else 1, axis=1)
X_test["label_max"] = X_test.apply(lambda x: 0 if x["max_x"]<x["max_y"] else 1, axis=1)

In [32]:
X_test

Unnamed: 0,circo_leg_meme_nuance,code,circo_pres_meme_nuance,score_bloc_pres,depute_sortant,ancien_depute,au_gouvernement,ancien_ministre,top_triangulaire,bloc_opposant_Centre,...,rf_y,svc_y,xgb_y,mean_y,max_y,label_svc,label_rf,label_xgb,label_mean,label_max
0,1.0,1|1,1.0,0.2570,1.0,1.0,0.0,0.0,0,0,...,0.3,0.859699,0.359416,0.859699,0.506372,1,1,1,1,1
1,0.0,1|1,0.0,0.2147,0.0,0.0,0.0,0.0,0,0,...,0.3,0.859699,0.359416,0.859699,0.506372,0,0,0,0,0
2,1.0,1|2,1.0,0.2753,1.0,1.0,0.0,0.0,1,0,...,0.7,0.938988,0.741685,0.938988,0.793557,1,1,1,1,1
3,0.0,1|2,0.0,0.1798,0.0,0.0,0.0,0.0,1,0,...,0.7,0.938988,0.741685,0.938988,0.793557,0,0,0,0,0
4,0.0,1|2,0.0,0.2027,0.0,0.0,0.0,0.0,1,0,...,0.7,0.938988,0.741685,0.938988,0.793557,0,0,0,0,0
5,1.0,1|3,1.0,0.2671,1.0,1.0,0.0,0.0,0,0,...,0.7,0.961083,0.837806,0.961083,0.832963,1,1,1,1,1
6,0.0,1|3,0.0,0.2090,0.0,0.0,0.0,0.0,0,0,...,0.7,0.961083,0.837806,0.961083,0.832963,0,0,0,0,0
7,0.0,1|4,0.0,0.1944,0.0,0.0,0.0,0.0,0,0,...,0.9,0.891173,0.699940,0.900000,0.830371,0,0,0,0,0
8,1.0,1|4,1.0,0.2876,1.0,1.0,0.0,0.0,0,0,...,0.9,0.891173,0.699940,0.900000,0.830371,1,1,1,1,1
9,1.0,1|5,1.0,0.2412,0.0,0.0,0.0,0.0,0,0,...,0.7,0.788604,0.324771,0.788604,0.604458,1,1,1,1,1


In [33]:
print(classification_report(X_test["label_svc"], y_test_cl))
print(accuracy_score(X_test["label_svc"], y_test_cl))

             precision    recall  f1-score   support

          0       0.79      0.80      0.79       556
          1       0.79      0.79      0.79       545

avg / total       0.79      0.79      0.79      1101

0.791099000908


In [34]:
print(classification_report(X_test["label_rf"], y_test_cl))
print(accuracy_score(X_test["label_rf"], y_test_cl))

             precision    recall  f1-score   support

          0       0.80      0.84      0.82       537
          1       0.84      0.80      0.82       564

avg / total       0.82      0.82      0.82      1101

0.819255222525


In [35]:
print(classification_report(X_test["label_xgb"], y_test_cl))
print(accuracy_score(X_test["label_xgb"], y_test_cl))

             precision    recall  f1-score   support

          0       0.82      0.83      0.82       556
          1       0.82      0.82      0.82       545

avg / total       0.82      0.82      0.82      1101

0.821980018165


In [36]:
print(classification_report(X_test["label_mean"], y_test_cl))
print(accuracy_score(X_test["label_mean"], y_test_cl))

             precision    recall  f1-score   support

          0       0.81      0.81      0.81       555
          1       0.81      0.80      0.81       546

avg / total       0.81      0.81      0.81      1101

0.808356039964


In [37]:
print(classification_report(X_test["label_max"], y_test_cl))
print(accuracy_score(X_test["label_max"], y_test_cl))

             precision    recall  f1-score   support

          0       0.81      0.82      0.82       556
          1       0.82      0.81      0.81       545

avg / total       0.81      0.81      0.81      1101

0.814713896458


In [200]:
gbm = XGBClassifier()
gbm_params = {
'learning_rate': [0.01,0.02],
'n_estimators': [300,350,400],
'max_depth': [3,4],
"min_child_weight":[2]
}


grid = GridSearchCV(gbm, gbm_params,verbose=10,n_jobs=-1)
grid.fit(X_train, y_train_cl)
print (grid.best_params_)

Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV] min_child_weight=2, n_estimators=300, learning_rate=0.01, max_depth=3 
[CV] min_child_weight=2, n_estimators=300, learning_rate=0.01, max_depth=3 
[CV] min_child_weight=2, n_estimators=350, learning_rate=0.01, max_depth=3 
[CV] min_child_weight=2, n_estimators=300, learning_rate=0.01, max_depth=3 
[CV]  min_child_weight=2, n_estimators=300, learning_rate=0.01, max_depth=3, score=0.795455 -   1.8s
[CV] min_child_weight=2, n_estimators=350, learning_rate=0.01, max_depth=3 
[CV]  min_child_weight=2, n_estimators=300, learning_rate=0.01, max_depth=3, score=0.803030 -   1.8s
[CV] min_child_weight=2, n_estimators=350, learning_rate=0.01, max_depth=3 
[CV]  min_child_weight=2, n_estimators=300, learning_rate=0.01, max_depth=3, score=0.797872 -   2.2s
[CV] min_child_weight=2, n_estimators=400, learning_rate=0.01, max_depth=3 
[CV]  min_child_weight=2, n_estimators=350, learning_rate=0.01, max_depth=3, score=0.806061 -   2.6s
[CV

[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    4.1s


[CV]  min_child_weight=2, n_estimators=350, learning_rate=0.01, max_depth=3, score=0.792424 -   2.5s
[CV] min_child_weight=2, n_estimators=300, learning_rate=0.01, max_depth=4 
[CV]  min_child_weight=2, n_estimators=400, learning_rate=0.01, max_depth=3, score=0.809091 -   2.7s
[CV] min_child_weight=2, n_estimators=300, learning_rate=0.01, max_depth=4 
[CV]  min_child_weight=2, n_estimators=400, learning_rate=0.01, max_depth=3, score=0.790909 -   2.5s
[CV] min_child_weight=2, n_estimators=300, learning_rate=0.01, max_depth=4 
[CV]  min_child_weight=2, n_estimators=400, learning_rate=0.01, max_depth=3, score=0.793313 -   2.7s
[CV] min_child_weight=2, n_estimators=350, learning_rate=0.01, max_depth=4 
[CV]  min_child_weight=2, n_estimators=300, learning_rate=0.01, max_depth=4, score=0.813636 -   2.5s
[CV] min_child_weight=2, n_estimators=350, learning_rate=0.01, max_depth=4 


[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    6.9s


[CV]  min_child_weight=2, n_estimators=300, learning_rate=0.01, max_depth=4, score=0.795455 -   2.6s
[CV] min_child_weight=2, n_estimators=350, learning_rate=0.01, max_depth=4 
[CV]  min_child_weight=2, n_estimators=300, learning_rate=0.01, max_depth=4, score=0.794833 -   2.4s
[CV] min_child_weight=2, n_estimators=400, learning_rate=0.01, max_depth=4 
[CV]  min_child_weight=2, n_estimators=350, learning_rate=0.01, max_depth=4, score=0.815152 -   2.6s
[CV] min_child_weight=2, n_estimators=400, learning_rate=0.01, max_depth=4 
[CV]  min_child_weight=2, n_estimators=350, learning_rate=0.01, max_depth=4, score=0.800000 -   2.9s
[CV] min_child_weight=2, n_estimators=400, learning_rate=0.01, max_depth=4 
[CV]  min_child_weight=2, n_estimators=350, learning_rate=0.01, max_depth=4, score=0.793313 -   3.7s
[CV] min_child_weight=2, n_estimators=300, learning_rate=0.02, max_depth=3 
[CV]  min_child_weight=2, n_estimators=400, learning_rate=0.01, max_depth=4, score=0.810606 -   3.8s
[CV] min_child

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:   12.9s


[CV] min_child_weight=2, n_estimators=350, learning_rate=0.02, max_depth=3 
[CV]  min_child_weight=2, n_estimators=300, learning_rate=0.02, max_depth=3, score=0.798485 -   1.9s
[CV] min_child_weight=2, n_estimators=350, learning_rate=0.02, max_depth=3 
[CV]  min_child_weight=2, n_estimators=300, learning_rate=0.02, max_depth=3, score=0.791793 -   2.1s
[CV] min_child_weight=2, n_estimators=400, learning_rate=0.02, max_depth=3 
[CV]  min_child_weight=2, n_estimators=350, learning_rate=0.02, max_depth=3, score=0.804545 -   2.1s
[CV] min_child_weight=2, n_estimators=400, learning_rate=0.02, max_depth=3 
[CV]  min_child_weight=2, n_estimators=350, learning_rate=0.02, max_depth=3, score=0.813636 -   2.6s
[CV] min_child_weight=2, n_estimators=400, learning_rate=0.02, max_depth=3 
[CV]  min_child_weight=2, n_estimators=350, learning_rate=0.02, max_depth=3, score=0.791793 -   3.0s
[CV] min_child_weight=2, n_estimators=300, learning_rate=0.02, max_depth=4 


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:   16.5s


[CV]  min_child_weight=2, n_estimators=400, learning_rate=0.02, max_depth=3, score=0.818182 -   2.5s
[CV] min_child_weight=2, n_estimators=300, learning_rate=0.02, max_depth=4 
[CV]  min_child_weight=2, n_estimators=400, learning_rate=0.02, max_depth=3, score=0.806061 -   2.4s
[CV] min_child_weight=2, n_estimators=300, learning_rate=0.02, max_depth=4 
[CV]  min_child_weight=2, n_estimators=400, learning_rate=0.02, max_depth=3, score=0.796353 -   2.6s
[CV] min_child_weight=2, n_estimators=350, learning_rate=0.02, max_depth=4 
[CV]  min_child_weight=2, n_estimators=300, learning_rate=0.02, max_depth=4, score=0.810606 -   2.0s
[CV] min_child_weight=2, n_estimators=350, learning_rate=0.02, max_depth=4 
[CV]  min_child_weight=2, n_estimators=300, learning_rate=0.02, max_depth=4, score=0.787234 -   1.4s
[CV] min_child_weight=2, n_estimators=350, learning_rate=0.02, max_depth=4 
[CV]  min_child_weight=2, n_estimators=300, learning_rate=0.02, max_depth=4, score=0.803030 -   2.0s
[CV] min_child

[Parallel(n_jobs=-1)]: Done  33 out of  36 | elapsed:   21.0s remaining:    1.9s


[CV]  min_child_weight=2, n_estimators=400, learning_rate=0.02, max_depth=4, score=0.807576 -   2.9s
[CV]  min_child_weight=2, n_estimators=400, learning_rate=0.02, max_depth=4, score=0.791793 -   2.6s
[CV]  min_child_weight=2, n_estimators=400, learning_rate=0.02, max_depth=4, score=0.800000 -   2.7s


[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   23.5s finished


{'min_child_weight': 2, 'n_estimators': 400, 'learning_rate': 0.02, 'max_depth': 3}


In [202]:
X_test.columns

Index(['circo_leg_meme_nuance', 'code', 'circo_pres_meme_nuance',
       'score_bloc_pres', 'depute_sortant', 'ancien_depute', 'au_gouvernement',
       'ancien_ministre', 'top_triangulaire', 'bloc_opposant_Centre',
       'bloc_opposant_Divers', 'bloc_opposant_Droite',
       'bloc_opposant_Exdroite', 'bloc_opposant_Exgauche',
       'bloc_opposant_Gauche', 'voix_bloc_1nd', 'bloc_Centre', 'bloc_Divers',
       'bloc_Droite', 'bloc_Exdroite', 'bloc_Exgauche', 'bloc_Gauche', 'svc_x',
       'rf_x', 'xgb_x', 'mean_x', 'max_x', 'rf_y', 'svc_y', 'xgb_y', 'mean_y',
       'max_y', 'label_svc', 'label_rf', 'label_xgb', 'label_mean',
       'label_max'],
      dtype='object')