In [1]:
import pandas as pd
import numpy as np 
import os
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.svm import SVR, SVC
from sklearn.metrics import mean_absolute_error, classification_report, accuracy_score
from xgboost import XGBClassifier
from sklearn.grid_search import GridSearchCV
from sklearn.tree import DecisionTreeClassifier



In [2]:
os.chdir("ETL Legislative 2T/data")

In [3]:
ls

[0m[01;32mcdsp_legi1997t2_circ.xlsx[0m*
[01;32mcdsp_legi2002t2_circ.xls[0m*
[01;32mcdsp_legi2007t2_circ.xls[0m*
[01;32mcdsp_legi2012t2_circ.xlsx[0m*
[01;32mdataframe_elections.xlsx[0m*
[01;32mdataset_legislative_2nd.csv[0m*
dataset_legislative_2nd_prediction_16_03.csv
dataset_legislative_2nd_prediction.csv
[01;32mdico_variables.csv[0m*
pred_format_second_tour_du_6_6_21h46.xlsx
pred_format_second_tour_du_7_6_14h36.xlsx
pred_format_second_tour_du_7_6_15h35.xlsx
prediction_2nd.csv
resultats2.csv


In [4]:
df = pd.read_csv("dataset_legislative_2nd_prediction_16_03.csv")
df["elu"] = df["elu"].map(lambda x: 1 if x=='O' else 0)

In [5]:
df.head()

Unnamed: 0,an,département,circonscription,code,name,first_name,bloc,circo_leg_meme_nuance,circo_pres_meme_nuance,score_bloc_pres,...,top_triangulaire,bloc_opposant_Centre,bloc_opposant_Divers,bloc_opposant_Droite,bloc_opposant_Exdroite,bloc_opposant_Exgauche,bloc_opposant_Gauche,voix_bloc_1nd,voix_bloc_2nd_lag,voix_candidat_2nd_lag
0,2012,AIN,1.0,1|1,BRETON,XAVIER,Droite,1.0,1.0,0.257,...,0,0,0,0,0,0,1,0.2241,0.567573,0.536621
1,2012,AIN,1.0,1|1,DEBAT,JEAN-FRANCOIS,Gauche,0.0,0.0,0.2147,...,0,0,0,1,0,0,0,0.2316,0.432427,0.463379
2,2012,AIN,2.0,1|2,DE LA VERPILLIERE,CHARLES,Droite,1.0,1.0,0.2753,...,1,0,0,0,1,0,1,0.2209,0.567573,
3,2012,AIN,2.0,1|2,EYRAUD,OLIVIER,Exdroite,0.0,0.0,0.1798,...,1,0,0,1,0,0,1,0.1252,,
4,2012,AIN,2.0,1|2,RAYMOND,MICHEL,Gauche,0.0,0.0,0.2027,...,1,0,0,1,1,0,0,0.1368,0.432427,


In [6]:
df.columns

Index(['an', 'département', 'circonscription', 'code', 'name', 'first_name',
       'bloc', 'circo_leg_meme_nuance', 'circo_pres_meme_nuance',
       'score_bloc_pres', 'depute_sortant', 'ancien_depute', 'au_gouvernement',
       'ancien_ministre', 'p_voix', 'p_voix_candidat_2nd', 'elu',
       'membre_majorite', 'top_triangulaire', 'bloc_opposant_Centre',
       'bloc_opposant_Divers', 'bloc_opposant_Droite',
       'bloc_opposant_Exdroite', 'bloc_opposant_Exgauche',
       'bloc_opposant_Gauche', 'voix_bloc_1nd', 'voix_bloc_2nd_lag',
       'voix_candidat_2nd_lag'],
      dtype='object')

In [7]:
nd_tour = pd.get_dummies(pd.qcut(df["voix_bloc_2nd_lag"], 5, labels=False), prefix="bloc_2nd_tour" )
nd_tour["bloc_2nd_tour_nan"] = nd_tour.apply(lambda x : 0 if np.sum(x)==1 else 1 , axis=1)

nd_tour_candidate = pd.get_dummies(pd.qcut(df["voix_candidat_2nd_lag"], 5, labels=False), 
                                   prefix="2nd_tour_candidate" )
nd_tour_candidate["2nd_tour_candidate_nan"] = nd_tour_candidate.apply(lambda x : 0 if np.sum(x)==1 else 1 , axis=1)

nd = pd.concat([nd_tour, nd_tour_candidate], axis=1)

In [8]:
df = pd.concat([df, nd], axis=1)

In [9]:
df.columns

Index(['an', 'département', 'circonscription', 'code', 'name', 'first_name',
       'bloc', 'circo_leg_meme_nuance', 'circo_pres_meme_nuance',
       'score_bloc_pres', 'depute_sortant', 'ancien_depute', 'au_gouvernement',
       'ancien_ministre', 'p_voix', 'p_voix_candidat_2nd', 'elu',
       'membre_majorite', 'top_triangulaire', 'bloc_opposant_Centre',
       'bloc_opposant_Divers', 'bloc_opposant_Droite',
       'bloc_opposant_Exdroite', 'bloc_opposant_Exgauche',
       'bloc_opposant_Gauche', 'voix_bloc_1nd', 'voix_bloc_2nd_lag',
       'voix_candidat_2nd_lag', 'bloc_2nd_tour_0.0', 'bloc_2nd_tour_1.0',
       'bloc_2nd_tour_2.0', 'bloc_2nd_tour_3.0', 'bloc_2nd_tour_4.0',
       'bloc_2nd_tour_nan', '2nd_tour_candidate_0.0', '2nd_tour_candidate_1.0',
       '2nd_tour_candidate_2.0', '2nd_tour_candidate_3.0',
       '2nd_tour_candidate_4.0', '2nd_tour_candidate_nan'],
      dtype='object')

In [10]:
del df['voix_candidat_2nd_lag']
del df['voix_bloc_2nd_lag']

In [11]:
df = pd.get_dummies(df, columns=["bloc"], prefix="bloc")

In [12]:
df.columns

Index(['an', 'département', 'circonscription', 'code', 'name', 'first_name',
       'circo_leg_meme_nuance', 'circo_pres_meme_nuance', 'score_bloc_pres',
       'depute_sortant', 'ancien_depute', 'au_gouvernement', 'ancien_ministre',
       'p_voix', 'p_voix_candidat_2nd', 'elu', 'membre_majorite',
       'top_triangulaire', 'bloc_opposant_Centre', 'bloc_opposant_Divers',
       'bloc_opposant_Droite', 'bloc_opposant_Exdroite',
       'bloc_opposant_Exgauche', 'bloc_opposant_Gauche', 'voix_bloc_1nd',
       'bloc_2nd_tour_0.0', 'bloc_2nd_tour_1.0', 'bloc_2nd_tour_2.0',
       'bloc_2nd_tour_3.0', 'bloc_2nd_tour_4.0', 'bloc_2nd_tour_nan',
       '2nd_tour_candidate_0.0', '2nd_tour_candidate_1.0',
       '2nd_tour_candidate_2.0', '2nd_tour_candidate_3.0',
       '2nd_tour_candidate_4.0', '2nd_tour_candidate_nan', 'bloc_Centre',
       'bloc_Divers', 'bloc_Droite', 'bloc_Exdroite', 'bloc_Exgauche',
       'bloc_Gauche'],
      dtype='object')

In [13]:
df.head()

Unnamed: 0,an,département,circonscription,code,name,first_name,circo_leg_meme_nuance,circo_pres_meme_nuance,score_bloc_pres,depute_sortant,...,2nd_tour_candidate_2.0,2nd_tour_candidate_3.0,2nd_tour_candidate_4.0,2nd_tour_candidate_nan,bloc_Centre,bloc_Divers,bloc_Droite,bloc_Exdroite,bloc_Exgauche,bloc_Gauche
0,2012,AIN,1.0,1|1,BRETON,XAVIER,1.0,1.0,0.257,1.0,...,1,0,0,0,0,0,1,0,0,0
1,2012,AIN,1.0,1|1,DEBAT,JEAN-FRANCOIS,0.0,0.0,0.2147,0.0,...,0,0,0,0,0,0,0,0,0,1
2,2012,AIN,2.0,1|2,DE LA VERPILLIERE,CHARLES,1.0,1.0,0.2753,1.0,...,0,0,0,1,0,0,1,0,0,0
3,2012,AIN,2.0,1|2,EYRAUD,OLIVIER,0.0,0.0,0.1798,0.0,...,0,0,0,1,0,0,0,1,0,0
4,2012,AIN,2.0,1|2,RAYMOND,MICHEL,0.0,0.0,0.2027,0.0,...,0,0,0,1,0,0,0,0,0,1


In [14]:
df["score_pre_maj"] = df.apply(lambda x: x["score_bloc_pres"] if x['membre_majorite']==1 else 0, axis=1)

In [15]:
df = df[[
        'an', 'département','circonscription','code', 'name','first_name',
       'circo_leg_meme_nuance', 'circo_pres_meme_nuance', 
       'depute_sortant', 'ancien_depute', 'au_gouvernement', 'ancien_ministre','membre_majorite',"score_pre_maj",
        "score_bloc_pres",
    
        'bloc_opposant_Centre', 'bloc_opposant_Divers', 'bloc_opposant_Droite',
       'bloc_opposant_Exdroite', 'bloc_opposant_Exgauche',
       'bloc_opposant_Gauche',
        
        'bloc_2nd_tour_0.0',
       'bloc_2nd_tour_1.0', 'bloc_2nd_tour_2.0', 'bloc_2nd_tour_3.0',
       'bloc_2nd_tour_4.0', 'bloc_2nd_tour_nan', '2nd_tour_candidate_0.0',
       '2nd_tour_candidate_1.0', '2nd_tour_candidate_2.0',
       '2nd_tour_candidate_3.0', '2nd_tour_candidate_4.0',
       '2nd_tour_candidate_nan', 
    
        
        'bloc_Centre', 'bloc_Divers', 'bloc_Droite',
       'bloc_Exdroite', 'bloc_Exgauche', 'bloc_Gauche',
    
       'p_voix', 'voix_bloc_1nd', 'top_triangulaire', 'p_voix_candidat_2nd', 'elu']]
        


In [16]:
df.head()

Unnamed: 0,an,département,circonscription,code,name,first_name,circo_leg_meme_nuance,circo_pres_meme_nuance,depute_sortant,ancien_depute,...,bloc_Divers,bloc_Droite,bloc_Exdroite,bloc_Exgauche,bloc_Gauche,p_voix,voix_bloc_1nd,top_triangulaire,p_voix_candidat_2nd,elu
0,2012,AIN,1.0,1|1,BRETON,XAVIER,1.0,1.0,1.0,1.0,...,0,1,0,0,0,0.2241,0.2241,0,0.515715,1
1,2012,AIN,1.0,1|1,DEBAT,JEAN-FRANCOIS,0.0,0.0,0.0,0.0,...,0,0,0,0,1,0.2316,0.2316,0,0.484285,0
2,2012,AIN,2.0,1|2,DE LA VERPILLIERE,CHARLES,1.0,1.0,1.0,1.0,...,0,1,0,0,0,0.2209,0.2209,1,0.443146,1
3,2012,AIN,2.0,1|2,EYRAUD,OLIVIER,0.0,0.0,0.0,0.0,...,0,0,1,0,0,0.1252,0.1252,1,0.169303,0
4,2012,AIN,2.0,1|2,RAYMOND,MICHEL,0.0,0.0,0.0,0.0,...,0,0,0,0,1,0.1368,0.1368,1,0.387551,0


In [18]:
#X_train = df[(df["an"]!=2012)&(df["an"]!=1997)]
#X_test = df[df["an"]==2012]
#y_train = df.ix[(df["an"]!=2012)&(df["an"]!=1997), "score"]
#y_test = df.ix[df["an"]==2012, "score"]
#y_train_cl = df.ix[(df["an"]!=2012)&(df["an"]!=1997), "elu"]
#y_test_cl = df.ix[df["an"]==2012, "elu"]

In [18]:
#del X_train["score"]
#del X_test["score"]
#del X_train["elu"]
#del X_test["elu"]

In [19]:
#del X_train["an"]
#del X_test["code"]
#del X_train["code"]
#del X_test["an"]

In [20]:
#X_train.columns

In [21]:
#rl = LinearRegression()
#rl.fit(X_train[['voix_bloc_1nd',  'top_triangulaire']], y_train)

In [22]:
#rl.score(X_test[['voix_bloc_1nd',  'top_triangulaire']], y_test)

In [23]:
#rf = RandomForestRegressor()
#rf.fit(X_train, y_train)

In [24]:
#rf.score(X_test, y_test)

In [25]:
#svm = SVR(kernel="rbf",C=15, gamma=0.2)
#svm.fit(X_train, y_train)

In [26]:
#svm.score(X_test, y_test)

In [27]:
#mean_absolute_error(y_test, svm.predict(X_test))

In [47]:
X_train = df[(df["an"]!=2012)&(df["an"]!=1997)&(df["an"]!=2017)]
X_test = df[df["an"]==2012]
y_train_cl = df.ix[(df["an"]!=2012)&(df["an"]!=1997)&(df["an"]!=2017), "elu"]
y_test_cl = df.ix[df["an"]==2012, "elu"]
X_validation = df[(df["an"]==2017)]
del X_train['p_voix_candidat_2nd']
del X_test['p_voix_candidat_2nd']
del X_train["elu"]
del X_test["elu"]
del X_train["an"]
del X_test["an"]
del X_train["code"]
del X_train["département"]
del X_test["département"]
del X_train["name"]
del X_test["name"]
del X_train['first_name']
del X_test['first_name']
del X_train['circonscription']
del X_test['circonscription']

In [48]:
X_test.columns

Index(['code', 'circo_leg_meme_nuance', 'circo_pres_meme_nuance',
       'depute_sortant', 'ancien_depute', 'au_gouvernement', 'ancien_ministre',
       'membre_majorite', 'score_pre_maj', 'score_bloc_pres',
       'bloc_opposant_Centre', 'bloc_opposant_Divers', 'bloc_opposant_Droite',
       'bloc_opposant_Exdroite', 'bloc_opposant_Exgauche',
       'bloc_opposant_Gauche', 'bloc_2nd_tour_0.0', 'bloc_2nd_tour_1.0',
       'bloc_2nd_tour_2.0', 'bloc_2nd_tour_3.0', 'bloc_2nd_tour_4.0',
       'bloc_2nd_tour_nan', '2nd_tour_candidate_0.0', '2nd_tour_candidate_1.0',
       '2nd_tour_candidate_2.0', '2nd_tour_candidate_3.0',
       '2nd_tour_candidate_4.0', '2nd_tour_candidate_nan', 'bloc_Centre',
       'bloc_Divers', 'bloc_Droite', 'bloc_Exdroite', 'bloc_Exgauche',
       'bloc_Gauche', 'p_voix', 'voix_bloc_1nd', 'top_triangulaire'],
      dtype='object')

In [21]:
X_train.columns

Index(['circo_leg_meme_nuance', 'circo_pres_meme_nuance', 'score_bloc_pres',
       'depute_sortant', 'ancien_depute', 'au_gouvernement', 'ancien_ministre',
       'membre_majorite', 'score_pre_maj', 'bloc_opposant_Centre',
       'bloc_opposant_Divers', 'bloc_opposant_Droite',
       'bloc_opposant_Exdroite', 'bloc_opposant_Exgauche',
       'bloc_opposant_Gauche', 'bloc_2nd_tour_0.0', 'bloc_2nd_tour_1.0',
       'bloc_2nd_tour_2.0', 'bloc_2nd_tour_3.0', 'bloc_2nd_tour_4.0',
       'bloc_2nd_tour_nan', '2nd_tour_candidate_0.0', '2nd_tour_candidate_1.0',
       '2nd_tour_candidate_2.0', '2nd_tour_candidate_3.0',
       '2nd_tour_candidate_4.0', '2nd_tour_candidate_nan', 'bloc_Centre',
       'bloc_Divers', 'bloc_Droite', 'bloc_Exdroite', 'bloc_Exgauche',
       'bloc_Gauche', 'p_voix', 'voix_bloc_1nd', 'top_triangulaire'],
      dtype='object')

In [20]:
X_train.head()

Unnamed: 0,circo_leg_meme_nuance,circo_pres_meme_nuance,score_bloc_pres,depute_sortant,ancien_depute,au_gouvernement,ancien_ministre,membre_majorite,bloc_opposant_Centre,bloc_opposant_Divers,...,2nd_tour_candidate_nan,bloc_Centre,bloc_Divers,bloc_Droite,bloc_Exdroite,bloc_Exgauche,bloc_Gauche,p_voix,voix_bloc_1nd,top_triangulaire
1099,1.0,1.0,0.311,0.0,0.0,0.0,0.0,1,0,0,...,1,0,0,1,0,0,0,0.2799,0.2799,0
1100,0.0,0.0,0.2214,0.0,0.0,0.0,0.0,0,0,0,...,1,0,0,0,0,0,1,0.1895,0.1895,0
1101,1.0,1.0,0.2974,1.0,1.0,0.0,0.0,1,0,0,...,0,0,0,1,0,0,0,0.2736,0.2736,0
1102,0.0,0.0,0.2122,0.0,0.0,0.0,0.0,0,0,0,...,1,0,0,0,0,0,1,0.104,0.104,0
1103,0.0,1.0,0.2554,0.0,0.0,0.0,0.0,1,0,0,...,1,0,0,1,0,0,0,0.1783,0.1783,0


In [25]:
clf1 = SVC(probability=True)
clf2 = RandomForestClassifier()
clf3= XGBClassifier(min_child_weight=2, n_estimators=400, 
                    learning_rate=0.02, max_depth=3)

clf1.fit(X_train, y_train_cl)
clf2.fit(X_train, y_train_cl)
clf3.fit(X_train, y_train_cl)

XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=1,
       gamma=0, learning_rate=0.02, max_delta_step=0, max_depth=3,
       min_child_weight=2, missing=None, n_estimators=400, nthread=-1,
       objective='binary:logistic', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)

In [26]:
predict_1 = clf1.predict_proba(X_test.drop(["code"], axis=1))
predict_2 = clf2.predict_proba(X_test.drop(["code"], axis=1))
predict_3 = clf3.predict_proba(X_test.drop(["code"], axis=1))

In [27]:
clf1.classes_

array([0, 1])

In [28]:
df_1 = pd.Series( (v[1] for v in predict_1) , name="svc")
df_2 = pd.Series( (v[1] for v in predict_2) , name="rf")
df_3 = pd.Series( (v[1] for v in predict_3) , name="xgb")

In [29]:
X_test = X_test.reset_index(drop=True)

In [30]:
X_test = pd.concat([X_test, df_1, df_2, df_3], axis=1)

In [31]:
X_test

Unnamed: 0,code,circo_leg_meme_nuance,circo_pres_meme_nuance,score_bloc_pres,depute_sortant,ancien_depute,au_gouvernement,ancien_ministre,bloc_opposant_Centre,bloc_opposant_Divers,...,bloc_Droite,bloc_Exdroite,bloc_Exgauche,bloc_Gauche,p_voix,voix_bloc_1nd,top_triangulaire,svc,rf,xgb
0,1|1,1.0,1.0,0.2570,1.0,1.0,0.0,0.0,0,0,...,1,0,0,0,0.2241,0.2241,0,0.899404,0.2,0.535904
1,1|1,0.0,0.0,0.2147,0.0,0.0,0.0,0.0,0,0,...,0,0,0,1,0.2316,0.2316,0,0.128163,0.3,0.180060
2,1|2,1.0,1.0,0.2753,1.0,1.0,0.0,0.0,0,0,...,1,0,0,0,0.2209,0.2209,1,0.965826,0.8,0.816438
3,1|2,0.0,0.0,0.1798,0.0,0.0,0.0,0.0,0,0,...,0,1,0,0,0.1252,0.1252,1,0.041169,0.3,0.050229
4,1|2,0.0,0.0,0.2027,0.0,0.0,0.0,0.0,0,0,...,0,0,0,1,0.1368,0.1368,1,0.268590,0.1,0.070581
5,1|3,1.0,1.0,0.2671,1.0,1.0,0.0,0.0,0,0,...,1,0,0,0,0.2214,0.2214,0,0.955686,0.9,0.778302
6,1|3,0.0,0.0,0.2090,0.0,0.0,0.0,0.0,0,0,...,0,0,0,1,0.1742,0.1742,0,0.169498,0.1,0.093517
7,1|4,0.0,0.0,0.1944,0.0,0.0,0.0,0.0,0,0,...,0,0,0,1,0.1823,0.1823,0,0.172156,0.0,0.098948
8,1|4,1.0,1.0,0.2876,1.0,1.0,0.0,0.0,0,0,...,1,0,0,0,0.2370,0.2370,0,0.946488,0.4,0.852537
9,1|5,1.0,1.0,0.2412,0.0,0.0,0.0,0.0,0,0,...,1,0,0,0,0.1795,0.1795,0,0.849302,0.5,0.505396


In [32]:
X_test["mean"] = X_test.apply(lambda x: np.max([x["svc"], x["xgb"], x["rf"]]), axis=1)
X_test["max"] = X_test.apply(lambda x: np.mean([x["svc"], x["xgb"], x["rf"]]), axis=1)

In [33]:
X_test.shape

(1101, 40)

In [34]:
X_test = pd.merge(X_test, X_test.groupby(["code"])["rf"].max().to_frame().reset_index(), how="left", on="code")
X_test = pd.merge(X_test, X_test.groupby(["code"])["svc"].max().to_frame().reset_index(), how="left", on="code")
X_test = pd.merge(X_test, X_test.groupby(["code"])["xgb"].max().to_frame().reset_index(), how="left", on="code")
X_test = pd.merge(X_test, X_test.groupby(["code"])["mean"].max().to_frame().reset_index(), how="left", on="code")
X_test = pd.merge(X_test, X_test.groupby(["code"])["max"].max().to_frame().reset_index(), how="left", on="code")

In [35]:
X_test.head()

Unnamed: 0,code,circo_leg_meme_nuance,circo_pres_meme_nuance,score_bloc_pres,depute_sortant,ancien_depute,au_gouvernement,ancien_ministre,bloc_opposant_Centre,bloc_opposant_Divers,...,svc_x,rf_x,xgb_x,mean_x,max_x,rf_y,svc_y,xgb_y,mean_y,max_y
0,1|1,1.0,1.0,0.257,1.0,1.0,0.0,0.0,0,0,...,0.899404,0.2,0.535904,0.899404,0.545103,0.3,0.899404,0.535904,0.899404,0.545103
1,1|1,0.0,0.0,0.2147,0.0,0.0,0.0,0.0,0,0,...,0.128163,0.3,0.18006,0.3,0.202741,0.3,0.899404,0.535904,0.899404,0.545103
2,1|2,1.0,1.0,0.2753,1.0,1.0,0.0,0.0,0,0,...,0.965826,0.8,0.816438,0.965826,0.860754,0.8,0.965826,0.816438,0.965826,0.860754
3,1|2,0.0,0.0,0.1798,0.0,0.0,0.0,0.0,0,0,...,0.041169,0.3,0.050229,0.3,0.130466,0.8,0.965826,0.816438,0.965826,0.860754
4,1|2,0.0,0.0,0.2027,0.0,0.0,0.0,0.0,0,0,...,0.26859,0.1,0.070581,0.26859,0.146391,0.8,0.965826,0.816438,0.965826,0.860754


In [36]:
X_test["label_svc"] = X_test.apply(lambda x: 0 if x["svc_x"]<x["svc_y"] else 1, axis=1)
X_test["label_rf"] = X_test.apply(lambda x: 0 if x["rf_x"]<x["rf_y"] else 1, axis=1)
X_test["label_xgb"] = X_test.apply(lambda x: 0 if x["xgb_x"]<x["xgb_y"] else 1, axis=1)
X_test["label_mean"] = X_test.apply(lambda x: 0 if x["mean_x"]<x["mean_y"] else 1, axis=1)
X_test["label_max"] = X_test.apply(lambda x: 0 if x["max_x"]<x["max_y"] else 1, axis=1)

In [37]:
X_test

Unnamed: 0,code,circo_leg_meme_nuance,circo_pres_meme_nuance,score_bloc_pres,depute_sortant,ancien_depute,au_gouvernement,ancien_ministre,bloc_opposant_Centre,bloc_opposant_Divers,...,rf_y,svc_y,xgb_y,mean_y,max_y,label_svc,label_rf,label_xgb,label_mean,label_max
0,1|1,1.0,1.0,0.2570,1.0,1.0,0.0,0.0,0,0,...,0.3,0.899404,0.535904,0.899404,0.545103,1,0,1,1,1
1,1|1,0.0,0.0,0.2147,0.0,0.0,0.0,0.0,0,0,...,0.3,0.899404,0.535904,0.899404,0.545103,0,1,0,0,0
2,1|2,1.0,1.0,0.2753,1.0,1.0,0.0,0.0,0,0,...,0.8,0.965826,0.816438,0.965826,0.860754,1,1,1,1,1
3,1|2,0.0,0.0,0.1798,0.0,0.0,0.0,0.0,0,0,...,0.8,0.965826,0.816438,0.965826,0.860754,0,0,0,0,0
4,1|2,0.0,0.0,0.2027,0.0,0.0,0.0,0.0,0,0,...,0.8,0.965826,0.816438,0.965826,0.860754,0,0,0,0,0
5,1|3,1.0,1.0,0.2671,1.0,1.0,0.0,0.0,0,0,...,0.9,0.955686,0.778302,0.955686,0.877996,1,1,1,1,1
6,1|3,0.0,0.0,0.2090,0.0,0.0,0.0,0.0,0,0,...,0.9,0.955686,0.778302,0.955686,0.877996,0,0,0,0,0
7,1|4,0.0,0.0,0.1944,0.0,0.0,0.0,0.0,0,0,...,0.4,0.946488,0.852537,0.946488,0.733008,0,0,0,0,0
8,1|4,1.0,1.0,0.2876,1.0,1.0,0.0,0.0,0,0,...,0.4,0.946488,0.852537,0.946488,0.733008,1,1,1,1,1
9,1|5,1.0,1.0,0.2412,0.0,0.0,0.0,0.0,0,0,...,0.5,0.849302,0.505396,0.849302,0.618233,1,1,1,1,1


In [38]:
print(classification_report(X_test["label_svc"], y_test_cl))
print(accuracy_score(X_test["label_svc"], y_test_cl))

             precision    recall  f1-score   support

          0       0.81      0.81      0.81       560
          1       0.80      0.80      0.80       541

avg / total       0.80      0.80      0.80      1101

0.80199818347


In [39]:
print(classification_report(X_test["label_rf"], y_test_cl))
print(accuracy_score(X_test["label_rf"], y_test_cl))

             precision    recall  f1-score   support

          0       0.77      0.83      0.80       520
          1       0.84      0.78      0.81       581

avg / total       0.80      0.80      0.80      1101

0.80199818347


In [40]:
print(classification_report(X_test["label_xgb"], y_test_cl))
print(accuracy_score(X_test["label_xgb"], y_test_cl))

             precision    recall  f1-score   support

          0       0.82      0.82      0.82       558
          1       0.82      0.82      0.82       543

avg / total       0.82      0.82      0.82      1101

0.820163487738


In [41]:
print(classification_report(X_test["label_mean"], y_test_cl))
print(accuracy_score(X_test["label_mean"], y_test_cl))

             precision    recall  f1-score   support

          0       0.82      0.82      0.82       559
          1       0.81      0.81      0.81       542

avg / total       0.82      0.82      0.82      1101

0.815622161671


In [42]:
print(classification_report(X_test["label_max"], y_test_cl))
print(accuracy_score(X_test["label_max"], y_test_cl))

             precision    recall  f1-score   support

          0       0.81      0.81      0.81       560
          1       0.81      0.81      0.81       541

avg / total       0.81      0.81      0.81      1101

0.809264305177


In [50]:
gbm = XGBClassifier()
gbm_params = {
'learning_rate': [0.01,0.1,0.3],
'n_estimators': [300,350,400,1000,1500],
'max_depth': [4,5,6,7],
"min_child_weight":[2,5,10,15]
}


X_train = X_train[['p_voix','voix_bloc_1nd','score_pre_maj', 'circo_pres_meme_nuance','circo_leg_meme_nuance',
'depute_sortant','ancien_depute','top_triangulaire','ancien_ministre', 'au_gouvernement']]
grid = GridSearchCV(gbm, gbm_params,verbose=10,n_jobs=-1)
grid.fit(X_train, y_train_cl)
print (grid.best_params_)

Fitting 3 folds for each of 240 candidates, totalling 720 fits
[CV] max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=2 
[CV] max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=2 
[CV] max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=2 
[CV] max_depth=4, n_estimators=350, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=2, score=0.787879 -   1.6s
[CV] max_depth=4, n_estimators=350, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=2, score=0.763636 -   1.7s
[CV] max_depth=4, n_estimators=350, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=2, score=0.784848 -   1.8s
[CV] max_depth=4, n_estimators=400, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=4, n_estimators=350, learning_rate=0.01, min_child_weight=2, score=0.793939 -   2.0s
[

[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    3.7s


[CV]  max_depth=4, n_estimators=400, learning_rate=0.01, min_child_weight=2, score=0.795455 -   2.4s
[CV] max_depth=4, n_estimators=1000, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=4, n_estimators=400, learning_rate=0.01, min_child_weight=2, score=0.787879 -   2.3s
[CV] max_depth=4, n_estimators=1000, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=4, n_estimators=400, learning_rate=0.01, min_child_weight=2, score=0.763636 -   2.2s
[CV] max_depth=4, n_estimators=1500, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=4, n_estimators=1000, learning_rate=0.01, min_child_weight=2, score=0.762121 -   4.2s
[CV] max_depth=4, n_estimators=1500, learning_rate=0.01, min_child_weight=2 


[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    8.7s


[CV]  max_depth=4, n_estimators=1000, learning_rate=0.01, min_child_weight=2, score=0.801515 -   5.0s
[CV] max_depth=4, n_estimators=1500, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=4, n_estimators=1000, learning_rate=0.01, min_child_weight=2, score=0.793939 -   5.0s
[CV] max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=5, score=0.762121 -   1.6s
[CV] max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=5, score=0.786364 -   1.6s
[CV] max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=4, n_estimators=1500, learning_rate=0.01, min_child_weight=2, score=0.796970 -   7.6s
[CV] max_depth=4, n_estimators=350, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=5, score=0.759091 -   1.4s
[CV] max_d

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:   15.5s


[CV]  max_depth=4, n_estimators=350, learning_rate=0.01, min_child_weight=5, score=0.789394 -   1.8s
[CV] max_depth=4, n_estimators=400, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=4, n_estimators=1500, learning_rate=0.01, min_child_weight=2, score=0.795455 -   7.5s
[CV] max_depth=4, n_estimators=400, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=4, n_estimators=1500, learning_rate=0.01, min_child_weight=2, score=0.777273 -   7.4s
[CV] max_depth=4, n_estimators=400, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=4, n_estimators=350, learning_rate=0.01, min_child_weight=5, score=0.760606 -   1.9s
[CV] max_depth=4, n_estimators=1000, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=4, n_estimators=400, learning_rate=0.01, min_child_weight=5, score=0.762121 -   1.3s
[CV] max_depth=4, n_estimators=1000, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=4, n_estimators=400, learning_rate=0.01, min_child_weight=5, score=0.795455 -   2.2s
[CV] max_d

[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:   18.6s


[CV]  max_depth=4, n_estimators=1000, learning_rate=0.01, min_child_weight=5, score=0.771212 -   3.1s
[CV] max_depth=4, n_estimators=1500, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=4, n_estimators=1000, learning_rate=0.01, min_child_weight=5, score=0.798485 -   4.7s
[CV] max_depth=4, n_estimators=1500, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=4, n_estimators=1000, learning_rate=0.01, min_child_weight=5, score=0.783333 -   5.1s
[CV] max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=10, score=0.746970 -   1.4s
[CV] max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=4, n_estimators=1500, learning_rate=0.01, min_child_weight=5, score=0.796970 -   7.2s
[CV] max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=10, score=0.792424 -   1.4s
[CV

[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:   28.0s


[CV]  max_depth=4, n_estimators=350, learning_rate=0.01, min_child_weight=10, score=0.789394 -   1.8s
[CV] max_depth=4, n_estimators=400, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=4, n_estimators=1500, learning_rate=0.01, min_child_weight=5, score=0.780303 -   7.1s
[CV] max_depth=4, n_estimators=400, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=4, n_estimators=350, learning_rate=0.01, min_child_weight=10, score=0.757576 -   1.8s
[CV] max_depth=4, n_estimators=1000, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=4, n_estimators=400, learning_rate=0.01, min_child_weight=10, score=0.804545 -   2.5s
[CV] max_depth=4, n_estimators=1000, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=4, n_estimators=400, learning_rate=0.01, min_child_weight=10, score=0.789394 -   2.0s
[CV] max_depth=4, n_estimators=1000, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=4, n_estimators=400, learning_rate=0.01, min_child_weight=10, score=0.754545 -   2.0s


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   35.8s


[CV]  max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=15, score=0.736364 -   1.5s
[CV] max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=15 
[CV]  max_depth=4, n_estimators=1500, learning_rate=0.01, min_child_weight=10, score=0.786364 -   4.7s
[CV] max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=15 
[CV]  max_depth=4, n_estimators=1500, learning_rate=0.01, min_child_weight=10, score=0.809091 -   6.5s
[CV] max_depth=4, n_estimators=350, learning_rate=0.01, min_child_weight=15 
[CV]  max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=15, score=0.748485 -   1.0s
[CV] max_depth=4, n_estimators=350, learning_rate=0.01, min_child_weight=15 
[CV]  max_depth=4, n_estimators=300, learning_rate=0.01, min_child_weight=15, score=0.789394 -   1.5s
[CV] max_depth=4, n_estimators=350, learning_rate=0.01, min_child_weight=15 
[CV]  max_depth=4, n_estimators=350, learning_rate=0.01, min_child_weight=15, score=0.736364 -   1.7s
[

[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:   43.0s


[CV]  max_depth=4, n_estimators=1000, learning_rate=0.01, min_child_weight=15, score=0.801515 -   5.0s
[CV] max_depth=4, n_estimators=1500, learning_rate=0.01, min_child_weight=15 
[CV]  max_depth=4, n_estimators=1000, learning_rate=0.01, min_child_weight=15, score=0.781818 -   5.1s
[CV] max_depth=4, n_estimators=1500, learning_rate=0.01, min_child_weight=15 
[CV]  max_depth=4, n_estimators=1000, learning_rate=0.01, min_child_weight=15, score=0.765152 -   5.5s
[CV] max_depth=5, n_estimators=300, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=5, n_estimators=300, learning_rate=0.01, min_child_weight=2, score=0.778788 -   2.8s
[CV] max_depth=5, n_estimators=300, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=4, n_estimators=1500, learning_rate=0.01, min_child_weight=15, score=0.803030 -   9.1s
[CV] max_depth=5, n_estimators=300, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=5, n_estimators=300, learning_rate=0.01, min_child_weight=2, score=0.789394 -   2.4s
[C

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:   56.6s


[CV]  max_depth=5, n_estimators=350, learning_rate=0.01, min_child_weight=2, score=0.784848 -   2.8s
[CV] max_depth=5, n_estimators=400, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=5, n_estimators=350, learning_rate=0.01, min_child_weight=2, score=0.763636 -   3.3s
[CV] max_depth=5, n_estimators=1000, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=5, n_estimators=400, learning_rate=0.01, min_child_weight=2, score=0.800000 -   3.8s
[CV] max_depth=5, n_estimators=1000, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=5, n_estimators=400, learning_rate=0.01, min_child_weight=2, score=0.786364 -   3.8s
[CV] max_depth=5, n_estimators=1000, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=5, n_estimators=400, learning_rate=0.01, min_child_weight=2, score=0.766667 -   3.7s
[CV] max_depth=5, n_estimators=1500, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=5, n_estimators=1000, learning_rate=0.01, min_child_weight=2, score=0.798485 -   7.1s
[CV] max_

[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed:  1.4min


[CV]  max_depth=5, n_estimators=350, learning_rate=0.01, min_child_weight=5, score=0.793939 -   3.9s
[CV] max_depth=5, n_estimators=400, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=5, n_estimators=1500, learning_rate=0.01, min_child_weight=2, score=0.789394 -  17.3s
[CV] max_depth=5, n_estimators=400, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=5, n_estimators=1500, learning_rate=0.01, min_child_weight=2, score=0.772727 -  18.1s
[CV] max_depth=5, n_estimators=400, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=5, n_estimators=350, learning_rate=0.01, min_child_weight=5, score=0.765152 -   4.7s
[CV] max_depth=5, n_estimators=1000, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=5, n_estimators=400, learning_rate=0.01, min_child_weight=5, score=0.787879 -   4.8s
[CV] max_depth=5, n_estimators=1000, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=5, n_estimators=400, learning_rate=0.01, min_child_weight=5, score=0.792424 -   4.7s
[CV] max_d

[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:  1.8min


[CV]  max_depth=5, n_estimators=350, learning_rate=0.01, min_child_weight=10, score=0.772727 -   3.7s
[CV] max_depth=5, n_estimators=350, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=5, n_estimators=300, learning_rate=0.01, min_child_weight=10, score=0.756061 -   4.8s
[CV] max_depth=5, n_estimators=350, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=5, n_estimators=1500, learning_rate=0.01, min_child_weight=5, score=0.790909 -  16.3s
[CV] max_depth=5, n_estimators=400, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=5, n_estimators=350, learning_rate=0.01, min_child_weight=10, score=0.789394 -   3.9s
[CV] max_depth=5, n_estimators=400, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=5, n_estimators=1500, learning_rate=0.01, min_child_weight=5, score=0.778788 -  18.0s
[CV] max_depth=5, n_estimators=400, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=5, n_estimators=350, learning_rate=0.01, min_child_weight=10, score=0.768182 -   5.3s
[CV

[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed:  2.1min


[CV]  max_depth=5, n_estimators=300, learning_rate=0.01, min_child_weight=15, score=0.754545 -   1.2s
[CV] max_depth=5, n_estimators=350, learning_rate=0.01, min_child_weight=15 
[CV]  max_depth=5, n_estimators=1500, learning_rate=0.01, min_child_weight=10, score=0.778788 -   6.8s
[CV] max_depth=5, n_estimators=350, learning_rate=0.01, min_child_weight=15 
[CV]  max_depth=5, n_estimators=350, learning_rate=0.01, min_child_weight=15, score=0.765152 -   2.2s
[CV] max_depth=5, n_estimators=400, learning_rate=0.01, min_child_weight=15 
[CV]  max_depth=5, n_estimators=350, learning_rate=0.01, min_child_weight=15, score=0.787879 -   2.0s
[CV] max_depth=5, n_estimators=400, learning_rate=0.01, min_child_weight=15 
[CV]  max_depth=5, n_estimators=350, learning_rate=0.01, min_child_weight=15, score=0.759091 -   2.2s
[CV] max_depth=5, n_estimators=400, learning_rate=0.01, min_child_weight=15 
[CV]  max_depth=5, n_estimators=400, learning_rate=0.01, min_child_weight=15, score=0.772727 -   2.4s
[C

[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:  2.4min


[CV]  max_depth=6, n_estimators=300, learning_rate=0.01, min_child_weight=2, score=0.796970 -   2.8s
[CV] max_depth=6, n_estimators=350, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=6, n_estimators=300, learning_rate=0.01, min_child_weight=2, score=0.766667 -   2.9s
[CV] max_depth=6, n_estimators=350, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=6, n_estimators=350, learning_rate=0.01, min_child_weight=2, score=0.801515 -   2.6s
[CV] max_depth=6, n_estimators=400, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=5, n_estimators=1500, learning_rate=0.01, min_child_weight=15, score=0.769697 -   8.9s
[CV] max_depth=6, n_estimators=400, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=6, n_estimators=350, learning_rate=0.01, min_child_weight=2, score=0.787879 -   3.3s
[CV] max_depth=6, n_estimators=400, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=6, n_estimators=350, learning_rate=0.01, min_child_weight=2, score=0.768182 -   3.8s
[CV] max_dep

[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:  2.8min


[CV]  max_depth=6, n_estimators=350, learning_rate=0.01, min_child_weight=5, score=0.796970 -   4.2s
[CV] max_depth=6, n_estimators=400, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=6, n_estimators=1500, learning_rate=0.01, min_child_weight=2, score=0.795455 -  17.6s
[CV] max_depth=6, n_estimators=400, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=6, n_estimators=1500, learning_rate=0.01, min_child_weight=2, score=0.780303 -  15.8s
[CV] max_depth=6, n_estimators=400, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=6, n_estimators=350, learning_rate=0.01, min_child_weight=5, score=0.765152 -   5.3s
[CV] max_depth=6, n_estimators=1000, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=6, n_estimators=400, learning_rate=0.01, min_child_weight=5, score=0.790909 -   4.2s
[CV] max_depth=6, n_estimators=1000, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=6, n_estimators=400, learning_rate=0.01, min_child_weight=5, score=0.793939 -   3.5s
[CV] max_d

[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:  3.2min


[CV]  max_depth=6, n_estimators=1500, learning_rate=0.01, min_child_weight=5, score=0.771212 -  13.6s
[CV] max_depth=6, n_estimators=400, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=6, n_estimators=350, learning_rate=0.01, min_child_weight=10, score=0.777273 -   6.2s
[CV] max_depth=6, n_estimators=1000, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=6, n_estimators=400, learning_rate=0.01, min_child_weight=10, score=0.796970 -   7.1s
[CV] max_depth=6, n_estimators=1000, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=6, n_estimators=400, learning_rate=0.01, min_child_weight=10, score=0.787879 -   6.9s
[CV] max_depth=6, n_estimators=1000, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=6, n_estimators=400, learning_rate=0.01, min_child_weight=10, score=0.772727 -   5.6s
[CV] max_depth=6, n_estimators=1500, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=6, n_estimators=1000, learning_rate=0.01, min_child_weight=10, score=0.813636 -   9.8

[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed:  3.7min


[CV]  max_depth=6, n_estimators=400, learning_rate=0.01, min_child_weight=15, score=0.759091 -   3.2s
[CV] max_depth=6, n_estimators=1500, learning_rate=0.01, min_child_weight=15 
[CV]  max_depth=6, n_estimators=1000, learning_rate=0.01, min_child_weight=15, score=0.807576 -   6.7s
[CV] max_depth=6, n_estimators=1500, learning_rate=0.01, min_child_weight=15 
[CV]  max_depth=6, n_estimators=1000, learning_rate=0.01, min_child_weight=15, score=0.769697 -   7.1s
[CV] max_depth=6, n_estimators=1500, learning_rate=0.01, min_child_weight=15 
[CV]  max_depth=6, n_estimators=1000, learning_rate=0.01, min_child_weight=15, score=0.762121 -   6.9s
[CV] max_depth=7, n_estimators=300, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=7, n_estimators=300, learning_rate=0.01, min_child_weight=2, score=0.783333 -   2.9s
[CV] max_depth=7, n_estimators=300, learning_rate=0.01, min_child_weight=2 
[CV]  max_depth=6, n_estimators=1500, learning_rate=0.01, min_child_weight=15, score=0.809091 -  10.1s

[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  4.3min


[CV]  max_depth=7, n_estimators=300, learning_rate=0.01, min_child_weight=5, score=0.757576 -   3.3s
[CV] max_depth=7, n_estimators=300, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=7, n_estimators=1500, learning_rate=0.01, min_child_weight=2, score=0.790909 -  14.6s
[CV] max_depth=7, n_estimators=300, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=7, n_estimators=300, learning_rate=0.01, min_child_weight=5, score=0.789394 -   2.9s
[CV] max_depth=7, n_estimators=350, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=7, n_estimators=300, learning_rate=0.01, min_child_weight=5, score=0.763636 -   3.0s
[CV] max_depth=7, n_estimators=350, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=7, n_estimators=350, learning_rate=0.01, min_child_weight=5, score=0.780303 -   3.4s
[CV] max_depth=7, n_estimators=350, learning_rate=0.01, min_child_weight=5 
[CV]  max_depth=7, n_estimators=350, learning_rate=0.01, min_child_weight=5, score=0.793939 -   3.1s
[CV] max_dept

[Parallel(n_jobs=-1)]: Done 213 tasks      | elapsed:  4.9min


[CV]  max_depth=7, n_estimators=350, learning_rate=0.01, min_child_weight=10, score=0.786364 -   3.2s
[CV] max_depth=7, n_estimators=400, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=7, n_estimators=350, learning_rate=0.01, min_child_weight=10, score=0.778788 -   3.0s
[CV] max_depth=7, n_estimators=400, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=7, n_estimators=1500, learning_rate=0.01, min_child_weight=5, score=0.774242 -  13.1s
[CV] max_depth=7, n_estimators=1000, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=7, n_estimators=400, learning_rate=0.01, min_child_weight=10, score=0.796970 -   3.3s
[CV] max_depth=7, n_estimators=1000, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=7, n_estimators=400, learning_rate=0.01, min_child_weight=10, score=0.783333 -   2.9s
[CV] max_depth=7, n_estimators=1000, learning_rate=0.01, min_child_weight=10 
[CV]  max_depth=7, n_estimators=400, learning_rate=0.01, min_child_weight=10, score=0.780303 -   2.6s


[Parallel(n_jobs=-1)]: Done 234 tasks      | elapsed:  5.3min


[CV]  max_depth=7, n_estimators=1000, learning_rate=0.01, min_child_weight=15, score=0.777273 -   5.8s
[CV] max_depth=7, n_estimators=1500, learning_rate=0.01, min_child_weight=15 
[CV]  max_depth=7, n_estimators=1000, learning_rate=0.01, min_child_weight=15, score=0.809091 -   7.2s
[CV] max_depth=7, n_estimators=1500, learning_rate=0.01, min_child_weight=15 
[CV]  max_depth=7, n_estimators=1000, learning_rate=0.01, min_child_weight=15, score=0.762121 -   6.8s
[CV] max_depth=4, n_estimators=300, learning_rate=0.1, min_child_weight=2 
[CV]  max_depth=4, n_estimators=300, learning_rate=0.1, min_child_weight=2, score=0.793939 -   1.5s
[CV] max_depth=4, n_estimators=300, learning_rate=0.1, min_child_weight=2 
[CV]  max_depth=7, n_estimators=1500, learning_rate=0.01, min_child_weight=15, score=0.807576 -   8.9s
[CV] max_depth=4, n_estimators=300, learning_rate=0.1, min_child_weight=2 
[CV]  max_depth=4, n_estimators=300, learning_rate=0.1, min_child_weight=2, score=0.783333 -   1.9s
[CV] ma

[Parallel(n_jobs=-1)]: Done 257 tasks      | elapsed:  5.7min


[CV]  max_depth=4, n_estimators=350, learning_rate=0.1, min_child_weight=5, score=0.784848 -   1.8s
[CV] max_depth=4, n_estimators=400, learning_rate=0.1, min_child_weight=5 
[CV]  max_depth=4, n_estimators=1500, learning_rate=0.1, min_child_weight=2, score=0.772727 -   7.7s
[CV] max_depth=4, n_estimators=400, learning_rate=0.1, min_child_weight=5 
[CV]  max_depth=4, n_estimators=350, learning_rate=0.1, min_child_weight=5, score=0.780303 -   1.8s
[CV] max_depth=4, n_estimators=400, learning_rate=0.1, min_child_weight=5 
[CV]  max_depth=4, n_estimators=1500, learning_rate=0.1, min_child_weight=2, score=0.760606 -   7.5s
[CV] max_depth=4, n_estimators=1000, learning_rate=0.1, min_child_weight=5 
[CV]  max_depth=4, n_estimators=400, learning_rate=0.1, min_child_weight=5, score=0.793939 -   1.9s
[CV] max_depth=4, n_estimators=1000, learning_rate=0.1, min_child_weight=5 
[CV]  max_depth=4, n_estimators=400, learning_rate=0.1, min_child_weight=5, score=0.787879 -   2.0s
[CV] max_depth=4, n_e

[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed:  6.0min


[CV]  max_depth=4, n_estimators=1000, learning_rate=0.1, min_child_weight=10, score=0.771212 -   5.0s
[CV] max_depth=4, n_estimators=1500, learning_rate=0.1, min_child_weight=10 
[CV]  max_depth=4, n_estimators=1000, learning_rate=0.1, min_child_weight=10, score=0.765152 -   5.1s
[CV] max_depth=4, n_estimators=300, learning_rate=0.1, min_child_weight=15 
[CV]  max_depth=4, n_estimators=300, learning_rate=0.1, min_child_weight=15, score=0.793939 -   1.5s
[CV] max_depth=4, n_estimators=300, learning_rate=0.1, min_child_weight=15 
[CV]  max_depth=4, n_estimators=1500, learning_rate=0.1, min_child_weight=10, score=0.783333 -   6.9s
[CV] max_depth=4, n_estimators=300, learning_rate=0.1, min_child_weight=15 
[CV]  max_depth=4, n_estimators=300, learning_rate=0.1, min_child_weight=15, score=0.778788 -   1.6s
[CV] max_depth=4, n_estimators=350, learning_rate=0.1, min_child_weight=15 
[CV]  max_depth=4, n_estimators=1500, learning_rate=0.1, min_child_weight=10, score=0.768182 -   5.6s
[CV] max_

[Parallel(n_jobs=-1)]: Done 305 tasks      | elapsed:  6.4min


[CV]  max_depth=5, n_estimators=350, learning_rate=0.1, min_child_weight=2, score=0.775758 -   2.3s
[CV] max_depth=5, n_estimators=1000, learning_rate=0.1, min_child_weight=2 
[CV]  max_depth=5, n_estimators=400, learning_rate=0.1, min_child_weight=2, score=0.790909 -   2.8s
[CV] max_depth=5, n_estimators=1000, learning_rate=0.1, min_child_weight=2 
[CV]  max_depth=5, n_estimators=400, learning_rate=0.1, min_child_weight=2, score=0.778788 -   2.6s
[CV] max_depth=5, n_estimators=1000, learning_rate=0.1, min_child_weight=2 
[CV]  max_depth=5, n_estimators=400, learning_rate=0.1, min_child_weight=2, score=0.777273 -   2.5s
[CV] max_depth=5, n_estimators=1500, learning_rate=0.1, min_child_weight=2 
[CV]  max_depth=5, n_estimators=1000, learning_rate=0.1, min_child_weight=2, score=0.780303 -   6.1s
[CV] max_depth=5, n_estimators=1500, learning_rate=0.1, min_child_weight=2 
[CV]  max_depth=5, n_estimators=1000, learning_rate=0.1, min_child_weight=2, score=0.763636 -   6.1s
[CV] max_depth=5, 

[Parallel(n_jobs=-1)]: Done 330 tasks      | elapsed:  6.9min


[CV]  max_depth=5, n_estimators=300, learning_rate=0.1, min_child_weight=10, score=0.763636 -   1.5s
[CV] max_depth=5, n_estimators=350, learning_rate=0.1, min_child_weight=10 
[CV]  max_depth=5, n_estimators=350, learning_rate=0.1, min_child_weight=10, score=0.786364 -   1.9s
[CV] max_depth=5, n_estimators=350, learning_rate=0.1, min_child_weight=10 
[CV]  max_depth=5, n_estimators=350, learning_rate=0.1, min_child_weight=10, score=0.804545 -   2.4s
[CV] max_depth=5, n_estimators=400, learning_rate=0.1, min_child_weight=10 
[CV]  max_depth=5, n_estimators=1500, learning_rate=0.1, min_child_weight=5, score=0.760606 -   8.8s
[CV] max_depth=5, n_estimators=400, learning_rate=0.1, min_child_weight=10 
[CV]  max_depth=5, n_estimators=1500, learning_rate=0.1, min_child_weight=5, score=0.757576 -   8.5s
[CV] max_depth=5, n_estimators=400, learning_rate=0.1, min_child_weight=10 
[CV]  max_depth=5, n_estimators=350, learning_rate=0.1, min_child_weight=10, score=0.763636 -   2.0s
[CV] max_depth

[Parallel(n_jobs=-1)]: Done 357 tasks      | elapsed:  7.3min


[CV]  max_depth=6, n_estimators=300, learning_rate=0.1, min_child_weight=2, score=0.800000 -   2.1s
[CV] max_depth=6, n_estimators=300, learning_rate=0.1, min_child_weight=2 
[CV]  max_depth=5, n_estimators=1500, learning_rate=0.1, min_child_weight=15, score=0.778788 -   8.8s
[CV] max_depth=6, n_estimators=300, learning_rate=0.1, min_child_weight=2 
[CV]  max_depth=6, n_estimators=300, learning_rate=0.1, min_child_weight=2, score=0.783333 -   2.2s
[CV] max_depth=6, n_estimators=350, learning_rate=0.1, min_child_weight=2 
[CV]  max_depth=6, n_estimators=300, learning_rate=0.1, min_child_weight=2, score=0.768182 -   2.1s
[CV] max_depth=6, n_estimators=350, learning_rate=0.1, min_child_weight=2 
[CV]  max_depth=6, n_estimators=350, learning_rate=0.1, min_child_weight=2, score=0.795455 -   2.6s
[CV] max_depth=6, n_estimators=350, learning_rate=0.1, min_child_weight=2 
[CV]  max_depth=5, n_estimators=1500, learning_rate=0.1, min_child_weight=15, score=0.766667 -   8.3s
[CV] max_depth=6, n_e

[Parallel(n_jobs=-1)]: Done 384 tasks      | elapsed:  7.8min


[CV]  max_depth=6, n_estimators=1000, learning_rate=0.1, min_child_weight=5, score=0.783333 -   6.8s
[CV] max_depth=6, n_estimators=1500, learning_rate=0.1, min_child_weight=5 
[CV]  max_depth=6, n_estimators=1000, learning_rate=0.1, min_child_weight=5, score=0.759091 -   7.0s
[CV] max_depth=6, n_estimators=1500, learning_rate=0.1, min_child_weight=5 
[CV]  max_depth=6, n_estimators=1000, learning_rate=0.1, min_child_weight=5, score=0.768182 -   7.5s
[CV] max_depth=6, n_estimators=300, learning_rate=0.1, min_child_weight=10 
[CV]  max_depth=6, n_estimators=300, learning_rate=0.1, min_child_weight=10, score=0.796970 -   1.9s
[CV] max_depth=6, n_estimators=300, learning_rate=0.1, min_child_weight=10 
[CV]  max_depth=6, n_estimators=1500, learning_rate=0.1, min_child_weight=5, score=0.783333 -   9.5s
[CV] max_depth=6, n_estimators=300, learning_rate=0.1, min_child_weight=10 
[CV]  max_depth=6, n_estimators=300, learning_rate=0.1, min_child_weight=10, score=0.780303 -   1.9s
[CV] max_depth

[Parallel(n_jobs=-1)]: Done 413 tasks      | elapsed:  8.4min


[CV]  max_depth=6, n_estimators=400, learning_rate=0.1, min_child_weight=15, score=0.766667 -   2.8s
[CV] max_depth=6, n_estimators=1500, learning_rate=0.1, min_child_weight=15 
[CV]  max_depth=6, n_estimators=1000, learning_rate=0.1, min_child_weight=15, score=0.789394 -   6.7s
[CV] max_depth=6, n_estimators=1500, learning_rate=0.1, min_child_weight=15 
[CV]  max_depth=6, n_estimators=1000, learning_rate=0.1, min_child_weight=15, score=0.766667 -   5.6s
[CV] max_depth=6, n_estimators=1500, learning_rate=0.1, min_child_weight=15 
[CV]  max_depth=6, n_estimators=1000, learning_rate=0.1, min_child_weight=15, score=0.774242 -   6.7s
[CV] max_depth=7, n_estimators=300, learning_rate=0.1, min_child_weight=2 
[CV]  max_depth=7, n_estimators=300, learning_rate=0.1, min_child_weight=2, score=0.790909 -   2.3s
[CV] max_depth=7, n_estimators=300, learning_rate=0.1, min_child_weight=2 
[CV]  max_depth=6, n_estimators=1500, learning_rate=0.1, min_child_weight=15, score=0.780303 -   9.3s
[CV] max_d

[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:  9.1min


[CV]  max_depth=7, n_estimators=400, learning_rate=0.1, min_child_weight=5, score=0.783333 -   3.7s
[CV] max_depth=7, n_estimators=1000, learning_rate=0.1, min_child_weight=5 
[CV]  max_depth=7, n_estimators=400, learning_rate=0.1, min_child_weight=5, score=0.763636 -   3.3s
[CV] max_depth=7, n_estimators=1500, learning_rate=0.1, min_child_weight=5 
[CV]  max_depth=7, n_estimators=1000, learning_rate=0.1, min_child_weight=5, score=0.781818 -   8.3s
[CV] max_depth=7, n_estimators=1500, learning_rate=0.1, min_child_weight=5 
[CV]  max_depth=7, n_estimators=1000, learning_rate=0.1, min_child_weight=5, score=0.768182 -   8.7s
[CV] max_depth=7, n_estimators=1500, learning_rate=0.1, min_child_weight=5 
[CV]  max_depth=7, n_estimators=1000, learning_rate=0.1, min_child_weight=5, score=0.763636 -   8.7s
[CV] max_depth=7, n_estimators=300, learning_rate=0.1, min_child_weight=10 
[CV]  max_depth=7, n_estimators=300, learning_rate=0.1, min_child_weight=10, score=0.796970 -   2.7s
[CV] max_depth=7

[Parallel(n_jobs=-1)]: Done 473 tasks      | elapsed:  9.8min


[CV]  max_depth=7, n_estimators=400, learning_rate=0.1, min_child_weight=15, score=0.766667 -   3.0s
[CV] max_depth=7, n_estimators=1500, learning_rate=0.1, min_child_weight=15 
[CV]  max_depth=7, n_estimators=1000, learning_rate=0.1, min_child_weight=15, score=0.786364 -   8.0s
[CV] max_depth=7, n_estimators=1500, learning_rate=0.1, min_child_weight=15 
[CV]  max_depth=7, n_estimators=1000, learning_rate=0.1, min_child_weight=15, score=0.763636 -   8.3s
[CV] max_depth=7, n_estimators=1500, learning_rate=0.1, min_child_weight=15 
[CV]  max_depth=7, n_estimators=1000, learning_rate=0.1, min_child_weight=15, score=0.774242 -   8.4s
[CV] max_depth=4, n_estimators=300, learning_rate=0.3, min_child_weight=2 
[CV]  max_depth=4, n_estimators=300, learning_rate=0.3, min_child_weight=2, score=0.786364 -   1.6s
[CV] max_depth=4, n_estimators=300, learning_rate=0.3, min_child_weight=2 
[CV]  max_depth=7, n_estimators=1500, learning_rate=0.1, min_child_weight=15, score=0.778788 -  10.2s
[CV] max_d

[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed: 10.3min


[CV]  max_depth=4, n_estimators=1000, learning_rate=0.3, min_child_weight=5, score=0.753030 -   4.7s
[CV] max_depth=4, n_estimators=1500, learning_rate=0.3, min_child_weight=5 
[CV]  max_depth=4, n_estimators=1000, learning_rate=0.3, min_child_weight=5, score=0.757576 -   5.4s
[CV] max_depth=4, n_estimators=1500, learning_rate=0.3, min_child_weight=5 
[CV]  max_depth=4, n_estimators=1000, learning_rate=0.3, min_child_weight=5, score=0.748485 -   5.1s
[CV] max_depth=4, n_estimators=300, learning_rate=0.3, min_child_weight=10 
[CV]  max_depth=4, n_estimators=300, learning_rate=0.3, min_child_weight=10, score=0.796970 -   1.6s
[CV] max_depth=4, n_estimators=300, learning_rate=0.3, min_child_weight=10 
[CV]  max_depth=4, n_estimators=1500, learning_rate=0.3, min_child_weight=5, score=0.760606 -   7.9s
[CV] max_depth=4, n_estimators=300, learning_rate=0.3, min_child_weight=10 
[CV]  max_depth=4, n_estimators=300, learning_rate=0.3, min_child_weight=10, score=0.765152 -   1.6s
[CV] max_depth

[Parallel(n_jobs=-1)]: Done 537 tasks      | elapsed: 10.9min


[CV]  max_depth=5, n_estimators=300, learning_rate=0.3, min_child_weight=2, score=0.778788 -   2.0s
[CV] max_depth=5, n_estimators=300, learning_rate=0.3, min_child_weight=2 
[CV]  max_depth=4, n_estimators=1500, learning_rate=0.3, min_child_weight=15, score=0.762121 -   7.5s
[CV] max_depth=5, n_estimators=300, learning_rate=0.3, min_child_weight=2 
[CV]  max_depth=5, n_estimators=300, learning_rate=0.3, min_child_weight=2, score=0.766667 -   2.0s
[CV] max_depth=5, n_estimators=350, learning_rate=0.3, min_child_weight=2 
[CV]  max_depth=5, n_estimators=300, learning_rate=0.3, min_child_weight=2, score=0.757576 -   1.9s
[CV] max_depth=5, n_estimators=350, learning_rate=0.3, min_child_weight=2 
[CV]  max_depth=4, n_estimators=1500, learning_rate=0.3, min_child_weight=15, score=0.763636 -   7.1s
[CV] max_depth=5, n_estimators=350, learning_rate=0.3, min_child_weight=2 
[CV]  max_depth=4, n_estimators=1500, learning_rate=0.3, min_child_weight=15, score=0.754545 -   7.0s
[CV] max_depth=5, n

[Parallel(n_jobs=-1)]: Done 570 tasks      | elapsed: 11.5min


[CV]  max_depth=5, n_estimators=300, learning_rate=0.3, min_child_weight=10, score=0.757576 -   1.4s
[CV] max_depth=5, n_estimators=350, learning_rate=0.3, min_child_weight=10 
[CV]  max_depth=5, n_estimators=350, learning_rate=0.3, min_child_weight=10, score=0.792424 -   1.9s
[CV] max_depth=5, n_estimators=350, learning_rate=0.3, min_child_weight=10 
[CV]  max_depth=5, n_estimators=350, learning_rate=0.3, min_child_weight=10, score=0.765152 -   2.3s
[CV] max_depth=5, n_estimators=400, learning_rate=0.3, min_child_weight=10 
[CV]  max_depth=5, n_estimators=350, learning_rate=0.3, min_child_weight=10, score=0.759091 -   2.0s
[CV] max_depth=5, n_estimators=400, learning_rate=0.3, min_child_weight=10 
[CV]  max_depth=5, n_estimators=1500, learning_rate=0.3, min_child_weight=5, score=0.746970 -  10.0s
[CV] max_depth=5, n_estimators=400, learning_rate=0.3, min_child_weight=10 
[CV]  max_depth=5, n_estimators=1500, learning_rate=0.3, min_child_weight=5, score=0.739394 -   9.2s
[CV] max_depth

[Parallel(n_jobs=-1)]: Done 605 tasks      | elapsed: 12.1min


[CV]  max_depth=6, n_estimators=400, learning_rate=0.3, min_child_weight=2, score=0.756061 -   2.6s
[CV]  max_depth=6, n_estimators=350, learning_rate=0.3, min_child_weight=2, score=0.756061 -   2.9s
[CV] max_depth=6, n_estimators=1000, learning_rate=0.3, min_child_weight=2 
[CV] max_depth=6, n_estimators=1000, learning_rate=0.3, min_child_weight=2 
[CV]  max_depth=6, n_estimators=400, learning_rate=0.3, min_child_weight=2, score=0.780303 -   3.0s
[CV] max_depth=6, n_estimators=1000, learning_rate=0.3, min_child_weight=2 
[CV]  max_depth=6, n_estimators=400, learning_rate=0.3, min_child_weight=2, score=0.753030 -   3.1s
[CV] max_depth=6, n_estimators=1500, learning_rate=0.3, min_child_weight=2 
[CV]  max_depth=6, n_estimators=1000, learning_rate=0.3, min_child_weight=2, score=0.753030 -   6.6s
[CV] max_depth=6, n_estimators=1500, learning_rate=0.3, min_child_weight=2 
[CV]  max_depth=6, n_estimators=1000, learning_rate=0.3, min_child_weight=2, score=0.742424 -   7.0s
[CV] max_depth=6, 

[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed: 12.9min


[CV]  max_depth=6, n_estimators=1000, learning_rate=0.3, min_child_weight=10, score=0.759091 -   7.3s
[CV] max_depth=6, n_estimators=1500, learning_rate=0.3, min_child_weight=10 
[CV]  max_depth=6, n_estimators=1000, learning_rate=0.3, min_child_weight=10, score=0.736364 -   7.5s
[CV] max_depth=6, n_estimators=300, learning_rate=0.3, min_child_weight=15 
[CV]  max_depth=6, n_estimators=300, learning_rate=0.3, min_child_weight=15, score=0.796970 -   2.4s
[CV] max_depth=6, n_estimators=300, learning_rate=0.3, min_child_weight=15 
[CV]  max_depth=6, n_estimators=1500, learning_rate=0.3, min_child_weight=10, score=0.760606 -  10.4s
[CV] max_depth=6, n_estimators=300, learning_rate=0.3, min_child_weight=15 
[CV]  max_depth=6, n_estimators=300, learning_rate=0.3, min_child_weight=15, score=0.775758 -   2.0s
[CV] max_depth=6, n_estimators=350, learning_rate=0.3, min_child_weight=15 
[CV]  max_depth=6, n_estimators=300, learning_rate=0.3, min_child_weight=15, score=0.765152 -   2.0s
[CV] max_d

[Parallel(n_jobs=-1)]: Done 677 tasks      | elapsed: 13.7min


[CV]  max_depth=7, n_estimators=350, learning_rate=0.3, min_child_weight=5, score=0.757576 -   2.3s
[CV] max_depth=7, n_estimators=400, learning_rate=0.3, min_child_weight=5 
[CV]  max_depth=7, n_estimators=1500, learning_rate=0.3, min_child_weight=2, score=0.733333 -  11.7s
[CV]  max_depth=7, n_estimators=1500, learning_rate=0.3, min_child_weight=2, score=0.745455 -  12.1s
[CV] max_depth=7, n_estimators=400, learning_rate=0.3, min_child_weight=5 
[CV] max_depth=7, n_estimators=400, learning_rate=0.3, min_child_weight=5 
[CV]  max_depth=7, n_estimators=350, learning_rate=0.3, min_child_weight=5, score=0.754545 -   3.6s
[CV] max_depth=7, n_estimators=1000, learning_rate=0.3, min_child_weight=5 
[CV]  max_depth=7, n_estimators=400, learning_rate=0.3, min_child_weight=5, score=0.771212 -   3.4s
[CV] max_depth=7, n_estimators=1000, learning_rate=0.3, min_child_weight=5 
[CV]  max_depth=7, n_estimators=400, learning_rate=0.3, min_child_weight=5, score=0.754545 -   2.9s
[CV] max_depth=7, n_e

[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed: 14.7min finished


{'max_depth': 6, 'n_estimators': 1500, 'learning_rate': 0.01, 'min_child_weight': 10}


In [56]:
print(grid.best_score_)

0.7929292929292929


In [202]:
X_test.columns

Index(['circo_leg_meme_nuance', 'code', 'circo_pres_meme_nuance',
       'score_bloc_pres', 'depute_sortant', 'ancien_depute', 'au_gouvernement',
       'ancien_ministre', 'top_triangulaire', 'bloc_opposant_Centre',
       'bloc_opposant_Divers', 'bloc_opposant_Droite',
       'bloc_opposant_Exdroite', 'bloc_opposant_Exgauche',
       'bloc_opposant_Gauche', 'voix_bloc_1nd', 'bloc_Centre', 'bloc_Divers',
       'bloc_Droite', 'bloc_Exdroite', 'bloc_Exgauche', 'bloc_Gauche', 'svc_x',
       'rf_x', 'xgb_x', 'mean_x', 'max_x', 'rf_y', 'svc_y', 'xgb_y', 'mean_y',
       'max_y', 'label_svc', 'label_rf', 'label_xgb', 'label_mean',
       'label_max'],
      dtype='object')

In [17]:
clf_f = XGBClassifier(min_child_weight=10, n_estimators=1500, 
                    learning_rate=0.01, max_depth=6)


X_train = df[(df["an"]!=1997)&(df["an"]!=2017)]
y_train_cl = df.ix[(df["an"]!=1997)&(df["an"]!=2017), "elu"]
X_validation = df[(df["an"]==2017)]
del X_train['p_voix_candidat_2nd']
del X_train["elu"]
del X_train["an"]
del X_train["département"]
del X_train["code"]
del X_train["name"]
del X_train["first_name"]
del X_train['circonscription']

del X_validation["an"]
del X_validation["p_voix_candidat_2nd"]
del X_validation['elu']

In [18]:
X_train = X_train[['p_voix','score_pre_maj','bloc_opposant_Exdroite', 'bloc_opposant_Exgauche',
'top_triangulaire','ancien_ministre', 'au_gouvernement','circo_pres_meme_nuance']]

In [19]:
#cl = DecisionTreeClassifier()
#cl.fit(X_train, y_train_cl)
#sorted(list(zip(cl.feature_importances_, X_train.columns)), reverse=True)

In [20]:
X_train.columns

Index(['p_voix', 'score_pre_maj', 'bloc_opposant_Exdroite',
       'bloc_opposant_Exgauche', 'top_triangulaire', 'ancien_ministre',
       'au_gouvernement', 'circo_pres_meme_nuance'],
      dtype='object')

In [21]:
X_validation = X_validation[['département', 'circonscription', 'code', 'name', 'first_name',
    'p_voix','score_pre_maj','bloc_opposant_Exdroite', 'bloc_opposant_Exgauche',
'top_triangulaire','ancien_ministre', 'au_gouvernement','circo_pres_meme_nuance', ]]

In [22]:
clf_f.fit(X_train, y_train_cl)

XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=1,
       gamma=0, learning_rate=0.01, max_delta_step=0, max_depth=6,
       min_child_weight=10, missing=None, n_estimators=1500, nthread=-1,
       objective='binary:logistic', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)

In [23]:
predict = clf_f.predict_proba(X_validation.drop(["département", 'circonscription',"code","name","first_name"], axis=1))

In [24]:
df_prob = pd.Series( (v[1] for v in predict) , name="predict_proba")
X_validation = X_validation.reset_index(drop=True)
X_validation = pd.concat([X_validation, df_prob], axis=1)

In [25]:
X_validation = pd.merge(X_validation, X_validation.groupby(["code"])["predict_proba"].max().to_frame().reset_index(), how="left", on="code")

In [26]:
X_validation["elu"] = X_validation.apply(lambda x: 0 if x["predict_proba_x"]<x["predict_proba_y"] else 1, axis=1)

In [27]:
te = X_validation.groupby(["code"])["elu"].sum().reset_index()
te[te["elu"]>1]

Unnamed: 0,code,elu
558,986|1,2
561,987|3,2


In [30]:
X_validation[X_validation["code"]=="986|1"]

Unnamed: 0,département,circonscription,code,name,first_name,p_voix,score_pre_maj,bloc_opposant_Exdroite,bloc_opposant_Exgauche,top_triangulaire,ancien_ministre,au_gouvernement,circo_pres_meme_nuance,predict_proba_x,predict_proba_y,elu
1121,WALLIS-ET-FUTUNA,1.0,986|1,BRIAL,SYLVAIN,0.19348,0.0,0,0,1,0.0,0.0,0.0,0.12696,0.963821,0
1122,WALLIS-ET-FUTUNA,1.0,986|1,POLUTELE,NAPOLE,0.433491,0.0,0,0,1,0.0,0.0,0.0,0.963821,0.963821,1
1123,WALLIS-ET-FUTUNA,1.0,986|1,DELORD,HERVE-MICHEL,0.373029,0.0,0,0,1,0.0,0.0,0.0,0.963821,0.963821,1


In [31]:
df_col = pd.read_excel("dataframe_elections.xlsx")
df_col = df_col[df_col["an"]==2017]
df_col  = df_col.rename(columns={"nom":"name","prenom":"first_name"})
df_col.head()

Unnamed: 0,an,c_dep,dep,circo,code,inscrits,geo_frontalier,geo_dom,geo_idf,etrangers,...,sexe,name,first_name,etiquette,nuance,nuance_groupe,bloc,voix,p_voix,second_tour
0,2017,1,AIN,1,1|1,82653.0,1,0,0,0.0793,...,F,BLATRIX-CONTAT,FLORENCE,,SOC,SOC,Gauche,,,
1,2017,1,AIN,1,1|1,82653.0,1,0,0,0.0793,...,M,BONNOT,GILBERT,,DIV,DIV,Divers,,,
2,2017,1,AIN,1,1|1,82653.0,1,0,0,0.0793,...,M,BRETON,XAVIER,,LR,LR,Droite,,,
3,2017,1,AIN,1,1|1,82653.0,1,0,0,0.0793,...,M,BUISSON,JEROME,,FN,FN,Exdroite,,,
4,2017,1,AIN,1,1|1,82653.0,1,0,0,0.0793,...,F,CARLIER,MARIE,,DIV,DIV,Divers,,,


In [33]:
# add partie politique
X_validation_df = pd.merge(X_validation, df_col[["code","name","first_name","nuance_groupe"]], 
                     on=["code","name","first_name"],how="left")

In [34]:
#list_circo = X_validation_df.ix[(X_validation_df["nuance_groupe"]=="SOC")&(X_validation_df["elu"]==1),"code"].tolist()
#X_validation_df[X_validation_df["code"].isin(list_circo)]

In [35]:
X_validation_df.groupby(["nuance_groupe"])["elu"].sum()

nuance_groupe
COM      3
DIV      2
DLF      1
DVD      5
DVG     13
ECO      4
EXD      0
FI      13
FN      26
LR     101
MDM     41
RDG      3
REM    281
SOC     69
UDI     17
Name: elu, dtype: int64

In [37]:
X_validation_df.to_csv("prediction_2nd.csv", index=False)

In [38]:
exportCsv = X_validation_df[['département','circonscription','code','first_name','name','nuance_groupe','elu']]
exportCsv['code'] = exportCsv['code'].apply(str.lower)
exportCsv['code'] = exportCsv['code'].apply(str).apply(lambda x : x.replace('|', '-'))
exportCsv['candidat'] = exportCsv['first_name'].apply(str.capitalize) + ' ' + exportCsv['name'].apply(str.capitalize)
exportCsv['nom circo'] = exportCsv['département'].apply(
    lambda x : x.replace('-', ' ')).apply(str.title) + ' - ' + exportCsv['circonscription'].apply(str)
exportCsv = exportCsv.drop(['département', 'circonscription', 'first_name', 'name'], axis=1)
exportCsv = exportCsv.sort_values(['code', 'elu'], ascending=[True, False])

circos = exportCsv['code'].unique()
temp = pd.DataFrame()
d = {}
for c in circos:
    temp = exportCsv[exportCsv['code'] == c]
    l = []
    for index, rows in temp.iterrows():
        l.extend(rows[1:].values)
    d[c] = l
  
duels = {k: v for k, v in d.items() if len(v) < 9}

duelsDf = pd.DataFrame.from_dict(duels, orient='index')


    
duelsDf.columns = ['color1','score1','candidat1','nom circo','color2','score2','candidat2','nom circo']
duelsDf = duelsDf.iloc[:,:7]
print(duelsDf.head())

triangulaires = {k: v for k, v in d.items() if len(v) > 8}
triangulaires = pd.DataFrame(triangulaires)

triangulaires.index = ['color1','score1','candidat1','nom circo','color2','score2','candidat2','nom circo','color3','score3','candidat3','nom circo']
triangulaires = triangulaires.T

nomCirco = triangulaires['nom circo'].iloc[:,0]
triangulaires = triangulaires.drop('nom circo', axis=1)
triangulaires['nom circo'] = nomCirco
final = pd.concat([duelsDf, triangulaires])
final = final.fillna(value='')
final.index.name = 'circo'
print(final.sort_index().head())
final.to_csv('resultats2.csv', index=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas

     color1  score1          candidat1                nom circo color2  \
79-2    SOC       1     Delphine Batho        Deux Sevres - 2.0    REM   
17-1    MDM       1    Otilia Ferreira  Charente Maritime - 1.0    DVG   
1-3     REM       1      Olga Givernet                Ain - 3.0     LR   
85-5    REM       1     Pierre Henriet             Vendee - 5.0    SOC   
15-1    REM       1  Francois Danemans             Cantal - 1.0     LR   

      score2                 candidat2  
79-2     0.0          Christine Heintz  
17-1     0.0           Olivier Falorni  
1-3      0.0  Stephanie Pernod beaudon  
85-5     0.0            Hugues Fourage  
15-1     0.0          Vincent Descoeur  
                       candidat1                 candidat2 candidat3 color1  \
circo                                                                         
1-1                Xavier Breton            Laurent Mallet               LR   
1-2    Charles De la verpilliere       Marie-jeanne Beguet              