In [4]:
import warnings
warnings.filterwarnings('ignore')

In [5]:
import numpy as np
import pandas as pd

In [6]:
from xgboost import XGBClassifier
from lightgbm.sklearn import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [7]:
X = np.load("./tatanic_X_train.npy")
y = np.load("./tatanic_y_train.npy")

In [8]:
estimator1 = XGBClassifier(max_depth=3, learning_rate=0.5, n_estimators=50, n_jobs=-1)
estimator2 = LGBMClassifier(max_depth=2, learning_rate=0.5, n_estimators=50, n_jobs=-1)
estimator3 = RandomForestClassifier(n_estimators=500, max_depth=3, n_jobs=-1)
estimator4 = SVC(probability=True)
estimator5 = MLPClassifier(hidden_layer_sizes=(512,256, 32))

In [9]:
base_estimators = [estimator1, estimator2, estimator3, estimator4, estimator5]

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((533, 27), (356, 27), (533,), (356,))

In [12]:
for estimator in base_estimators:
            estimator.fit(X_train, y_train)

In [13]:
base_estimators[0].predict_proba(X_test)

array([[2.12596834e-01, 7.87403166e-01],
       [3.97248864e-02, 9.60275114e-01],
       [9.67849433e-01, 3.21505889e-02],
       [8.06035042e-01, 1.93964958e-01],
       [9.65058327e-01, 3.49416472e-02],
       [6.58898950e-01, 3.41101050e-01],
       [2.72517979e-01, 7.27482021e-01],
       [7.89796114e-01, 2.10203886e-01],
       [9.69196260e-01, 3.08037456e-02],
       [8.36186171e-01, 1.63813815e-01],
       [9.57217336e-01, 4.27826457e-02],
       [9.26519275e-01, 7.34807104e-02],
       [9.33504760e-01, 6.64952174e-02],
       [2.27426171e-01, 7.72573829e-01],
       [1.52094185e-01, 8.47905815e-01],
       [8.49496543e-01, 1.50503442e-01],
       [9.92577493e-01, 7.42248585e-03],
       [7.24271774e-01, 2.75728226e-01],
       [9.22485411e-01, 7.75145888e-02],
       [2.78218031e-01, 7.21781969e-01],
       [2.25511193e-02, 9.77448881e-01],
       [8.86196733e-01, 1.13803238e-01],
       [5.06880999e-01, 4.93119001e-01],
       [9.39615965e-01, 6.03840463e-02],
       [9.494707

In [14]:
meta_train_set = np.array([estimator.predict(X_test) for estimator in base_estimators]).T

In [15]:
meta_train_set

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [0., 0., 0., 0., 0.],
       ...,
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [0., 0., 0., 0., 0.]])

In [16]:
from sklearn.model_selection import cross_val_score

In [17]:
for estimator in base_estimators:
    result = cross_val_score(estimator, meta_train_set, y_test, scoring="accuracy" , cv=5).mean()
    print(result)

0.811998658618377
0.8175922199865863
0.8092600044712721
0.8204091213950369
0.8064431030628214


In [18]:
meta_train_set2 = np.array([estimator.predict_proba(X_test)[:,1] for estimator in base_estimators]).T

In [19]:
meta_train_set2

array([[0.78740317, 0.82307968, 0.67751481, 0.71829326, 0.7120796 ],
       [0.96027511, 0.97854772, 0.84587584, 0.73192096, 0.99999969],
       [0.03215059, 0.05001944, 0.12081011, 0.14348571, 0.08531864],
       ...,
       [0.97578979, 0.92453773, 0.78335037, 0.71017078, 0.99998455],
       [0.97858471, 0.99151046, 0.83633081, 0.74453846, 0.99999997],
       [0.35436112, 0.27183471, 0.12749528, 0.14265144, 0.1016244 ]])

In [None]:
for estimator in base_estimators:
    result = cross_val_score(estimator, meta_train_set2, y_test, scoring="accuracy" , cv=5).mean()
    print(result)

0.8037066845517551
0.8037055667337357
0.834575229152694
0.820449362843729


In [None]:
new_X_test = np.concatenate([X_test, meta_train_set2], axis = 1)
new_X_test.shape

In [None]:
for estimator in base_estimators:
    result = cross_val_score(estimator, new_X_test, y_test, scoring="accuracy" , cv=5).mean()
    print(result)