In [1]:
import pandas as pd
import numpy as np

# Stacking classifier
from sklearn.ensemble import StackingClassifier

# Base models
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression

# Read the flights data, drop every row that has a missing value, and keep a
# random sample of 1000 of the remaining rows.
# random_state pins which rows are drawn: without it each run samples different
# rows, so the scores and predictions shown later could not be reproduced.
flights = pd.read_csv('flights.csv').dropna().sample(n=1000, random_state=42)

flights

Unnamed: 0,year,month,day,dep_time,sched_dep_time,delay,dep_delay,arr_time,sched_arr_time,arr_delay,...,time_hour,temp,dewp,humid,wind_dir,wind_speed,wind_gust,precip,pressure,visib
292994,2013,8,14,1627.0,1625,1,2.0,1901.0,1839,22.0,...,8/14/13 16:00,75.92,48.02,37.30,300.0,23.01560,31.07106,0.0,1012.0,10.0
217726,2013,5,27,1252.0,1300,0,-8.0,1346.0,1409,-23.0,...,5/27/13 13:00,73.04,30.02,20.33,260.0,11.50780,16.11092,0.0,1021.4,10.0
323913,2013,9,17,804.0,810,0,-6.0,921.0,954,-33.0,...,9/17/13 8:00,53.06,35.06,50.28,30.0,14.96014,21.86482,0.0,1030.0,10.0
293105,2013,8,14,1801.0,1735,1,26.0,2133.0,2030,63.0,...,8/14/13 17:00,75.02,48.92,39.75,310.0,21.86482,27.61872,0.0,1012.4,10.0
1347,2013,1,2,1457.0,1459,0,-2.0,1628.0,1645,-17.0,...,1/2/13 14:00,35.06,10.94,36.26,300.0,13.80936,21.86482,0.0,1017.5,10.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305051,2013,8,27,1246.0,1115,1,91.0,1404.0,1254,70.0,...,8/27/13 11:00,78.98,64.94,62.16,350.0,18.41248,24.16638,0.0,1011.6,10.0
148117,2013,3,13,1803.0,1759,1,4.0,2054.0,2054,0.0,...,3/13/13 17:00,48.02,19.04,31.30,270.0,16.11092,26.46794,0.0,1009.2,10.0
205152,2013,5,13,1658.0,1705,0,-7.0,1844.0,1906,-22.0,...,5/13/13 17:00,55.04,23.00,28.57,320.0,11.50780,19.56326,0.0,1015.6,10.0
151209,2013,3,17,821.0,824,0,-3.0,1016.0,1014,2.0,...,3/17/13 8:00,30.92,15.98,53.44,310.0,9.20624,19.56326,0.0,1020.1,10.0


In [2]:
# Names of the weather columns used as input features
weather_columns = ['temp', 'dewp', 'humid', 'wind_dir', 'wind_speed',
                   'wind_gust', 'precip', 'pressure', 'visib']

# X holds the input features; y is a one-column frame with the 'delay' output
X = flights[weather_columns]
y = flights[['delay']]

In [3]:
# Base models for the stacking ensemble, given as (name, estimator) pairs
estimators = [
    # A decision tree permutes candidate features at random at each split, so it
    # is seeded here to make repeated fits on the same data build the same tree.
    ('decision_tree', DecisionTreeClassifier(random_state=42)),
    ('knn', KNeighborsClassifier()),
    ('gnb', GaussianNB()),
]

In [4]:
# Logistic regression is the final estimator that combines the base models
meta_model = LogisticRegression()

# Flatten the target to a 1-D array before fitting
y_flat = np.ravel(y)

# Build the stacking classifier and fit it in a single chained call
stackingModel = StackingClassifier(
    estimators=estimators,
    final_estimator=meta_model,
).fit(X, y_flat)

In [5]:
# Accuracy of StackingClassifier() on the same rows it was fitted on.
# NOTE: this is an in-sample (training) score, so it is likely optimistic; to
# estimate accuracy on unseen flights, score rows that were held out from fit().
# y is flattened with np.ravel so the target has the same 1-D shape used in fit().
stackingModel.score(X, np.ravel(y))

0.782

In [6]:
# Class predicted by StackingClassifier() for each row of X
predicted_labels = stackingModel.predict(X)
predicted_labels

array([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0,
       1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,

In [7]:
# Class probabilities predicted by StackingClassifier() for each row of X
predicted_probabilities = stackingModel.predict_proba(X)
predicted_probabilities

array([[0.48210849, 0.51789151],
       [0.63245517, 0.36754483],
       [0.60343212, 0.39656788],
       ...,
       [0.64311993, 0.35688007],
       [0.67855592, 0.32144408],
       [0.52003406, 0.47996594]])