In [1]:
import pandas as pd
import numpy as np

# Stacking regressor
from sklearn.ensemble import StackingRegressor

# Base models
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler


# Load the flights data, drop every row that contains a missing value, and
# draw a 1000-row random sample. random_state pins the draw so that
# Restart Kernel -> Run All selects the same rows each time; without it the
# sample (and everything computed from it below) changes on every run.
flights = pd.read_csv('flights.csv').dropna().sample(1000, random_state=42)

flights

Unnamed: 0,year,month,day,dep_time,sched_dep_time,delay,dep_delay,arr_time,sched_arr_time,arr_delay,...,time_hour,temp,dewp,humid,wind_dir,wind_speed,wind_gust,precip,pressure,visib
52458,2013,10,28,1223.0,1230,0,-7.0,1523.0,1459,24.0,...,10/28/13 12:00,60.08,30.02,31.85,260.0,13.80936,21.86482,0.0,1023.0,10.0
708,2013,1,1,1915.0,1920,0,-5.0,2238.0,2257,-19.0,...,1/1/13 19:00,33.08,14.00,44.92,350.0,13.80936,24.16638,0.0,1014.3,10.0
220025,2013,5,29,1825.0,1830,0,-5.0,2058.0,2100,-2.0,...,5/29/13 18:00,87.08,62.96,44.63,230.0,11.50780,18.41248,0.0,1015.8,10.0
249311,2013,6,29,1730.0,1730,1,0.0,2030.0,2043,-13.0,...,6/29/13 17:00,75.92,69.08,79.37,180.0,20.71404,26.46794,0.0,1003.7,10.0
126626,2013,2,18,1540.0,1548,0,-8.0,1803.0,1830,-27.0,...,2/18/13 15:00,35.96,1.94,23.24,280.0,12.65858,23.01560,0.0,1022.4,10.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56365,2013,11,1,1359.0,1359,1,0.0,1458.0,1511,-13.0,...,11/1/13 13:00,68.00,55.94,65.31,230.0,17.26170,27.61872,0.0,1001.4,10.0
232679,2013,6,12,957.0,1000,0,-3.0,1315.0,1319,-4.0,...,6/12/13 10:00,71.06,50.00,47.29,310.0,20.71404,33.37262,0.0,1008.8,10.0
142459,2013,3,7,1809.0,1610,1,119.0,1957.0,1736,141.0,...,3/7/13 16:00,35.96,30.02,78.79,350.0,24.16638,34.52340,0.0,1017.4,8.0
45239,2013,10,20,1648.0,1655,0,-7.0,1823.0,1845,-22.0,...,10/20/13 16:00,62.96,33.98,33.75,310.0,11.50780,18.41248,0.0,1014.2,10.0


In [2]:
# Select the model inputs (X) and the prediction target (y) by column label.
# Both selections use a list of labels, so X and y are DataFrames
# (y is a single-column DataFrame, not a Series).
X = flights.loc[:, ['dewp', 'humid', 'wind_dir']]
y = flights.loc[:, ['temp']]

In [3]:
# Base (level-0) models for the stacking ensemble, as (name, estimator) pairs.
# - 'linear': ordinary least squares on the raw features.
# - 'decision_tree': regression tree; random_state is fixed because
#   scikit-learn randomly permutes the features at each split, so an unseeded
#   tree can differ from run to run.
# - 'svr': support vector regressor fed standardized features via a pipeline.
# - 'poly': degree-2 polynomial feature expansion followed by least squares.
estimators = [
    ('linear', LinearRegression()),
    ('decision_tree', DecisionTreeRegressor(random_state=42)),
    ('svr', make_pipeline(StandardScaler(), SVR())),
    ('poly', make_pipeline(PolynomialFeatures(degree=2), LinearRegression())),
]

In [4]:
# Build the stacked ensemble (base models from `estimators`, combined by a
# LinearRegression final estimator) and fit it in one chained statement.
# np.ravel flattens the single-column target to 1-D before fitting.
# The fit result is assigned back to stackingModel, so the cell shows no output.
stackingModel = StackingRegressor(
    estimators=estimators,
    final_estimator=LinearRegression(),
).fit(X, np.ravel(y))

In [5]:
# Score of the fitted StackingRegressor; for scikit-learn regressors `score`
# reports R^2 (coefficient of determination), not classification accuracy.
# NOTE(review): X and y are the same data passed to fit above, so this is an
# in-sample score, not an estimate of performance on unseen data.
stackingModel.score(X, y)

0.9992257211559527

In [6]:
# Predictions from the fitted StackingRegressor for the rows of X
# (the same rows the model was fit on).
stackingModel.predict(X)

array([ 60.49694156,  33.08987131,  87.2901551 ,  75.75678962,
        35.75089954,  73.74816885,  32.38455819,  42.43016646,
        37.49391523,  71.45504384,  36.37428879,  41.61917961,
        26.98314782,  30.97220926,  47.12074051,  57.49431936,
        67.88713041,  37.02658345,  20.27447345,  40.36792124,
        50.06120325,  35.40995561,  26.6124224 ,  30.41390878,
        26.86022941,  70.11290704,  51.72997488,  78.54478548,
        38.12310214,  56.1983864 ,  30.17532912,  39.4425136 ,
        38.37734464,  62.61993544,  69.50756168,  77.75323181,
        48.45834296,  32.32389588,  26.43690609,  24.11136063,
        87.29139288,  59.45707918,  31.95033403,  58.22865553,
        84.32691167,  34.70301545,  15.83645022,  56.28109489,
        55.57323783,  75.45343716, 100.01560703,  28.41920332,
        29.97916395,  38.13194186,  32.25677778,  91.2640942 ,
        71.69915512,  32.81931798,  61.49749669,  38.03377598,
        62.3507178 ,  92.95113912,  50.99028538,  37.16