In [47]:
import pandas as pd
import numpy as np
import xgboost as xgb

import pickle
from geopy.geocoders import Nominatim
from sklearn.model_selection import train_test_split

pd.set_option('display.max_columns', None)

# Import data and take a look at it

In [24]:
sample_df = pd.read_csv("train.csv")

In [25]:
sample_df.shape

(1458644, 11)

In [26]:
sample_df.head()

Unnamed: 0,id,vendor_id,pickup_datetime,dropoff_datetime,passenger_count,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,store_and_fwd_flag,trip_duration
0,id2875421,2,2016-03-14 17:24:55,2016-03-14 17:32:30,1,-73.982155,40.767937,-73.96463,40.765602,N,455
1,id2377394,1,2016-06-12 00:43:35,2016-06-12 00:54:38,1,-73.980415,40.738564,-73.999481,40.731152,N,663
2,id3858529,2,2016-01-19 11:35:24,2016-01-19 12:10:48,1,-73.979027,40.763939,-74.005333,40.710087,N,2124
3,id3504673,2,2016-04-06 19:32:31,2016-04-06 19:39:40,1,-74.01004,40.719971,-74.012268,40.706718,N,429
4,id2181028,2,2016-03-26 13:30:55,2016-03-26 13:38:10,1,-73.973053,40.793209,-73.972923,40.78252,N,435


# Data Preprocessing

In [27]:
sample_df["store_and_fwd_flag"].value_counts()

N    1450599
Y       8045
Name: store_and_fwd_flag, dtype: int64

In [28]:
#Convert character variables to numeric 
f = lambda x: 0 if x == 'N' else 1

sample_df["store_and_fwd_flag"] = sample_df["store_and_fwd_flag"].apply(lambda x: f(x))            

In [29]:
#Check result
sample_df["store_and_fwd_flag"].value_counts()

0    1450599
1       8045
Name: store_and_fwd_flag, dtype: int64

## Engineer features

In [30]:
#First, convert datetime strings into datetime
sample_df["dropoff_datetime"] = pd.to_datetime(sample_df["dropoff_datetime"], format='%Y-%m-%d %H:%M:%S')
sample_df["pickup_datetime"] = pd.to_datetime(sample_df["pickup_datetime"], format='%Y-%m-%d %H:%M:%S')

In [31]:
#Now construct other variables, like month, date, etc.
sample_df["pickup_month"] = sample_df["pickup_datetime"].dt.month
sample_df["pickup_day"] = sample_df["pickup_datetime"].dt.day
sample_df["pickup_weekday"] = sample_df["pickup_datetime"].dt.weekday #sample_df["pickup_weekday"] = sample_df["pickup_datetime"].dt.weekday_name
sample_df["pickup_hour"] = sample_df["pickup_datetime"].dt.hour
sample_df["pickup_minute"] = sample_df["pickup_datetime"].dt.minute

In [32]:
#Get latitude and longitude differences 
sample_df["latitude_difference"] = sample_df["dropoff_latitude"] - sample_df["pickup_latitude"]
sample_df["longitude_difference"] = sample_df["dropoff_longitude"] - sample_df["pickup_longitude"]

In [33]:
#Convert duration to minutes for easier interpretation
sample_df["trip_duration"] = sample_df["trip_duration"].apply(lambda x: round(x/60))   

In [34]:
#Convert trip distance from longitude and latitude differences to Manhattan distance.
sample_df["trip_distance"] = 0.621371 * 6371 * (abs(2 * np.arctan2(np.sqrt(np.square(np.sin((abs(sample_df["latitude_difference"]) * np.pi / 180) / 2))), 
                                  np.sqrt(1-(np.square(np.sin((abs(sample_df["latitude_difference"]) * np.pi / 180) / 2)))))) + \
                                     abs(2 * np.arctan2(np.sqrt(np.square(np.sin((abs(sample_df["longitude_difference"]) * np.pi / 180) / 2))), 
                                  np.sqrt(1-(np.square(np.sin((abs(sample_df["longitude_difference"]) * np.pi / 180) / 2)))))))

In [35]:
sample_df.head(5)

Unnamed: 0,id,vendor_id,pickup_datetime,dropoff_datetime,passenger_count,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,store_and_fwd_flag,trip_duration,pickup_month,pickup_day,pickup_weekday,pickup_hour,pickup_minute,latitude_difference,longitude_difference,trip_distance
0,id2875421,2,2016-03-14 17:24:55,2016-03-14 17:32:30,1,-73.982155,40.767937,-73.96463,40.765602,0,8,3,14,0,17,24,-0.002335,0.017525,1.372146
1,id2377394,1,2016-06-12 00:43:35,2016-06-12 00:54:38,1,-73.980415,40.738564,-73.999481,40.731152,0,11,6,12,6,0,43,-0.007412,-0.019066,1.82944
2,id3858529,2,2016-01-19 11:35:24,2016-01-19 12:10:48,1,-73.979027,40.763939,-74.005333,40.710087,0,35,1,19,1,11,35,-0.053852,-0.026306,5.538397
3,id3504673,2,2016-04-06 19:32:31,2016-04-06 19:39:40,1,-74.01004,40.719971,-74.012268,40.706718,0,7,4,6,2,19,32,-0.013252,-0.002228,1.069567
4,id2181028,2,2016-03-26 13:30:55,2016-03-26 13:38:10,1,-73.973053,40.793209,-73.972923,40.78252,0,7,3,26,5,13,30,-0.010689,0.00013,0.747485


# Modeling

In [36]:
X = sample_df.drop(["trip_duration", "id", "vendor_id", "pickup_datetime", "dropoff_datetime"], axis=1)
y = sample_df["trip_duration"]

In [37]:
#Split the data into training, test, and valdiation sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=2018)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=2019)

In [38]:
#Define evaluation metric
def rmsle(y_true, y_pred):
    assert len(y_true) == len(y_pred)
    return np.square(np.log(y_pred + 1) - np.log(y_true + 1)).mean() ** 0.5

In [39]:
#XGBoost parameters 
params = {
    'booster':            'gbtree',
    'objective':          'reg:linear',
    'learning_rate':      0.05,
    'max_depth':          14,
    'subsample':          0.9,
    'colsample_bytree':   0.7,
    'colsample_bylevel':  0.7,
    'silent':             1,
    'feval':              'rmsle'
}

In [40]:
nrounds = 2000

In [41]:
#Define train and validation sets
dtrain = xgb.DMatrix(X_train, np.log(y_train+1))
dval = xgb.DMatrix(X_val, np.log(y_val+1))

#this is for tracking the error
watchlist = [(dval, 'eval'), (dtrain, 'train')]

In [42]:
#Train model
gbm = xgb.train(params,
                dtrain,
                num_boost_round = nrounds,
                evals = watchlist,
                verbose_eval = True
                )

[0]	eval-rmse:2.0056	train-rmse:2.00487
[1]	eval-rmse:1.90958	train-rmse:1.90889
[2]	eval-rmse:1.82076	train-rmse:1.8201
[3]	eval-rmse:1.7394	train-rmse:1.73862
[4]	eval-rmse:1.65689	train-rmse:1.65615
[5]	eval-rmse:1.57862	train-rmse:1.57782
[6]	eval-rmse:1.50444	train-rmse:1.50362
[7]	eval-rmse:1.43443	train-rmse:1.43351
[8]	eval-rmse:1.36794	train-rmse:1.36684
[9]	eval-rmse:1.3051	train-rmse:1.30383
[10]	eval-rmse:1.24578	train-rmse:1.24448
[11]	eval-rmse:1.19034	train-rmse:1.18892
[12]	eval-rmse:1.13689	train-rmse:1.1353
[13]	eval-rmse:1.08757	train-rmse:1.08583
[14]	eval-rmse:1.04003	train-rmse:1.03817
[15]	eval-rmse:0.995148	train-rmse:0.993058
[16]	eval-rmse:0.952726	train-rmse:0.950474
[17]	eval-rmse:0.912655	train-rmse:0.909898
[18]	eval-rmse:0.875215	train-rmse:0.872203
[19]	eval-rmse:0.839729	train-rmse:0.836364
[20]	eval-rmse:0.805995	train-rmse:0.80232
[21]	eval-rmse:0.775364	train-rmse:0.771327
[22]	eval-rmse:0.745339	train-rmse:0.740953
[23]	eval-rmse:0.717162	train-rmse

[187]	eval-rmse:0.333042	train-rmse:0.225253
[188]	eval-rmse:0.333021	train-rmse:0.224926
[189]	eval-rmse:0.333017	train-rmse:0.22458
[190]	eval-rmse:0.332987	train-rmse:0.224138
[191]	eval-rmse:0.332943	train-rmse:0.223944
[192]	eval-rmse:0.332934	train-rmse:0.223644
[193]	eval-rmse:0.332864	train-rmse:0.223346
[194]	eval-rmse:0.332825	train-rmse:0.222863
[195]	eval-rmse:0.332807	train-rmse:0.222794
[196]	eval-rmse:0.332802	train-rmse:0.222518
[197]	eval-rmse:0.332799	train-rmse:0.222398
[198]	eval-rmse:0.332766	train-rmse:0.222174
[199]	eval-rmse:0.332731	train-rmse:0.221978
[200]	eval-rmse:0.332712	train-rmse:0.22181
[201]	eval-rmse:0.332685	train-rmse:0.221644
[202]	eval-rmse:0.332673	train-rmse:0.221428
[203]	eval-rmse:0.332643	train-rmse:0.22116
[204]	eval-rmse:0.332638	train-rmse:0.221084
[205]	eval-rmse:0.33263	train-rmse:0.220994
[206]	eval-rmse:0.332612	train-rmse:0.220668
[207]	eval-rmse:0.332585	train-rmse:0.22035
[208]	eval-rmse:0.332579	train-rmse:0.220298
[209]	eval-rmse

[371]	eval-rmse:0.329566	train-rmse:0.190447
[372]	eval-rmse:0.329544	train-rmse:0.190186
[373]	eval-rmse:0.329529	train-rmse:0.190046
[374]	eval-rmse:0.329526	train-rmse:0.190002
[375]	eval-rmse:0.329518	train-rmse:0.189886
[376]	eval-rmse:0.329516	train-rmse:0.189749
[377]	eval-rmse:0.329494	train-rmse:0.189498
[378]	eval-rmse:0.329487	train-rmse:0.189285
[379]	eval-rmse:0.329489	train-rmse:0.189128
[380]	eval-rmse:0.329488	train-rmse:0.189094
[381]	eval-rmse:0.329484	train-rmse:0.18876
[382]	eval-rmse:0.32948	train-rmse:0.188634
[383]	eval-rmse:0.329488	train-rmse:0.188462
[384]	eval-rmse:0.329462	train-rmse:0.188255
[385]	eval-rmse:0.329456	train-rmse:0.188093
[386]	eval-rmse:0.329445	train-rmse:0.18791
[387]	eval-rmse:0.329445	train-rmse:0.187877
[388]	eval-rmse:0.329444	train-rmse:0.187848
[389]	eval-rmse:0.329446	train-rmse:0.187776
[390]	eval-rmse:0.329444	train-rmse:0.187741
[391]	eval-rmse:0.329413	train-rmse:0.18748
[392]	eval-rmse:0.32941	train-rmse:0.187416
[393]	eval-rmse

[554]	eval-rmse:0.328635	train-rmse:0.169749
[555]	eval-rmse:0.328632	train-rmse:0.169691
[556]	eval-rmse:0.328628	train-rmse:0.169606
[557]	eval-rmse:0.328635	train-rmse:0.16948
[558]	eval-rmse:0.32864	train-rmse:0.169399
[559]	eval-rmse:0.328638	train-rmse:0.169345
[560]	eval-rmse:0.328637	train-rmse:0.169236
[561]	eval-rmse:0.328636	train-rmse:0.16921
[562]	eval-rmse:0.328626	train-rmse:0.169108
[563]	eval-rmse:0.328622	train-rmse:0.16904
[564]	eval-rmse:0.328618	train-rmse:0.168937
[565]	eval-rmse:0.328614	train-rmse:0.168861
[566]	eval-rmse:0.328614	train-rmse:0.168779
[567]	eval-rmse:0.328604	train-rmse:0.168617
[568]	eval-rmse:0.328607	train-rmse:0.168483
[569]	eval-rmse:0.328606	train-rmse:0.168412
[570]	eval-rmse:0.328582	train-rmse:0.168125
[571]	eval-rmse:0.328582	train-rmse:0.16803
[572]	eval-rmse:0.328573	train-rmse:0.167965
[573]	eval-rmse:0.328571	train-rmse:0.167855
[574]	eval-rmse:0.328564	train-rmse:0.167688
[575]	eval-rmse:0.32855	train-rmse:0.167557
[576]	eval-rmse:

[738]	eval-rmse:0.328176	train-rmse:0.15448
[739]	eval-rmse:0.328173	train-rmse:0.154365
[740]	eval-rmse:0.328172	train-rmse:0.154348
[741]	eval-rmse:0.328172	train-rmse:0.154273
[742]	eval-rmse:0.328173	train-rmse:0.154207
[743]	eval-rmse:0.328171	train-rmse:0.154147
[744]	eval-rmse:0.328173	train-rmse:0.154044
[745]	eval-rmse:0.328177	train-rmse:0.153943
[746]	eval-rmse:0.32818	train-rmse:0.15391
[747]	eval-rmse:0.328178	train-rmse:0.153885
[748]	eval-rmse:0.328172	train-rmse:0.153796
[749]	eval-rmse:0.328164	train-rmse:0.153712
[750]	eval-rmse:0.328163	train-rmse:0.153571
[751]	eval-rmse:0.328166	train-rmse:0.153493
[752]	eval-rmse:0.328161	train-rmse:0.153389
[753]	eval-rmse:0.328162	train-rmse:0.153323
[754]	eval-rmse:0.328158	train-rmse:0.153278
[755]	eval-rmse:0.328158	train-rmse:0.153169
[756]	eval-rmse:0.328161	train-rmse:0.153074
[757]	eval-rmse:0.328157	train-rmse:0.153032
[758]	eval-rmse:0.328157	train-rmse:0.152926
[759]	eval-rmse:0.328155	train-rmse:0.152849
[760]	eval-rm

[921]	eval-rmse:0.327895	train-rmse:0.142381
[922]	eval-rmse:0.327895	train-rmse:0.142364
[923]	eval-rmse:0.327892	train-rmse:0.142294
[924]	eval-rmse:0.327892	train-rmse:0.14224
[925]	eval-rmse:0.327885	train-rmse:0.142153
[926]	eval-rmse:0.327887	train-rmse:0.142107
[927]	eval-rmse:0.327887	train-rmse:0.141948
[928]	eval-rmse:0.327885	train-rmse:0.14179
[929]	eval-rmse:0.327883	train-rmse:0.141758
[930]	eval-rmse:0.327883	train-rmse:0.141748
[931]	eval-rmse:0.327882	train-rmse:0.141683
[932]	eval-rmse:0.327874	train-rmse:0.141617
[933]	eval-rmse:0.327872	train-rmse:0.141576
[934]	eval-rmse:0.327878	train-rmse:0.141544
[935]	eval-rmse:0.327882	train-rmse:0.141471
[936]	eval-rmse:0.327881	train-rmse:0.141362
[937]	eval-rmse:0.327879	train-rmse:0.141247
[938]	eval-rmse:0.327879	train-rmse:0.141217
[939]	eval-rmse:0.327879	train-rmse:0.141153
[940]	eval-rmse:0.327878	train-rmse:0.141106
[941]	eval-rmse:0.327879	train-rmse:0.141085
[942]	eval-rmse:0.327872	train-rmse:0.140999
[943]	eval-r

[1102]	eval-rmse:0.327698	train-rmse:0.131615
[1103]	eval-rmse:0.327696	train-rmse:0.131603
[1104]	eval-rmse:0.327696	train-rmse:0.131502
[1105]	eval-rmse:0.327695	train-rmse:0.131473
[1106]	eval-rmse:0.327697	train-rmse:0.131433
[1107]	eval-rmse:0.327696	train-rmse:0.131405
[1108]	eval-rmse:0.327697	train-rmse:0.131354
[1109]	eval-rmse:0.327697	train-rmse:0.131323
[1110]	eval-rmse:0.327696	train-rmse:0.131278
[1111]	eval-rmse:0.327692	train-rmse:0.13121
[1112]	eval-rmse:0.327691	train-rmse:0.131178
[1113]	eval-rmse:0.327692	train-rmse:0.131162
[1114]	eval-rmse:0.327692	train-rmse:0.131132
[1115]	eval-rmse:0.327691	train-rmse:0.13106
[1116]	eval-rmse:0.327687	train-rmse:0.130968
[1117]	eval-rmse:0.327686	train-rmse:0.130957
[1118]	eval-rmse:0.327686	train-rmse:0.130911
[1119]	eval-rmse:0.327682	train-rmse:0.13087
[1120]	eval-rmse:0.327674	train-rmse:0.130776
[1121]	eval-rmse:0.327674	train-rmse:0.130704
[1122]	eval-rmse:0.327673	train-rmse:0.130642
[1123]	eval-rmse:0.327674	train-rmse:

[1281]	eval-rmse:0.327565	train-rmse:0.12255
[1282]	eval-rmse:0.327566	train-rmse:0.122532
[1283]	eval-rmse:0.327566	train-rmse:0.122515
[1284]	eval-rmse:0.327565	train-rmse:0.122473
[1285]	eval-rmse:0.327566	train-rmse:0.12246
[1286]	eval-rmse:0.327566	train-rmse:0.122421
[1287]	eval-rmse:0.327561	train-rmse:0.122338
[1288]	eval-rmse:0.327561	train-rmse:0.122313
[1289]	eval-rmse:0.327557	train-rmse:0.122184
[1290]	eval-rmse:0.327558	train-rmse:0.122143
[1291]	eval-rmse:0.32756	train-rmse:0.122108
[1292]	eval-rmse:0.327561	train-rmse:0.122044
[1293]	eval-rmse:0.327559	train-rmse:0.121992
[1294]	eval-rmse:0.32756	train-rmse:0.121981
[1295]	eval-rmse:0.32756	train-rmse:0.121959
[1296]	eval-rmse:0.32756	train-rmse:0.121928
[1297]	eval-rmse:0.327559	train-rmse:0.121897
[1298]	eval-rmse:0.327561	train-rmse:0.121847
[1299]	eval-rmse:0.327561	train-rmse:0.121829
[1300]	eval-rmse:0.327561	train-rmse:0.121786
[1301]	eval-rmse:0.327556	train-rmse:0.121651
[1302]	eval-rmse:0.327557	train-rmse:0.1

[1460]	eval-rmse:0.327512	train-rmse:0.114567
[1461]	eval-rmse:0.327513	train-rmse:0.11451
[1462]	eval-rmse:0.327513	train-rmse:0.114463
[1463]	eval-rmse:0.327509	train-rmse:0.114358
[1464]	eval-rmse:0.327507	train-rmse:0.114299
[1465]	eval-rmse:0.327508	train-rmse:0.114223
[1466]	eval-rmse:0.327504	train-rmse:0.114155
[1467]	eval-rmse:0.327503	train-rmse:0.11408
[1468]	eval-rmse:0.327498	train-rmse:0.114014
[1469]	eval-rmse:0.327498	train-rmse:0.113998
[1470]	eval-rmse:0.327499	train-rmse:0.11394
[1471]	eval-rmse:0.327501	train-rmse:0.113876
[1472]	eval-rmse:0.3275	train-rmse:0.113839
[1473]	eval-rmse:0.3275	train-rmse:0.113776
[1474]	eval-rmse:0.3275	train-rmse:0.113752
[1475]	eval-rmse:0.327501	train-rmse:0.113731
[1476]	eval-rmse:0.327502	train-rmse:0.113701
[1477]	eval-rmse:0.327499	train-rmse:0.11362
[1478]	eval-rmse:0.327498	train-rmse:0.113606
[1479]	eval-rmse:0.327497	train-rmse:0.113569
[1480]	eval-rmse:0.327499	train-rmse:0.113497
[1481]	eval-rmse:0.327498	train-rmse:0.11344

[1639]	eval-rmse:0.327454	train-rmse:0.106982
[1640]	eval-rmse:0.327456	train-rmse:0.106961
[1641]	eval-rmse:0.327456	train-rmse:0.106929
[1642]	eval-rmse:0.327457	train-rmse:0.106894
[1643]	eval-rmse:0.327456	train-rmse:0.10685
[1644]	eval-rmse:0.327456	train-rmse:0.106834
[1645]	eval-rmse:0.327459	train-rmse:0.106787
[1646]	eval-rmse:0.32746	train-rmse:0.106755
[1647]	eval-rmse:0.327461	train-rmse:0.106732
[1648]	eval-rmse:0.327462	train-rmse:0.106697
[1649]	eval-rmse:0.327461	train-rmse:0.106677
[1650]	eval-rmse:0.32746	train-rmse:0.106659
[1651]	eval-rmse:0.327461	train-rmse:0.106639
[1652]	eval-rmse:0.327461	train-rmse:0.106572
[1653]	eval-rmse:0.327462	train-rmse:0.106532
[1654]	eval-rmse:0.327461	train-rmse:0.106505
[1655]	eval-rmse:0.327461	train-rmse:0.106444
[1656]	eval-rmse:0.327461	train-rmse:0.106433
[1657]	eval-rmse:0.327461	train-rmse:0.106373
[1658]	eval-rmse:0.327462	train-rmse:0.1063
[1659]	eval-rmse:0.327463	train-rmse:0.10626
[1660]	eval-rmse:0.327462	train-rmse:0.1

[1818]	eval-rmse:0.327425	train-rmse:0.100281
[1819]	eval-rmse:0.327425	train-rmse:0.100257
[1820]	eval-rmse:0.327425	train-rmse:0.100239
[1821]	eval-rmse:0.327424	train-rmse:0.100217
[1822]	eval-rmse:0.327425	train-rmse:0.100142
[1823]	eval-rmse:0.327425	train-rmse:0.100113
[1824]	eval-rmse:0.327426	train-rmse:0.100098
[1825]	eval-rmse:0.327426	train-rmse:0.100072
[1826]	eval-rmse:0.327426	train-rmse:0.100047
[1827]	eval-rmse:0.327426	train-rmse:0.100029
[1828]	eval-rmse:0.327425	train-rmse:0.099987
[1829]	eval-rmse:0.327425	train-rmse:0.099964
[1830]	eval-rmse:0.327424	train-rmse:0.099934
[1831]	eval-rmse:0.327425	train-rmse:0.099897
[1832]	eval-rmse:0.327425	train-rmse:0.099826
[1833]	eval-rmse:0.327424	train-rmse:0.099772
[1834]	eval-rmse:0.327426	train-rmse:0.099685
[1835]	eval-rmse:0.327426	train-rmse:0.099668
[1836]	eval-rmse:0.327426	train-rmse:0.099621
[1837]	eval-rmse:0.327426	train-rmse:0.099581
[1838]	eval-rmse:0.327426	train-rmse:0.09957
[1839]	eval-rmse:0.327424	train-rms

[1997]	eval-rmse:0.327426	train-rmse:0.094502
[1998]	eval-rmse:0.327426	train-rmse:0.094468
[1999]	eval-rmse:0.327426	train-rmse:0.094428


In [43]:
#Test predictions
pred = np.exp(gbm.predict(xgb.DMatrix(X_test))) - 1

In [44]:
#Use mean absolute error to get a basic estimate of the error
mae = (abs(pred - y_test)).mean()
mae

4.827394962310791

In [49]:
#Take a look at feature importance
feature_scores = gbm.get_fscore()
feature_scores

{'dropoff_latitude': 610689,
 'dropoff_longitude': 622853,
 'latitude_difference': 498608,
 'longitude_difference': 438287,
 'passenger_count': 227133,
 'pickup_day': 387770,
 'pickup_hour': 323986,
 'pickup_latitude': 753138,
 'pickup_longitude': 829288,
 'pickup_minute': 413378,
 'pickup_month': 227789,
 'pickup_weekday': 216596,
 'store_and_fwd_flag': 5359,
 'trip_distance': 453975}

In [50]:
#This is not very telling, so let's scale the features
summ = 0
for key in feature_scores:
    summ = summ + feature_scores[key]

for key in feature_scores:
    feature_scores[key] = feature_scores[key] / summ

feature_scores

{'dropoff_latitude': 0.10163161031338946,
 'dropoff_longitude': 0.10365595807117137,
 'latitude_difference': 0.08297895320717828,
 'longitude_difference': 0.07294025860859542,
 'passenger_count': 0.037799751666250894,
 'pickup_day': 0.06453315768127972,
 'pickup_hour': 0.05391814638710342,
 'pickup_latitude': 0.12533814712268523,
 'pickup_longitude': 0.13801112326170953,
 'pickup_minute': 0.06879487236241084,
 'pickup_month': 0.03790892398860414,
 'pickup_weekday': 0.03604617123845182,
 'store_and_fwd_flag': 0.0008918513345900355,
 'trip_distance': 0.07555107475657984}

# Save the model

In [48]:
filename = "xgb_model.sav"
pickle.dump(gbm, open(filename, 'wb'))