In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb

import pickle
from geopy.geocoders import Nominatim
from sklearn.model_selection import train_test_split

pd.set_option('display.max_columns', None)

# I - Import data and Analysing

In [2]:
sample_df = pd.read_csv("train.csv")

In [3]:
sample_df.shape

(1458644, 11)

In [4]:
sample_df.head()

Unnamed: 0,id,vendor_id,pickup_datetime,dropoff_datetime,passenger_count,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,store_and_fwd_flag,trip_duration
0,id2875421,2,2016-03-14 17:24:55,2016-03-14 17:32:30,1,-73.982155,40.767937,-73.96463,40.765602,N,455
1,id2377394,1,2016-06-12 00:43:35,2016-06-12 00:54:38,1,-73.980415,40.738564,-73.999481,40.731152,N,663
2,id3858529,2,2016-01-19 11:35:24,2016-01-19 12:10:48,1,-73.979027,40.763939,-74.005333,40.710087,N,2124
3,id3504673,2,2016-04-06 19:32:31,2016-04-06 19:39:40,1,-74.01004,40.719971,-74.012268,40.706718,N,429
4,id2181028,2,2016-03-26 13:30:55,2016-03-26 13:38:10,1,-73.973053,40.793209,-73.972923,40.78252,N,435


# II - Data Preprocessing

In [5]:
sample_df["store_and_fwd_flag"].value_counts()

N    1450599
Y       8045
Name: store_and_fwd_flag, dtype: int64

In [6]:
#Convert character variables to numeric 
f = lambda x: 0 if x == 'N' else 1

sample_df["store_and_fwd_flag"] = sample_df["store_and_fwd_flag"].apply(lambda x: f(x))            

In [7]:
#Check result
sample_df["store_and_fwd_flag"].value_counts()

0    1450599
1       8045
Name: store_and_fwd_flag, dtype: int64

### Engineer features

In [8]:
#First, convert datetime strings into datetime
sample_df["dropoff_datetime"] = pd.to_datetime(sample_df["dropoff_datetime"], format='%Y-%m-%d %H:%M:%S')
sample_df["pickup_datetime"] = pd.to_datetime(sample_df["pickup_datetime"], format='%Y-%m-%d %H:%M:%S')

In [9]:
#Now construct other variables, like month, date, etc.
sample_df["pickup_month"] = sample_df["pickup_datetime"].dt.month
sample_df["pickup_day"] = sample_df["pickup_datetime"].dt.day
sample_df["pickup_weekday"] = sample_df["pickup_datetime"].dt.weekday #sample_df["pickup_weekday"] = sample_df["pickup_datetime"].dt.weekday_name
sample_df["pickup_hour"] = sample_df["pickup_datetime"].dt.hour
sample_df["pickup_minute"] = sample_df["pickup_datetime"].dt.minute

In [10]:
#Get latitude and longitude differences 
sample_df["latitude_difference"] = sample_df["dropoff_latitude"] - sample_df["pickup_latitude"]
sample_df["longitude_difference"] = sample_df["dropoff_longitude"] - sample_df["pickup_longitude"]

In [11]:
#Convert duration to minutes for easier interpretation
sample_df["trip_duration"] = sample_df["trip_duration"].apply(lambda x: round(x/60))   

In [12]:
#Convert trip distance from longitude and latitude differences to Manhattan distance.
sample_df["trip_distance"] = 0.621371 * 6371 * (abs(2 * np.arctan2(np.sqrt(np.square(np.sin((abs(sample_df["latitude_difference"]) * np.pi / 180) / 2))), 
                                  np.sqrt(1-(np.square(np.sin((abs(sample_df["latitude_difference"]) * np.pi / 180) / 2)))))) + \
                                     abs(2 * np.arctan2(np.sqrt(np.square(np.sin((abs(sample_df["longitude_difference"]) * np.pi / 180) / 2))), 
                                  np.sqrt(1-(np.square(np.sin((abs(sample_df["longitude_difference"]) * np.pi / 180) / 2)))))))

In [13]:
sample_df.head(5)

Unnamed: 0,id,vendor_id,pickup_datetime,dropoff_datetime,passenger_count,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,store_and_fwd_flag,trip_duration,pickup_month,pickup_day,pickup_weekday,pickup_hour,pickup_minute,latitude_difference,longitude_difference,trip_distance
0,id2875421,2,2016-03-14 17:24:55,2016-03-14 17:32:30,1,-73.982155,40.767937,-73.96463,40.765602,0,8,3,14,0,17,24,-0.002335,0.017525,1.372146
1,id2377394,1,2016-06-12 00:43:35,2016-06-12 00:54:38,1,-73.980415,40.738564,-73.999481,40.731152,0,11,6,12,6,0,43,-0.007412,-0.019066,1.82944
2,id3858529,2,2016-01-19 11:35:24,2016-01-19 12:10:48,1,-73.979027,40.763939,-74.005333,40.710087,0,35,1,19,1,11,35,-0.053852,-0.026306,5.538397
3,id3504673,2,2016-04-06 19:32:31,2016-04-06 19:39:40,1,-74.01004,40.719971,-74.012268,40.706718,0,7,4,6,2,19,32,-0.013252,-0.002228,1.069567
4,id2181028,2,2016-03-26 13:30:55,2016-03-26 13:38:10,1,-73.973053,40.793209,-73.972923,40.78252,0,7,3,26,5,13,30,-0.010689,0.00013,0.747485


# III - Modeling

In [14]:
X = sample_df.drop(["trip_duration", "id", "vendor_id", "pickup_datetime", "dropoff_datetime"], axis=1)
y = sample_df["trip_duration"]

In [15]:
#Split the data into training, test, and valdiation sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=2018)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=2019)

In [16]:
#Define evaluation metric
def rmsle(y_true, y_pred):
    assert len(y_true) == len(y_pred)
    return np.square(np.log(y_pred + 1) - np.log(y_true + 1)).mean() ** 0.5

In [17]:
#XGBoost parameters 
params = {
    'booster':            'gbtree',
    'objective':          'reg:linear',
    'learning_rate':      0.05,
    'max_depth':          14,
    'subsample':          0.9,
    'colsample_bytree':   0.7,
    'colsample_bylevel':  0.7,
    'silent':             1,
    'feval':              'rmsle'
}

In [18]:
nrounds = 2000

In [19]:
#Define train and validation sets
dtrain = xgb.DMatrix(X_train, np.log(y_train+1))
dval = xgb.DMatrix(X_val, np.log(y_val+1))

#this is for tracking the error
watchlist = [(dval, 'eval'), (dtrain, 'train')]

In [20]:
#Train model
gbm = xgb.train(params,
                dtrain,
                num_boost_round = nrounds,
                evals = watchlist,
                verbose_eval = True
                )

Parameters: { feval, silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[0]	eval-rmse:2.00792	train-rmse:2.00719
[1]	eval-rmse:1.91382	train-rmse:1.91313
[2]	eval-rmse:1.82240	train-rmse:1.82167
[3]	eval-rmse:1.73565	train-rmse:1.73494
[4]	eval-rmse:1.65361	train-rmse:1.65290
[5]	eval-rmse:1.57611	train-rmse:1.57537
[6]	eval-rmse:1.50257	train-rmse:1.50186
[7]	eval-rmse:1.43241	train-rmse:1.43160
[8]	eval-rmse:1.36599	train-rmse:1.36502
[9]	eval-rmse:1.30346	train-rmse:1.30225
[10]	eval-rmse:1.24576	train-rmse:1.24445
[11]	eval-rmse:1.18975	train-rmse:1.18829
[12]	eval-rmse:1.13648	train-rmse:1.13483
[13]	eval-rmse:1.08709	train-rmse:1.08529
[14]	eval-rmse:1.03926	train-rmse:1.03712
[15]	eval-rmse:0.99427	train-rmse:0.99197
[16]	eval-rmse:0.95189	train-rmse:0.94917
[17]	ev

[183]	eval-rmse:0.33246	train-rmse:0.22509
[184]	eval-rmse:0.33241	train-rmse:0.22469
[185]	eval-rmse:0.33234	train-rmse:0.22444
[186]	eval-rmse:0.33228	train-rmse:0.22395
[187]	eval-rmse:0.33226	train-rmse:0.22385
[188]	eval-rmse:0.33226	train-rmse:0.22368
[189]	eval-rmse:0.33224	train-rmse:0.22354
[190]	eval-rmse:0.33220	train-rmse:0.22309
[191]	eval-rmse:0.33214	train-rmse:0.22283
[192]	eval-rmse:0.33211	train-rmse:0.22256
[193]	eval-rmse:0.33211	train-rmse:0.22248
[194]	eval-rmse:0.33210	train-rmse:0.22234
[195]	eval-rmse:0.33207	train-rmse:0.22211
[196]	eval-rmse:0.33208	train-rmse:0.22187
[197]	eval-rmse:0.33208	train-rmse:0.22155
[198]	eval-rmse:0.33207	train-rmse:0.22131
[199]	eval-rmse:0.33207	train-rmse:0.22122
[200]	eval-rmse:0.33195	train-rmse:0.22051
[201]	eval-rmse:0.33190	train-rmse:0.22029
[202]	eval-rmse:0.33188	train-rmse:0.22002
[203]	eval-rmse:0.33187	train-rmse:0.21995
[204]	eval-rmse:0.33181	train-rmse:0.21944
[205]	eval-rmse:0.33168	train-rmse:0.21895
[206]	eval-

[374]	eval-rmse:0.32912	train-rmse:0.18888
[375]	eval-rmse:0.32910	train-rmse:0.18870
[376]	eval-rmse:0.32910	train-rmse:0.18864
[377]	eval-rmse:0.32910	train-rmse:0.18845
[378]	eval-rmse:0.32911	train-rmse:0.18830
[379]	eval-rmse:0.32911	train-rmse:0.18821
[380]	eval-rmse:0.32911	train-rmse:0.18816
[381]	eval-rmse:0.32909	train-rmse:0.18788
[382]	eval-rmse:0.32905	train-rmse:0.18761
[383]	eval-rmse:0.32905	train-rmse:0.18752
[384]	eval-rmse:0.32905	train-rmse:0.18740
[385]	eval-rmse:0.32904	train-rmse:0.18721
[386]	eval-rmse:0.32904	train-rmse:0.18707
[387]	eval-rmse:0.32902	train-rmse:0.18689
[388]	eval-rmse:0.32903	train-rmse:0.18673
[389]	eval-rmse:0.32903	train-rmse:0.18662
[390]	eval-rmse:0.32903	train-rmse:0.18657
[391]	eval-rmse:0.32902	train-rmse:0.18639
[392]	eval-rmse:0.32902	train-rmse:0.18621
[393]	eval-rmse:0.32899	train-rmse:0.18604
[394]	eval-rmse:0.32899	train-rmse:0.18596
[395]	eval-rmse:0.32899	train-rmse:0.18590
[396]	eval-rmse:0.32898	train-rmse:0.18562
[397]	eval-

[565]	eval-rmse:0.32817	train-rmse:0.16729
[566]	eval-rmse:0.32817	train-rmse:0.16724
[567]	eval-rmse:0.32816	train-rmse:0.16716
[568]	eval-rmse:0.32816	train-rmse:0.16710
[569]	eval-rmse:0.32815	train-rmse:0.16694
[570]	eval-rmse:0.32815	train-rmse:0.16693
[571]	eval-rmse:0.32816	train-rmse:0.16681
[572]	eval-rmse:0.32815	train-rmse:0.16668
[573]	eval-rmse:0.32815	train-rmse:0.16664
[574]	eval-rmse:0.32815	train-rmse:0.16659
[575]	eval-rmse:0.32815	train-rmse:0.16645
[576]	eval-rmse:0.32814	train-rmse:0.16630
[577]	eval-rmse:0.32814	train-rmse:0.16628
[578]	eval-rmse:0.32814	train-rmse:0.16614
[579]	eval-rmse:0.32813	train-rmse:0.16613
[580]	eval-rmse:0.32813	train-rmse:0.16600
[581]	eval-rmse:0.32813	train-rmse:0.16595
[582]	eval-rmse:0.32813	train-rmse:0.16589
[583]	eval-rmse:0.32812	train-rmse:0.16572
[584]	eval-rmse:0.32812	train-rmse:0.16559
[585]	eval-rmse:0.32812	train-rmse:0.16551
[586]	eval-rmse:0.32812	train-rmse:0.16545
[587]	eval-rmse:0.32812	train-rmse:0.16538
[588]	eval-

[756]	eval-rmse:0.32776	train-rmse:0.15133
[757]	eval-rmse:0.32776	train-rmse:0.15124
[758]	eval-rmse:0.32776	train-rmse:0.15120
[759]	eval-rmse:0.32776	train-rmse:0.15117
[760]	eval-rmse:0.32776	train-rmse:0.15107
[761]	eval-rmse:0.32776	train-rmse:0.15099
[762]	eval-rmse:0.32776	train-rmse:0.15087
[763]	eval-rmse:0.32776	train-rmse:0.15083
[764]	eval-rmse:0.32776	train-rmse:0.15075
[765]	eval-rmse:0.32776	train-rmse:0.15067
[766]	eval-rmse:0.32776	train-rmse:0.15056
[767]	eval-rmse:0.32776	train-rmse:0.15054
[768]	eval-rmse:0.32776	train-rmse:0.15050
[769]	eval-rmse:0.32776	train-rmse:0.15044
[770]	eval-rmse:0.32776	train-rmse:0.15039
[771]	eval-rmse:0.32775	train-rmse:0.15034
[772]	eval-rmse:0.32775	train-rmse:0.15030
[773]	eval-rmse:0.32775	train-rmse:0.15027
[774]	eval-rmse:0.32775	train-rmse:0.15016
[775]	eval-rmse:0.32775	train-rmse:0.15005
[776]	eval-rmse:0.32775	train-rmse:0.15001
[777]	eval-rmse:0.32773	train-rmse:0.14981
[778]	eval-rmse:0.32773	train-rmse:0.14976
[779]	eval-

[947]	eval-rmse:0.32755	train-rmse:0.13801
[948]	eval-rmse:0.32754	train-rmse:0.13789
[949]	eval-rmse:0.32754	train-rmse:0.13785
[950]	eval-rmse:0.32754	train-rmse:0.13775
[951]	eval-rmse:0.32753	train-rmse:0.13769
[952]	eval-rmse:0.32753	train-rmse:0.13756
[953]	eval-rmse:0.32753	train-rmse:0.13747
[954]	eval-rmse:0.32753	train-rmse:0.13741
[955]	eval-rmse:0.32753	train-rmse:0.13736
[956]	eval-rmse:0.32753	train-rmse:0.13731
[957]	eval-rmse:0.32753	train-rmse:0.13722
[958]	eval-rmse:0.32753	train-rmse:0.13715
[959]	eval-rmse:0.32753	train-rmse:0.13710
[960]	eval-rmse:0.32753	train-rmse:0.13705
[961]	eval-rmse:0.32753	train-rmse:0.13700
[962]	eval-rmse:0.32753	train-rmse:0.13696
[963]	eval-rmse:0.32753	train-rmse:0.13689
[964]	eval-rmse:0.32753	train-rmse:0.13684
[965]	eval-rmse:0.32754	train-rmse:0.13676
[966]	eval-rmse:0.32754	train-rmse:0.13667
[967]	eval-rmse:0.32754	train-rmse:0.13664
[968]	eval-rmse:0.32754	train-rmse:0.13660
[969]	eval-rmse:0.32754	train-rmse:0.13658
[970]	eval-

[1135]	eval-rmse:0.32742	train-rmse:0.12672
[1136]	eval-rmse:0.32742	train-rmse:0.12671
[1137]	eval-rmse:0.32741	train-rmse:0.12664
[1138]	eval-rmse:0.32741	train-rmse:0.12658
[1139]	eval-rmse:0.32741	train-rmse:0.12653
[1140]	eval-rmse:0.32741	train-rmse:0.12643
[1141]	eval-rmse:0.32741	train-rmse:0.12640
[1142]	eval-rmse:0.32741	train-rmse:0.12629
[1143]	eval-rmse:0.32742	train-rmse:0.12623
[1144]	eval-rmse:0.32742	train-rmse:0.12618
[1145]	eval-rmse:0.32741	train-rmse:0.12608
[1146]	eval-rmse:0.32741	train-rmse:0.12605
[1147]	eval-rmse:0.32741	train-rmse:0.12596
[1148]	eval-rmse:0.32741	train-rmse:0.12590
[1149]	eval-rmse:0.32741	train-rmse:0.12584
[1150]	eval-rmse:0.32741	train-rmse:0.12577
[1151]	eval-rmse:0.32740	train-rmse:0.12572
[1152]	eval-rmse:0.32741	train-rmse:0.12567
[1153]	eval-rmse:0.32741	train-rmse:0.12565
[1154]	eval-rmse:0.32740	train-rmse:0.12559
[1155]	eval-rmse:0.32740	train-rmse:0.12555
[1156]	eval-rmse:0.32740	train-rmse:0.12552
[1157]	eval-rmse:0.32741	train-r

[1322]	eval-rmse:0.32732	train-rmse:0.11729
[1323]	eval-rmse:0.32732	train-rmse:0.11723
[1324]	eval-rmse:0.32732	train-rmse:0.11720
[1325]	eval-rmse:0.32732	train-rmse:0.11717
[1326]	eval-rmse:0.32732	train-rmse:0.11710
[1327]	eval-rmse:0.32732	train-rmse:0.11708
[1328]	eval-rmse:0.32732	train-rmse:0.11705
[1329]	eval-rmse:0.32732	train-rmse:0.11702
[1330]	eval-rmse:0.32732	train-rmse:0.11698
[1331]	eval-rmse:0.32732	train-rmse:0.11690
[1332]	eval-rmse:0.32732	train-rmse:0.11688
[1333]	eval-rmse:0.32732	train-rmse:0.11682
[1334]	eval-rmse:0.32732	train-rmse:0.11681
[1335]	eval-rmse:0.32732	train-rmse:0.11679
[1336]	eval-rmse:0.32732	train-rmse:0.11675
[1337]	eval-rmse:0.32731	train-rmse:0.11666
[1338]	eval-rmse:0.32731	train-rmse:0.11657
[1339]	eval-rmse:0.32731	train-rmse:0.11655
[1340]	eval-rmse:0.32731	train-rmse:0.11652
[1341]	eval-rmse:0.32731	train-rmse:0.11646
[1342]	eval-rmse:0.32731	train-rmse:0.11642
[1343]	eval-rmse:0.32731	train-rmse:0.11637
[1344]	eval-rmse:0.32731	train-r

[1509]	eval-rmse:0.32727	train-rmse:0.10832
[1510]	eval-rmse:0.32727	train-rmse:0.10828
[1511]	eval-rmse:0.32727	train-rmse:0.10823
[1512]	eval-rmse:0.32727	train-rmse:0.10816
[1513]	eval-rmse:0.32727	train-rmse:0.10810
[1514]	eval-rmse:0.32727	train-rmse:0.10806
[1515]	eval-rmse:0.32727	train-rmse:0.10804
[1516]	eval-rmse:0.32727	train-rmse:0.10800
[1517]	eval-rmse:0.32727	train-rmse:0.10797
[1518]	eval-rmse:0.32727	train-rmse:0.10792
[1519]	eval-rmse:0.32727	train-rmse:0.10788
[1520]	eval-rmse:0.32727	train-rmse:0.10782
[1521]	eval-rmse:0.32727	train-rmse:0.10779
[1522]	eval-rmse:0.32727	train-rmse:0.10773
[1523]	eval-rmse:0.32727	train-rmse:0.10771
[1524]	eval-rmse:0.32727	train-rmse:0.10767
[1525]	eval-rmse:0.32727	train-rmse:0.10762
[1526]	eval-rmse:0.32727	train-rmse:0.10759
[1527]	eval-rmse:0.32727	train-rmse:0.10754
[1528]	eval-rmse:0.32726	train-rmse:0.10748
[1529]	eval-rmse:0.32726	train-rmse:0.10744
[1530]	eval-rmse:0.32726	train-rmse:0.10733
[1531]	eval-rmse:0.32726	train-r

[1696]	eval-rmse:0.32723	train-rmse:0.10037
[1697]	eval-rmse:0.32723	train-rmse:0.10034
[1698]	eval-rmse:0.32723	train-rmse:0.10031
[1699]	eval-rmse:0.32723	train-rmse:0.10028
[1700]	eval-rmse:0.32723	train-rmse:0.10020
[1701]	eval-rmse:0.32723	train-rmse:0.10018
[1702]	eval-rmse:0.32723	train-rmse:0.10009
[1703]	eval-rmse:0.32723	train-rmse:0.10008
[1704]	eval-rmse:0.32723	train-rmse:0.10002
[1705]	eval-rmse:0.32723	train-rmse:0.09998
[1706]	eval-rmse:0.32723	train-rmse:0.09994
[1707]	eval-rmse:0.32723	train-rmse:0.09987
[1708]	eval-rmse:0.32723	train-rmse:0.09985
[1709]	eval-rmse:0.32723	train-rmse:0.09983
[1710]	eval-rmse:0.32723	train-rmse:0.09974
[1711]	eval-rmse:0.32723	train-rmse:0.09972
[1712]	eval-rmse:0.32723	train-rmse:0.09970
[1713]	eval-rmse:0.32723	train-rmse:0.09969
[1714]	eval-rmse:0.32723	train-rmse:0.09963
[1715]	eval-rmse:0.32723	train-rmse:0.09960
[1716]	eval-rmse:0.32723	train-rmse:0.09950
[1717]	eval-rmse:0.32723	train-rmse:0.09947
[1718]	eval-rmse:0.32723	train-r

[1883]	eval-rmse:0.32724	train-rmse:0.09356
[1884]	eval-rmse:0.32724	train-rmse:0.09354
[1885]	eval-rmse:0.32724	train-rmse:0.09351
[1886]	eval-rmse:0.32724	train-rmse:0.09350
[1887]	eval-rmse:0.32724	train-rmse:0.09345
[1888]	eval-rmse:0.32724	train-rmse:0.09341
[1889]	eval-rmse:0.32724	train-rmse:0.09338
[1890]	eval-rmse:0.32724	train-rmse:0.09334
[1891]	eval-rmse:0.32724	train-rmse:0.09334
[1892]	eval-rmse:0.32724	train-rmse:0.09331
[1893]	eval-rmse:0.32724	train-rmse:0.09324
[1894]	eval-rmse:0.32724	train-rmse:0.09318
[1895]	eval-rmse:0.32724	train-rmse:0.09315
[1896]	eval-rmse:0.32724	train-rmse:0.09314
[1897]	eval-rmse:0.32724	train-rmse:0.09308
[1898]	eval-rmse:0.32723	train-rmse:0.09305
[1899]	eval-rmse:0.32723	train-rmse:0.09301
[1900]	eval-rmse:0.32723	train-rmse:0.09299
[1901]	eval-rmse:0.32723	train-rmse:0.09295
[1902]	eval-rmse:0.32723	train-rmse:0.09291
[1903]	eval-rmse:0.32723	train-rmse:0.09287
[1904]	eval-rmse:0.32724	train-rmse:0.09283
[1905]	eval-rmse:0.32724	train-r

In [21]:
#Test predictions
pred = np.exp(gbm.predict(xgb.DMatrix(X_test))) - 1

In [22]:
#Use mean absolute error to get a basic estimate of the error
mae = (abs(pred - y_test)).mean()
mae

4.824260129902769

In [None]:
#Feature importance
feature_scores = gbm.get_fscore()
feature_scores

In [26]:
#TScaling the features
summ = 0
for key in feature_scores:
    summ = summ + feature_scores[key]

for key in feature_scores:
    feature_scores[key] = feature_scores[key] / summ

feature_scores

{'latitude_difference': 0.07695194364430662,
 'dropoff_latitude': 0.10605043121823939,
 'pickup_weekday': 0.037651449787270405,
 'pickup_longitude': 0.13147960188380023,
 'pickup_month': 0.03779538574594608,
 'pickup_day': 0.06338367753305076,
 'pickup_minute': 0.06553463969889438,
 'passenger_count': 0.03906060059257675,
 'store_and_fwd_flag': 0.0009169253663784017,
 'longitude_difference': 0.07894960028289635,
 'pickup_hour': 0.05621514984928726,
 'dropoff_longitude': 0.10504901819037121,
 'trip_distance': 0.07539594908317963,
 'pickup_latitude': 0.12556562712380254}

# IV - Saving the generated model

In [25]:
filename = "xgb_model.sav"
pickle.dump(gbm, open(filename, 'wb'))