Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
nyc-taxi-fare-prediction-deployment-example/src/nyc_taxi_fare/pipeline.py
Go to file. This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
66 lines (56 sloc)
1.79 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
from lightgbm import LGBMRegressor | |
from sklearn.compose import make_column_transformer | |
from sklearn.pipeline import make_pipeline | |
from sklearn.preprocessing import FunctionTransformer | |
def haversine_distance(lat1, lng1, lat2, lng2, *, radius_km=6371):
    """Return the great-circle distance between two points (haversine formula).

    Coordinates are given in degrees. Scalars, NumPy arrays and pandas Series
    are accepted; array-like inputs are combined element-wise.

    Args:
        lat1: Latitude(s) of the first point, in degrees.
        lng1: Longitude(s) of the first point, in degrees.
        lat2: Latitude(s) of the second point, in degrees.
        lng2: Longitude(s) of the second point, in degrees.
        radius_km: Radius of the sphere. Defaults to 6,371 km, the earth
            radius, so the result is in kilometres.

    Returns:
        The distance(s), expressed in the same unit as ``radius_km``.
    """
    lat1, lng1, lat2, lng2 = (np.radians(x) for x in (lat1, lng1, lat2, lng2))
    d = (
        np.sin(lat2 / 2 - lat1 / 2) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin(lng2 / 2 - lng1 / 2) ** 2
    )
    # Rounding can push d marginally outside [0, 1] for (near-)antipodal
    # points, which would make arcsin return NaN; clamp it back into range.
    d = np.clip(d, 0.0, 1.0)
    return 2 * radius_km * np.arcsin(np.sqrt(d))
def haversine_distance_from_df(df):
    """Compute the pickup-to-dropoff haversine distance for each row of ``df``.

    Args:
        df: Frame providing the ``pickup_latitude``, ``pickup_longitude``,
            ``dropoff_latitude`` and ``dropoff_longitude`` columns.

    Returns:
        A DataFrame with a single ``haversine_distance`` column holding the
        values returned by ``haversine_distance`` for those columns.
    """
    pickup_lat, pickup_lng = df["pickup_latitude"], df["pickup_longitude"]
    dropoff_lat, dropoff_lng = df["dropoff_latitude"], df["dropoff_longitude"]
    distances = haversine_distance(pickup_lat, pickup_lng, dropoff_lat, dropoff_lng)
    return pd.DataFrame({"haversine_distance": distances})
def split_pickup_datetime(df):
    """Break the pickup timestamp into day-of-week, hour and minute features.

    Args:
        df: Frame with a ``tpep_pickup_datetime`` column. A datetime column is
            used as-is; any other dtype (e.g. timestamp strings coming from a
            CSV or JSON payload) is parsed with ``pd.to_datetime`` first.

    Returns:
        A DataFrame with the integer columns ``pickup_dayofweek``
        (Monday=0 ... Sunday=6), ``pickup_hour`` and ``pickup_minute``.

    Raises:
        Whatever ``pd.to_datetime`` raises when a non-datetime column cannot
        be parsed.
    """
    pickup = df["tpep_pickup_datetime"]
    # The .dt accessor only exists on datetime-like data; parse anything else
    # up front instead of failing with an AttributeError.
    if not pd.api.types.is_datetime64_any_dtype(pickup):
        pickup = pd.to_datetime(pickup)
    return pd.DataFrame(
        {
            "pickup_dayofweek": pickup.dt.dayofweek,
            "pickup_hour": pickup.dt.hour,
            "pickup_minute": pickup.dt.minute,
        }
    )
def feature_enginering():
    """Build the column transformer that derives the model's input features.

    Three transformers are registered, in this order:

    1. ``passenger_count`` goes through a ``FunctionTransformer`` created
       without a ``func``.
    2. ``tpep_pickup_datetime`` is expanded by ``split_pickup_datetime``.
    3. The four pickup/dropoff coordinate columns are reduced by
       ``haversine_distance_from_df``.

    Returns:
        The (unfitted) transformer produced by ``make_column_transformer``.
    """
    coordinate_columns = [
        "pickup_latitude",
        "pickup_longitude",
        "dropoff_latitude",
        "dropoff_longitude",
    ]

    passenger_passthrough = FunctionTransformer()
    datetime_splitter = FunctionTransformer(func=split_pickup_datetime)
    distance_calculator = FunctionTransformer(func=haversine_distance_from_df)

    # Keep this order: make_column_transformer derives the step names and the
    # output column order from the position of each entry.
    return make_column_transformer(
        (passenger_passthrough, ["passenger_count"]),
        (datetime_splitter, ["tpep_pickup_datetime"]),
        (distance_calculator, coordinate_columns),
    )
def lgbm_pipeline():
    """Assemble the end-to-end model: feature engineering, then LightGBM.

    Returns:
        The (unfitted) pipeline produced by ``make_pipeline``, chaining the
        transformer from ``feature_enginering`` with an ``LGBMRegressor``
        configured with the ``"regression_l1"`` objective.
    """
    preprocessing = feature_enginering()
    regressor = LGBMRegressor(objective="regression_l1")
    return make_pipeline(preprocessing, regressor)