In [2]:
import numpy as np 
import pandas as pd
from sklearn.compose import ColumnTransformer 
from sklearn.pipeline import Pipeline 
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer,KNNImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder,OrdinalEncoder,LabelEncoder,PowerTransformer

In [3]:
from sklearn import set_config

# Make scikit-learn transformers return pandas DataFrames (with column names)
# instead of NumPy arrays; later cells call `.columns` and `.values` on
# transformed data and rely on this setting.
set_config(transform_output= "pandas")

In [4]:
# Load the cleaned dataset; the relative path assumes the CSV sits in the
# notebook's working directory.
df = pd.read_csv('swiggy_cleaned.csv')

In [5]:
# Preview the first rows to sanity-check the load.
df.head()

Unnamed: 0,rider_id,age,ratings,restaurant_latitude,restaurant_longitude,delivery_latitude,delivery_longitude,order_date,weather,traffic,...,city_name,order_day,order_month,order_day_of_week,is_weekend,pickup_time_minutes,order_time_hour,order_time_of_day,distance,distance_type
0,INDORES13DEL02,37.0,4.9,22.745049,75.892471,22.765049,75.912471,2022-03-19,sunny,high,...,INDO,19,3,saturday,1,15.0,11.0,morning,3.025149,short
1,BANGRES18DEL02,34.0,4.5,12.913041,77.683237,13.043041,77.813237,2022-03-25,stormy,jam,...,BANG,25,3,friday,0,5.0,19.0,evening,20.18353,very_long
2,BANGRES19DEL01,23.0,4.4,12.914264,77.6784,12.924264,77.6884,2022-03-19,sandstorms,low,...,BANG,19,3,saturday,1,15.0,8.0,morning,1.552758,short
3,COIMBRES13DEL02,38.0,4.7,11.003669,76.976494,11.053669,77.026494,2022-04-05,sunny,medium,...,COIMB,5,4,tuesday,0,10.0,18.0,evening,7.790401,medium
4,CHENRES12DEL01,32.0,4.6,12.972793,80.249982,13.012793,80.289982,2022-03-26,cloudy,high,...,CHEN,26,3,saturday,1,15.0,13.0,afternoon,6.210138,medium


In [6]:
# (rows, columns) of the loaded frame.
df.shape

(45502, 27)

In [7]:
# List every column name before choosing which ones to drop.
df.columns

Index(['rider_id', 'age', 'ratings', 'restaurant_latitude',
       'restaurant_longitude', 'delivery_latitude', 'delivery_longitude',
       'order_date', 'weather', 'traffic', 'vehicle_condition',
       'type_of_order', 'type_of_vehicle', 'multiple_deliveries', 'festival',
       'city_type', 'time_taken', 'city_name', 'order_day', 'order_month',
       'order_day_of_week', 'is_weekend', 'pickup_time_minutes',
       'order_time_hour', 'order_time_of_day', 'distance', 'distance_type'],
      dtype='object')

In [8]:
# Identifier, coordinate and date-part columns that are removed before
# modelling.
columns_to_drop =  ['rider_id',
                    'restaurant_latitude',
                    'restaurant_longitude',
                    'delivery_latitude',
                    'delivery_longitude',
                    'order_date',
                    "order_time_hour",
                    "order_day",
                    "city_name",
                    "order_day_of_week",
                    "order_month"]

# Rebind instead of mutating in place, and tolerate columns that are already
# gone, so re-running this cell does not raise KeyError on the second pass.
df = df.drop(columns=columns_to_drop, errors="ignore")

df

Unnamed: 0,age,ratings,weather,traffic,vehicle_condition,type_of_order,type_of_vehicle,multiple_deliveries,festival,city_type,time_taken,is_weekend,pickup_time_minutes,order_time_of_day,distance,distance_type
0,37.0,4.9,sunny,high,2,snack,motorcycle,0.0,no,urban,24,1,15.0,morning,3.025149,short
1,34.0,4.5,stormy,jam,2,snack,scooter,1.0,no,metropolitian,33,0,5.0,evening,20.183530,very_long
2,23.0,4.4,sandstorms,low,0,drinks,motorcycle,1.0,no,urban,26,1,15.0,morning,1.552758,short
3,38.0,4.7,sunny,medium,0,buffet,motorcycle,1.0,no,metropolitian,21,0,10.0,evening,7.790401,medium
4,32.0,4.6,cloudy,high,1,snack,scooter,1.0,no,metropolitian,30,1,15.0,afternoon,6.210138,medium
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45497,30.0,4.8,windy,high,1,meal,motorcycle,0.0,no,metropolitian,32,0,10.0,morning,1.489846,short
45498,21.0,4.6,windy,jam,0,buffet,motorcycle,1.0,no,metropolitian,36,0,15.0,evening,,
45499,30.0,4.9,cloudy,low,1,drinks,scooter,0.0,no,metropolitian,16,0,15.0,night,4.657195,short
45500,20.0,4.7,cloudy,high,0,snack,motorcycle,1.0,no,metropolitian,26,0,5.0,afternoon,6.232393,medium


In [9]:
# Keep only fully observed rows. `dropna()` already returns a new DataFrame,
# so `df` stays untouched without paying for an explicit copy first.
temp_df = df.dropna()

In [10]:
# Confirm that no missing values remain after the row-wise drop.
temp_df.isna().sum()

age                    0
ratings                0
weather                0
traffic                0
vehicle_condition      0
type_of_order          0
type_of_vehicle        0
multiple_deliveries    0
festival               0
city_type              0
time_taken             0
is_weekend             0
pickup_time_minutes    0
order_time_of_day      0
distance               0
distance_type          0
dtype: int64

In [11]:
# Separate the `time_taken` regression target from the feature matrix.
y = temp_df["time_taken"]
X = temp_df.drop(columns=["time_taken"])

In [12]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [13]:
# Peek at the training features after the split.
X_train.head()

Unnamed: 0,age,ratings,weather,traffic,vehicle_condition,type_of_order,type_of_vehicle,multiple_deliveries,festival,city_type,is_weekend,pickup_time_minutes,order_time_of_day,distance,distance_type
8708,29.0,3.9,sunny,jam,0,drinks,motorcycle,2.0,no,semi-urban,0,15.0,night,9.348069,medium
25198,39.0,4.4,sandstorms,medium,0,meal,motorcycle,1.0,no,metropolitian,1,5.0,afternoon,4.469606,short
34049,29.0,4.5,sunny,high,1,buffet,motorcycle,0.0,no,urban,0,10.0,morning,1.513073,short
25987,39.0,4.8,sandstorms,low,0,meal,motorcycle,1.0,no,metropolitian,1,15.0,night,10.445118,long
37121,30.0,4.4,windy,medium,1,buffet,scooter,1.0,no,metropolitian,1,10.0,afternoon,6.217834,medium


In [14]:
# Per-column count of missing values in the training features.
X_train.isna().sum()

age                    0
ratings                0
weather                0
traffic                0
vehicle_condition      0
type_of_order          0
type_of_vehicle        0
multiple_deliveries    0
festival               0
city_type              0
is_weekend             0
pickup_time_minutes    0
order_time_of_day      0
distance               0
distance_type          0
dtype: int64

In [15]:
# Fit the power transform on the training target only, then reuse the fitted
# transformer for the test target. Each target is reshaped to a single column
# because the transformer works on 2-D input.
pt = PowerTransformer()
y_train_pt = pt.fit_transform(y_train.to_numpy().reshape(-1, 1))
y_test_pt = pt.transform(y_test.to_numpy().reshape(-1, 1))

In [16]:
# Row counts of features and transformed target must match.
X_train.shape,y_train_pt.shape

((30156, 15), (30156, 1))

In [17]:
# Feature names available for grouping into numeric / nominal / ordinal sets.
X_train.columns

Index(['age', 'ratings', 'weather', 'traffic', 'vehicle_condition',
       'type_of_order', 'type_of_vehicle', 'multiple_deliveries', 'festival',
       'city_type', 'is_weekend', 'pickup_time_minutes', 'order_time_of_day',
       'distance', 'distance_type'],
      dtype='object')

In [18]:
# Continuous features (rescaled by the preprocessor).
num_cols = [
    'age',
    'ratings',
    'pickup_time_minutes',
    'distance',
]

# Unordered categorical features (one-hot encoded by the preprocessor).
nominal_cat_cols = [
    'weather',
    'type_of_order',
    'type_of_vehicle',
    'festival',
    'city_type',
    'is_weekend',
    'order_time_of_day',
]

# Categorical features with a natural order (integer-encoded by the preprocessor).
ordinal_cat_cols = ['traffic', 'distance_type']

In [19]:
# Dtypes and non-null counts of the training features.
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 30156 entries, 8708 to 18968
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   age                  30156 non-null  float64
 1   ratings              30156 non-null  float64
 2   weather              30156 non-null  object 
 3   traffic              30156 non-null  object 
 4   vehicle_condition    30156 non-null  int64  
 5   type_of_order        30156 non-null  object 
 6   type_of_vehicle      30156 non-null  object 
 7   multiple_deliveries  30156 non-null  float64
 8   festival             30156 non-null  object 
 9   city_type            30156 non-null  object 
 10  is_weekend           30156 non-null  int64  
 11  pickup_time_minutes  30156 non-null  float64
 12  order_time_of_day    30156 non-null  object 
 13  distance             30156 non-null  float64
 14  distance_type        30156 non-null  object 
dtypes: float64(5), int64(2), object(8)
mem

In [20]:
# (rows, columns) of the training features before preprocessing.
X_train.shape

(30156, 15)

In [21]:
# Print the distinct labels of each ordinal feature so an explicit category
# order can be written down for the encoder.
for col in ordinal_cat_cols:
    print(col , X_train[col].unique())

traffic ['jam' 'medium' 'high' 'low']
distance_type ['medium' 'short' 'long' 'very_long']


In [22]:
# Category orders, lowest to highest, passed to the ordinal encoder.
traffic_order = [
    'low',
    'medium',
    'high',
    'jam',
]

distance_type = [
    'short',
    'medium',
    'long',
    'very_long',
]

In [23]:
# Column-wise preprocessing: each transformer is applied to its own group of
# columns and the results are concatenated into one frame.
preprocessor = ColumnTransformer(
    transformers=[
        # Rescale the numeric features.
        ('scaling', MinMaxScaler(), num_cols),

        # Integer-encode the ordered categoricals using the explicit orders
        # above; unseen labels become -1 and missing values become -999.
        ('ordinal_encode', OrdinalEncoder(
            categories=[traffic_order,distance_type],
            handle_unknown='use_encoded_value',
            unknown_value= -1,
            encoded_missing_value= -999)
            , ordinal_cat_cols),
        
        # One-hot encode the unordered categoricals as dense columns; labels
        # not seen during fit are ignored rather than raising.
        ('nominal_encode', 
         OneHotEncoder(handle_unknown='ignore', sparse_output=False), 
         nominal_cat_cols)
    # Columns not listed above are passed through unchanged, and output names
    # are not prefixed with the transformer name.
    ], remainder='passthrough',verbose_feature_names_out=False,
    force_int_remainder_cols = False,n_jobs=-1
)

In [24]:
# Render the (not yet fitted) ColumnTransformer to inspect its configuration.
preprocessor

In [25]:
# Fit the preprocessing on the training features only and transform them.
X_train_processed = preprocessor.fit_transform(X_train)

# Apply the already-fitted preprocessing to the test features.
X_test_processed = preprocessor.transform(X_test)

In [26]:
# Shape of the training features after encoding.
X_train_processed.shape

(30156, 32)

In [27]:
# Test features must have the same number of columns as the training features.
X_test_processed.shape

(7539, 32)

In [28]:
# Final feature names produced by the preprocessor.
X_train_processed.columns

Index(['age', 'ratings', 'pickup_time_minutes', 'distance', 'traffic',
       'distance_type', 'weather_cloudy', 'weather_fog', 'weather_sandstorms',
       'weather_stormy', 'weather_sunny', 'weather_windy',
       'type_of_order_buffet', 'type_of_order_drinks', 'type_of_order_meal',
       'type_of_order_snack', 'type_of_vehicle_electric_scooter',
       'type_of_vehicle_motorcycle', 'type_of_vehicle_scooter', 'festival_no',
       'festival_yes', 'city_type_metropolitian', 'city_type_semi-urban',
       'city_type_urban', 'is_weekend_0', 'is_weekend_1',
       'order_time_of_day_afternoon', 'order_time_of_day_evening',
       'order_time_of_day_morning', 'order_time_of_day_night',
       'vehicle_condition', 'multiple_deliveries'],
      dtype='object')

In [29]:
import dagshub
# Initialise the DagsHub client for this repository with its MLflow
# integration switched on.
dagshub.init(repo_owner='vinayak910', repo_name='swiggy-delivery-time-prediction', mlflow=True)

In [30]:
import mlflow 

# Point MLflow at the DagsHub-hosted tracking server for this repository.
mlflow.set_tracking_uri("https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow")

In [31]:
# Select, by name, the experiment that the runs below are logged under.
mlflow.set_experiment("Exp 2 - Top 2 Models")


<Experiment: artifact_location='mlflow-artifacts:/6fe92ff5fbce4fa5984f2a2220cac0bc', creation_time=1752244480233, experiment_id='2', last_update_time=1752244480233, lifecycle_stage='active', name='Exp 2 - Top 2 Models', tags={}>

In [32]:
%pip install lightgbm


Note: you may need to restart the kernel to use updated packages.


In [33]:
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import optuna 
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score,mean_absolute_error

In [34]:
from sklearn.model_selection import cross_val_score

In [35]:
def _build_model(trial, model_name):
    """Sample hyperparameters for ``model_name`` and build the unfitted estimator.

    Args:
        trial: Optuna trial used to suggest the hyperparameter values.
        model_name: One of "SVM", "RF", "KNN", "GB", "XGB", "LGBM".

    Returns:
        An unfitted regressor configured with the suggested values.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    if model_name == "SVM":
        # Each kernel has its own search space, so the parameter names are
        # kernel-specific to keep them distinct inside the study.
        kernel_svm = trial.suggest_categorical("kernel_svm", ["linear", "poly", "rbf"])
        if kernel_svm == "linear":
            c_linear = trial.suggest_float("c_linear", 0.1, 10, log=True)
            return SVR(C=c_linear, kernel="linear")
        if kernel_svm == "poly":
            c_poly = trial.suggest_float("c_poly", 0.1, 10, log=True)
            degree_poly = trial.suggest_int("degree_poly", 2, 5)
            return SVR(C=c_poly, degree=degree_poly, kernel="poly")
        c_rbf = trial.suggest_float("c_rbf", 0.1, 100, log=True)
        gamma_rbf = trial.suggest_float("gamma_rbf", 1e-4, 1.0, log=True)
        return SVR(C=c_rbf, gamma=gamma_rbf, kernel="rbf")

    if model_name == "RF":
        n_estimators_rf = trial.suggest_int("n_estimators_rf", 50, 300, step=10)
        max_depth_rf = trial.suggest_int("max_depth_rf", 4, 30)
        min_samples_split = trial.suggest_int("min_samples_split", 2, 10)
        return RandomForestRegressor(
            n_estimators=n_estimators_rf,
            max_depth=max_depth_rf,
            min_samples_split=min_samples_split,
            random_state=42,
            n_jobs=-1
        )

    if model_name == "GB":
        n_estimators_gb = trial.suggest_int("n_estimators_gb", 50, 300, step=10)
        learning_rate_gb = trial.suggest_float("learning_rate_gb", 0.01, 0.3, log=True)
        max_depth_gb = trial.suggest_int("max_depth_gb", 3, 15)
        return GradientBoostingRegressor(
            n_estimators=n_estimators_gb,
            learning_rate=learning_rate_gb,
            max_depth=max_depth_gb,
            random_state=42
        )

    if model_name == "KNN":
        n_neighbors_knn = trial.suggest_int("n_neighbors_knn", 1, 30)
        weights_knn = trial.suggest_categorical("weights_knn", ["uniform", "distance"])
        return KNeighborsRegressor(
            n_neighbors=n_neighbors_knn,
            weights=weights_knn,
            n_jobs=-1
        )

    if model_name == "XGB":
        n_estimators_xgb = trial.suggest_int("n_estimators_xgb", 50, 300, step=10)
        learning_rate_xgb = trial.suggest_float("learning_rate_xgb", 0.01, 0.3, log=True)
        max_depth_xgb = trial.suggest_int("max_depth_xgb", 3, 15)
        subsample_xgb = trial.suggest_float("subsample_xgb", 0.6, 1.0)
        colsample_bytree_xgb = trial.suggest_float("colsample_bytree_xgb", 0.6, 1.0)
        return XGBRegressor(
            n_estimators=n_estimators_xgb,
            learning_rate=learning_rate_xgb,
            max_depth=max_depth_xgb,
            subsample=subsample_xgb,
            colsample_bytree=colsample_bytree_xgb,
            random_state=42,
            n_jobs=-1,
            verbosity=0
        )

    if model_name == "LGBM":
        n_estimators_lgbm = trial.suggest_int("n_estimators_lgbm", 50, 300, step=10)
        learning_rate_lgbm = trial.suggest_float("learning_rate_lgbm", 0.01, 0.3, log=True)
        max_depth_lgbm = trial.suggest_int("max_depth_lgbm", 3, 15)
        num_leaves = trial.suggest_int("num_leaves", 15, 100)
        return LGBMRegressor(
            n_estimators=n_estimators_lgbm,
            learning_rate=learning_rate_lgbm,
            max_depth=max_depth_lgbm,
            num_leaves=num_leaves,
            random_state=42
        )

    # Fail loudly instead of reaching the fit step with no model bound.
    raise ValueError(f"Unsupported model name: {model_name!r}")


def objective(trial):
    """Optuna objective: train one sampled model and return its test MAE.

    Opens a nested MLflow run, samples a model family and its hyperparameters,
    fits the model on the preprocessed training features against the
    power-transformed target, maps predictions back to the original target
    scale, and logs parameters plus train/test MAE and R2 to MLflow.

    Args:
        trial: Optuna trial supplying the hyperparameter suggestions.

    Returns:
        Mean absolute error on the test split, on the original target scale.

    Raises:
        ValueError: If the sampled model name is not supported.

    NOTE(review): the returned value is the hold-out test MAE, so the study
    selects hyperparameters on the test split and the test metrics reported
    for the winning model are optimistically biased. Consider returning a
    cross-validated training MAE instead.
    NOTE(review): `nested=True` attaches to the run that is active in the
    calling context; when trials run concurrently, confirm in the MLflow UI
    that the trial runs are actually grouped under the parent run.
    """
    with mlflow.start_run(nested=True):
        model_name = trial.suggest_categorical("model", ["SVM", "RF", "KNN", "GB", "XGB", "LGBM"])

        # Model selection and hyperparameter tuning
        model = _build_model(trial, model_name)

        # Train model. np.asarray accepts both the DataFrame produced under
        # pandas output mode and a plain ndarray, so the cell does not depend
        # on the global scikit-learn output setting.
        model.fit(X_train_processed, np.asarray(y_train_pt).ravel())

        # Predictions (still on the power-transformed target scale)
        y_pred_train = model.predict(X_train_processed)
        y_pred_test = model.predict(X_test_processed)

        # Back to the original target scale so the metrics are interpretable
        y_pred_train_org = pt.inverse_transform(y_pred_train.reshape(-1, 1))
        y_pred_test_org = pt.inverse_transform(y_pred_test.reshape(-1, 1))

        # Evaluation
        train_mae = mean_absolute_error(y_train, y_pred_train_org)
        test_mae = mean_absolute_error(y_test, y_pred_test_org)
        train_r2 = r2_score(y_train, y_pred_train_org)
        test_r2 = r2_score(y_test, y_pred_test_org)

        # Logging
        mlflow.log_param("model", model_name)
        mlflow.log_params(model.get_params())

        mlflow.log_metric("train_mae", train_mae)
        mlflow.log_metric("test_mae", test_mae)
        mlflow.log_metric("train_r2", train_r2)
        mlflow.log_metric("test_r2", test_r2)

        return test_mae


In [36]:
# Study that minimizes the value returned by `objective`.
study = optuna.create_study(direction="minimize",study_name="model_selection")

# Parent MLflow run for the whole search; each trial opens its own run inside
# `objective`.
with mlflow.start_run(run_name="Best Model") as parent:
    # optimize the objective function
    # (30 trials; n_jobs=-1 lets Optuna run several trials at the same time)
    study.optimize(objective,n_trials=30,n_jobs=-1)

    # log the best parameters
    mlflow.log_params(study.best_params)

    # log the best score
    mlflow.log_metric("best_score",study.best_value)

[I 2025-07-12 11:49:02,983] A new study created in memory with name: model_selection


🏃 View run charming-sow-503 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/126ca0580816493a91cad6a4f22c34e3
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:49:20,253] Trial 7 finished with value: 4.4424163978121864 and parameters: {'model': 'KNN', 'n_neighbors_knn': 17, 'weights_knn': 'uniform'}. Best is trial 7 with value: 4.4424163978121864.


🏃 View run nosy-jay-711 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/9811ff68c54d416eb798dc64cdd88d09
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2
🏃 View run legendary-frog-507 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/26aa81b9977643fb81b41911ac297787
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:49:44,899] Trial 4 finished with value: 3.0066005650264143 and parameters: {'model': 'RF', 'n_estimators_rf': 230, 'max_depth_rf': 13, 'min_samples_split': 10}. Best is trial 4 with value: 3.0066005650264143.
[I 2025-07-12 11:49:46,030] Trial 3 finished with value: 3.046848638123444 and parameters: {'model': 'RF', 'n_estimators_rf': 160, 'max_depth_rf': 27, 'min_samples_split': 5}. Best is trial 4 with value: 3.0066005650264143.


🏃 View run loud-hawk-837 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/cd866c75c78746a6839d658a6e9e9326
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2
🏃 View run judicious-shrimp-517 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/172faba9cfdb4f529393a5cd3992af14
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:49:59,841] Trial 0 finished with value: 3.7801411151885986 and parameters: {'model': 'XGB', 'n_estimators_xgb': 260, 'learning_rate_xgb': 0.014577844443557818, 'max_depth_xgb': 3, 'subsample_xgb': 0.758470943580105, 'colsample_bytree_xgb': 0.913132133751938}. Best is trial 4 with value: 3.0066005650264143.
[I 2025-07-12 11:50:02,904] Trial 1 finished with value: 3.0196645405005773 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 100, 'learning_rate_lgbm': 0.11961833351230527, 'max_depth_lgbm': 6, 'num_leaves': 56}. Best is trial 4 with value: 3.0066005650264143.


🏃 View run enthused-horse-396 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/bb16d8b8c4584ff181869c65db7a4127
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2
🏃 View run delicate-mare-692 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/71520baec3464073bb7cc1cae1699a3b
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:50:08,892] Trial 5 finished with value: 2.990514781944793 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 260, 'learning_rate_lgbm': 0.05097993203016533, 'max_depth_lgbm': 7, 'num_leaves': 62}. Best is trial 5 with value: 2.990514781944793.
[I 2025-07-12 11:50:09,891] Trial 6 finished with value: 2.972978989649277 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 280, 'learning_rate_lgbm': 0.016893820484143285, 'max_depth_lgbm': 9, 'num_leaves': 96}. Best is trial 6 with value: 2.972978989649277.


🏃 View run mysterious-kite-270 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/bbecb88636c14f9bb72e6a2c372f2da8
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:50:17,048] Trial 9 finished with value: 3.0350802992221544 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 180, 'learning_rate_lgbm': 0.21437231217522015, 'max_depth_lgbm': 12, 'num_leaves': 57}. Best is trial 6 with value: 2.972978989649277.


🏃 View run angry-cod-335 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/92d1f75446664f22ba52688a15dbcc77
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:50:36,796] Trial 14 finished with value: 3.0347727038661865 and parameters: {'model': 'RF', 'n_estimators_rf': 240, 'max_depth_rf': 26, 'min_samples_split': 8}. Best is trial 6 with value: 2.972978989649277.


🏃 View run clean-seal-368 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/96d0ce4a8e6f4667aea8e5ceded7e6c1
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:50:39,609] Trial 13 finished with value: 3.0420971248896063 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 240, 'learning_rate_lgbm': 0.22246982580536212, 'max_depth_lgbm': 11, 'num_leaves': 28}. Best is trial 6 with value: 2.972978989649277.


🏃 View run zealous-lynx-500 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/f16a66b27151412f9ab16ea69b0c8164
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:50:48,580] Trial 16 finished with value: 4.5379390143051985 and parameters: {'model': 'KNN', 'n_neighbors_knn': 4, 'weights_knn': 'distance'}. Best is trial 6 with value: 2.972978989649277.


🏃 View run puzzled-cub-905 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/9c11eba18ce643eba5dc15e1b53537e1
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:50:57,612] Trial 17 finished with value: 4.618093747928405 and parameters: {'model': 'GB', 'n_estimators_gb': 70, 'learning_rate_gb': 0.0234089886274019, 'max_depth_gb': 3}. Best is trial 6 with value: 2.972978989649277.


🏃 View run valuable-hare-433 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/cf19860d6dad4165981f52d22e75cf57
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:52:04,090] Trial 11 finished with value: 3.0708844832980944 and parameters: {'model': 'GB', 'n_estimators_gb': 250, 'learning_rate_gb': 0.07100934655617012, 'max_depth_gb': 10}. Best is trial 6 with value: 2.972978989649277.


🏃 View run worried-ox-600 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/4a90fb7e01c041df91c6bc40018f4a77
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:53:08,579] Trial 18 finished with value: 3.252366284424499 and parameters: {'model': 'GB', 'n_estimators_gb': 210, 'learning_rate_gb': 0.18383179143822284, 'max_depth_gb': 14}. Best is trial 6 with value: 2.972978989649277.


🏃 View run loud-shad-690 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/ef54ce8277fd482d88c58a4f7942d450
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:54:24,187] Trial 15 finished with value: 3.9307672642601164 and parameters: {'model': 'SVM', 'kernel_svm': 'poly', 'c_poly': 1.1973662814964101, 'degree_poly': 2}. Best is trial 6 with value: 2.972978989649277.


🏃 View run resilient-bass-354 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/1c8e0c50aeaa46fab4cf5f810e7b1856
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:54:40,162] Trial 2 finished with value: 4.166237934379796 and parameters: {'model': 'SVM', 'kernel_svm': 'rbf', 'c_rbf': 29.15886252911665, 'gamma_rbf': 0.0011647091037171465}. Best is trial 6 with value: 2.972978989649277.


🏃 View run dapper-bear-33 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/f52079c3ef814ffc99c348df558959f2
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:54:42,965] Trial 22 finished with value: 3.05675215418449 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 300, 'learning_rate_lgbm': 0.014580685054811447, 'max_depth_lgbm': 6, 'num_leaves': 98}. Best is trial 6 with value: 2.972978989649277.


🏃 View run enthused-foal-24 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/ae138f21ef1b4f7bb0ef747170812ddc
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:54:53,203] Trial 23 finished with value: 3.0545280295210104 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 270, 'learning_rate_lgbm': 0.014822187379756128, 'max_depth_lgbm': 6, 'num_leaves': 98}. Best is trial 6 with value: 2.972978989649277.


🏃 View run vaunted-bee-330 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/101dd6452e0c4cf0b43384cee2d14c86
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:55:00,895] Trial 24 finished with value: 2.972163077996169 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 290, 'learning_rate_lgbm': 0.02309582511580382, 'max_depth_lgbm': 8, 'num_leaves': 97}. Best is trial 24 with value: 2.972163077996169.


🏃 View run intrigued-mule-490 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/e4d463ca50ce4516846d7f10bcd9cd23
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:55:12,249] Trial 25 finished with value: 3.3632075786590576 and parameters: {'model': 'XGB', 'n_estimators_xgb': 50, 'learning_rate_xgb': 0.26084881669534254, 'max_depth_xgb': 15, 'subsample_xgb': 0.9764710402248902, 'colsample_bytree_xgb': 0.6285840542740292}. Best is trial 24 with value: 2.972163077996169.


🏃 View run mercurial-seal-509 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/279095f92b50437aafefc9f35c66b374
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:55:24,824] Trial 26 finished with value: 3.4666717052459717 and parameters: {'model': 'XGB', 'n_estimators_xgb': 70, 'learning_rate_xgb': 0.29142688933794386, 'max_depth_xgb': 15, 'subsample_xgb': 0.9858266372734618, 'colsample_bytree_xgb': 0.6100481556705324}. Best is trial 24 with value: 2.972163077996169.


🏃 View run glamorous-zebra-555 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/b8dedfd640124ab8b4615de024d87a48
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:55:30,861] Trial 27 finished with value: 2.962967016054149 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 200, 'learning_rate_lgbm': 0.03198516726634783, 'max_depth_lgbm': 15, 'num_leaves': 81}. Best is trial 27 with value: 2.962967016054149.


🏃 View run loud-mule-292 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/73401cb4b3f649a7b3c6c42f72ee6222
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:55:40,082] Trial 28 finished with value: 2.9825806244815087 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 300, 'learning_rate_lgbm': 0.03644866715595504, 'max_depth_lgbm': 9, 'num_leaves': 81}. Best is trial 27 with value: 2.962967016054149.


🏃 View run secretive-shark-485 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/4b2719a4510d460ab72a76939630986b
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:55:46,493] Trial 29 finished with value: 2.9626465468155394 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 200, 'learning_rate_lgbm': 0.03195558234416709, 'max_depth_lgbm': 15, 'num_leaves': 84}. Best is trial 29 with value: 2.9626465468155394.


🏃 View run invincible-quail-545 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/2e52e87edd374ff0b90bd3e538cd1fc1
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:56:09,425] Trial 20 finished with value: 4.675390012764835 and parameters: {'model': 'SVM', 'kernel_svm': 'linear', 'c_linear': 0.4962464181702757}. Best is trial 29 with value: 2.9626465468155394.


🏃 View run handsome-shrimp-659 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/b3f7358a079049dd8c65eb0b830e42c2
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:56:54,641] Trial 19 finished with value: 4.144044863858972 and parameters: {'model': 'SVM', 'kernel_svm': 'rbf', 'c_rbf': 0.8027875285687234, 'gamma_rbf': 0.007633731755588096}. Best is trial 29 with value: 2.9626465468155394.


🏃 View run unique-kit-553 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/3ac278b546594b1ba3c6b8e51436a65d
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 11:58:15,331] Trial 21 finished with value: 3.9188328681065734 and parameters: {'model': 'SVM', 'kernel_svm': 'poly', 'c_poly': 5.870915132083519, 'degree_poly': 2}. Best is trial 29 with value: 2.9626465468155394.


🏃 View run lyrical-turtle-942 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/da2086c4c3674030bbb55ca890921187
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 12:20:01,184] Trial 10 finished with value: 3.802061772688303 and parameters: {'model': 'SVM', 'kernel_svm': 'rbf', 'c_rbf': 10.558868548456417, 'gamma_rbf': 0.21229070926340704}. Best is trial 29 with value: 2.9626465468155394.


🏃 View run trusting-fowl-941 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/a7b2fb5b6e574536a05e56b46d02331b
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 13:07:48,036] Trial 12 finished with value: 4.052919028790522 and parameters: {'model': 'SVM', 'kernel_svm': 'rbf', 'c_rbf': 35.950117590324204, 'gamma_rbf': 0.1949212122679456}. Best is trial 29 with value: 2.9626465468155394.


🏃 View run debonair-stag-775 at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/f774bca89a0d49bab1a4e43d479fe532
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


[I 2025-07-12 13:09:26,335] Trial 8 finished with value: 4.223655322760632 and parameters: {'model': 'SVM', 'kernel_svm': 'rbf', 'c_rbf': 40.45533477588705, 'gamma_rbf': 0.22946656015571157}. Best is trial 29 with value: 2.9626465468155394.


🏃 View run Best Model at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2/runs/905a33b2c2634bde94e7c00c6ff41fc7
🧪 View experiment at: https://dagshub.com/vinayak910/swiggy-delivery-time-prediction.mlflow/#/experiments/2


In [37]:
# Hyperparameters of the trial with the lowest objective value.
study.best_params

{'model': 'LGBM',
 'n_estimators_lgbm': 200,
 'learning_rate_lgbm': 0.03195558234416709,
 'max_depth_lgbm': 15,
 'num_leaves': 84}

In [42]:
# Best LightGBM settings copied from `study.best_params`, with the `_lgbm`
# suffixes removed so the keys match LGBMRegressor's argument names.
lgbm_params = dict(
    n_estimators=200,
    learning_rate=0.03195558234416709,
    max_depth=15,
    num_leaves=84,
)

In [44]:
# Refit the selected LightGBM configuration on the training split.
# `random_state=42` matches the seed used for the LGBM trials in `objective`,
# so the refit uses the same configuration that was scored during the search.
lgbm = LGBMRegressor(**lgbm_params, random_state=42)

# Fit against the power-transformed target, flattened to 1-D. np.asarray
# accepts both DataFrame and ndarray output from the target transformer.
lgbm.fit(X_train_processed, np.asarray(y_train_pt).ravel())

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005711 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 366
[LightGBM] [Info] Number of data points in the train set: 30156, number of used features: 32
[LightGBM] [Info] Start training from score -0.000000


In [45]:


# Predict on both splits; the model was fitted on the power-transformed
# target, so these values are still on that transformed scale.
y_pred_train = lgbm.predict(X_train_processed)
y_pred_test = lgbm.predict(X_test_processed)

In [46]:
# Map the predictions back to the original target scale; the transformer
# works on 2-D input, hence the reshape to a single column.
y_pred_train_org = pt.inverse_transform(y_pred_train.reshape(-1,1))
y_pred_test_org = pt.inverse_transform(y_pred_test.reshape(-1,1))

In [47]:
from sklearn.metrics import mean_absolute_error, r2_score

# Report mean absolute error on the original target scale for both splits.
print(
    f"The train error is {mean_absolute_error(y_train,y_pred_train_org):.2f} minutes",
    f"The test error is {mean_absolute_error(y_test,y_pred_test_org):.2f} minutes",
    sep="\n",
)

The train error is 2.81 minutes
The test error is 2.96 minutes


In [49]:
# R2 on the original target scale; both lines use the same two-decimal
# format so the train and test scores are directly comparable.
print(f"The train r2 score is {r2_score(y_train,y_pred_train_org):.2f}")
print(f"The test r2 score is {r2_score(y_test,y_pred_test_org):.2f}")

The train r2 score is 0.86
The test r2 score is 0.8433906328750047


In [50]:
# One row per trial: objective value, timing, sampled parameters and state.
study.trials_dataframe()


Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_c_linear,params_c_poly,params_c_rbf,params_colsample_bytree_xgb,params_degree_poly,...,params_model,params_n_estimators_gb,params_n_estimators_lgbm,params_n_estimators_rf,params_n_estimators_xgb,params_n_neighbors_knn,params_num_leaves,params_subsample_xgb,params_weights_knn,state
0,0,3.780141,2025-07-12 11:49:03.706803,2025-07-12 11:49:59.841720,0 days 00:00:56.134917,,,,0.913132,,...,XGB,,,,260.0,,,0.758471,,COMPLETE
1,1,3.019665,2025-07-12 11:49:03.708506,2025-07-12 11:50:02.904078,0 days 00:00:59.195572,,,,,,...,LGBM,,100.0,,,,56.0,,,COMPLETE
2,2,4.166238,2025-07-12 11:49:03.709924,2025-07-12 11:54:40.162352,0 days 00:05:36.452428,,,29.158863,,,...,SVM,,,,,,,,,COMPLETE
3,3,3.046849,2025-07-12 11:49:03.711599,2025-07-12 11:49:46.030506,0 days 00:00:42.318907,,,,,,...,RF,,,160.0,,,,,,COMPLETE
4,4,3.006601,2025-07-12 11:49:03.713086,2025-07-12 11:49:44.899552,0 days 00:00:41.186466,,,,,,...,RF,,,230.0,,,,,,COMPLETE
5,5,2.990515,2025-07-12 11:49:03.714677,2025-07-12 11:50:08.892297,0 days 00:01:05.177620,,,,,,...,LGBM,,260.0,,,,62.0,,,COMPLETE
6,6,2.972979,2025-07-12 11:49:03.716009,2025-07-12 11:50:09.890850,0 days 00:01:06.174841,,,,,,...,LGBM,,280.0,,,,96.0,,,COMPLETE
7,7,4.442416,2025-07-12 11:49:03.717622,2025-07-12 11:49:20.253352,0 days 00:00:16.535730,,,,,,...,KNN,,,,,17.0,,,uniform,COMPLETE
8,8,4.223655,2025-07-12 11:49:20.261054,2025-07-12 13:09:26.335777,0 days 01:20:06.074723,,,40.455335,,,...,SVM,,,,,,,,,COMPLETE
9,9,3.03508,2025-07-12 11:49:44.902589,2025-07-12 11:50:17.047968,0 days 00:00:32.145379,,,,,,...,LGBM,,180.0,,,,57.0,,,COMPLETE


In [51]:
# How many trials the sampler spent on each model family.
study.trials_dataframe()['params_model'].value_counts()


params_model
LGBM    11
SVM      8
XGB      3
RF       3
GB       3
KNN      2
Name: count, dtype: int64

In [52]:
# Mean objective value (test MAE) per model family, lowest first.
study.trials_dataframe().groupby("params_model")['value'].mean().sort_values()


params_model
LGBM    3.004725
RF      3.029407
XGB     3.536673
GB      3.647115
SVM     4.114239
KNN     4.490178
Name: value, dtype: float64

In [53]:
from sklearn.compose import TransformedTargetRegressor

# Bundle the regressor with the target transformer so the wrapped model can be
# fitted and scored with the untransformed target.
model = TransformedTargetRegressor(
    regressor=lgbm,
    transformer=pt,
)
     

In [55]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of the wrapped model on the training split, scored
# with negated mean absolute error against the untransformed target.
# NOTE(review): X_train_processed comes from a preprocessor fitted on the whole
# training split, so the scaler/encoders have already seen every fold.
scores = cross_val_score(model,
                         X_train_processed,
                         y_train,
                         scoring="neg_mean_absolute_error",
                         cv=5,n_jobs=-1)

# One (negative) score per fold.
scores

array([-3.02893811, -2.99902077, -2.98214722, -2.99944664, -2.98452171])

In [56]:
# Flip the sign of the negated scores to get the mean MAE across folds.
- scores.mean()


np.float64(2.9988148920504374)

In [57]:
# Parallel-coordinate plot of objective value against the sampled model family.
optuna.visualization.plot_parallel_coordinate(study,params=["model"])
