In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv("D:/Data Sets/Food_Delivery_Times.csv")

In [3]:
df.head()

Unnamed: 0,Order_ID,Distance_km,Weather,Traffic_Level,Time_of_Day,Vehicle_Type,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min
0,522,7.93,Windy,Low,Afternoon,Scooter,12,1.0,43
1,738,16.42,Clear,Medium,Evening,Bike,20,2.0,84
2,741,9.52,Foggy,Low,Night,Scooter,28,1.0,59
3,661,7.44,Rainy,Medium,Afternoon,Scooter,5,1.0,37
4,412,19.03,Clear,Low,Morning,Bike,16,5.0,68


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Order_ID                1000 non-null   int64  
 1   Distance_km             1000 non-null   float64
 2   Weather                 970 non-null    object 
 3   Traffic_Level           970 non-null    object 
 4   Time_of_Day             970 non-null    object 
 5   Vehicle_Type            1000 non-null   object 
 6   Preparation_Time_min    1000 non-null   int64  
 7   Courier_Experience_yrs  970 non-null    float64
 8   Delivery_Time_min       1000 non-null   int64  
dtypes: float64(2), int64(3), object(4)
memory usage: 70.4+ KB


In [5]:
df.isnull().mean()*100

Order_ID                  0.0
Distance_km               0.0
Weather                   3.0
Traffic_Level             3.0
Time_of_Day               3.0
Vehicle_Type              0.0
Preparation_Time_min      0.0
Courier_Experience_yrs    3.0
Delivery_Time_min         0.0
dtype: float64

In [6]:
df.describe()

Unnamed: 0,Order_ID,Distance_km,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min
count,1000.0,1000.0,1000.0,970.0,1000.0
mean,500.5,10.05997,16.982,4.579381,56.732
std,288.819436,5.696656,7.204553,2.914394,22.070915
min,1.0,0.59,5.0,0.0,8.0
25%,250.75,5.105,11.0,2.0,41.0
50%,500.5,10.19,17.0,5.0,55.5
75%,750.25,15.0175,23.0,7.0,71.0
max,1000.0,19.99,29.0,9.0,153.0


In [7]:
df.shape

(1000, 9)

In [8]:
df.sample(5)

Unnamed: 0,Order_ID,Distance_km,Weather,Traffic_Level,Time_of_Day,Vehicle_Type,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min
397,443,7.8,Rainy,Medium,Morning,Bike,11,7.0,49
879,157,13.61,Clear,Low,,Scooter,12,2.0,43
264,358,11.92,Clear,Low,Afternoon,Scooter,14,4.0,46
709,932,16.78,Rainy,Medium,Morning,Bike,22,1.0,70
592,668,3.63,Windy,Medium,Morning,Car,22,9.0,39


In [9]:
df['Weather'].value_counts()

Weather
Clear    470
Rainy    204
Foggy    103
Snowy     97
Windy     96
Name: count, dtype: int64

In [10]:
df['Traffic_Level'].value_counts()

Traffic_Level
Medium    390
Low       383
High      197
Name: count, dtype: int64

In [11]:
df['Time_of_Day'].value_counts()

Time_of_Day
Morning      308
Evening      293
Afternoon    284
Night         85
Name: count, dtype: int64

In [12]:
df['Vehicle_Type'].value_counts()

Vehicle_Type
Bike       503
Scooter    302
Car        195
Name: count, dtype: int64

In [13]:
df['Courier_Experience_yrs'].value_counts()

Courier_Experience_yrs
6.0    109
9.0    108
1.0    107
8.0    101
2.0     99
4.0     94
7.0     91
0.0     91
5.0     90
3.0     80
Name: count, dtype: int64

## Handling Missing Values

In [14]:
df['Weather'].fillna('Missing', inplace=True)

In [15]:
df['Time_of_Day'].fillna('Missing', inplace=True)

In [16]:
df['Traffic_Level'].fillna(df['Traffic_Level'].mode().iloc[0], inplace=True)

In [17]:
df['Courier_Experience_yrs'].fillna(df['Courier_Experience_yrs'].mean(), inplace=True)

In [18]:
df.isnull().sum()

Order_ID                  0
Distance_km               0
Weather                   0
Traffic_Level             0
Time_of_Day               0
Vehicle_Type              0
Preparation_Time_min      0
Courier_Experience_yrs    0
Delivery_Time_min         0
dtype: int64

## OneHot Encoding

In [19]:
df_encoded = pd.get_dummies(df,columns=['Weather','Time_of_Day','Vehicle_Type'],dtype=int)
df_encoded.head(5)

Unnamed: 0,Order_ID,Distance_km,Traffic_Level,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min,Weather_Clear,Weather_Foggy,Weather_Missing,Weather_Rainy,Weather_Snowy,Weather_Windy,Time_of_Day_Afternoon,Time_of_Day_Evening,Time_of_Day_Missing,Time_of_Day_Morning,Time_of_Day_Night,Vehicle_Type_Bike,Vehicle_Type_Car,Vehicle_Type_Scooter
0,522,7.93,Low,12,1.0,43,0,0,0,0,0,1,1,0,0,0,0,0,0,1
1,738,16.42,Medium,20,2.0,84,1,0,0,0,0,0,0,1,0,0,0,1,0,0
2,741,9.52,Low,28,1.0,59,0,1,0,0,0,0,0,0,0,0,1,0,0,1
3,661,7.44,Medium,5,1.0,37,0,0,0,1,0,0,1,0,0,0,0,0,0,1
4,412,19.03,Low,16,5.0,68,1,0,0,0,0,0,0,0,0,1,0,1,0,0


In [20]:
from sklearn.preprocessing import OrdinalEncoder

In [21]:
df_encoded['Traffic_Level'].fillna(df_encoded['Traffic_Level'].mode()[0], inplace=True)
oe = OrdinalEncoder(categories=[['Low','Medium','High']])

# Apply ordinal encoding
df_encoded['Traffic_Level'] = oe.fit_transform(df_encoded[['Traffic_Level']])

## Define Feature and Target Variable

In [40]:
from sklearn.model_selection import train_test_split

X = df_encoded.drop('Delivery_Time_min', axis=1)  
y = df_encoded['Delivery_Time_min'] 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [41]:
X.head(7)

Unnamed: 0,Order_ID,Distance_km,Traffic_Level,Preparation_Time_min,Courier_Experience_yrs,Weather_Clear,Weather_Foggy,Weather_Missing,Weather_Rainy,Weather_Snowy,Weather_Windy,Time_of_Day_Afternoon,Time_of_Day_Evening,Time_of_Day_Missing,Time_of_Day_Morning,Time_of_Day_Night,Vehicle_Type_Bike,Vehicle_Type_Car,Vehicle_Type_Scooter
0,522,7.93,0.0,12,1.0,0,0,0,0,0,1,1,0,0,0,0,0,0,1
1,738,16.42,1.0,20,2.0,1,0,0,0,0,0,0,1,0,0,0,1,0,0
2,741,9.52,0.0,28,1.0,0,1,0,0,0,0,0,0,0,0,1,0,0,1
3,661,7.44,1.0,5,1.0,0,0,0,1,0,0,1,0,0,0,0,0,0,1
4,412,19.03,0.0,16,5.0,1,0,0,0,0,0,0,0,0,1,0,1,0,0
5,679,19.4,0.0,8,9.0,1,0,0,0,0,0,0,1,0,0,0,0,0,1
6,627,9.52,0.0,12,1.0,1,0,0,0,0,0,0,0,1,0,0,1,0,0


In [42]:
from sklearn.linear_model import Ridge

ridge = Ridge(alpha=1.0)  
ridge.fit(X_train, y_train)

y_pred = ridge.predict(X_test)

In [43]:
from sklearn.metrics import r2_score

ridge_r2 = r2_score(y_test, y_pred)

print(f"Ridge R2: {ridge_r2}")

Ridge R2: 0.8288781484284475


In [44]:
from sklearn.linear_model import Lasso

model = Lasso(alpha=0.1)
model.fit(X_train, y_train)
y_pred1 = model.predict(X_test)

In [45]:
lasso_r2 = r2_score(y_test, y_pred1)

print(f"Lasso R2: {lasso_r2}")

Lasso R2: 0.8263065074469824


In [46]:
from sklearn.model_selection import cross_val_score

In [47]:
# Use cross-validation for R2 
r2_scores = cross_val_score(ridge, X_train, y_train, cv=10, scoring='r2')

# Print the mean R² score
print(f"Ridge Cross-Validation R2: {np.mean(r2_scores)}")

Ridge Cross-Validation R2: 0.7527341077104588


In [48]:
# Use cross-validation for R2 
r2_scores = cross_val_score(model, X_train, y_train, cv=10, scoring='r2')

# Print the mean R² score
print(f"Lasso Cross-Validation R2: {np.mean(r2_scores)}")

Lasso Cross-Validation R2: 0.7527164121782863
