In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read the delivery-time dataset (comma-separated text file on the mounted Drive)
# and preview the first rows.
df = pd.read_csv('/content/drive/MyDrive/Datasets/deliverytime.txt')
df.head()

Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Type_of_order,Type_of_vehicle,Time_taken(min)
0,4607,INDORES13DEL02,37,4.9,22.745049,75.892471,22.765049,75.912471,Snack,motorcycle,24
1,B379,BANGRES18DEL02,34,4.5,12.913041,77.683237,13.043041,77.813237,Snack,scooter,33
2,5D6D,BANGRES19DEL01,23,4.4,12.914264,77.6784,12.924264,77.6884,Drinks,motorcycle,26
3,7A6A,COIMBRES13DEL02,38,4.7,11.003669,76.976494,11.053669,77.026494,Buffet,motorcycle,21
4,70A2,CHENRES12DEL01,32,4.6,12.972793,80.249982,13.012793,80.289982,Snack,scooter,30


In [None]:
# Column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45593 entries, 0 to 45592
Data columns (total 11 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   ID                           45593 non-null  object 
 1   Delivery_person_ID           45593 non-null  object 
 2   Delivery_person_Age          45593 non-null  int64  
 3   Delivery_person_Ratings      45593 non-null  float64
 4   Restaurant_latitude          45593 non-null  float64
 5   Restaurant_longitude         45593 non-null  float64
 6   Delivery_location_latitude   45593 non-null  float64
 7   Delivery_location_longitude  45593 non-null  float64
 8   Type_of_order                45593 non-null  object 
 9   Type_of_vehicle              45593 non-null  object 
 10  Time_taken(min)              45593 non-null  int64  
dtypes: float64(5), int64(2), object(4)
memory usage: 3.8+ MB


In [None]:
# Number of missing values in each column.
df.isnull().sum()

Unnamed: 0,0
ID,0
Delivery_person_ID,0
Delivery_person_Age,0
Delivery_person_Ratings,0
Restaurant_latitude,0
Restaurant_longitude,0
Delivery_location_latitude,0
Delivery_location_longitude,0
Type_of_order,0
Type_of_vehicle,0


In [None]:
# Total number of cells in the frame (rows x columns).
np.size(df)

501523

In [None]:
# Convert degrees to radians
def deg_to_rad(degrees):
    """Convert an angle from degrees to radians.

    Parameters
    ----------
    degrees : number or array-like supporting ``*``
        Angle(s) in degrees.

    Returns
    -------
    Same kind of object as ``degrees``, scaled by pi/180.
    """
    radians_per_degree = np.pi/180
    return degrees*radians_per_degree

In [None]:
# Earth radius in kilometres, so discal() returns distances in km.
r = 6371
def discal(lat1,lon1,lat2,lon2):
    """Haversine (great-circle) distance between two points.

    Parameters
    ----------
    lat1, lon1 : number or array-like
        Latitude and longitude of the first point, in degrees.
    lat2, lon2 : number or array-like
        Latitude and longitude of the second point, in degrees.

    Returns
    -------
    Distance in the unit of the module-level radius ``r`` (kilometres).
    Scalars give a scalar; NumPy arrays / pandas Series are processed
    element-wise.
    """
    to_rad = np.pi/180
    d_lat = (lat2-lat1)*to_rad
    d_lon = (lon2-lon1)*to_rad
    a = np.sin(d_lat/2)**2 + np.cos(lat1*to_rad) * np.cos(lat2*to_rad) * np.sin(d_lon/2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) return NaN.
    a = np.minimum(np.maximum(a, 0.0), 1.0)
    c = 2 * np.arctan2(np.sqrt(a),np.sqrt(1-a))
    return r * c

In [None]:
# Restaurant-to-delivery-location distance for every order.
# discal() is composed of NumPy ufuncs, so it can take whole columns at once;
# this avoids the Python-level per-row loop of df.apply(..., axis=1).
df['Distance'] = discal(df['Restaurant_latitude'],
                        df['Restaurant_longitude'],
                        df['Delivery_location_latitude'],
                        df['Delivery_location_longitude'])

In [None]:
# Preview the frame, now including the Distance column.
df.head(5)

Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Type_of_order,Type_of_vehicle,Time_taken(min),Distance
0,4607,INDORES13DEL02,37,4.9,22.745049,75.892471,22.765049,75.912471,Snack,motorcycle,24,3.025149
1,B379,BANGRES18DEL02,34,4.5,12.913041,77.683237,13.043041,77.813237,Snack,scooter,33,20.18353
2,5D6D,BANGRES19DEL01,23,4.4,12.914264,77.6784,12.924264,77.6884,Drinks,motorcycle,26,1.552758
3,7A6A,COIMBRES13DEL02,38,4.7,11.003669,76.976494,11.053669,77.026494,Buffet,motorcycle,21,7.790401
4,70A2,CHENRES12DEL01,32,4.6,12.972793,80.249982,13.012793,80.289982,Snack,scooter,30,6.210138


In [None]:
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns

# Distance vs. delivery time: marker size follows the delivery time, colour
# follows the delivery partner's age, with a LOWESS trendline on top.
fig = px.scatter(
    df,
    x="Distance",
    y="Time_taken(min)",
    size="Time_taken(min)",
    color='Delivery_person_Age',
    trendline="lowess",
    title="Relationship Between Distance and Time Taken",
)
fig.show()

In [None]:
# Delivery partner's age vs. delivery time: marker size follows the delivery
# time, colour follows the distance, with a LOWESS trendline on top.
fig = px.scatter(
    df,
    x="Delivery_person_Age",
    y="Time_taken(min)",
    size="Time_taken(min)",
    color="Distance",
    trendline="lowess",
    title="Relationship Between Time Taken and Age",
)
fig.show()

In [None]:
# Delivery partner's rating vs. delivery time: marker size follows the delivery
# time, colour follows the distance, with a LOWESS trendline on top.
fig = px.scatter(
    df,
    x="Delivery_person_Ratings",
    y="Time_taken(min)",
    size="Time_taken(min)",
    color="Distance",
    trendline="lowess",
    title="Relationship Between Time Taken and Ratings",
)
fig.show()

In [None]:
# Delivery-time distribution per vehicle type, split by the type of order.
fig = px.box(
    df,
    x="Type_of_vehicle",
    y="Time_taken(min)",
    color="Type_of_order",
    title="Distribution of Time Taken by Type of Vehicle and Order Type",
    labels={
        "Type_of_vehicle": "Type of Vehicle",
        "Time_taken(min)": "Time Taken (min)",
        "Type_of_order": "Type of Order",
    },
    hover_data=["Distance"],
)
fig.show()

In [None]:
from sklearn.model_selection import train_test_split
# One-hot encode the categorical columns (first level dropped as the baseline).
# The cell overwrites `df`, so encode only columns that are still present:
# without this guard, re-running the cell raises KeyError because the raw
# categorical columns no longer exist after the first run.
categorical_cols = [col for col in ('Type_of_order', 'Type_of_vehicle') if col in df.columns]
if categorical_cols:
    df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# Features: the three numeric predictors plus every generated dummy column.
dummy_cols = [col for col in df.columns if 'Type_of_order_' in col or 'Type_of_vehicle_' in col]
X = df[['Distance', 'Delivery_person_Age', 'Delivery_person_Ratings'] + dummy_cols]
# Target: delivery time in minutes.
Y = df['Time_taken(min)']

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score,accuracy_score
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Baseline model: ordinary least-squares linear regression.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

In [None]:
# Score the current predictions against the held-out targets.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)  # back in the target's unit (minutes)

print(f'Root Mean Squared Error: {rmse}')
print(f'R^2 Score: {r2}')

Root Mean Squared Error: 8.361708242333657
R^2 Score: 0.20255872970729627


In [None]:
from sklearn.ensemble import RandomForestRegressor
# Random forest as a non-linear alternative to the linear baseline.
# random_state seeds the forest's internal randomness so the scores printed
# below are reproducible across runs (same seed as the train/test split).
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train,y_train)

# Evaluate on the held-out split with the same metrics as the linear model.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test,y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test,y_pred)

print(f'Root Mean Squared Error: {rmse}')
print(f'R^2 Score: {r2}')

Root Mean Squared Error: 7.747139456404222
R^2 Score: 0.31547165580605274


In [None]:
#Model Testing
def predict_delivery_time(distance,delivery_person_age,delivery_person_ratings,type_of_order,type_of_vehicle, model):
    """Predict the delivery time for a single order.

    The categorical inputs are one-hot encoded against the feature columns
    the model was trained on. (Calling ``pd.get_dummies(..., drop_first=True)``
    on a one-row frame drops the only category of every categorical column,
    which would silently encode every order as the baseline category.)

    Parameters
    ----------
    distance : float
        Restaurant-to-customer distance (same unit as the training column).
    delivery_person_age : int or float
    delivery_person_ratings : float
    type_of_order : str
        Order category; compared to the ``Type_of_order_<category>`` feature
        names ignoring surrounding whitespace.
    type_of_vehicle : str
        Vehicle category; compared to the ``Type_of_vehicle_<category>``
        feature names ignoring surrounding whitespace.
    model : fitted estimator exposing ``predict``
        If it has ``feature_names_in_`` those names define the input columns;
        otherwise the columns of the notebook-level ``X`` are used.

    Returns
    -------
    The first element of ``model.predict(...)`` for the one-row input.

    Notes
    -----
    A category with no matching dummy column (the dropped baseline level, or
    an unknown value) is encoded as all zeros, i.e. treated as the baseline.
    """
    feature_names = getattr(model, 'feature_names_in_', None)
    if feature_names is None:
        feature_names = X.columns
    feature_names = [str(name) for name in feature_names]

    row = {
        'Distance': distance,
        'Delivery_person_Age': delivery_person_age,
        'Delivery_person_Ratings': delivery_person_ratings,
    }

    # Set each dummy column to 1 only when its category matches the input.
    categorical_inputs = {
        'Type_of_order_': type_of_order,
        'Type_of_vehicle_': type_of_vehicle,
    }
    for name in feature_names:
        for prefix, value in categorical_inputs.items():
            if name.startswith(prefix):
                row[name] = int(name[len(prefix):].strip() == str(value).strip())

    # Enforce the training column set and order; anything missing becomes 0.
    input_data = pd.DataFrame([row]).reindex(columns=feature_names, fill_value=0)

    predicted_time = model.predict(input_data)
    return predicted_time[0]


In [None]:
# Example inputs for a single prediction.
distance = 10.0
delivery_person_age = 50
delivery_person_ratings = 3.5
# NOTE(review): 'Food' does not appear among the order types visible in the
# sample rows (Snack, Drinks, Buffet) -- confirm it is a valid category.
type_of_order = 'Food'
type_of_vehicle = 'scooter'

# Uses whichever estimator `model` currently refers to.
predicted_time = predict_delivery_time(distance, delivery_person_age, delivery_person_ratings, type_of_order, type_of_vehicle, model)
print(f'Predicted Delivery Time: {predicted_time} minutes')

Predicted Delivery Time: 33.925193786621094 minutes
