In [1]:
import pandas as pd
import numpy as np
import plotly.express as px

data = pd.read_csv("deliverytime.txt")
print(data.head())

     ID Delivery_person_ID  Delivery_person_Age  Delivery_person_Ratings  \
0  4607     INDORES13DEL02                   37                      4.9   
1  B379     BANGRES18DEL02                   34                      4.5   
2  5D6D     BANGRES19DEL01                   23                      4.4   
3  7A6A    COIMBRES13DEL02                   38                      4.7   
4  70A2     CHENRES12DEL01                   32                      4.6   

   Restaurant_latitude  Restaurant_longitude  Delivery_location_latitude  \
0            22.745049             75.892471                   22.765049   
1            12.913041             77.683237                   13.043041   
2            12.914264             77.678400                   12.924264   
3            11.003669             76.976494                   11.053669   
4            12.972793             80.249982                   13.012793   

   Delivery_location_longitude Type_of_order Type_of_vehicle  Time_taken(min)  
0     

In [2]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45593 entries, 0 to 45592
Data columns (total 11 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   ID                           45593 non-null  object 
 1   Delivery_person_ID           45593 non-null  object 
 2   Delivery_person_Age          45593 non-null  int64  
 3   Delivery_person_Ratings      45593 non-null  float64
 4   Restaurant_latitude          45593 non-null  float64
 5   Restaurant_longitude         45593 non-null  float64
 6   Delivery_location_latitude   45593 non-null  float64
 7   Delivery_location_longitude  45593 non-null  float64
 8   Type_of_order                45593 non-null  object 
 9   Type_of_vehicle              45593 non-null  object 
 10  Time_taken(min)              45593 non-null  int64  
dtypes: float64(5), int64(2), object(4)
memory usage: 3.8+ MB


In [3]:
data.isnull().sum()

ID                             0
Delivery_person_ID             0
Delivery_person_Age            0
Delivery_person_Ratings        0
Restaurant_latitude            0
Restaurant_longitude           0
Delivery_location_latitude     0
Delivery_location_longitude    0
Type_of_order                  0
Type_of_vehicle                0
Time_taken(min)                0
dtype: int64

In [4]:
# Set the earth's radius (in kilometers)
R = 6371

# Convert degrees to radians
def deg_to_rad(degrees):
    return degrees * (np.pi/180)

# Function to calculate the distance between two points using the haversine formula
def distcalculate(lat1, lon1, lat2, lon2):
    d_lat = deg_to_rad(lat2-lat1)
    d_lon = deg_to_rad(lon2-lon1)
    a = np.sin(d_lat/2)**2 + np.cos(deg_to_rad(lat1)) * np.cos(deg_to_rad(lat2)) * np.sin(d_lon/2)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a))
    return R * c
  
# Calculate the distance between each pair of points
data['distance'] = np.nan

for i in range(len(data)):
    data.loc[i, 'distance'] = distcalculate(data.loc[i, 'Restaurant_latitude'], 
                                        data.loc[i, 'Restaurant_longitude'], 
                                        data.loc[i, 'Delivery_location_latitude'], 
                                        data.loc[i, 'Delivery_location_longitude'])

In [5]:
print(data.head())

     ID Delivery_person_ID  Delivery_person_Age  Delivery_person_Ratings  \
0  4607     INDORES13DEL02                   37                      4.9   
1  B379     BANGRES18DEL02                   34                      4.5   
2  5D6D     BANGRES19DEL01                   23                      4.4   
3  7A6A    COIMBRES13DEL02                   38                      4.7   
4  70A2     CHENRES12DEL01                   32                      4.6   

   Restaurant_latitude  Restaurant_longitude  Delivery_location_latitude  \
0            22.745049             75.892471                   22.765049   
1            12.913041             77.683237                   13.043041   
2            12.914264             77.678400                   12.924264   
3            11.003669             76.976494                   11.053669   
4            12.972793             80.249982                   13.012793   

   Delivery_location_longitude Type_of_order Type_of_vehicle  Time_taken(min)  \
0    

In [6]:
import plotly.io as pio
import plotly.express as px

# Set renderer to something Jupyter understands, like iframe
pio.renderers.default = 'iframe'  # Alternatives: 'notebook', 'notebook_connected', 'svg'

# Your existing code
figure = px.scatter(data_frame=data, 
                    x="distance", 
                    y="Time_taken(min)", 
                    size="Time_taken(min)", 
                    trendline="ols", )
figure.show()

##### There is a consistent relationship between the time taken and the distance travelled to deliver the food. It means that most delivery partners deliver food within 25-30 minutes, regardless of distance.

In [7]:
pio.renderers.default = 'iframe'
figure = px.scatter(data_frame = data, 
                    x="Delivery_person_Age",
                    y="Time_taken(min)", 
                    size="Time_taken(min)", 
                    color = "distance",
                    trendline="ols", 
                    title = "Relationship Between Time Taken and Age")
figure.show()

##### The plot suggests that older delivery persons tend to take slightly longer times for deliveries, but this trend is not very strong.
##### Longer deliveries (in terms of both time and distance) are visible at the extremes of age, but most deliveries seem to happen within a concentrated age and time range.

In [8]:
pio.renderers.default = 'iframe'
figure = px.scatter(data_frame = data, 
                    x="Delivery_person_Ratings",
                    y="Time_taken(min)", 
                    size="Time_taken(min)", 
                    color = "distance",
                    trendline="ols", 
                    title = "Relationship Between Time Taken and Ratings")
figure.show()

##### There is an inverse linear relationship between the time taken to deliver the food and the ratings of the delivery partner. It means delivery partners with higher ratings take less time to deliver the food compared to partners with low ratings.

In [9]:
pio.renderers.default = 'iframe'
fig = px.box(data, 
             x="Type_of_vehicle",
             y="Time_taken(min)", 
             color="Type_of_order")
fig.show()

##### So there is not much difference between the time taken by delivery partners depending on the vehicle they are driving and the type of food they are delivering.

In [10]:
from sklearn.model_selection import train_test_split
x = np.array(data[["Delivery_person_Age", 
                   "Delivery_person_Ratings", 
                   "distance"]])
y = np.array(data[["Time_taken(min)"]])
xtrain, xtest, ytrain, ytest = train_test_split(x, y, 
                                                test_size=0.10, 
                                                random_state=42)

# creating the LSTM neural network model
from keras.models import Sequential
from keras.layers import Dense, LSTM
model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape= (xtrain.shape[1], 1)))
model.add(LSTM(64, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))
model.summary()


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [11]:
# training the model
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(xtrain, ytrain, batch_size=1, epochs=9)

Epoch 1/9
[1m41033/41033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 4ms/step - loss: 75.9063
Epoch 2/9
[1m41033/41033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m181s[0m 4ms/step - loss: 65.2515
Epoch 3/9
[1m41033/41033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m182s[0m 4ms/step - loss: 60.9761
Epoch 4/9
[1m41033/41033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m193s[0m 5ms/step - loss: 60.3793
Epoch 5/9
[1m41033/41033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 4ms/step - loss: 59.4523
Epoch 6/9
[1m41033/41033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 4ms/step - loss: 59.3651
Epoch 7/9
[1m41033/41033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 4ms/step - loss: 58.8923
Epoch 8/9
[1m41033/41033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 4ms/step - loss: 59.3677
Epoch 9/9
[1m41033/41033[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m204s[0m 4ms/step - loss: 59.3755


<keras.src.callbacks.history.History at 0x195c105b080>

In [12]:
print("Food Delivery Time Prediction")
a = int(input("Age of Delivery Partner: "))
b = float(input("Ratings of Previous Deliveries: "))
c = int(input("Total Distance (in meters): "))

features = np.array([[a, b, c]])
print("Predicted Delivery Time in Minutes = ", model.predict(features))

Food Delivery Time Prediction


Age of Delivery Partner:  25
Ratings of Previous Deliveries:  4.5
Total Distance (in meters):  5000


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 416ms/step
Predicted Delivery Time in Minutes =  [[24.243114]]


In [13]:
import pickle
import os

# Define the directory to save the model
artifacts_dir = "artifacts"
os.makedirs(artifacts_dir, exist_ok=True)  # Create the directory if it doesn't exist

# Save the trained model
model_path = os.path.join(artifacts_dir, "food_delivery_model.pkl")
with open(model_path, "wb") as f:
    pickle.dump(model, f)

print(f"Model saved successfully at: {model_path}")

Model saved successfully at: artifacts\food_delivery_model.pkl
