In [1]:
import os
import re
import pickle
import shelve
import category_encoders as ce
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
from copy import deepcopy
from geopy.distance import geodesic
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder, OrdinalEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split

# Data Reading and Understanding

In [2]:
filepath= os.path.join(os.pardir, 'dataset', 'food_delivery.csv')
df = pd.read_csv(filepath)
df.head()

Unnamed: 0,ID,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Order_Date,Time_Orderd,Time_Order_picked,Weather_conditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,Time_taken (min)
0,0xcdcd,DEHRES17DEL01,36.0,4.2,30.327968,78.046106,30.397968,78.116106,12-02-2022,21:55,22:10,Fog,Jam,2,Snack,motorcycle,3.0,No,Metropolitian,46
1,0xd987,KOCRES16DEL01,21.0,4.7,10.003064,76.307589,10.043064,76.347589,13-02-2022,14:55,15:05,Stormy,High,1,Meal,motorcycle,1.0,No,Metropolitian,23
2,0x2784,PUNERES13DEL03,23.0,4.7,18.56245,73.916619,18.65245,74.006619,04-03-2022,17:30,17:40,Sandstorms,Medium,1,Drinks,scooter,1.0,No,Metropolitian,21
3,0xc8b6,LUDHRES15DEL02,34.0,4.3,30.899584,75.809346,30.919584,75.829346,13-02-2022,09:20,09:30,Sandstorms,Low,0,Buffet,motorcycle,0.0,No,Metropolitian,20
4,0xdb64,KNPRES14DEL02,24.0,4.7,26.463504,80.372929,26.593504,80.502929,14-02-2022,19:50,20:05,Fog,Jam,1,Snack,scooter,1.0,No,Metropolitian,41


In [3]:
df.shape

(45584, 20)

In [4]:
df.dtypes

ID                              object
Delivery_person_ID              object
Delivery_person_Age            float64
Delivery_person_Ratings        float64
Restaurant_latitude            float64
Restaurant_longitude           float64
Delivery_location_latitude     float64
Delivery_location_longitude    float64
Order_Date                      object
Time_Orderd                     object
Time_Order_picked               object
Weather_conditions              object
Road_traffic_density            object
Vehicle_condition                int64
Type_of_order                   object
Type_of_vehicle                 object
multiple_deliveries            float64
Festival                        object
City                            object
Time_taken (min)                 int64
dtype: object

In [5]:
for i in df.select_dtypes(np.object).columns:
    print(i.upper())
    print(df[i].nunique())
    print()

ID
45584

DELIVERY_PERSON_ID
1320

ORDER_DATE
44

TIME_ORDERD
176

TIME_ORDER_PICKED
193

WEATHER_CONDITIONS
6

ROAD_TRAFFIC_DENSITY
4

TYPE_OF_ORDER
4

TYPE_OF_VEHICLE
4

FESTIVAL
2

CITY
3



Features with large number of categories:
* 1320 unique Delivery persons
* 44 unique Order_Date
* 176 and 196 unique order time and pickup time respectively

## Set Parameters for Scenarios

In [6]:
f_time_to_hours     = True      # Converting time column to just show the hour
f_date_to_split     = True      # Split Date column to 'day' and 'month'

In [7]:
f_coord_to_dist     = True      # Extract Crows fly distance (Great circle distance) from Coordinates Columns
f_drop_coord        = False     # Dropping Coordinate columns
f_DelID_to_city     = False     # Converting Delivery_personID to city code
f_target_encoding   = False     # Target Encoding on Delivery person ID
f_ordinal_encoding  = False     # Ordinal Encoding some categorical columns

# Data Cleaning and Preparation

#### Dropping unrequired Columns

In [8]:
df.drop(['ID'], axis=1, inplace=True)

#### Cleaning Faulty time values

In [9]:
HM_PAT = re.compile(r'^\d\d\:\d\d$')            # HH:MM Format
HMS_PAT = re.compile(r'^\d\d\:\d\d\:\d\d$')     # HH:MM:SS Format
DOT_PAT = re.compile(r'\d+\.\d+')               # Faulty pattern observed in Data

In [10]:
df_clean = df[df.Time_Orderd.str.contains(HM_PAT)|df.Time_Orderd.str.contains(HMS_PAT)]
df_clean = df_clean[(df_clean.Time_Order_picked.str.contains(HM_PAT)|df_clean.Time_Order_picked.str.contains(HMS_PAT))]

# Feature Engineering

#### X-y Split

In [11]:
X = df_clean.iloc[:,:-1]
y = df_clean.iloc[:,-1]

In [12]:
X.shape

(35818, 18)

#### Converting Time to Hours

In [13]:
def get_hour(time_str:str):
    HM_PAT = re.compile(r'^\d\d\:\d\d$')            # HH:MM Format
    HMS_PAT = re.compile(r'^\d\d\:\d\d\:\d\d$')     # HH:MM:SS Format
    
    if time_str.startswith('24'):
        time_str = '00'+time_str[2:]
    else:
        pass

    if re.fullmatch(HM_PAT, time_str):
        hour = datetime.strptime(time_str, '%H:%M').hour
    elif re.fullmatch(HMS_PAT, time_str):
        hour = datetime.strptime(time_str, '%H:%M:%S').hour
    else:
        hour = None

    return hour

In [14]:
if f_time_to_hours == True:
    X['Time_Orderd'] = X.Time_Orderd.apply(get_hour)
    X['Time_Order_picked'] = X.Time_Order_picked.apply(get_hour)

In [15]:
X.shape

(35818, 18)

#### Splitting "Date" columns to "Day" and "Month" columns

In [16]:
if f_date_to_split == True:
    X['Order_Day'] = X.Order_Date.apply(lambda x : datetime.strptime(x, '%d-%m-%Y').day)
    X['Order_Month'] = X.Order_Date.apply(lambda x : datetime.strptime(x, '%d-%m-%Y').month)
    X.drop(['Order_Date'], 1, inplace=True)

NameError: name 'f_date_to_split' is not defined

In [None]:
X.shape

(35818, 19)

#### Converting geographical coordinates to distance

In [None]:
def get_great_circle_distance(pickup_coordinates:pd.DataFrame, drop_coordinates:pd.DataFrame):
    dist_arr = list()

    for i in range(len(pickup_coordinates)):
        dist_arr.append(geodesic(pickup_coordinates.to_numpy()[i], drop_coordinates.to_numpy()[i]).km)
    
    return dist_arr

In [None]:
if f_coord_to_dist:
    X['Geo_Distance'] = get_great_circle_distance(
        X[['Restaurant_latitude','Restaurant_longitude']],
        X[['Delivery_location_latitude', 'Delivery_location_longitude']]
    )

##### Dropping Latitude and Longitude Columns

In [None]:
if f_drop_coord:
    X.drop(['Restaurant_latitude','Restaurant_longitude','Delivery_location_latitude', 'Delivery_location_longitude'], axis = 1, inplace = True)

* Latitude and Longitude columns havent been dropped as of now

In [None]:
X.shape

(35818, 19)

#### Getting City code from Delivery_Person_ID

In [None]:
def ID_to_city(id:str):
    city_cd = id.split('RES')[0]
    return city_cd

In [None]:
if f_DelID_to_city:
    X['Delivery_person_ID'] = X['Delivery_person_ID'].apply(ID_to_city)

# Train-Test Split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.3, random_state=10)

# Data Preprocessing

In [None]:
X_train.head()

Unnamed: 0,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Time_Orderd,Time_Order_picked,Weather_conditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,Order_Day,Order_Month
16542,BANG,20.0,4.3,12.972532,77.608179,13.082532,77.718179,21,21,Sandstorms,Jam,0,Drinks,motorcycle,1.0,No,Metropolitian,2,4
34173,CHEN,31.0,4.6,13.058616,80.264151,13.098616,80.304151,15,15,Sunny,Medium,0,Snack,motorcycle,1.0,No,Metropolitian,3,3
5275,KOC,24.0,4.3,9.985697,76.281128,9.995697,76.291128,8,8,Sandstorms,Low,0,Buffet,motorcycle,0.0,No,Metropolitian,17,2
41885,VAD,29.0,5.0,0.0,0.0,0.04,0.04,13,14,Stormy,High,0,Drinks,motorcycle,1.0,No,Metropolitian,5,4
43650,DEH,29.0,4.7,0.0,0.0,0.08,0.08,17,17,Sunny,Medium,2,Drinks,electric_scooter,1.0,No,Urban,14,2


In [None]:
X_train.shape

(25072, 19)

Four column categories requiring different preprocessing strategies:

* Numeric columns:<br>
        Median Imputation >> Standard Scaling
* Ordinal Columns:<br>
        Mode Imputation >> Ordinal Encoding >> Standard Scaling
* Nominal Columns:<br>
        Mode Imputation >> Label Encoding >> Standard Scaling
* Target-dependent Columns:<br>
        Mode Imputation >> Target Encoding (Mean) >> Standard Scaling

##### Identifying columns

In [None]:
ordinal_columns = ['Road_traffic_density', 'Type_of_vehicle', 'City']
nominal_columns = ['Weather_conditions', 'Type_of_order', 'Festival']
target_dependent_columns = ['Delivery_person_ID']

numeric_columns = list(X_train.select_dtypes(np.number).columns)
all_columns = numeric_columns + target_dependent_columns + nominal_columns + ordinal_columns

if f_target_encoding == True:
    if f_ordinal_encoding == True:
        categoric_columns = nominal_columns
    elif f_ordinal_encoding == False:
        categoric_columns = ordinal_columns + nominal_columns

elif f_target_encoding == False:
    if f_ordinal_encoding == True:
        categoric_columns = target_dependent_columns + nominal_columns
    elif f_ordinal_encoding == False:
        categoric_columns = target_dependent_columns + ordinal_columns + nominal_columns


#### Initializing Imputer

In [None]:
num_imputer = SimpleImputer(strategy='median')
cat_imputer = SimpleImputer(strategy='most_frequent')

non_numeric_columns = [i for i in list(X_train.columns) if i not in numeric_columns]

imputer = ColumnTransformer([
    ('numeric_imputer', num_imputer, numeric_columns),
    ('categoric_imputation', cat_imputer, non_numeric_columns)
])

imputer.fit(X_train)

X_train = pd.DataFrame(imputer.transform(X_train), columns= numeric_columns + non_numeric_columns, index=X_train.index)
X_test = pd.DataFrame(imputer.transform(X_test), columns= numeric_columns + non_numeric_columns, index=X_test.index)
X_train

Unnamed: 0,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Time_Orderd,Time_Order_picked,Vehicle_condition,multiple_deliveries,Order_Day,Order_Month,Delivery_person_ID,Weather_conditions,Road_traffic_density,Type_of_order,Type_of_vehicle,Festival,City
16542,20,4.3,12.9725,77.6082,13.0825,77.7182,21,21,0,1,2,4,BANG,Sandstorms,Jam,Drinks,motorcycle,No,Metropolitian
34173,31,4.6,13.0586,80.2642,13.0986,80.3042,15,15,0,1,3,3,CHEN,Sunny,Medium,Snack,motorcycle,No,Metropolitian
5275,24,4.3,9.9857,76.2811,9.9957,76.2911,8,8,0,0,17,2,KOC,Sandstorms,Low,Buffet,motorcycle,No,Metropolitian
41885,29,5,0,0,0.04,0.04,13,14,0,1,5,4,VAD,Stormy,High,Drinks,motorcycle,No,Metropolitian
43650,29,4.7,0,0,0.08,0.08,17,17,2,1,14,2,DEH,Sunny,Medium,Drinks,electric_scooter,No,Urban
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12998,33,5,12.9791,77.6406,13.0591,77.7206,22,22,2,1,6,3,BANG,Sandstorms,Low,Drinks,scooter,No,Metropolitian
11928,35,4.9,0,0,0.13,0.13,19,19,2,1,20,3,VAD,Cloudy,Jam,Snack,scooter,Yes,Metropolitian
35676,34,4.8,12.3212,76.6211,12.3512,76.6511,21,21,2,1,13,3,MYS,Cloudy,Jam,Meal,scooter,No,Metropolitian
37170,25,4.8,30.8934,75.8212,30.9134,75.8412,9,9,2,1,13,2,LUDH,Fog,Low,Snack,scooter,No,Metropolitian


In [None]:
X_train.isna().sum()

Delivery_person_Age            0
Delivery_person_Ratings        0
Restaurant_latitude            0
Restaurant_longitude           0
Delivery_location_latitude     0
Delivery_location_longitude    0
Time_Orderd                    0
Time_Order_picked              0
Vehicle_condition              0
multiple_deliveries            0
Order_Day                      0
Order_Month                    0
Delivery_person_ID             0
Weather_conditions             0
Road_traffic_density           0
Type_of_order                  0
Type_of_vehicle                0
Festival                       0
City                           0
dtype: int64

In [None]:
X_test.isna().sum()

Delivery_person_Age            0
Delivery_person_Ratings        0
Restaurant_latitude            0
Restaurant_longitude           0
Delivery_location_latitude     0
Delivery_location_longitude    0
Time_Orderd                    0
Time_Order_picked              0
Vehicle_condition              0
multiple_deliveries            0
Order_Day                      0
Order_Month                    0
Delivery_person_ID             0
Weather_conditions             0
Road_traffic_density           0
Type_of_order                  0
Type_of_vehicle                0
Festival                       0
City                           0
dtype: int64

In [None]:
X_train.shape, X_test.shape

((25072, 19), (10746, 19))

#### Initializing Encoders

We wont use Scikit-learn Pipeline class because Pipeline API doesnot support LabelEncoder() or TargetEncoder()

In [None]:
def CustomEncode (X:pd.DataFrame, x, y, encoder_mapping:dict)-> pd.DataFrame:
    df = X
    df_x = x

    for encoder, columns in encoder_mapping.items():
        if isinstance(encoder, OrdinalEncoder):
            all_columns = list(df.columns)
            ordinal_columns = columns
            non_ordinal_columns = [k for k in all_columns if k not in ordinal_columns]

            transformer = ColumnTransformer([
                ('dummy step', SimpleImputer(strategy='most_frequent'), non_ordinal_columns),
                ('encode', encoder, ordinal_columns)
            ])
            transformer.fit(X)
            df = pd.DataFrame(transformer.transform(X), columns=non_ordinal_columns + ordinal_columns, index=X.index)
            df_x = pd.DataFrame(transformer.transform(x), columns=non_ordinal_columns + ordinal_columns, index=x.index)
            
        else:
            for column in columns:
                column_2D = [[value] for value in df[column]]
                column_2D_x = [[value] for value in df_x[column]]
                encoder.fit(column_2D)
                df[column] = encoder.transform (column_2D)
                df_x[column] = encoder.transform (column_2D_x)
    
    return df, df_x

# Mapping For Ordinal Encoder
traffic_map = ['Jam', 'Low', 'Medium', 'High']
vehicle_map = ['bicycle', 'electric_scooter', 'scooter', 'motorcycle']
city_map = ['Semi-Urban', 'Urban', 'Metropolitian']

ord_encoder = OrdinalEncoder(categories=[traffic_map, vehicle_map, city_map])
nom_encoder = LabelEncoder()

if f_ordinal_encoding == True:
    # Ordinal Encoding
    encoder_map = {ord_encoder:ordinal_columns,
                nom_encoder:categoric_columns}
elif f_ordinal_encoding == False:
    # Label Encoding
    encoder_map = {nom_encoder:categoric_columns}

X_train, X_test = CustomEncode(X_train, X_test, y_train, encoder_map)

X_train

  return f(**kwargs)


Unnamed: 0,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Time_Orderd,Time_Order_picked,Vehicle_condition,multiple_deliveries,Order_Day,Order_Month,Delivery_person_ID,Weather_conditions,Road_traffic_density,Type_of_order,Type_of_vehicle,Festival,City
16542,20,4.3,12.9725,77.6082,13.0825,77.7182,21,21,0,1,2,4,3,2,1,1,1,0,0
34173,31,4.6,13.0586,80.2642,13.0986,80.3042,15,15,0,1,3,3,5,4,3,3,1,0,0
5275,24,4.3,9.9857,76.2811,9.9957,76.2911,8,8,0,0,17,2,13,2,2,0,1,0,0
41885,29,5,0,0,0.04,0.04,13,14,0,1,5,4,21,3,0,1,1,0,0
43650,29,4.7,0,0,0.08,0.08,17,17,2,1,14,2,7,4,3,1,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12998,33,5,12.9791,77.6406,13.0591,77.7206,22,22,2,1,6,3,3,2,2,1,2,0,0
11928,35,4.9,0,0,0.13,0.13,19,19,2,1,20,3,21,0,1,3,2,1,0
35676,34,4.8,12.3212,76.6211,12.3512,76.6511,21,21,2,1,13,3,17,0,1,2,2,0,0
37170,25,4.8,30.8934,75.8212,30.9134,75.8412,9,9,2,1,13,2,15,1,2,3,2,0,0


#### Target Encoding

In [None]:
if f_target_encoding == True:
    # Target Encoding
    for i in target_dependent_columns:
        tgt_encoder = ce.TargetEncoder()
        tgt_encoder.fit(X_train[i], y_train)
        X_train[i] = tgt_encoder.transform(X_train[i], y_train)
        X_test[i] = tgt_encoder.transform(X_test[i], y_test)

X_train

Unnamed: 0,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Time_Orderd,Time_Order_picked,Vehicle_condition,multiple_deliveries,Order_Day,Order_Month,Delivery_person_ID,Weather_conditions,Road_traffic_density,Type_of_order,Type_of_vehicle,Festival,City
16542,20,4.3,12.9725,77.6082,13.0825,77.7182,21,21,0,1,2,4,3,2,1,1,1,0,0
34173,31,4.6,13.0586,80.2642,13.0986,80.3042,15,15,0,1,3,3,5,4,3,3,1,0,0
5275,24,4.3,9.9857,76.2811,9.9957,76.2911,8,8,0,0,17,2,13,2,2,0,1,0,0
41885,29,5,0,0,0.04,0.04,13,14,0,1,5,4,21,3,0,1,1,0,0
43650,29,4.7,0,0,0.08,0.08,17,17,2,1,14,2,7,4,3,1,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12998,33,5,12.9791,77.6406,13.0591,77.7206,22,22,2,1,6,3,3,2,2,1,2,0,0
11928,35,4.9,0,0,0.13,0.13,19,19,2,1,20,3,21,0,1,3,2,1,0
35676,34,4.8,12.3212,76.6211,12.3512,76.6511,21,21,2,1,13,3,17,0,1,2,2,0,0
37170,25,4.8,30.8934,75.8212,30.9134,75.8412,9,9,2,1,13,2,15,1,2,3,2,0,0


In [None]:
X_train.shape, X_test.shape

((25072, 19), (10746, 19))

#### Initializing Scaler

In [None]:
scaler = ColumnTransformer([('scaler', StandardScaler(), list(X_train.columns))])

scaler.fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns, index=X_test.index)
X_train


Unnamed: 0,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Time_Orderd,Time_Order_picked,Vehicle_condition,multiple_deliveries,Order_Day,Order_Month,Delivery_person_ID,Weather_conditions,Road_traffic_density,Type_of_order,Type_of_vehicle,Festival,City
16542,-1.664388,-1.075718,-0.552550,0.325919,-0.591127,0.328086,0.752202,0.733763,-1.222673,0.441859,-1.359216,1.872152,-1.452359,-0.288060,-0.785710,-0.456111,-0.423764,-0.142210,-0.537223
34173,0.245661,-0.114675,-0.541341,0.451117,-0.588938,0.449982,-0.578254,-0.503836,-1.222673,0.441859,-1.244376,0.032721,-1.132396,0.881438,1.349140,1.333781,-0.423764,-0.142210,-0.537223
5275,-0.969825,-1.075718,-0.941433,0.263364,-1.011252,0.260819,-2.130452,-1.947701,-1.222673,-1.321359,0.363376,-1.806710,0.147456,-0.288060,0.281715,-1.351057,-0.423764,-0.142210,-0.537223
41885,-0.101621,1.166717,-2.241560,-3.332385,-2.366245,-3.333457,-1.021739,-0.710102,-1.222673,0.441859,-1.014697,1.872152,1.427308,0.296689,-1.853135,-0.456111,-0.423764,-0.142210,-0.537223
43650,-0.101621,0.205673,-2.241560,-3.332385,-2.360801,-3.331572,-0.134768,-0.091303,1.223648,0.441859,0.018857,-1.806710,-0.812433,0.881438,1.349140,-0.456111,-2.116118,-0.142210,1.871657
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12998,0.592943,1.166717,-0.551695,0.327449,-0.594317,0.328201,0.973944,0.940030,1.223648,0.441859,-0.899858,0.032721,-1.452359,-0.288060,0.281715,-0.456111,1.268591,-0.142210,-0.537223
11928,0.940224,0.846369,-2.241560,-3.332385,-2.353996,-3.329215,0.308717,0.321230,1.223648,0.441859,0.707894,0.032721,1.427308,-1.457558,-0.785710,1.333781,1.268591,7.031833,-0.537223
35676,0.766584,0.526021,-0.637350,0.279390,-0.690661,0.277786,0.752202,0.733763,1.223648,0.441859,-0.095982,0.032721,0.787382,-1.457558,-0.785710,0.438835,1.268591,-0.142210,-0.537223
37170,-0.796184,0.526021,1.780728,0.241684,1.835692,0.239610,-1.908709,-1.741435,1.223648,0.441859,-0.095982,-1.806710,0.467419,-0.872809,0.281715,1.333781,1.268591,-0.142210,-0.537223


In [None]:
for i in X_train.columns:
    print(i, end=':\t')
    print(round(np.mean(X_train[i]), 2), end = '\t')
    print(round(np.std(X_train[i]),2))

Delivery_person_Age:	-0.0	1.0
Delivery_person_Ratings:	0.0	1.0
Restaurant_latitude:	0.0	1.0
Restaurant_longitude:	0.0	1.0
Delivery_location_latitude:	-0.0	1.0
Delivery_location_longitude:	0.0	1.0
Time_Orderd:	-0.0	1.0
Time_Order_picked:	-0.0	1.0
Vehicle_condition:	-0.0	1.0
multiple_deliveries:	-0.0	1.0
Order_Day:	-0.0	1.0
Order_Month:	-0.0	1.0
Delivery_person_ID:	-0.0	1.0
Weather_conditions:	-0.0	1.0
Road_traffic_density:	0.0	1.0
Type_of_order:	-0.0	1.0
Type_of_vehicle:	0.0	1.0
Festival:	0.0	1.0
City:	-0.0	1.0


# Final Output

In [None]:
train_df = pd.concat([X_train,y_train], axis=1)
test_df = pd.concat([X_test,y_test], axis=1)

In [None]:
train_df.to_csv('train.csv', index=False)
train_df

Unnamed: 0,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Time_Orderd,Time_Order_picked,Vehicle_condition,multiple_deliveries,Order_Day,Order_Month,Delivery_person_ID,Weather_conditions,Road_traffic_density,Type_of_order,Type_of_vehicle,Festival,City,Time_taken (min)
16542,-1.664388,-1.075718,-0.552550,0.325919,-0.591127,0.328086,0.752202,0.733763,-1.222673,0.441859,-1.359216,1.872152,-1.452359,-0.288060,-0.785710,-0.456111,-0.423764,-0.142210,-0.537223,35
34173,0.245661,-0.114675,-0.541341,0.451117,-0.588938,0.449982,-0.578254,-0.503836,-1.222673,0.441859,-1.244376,0.032721,-1.132396,0.881438,1.349140,1.333781,-0.423764,-0.142210,-0.537223,21
5275,-0.969825,-1.075718,-0.941433,0.263364,-1.011252,0.260819,-2.130452,-1.947701,-1.222673,-1.321359,0.363376,-1.806710,0.147456,-0.288060,0.281715,-1.351057,-0.423764,-0.142210,-0.537223,15
41885,-0.101621,1.166717,-2.241560,-3.332385,-2.366245,-3.333457,-1.021739,-0.710102,-1.222673,0.441859,-1.014697,1.872152,1.427308,0.296689,-1.853135,-0.456111,-0.423764,-0.142210,-0.537223,29
43650,-0.101621,0.205673,-2.241560,-3.332385,-2.360801,-3.331572,-0.134768,-0.091303,1.223648,0.441859,0.018857,-1.806710,-0.812433,0.881438,1.349140,-0.456111,-2.116118,-0.142210,1.871657,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12998,0.592943,1.166717,-0.551695,0.327449,-0.594317,0.328201,0.973944,0.940030,1.223648,0.441859,-0.899858,0.032721,-1.452359,-0.288060,0.281715,-0.456111,1.268591,-0.142210,-0.537223,24
11928,0.940224,0.846369,-2.241560,-3.332385,-2.353996,-3.329215,0.308717,0.321230,1.223648,0.441859,0.707894,0.032721,1.427308,-1.457558,-0.785710,1.333781,1.268591,7.031833,-0.537223,48
35676,0.766584,0.526021,-0.637350,0.279390,-0.690661,0.277786,0.752202,0.733763,1.223648,0.441859,-0.095982,0.032721,0.787382,-1.457558,-0.785710,0.438835,1.268591,-0.142210,-0.537223,35
37170,-0.796184,0.526021,1.780728,0.241684,1.835692,0.239610,-1.908709,-1.741435,1.223648,0.441859,-0.095982,-1.806710,0.467419,-0.872809,0.281715,1.333781,1.268591,-0.142210,-0.537223,19


In [None]:
test_df.to_csv('test.csv', index = False)
test_df

Unnamed: 0,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Time_Orderd,Time_Order_picked,Vehicle_condition,multiple_deliveries,Order_Day,Order_Month,Delivery_person_ID,Weather_conditions,Road_traffic_density,Type_of_order,Type_of_vehicle,Festival,City,Time_taken (min)
3612,0.940224,-0.435023,0.172238,0.151024,0.161079,0.151308,0.973944,0.940030,0.000488,-1.321359,-1.359216,0.032721,0.947363,0.296689,0.281715,-0.456111,-0.423764,-0.14221,1.871657,21
38498,0.766584,1.166717,0.689756,0.832858,0.699339,0.832188,0.752202,0.733763,1.223648,-1.321359,0.363376,-1.806710,0.307437,0.881438,-0.785710,1.333781,-2.116118,-0.14221,-0.537223,17
16360,-0.101621,-0.435023,0.662932,0.116606,0.665854,0.114062,-1.465224,-1.328902,0.000488,-1.321359,-1.244376,1.872152,1.427308,-0.288060,-1.853135,-0.456111,1.268591,-0.14221,-0.537223,24
34932,0.766584,0.846369,-0.808501,0.294976,-0.872293,0.292429,-1.686966,-1.535168,1.223648,0.441859,0.593055,0.032721,-0.972415,1.466187,0.281715,1.333781,1.268591,-0.14221,-0.537223,29
43316,-0.275261,0.526021,1.262129,0.239884,1.305830,0.242052,0.530459,0.527497,0.000488,0.441859,1.052412,0.032721,-0.172507,0.296689,-0.785710,-1.351057,-0.423764,-0.14221,-0.537223,28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15615,1.287506,0.205673,0.346405,0.220527,0.344503,0.221281,-0.134768,-0.091303,1.223648,-1.321359,0.248536,-1.806710,-1.612341,0.296689,1.349140,1.333781,1.268591,-0.14221,1.871657,30
13659,-0.275261,0.205673,0.255438,0.100907,0.256217,0.104020,0.973944,0.940030,-1.222673,0.441859,1.052412,0.032721,0.627400,0.881438,0.281715,1.333781,-0.423764,-0.14221,1.871657,23
29077,0.766584,-1.075718,-0.639006,0.281498,-0.693752,0.279423,-1.465224,-1.328902,0.000488,0.441859,0.593055,0.032721,0.787382,-0.288060,-1.853135,1.333781,-0.423764,-0.14221,-0.537223,34
28854,0.419302,0.205673,0.027754,0.362896,0.001878,0.360349,-1.908709,-1.741435,-1.222673,-1.321359,-0.785018,0.032721,-0.492470,-1.457558,0.281715,1.333781,-0.423764,-0.14221,-0.537223,20
