In [None]:
"""
Trips and Users

Table: Trips

+-------------+----------+
| Column Name | Type     |
+-------------+----------+
| Id          | int      |
| Client_Id   | int      |
| Driver_Id   | int      |
| City_Id     | int      |
| Status      | enum     |
| Request_at  | date     |     
+-------------+----------+
Id is the primary key for this table.
The table holds all taxi trips. Each trip has a unique Id, while Client_Id and Driver_Id are foreign keys to Users_Id in the Users table.
Status is an ENUM type of (‘completed’, ‘cancelled_by_driver’, ‘cancelled_by_client’).
 

Table: Users

+-------------+----------+
| Column Name | Type     |
+-------------+----------+
| Users_Id    | int      |
| Banned      | enum     |
| Role        | enum     |
+-------------+----------+
Users_Id is the primary key for this table.
The table holds all users. Each user has a unique Users_Id, and Role is an ENUM type of (‘client’, ‘driver’, ‘partner’).
Banned is an ENUM type of (‘Yes’, ‘No’).
 

Write a SQL query to find the cancellation rate of requests with unbanned users (both client and driver must not be banned) each day between "2013-10-01" and "2013-10-03".

The cancellation rate is computed by dividing the number of canceled (by client or driver) requests with unbanned users by the total number of requests with unbanned users on that day.

Return the result table in any order. Round Cancellation Rate to two decimal points.
"""

In [None]:
"""testcase"""
# Raw test-case payload: "headers" maps each table name to its column names and
# "rows" maps each table name to its records (every value is a string).
# This is a bare dict literal, so the cell binds no name.
{"headers": {"Trips": ["Id", "Client_Id", "Driver_Id", "City_Id", "Status", "Request_at"], 
             "Users": ["Users_Id", "Banned", "Role"]}, 
 "rows": {"Trips": [["1", "1", "10", "1", "completed", "2013-10-01"], 
                    ["2", "2", "11", "1", "cancelled_by_driver", "2013-10-01"], 
                    ["3", "3", "12", "6", "completed", "2013-10-01"], 
                    ["4", "4", "13", "6", "cancelled_by_client", "2013-10-01"], 
                    ["5", "1", "10", "1", "completed", "2013-10-02"], 
                    ["6", "2", "11", "6", "completed", "2013-10-02"], 
                    ["7", "3", "12", "6", "completed", "2013-10-02"], 
                    ["8", "2", "12", "12", "completed", "2013-10-03"], 
                    ["9", "3", "10", "12", "completed", "2013-10-03"], 
                    ["10", "4", "13", "12", "cancelled_by_driver", "2013-10-03"]], 
          "Users": [["1", "No", "client"], 
                    ["2", "Yes", "client"], 
                    ["3", "No", "client"], 
                    ["4", "No", "client"], 
                    ["10", "No", "driver"], 
                    ["11", "No", "driver"], 
                    ["12", "No", "driver"], 
                    ["13", "No", "driver"]]}}

In [None]:
"""SQL table creation and data insertion"""
# Start from a clean slate so the script can be re-run.
DROP TABLE IF EXISTS Trips;
DROP TABLE IF EXISTS Users;

# Trips: one row per taxi request. Client_Id and Driver_Id refer to Users.Users_Id.
# The key column is written as a bare identifier: a single-quoted name is a
# string literal, not a column reference, and is rejected as a key part.
CREATE TABLE Trips(
Id int(4) NOT NULL,
Client_Id int(4) NOT NULL,
Driver_Id int(4) NOT NULL,
City_Id int(4) NOT NULL,
Status enum('completed', 'cancelled_by_driver', 'cancelled_by_client') NOT NULL,
Request_at date NOT NULL,
PRIMARY KEY (Id));

# Users: one row per user. The primary key column is Users_Id (not User_Id).
CREATE TABLE Users(
Users_Id int(4) NOT NULL,
Banned enum('Yes', 'No') NOT NULL,
Role enum('client', 'driver', 'partner') NOT NULL,
PRIMARY KEY (Users_Id));

# Explicit column lists keep the inserts valid if the column order ever changes.
# Integers are unquoted and strings use single quotes, so the statements also
# work when the ANSI_QUOTES SQL mode treats double quotes as identifier quotes.
INSERT INTO Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) VALUES
(1, 1, 10, 1, 'completed', '2013-10-01'),
(2, 2, 11, 1, 'cancelled_by_driver', '2013-10-01'),
(3, 3, 12, 6, 'completed', '2013-10-01'),
(4, 4, 13, 6, 'cancelled_by_client', '2013-10-01'),
(5, 1, 10, 1, 'completed', '2013-10-02'),
(6, 2, 11, 6, 'completed', '2013-10-02'),
(7, 3, 12, 6, 'completed', '2013-10-02'),
(8, 2, 12, 12, 'completed', '2013-10-03'),
(9, 3, 10, 12, 'completed', '2013-10-03'),
(10, 4, 13, 12, 'cancelled_by_driver', '2013-10-03');

INSERT INTO Users (Users_Id, Banned, Role) VALUES
(1, 'No', 'client'),
(2, 'Yes', 'client'),
(3, 'No', 'client'),
(4, 'No', 'client'),
(10, 'No', 'driver'),
(11, 'No', 'driver'),
(12, 'No', 'driver'),
(13, 'No', 'driver');

In [None]:
"""SQL solution"""
# 1: two aggregates joined per day.
#    co = cancelled trips per day, ao = all trips per day; both are restricted
#    to trips whose client AND driver are unbanned and to the requested dates.
#    The RIGHT JOIN keeps every day present in ao; a day with no cancellations
#    has no co row, so IFNULL turns the resulting NULL rate into 0.00.
SELECT ao.Request_at AS Day, IFNULL(ROUND(co.cnt / ao.cnt, 2), 0.00) AS 'Cancellation Rate'
FROM
(SELECT COUNT(*) AS cnt, Request_at
FROM Trips
WHERE Client_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No')
AND Driver_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No')
AND Status IN ('cancelled_by_driver', 'cancelled_by_client')
AND Request_at BETWEEN '2013-10-01' AND '2013-10-03'
GROUP BY Request_at) AS co
RIGHT JOIN
(SELECT COUNT(*) AS cnt, Request_at
FROM Trips
WHERE Client_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No')
AND Driver_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No')
AND Request_at BETWEEN '2013-10-01' AND '2013-10-03'
GROUP BY Request_at) AS ao
ON co.Request_at = ao.Request_at;

# 2: single pass over the unbanned trips in the date range.
#    Every trip that is not 'completed' counts as cancelled, so the rate is
#    (non-completed trips) / (all trips) per day.
#    The literal matches the enum value exactly ('completed', lower case) so the
#    comparison does not depend on a case-insensitive collation.
SELECT Request_at AS Day,
ROUND(SUM(IF(Status = 'completed', 0, 1)) / COUNT(*), 2) AS 'Cancellation Rate'
FROM Trips
WHERE Client_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No')
AND Driver_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No')
AND Request_at BETWEEN '2013-10-01' AND '2013-10-03'
GROUP BY Request_at;

In [2]:
"""pandas dataframe creation"""
import pandas as pd

# Test-case payload: column headers and string-valued rows, keyed by table name.
testcase = {
    "headers": {
        "Trips": ["Id", "Client_Id", "Driver_Id", "City_Id", "Status", "Request_at"],
        "Users": ["Users_Id", "Banned", "Role"],
    },
    "rows": {
        "Trips": [
            ["1", "1", "10", "1", "completed", "2013-10-01"],
            ["2", "2", "11", "1", "cancelled_by_driver", "2013-10-01"],
            ["3", "3", "12", "6", "completed", "2013-10-01"],
            ["4", "4", "13", "6", "cancelled_by_client", "2013-10-01"],
            ["5", "1", "10", "1", "completed", "2013-10-02"],
            ["6", "2", "11", "6", "completed", "2013-10-02"],
            ["7", "3", "12", "6", "completed", "2013-10-02"],
            ["8", "2", "12", "12", "completed", "2013-10-03"],
            ["9", "3", "10", "12", "completed", "2013-10-03"],
            ["10", "4", "13", "12", "cancelled_by_driver", "2013-10-03"],
        ],
        "Users": [
            ["1", "No", "client"],
            ["2", "Yes", "client"],
            ["3", "No", "client"],
            ["4", "No", "client"],
            ["10", "No", "driver"],
            ["11", "No", "driver"],
            ["12", "No", "driver"],
            ["13", "No", "driver"],
        ],
    },
}

# One DataFrame per table, pairing each table's rows with its headers.
Trips, Users = (
    pd.DataFrame(testcase["rows"][table], columns=testcase["headers"][table])
    for table in ("Trips", "Users")
)

Trips.head()

Unnamed: 0,Id,Client_Id,Driver_Id,City_Id,Status,Request_at
0,1,1,10,1,completed,2013-10-01
1,2,2,11,1,cancelled_by_driver,2013-10-01
2,3,3,12,6,completed,2013-10-01
3,4,4,13,6,cancelled_by_client,2013-10-01
4,5,1,10,1,completed,2013-10-02


In [3]:
# Preview the first rows of the Users frame.
Users.head()

Unnamed: 0,Users_Id,Banned,Role
0,1,No,client
1,2,Yes,client
2,3,No,client
3,4,No,client
4,10,No,driver


In [13]:
# Attach the client's ban flag to each trip: inner-join the needed trip columns
# with the users whose Role is 'client', matching Client_Id to Users_Id.
df = Trips[['Client_Id', 'Driver_Id', 'Status', 'Request_at']].merge(
    Users.loc[Users['Role'] == 'client', ['Users_Id', 'Banned']],
    left_on='Client_Id',
    right_on='Users_Id',
)
df

Unnamed: 0,Client_Id,Driver_Id,Status,Request_at,Users_Id,Banned
0,1,10,completed,2013-10-01,1,No
1,1,10,completed,2013-10-02,1,No
2,2,11,cancelled_by_driver,2013-10-01,2,Yes
3,2,11,completed,2013-10-02,2,Yes
4,2,12,completed,2013-10-03,2,Yes
5,3,12,completed,2013-10-01,3,No
6,3,12,completed,2013-10-02,3,No
7,3,10,completed,2013-10-03,3,No
8,4,13,cancelled_by_client,2013-10-01,4,No
9,4,13,cancelled_by_driver,2013-10-03,4,No


In [14]:
# Attach the driver's ban flag as well: inner-join with the users whose Role is
# 'driver'. The overlapping Users_Id / Banned columns get the suffixes
# '_client' (already present) and '_driver' (newly joined).
df = df.merge(
    Users.loc[Users['Role'] == 'driver', ['Users_Id', 'Banned']],
    left_on='Driver_Id',
    right_on='Users_Id',
    suffixes=['_client', '_driver'],
)
df

Unnamed: 0,Client_Id,Driver_Id,Status,Request_at,Users_Id_client,Banned_client,Users_Id_driver,Banned_driver
0,1,10,completed,2013-10-01,1,No,10,No
1,1,10,completed,2013-10-02,1,No,10,No
2,3,10,completed,2013-10-03,3,No,10,No
3,2,11,cancelled_by_driver,2013-10-01,2,Yes,11,No
4,2,11,completed,2013-10-02,2,Yes,11,No
5,2,12,completed,2013-10-03,2,Yes,12,No
6,3,12,completed,2013-10-01,3,No,12,No
7,3,12,completed,2013-10-02,3,No,12,No
8,4,13,cancelled_by_client,2013-10-01,4,No,13,No
9,4,13,cancelled_by_driver,2013-10-03,4,No,13,No


In [17]:
# Keep only trips where neither party is banned and the request date is one of
# the three days of interest.
df_filter = df.loc[
    df['Banned_client'].eq('No')
    & df['Banned_driver'].eq('No')
    & df['Request_at'].isin(['2013-10-01', '2013-10-02', '2013-10-03'])
]
df_filter

Unnamed: 0,Client_Id,Driver_Id,Status,Request_at,Users_Id_client,Banned_client,Users_Id_driver,Banned_driver
0,1,10,completed,2013-10-01,1,No,10,No
1,1,10,completed,2013-10-02,1,No,10,No
2,3,10,completed,2013-10-03,3,No,10,No
6,3,12,completed,2013-10-01,3,No,12,No
7,3,12,completed,2013-10-02,3,No,12,No
8,4,13,cancelled_by_client,2013-10-01,4,No,13,No
9,4,13,cancelled_by_driver,2013-10-03,4,No,13,No


In [23]:
# Number of cancelled trips per day: anything whose Status is not 'completed'.
# Days without a cancellation do not appear in this frame.
df_cancelled = (
    df_filter.loc[df_filter['Status'].ne('completed')]
    .groupby('Request_at')['Status']
    .count()
    .reset_index(name='cnt_cancelled')
)
df_cancelled

Unnamed: 0,Request_at,cnt_cancelled
0,2013-10-01,1
1,2013-10-03,1


In [25]:
# Total number of trips per day among the filtered (unbanned, in-range) trips.
df_all = (
    df_filter.groupby('Request_at')['Status']
    .count()
    .reset_index(name='cnt_all')
)
df_all

Unnamed: 0,Request_at,cnt_all
0,2013-10-01,3
1,2013-10-02,2
2,2013-10-03,2


In [30]:
# Right-join onto the per-day totals so every day that has trips is kept, even
# when none were cancelled; those days get a cancelled count of 0 instead of NaN.
result = (
    df_cancelled
    .merge(df_all, on='Request_at', how='right')
    .fillna({'cnt_cancelled': 0})
)
result

Unnamed: 0,Request_at,cnt_cancelled,cnt_all
0,2013-10-01,1.0,3
1,2013-10-03,1.0,2
2,2013-10-02,0.0,2


In [34]:
# Cancellation rate = cancelled / all, rounded to two decimals; the date column
# is then exposed as 'Day'.
result = result.assign(
    **{'Cancellation Rate': result['cnt_cancelled'].div(result['cnt_all']).round(2)}
).rename(columns={'Request_at': 'Day'})
result[['Day', 'Cancellation Rate']]

Unnamed: 0,Day,Cancellation Rate
0,2013-10-01,0.33
1,2013-10-03,0.5
2,2013-10-02,0.0
