In [1]:
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import statsmodels.formula.api as sm
import matplotlib.pyplot as plt
import matplotlib.ticker

pd.set_option('display.float_format', lambda x: '%.5f' % x)


In [2]:
#CONSTANTS

# to do: change the rates so that there are company specific values as well (for 2019, need Via rates), maybe store in a dataframe

# Rate constants used further down to compute each trip's minimum required driver pay:
# *_PER_MILE_* values are multiplied by trip_miles and *_PER_MIN_* values by trip_time/60.
# The _22 suffix marks the rates applied to the 2022 data, _19 the rates applied to the 2019 data.

# 2022 rates, trips with out_of_city == False (non-WAV, then WAV)
NON_WAV_PER_MILE_22 = 1.161
NON_WAV_PER_MIN_22 = 0.529

WAV_PER_MILE_22 = 1.504
WAV_PER_MIN_22 = 0.529

# 2022 rates, trips with out_of_city == True (one shared per-minute rate for WAV and non-WAV)
OUT_CITY_NON_WAV_PER_MILE_22 = 1.348
OUT_CITY_WAV_PER_MILE_22 = 1.746
OUT_CITY_PER_MIN_22 = 0.613


# 2019 rates, trips with out_of_city == False (non-WAV, then WAV)
NON_WAV_PER_MILE_19 = 1.088
NON_WAV_PER_MIN_19 = 0.495

WAV_PER_MILE_19 = 1.410
WAV_PER_MIN_19 = 0.495

# 2019 rates, trips with out_of_city == True (one shared per-minute rate for WAV and non-WAV)
OUT_CITY_NON_WAV_PER_MILE_19 = 1.262
OUT_CITY_WAV_PER_MILE_19 = 1.636
OUT_CITY_PER_MIN_19 = 0.574

# Company codes, compared against the hvfhs_license_num column
# (only VIA is referenced in the cells of this notebook; the others are presumably kept for reference)
JUNO = "HV0002"
UBER = "HV0003"
VIA = "HV0004"
LYFT = "HV0005"

# 2019 (not adjusted)

In [3]:
# Import Cleaned Data
# Reads the cleaned February 2019 parquet file with pyarrow (memory-mapped) and converts it to a pandas DataFrame.

rides2019 = pq.read_table("../Aya_NYTWA/Data/february_2019_cleaned.parquet", memory_map=True).to_pandas()

In [4]:
# Minimum required driver pay per trip under the 2019 rates.
# The rate pair (per mile, per minute) depends on the WAV flag and on whether the trip leaves the city.

wav_2019 = rides2019['wav_match_flag'] == 'Y'
non_wav_2019 = rides2019['wav_match_flag'] == 'N'
in_city_2019 = rides2019['out_of_city'] == False
out_city_2019 = rides2019['out_of_city'] == True


def _min_pay_2019(per_mile, per_min):
    """Return miles * per_mile + (trip_time / 60) * per_min for every 2019 trip."""
    return rides2019.trip_miles*per_mile + (rides2019.trip_time/60)*per_min


conditions2019 = [
    non_wav_2019 & in_city_2019,
    wav_2019 & in_city_2019,
    non_wav_2019 & out_city_2019,
    wav_2019 & out_city_2019,
]

choices2019 = [
    _min_pay_2019(NON_WAV_PER_MILE_19, NON_WAV_PER_MIN_19),
    _min_pay_2019(WAV_PER_MILE_19, WAV_PER_MIN_19),
    _min_pay_2019(OUT_CITY_NON_WAV_PER_MILE_19, OUT_CITY_PER_MIN_19),
    _min_pay_2019(OUT_CITY_WAV_PER_MILE_19, OUT_CITY_PER_MIN_19),
]

# Rows matching none of the four conditions fall back to the in-city non-WAV rate (first choice).
rides2019['min_rate'] = np.select(conditions2019, choices2019, default=choices2019[0])



## Driver pay analysis

#### Distribution of driver pay

In [None]:
# Total driver pay across all rides

rides2019['driver_pay'].sum()    

In [5]:
# Create variable for dollar amount of driver pay over min rate

rides2019['over_min_rate'] = (rides2019.driver_pay - rides2019.min_rate)

In [6]:
# Create variable for percentage of driver pay over min rate

rides2019['perc_over_min_rate'] = (rides2019.over_min_rate/rides2019.min_rate)

In [7]:
# Passenger fare per mile and per minute for each trip
# (trip_time is divided by 60 to express it in minutes, presumably from seconds)

fare_2019 = rides2019['base_passenger_fare']
trip_minutes_2019 = rides2019['trip_time'] / 60

rides2019['passenger_fare_mile'] = fare_2019 / rides2019['trip_miles']
rides2019['passenger_fare_min'] = fare_2019 / trip_minutes_2019

In [8]:
# Restrict the analysis to trips that are not shared, not operated by VIA, and stay inside the city

not_shared_2019 = rides2019['shared_match_flag'] == "N"
not_via_2019 = rides2019['hvfhs_license_num'] != VIA
stays_in_city_2019 = rides2019['out_of_city'] == False

rides2019 = rides2019.loc[not_shared_2019 & not_via_2019 & stays_in_city_2019]

In [9]:
rides2019.driver_pay.describe()


count   14008641.00000
mean          15.03863
std           12.31794
min            0.03000
25%            6.88000
50%           10.99000
75%           18.40000
max          708.63000
Name: driver_pay, dtype: float64

In [14]:
rides2019.min_rate.describe()

count   14008641.00000
mean          13.13266
std           10.15241
min            0.01913
25%            6.16928
50%           10.10461
75%           16.85767
max          433.25567
Name: min_rate, dtype: float64

#### Distribution of percentage of driver earnings over the minimum required rate for the trip (In 2022 the bottom 25% of trips made less than 0.5% over the minimum rate, the top 25% of trips made at least 30% over the minimum rate, and the median percent made over the minimum rate was 7%)

In [None]:
rides2019.perc_over_min_rate.describe()

In [None]:
# Keep only trips whose share over the minimum rate lies strictly between -50% and +100%.
# A chained comparison (a < series < b) raises "truth value of a Series is ambiguous",
# so the two bounds are written as element-wise comparisons combined with &.
pct_over_2019 = rides2019["perc_over_min_rate"]
test = rides2019[(pct_over_2019 > -.5) & (pct_over_2019 < 1)]["perc_over_min_rate"]

# Each trip gets weight 1/N so the bar heights are shares of trips rather than raw counts.
plt.hist(test, weights = np.ones(len(test))/len(test), bins = 30)

In [None]:
plt.show()

#### !! Percentage of rides where the driver made less than the minimum required rate

In [None]:
len(rides2019[rides2019.perc_over_min_rate < 0])/len(rides2019)

#### !! Percentage of rides where the driver made 1% or less over the minimum required rate

In [None]:
len(rides2019[rides2019.perc_over_min_rate <= 0.01])/len(rides2019)

#### !! Percentage of rides where the driver made 10% or less over the minimum required rate

In [None]:
len(rides2019[rides2019.perc_over_min_rate <= 0.10])/len(rides2019)

#### !! Percentage of rides where the driver made 20% or less over the minimum required rate

In [None]:
len(rides2019[rides2019.perc_over_min_rate <= 0.20])/len(rides2019)

#### Percentage of rides where the driver made more than 5% over the minimum required rate

In [None]:
len(rides2019[rides2019.perc_over_min_rate > 0.05])/len(rides2019)

#### Percentage of rides where the driver made more than 10% over the minimum required rate

In [None]:
len(rides2019[rides2019.perc_over_min_rate > 0.10])/len(rides2019)

#### Percentage of rides where the driver made more than 20% over the minimum required rate

In [None]:
len(rides2019[rides2019.perc_over_min_rate > 0.20])/len(rides2019)

#### Distribution of driver pay for rides where the driver made less than the minimum rate

In [None]:
rides2019[rides2019.perc_over_min_rate < 0].driver_pay.describe()

#### Distribution of underpayment for rides where the driver made less than the minimum rate

In [None]:
rides2019[rides2019.perc_over_min_rate < 0].over_min_rate.describe()

#### Distribution of minimum rate (in dollars) for rides where the driver made less than minimum rate

In [None]:
rides2019[rides2019.perc_over_min_rate < 0].min_rate.describe()

#### Distribution of ride length for rides where the driver made less than minimum rate

In [None]:
rides2019[rides2019.perc_over_min_rate < 0].trip_miles.describe()

#### Distribution of driver pay for rides where the driver made more than 20% over the minimum rate

In [None]:
rides2019[rides2019.perc_over_min_rate > .2].driver_pay.describe()

#### Distribution of overage (in dollars) for rides where the driver made more than 20% over the minimum rate

In [None]:
rides2019[rides2019.perc_over_min_rate > .2]["over_min_rate"].describe()

#### Distribution of minimum rate (in dollars) for rides where the driver made more than 20% over the minimum rate

In [None]:
rides2019[rides2019.perc_over_min_rate > .2]["min_rate"].describe()

#### !! Percent of trips where overage is more than \$1

In [None]:
len(rides2019[rides2019['over_min_rate'] > 1])/len(rides2019)

#### !! Percent of trips where overage was more than \$20

In [None]:
len(rides2019[rides2019['over_min_rate'] > 20])/len(rides2019)

#### !! Average overage of all trips in dollars

In [None]:
rides2019['over_min_rate'].describe()

#### !! Average overage of trips that were 10% or less over in dollars

In [None]:
# Inclusive bound ("10% or less"), consistent with the other "or less" share cells for 2019
rides2019[rides2019['perc_over_min_rate'] <= .1]['over_min_rate'].describe()

#### !! Average overage of all trips which are over pay rules in dollars

In [None]:
rides2019[rides2019['over_min_rate'] > 0]['over_min_rate'].describe()

## Passenger Fare Analysis

In [None]:
# Total passenger fare across all rides

rides2019['base_passenger_fare'].sum()    

In [None]:
# Fare per mile and per minute, using the same formulas as the cell that first creates these
# two columns in the driver pay section (trip_time/60 expresses the time in minutes, presumably from seconds).
rides2019['passenger_fare_mile'] = rides2019.base_passenger_fare/rides2019.trip_miles
rides2019['passenger_fare_min'] = rides2019.base_passenger_fare/(rides2019.trip_time/60)

#### Distribution of passenger fare

In [None]:
rides2019.base_passenger_fare.describe()

#### Average fare per minute cost 

In [None]:
np.mean(rides2019['passenger_fare_min'])

#### Average fare per mile cost

In [None]:
np.mean(rides2019['passenger_fare_mile'])

#### Regression of passenger fare on trip miles and minutes (In 2022, each additional mile per trip increases the fare by 2.03 dollars and each additional minute per trip increases the fare by 0.47 dollars)



In [None]:
# convert trip time to minutes
rides2019['trip_time_min'] = rides2019.trip_time/60

In [None]:
# OLS of the base passenger fare on trip distance (miles) and duration (minutes);
# the printed parameters are the intercept and the two slopes.
fare_model_2019 = sm.ols(formula="base_passenger_fare ~ trip_miles + trip_time_min", data=rides2019)
result = fare_model_2019.fit()
print(result.params)

## Drivers' earnings compared to passenger fare

In [10]:
# Driver pay as a fraction of the base passenger fare
# (values above 1 mean the driver was paid more than the passenger's base fare)
rides2019['perc_earnings_fare'] = rides2019['driver_pay'].div(rides2019['base_passenger_fare'])

rides2019['perc_earnings_fare'].describe()

count   14008641.00000
mean           1.01184
std            1.93883
min            0.00046
25%            0.76704
50%            0.86162
75%            1.05461
max         1598.50000
Name: perc_earnings_fare, dtype: float64

In [16]:
rides2019[rides2019["perc_earnings_fare"] > 1]["driver_pay"].describe()

count   4148073.00000
mean         15.90777
std          13.67965
min           0.47000
25%           7.01000
50%          11.11000
75%          19.39000
max         440.59000
Name: driver_pay, dtype: float64

In [17]:
rides2019[rides2019["perc_earnings_fare"] < 1]["driver_pay"].describe()

count   9849992.00000
mean         14.67533
std          11.68090
min           0.03000
25%           6.82000
50%          10.94000
75%          18.08000
max         708.63000
Name: driver_pay, dtype: float64

In [12]:
len(rides2019[rides2019["perc_earnings_fare"] > 1])/len(rides2019["perc_earnings_fare"])

0.29610816638102155

# 2019 (adjusted)

## Driver pay analysis

In [None]:
# Express 2019 driver pay in 2022 dollars

INFLATION_2019_TO_2022 = 1.159  # inflation rate between 2019 and 2022
rides2019['driver_pay_adj'] = rides2019['driver_pay'] * INFLATION_2019_TO_2022

In [None]:
# Total driver pay across all rides

rides2019['driver_pay_adj'].sum()    

In [None]:
# Create dollar amount of min rate adjusted to inflation

rides2019['min_rate_adj'] = rides2019.min_rate * 1.159

In [None]:
# Dollar amount by which inflation-adjusted driver pay exceeds the inflation-adjusted min rate

adjusted_min_rate = rides2019['min_rate'] * 1.159
rides2019['over_min_rate_adj'] = rides2019['driver_pay_adj'] - adjusted_min_rate

In [None]:
# Inflation-adjusted overage as a fraction of the inflation-adjusted min rate

adjusted_min_rate = rides2019['min_rate'] * 1.159
rides2019['perc_over_min_rate_adj'] = rides2019['over_min_rate_adj'].div(adjusted_min_rate)

In [None]:
# Create rate of passenger fare, by mile and minute

rides2019['passenger_fare_mile_adj'] = rides2019.base_passenger_fare * 1.159/rides2019.trip_miles
rides2019['passenger_fare_min_adj'] = rides2019.base_passenger_fare * 1.159/(rides2019.trip_time/60)

In [None]:
# Drop Shared rides, out of town rides, and VIA rides
# Keeps only rows with shared_match_flag == "N", a license number other than VIA, and out_of_city == False.
# The same three filters are already applied to rides2019 in the unadjusted 2019 section.

rides2019 = rides2019.loc[rides2019['shared_match_flag'] == "N"]
rides2019 = rides2019.loc[rides2019['hvfhs_license_num'] != VIA]
rides2019 = rides2019.loc[rides2019['out_of_city'] == False]

In [None]:
rides2019.driver_pay_adj.describe()

#### Distribution of percentage of driver earnings over the minimum required rate for the trip (In 2022 the bottom 25% of trips made less than 0.5% over the minimum rate, the top 25% of trips made at least 30% over the minimum rate, and the median percent made over the minimum rate was 7%)

In [None]:
rides2019.perc_over_min_rate_adj.describe()

#### !! Percentage of rides where the driver made less than the minimum required rate

In [None]:
len(rides2019[rides2019.perc_over_min_rate_adj < 0])/len(rides2019)

#### !! Percentage of rides where the driver made 1% or less over the minimum required rate

In [None]:
len(rides2019[rides2019.perc_over_min_rate_adj <= 0.01])/len(rides2019)

#### !! Percentage of rides where the driver made 10% or less over the minimum required rate

In [None]:
len(rides2019[rides2019.perc_over_min_rate_adj <= 0.10])/len(rides2019)

#### Percentage of rides where the driver made more than 5% over the minimum required rate

In [None]:
len(rides2019[rides2019.perc_over_min_rate_adj > 0.05])/len(rides2019)

#### Percentage of rides where the driver made more than 10% over the minimum required rate

In [None]:
len(rides2019[rides2019.perc_over_min_rate_adj > 0.10])/len(rides2019)

#### Percentage of rides where the driver made more than 20% over the minimum required rate

In [None]:
len(rides2019[rides2019.perc_over_min_rate_adj > 0.20])/len(rides2019)

#### Distribution of driver pay for rides where the driver made less than the minimum rate

In [None]:
rides2019[rides2019.perc_over_min_rate_adj < 0].driver_pay_adj.describe()

#### Distribution of underpayment for rides where the driver made less than the minimum rate

In [None]:
rides2019[rides2019.perc_over_min_rate_adj < 0].over_min_rate_adj.describe()

#### Distribution of minimum rate (in dollars) for rides where the driver made less than minimum rate

In [None]:
rides2019[rides2019.perc_over_min_rate_adj < 0].min_rate_adj.describe()

#### Distribution of ride length for rides where the driver made less than minimum rate

In [None]:
rides2019[rides2019.perc_over_min_rate_adj < 0].trip_miles.describe()

#### Distribution of driver pay for rides where the driver made more than 20% over the minimum rate

In [None]:
rides2019[rides2019.perc_over_min_rate_adj > .2].driver_pay_adj.describe()

#### Distribution of overage (in dollars) for rides where the driver made more than 20% over the minimum rate

In [None]:
rides2019[rides2019.perc_over_min_rate_adj > .2]["over_min_rate_adj"].describe()

#### Distribution of minimum rate (in dollars) for rides where the driver made more than 20% over the minimum rate

In [None]:
# Select on the inflation-adjusted share, like every other cell in the adjusted section
rides2019[rides2019.perc_over_min_rate_adj > .2]["min_rate_adj"].describe()

#### Distribution of driver pay for rides where the driver made more than 20% over the minimum rate (2022 dollars)

In [None]:
rides2019[rides2019.perc_over_min_rate_adj > .2].driver_pay_adj.describe()

#### !! Percent of trips where overage is more than \$1

In [None]:
len(rides2019[rides2019['over_min_rate_adj'] > 1])/len(rides2019)

#### !! Percent of trips where overage was more than \$20

In [None]:
len(rides2019[rides2019['over_min_rate_adj'] > 20])/len(rides2019)

#### !! Average overage of all trips in dollars

In [None]:
rides2019['over_min_rate_adj'].describe()

#### !! Average overage of trips that were 10% or less over in dollars

In [None]:
# Inclusive bound ("10% or less"), consistent with the other "or less" share cells in the adjusted section
rides2019[rides2019['perc_over_min_rate_adj'] <= .1]['over_min_rate_adj'].describe()

#### !! Average overage of all trips which are over pay rules in dollars

In [None]:
rides2019[rides2019['over_min_rate_adj'] > 0]['over_min_rate_adj'].describe()

## Passenger Fare Analysis

In [None]:
# Convert 2019 base passenger fare to 2022 dollars, then derive the per-mile and per-minute fares from it

fare_2022_dollars = rides2019['base_passenger_fare'] * 1.159  # inflation rate between 2019 and 2022

rides2019['passenger_fare_adj'] = fare_2022_dollars
rides2019['passenger_fare_mile_adj'] = fare_2022_dollars / rides2019['trip_miles']
rides2019['passenger_fare_min_adj'] = fare_2022_dollars / (rides2019['trip_time'] / 60)

In [None]:
# Total passenger fare across all rides

rides2019['passenger_fare_adj'].sum()  

#### Distribution of passenger fare

In [None]:
rides2019.passenger_fare_adj.describe()

#### Average fare per minute cost 

In [None]:
np.mean(rides2019['passenger_fare_min_adj'])

#### Average fare per mile cost (2022 Dollars)

In [None]:
np.mean(rides2019['passenger_fare_mile_adj'])

#### Regression of passenger fare on trip miles and minutes (In 2022, each additional mile per trip increases the fare by 2.03 dollars and each additional minute per trip increases the fare by 0.47 dollars)



In [None]:
# convert trip time to minutes
rides2019['trip_time_min'] = rides2019.trip_time/60

In [None]:
result = sm.ols(formula="passenger_fare_adj ~ trip_miles + trip_time_min", data=rides2019).fit()
print(result.params)

## Drivers' earnings compared to passenger fare

In [None]:
# Inflation-adjusted driver pay as a fraction of the inflation-adjusted passenger fare
rides2019['perc_earnings_fare_adj'] = rides2019['driver_pay_adj'].div(rides2019['passenger_fare_adj'])

rides2019['perc_earnings_fare_adj'].describe()

In [None]:
rides2019[rides2019["perc_earnings_fare_adj"] > 1]["perc_earnings_fare_adj"].describe()

In [None]:
rides2019[rides2019["perc_earnings_fare_adj"] < 1]["perc_earnings_fare_adj"].describe()

# 2022

In [None]:
# Reads the cleaned April 2022 parquet file with pyarrow (memory-mapped) and converts it to a pandas DataFrame.
rides2022 = pq.read_table("../Aya_NYTWA/Data/april_2022_cleaned.parquet", memory_map=True).to_pandas()

In [None]:
# Minimum required driver pay per trip under the 2022 rates.
# The rate pair (per mile, per minute) depends on the WAV flag and on whether the trip leaves the city.

wav_2022 = rides2022['wav_match_flag'] == 'Y'
non_wav_2022 = rides2022['wav_match_flag'] == 'N'
in_city_2022 = rides2022['out_of_city'] == False
out_city_2022 = rides2022['out_of_city'] == True


def _min_pay_2022(per_mile, per_min):
    """Return miles * per_mile + (trip_time / 60) * per_min for every 2022 trip."""
    return rides2022.trip_miles*per_mile + (rides2022.trip_time/60)*per_min


conditions2022 = [
    non_wav_2022 & in_city_2022,
    wav_2022 & in_city_2022,
    non_wav_2022 & out_city_2022,
    wav_2022 & out_city_2022,
]

choices2022 = [
    _min_pay_2022(NON_WAV_PER_MILE_22, NON_WAV_PER_MIN_22),
    _min_pay_2022(WAV_PER_MILE_22, WAV_PER_MIN_22),
    _min_pay_2022(OUT_CITY_NON_WAV_PER_MILE_22, OUT_CITY_PER_MIN_22),
    _min_pay_2022(OUT_CITY_WAV_PER_MILE_22, OUT_CITY_PER_MIN_22),
]

# Rows matching none of the four conditions fall back to the in-city non-WAV rate (first choice).
rides2022['min_rate'] = np.select(conditions2022, choices2022, default=choices2022[0])


In [None]:
# Create variable for dollar amount of driver pay over min rate

rides2022['over_min_rate'] = (rides2022.driver_pay - rides2022.min_rate)

In [None]:
# Create variable for percentage of driver pay over min rate

rides2022['perc_over_min_rate'] = (rides2022.over_min_rate/rides2022.min_rate)

In [None]:
# Passenger fare per mile and per minute for each trip
# (trip_time is divided by 60 to express it in minutes, presumably from seconds)

fare_2022 = rides2022['base_passenger_fare']
trip_minutes_2022 = rides2022['trip_time'] / 60

rides2022['passenger_fare_mile'] = fare_2022 / rides2022['trip_miles']
rides2022['passenger_fare_min'] = fare_2022 / trip_minutes_2022

In [None]:
# Drop Shared rides, out of town rides, and VIA rides
# The same three filters are applied to the 2019 data, so both years are compared on the same kind of trips.
# NOTE(review): assumes the cleaned 2022 file carries a shared_match_flag column like the 2019 file does - confirm.

rides2022 = rides2022.loc[rides2022['shared_match_flag'] == "N"]
rides2022 = rides2022.loc[rides2022['hvfhs_license_num'] != VIA]
rides2022 = rides2022.loc[rides2022['out_of_city'] == False]

## Driver Pay Analysis


In [None]:
# Total driver pay across all rides

rides2022['driver_pay'].sum()    

#### Distribution of driver pay (in 2022 dollars)

In [None]:
rides2022.driver_pay.describe()

In [None]:
# Distribution of the minimum required rate for the 2022 trips
rides2022.min_rate.describe()

#### Distribution of percentage of driver earnings over the minimum required rate for the trip (In 2022 the bottom 25% of trips made less than 0.5% over the minimum rate, the top 25% of trips made at least 30% over the minimum rate, and the median percent made over the minimum rate was 7%)

In [None]:
rides2022.perc_over_min_rate.describe()

#### !! Percentage of rides where the driver made less than the minimum required rate

In [None]:
len(rides2022[rides2022.perc_over_min_rate < 0])/len(rides2022)

#### !! Percentage of rides where the driver made 1% or less over the minimum required rate

In [None]:
len(rides2022[rides2022.perc_over_min_rate <= 0.01])/len(rides2022)

#### !! Percentage of rides where the driver made 10% or less over the minimum required rate

In [None]:
len(rides2022[rides2022.perc_over_min_rate <= 0.10])/len(rides2022)

#### Percentage of rides where the driver made more than 5% over the minimum required rate

In [None]:
len(rides2022[rides2022.perc_over_min_rate > 0.05])/len(rides2022)

#### Percentage of rides where the driver made more than 10% over the minimum required rate

In [None]:
len(rides2022[rides2022.perc_over_min_rate > 0.10])/len(rides2022)

#### Percentage of rides where the driver made more than 20% over the minimum required rate

In [None]:
len(rides2022[rides2022.perc_over_min_rate > 0.20])/len(rides2022)

#### Distribution of driver pay for rides where the driver made less than the minimum rate

In [None]:
rides2022[rides2022.perc_over_min_rate < 0].driver_pay.describe()

#### Distribution of underpayment for rides where the driver made less than the minimum rate

In [None]:
rides2022[rides2022.perc_over_min_rate < 0].over_min_rate.describe()

#### Distribution of minimum rate (in dollars) for rides where the driver made less than minimum rate

In [None]:
rides2022[rides2022.perc_over_min_rate < 0].min_rate.describe()

#### Distribution of ride length for rides where the driver made less than minimum rate

In [None]:
rides2022[rides2022.perc_over_min_rate < 0].trip_miles.describe()

#### Distribution of driver pay for rides where the driver made more than 20% over the minimum rate

In [None]:
rides2022[rides2022.perc_over_min_rate > .2].driver_pay.describe()

#### Distribution of overage (in dollars) for rides where the driver made more than 20% over the minimum rate

In [None]:
rides2022[rides2022.perc_over_min_rate > .2]["over_min_rate"].describe()

#### Distribution of minimum rate (in dollars) for rides where the driver made more than 20% over the minimum rate

In [None]:
rides2022[rides2022.perc_over_min_rate > .2]["min_rate"].describe()

#### Distribution of driver pay for rides where the driver made more than 20% over the minimum rate (2022 dollars)

In [None]:
rides2022[rides2022.perc_over_min_rate > .2].driver_pay.describe()

#### !! Percent of trips where overage is more than \$1

In [None]:
len(rides2022[rides2022['over_min_rate'] > 1])/len(rides2022)

#### !! Percent of trips where overage was more than \$20

In [None]:
len(rides2022[rides2022['over_min_rate'] > 20])/len(rides2022)

#### !! Average overage of all trips in dollars

In [None]:
rides2022['over_min_rate'].describe()

#### !! Average overage of trips that were 10% or less over in dollars

In [None]:
# Inclusive bound ("10% or less"), consistent with the other "or less" share cells for 2022
rides2022[rides2022['perc_over_min_rate'] <= .1]['over_min_rate'].describe()

#### !! Average overage of all trips which are over pay rules in dollars

In [None]:
rides2022[rides2022['over_min_rate'] > 0]['over_min_rate'].describe()

## Passenger Fare Analysis

In [None]:
# Total passenger fare across all rides

rides2022['base_passenger_fare'].sum()  

#### Distribution of passenger fare (2022 Dollars)

In [None]:
rides2022.base_passenger_fare.describe()

#### Average fare per minute cost (2022 Dollars)

In [None]:
np.mean(rides2022['passenger_fare_min'])

#### Average fare per mile cost (2022 Dollars)

In [None]:
np.mean(rides2022['passenger_fare_mile'])

#### Regression of passenger fare on trip miles and minutes (In 2022, each additional mile per trip increases the fare by 2.03 dollars and each additional minute per trip increases the fare by 0.47 dollars)



In [None]:
# convert trip time to minutes
rides2022['trip_time_min'] = rides2022.trip_time/60

In [None]:
result = sm.ols(formula="base_passenger_fare ~ trip_miles + trip_time_min", data=rides2022).fit()
print(result.params)

## Drivers' earnings compared to passenger fare

In [None]:
# Driver pay as a fraction of the base passenger fare
# (values above 1 mean the driver was paid more than the passenger's base fare)
rides2022['perc_earnings_fare'] = rides2022['driver_pay'].div(rides2022['base_passenger_fare'])

rides2022['perc_earnings_fare'].describe()

In [None]:
len(rides2022[rides2022["perc_earnings_fare"] > 1])/len(rides2022["perc_earnings_fare"])

In [None]:
rides2022[rides2022["perc_earnings_fare"] > 1]["over_min_rate"].describe()