In [30]:
import pandas as pd
import os

In [31]:
# Data directories, given relative to the notebook's working directory.
external_dir, interim_dir, processed_dir = (
    "../data/external",
    "../data/interim",
    "../data/processed",
)

# Survey workbooks read by this notebook.
input_file1 = os.path.join(  # latest
    external_dir, "etc/od_20241121_sandag_airport_draftfinal.xlsx"
)
input_file2 = os.path.join(  # older version, but its records are needed
    external_dir, "etc/od_20241015_sandag_airport_pilot_4.xlsx"
)
input_file3 = os.path.join(
    external_dir, "etc/od_20250131_sandag_airport_sas_draftinal.xlsx"
)

# Column-name mapping plus interim / processed CSV locations.
variable_map_file = os.path.join(processed_dir, "revised_names.csv")
clean_survey_file = os.path.join(interim_dir, "survey_data_clean.csv")
output_csv_filename = os.path.join(processed_dir, "data_model_output.csv")
#summary_csv_filename = os.path.join(processed_dir, "data_model_output_summary.csv")

In [32]:
def _read_survey_sheets(workbook_path, is_self_administered):
    """Read the first two sheets of a survey workbook in a single pass.

    Parameters
    ----------
    workbook_path : str
        Path of the Excel workbook to read.
    is_self_administered : bool
        Value written to the ``is_self_administered`` column of both frames.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        Sheet 0 and sheet 1, in that order. This notebook treats sheet 0 as
        the completed responses and sheet 1 as the incomplete ones.
    """
    # Passing a list as sheet_name returns {sheet: DataFrame}, so the workbook
    # is opened and parsed once instead of once per sheet.
    sheets = pd.read_excel(workbook_path, sheet_name=[0, 1])
    return tuple(
        sheets[position].assign(is_self_administered=is_self_administered)
        for position in (0, 1)
    )


# Workbooks 1 and 2 are flagged as not self-administered, workbook 3 as
# self-administered.
in_df_complete1, in_df_incomplete1 = _read_survey_sheets(input_file1, is_self_administered=False)
in_df_complete2, in_df_incomplete2 = _read_survey_sheets(input_file2, is_self_administered=False)
in_df_complete3, in_df_incomplete3 = _read_survey_sheets(input_file3, is_self_administered=True)

# NOTE(review): the workbooks are stacked without de-duplication; input_file2
# is an older version of the data, so confirm it does not repeat records that
# are already present in input_file1.
# Completed responses get is_completed = 1 and weight = 1; incomplete ones get 0.
in_df_complete = pd.concat(
    [in_df_complete1, in_df_complete2, in_df_complete3], ignore_index=True
).assign(is_completed=1, weight=1)
in_df_incomplete = pd.concat(
    [in_df_incomplete1, in_df_incomplete2, in_df_incomplete3], ignore_index=True
).assign(is_completed=0, weight=0)

in_df = pd.concat([in_df_complete, in_df_incomplete], ignore_index=True)

# Rename columns from ETC_name to WSP_name using the mapping file, then drop
# every column whose new name is "delete".
header_df = pd.read_csv(variable_map_file)[['ETC_name','WSP_name']]
header_dict = pd.Series(header_df.WSP_name.values,index=header_df.ETC_name).to_dict()
clean_df = in_df.rename(columns=header_dict).copy().drop(columns=["delete"])

  in_df_complete['is_completed'] = 1
  in_df_complete['weight'] = 1


In [50]:
# Keep only completed responses, then pull out the rows flagged as
# self-administered.
clean_df = clean_df[clean_df['is_completed'].eq(1)]
sas = clean_df[clean_df['is_self_administered'].eq(1)]

In [51]:
# Rows of clean_df that are not flagged as self-administered.
intercept = clean_df[clean_df['is_self_administered'].eq(0)]

In [54]:
# Distinct main_mode_label values in each subset (missing values included).
modes_sas, modes_intercept = (
    set(subset['main_mode_label']) for subset in (sas, intercept)
)

In [55]:
# Distinct main_mode_label values found in the self-administered (`sas`) subset.
modes_sas

{'Bicycle: non-electric bikeshare',
 'Bicycle: personal electric bicycle',
 'Bicycle: personal non-electric bicycle',
 'Car service/black car/limo/executive car',
 'Dropped off by car by family/friend',
 'Drove alone and parked',
 'Drove with others and parked',
 'E-scooter: personal',
 'Employee shuttle',
 'Get in a parked vehicle and drive alone',
 'Get in a parked vehicle and drive with others',
 'Get in a parked vehicle and ride with other traveler(s)',
 'Hotel shuttle van',
 'Picked up by car by family/friend',
 'Public transit',
 'Rental car: dropped off at rental agency',
 'Rental car: picked up at rental agency',
 'Taxi',
 'Uber/Lyft',
 'Walk',
 nan}

In [56]:
# Distinct main_mode_label values found in the `intercept` subset.
modes_intercept

{'Bicycle: personal electric bicycle',
 'Car service/black car/limo/executive car',
 'Chartered tour bus',
 'Dropped off by car by family/friend',
 'Drove alone and parked',
 'Drove with others and parked',
 'Employee shuttle',
 'Get in a parked vehicle and drive alone',
 'Get in a parked vehicle and drive with others',
 'Get in a parked vehicle and ride with other traveler(s)',
 'Hotel shuttle van',
 'Other',
 'Other public transit',
 'Other shared van (please specify)',
 'Picked up by car by family/friend',
 'Rental car: Dropped off at rental agency',
 'Rental car: Picked up at rental agency',
 'Rental car: get in a parked rental car',
 'Rental car: parked rental car',
 'Rode with other traveler(s) and parked',
 'Taxi',
 'Uber/Lyft',
 'Walk',
 nan}

In [57]:
# Number of self-administered responses per main mode label.
sas.loc[:, 'main_mode_label'].value_counts()

main_mode_label
Drove alone and parked                                     202
Uber/Lyft                                                  121
Dropped off by car by family/friend                         65
Rental car: dropped off at rental agency                    13
Drove with others and parked                                11
Picked up by car by family/friend                            9
Taxi                                                         9
Car service/black car/limo/executive car                     7
Public transit                                               6
Bicycle: personal electric bicycle                           6
Walk                                                         4
Bicycle: personal non-electric bicycle                       2
Employee shuttle                                             2
Get in a parked vehicle and ride with other traveler(s)      2
E-scooter: personal                                          1
Hotel shuttle van                      

In [58]:
# Number of intercept responses per main mode label.
intercept.loc[:, 'main_mode_label'].value_counts()

main_mode_label
Dropped off by car by family/friend                        2338
Uber/Lyft                                                  2171
Rental car: Dropped off at rental agency                    756
Drove alone and parked                                      457
Rental car: parked rental car                               207
Hotel shuttle van                                           199
Drove with others and parked                                162
Taxi                                                        153
Rental car: Picked up at rental agency                       94
Car service/black car/limo/executive car                     75
Other shared van (please specify)                            45
Other                                                        36
Employee shuttle                                             35
Other public transit                                         33
Picked up by car by family/friend                            33
Chartered tour bus      

In [60]:
# Main mode labels that occur in the self-administered set but not in the
# intercept set (exact, case-sensitive comparison).
print(modes_sas.difference(modes_intercept))

{'E-scooter: personal', 'Rental car: picked up at rental agency', 'Public transit', 'Rental car: dropped off at rental agency', 'Bicycle: non-electric bikeshare', 'Bicycle: personal non-electric bicycle'}


`Rental car: Picked up at rental agency` (intercept) vs `Rental car: picked up at rental agency` (SAS): the two labels differ only in letter case. We can handle this with a `str.lower()` comparison, but there is also `Public transit`, which is not available as an option in the intercept data at all. There it is called `other_public_transit`.

In [61]:
# Mode code columns to compare between the two subsets.
mode_code_columns = [
    'main_transit_mode',
    'main_mode',
    'access_mode',
    'egress_mode',
    'reverse_mode',
    'reverse_mode_predicted',
    'other_airport_accessmode',
    'reverse_commute_mode',
]
# Each label column name is the code column name with a "_label" suffix.
mode_label_columns = [f'{code_column}_label' for code_column in mode_code_columns]

In [74]:
# For every label column, print the lower-cased labels that appear in the
# self-administered subset but not in the intercept subset.
for mode in mode_label_columns:
    sas_labels = set(sas[mode].str.lower())
    intercept_labels = set(intercept[mode].str.lower())
    print(f'{mode} = {sas_labels - intercept_labels}')

main_transit_mode_label = set()
main_mode_label = {'bicycle: non-electric bikeshare', 'e-scooter: personal', 'public transit', 'bicycle: personal non-electric bicycle'}
access_mode_label = {'bicycle: personal non-electric bicycle', 'public transit'}
egress_mode_label = set()
reverse_mode_label = set()
reverse_mode_predicted_label = {'public transit'}
other_airport_accessmode_label = {'personal e-scooter', 'car service / black car / limo / executive car', 'public transit', 'other', 'uber / lyft'}
reverse_commute_mode_label = {'e-scooter: personal', 'bicycle: personal non-electric bicycle', 'refused/no answer', 'public transit'}


In [72]:
# For every label column, print the distinct lower-cased labels found in the
# intercept subset.
for mode in mode_label_columns:
    intercept_labels_lower = intercept[mode].str.lower()
    print(set(intercept_labels_lower))

{'mts route 992', 'none of the above', 'airport flyer shuttle'}
{'uber/lyft', 'bicycle: personal electric bicycle', 'other shared van (please specify)', 'rental car: get in a parked rental car', 'get in a parked vehicle and ride with other traveler(s)', 'car service/black car/limo/executive car', nan, 'get in a parked vehicle and drive with others', 'picked up by car by family/friend', 'rental car: dropped off at rental agency', 'drove with others and parked', 'other public transit', 'employee shuttle', 'drove alone and parked', 'chartered tour bus', 'get in a parked vehicle and drive alone', 'rental car: picked up at rental agency', 'walk', 'taxi', 'dropped off by car by family/friend', 'hotel shuttle van', 'rental car: parked rental car', 'other', 'rode with other traveler(s) and parked'}
{nan, 'walk', 'uber/lyft', 'taxi', 'drove with others and parked', 'dropped off by car by family/friend', 'drove alone and parked', 'other', 'other public transit', 'rode with other traveler(s) and 