In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import joypy
from datetime import datetime
from matplotlib.dates import DateFormatter
import matplotlib.dates as mdates
import matplotlib.ticker as mtick


In [37]:
df = pd.read_csv("trippub.csv")
df = df[['HOUSEID','PERSONID','HHSTATE','TRPTRANS','TRPMILES','WTTRDFIN','TRAVDAY','TRIPPURP','HH_CBSA','STRTTIME']]

day = { 1 : 'Sunday', 2: 'Weekday', 3: 'Weekday', 4: 'Weekday', 5: "Weekday", 6: "Weekday", 7: "Saturday"}
day = pd.Series(day).to_frame('Day')
day = day.reset_index().rename(columns = {'index' : 'TRAVDAY'})

PURPOSE = { 'HBW' : "HBW", "HBO" : "HBO", "HBOSHOP": "HBO", "HBOSOCREC": "HBO", "NHB" : "NHB" }
PURPOSE = pd.Series(PURPOSE).to_frame('PURP')
PURPOSE = PURPOSE.reset_index().rename(columns = {'index' : 'TRIPPURP'})

df = pd.merge(df, day, on = 'TRAVDAY')
df = df.loc[(df['TRIPPURP'] != '-9'),]

df = pd.merge(df, PURPOSE, on = 'TRIPPURP')

df['TripDist'] = df['TRPMILES'] * df['WTTRDFIN']

df['Time'] = df['STRTTIME'].apply(lambda num : format(num,'04d'))
df['DateTime'] = df['Time'].apply(lambda num : datetime.strptime(num, "%H%M").strftime("%I:%M %p"))
df['Hour'] = pd.to_datetime(df['DateTime'], format='%I:%M %p')
df['NearestHour'] = df['Hour'].dt.round(freq='60min')


national_bike = df.loc[(df["TRPTRANS"] == 2),]
national_walk = df.loc[df["TRPTRANS"] == 1,]
weights_bike = national_bike['WTTRDFIN'].sum()

In [27]:
df.head()
df.dtypes

HOUSEID       int64
PERSONID      int64
HHSTATE      object
TRPTRANS      int64
TRPMILES    float64
WTTRDFIN    float64
TRAVDAY       int64
TRIPPURP     object
HH_CBSA      object
STRTTIME      int64
Day          object
PURP         object
TripDist    float64
dtype: object

In [39]:
# National Average Bike Trip Distance 

avg_dist_nat_bike = national_bike.groupby('Day')['TripDist','WTTRDFIN'].sum()
avg_dist_nat_bike = pd.DataFrame(avg_dist_nat_bike)
avg_dist_nat_bike['AvgDist'] = avg_dist_nat_bike['TripDist']/avg_dist_nat_bike['WTTRDFIN']
avg_dist_nat_bike

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,TripDist,WTTRDFIN,AvgDist
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Saturday,1124916000.0,277328400.0,4.056261
Sunday,667096800.0,171934000.0,3.879959
Weekday,3740563000.0,1576857000.0,2.372164


In [40]:
# National Average Walk Trip Distance 

avg_dist_nat_walk = national_walk.groupby('Day')['TripDist','WTTRDFIN'].sum()
avg_dist_nat_walk = pd.DataFrame(avg_dist_nat_walk)
avg_dist_nat_walk['AvgDist'] = avg_dist_nat_walk['TripDist']/avg_dist_nat_walk['WTTRDFIN']
avg_dist_nat_walk

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,TripDist,WTTRDFIN,AvgDist
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Saturday,1676141000.0,2605913000.0,0.643207
Sunday,1683127000.0,2357477000.0,0.713953
Weekday,18167400000.0,19252640000.0,0.943632


In [41]:
# National Average Bike Trip Distance by Tour Purpose

avg_dist_nat_bike = national_bike.groupby(['Day','PURP'])['TripDist','WTTRDFIN'].sum()
avg_dist_nat_bike = pd.DataFrame(avg_dist_nat_bike)
avg_dist_nat_bike.reset_index()

avg_dist_nat_bike['AvgDist'] = avg_dist_nat_bike['TripDist']/avg_dist_nat_bike['WTTRDFIN']

avg_dist_nat_bike.pivot_table(index = ['Day'], values = 'AvgDist', columns = 'PURP')

  This is separate from the ipykernel package so we can avoid doing imports until


PURP,HBO,HBW,NHB
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Saturday,2.635886,1.991578,5.113414
Sunday,5.871699,2.924829,3.208333
Weekday,2.213762,2.79474,2.068974


In [42]:
# National Average Walk Trip Distance 
avg_dist_nat_walk = national_walk.groupby(['Day','PURP'])['TripDist','WTTRDFIN'].mean()
avg_dist_nat_walk = pd.DataFrame(avg_dist_nat_walk)
avg_dist_nat_walk.reset_index()

avg_dist_nat_walk['AvgDist'] = avg_dist_nat_walk['TripDist']/avg_dist_nat_walk['WTTRDFIN']

avg_dist_nat_walk.pivot_table(index = ['Day'], values = 'AvgDist', columns = 'PURP')

  


PURP,HBO,HBW,NHB
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Saturday,0.817154,0.665986,0.513202
Sunday,0.901161,0.66,0.530508
Weekday,0.885562,1.551666,0.894429


In [None]:
#Time of Day

In [43]:
#Time of Day Bike National

avg_dist_nat_bike = national_bike.groupby(['NearestHour','PURP'])['TripDist','WTTRDFIN'].sum()
avg_dist_nat_bike = pd.DataFrame(avg_dist_nat_bike)
avg_dist_nat_bike.reset_index()

avg_dist_nat_bike['AvgDist'] = avg_dist_nat_bike['TripDist']/avg_dist_nat_bike['WTTRDFIN']

avg_dist_nat_bike.pivot_table(index = ['NearestHour'], values = 'AvgDist', columns = 'PURP')

  This is separate from the ipykernel package so we can avoid doing imports until


PURP,HBO,HBW,NHB
NearestHour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1900-01-01 00:00:00,,0.977058,1.514
1900-01-01 01:00:00,0.432,0.993076,
1900-01-01 02:00:00,,2.321197,1.050384
1900-01-01 04:00:00,5.095175,1.191112,0.304
1900-01-01 05:00:00,12.629936,1.091895,1.826577
1900-01-01 06:00:00,10.84346,4.338426,2.452367
1900-01-01 07:00:00,2.137506,2.870058,8.431268
1900-01-01 08:00:00,1.650946,2.763413,3.238734
1900-01-01 09:00:00,2.382128,2.641911,2.97619
1900-01-01 10:00:00,4.499993,2.044277,4.501469


In [45]:
#Time of Day Bike National

avg_dist_nat_bike = national_bike.groupby(['Day','NearestHour','PURP'])['TripDist','WTTRDFIN'].sum()
avg_dist_nat_bike = pd.DataFrame(avg_dist_nat_bike)
avg_dist_nat_bike.reset_index()

avg_dist_nat_bike['AvgDist'] = avg_dist_nat_bike['TripDist']/avg_dist_nat_bike['WTTRDFIN']

avg_dist_nat_bike.pivot_table(index = ['NearestHour'], values = 'AvgDist', columns = ['PURP', 'Day'])

  This is separate from the ipykernel package so we can avoid doing imports until


PURP,HBO,HBO,HBO,HBW,HBW,HBW,NHB,NHB,NHB
Day,Saturday,Sunday,Weekday,Saturday,Sunday,Weekday,Saturday,Sunday,Weekday
NearestHour,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
1900-01-01 00:00:00,,,,,,0.977058,,,1.514
1900-01-01 01:00:00,,,0.432,,,0.993076,,,
1900-01-01 02:00:00,,,,2.321197,,,,,1.050384
1900-01-01 04:00:00,82.0,,4.302641,,,1.191112,,,0.304
1900-01-01 05:00:00,,,12.629936,1.471107,0.815,1.185827,,1.781,2.063818
1900-01-01 06:00:00,8.131986,13.0,11.05218,0.26,1.164,4.510542,,1.781,2.960983
1900-01-01 07:00:00,1.980186,16.0,2.091476,3.266,5.067919,2.692359,16.413751,4.697037,3.369138
1900-01-01 08:00:00,1.489229,0.797,1.666058,2.703725,1.180167,2.81007,5.47545,4.262797,2.68179
1900-01-01 09:00:00,1.461941,14.863789,1.35775,1.61,0.903968,2.773097,6.541877,6.996878,1.759282
1900-01-01 10:00:00,2.252891,11.69258,3.213068,3.186994,1.586,1.804423,1.827873,15.454208,4.321947


In [44]:
# National Average Walk Trip Distance 
avg_dist_nat_walk = national_walk.groupby(['NearestHour','PURP'])['TripDist','WTTRDFIN'].mean()
avg_dist_nat_walk = pd.DataFrame(avg_dist_nat_walk)
avg_dist_nat_walk.reset_index()

avg_dist_nat_walk['AvgDist'] = avg_dist_nat_walk['TripDist']/avg_dist_nat_walk['WTTRDFIN']

avg_dist_nat_walk.pivot_table(index = ['NearestHour'], values = 'AvgDist', columns = 'PURP')

  


PURP,HBO,HBW,NHB
NearestHour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1900-01-01 00:00:00,0.665372,1.140169,0.254314
1900-01-01 01:00:00,0.33172,1.818587,3.941451
1900-01-01 02:00:00,0.897576,0.645482,1.410305
1900-01-01 03:00:00,0.356723,0.381,0.505852
1900-01-01 04:00:00,2.420295,0.949275,0.641367
1900-01-01 05:00:00,0.85532,0.706411,0.892974
1900-01-01 06:00:00,1.067777,1.078918,0.769166
1900-01-01 07:00:00,0.764258,0.701864,12.139362
1900-01-01 08:00:00,1.383872,1.675016,1.040766
1900-01-01 09:00:00,0.935507,0.558195,0.547599


In [46]:
# National Average Walk Trip Distance 
avg_dist_nat_walk = national_walk.groupby(['Day','NearestHour','PURP'])['TripDist','WTTRDFIN'].mean()
avg_dist_nat_walk = pd.DataFrame(avg_dist_nat_walk)
avg_dist_nat_walk.reset_index()

avg_dist_nat_walk['AvgDist'] = avg_dist_nat_walk['TripDist']/avg_dist_nat_walk['WTTRDFIN']

avg_dist_nat_walk.pivot_table(index = ['NearestHour'], values = 'AvgDist', columns = ['PURP','Day'])

  


PURP,HBO,HBO,HBO,HBW,HBW,HBW,NHB,NHB,NHB
Day,Saturday,Sunday,Weekday,Saturday,Sunday,Weekday,Saturday,Sunday,Weekday
NearestHour,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
1900-01-01 00:00:00,0.090373,,0.708873,0.075,0.698043,1.298777,0.227738,0.111699,0.321391
1900-01-01 01:00:00,0.267,,0.40841,1.313,,2.480909,0.225625,,4.135633
1900-01-01 02:00:00,,0.781,0.934328,,,0.645482,0.207291,0.192,1.909121
1900-01-01 03:00:00,0.25,,0.39,,,0.381,0.484,,0.889
1900-01-01 04:00:00,4.046645,1.067631,2.071736,0.625,0.311,2.450435,0.68982,1.0,0.376571
1900-01-01 05:00:00,2.877188,1.646098,0.747079,,0.1,0.73121,0.673043,1.584925,0.833848
1900-01-01 06:00:00,1.521059,1.107732,1.031002,0.61604,1.579954,1.08041,0.465534,0.948983,0.786598
1900-01-01 07:00:00,1.536646,1.034407,0.7081,0.721508,0.396145,0.774597,2.785249,0.688175,13.668857
1900-01-01 08:00:00,0.891315,1.511753,1.404954,0.581991,0.503005,1.776786,0.798824,1.100155,1.059193
1900-01-01 09:00:00,0.484214,0.575772,1.037778,0.468562,0.094721,0.568389,0.444594,0.668413,0.554153


In [8]:
# FLORIDA

In [9]:
fl_bike = df.loc[(df["TRPTRANS"] == 2) & (df["HHSTATE"] == "FL"),]
fl_walk = df.loc[(df["TRPTRANS"] == 1) & (df["HHSTATE"] == "FL"),]

In [10]:
fl_bike.shape


(116, 11)

In [11]:
fl_walk.shape

(812, 11)

In [12]:
# Average Bike Trip Distance in Florida

avg_dist_fl_bike = fl_bike.groupby('Day')['TripDist','WTTRDFIN'].sum()
avg_dist_fl_bike = pd.DataFrame(avg_dist_fl_bike)

avg_dist_fl_bike['AvgDist'] = avg_dist_fl_bike['TripDist']/avg_dist_fl_bike['WTTRDFIN']
avg_dist_fl_bike

  


Unnamed: 0_level_0,TripDist,WTTRDFIN,AvgDist
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Saturday,49068820.0,27528650.0,1.782464
Sunday,125235400.0,37869330.0,3.307041
Weekday,405210300.0,176985000.0,2.289517


In [13]:
#Average Walk Trip Distance in Florida

avg_dist_fl_walk = fl_walk.groupby('Day')['TripDist','WTTRDFIN'].sum()
avg_dist_fl_walk = pd.DataFrame(avg_dist_fl_walk)
avg_dist_fl_walk

avg_dist_fl_walk['AvgDist'] = avg_dist_fl_walk['TripDist']/avg_dist_fl_walk['WTTRDFIN']
avg_dist_fl_walk

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,TripDist,WTTRDFIN,AvgDist
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Saturday,103804600.0,198993300.0,0.521649
Sunday,133352800.0,290508000.0,0.459033
Weekday,2423820000.0,1185929000.0,2.043815


In [14]:
#Average Bike Trip Distance in Florida
avg_dist_fl_bike = fl_bike.groupby(['Day','TRIPPURP'])['TripDist','WTTRDFIN'].sum()
avg_dist_fl_bike = pd.DataFrame(avg_dist_fl_bike)
avg_dist_fl_bike.reset_index()

avg_dist_fl_bike['AvgDist'] = avg_dist_fl_bike['TripDist']/avg_dist_fl_bike['WTTRDFIN']

avg_dist_fl_bike.pivot_table(index = ['Day'], values = 'AvgDist', columns = 'TRIPPURP')


  


TRIPPURP,HBO,HBSHOP,HBSOCREC,HBW,NHB
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Saturday,2.299462,0.323,1.660445,,2.828
Sunday,7.158275,0.636652,4.000844,6.359,
Weekday,1.916611,0.87886,1.868723,5.242045,1.449847


In [15]:
# Florida - Average Walk Trip Distance 
avg_dist_fl_walk = fl_walk.groupby(['Day','TRIPPURP'])['TripDist','WTTRDFIN'].mean()
avg_dist_fl_walk = pd.DataFrame(avg_dist_fl_walk)
avg_dist_fl_walk.reset_index()

avg_dist_fl_walk['AvgDist'] = avg_dist_fl_walk['TripDist']/avg_dist_fl_walk['WTTRDFIN']

avg_dist_fl_walk.pivot_table(index = ['Day'], values = 'AvgDist', columns = 'TRIPPURP')

  


TRIPPURP,HBO,HBSHOP,HBSOCREC,HBW,NHB
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Saturday,0.785272,0.604649,0.552377,0.236353,0.242131
Sunday,0.51864,0.315484,0.43912,0.258,0.539781
Weekday,0.893879,5.220103,0.691502,26.116039,0.539827


In [17]:
fl_cbsa_bike = df.loc[(df["TRPTRANS"] == 2) & (df["HH_CBSA"] == '36740'),]
fl_cbsa_walk = df.loc[(df["TRPTRANS"] == 1) & (df["HH_CBSA"] == '36740'),]

In [18]:
fl_cbsa_bike.shape

(16, 11)

In [19]:
fl_cbsa_walk.shape

(75, 11)

In [20]:
########### CBSA ############

In [21]:
# FL CBSA - Average Bike Trip Distance

avg_dist_fl_bike = fl_cbsa_bike.groupby('Day')['TripDist','WTTRDFIN'].sum()
avg_dist_fl_bike = pd.DataFrame(avg_dist_fl_bike)

avg_dist_fl_bike['AvgDist'] = avg_dist_fl_bike['TripDist']/avg_dist_fl_bike['WTTRDFIN']
avg_dist_fl_bike

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,TripDist,WTTRDFIN,AvgDist
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Weekday,27484250.0,45383540.0,0.6056


In [22]:
# FL CBSA - Average Walk Trip Distance

avg_dist_fl_walk = fl_cbsa_walk.groupby('Day')['TripDist','WTTRDFIN'].sum()
avg_dist_fl_walk = pd.DataFrame(avg_dist_fl_walk)
avg_dist_fl_walk

avg_dist_fl_walk['AvgDist'] = avg_dist_fl_walk['TripDist']/avg_dist_fl_walk['WTTRDFIN']
avg_dist_fl_walk

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,TripDist,WTTRDFIN,AvgDist
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Saturday,41524020.0,44824200.0,0.926375
Sunday,1784226.0,12276610.0,0.145335
Weekday,53159580.0,84036440.0,0.632578


In [23]:
# FL CBSA - Average Bike Trip Distance
avg_dist_fl_bike = fl_cbsa_bike.groupby(['Day','TRIPPURP'])['TripDist','WTTRDFIN'].sum()
avg_dist_fl_bike = pd.DataFrame(avg_dist_fl_bike)
avg_dist_fl_bike.reset_index()

avg_dist_fl_bike['AvgDist'] = avg_dist_fl_bike['TripDist']/avg_dist_fl_bike['WTTRDFIN']

avg_dist_fl_bike.pivot_table(index = ['Day'], values = 'AvgDist', columns = 'TRIPPURP')


  


TRIPPURP,HBO,HBSHOP,HBSOCREC
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Weekday,0.565725,0.570647,1.071965


In [24]:
# FL CBSA - Average Walk Trip Distance 
avg_dist_fl_walk = fl_cbsa_walk.groupby(['Day','TRIPPURP'])['TripDist','WTTRDFIN'].mean()
avg_dist_fl_walk = pd.DataFrame(avg_dist_fl_walk)
avg_dist_fl_walk.reset_index()

avg_dist_fl_walk['AvgDist'] = avg_dist_fl_walk['TripDist']/avg_dist_fl_walk['WTTRDFIN']

avg_dist_fl_walk.pivot_table(index = ['Day'], values = 'AvgDist', columns = 'TRIPPURP')

  


TRIPPURP,HBO,HBSHOP,HBSOCREC,NHB
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Saturday,1.109104,1.4795,0.636407,0.219
Sunday,,,0.087854,0.29525
Weekday,1.108155,1.027,0.300567,0.362885
