In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

%matplotlib inline

In [2]:
# Directory that holds the CSV inputs; the file paths in the next cell are
# built relative to it.
data_dir = './data'

In [3]:
# Resolve the three input CSVs (stations, trips, weather) under data_dir.
bs_filepath, bt_filepath, bw_filepath = (
    os.path.join(data_dir, csv_name)
    for csv_name in (
        'austin_bikeshare_stations.csv',
        'austin_bikeshare_trips.csv',
        'austin_weather.csv',
    )
)

In [4]:
# Load each CSV with pandas' default parsing; order matches the path variables.
station_df, trip_df, weather_df = map(
    pd.read_csv,
    (bs_filepath, bt_filepath, bw_filepath),
)

In [9]:
# Summarise the stations table: column names, non-null counts and dtypes.
station_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   latitude    72 non-null     float64
 1   location    72 non-null     object 
 2   longitude   72 non-null     float64
 3   name        72 non-null     object 
 4   station_id  72 non-null     int64  
 5   status      72 non-null     object 
dtypes: float64(2), int64(1), object(3)
memory usage: 3.5+ KB


In [10]:
# Preview the first five raw trip records.
trip_df.head(n=5)

Unnamed: 0,bikeid,checkout_time,duration_minutes,end_station_id,end_station_name,month,start_station_id,start_station_name,start_time,subscriber_type,trip_id,year
0,8.0,19:12:00,41,2565.0,Trinity & 6th Street,3.0,2536.0,Waller & 6th St.,2015-03-19 19:12:00,Walk Up,9900082882,2015.0
1,141.0,2:06:04,6,2570.0,South Congress & Academy,10.0,2494.0,2nd & Congress,2016-10-30 02:06:04,Local365,12617682,2016.0
2,578.0,16:28:27,13,2498.0,Convention Center / 4th St. @ MetroRail,3.0,2538.0,Bullock Museum @ Congress & MLK,2016-03-11 16:28:27,Local365,9075366,2016.0
3,555.0,15:12:00,80,2712.0,Toomey Rd @ South Lamar,11.0,2497.0,Capitol Station / Congress & 11th,2014-11-23 15:12:00,24-Hour Kiosk (Austin B-cycle),9900319298,2014.0
4,86.0,15:39:13,25,3377.0,MoPac Pedestrian Bridge @ Veterans Drive,4.0,2707.0,Rainey St @ Cummings,2017-04-16 15:39:13,Walk Up,14468597,2017.0


In [11]:
# Preview the first five daily weather records.
weather_df.head(n=5)

Unnamed: 0,Date,TempHighF,TempAvgF,TempLowF,DewPointHighF,DewPointAvgF,DewPointLowF,HumidityHighPercent,HumidityAvgPercent,HumidityLowPercent,...,SeaLevelPressureAvgInches,SeaLevelPressureLowInches,VisibilityHighMiles,VisibilityAvgMiles,VisibilityLowMiles,WindHighMPH,WindAvgMPH,WindGustMPH,PrecipitationSumInches,Events
0,2013-12-21,74,60,45,67,49,43,93,75,57,...,29.68,29.59,10,7,2,20,4,31,0.46,"Rain , Thunderstorm"
1,2013-12-22,56,48,39,43,36,28,93,68,43,...,30.13,29.87,10,10,5,16,6,25,0,
2,2013-12-23,58,45,32,31,27,23,76,52,27,...,30.49,30.41,10,10,10,8,3,12,0,
3,2013-12-24,61,46,31,36,28,21,89,56,22,...,30.45,30.3,10,10,7,12,4,20,0,
4,2013-12-25,58,50,41,44,40,36,86,71,56,...,30.33,30.27,10,10,7,10,2,16,T,


In [48]:
# Summarise the weather table: column names, non-null counts and dtypes.
weather_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1319 entries, 0 to 1318
Data columns (total 21 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   Date                        1319 non-null   object
 1   TempHighF                   1319 non-null   int64 
 2   TempAvgF                    1319 non-null   int64 
 3   TempLowF                    1319 non-null   int64 
 4   DewPointHighF               1319 non-null   object
 5   DewPointAvgF                1319 non-null   object
 6   DewPointLowF                1319 non-null   object
 7   HumidityHighPercent         1319 non-null   object
 8   HumidityAvgPercent          1319 non-null   object
 9   HumidityLowPercent          1319 non-null   object
 10  SeaLevelPressureHighInches  1319 non-null   object
 11  SeaLevelPressureAvgInches   1319 non-null   object
 12  SeaLevelPressureLowInches   1319 non-null   object
 13  VisibilityHighMiles         1319 non-null   obje

In [50]:
# Number of distinct dates in the weather table (compare with its row count
# to see whether there is exactly one record per day).
len(weather_df['Date'].unique())

1319

---
---

In [62]:
# Work on an independent deep copy so the raw trip_df stays untouched.
trip_df_clean = trip_df.copy(deep=True)

---

In [63]:
# Discard every trip row that has a missing value in any column.
trip_df_clean = trip_df_clean.dropna(axis='index', how='any')

In [64]:
# Split the combined "<date> <time>" stamp into a 'Date' column (the key used
# for the weather merge) and a time-only 'start_time'.
#
# The guard keeps the cell idempotent: start_time is overwritten with its own
# second token, so an unguarded re-run would copy the time of day into 'Date'
# and turn start_time into NaN.
if 'Date' not in trip_df_clean.columns:
    # Tokenise once and reuse the result instead of splitting every row twice.
    start_parts = trip_df_clean['start_time'].str.split()
    trip_df_clean['Date'] = start_parts.str[0]
    trip_df_clean['start_time'] = start_parts.str[1]

In [65]:
# Drop trips whose year is missing or not a number.
#
# The raw preview shows 'year' as a float (e.g. 2015.0), so the former string
# comparison `year == "nan"` could never match: NaN is not equal to anything,
# least of all a string. Coercing to numeric and testing isna() catches real
# NaN values as well as a literal "nan" string or any other unparseable entry.
year_is_missing = pd.to_numeric(trip_df_clean['year'], errors='coerce').isna()
rows_to_remove = trip_df_clean.index[year_is_missing]

trip_df_clean = trip_df_clean.drop(index=rows_to_remove)

In [66]:
# Store year and month as the smallest integer dtype that fits their values.
trip_df_clean = trip_df_clean.assign(
    year=pd.to_numeric(trip_df_clean['year'], downcast='integer'),
    month=pd.to_numeric(trip_df_clean['month'], downcast='integer'),
)

In [67]:
# Store both station id columns as the smallest integer dtype that fits.
for station_col in ('start_station_id', 'end_station_id'):
    trip_df_clean[station_col] = pd.to_numeric(
        trip_df_clean[station_col], downcast='integer'
    )

In [68]:
# Display the cleaned trips frame to check the Date / start_time split and the
# integer casts applied in the preceding cells.
trip_df_clean

Unnamed: 0,bikeid,checkout_time,duration_minutes,end_station_id,end_station_name,month,start_station_id,start_station_name,start_time,subscriber_type,trip_id,year,Date
0,8.0,19:12:00,41,2565,Trinity & 6th Street,3,2536,Waller & 6th St.,19:12:00,Walk Up,9900082882,2015,2015-03-19
1,141.0,2:06:04,6,2570,South Congress & Academy,10,2494,2nd & Congress,02:06:04,Local365,12617682,2016,2016-10-30
2,578.0,16:28:27,13,2498,Convention Center / 4th St. @ MetroRail,3,2538,Bullock Museum @ Congress & MLK,16:28:27,Local365,9075366,2016,2016-03-11
3,555.0,15:12:00,80,2712,Toomey Rd @ South Lamar,11,2497,Capitol Station / Congress & 11th,15:12:00,24-Hour Kiosk (Austin B-cycle),9900319298,2014,2014-11-23
4,86.0,15:39:13,25,3377,MoPac Pedestrian Bridge @ Veterans Drive,4,2707,Rainey St @ Cummings,15:39:13,Walk Up,14468597,2017,2017-04-16
...,...,...,...,...,...,...,...,...,...,...,...,...,...
649226,52.0,0:12:00,90,2571,Red River & 8th Street,7,2544,East 6th & Pedernales St.,00:12:00,24-Hour Kiosk (Austin B-cycle),9900185926,2014,2014-07-06
649227,104.0,17:20:44,53,2499,City Hall / Lavaca & 2nd,2,3619,6th & Congress,17:20:44,Walk Up,13649794,2017,2017-02-06
649228,240.0,13:12:55,44,2495,4th & Congress,11,2495,4th & Congress,13:12:55,Walk Up,9900329990,2015,2015-11-18
649229,421.0,14:45:34,106,2494,2nd & Congress,5,2494,2nd & Congress,14:45:34,Try Before You Buy Special,10219003,2016,2016-05-29


In [69]:
# Show only the two station id columns to confirm they are now integers.
trip_df_clean.loc[:, ['start_station_id', 'end_station_id']]

Unnamed: 0,start_station_id,end_station_id
0,2536,2565
1,2494,2570
2,2538,2498
3,2497,2712
4,2707,3377
...,...,...
649226,2544,2571
649227,3619,2499
649228,2495,2495
649229,2494,2494


In [70]:
# Start the merged dataset from an independent deep copy of the cleaned trips.
df_final = trip_df_clean.copy(deep=True)

In [71]:
# Attach the daily weather record to every trip, joining on the 'Date' string.
#
# The merge is built from trip_df_clean rather than from df_final itself, so
# re-running this cell cannot merge the weather columns in a second time (which
# would produce duplicated *_x / *_y columns).
#
# how='inner' spells out the default: trips on dates with no weather record are
# dropped. validate='many_to_one' makes pandas raise if a date appears more than
# once in weather_df, which would otherwise silently duplicate trips.
df_final = trip_df_clean.merge(
    weather_df,
    on='Date',
    how='inner',
    validate='many_to_one',
)

In [72]:
# Summarise the merged trips + weather frame: row count, columns and dtypes.
df_final.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 581625 entries, 0 to 581624
Data columns (total 33 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   bikeid                      581625 non-null  float64
 1   checkout_time               581625 non-null  object 
 2   duration_minutes            581625 non-null  int64  
 3   end_station_id              581625 non-null  int16  
 4   end_station_name            581625 non-null  object 
 5   month                       581625 non-null  int8   
 6   start_station_id            581625 non-null  int16  
 7   start_station_name          581625 non-null  object 
 8   start_time                  581625 non-null  object 
 9   subscriber_type             581625 non-null  object 
 10  trip_id                     581625 non-null  int64  
 11  year                        581625 non-null  int16  
 12  Date                        581625 non-null  object 
 13  TempHighF     

In [73]:
# Display trip_df_clean again; the merge result was bound to df_final, not to
# this frame.
trip_df_clean

Unnamed: 0,bikeid,checkout_time,duration_minutes,end_station_id,end_station_name,month,start_station_id,start_station_name,start_time,subscriber_type,trip_id,year,Date
0,8.0,19:12:00,41,2565,Trinity & 6th Street,3,2536,Waller & 6th St.,19:12:00,Walk Up,9900082882,2015,2015-03-19
1,141.0,2:06:04,6,2570,South Congress & Academy,10,2494,2nd & Congress,02:06:04,Local365,12617682,2016,2016-10-30
2,578.0,16:28:27,13,2498,Convention Center / 4th St. @ MetroRail,3,2538,Bullock Museum @ Congress & MLK,16:28:27,Local365,9075366,2016,2016-03-11
3,555.0,15:12:00,80,2712,Toomey Rd @ South Lamar,11,2497,Capitol Station / Congress & 11th,15:12:00,24-Hour Kiosk (Austin B-cycle),9900319298,2014,2014-11-23
4,86.0,15:39:13,25,3377,MoPac Pedestrian Bridge @ Veterans Drive,4,2707,Rainey St @ Cummings,15:39:13,Walk Up,14468597,2017,2017-04-16
...,...,...,...,...,...,...,...,...,...,...,...,...,...
649226,52.0,0:12:00,90,2571,Red River & 8th Street,7,2544,East 6th & Pedernales St.,00:12:00,24-Hour Kiosk (Austin B-cycle),9900185926,2014,2014-07-06
649227,104.0,17:20:44,53,2499,City Hall / Lavaca & 2nd,2,3619,6th & Congress,17:20:44,Walk Up,13649794,2017,2017-02-06
649228,240.0,13:12:55,44,2495,4th & Congress,11,2495,4th & Congress,13:12:55,Walk Up,9900329990,2015,2015-11-18
649229,421.0,14:45:34,106,2494,2nd & Congress,5,2494,2nd & Congress,14:45:34,Try Before You Buy Special,10219003,2016,2016-05-29


In [78]:
# Inspect ten merged rows by position, transposed so each of the many columns
# reads as a row.
df_final.iloc[10000:10010].T

Unnamed: 0,10000,10001,10002,10003,10004,10005,10006,10007,10008,10009
bikeid,861,576,282,334,133,236,241,414,191,709
checkout_time,14:48:56,14:34:01,11:22:57,14:33:28,11:18:50,16:24:47,14:23:17,10:22:02,14:50:10,14:31:49
duration_minutes,15,16,10,135,32,15,11,10,32,137
end_station_id,2494,2494,2494,2549,3377,2504,2502,2564,3377,2549
end_station_name,2nd & Congress,2nd & Congress,2nd & Congress,Long Center @ South 1st & Riverside,MoPac Pedestrian Bridge @ Veterans Drive,South Congress & Elizabeth,Barton Springs & Riverside,5th & San Marcos,MoPac Pedestrian Bridge @ Veterans Drive,Long Center @ South 1st & Riverside
month,9,9,9,9,9,9,9,9,9,9
start_station_id,2575,2548,2564,2549,3377,2494,2537,2823,2707,2549
start_station_name,Riverside @ S. Lamar,UT West Mall @ Guadalupe,5th & San Marcos,Long Center @ South 1st & Riverside,MoPac Pedestrian Bridge @ Veterans Drive,2nd & Congress,West & 6th St.,Capital Metro HQ - East 5th at Broadway,Rainey St @ Cummings,Long Center @ South 1st & Riverside
start_time,14:48:56,14:34:01,11:22:57,14:33:28,11:18:50,16:24:47,14:23:17,10:22:02,14:50:10,14:31:49
subscriber_type,Explorer,Local365,Local30,Walk Up,Try Before You Buy Special,Explorer,Walk Up,Local30,Walk Up,Walk Up
