# Visualizing NYC Bus Timetables

Data source:

http://web.mta.info/developers/data/nyct/bus/google_transit_bronx.zip

http://web.mta.info/developers/data/nyct/bus/google_transit_brooklyn.zip

http://web.mta.info/developers/data/nyct/bus/google_transit_manhattan.zip

http://web.mta.info/developers/data/nyct/bus/google_transit_queens.zip

http://web.mta.info/developers/data/nyct/bus/google_transit_staten_island.zip

Import libraries.

In [18]:
import pandas as pd
import numpy as np
import datetime as dt

# Brooklyn

### Load GTFS data

In [2]:
agency = pd.read_csv('gtfs/brooklyn/agency.txt')
calendar_dates = pd.read_csv('gtfs/brooklyn/calendar_dates.txt')
calendar = pd.read_csv('gtfs/brooklyn/calendar.txt')
routes = pd.read_csv('gtfs/brooklyn/routes.txt')
shapes = pd.read_csv('gtfs/brooklyn/shapes.txt')
stop_times = pd.read_csv('gtfs/brooklyn/stop_times.txt')
stops = pd.read_csv('gtfs/brooklyn/stops.txt')
trips = pd.read_csv('gtfs/brooklyn/trips.txt')

### Preview the data

In [3]:
agency.head()

Unnamed: 0,agency_id,agency_name,agency_url,agency_timezone,agency_lang,agency_phone
0,MTA NYCT,MTA New York City Transit,http://www.mta.info,America/New_York,en,718-330-1234


In [4]:
calendar_dates.head()

Unnamed: 0,service_id,date,exception_type
0,EN_A7-Weekday-SDon,20170116,2
1,EN_A7-Weekday-SDon,20170220,2
2,EN_A7-Weekday-SDon,20170221,2
3,EN_A7-Weekday-SDon,20170222,2
4,EN_A7-Weekday-SDon,20170223,2


In [5]:
calendar.head()

Unnamed: 0,service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date
0,EN_A7-Sunday,0,0,0,0,0,0,1,20170108,20170326
1,EN_A7-Weekday-SDon,1,1,1,1,1,0,0,20170109,20170331
2,EN_A7-Saturday,0,0,0,0,0,1,0,20170114,20170401
3,EN_H7-Weekday,1,1,1,1,1,0,0,20170116,20170116
4,EN_A7-Weekday,1,1,1,1,1,0,0,20170221,20170224


In [7]:
routes.head(2)

Unnamed: 0,route_id,agency_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color
0,B1,MTA NYCT,B1,Bay Ridge - Manhattan Beach,via 86th St / Ocean Pkwy,3,http://web.mta.info/nyct/bus/schedule/bkln/b00...,00AEEF,FFFFFF
1,B11,MTA NYCT,B11,Sunset Park - Midwood,via 49th & 50th St / Avenue J,3,http://web.mta.info/nyct/bus/schedule/bkln/b01...,006CB7,FFFFFF


In [8]:
shapes.head(2)

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence
0,B10163,40.6219,-74.028521,10001
1,B10163,40.621997,-74.028681,10002


In [9]:
stop_times.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type
0,EN_H7-Weekday-008100_B12_1,01:21:00,01:21:00,801177,1,0,0
1,EN_H7-Weekday-008100_B12_1,01:22:36,01:22:36,301386,2,0,0
2,EN_H7-Weekday-008100_B12_1,01:23:07,01:23:07,301387,3,0,0
3,EN_H7-Weekday-008100_B12_1,01:24:01,01:24:01,301388,4,0,0
4,EN_H7-Weekday-008100_B12_1,01:24:18,01:24:18,301389,5,0,0


In [11]:
stops.head()

Unnamed: 0,stop_id,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station
0,300000,ORIENTAL BL/MACKENZIE ST,,40.578251,-73.939743,,,0,
1,300002,ORIENTAL BL/JAFFRAY ST,,40.578068,-73.943031,,,0,
2,300003,ORIENTAL BL/HASTINGS ST,,40.577961,-73.944664,,,0,
3,300004,ORIENTAL BL/FALMOUTH ST,,40.577721,-73.946205,,,0,
4,300006,ORIENTAL BL/DOVER ST,,40.577354,-73.949554,,,0,


In [12]:
trips.head()

Unnamed: 0,route_id,service_id,trip_id,trip_headsign,direction_id,shape_id
0,B12,EN_H7-Weekday,EN_H7-Weekday-008100_B12_1,LEFFRTS GDNS OCEAN AV,1,B120114
1,B12,EN_H7-Weekday,EN_H7-Weekday-011000_B12_1,EAST NY ALABAMA AV,0,B120113
2,B12,EN_H7-Weekday,EN_H7-Weekday-016100_B12_1,LEFFRTS GDNS OCEAN AV,1,B120114
3,B12,EN_H7-Weekday,EN_H7-Weekday-019000_B12_1,EAST NY ALABAMA AV,0,B120113
4,B12,EN_H7-Weekday,EN_H7-Weekday-012100_B12_2,LEFFRTS GDNS OCEAN AV,1,B120114


### Merge calendar, service_id and trip_id information

Join calendar and trips on service_id = trip_id.

In [13]:
service_trip_id = pd.merge(calendar[['service_id']],trips[['service_id', 'trip_id']])
service_trip_id.head()

Unnamed: 0,service_id,trip_id
0,EN_A7-Sunday,EN_A7-Sunday-060000_B84_201
1,EN_A7-Sunday,EN_A7-Sunday-063000_B65_101
2,EN_A7-Sunday,EN_A7-Sunday-066000_B84_201
3,EN_A7-Sunday,EN_A7-Sunday-069000_B65_101
4,EN_A7-Sunday,EN_A7-Sunday-072000_B84_201


Join calendar with trip_id.

In [14]:
calendar = pd.merge(calendar,service_trip_id)
calendar.head()

Unnamed: 0,service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date,trip_id
0,EN_A7-Sunday,0,0,0,0,0,0,1,20170108,20170326,EN_A7-Sunday-060000_B84_201
1,EN_A7-Sunday,0,0,0,0,0,0,1,20170108,20170326,EN_A7-Sunday-063000_B65_101
2,EN_A7-Sunday,0,0,0,0,0,0,1,20170108,20170326,EN_A7-Sunday-066000_B84_201
3,EN_A7-Sunday,0,0,0,0,0,0,1,20170108,20170326,EN_A7-Sunday-069000_B65_101
4,EN_A7-Sunday,0,0,0,0,0,0,1,20170108,20170326,EN_A7-Sunday-072000_B84_201


Here's what the stop_times table looks like:

In [15]:
stop_times.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type
0,EN_H7-Weekday-008100_B12_1,01:21:00,01:21:00,801177,1,0,0
1,EN_H7-Weekday-008100_B12_1,01:22:36,01:22:36,301386,2,0,0
2,EN_H7-Weekday-008100_B12_1,01:23:07,01:23:07,301387,3,0,0
3,EN_H7-Weekday-008100_B12_1,01:24:01,01:24:01,301388,4,0,0
4,EN_H7-Weekday-008100_B12_1,01:24:18,01:24:18,301389,5,0,0


### Add abritrary arrival and departure dates to timetables

Define a function to add arrival dates to stop_times tables. We will use 1/5/15 as the start date because that is the date we are currently using to visualize taxi trips. This code deals with arrival times > 24 hours and converts them to 1/6/15.

In [39]:
arbitrary_dates = ['1/5/2015', '1/6/2015']

In [41]:
def add_arrival_date(df, dates = arbitrary_dates):
    df = df.copy()
    arrival_date = []
    arrival_time = []

    for i in df['arrival_time']:
        hour = i[:i.find(':')]
        minute = i[i.find(':')+1:i.find(':',4)]
        second = i[i.find(':',5)+1:]
        
        if int(hour) < 24:
            arrival_date.append(dates[0])
            arrival_time.append(i)
        elif 24 <= int(hour) < 48:
            arrival_date.append(dates[1])
            hour = int(hour) - 24
            arrival_time.append(str(hour)+":"+minute+":"+second)
        else:
            arrival_date.append('NA')
            
    df['arrival_date'] = arrival_date
    df['arrival_time'] = arrival_time
    return df

Define a function to add departure dates to stop_times tables. We will use 1/5/15 as the start date because that is the date we are currently using to visualize taxi trips. This code deals with departure times > 24 hours and converts them to 1/6/15.

In [42]:
def add_departure_date(df, dates = arbitrary_dates):
    df = df.copy()
    departure_date = []
    departure_time = []

    for i in df['departure_time']:
        hour = i[:i.find(':')]
        minute = i[i.find(':')+1:i.find(':',4)]
        second = i[i.find(':',5)+1:]
        
        if int(hour) < 24:
            departure_date.append(dates[0])
            departure_time.append(i)
        elif 24 <= int(hour) < 48:
            departure_date.append(dates[1])
            hour = int(hour) - 24
            departure_time.append(str(hour)+":"+minute+":"+second)
        else:
            departure_date.append('NA')
            
    df['departure_date'] = departure_date
    df['departure_time'] = departure_time
    return df

### Get timetables for each day of the week

In [43]:
monday = calendar[calendar['monday'] == 1]
tuesday = calendar[calendar['tuesday'] == 1]
wednesday = calendar[calendar['wednesday'] == 1]
thursday = calendar[calendar['thursday'] == 1]
friday = calendar[calendar['friday'] == 1]
saturday = calendar[calendar['saturday'] == 1]
sunday = calendar[calendar['sunday'] == 1]

Store the results here:

In [44]:
triplist = []

### Monday

In [45]:
monday.head(2)

Unnamed: 0,service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date,trip_id
1955,EN_A7-Weekday-SDon,1,1,1,1,1,0,0,20170109,20170331,EN_A7-Weekday-SDon-007000_Q24_401
1956,EN_A7-Weekday-SDon,1,1,1,1,1,0,0,20170109,20170331,EN_A7-Weekday-SDon-012000_Q24_401


In [46]:
monday_trips = monday['trip_id']
print "There are ", len(monday_trips), "trips on Monday"
monday_trips.head()

There are  39306 trips on Monday


1955    EN_A7-Weekday-SDon-007000_Q24_401
1956    EN_A7-Weekday-SDon-012000_Q24_401
1957    EN_A7-Weekday-SDon-016000_Q24_401
1958    EN_A7-Weekday-SDon-020000_Q24_401
1959    EN_A7-Weekday-SDon-024500_Q24_401
Name: trip_id, dtype: object

In [49]:
for i in monday_trips:
    df = stop_times[stop_times['trip_id'] == i]
    df = add_arrival_date(df, dates = arbitrary_dates)
    df = add_departure_date(df, dates = arbitrary_dates)
    
    df['arrival_datetime'] = pd.to_datetime(df['arrival_date'] + ' ' + df['arrival_time'])
    df['departure_datetime'] = pd.to_datetime(df['departure_date'] + ' ' + df['departure_time'])


    # join df with stops
    df = pd.merge(df, stops[['stop_id', 'stop_name', 'stop_lat', 'stop_lon']],left_on='stop_id', right_on='stop_id')

    # join df with trips to get direction id
    df = pd.merge(df, trips[['trip_id', 'direction_id', 'route_id']], left_on='trip_id', right_on='trip_id', how='left')

    # join df with routes to get route id
    df = pd.merge(df, routes[['route_id', 'route_long_name']], left_on='route_id', right_on='route_id', how='left')

    # make a new dataframe called legs
    legs = pd.DataFrame()
    legs['starttime'] = df['departure_datetime']
    legs['stoptime'] = df['arrival_datetime'].shift(-1).fillna(method='ffill')
    legs['tripduration'] = (legs['stoptime'] - legs['starttime'])/np.timedelta64(1, 's')
    legs['tripduration'] = [int(x) for x in legs['tripduration']]
    legs['start station id'] = df['stop_id']
    legs['start station name'] = df['stop_name']
    legs['start station latitude'] = df['stop_lat']
    legs['start station longitude'] = df['stop_lon']
    legs['end station id'] = legs['start station id'].shift(-1).fillna(method='ffill')
    legs['end station name'] = legs['start station name'].shift(-1).fillna(method='ffill')
    legs['end station latitude'] = legs['start station latitude'].shift(-1).fillna(method='ffill')
    legs['end station longitude'] = legs['start station longitude'].shift(-1).fillna(method='ffill')
    legs['route_id'] = 1 # arbitrary
    legs['direction'] = df['direction_id']
    legs['trainName'] = df['route_long_name']

    triplist.append(legs)

In [50]:
triplist[1]

Unnamed: 0,starttime,stoptime,tripduration,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,end station longitude,route_id,direction,trainName
0,2015-01-05 02:00:00,2015-01-05 02:00:34,34,504384,JAMAICA AV/170 ST,40.707287,-73.789581,503185.0,JAMAICA AV/168 PL,40.706787,-73.791458,1,1,Broadway Junction - Jamaica
1,2015-01-05 02:00:34,2015-01-05 02:01:38,64,503185,JAMAICA AV/168 PL,40.706787,-73.791458,504535.0,JAMAICA AV/165 ST,40.705578,-73.794777,1,1,Broadway Junction - Jamaica
2,2015-01-05 02:01:38,2015-01-05 02:02:44,66,504535,JAMAICA AV/165 ST,40.705578,-73.794777,503953.0,JAMAICA AV/162 ST,40.704224,-73.798172,1,1,Broadway Junction - Jamaica
3,2015-01-05 02:02:44,2015-01-05 02:03:33,49,503953,JAMAICA AV/162 ST,40.704224,-73.798172,503932.0,JAMAICA AV/PARSONS BL,40.703278,-73.800682,1,1,Broadway Junction - Jamaica
4,2015-01-05 02:03:33,2015-01-05 02:04:40,67,503932,JAMAICA AV/PARSONS BL,40.703278,-73.800682,502145.0,JAMAICA AV/150 ST,40.702229,-73.804329,1,1,Broadway Junction - Jamaica
5,2015-01-05 02:04:40,2015-01-05 02:06:00,80,502145,JAMAICA AV/150 ST,40.702229,-73.804329,503184.0,JAMAICA AV/146 ST,40.701828,-73.808846,1,1,Broadway Junction - Jamaica
6,2015-01-05 02:06:00,2015-01-05 02:06:58,58,503184,JAMAICA AV/146 ST,40.701828,-73.808846,504198.0,JAMAICA AV/144 ST,40.702312,-73.811432,1,1,Broadway Junction - Jamaica
7,2015-01-05 02:06:58,2015-01-05 02:08:00,62,504198,JAMAICA AV/144 ST,40.702312,-73.811432,504199.0,JAMAICA AV/QUEENS BL,40.702587,-73.814247,1,1,Broadway Junction - Jamaica
8,2015-01-05 02:08:00,2015-01-05 02:08:27,27,504199,JAMAICA AV/QUEENS BL,40.702587,-73.814247,504293.0,JAMAICA AV/138 ST,40.702545,-73.815773,1,1,Broadway Junction - Jamaica
9,2015-01-05 02:08:27,2015-01-05 02:09:06,39,504293,JAMAICA AV/138 ST,40.702545,-73.815773,504080.0,JAMAICA AV/METROPOLITAN AV,40.702457,-73.817993,1,1,Broadway Junction - Jamaica


Concatenate the list of dataframes:

In [None]:
data = pd.concat(triplist)

In [None]:
len(data)

In [None]:
data.head(2)

In [None]:
data.tail(2)

In [None]:
data.to_csv('brooklyn_monday_test.csv')

# Queens

### Load GTFS data

In [None]:
agency = pd.read_csv('gtfs/queens/agency.txt')
calendar_dates = pd.read_csv('gtfs/queens/calendar_dates.txt')
calendar = pd.read_csv('gtfs/queens/calendar.txt')
routes = pd.read_csv('gtfs/queens/routes.txt')
shapes = pd.read_csv('gtfs/queens/shapes.txt')
stop_times = pd.read_csv('gtfs/queens/stop_times.txt')
stops = pd.read_csv('gtfs/queens/stops.txt')
trips = pd.read_csv('gtfs/queens/trips.txt')

In [None]:
service_trip_id = pd.merge(calendar[['service_id']],trips[['service_id', 'trip_id']])
calendar = pd.merge(calendar,service_trip_id)
arbitrary_dates = ['1/5/2015', '1/6/2015']

### Monday

In [None]:
monday = calendar[calendar['monday'] == 1]
tuesday = calendar[calendar['tuesday'] == 1]
wednesday = calendar[calendar['wednesday'] == 1]
thursday = calendar[calendar['thursday'] == 1]
friday = calendar[calendar['friday'] == 1]
saturday = calendar[calendar['saturday'] == 1]
sunday = calendar[calendar['sunday'] == 1]

In [None]:
triplist = []

In [None]:
monday_trips = monday['trip_id']
print "There are ", len(monday_trips), "trips on Monday"
monday_trips.head()

In [None]:
for i in monday_trips:
    df = stop_times[stop_times['trip_id'] == i]
    df = add_arrival_date(df, dates = arbitrary_dates)
    df = add_departure_date(df, dates = arbitrary_dates)
    
    df['arrival_datetime'] = pd.to_datetime(df['arrival_date'] + ' ' + df['arrival_time'])
    df['departure_datetime'] = pd.to_datetime(df['departure_date'] + ' ' + df['departure_time'])


    # join df with stops
    df = pd.merge(df, stops[['stop_id', 'stop_name', 'stop_lat', 'stop_lon']],left_on='stop_id', right_on='stop_id')

    # join df with trips to get direction id
    df = pd.merge(df, trips[['trip_id', 'direction_id', 'route_id']], left_on='trip_id', right_on='trip_id', how='left')

    # join df with routes to get route id
    df = pd.merge(df, routes[['route_id', 'route_long_name']], left_on='route_id', right_on='route_id', how='left')

    # make a new dataframe called legs
    legs = pd.DataFrame()
    legs['starttime'] = df['departure_datetime']
    legs['stoptime'] = df['arrival_datetime'].shift(-1).fillna(method='ffill')
    legs['tripduration'] = (legs['stoptime'] - legs['starttime'])/np.timedelta64(1, 's')
    legs['tripduration'] = [int(x) for x in legs['tripduration']]
    legs['start station id'] = df['stop_id']
    legs['start station name'] = df['stop_name']
    legs['start station latitude'] = df['stop_lat']
    legs['start station longitude'] = df['stop_lon']
    legs['end station id'] = legs['start station id'].shift(-1).fillna(method='ffill')
    legs['end station name'] = legs['start station name'].shift(-1).fillna(method='ffill')
    legs['end station latitude'] = legs['start station latitude'].shift(-1).fillna(method='ffill')
    legs['end station longitude'] = legs['start station longitude'].shift(-1).fillna(method='ffill')
    legs['route_id'] = 1 # arbitrary
    legs['direction'] = df['direction_id']
    legs['trainName'] = df['route_long_name']

    triplist.append(legs)

In [None]:
triplist[1]

In [None]:
data = pd.concat(triplist)

In [None]:
len(data)

In [None]:
data.head(2)

In [None]:
data.tail(2)

In [None]:
data.to_csv('queens_monday.csv')

# Manhattan

### Load GTFS data

In [None]:
agency = pd.read_csv('gtfs/manhattan/agency.txt')
calendar_dates = pd.read_csv('gtfs/manhattan/calendar_dates.txt')
calendar = pd.read_csv('gtfs/manhattan/calendar.txt')
routes = pd.read_csv('gtfs/manhattan/routes.txt')
shapes = pd.read_csv('gtfs/manhattan/shapes.txt')
stop_times = pd.read_csv('gtfs/manhattan/stop_times.txt')
stops = pd.read_csv('gtfs/manhattan/stops.txt')
trips = pd.read_csv('gtfs/manhattan/trips.txt')

service_trip_id = pd.merge(calendar[['service_id']],trips[['service_id', 'trip_id']])
calendar = pd.merge(calendar,service_trip_id)
arbitrary_dates = ['1/5/2015', '1/6/2015']

monday = calendar[calendar['monday'] == 1]
tuesday = calendar[calendar['tuesday'] == 1]
wednesday = calendar[calendar['wednesday'] == 1]
thursday = calendar[calendar['thursday'] == 1]
friday = calendar[calendar['friday'] == 1]
saturday = calendar[calendar['saturday'] == 1]
sunday = calendar[calendar['sunday'] == 1]

triplist = []

monday_trips = monday['trip_id']
print "There are ", len(monday_trips), "trips on Monday"
monday_trips.head()

### Monday

In [None]:
for i in monday_trips:
    df = stop_times[stop_times['trip_id'] == i]
    df = add_arrival_date(df, dates = arbitrary_dates)
    df = add_departure_date(df, dates = arbitrary_dates)
    
    df['arrival_datetime'] = pd.to_datetime(df['arrival_date'] + ' ' + df['arrival_time'])
    df['departure_datetime'] = pd.to_datetime(df['departure_date'] + ' ' + df['departure_time'])


    # join df with stops
    df = pd.merge(df, stops[['stop_id', 'stop_name', 'stop_lat', 'stop_lon']],left_on='stop_id', right_on='stop_id')

    # join df with trips to get direction id
    df = pd.merge(df, trips[['trip_id', 'direction_id', 'route_id']], left_on='trip_id', right_on='trip_id', how='left')

    # join df with routes to get route id
    df = pd.merge(df, routes[['route_id', 'route_long_name']], left_on='route_id', right_on='route_id', how='left')

    # make a new dataframe called legs
    legs = pd.DataFrame()
    legs['starttime'] = df['departure_datetime']
    legs['stoptime'] = df['arrival_datetime'].shift(-1).fillna(method='ffill')
    legs['tripduration'] = (legs['stoptime'] - legs['starttime'])/np.timedelta64(1, 's')
    legs['tripduration'] = [int(x) for x in legs['tripduration']]
    legs['start station id'] = df['stop_id']
    legs['start station name'] = df['stop_name']
    legs['start station latitude'] = df['stop_lat']
    legs['start station longitude'] = df['stop_lon']
    legs['end station id'] = legs['start station id'].shift(-1).fillna(method='ffill')
    legs['end station name'] = legs['start station name'].shift(-1).fillna(method='ffill')
    legs['end station latitude'] = legs['start station latitude'].shift(-1).fillna(method='ffill')
    legs['end station longitude'] = legs['start station longitude'].shift(-1).fillna(method='ffill')
    legs['route_id'] = 1 # arbitrary
    legs['direction'] = df['direction_id']
    legs['trainName'] = df['route_long_name']

    triplist.append(legs)

In [None]:
data = pd.concat(triplist)
data.to_csv('manhattan_monday.csv')

# Bronx

### Load GTFS data

In [None]:
agency = pd.read_csv('gtfs/bronx/agency.txt')
calendar_dates = pd.read_csv('gtfs/bronx/calendar_dates.txt')
calendar = pd.read_csv('gtfs/bronx/calendar.txt')
routes = pd.read_csv('gtfs/bronx/routes.txt')
shapes = pd.read_csv('gtfs/bronx/shapes.txt')
stop_times = pd.read_csv('gtfs/bronx/stop_times.txt')
stops = pd.read_csv('gtfs/bronx/stops.txt')
trips = pd.read_csv('gtfs/bronx/trips.txt')

service_trip_id = pd.merge(calendar[['service_id']],trips[['service_id', 'trip_id']])
calendar = pd.merge(calendar,service_trip_id)
arbitrary_dates = ['1/5/2015', '1/6/2015']

monday = calendar[calendar['monday'] == 1]
tuesday = calendar[calendar['tuesday'] == 1]
wednesday = calendar[calendar['wednesday'] == 1]
thursday = calendar[calendar['thursday'] == 1]
friday = calendar[calendar['friday'] == 1]
saturday = calendar[calendar['saturday'] == 1]
sunday = calendar[calendar['sunday'] == 1]

triplist = []

monday_trips = monday['trip_id']
print "There are ", len(monday_trips), "trips on Monday"
monday_trips.head()

### Monday

In [None]:
for i in monday_trips:
    df = stop_times[stop_times['trip_id'] == i]
    df = add_arrival_date(df, dates = arbitrary_dates)
    df = add_departure_date(df, dates = arbitrary_dates)
    
    df['arrival_datetime'] = pd.to_datetime(df['arrival_date'] + ' ' + df['arrival_time'])
    df['departure_datetime'] = pd.to_datetime(df['departure_date'] + ' ' + df['departure_time'])


    # join df with stops
    df = pd.merge(df, stops[['stop_id', 'stop_name', 'stop_lat', 'stop_lon']],left_on='stop_id', right_on='stop_id')

    # join df with trips to get direction id
    df = pd.merge(df, trips[['trip_id', 'direction_id', 'route_id']], left_on='trip_id', right_on='trip_id', how='left')

    # join df with routes to get route id
    df = pd.merge(df, routes[['route_id', 'route_long_name']], left_on='route_id', right_on='route_id', how='left')

    # make a new dataframe called legs
    legs = pd.DataFrame()
    legs['starttime'] = df['departure_datetime']
    legs['stoptime'] = df['arrival_datetime'].shift(-1).fillna(method='ffill')
    legs['tripduration'] = (legs['stoptime'] - legs['starttime'])/np.timedelta64(1, 's')
    legs['tripduration'] = [int(x) for x in legs['tripduration']]
    legs['start station id'] = df['stop_id']
    legs['start station name'] = df['stop_name']
    legs['start station latitude'] = df['stop_lat']
    legs['start station longitude'] = df['stop_lon']
    legs['end station id'] = legs['start station id'].shift(-1).fillna(method='ffill')
    legs['end station name'] = legs['start station name'].shift(-1).fillna(method='ffill')
    legs['end station latitude'] = legs['start station latitude'].shift(-1).fillna(method='ffill')
    legs['end station longitude'] = legs['start station longitude'].shift(-1).fillna(method='ffill')
    legs['route_id'] = 1 # arbitrary
    legs['direction'] = df['direction_id']
    legs['trainName'] = df['route_long_name']

    triplist.append(legs)

In [None]:
data = pd.concat(triplist)
data.to_csv('bronx_monday.csv')

# Staten Island

### Load GTFS data

In [None]:
agency = pd.read_csv('gtfs/staten_island/agency.txt')
calendar_dates = pd.read_csv('gtfs/staten_island/calendar_dates.txt')
calendar = pd.read_csv('gtfs/staten_island/calendar.txt')
routes = pd.read_csv('gtfs/staten_island/routes.txt')
shapes = pd.read_csv('gtfs/staten_island/shapes.txt')
stop_times = pd.read_csv('gtfs/staten_island/stop_times.txt')
stops = pd.read_csv('gtfs/staten_island/stops.txt')
trips = pd.read_csv('gtfs/staten_island/trips.txt')

service_trip_id = pd.merge(calendar[['service_id']],trips[['service_id', 'trip_id']])
calendar = pd.merge(calendar,service_trip_id)
arbitrary_dates = ['1/5/2015', '1/6/2015']

monday = calendar[calendar['monday'] == 1]
tuesday = calendar[calendar['tuesday'] == 1]
wednesday = calendar[calendar['wednesday'] == 1]
thursday = calendar[calendar['thursday'] == 1]
friday = calendar[calendar['friday'] == 1]
saturday = calendar[calendar['saturday'] == 1]
sunday = calendar[calendar['sunday'] == 1]

triplist = []

monday_trips = monday['trip_id']
print "There are ", len(monday_trips), "trips on Monday"
monday_trips.head()

### Monday

In [None]:
for i in monday_trips:
    df = stop_times[stop_times['trip_id'] == i]
    df = add_arrival_date(df, dates = arbitrary_dates)
    df = add_departure_date(df, dates = arbitrary_dates)
    
    df['arrival_datetime'] = pd.to_datetime(df['arrival_date'] + ' ' + df['arrival_time'])
    df['departure_datetime'] = pd.to_datetime(df['departure_date'] + ' ' + df['departure_time'])


    # join df with stops
    df = pd.merge(df, stops[['stop_id', 'stop_name', 'stop_lat', 'stop_lon']],left_on='stop_id', right_on='stop_id')

    # join df with trips to get direction id
    df = pd.merge(df, trips[['trip_id', 'direction_id', 'route_id']], left_on='trip_id', right_on='trip_id', how='left')

    # join df with routes to get route id
    df = pd.merge(df, routes[['route_id', 'route_long_name']], left_on='route_id', right_on='route_id', how='left')

    # make a new dataframe called legs
    legs = pd.DataFrame()
    legs['starttime'] = df['departure_datetime']
    legs['stoptime'] = df['arrival_datetime'].shift(-1).fillna(method='ffill')
    legs['tripduration'] = (legs['stoptime'] - legs['starttime'])/np.timedelta64(1, 's')
    legs['tripduration'] = [int(x) for x in legs['tripduration']]
    legs['start station id'] = df['stop_id']
    legs['start station name'] = df['stop_name']
    legs['start station latitude'] = df['stop_lat']
    legs['start station longitude'] = df['stop_lon']
    legs['end station id'] = legs['start station id'].shift(-1).fillna(method='ffill')
    legs['end station name'] = legs['start station name'].shift(-1).fillna(method='ffill')
    legs['end station latitude'] = legs['start station latitude'].shift(-1).fillna(method='ffill')
    legs['end station longitude'] = legs['start station longitude'].shift(-1).fillna(method='ffill')
    legs['route_id'] = 1 # arbitrary
    legs['direction'] = df['direction_id']
    legs['trainName'] = df['route_long_name']

    triplist.append(legs)

In [None]:
data = pd.concat(triplist)
data.to_csv('staten_island_monday.csv')