In [33]:
import pandas as pd
import numpy as np
import geopandas

In [34]:
# Load the station master list and the lookup table whose `ca` / `stop_id`
# columns are joined onto the stations further down.
stations_df = pd.read_csv(filepath_or_buffer="./data/Stations/Stations.csv")
station_lookups = pd.read_csv(
    filepath_or_buffer="./data/Stations/turnstile_ca_gtfs_stop_id.csv"
)

In [35]:
# List the station columns to locate the join key and the coordinate fields.
stations_df.columns

Index(['Station ID', 'Complex ID', 'GTFS Stop ID', 'Division', 'Line',
       'Stop Name', 'Borough', 'Daytime Routes', 'Structure', 'GTFS Latitude',
       'GTFS Longitude', 'North Direction Label', 'South Direction Label'],
      dtype='object')

In [36]:
# Attach the lookup columns to the station rows. how="left" keeps every
# station, including those with no matching stop_id in the lookup table.
stationsWithLookups = stations_df.merge(
    station_lookups,
    how="left",
    left_on="GTFS Stop ID",
    right_on="stop_id",
)

In [37]:
# Build point geometries from the GTFS coordinates (x = longitude, y = latitude).
# GTFS publishes stop coordinates as WGS84 latitude/longitude, so the CRS is
# declared explicitly as EPSG:4326. Without it the frame carries no CRS and the
# spatial join against the census tracts cannot verify that both layers use the
# same coordinate system.
# NOTE(review): confirm the tract shapefile is also in WGS84; if it is not,
# reproject one layer with .to_crs() before joining.
station_points = geopandas.points_from_xy(
    stationsWithLookups["GTFS Longitude"],
    stationsWithLookups["GTFS Latitude"],
)
stations = geopandas.GeoDataFrame(
    stationsWithLookups,
    geometry=station_points,
    crs="EPSG:4326",
)

In [38]:
# Preview the first rows to confirm the lookup columns and the point geometry
# were added.
stations.head()

Unnamed: 0,Station ID,Complex ID,GTFS Stop ID,Division,Line,Stop Name,Borough,Daytime Routes,Structure,GTFS Latitude,GTFS Longitude,North Direction Label,South Direction Label,ca,stop_id,geometry
0,1,1,R01,BMT,Astoria,Astoria - Ditmars Blvd,Q,N W,Elevated,40.775036,-73.912034,,Manhattan,R515,R01,POINT (-73.91203399999999 40.775036)
1,2,2,R03,BMT,Astoria,Astoria Blvd,Q,N W,Elevated,40.770258,-73.917843,Ditmars Blvd,Manhattan,R514,R03,POINT (-73.917843 40.770258)
2,3,3,R04,BMT,Astoria,30 Av,Q,N W,Elevated,40.766779,-73.921479,Astoria - Ditmars Blvd,Manhattan,R513,R04,POINT (-73.92147900000001 40.766779)
3,4,4,R05,BMT,Astoria,Broadway,Q,N W,Elevated,40.76182,-73.925508,Astoria - Ditmars Blvd,Manhattan,R512,R05,POINT (-73.92550799999999 40.76182)
4,5,5,R06,BMT,Astoria,36 Av,Q,N W,Elevated,40.756804,-73.929575,Astoria - Ditmars Blvd,Manhattan,R511,R06,POINT (-73.929575 40.756804)


In [39]:
# Read the census tract shapefile that the stations are spatially joined to.
tracts = geopandas.read_file(
    filename="./data/CensusTracts/geo_export_a7ef6b7d-a2a7-4254-9883-c91526ccfbe0.shp"
)

In [40]:
# Tag each station with the census tract whose geometry its point intersects.
# how="left" keeps every station row; the tract columns are left empty when no
# tract matches.
# The match rule is deliberately left at sjoin's default, which is "intersects".
# The `op=` keyword that used to spell it out was deprecated in geopandas 0.10
# and removed in 1.0 (where it raises TypeError), while its replacement
# `predicate=` does not exist in older releases. Relying on the default is the
# one spelling that runs on both.
stationsWithTracts = geopandas.sjoin(stations, tracts, how="left")

In [41]:
# Preview the joined frame: station columns followed by the tract attributes.
stationsWithTracts.head()

Unnamed: 0,Station ID,Complex ID,GTFS Stop ID,Division,Line,Stop Name,Borough,Daytime Routes,Structure,GTFS Latitude,...,boro_ct201,boro_name,cdeligibil,ct2010,ctlabel,ntacode,ntaname,puma,shape_area,shape_leng
0,1,1,R01,BMT,Astoria,Astoria - Ditmars Blvd,Q,N W,Elevated,40.775036,...,4011500,Queens,E,11500,115,QN72,Steinway,4101,1997244.0,5907.89869
1,2,2,R03,BMT,Astoria,Astoria Blvd,Q,N W,Elevated,40.770258,...,4012500,Queens,E,12500,125,QN72,Steinway,4101,1752217.0,7021.101428
2,3,3,R04,BMT,Astoria,30 Av,Q,N W,Elevated,40.766779,...,4006300,Queens,E,6300,63,QN70,Astoria,4101,2288183.0,6357.914009
3,4,4,R05,BMT,Astoria,Broadway,Q,N W,Elevated,40.76182,...,4005900,Queens,I,5900,59,QN70,Astoria,4101,2047587.0,6129.011011
4,5,5,R06,BMT,Astoria,36 Av,Q,N W,Elevated,40.756804,...,4005300,Queens,E,5300,53,QN70,Astoria,4101,2111786.0,8537.628224


In [45]:
# Columns carried into the exported CSV: station and lookup fields first,
# then the census tract fields picked up by the spatial join.
columnsToKeep = [
    "Station ID",
    "Stop Name",
    "GTFS Stop ID",
    "ca",
    "Line",
    "Daytime Routes",
    "ct2010",
    "ntaname",
    "ctlabel",
]

In [46]:
# Sanity check before the geometry and most tract columns are trimmed away:
# list any station that has an empty geometry or that received no census tract
# from the left spatial join.
# `is_empty` alone only inspects the point geometry, so a station that matched
# no tract (tract columns all NaN) would never show up here. An empty result
# means every station has a point and a tract.
stationsWithTracts[stationsWithTracts.is_empty | stationsWithTracts["ct2010"].isna()]

Unnamed: 0,Station ID,Complex ID,GTFS Stop ID,Division,Line,Stop Name,Borough,Daytime Routes,Structure,GTFS Latitude,...,boro_ct201,boro_name,cdeligibil,ct2010,ctlabel,ntacode,ntaname,puma,shape_area,shape_leng


In [47]:
# Keep only the export columns. `columnsToKeep` does not include "geometry",
# so the point geometry is dropped here along with the unused tract fields.
# This rebinds `stationsWithTracts`; the full joined frame is no longer
# reachable under this name after the cell runs.
stationsWithTracts = stationsWithTracts[columnsToKeep]

In [48]:
# Preview the trimmed table that is about to be exported.
stationsWithTracts.head()

Unnamed: 0,Station ID,Stop Name,GTFS Stop ID,ca,Line,Daytime Routes,ct2010,ntaname,ctlabel
0,1,Astoria - Ditmars Blvd,R01,R515,Astoria,N W,11500,Steinway,115
1,2,Astoria Blvd,R03,R514,Astoria,N W,12500,Steinway,125
2,3,30 Av,R04,R513,Astoria,N W,6300,Astoria,63
3,4,Broadway,R05,R512,Astoria,N W,5900,Astoria,59
4,5,36 Av,R06,R511,Astoria,N W,5300,Astoria,53


In [49]:
# Write the trimmed station/tract table to disk. index=False leaves the row
# index out of the CSV.
stationsWithTracts.to_csv(
    path_or_buf="./data/output/stationsWithTracts.csv",
    index=False,
)