In [1]:
import pandas as pd
import numpy as np
import geopandas

In [2]:
# Load the station mapping table derived from the MTA turnstile data.
stations_df = pd.read_csv(
    "../data/Stations/MTA_Station_Mapping_from_turnstile_data.csv"
)

In [3]:
# List the columns available in the raw station mapping.
stations_df.columns

Index(['station_code', 'station', 'line_name', 'C/A', 'unit', 'GTFS_stop_id',
       'station_name', 'station_id', 'lat', 'long'],
      dtype='object')

In [6]:
# Turn the station table into a GeoDataFrame so it can take part in a spatial join.
# Rows missing either coordinate are dropped first: a point built from a NaN
# latitude *or* longitude is not a usable geometry.
stations = stations_df.dropna(subset=["lat", "long"]).copy()  # copy so stations_df is never aliased
stations = geopandas.GeoDataFrame(
    stations,
    # points_from_xy takes x (longitude) first, then y (latitude)
    geometry=geopandas.points_from_xy(stations["long"], stations["lat"]),
    # assumes the coordinates are WGS84 decimal degrees — TODO confirm against the data source
    crs="EPSG:4326",
)

In [7]:
# Sanity check: list any station that is missing a coordinate. A point needs both
# latitude and longitude, so either one being null means a broken geometry.
# An empty result means every remaining station has both coordinates.
stations[stations[["lat", "long"]].isna().any(axis=1)]

Unnamed: 0,station_code,station,line_name,C/A,unit,GTFS_stop_id,station_name,station_id,lat,long,geometry


In [8]:
# Read the census-tract shapefile.
tracts = geopandas.read_file(
    "../data/CensusTracts/geo_export_a7ef6b7d-a2a7-4254-9883-c91526ccfbe0.shp"
)

In [9]:
# Attach to each station the attributes of the census tract it intersects.
# how="left" keeps every station row, including any that match no tract.
# The spatial predicate is deliberately left at its default ("intersects"):
# the keyword that selects it was renamed from `op` to `predicate` in newer
# geopandas releases, so relying on the default works on old and new versions.
stationsWithTracts = geopandas.sjoin(stations, tracts, how="left")

In [10]:
# Show the column set after the join (station columns plus the tract columns).
stationsWithTracts.columns

Index(['station_code', 'station', 'line_name', 'C/A', 'unit', 'GTFS_stop_id',
       'station_name', 'station_id', 'lat', 'long', 'geometry', 'index_right',
       'boro_code', 'boro_ct201', 'boro_name', 'cdeligibil', 'ct2010',
       'ctlabel', 'ntacode', 'ntaname', 'puma', 'shape_area', 'shape_leng'],
      dtype='object')

In [11]:
# Columns carried into the exported table, in output order.
columnsToKeep = [
    # station identifiers (from the station mapping)
    "station_code", "station", "GTFS_stop_id", "C/A", "line_name",
    # census-tract attributes (added by the spatial join)
    "ct2010", "ntaname", "ctlabel",
    # location
    "geometry", "lat", "long",
]

In [12]:
# Display any joined rows whose point geometry is empty.
stationsWithTracts.loc[stationsWithTracts.geometry.is_empty]

Unnamed: 0,station_code,station,line_name,C/A,unit,GTFS_stop_id,station_name,station_id,lat,long,...,boro_ct201,boro_name,cdeligibil,ct2010,ctlabel,ntacode,ntaname,puma,shape_area,shape_leng


In [13]:
# Narrow the joined frame to the columns listed in columnsToKeep (the same name is rebound).
stationsWithTracts = stationsWithTracts[columnsToKeep]

In [14]:
# Preview the first rows of the trimmed table.
stationsWithTracts.head()

Unnamed: 0,station_code,station,GTFS_stop_id,C/A,line_name,ct2010,ntaname,ctlabel,geometry,lat,long
0,H007AR248,1 AV,L06,H007A,L,3400,East Village,34,POINT (-73.981628 40.730953),40.730953,-73.981628
1,H007R248,1 AV,L06,H007,L,3400,East Village,34,POINT (-73.981628 40.730953),40.730953,-73.981628
2,H008R248,1 AV,L06,H008,L,3400,East Village,34,POINT (-73.981628 40.730953),40.730953,-73.981628
3,N037R314,103 ST,A18,N037,BC,14300,park-cemetery-etc-Manhattan,143,POINT (-73.961454 40.796092),40.796092,-73.961454
4,R170R191,103 ST,119,R170,1,19100,Upper West Side,191,POINT (-73.968379 40.799446),40.799446,-73.968379


In [15]:
# Write the station/tract table to disk; index=False leaves the row index out of the file.
stationsWithTracts.to_csv(
    "../data/output/stationsWithTracts.csv",
    index=False,
)