In [1]:
import pandas as pd
import numpy as np
import geopandas

In [6]:
# Load the turnstile-to-station mapping table from CSV into a DataFrame.
stations_df = pd.read_csv("../data/Stations/MTA_Station_Mapping_from_turnstile_data.csv")

In [7]:
# List the columns available in the raw station mapping.
stations_df.columns

Index(['station_code', 'station', 'line_name', 'C/A', 'unit', 'GTFS_stop_id',
       'station_name', 'station_id', 'lat', 'long'],
      dtype='object')

In [40]:
# Turn the station table into a GeoDataFrame so it can take part in a spatial join.
# Filter from the raw `stations_df` (not from `stations`, which does not exist yet on a
# fresh kernel), and drop rows missing either coordinate, since a point needs both.
stations = stations_df.dropna(subset=["lat", "long"])
# points_from_xy expects x (longitude) first, then y (latitude).
# NOTE(review): no CRS is assigned here - confirm it matches the tracts layer before joining.
stations = geopandas.GeoDataFrame(
    stations, geometry=geopandas.points_from_xy(stations["long"], stations["lat"]))

In [43]:
# Sanity check for null geometries: show any station row still missing a coordinate.
# Both lat and long are checked because the point geometry is built from both;
# an empty result means every row has usable coordinates.
stations[stations[["lat", "long"]].isna().any(axis=1)]

Unnamed: 0,station_code,station,line_name,C/A,unit,GTFS_stop_id,station_name,station_id,lat,long,geometry


In [44]:
# Load the census-tract shapefile; these geometries are joined onto the stations below.
tracts = geopandas.read_file("../data/CensusTracts/geo_export_a7ef6b7d-a2a7-4254-9883-c91526ccfbe0.shp")

In [45]:
# Attach census-tract attributes to each station via a spatial join.
# how="left" keeps every station row, even one that falls in no tract.
# The spatial test is left at its default ("intersects"): the old `op=` keyword was
# deprecated in favour of `predicate=`, and omitting it works on either geopandas version.
stationsWithTracts = geopandas.sjoin(stations, tracts, how="left")

In [47]:
# Inspect the columns produced by the spatial join.
stationsWithTracts.columns

Index(['station_code', 'station', 'line_name', 'C/A', 'unit', 'GTFS_stop_id',
       'station_name', 'station_id', 'lat', 'long', 'geometry', 'index_right',
       'boro_code', 'boro_ct201', 'boro_name', 'cdeligibil', 'ct2010',
       'ctlabel', 'ntacode', 'ntaname', 'puma', 'shape_area', 'shape_leng'],
      dtype='object')

In [51]:
# Columns retained in the exported table, in output order.
columnsToKeep = [
    "station_code",
    "station",
    "GTFS_stop_id",
    "C/A",
    "line_name",
    "ct2010",
    "ntaname",
    "ctlabel",
    "geometry",
    "lat",
    "long",
]

In [52]:
# Sanity check: show any joined rows whose geometry is empty (none expected).
stationsWithTracts[stationsWithTracts.is_empty]

Unnamed: 0,station_code,station,line_name,C/A,unit,GTFS_stop_id,station_name,station_id,lat,long,...,boro_ct201,boro_name,cdeligibil,ct2010,ctlabel,ntacode,ntaname,puma,shape_area,shape_leng


In [53]:
# Trim the joined table down to the columns listed in columnsToKeep, in that order.
stationsWithTracts = stationsWithTracts.loc[:, columnsToKeep]

In [54]:
# Preview the first rows of the trimmed table.
stationsWithTracts.head()

Unnamed: 0,station_code,station,GTFS_stop_id,C/A,line_name,ct2010,ntaname,ctlabel,geometry,lat,long
0,A002R051,59 ST,R11,A002,NQR456W,11402,Upper East Side-Carnegie Hill,114.02,POINT (-73.967258 40.76266),40.76266,-73.967258
1,A006R079,5 AV/59 ST,R13,A006,NQRW,14300,park-cemetery-etc-Manhattan,143.0,POINT (-73.973347 40.764811),40.764811,-73.973347
2,A007R079,5 AV/59 ST,R13,A007,NQRW,14300,park-cemetery-etc-Manhattan,143.0,POINT (-73.973347 40.764811),40.764811,-73.973347
3,A010R080,57 ST-7 AV,R14,A010,NQRW,13700,Midtown-Midtown South,137.0,POINT (-73.98065799999999 40.764664),40.764664,-73.980658
4,A011R080,57 ST-7 AV,R14,A011,NQRW,13700,Midtown-Midtown South,137.0,POINT (-73.98065799999999 40.764664),40.764664,-73.980658


In [55]:
# Write the result to CSV; index=False leaves the row index out of the file.
stationsWithTracts.to_csv("../data/output/stationsWithTracts.csv", index=False)