In [1]:
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
from google.colab import drive
# Mount Google Drive at /content/drive; the data files read and written below live under this path.
drive.mount('/content/drive')


Mounted at /content/drive


In [64]:
# All three King County layers sit in the same folder on the mounted Drive.
data_dir = '/content/drive/MyDrive/KingCountyTransit/KingCountyTransitData'

bus_routes_gdf = gpd.read_file(data_dir + '/KingCountyTransitRoutes.geojson')
bus_stops_gdf = gpd.read_file(data_dir + '/KingCountyTransitStops.geojson')
zip_codes_gdf = gpd.read_file(data_dir + '/KingCountyZipCodes.geojson')


In [4]:
# The layers may not share a coordinate system, so print each CRS
# (routes, stops, ZIP codes - in that order) to compare them.
for layer in (bus_routes_gdf, bus_stops_gdf, zip_codes_gdf):
    print(layer.crs)

EPSG:4326
EPSG:4326
EPSG:4326


In [60]:
# .info() writes its summary to stdout itself and returns None, so wrapping it
# in print() only appends a stray "None" line after each summary.
bus_routes_gdf.info()
bus_stops_gdf.info()
zip_codes_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 148 entries, 0 to 147
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   ROUTE_ID            148 non-null    int64   
 1   LOCAL_EXPRESS_CODE  148 non-null    object  
 2   ROUTE_NUM           148 non-null    int64   
 3   SHAPE_Length        148 non-null    float64 
 4   geometry            148 non-null    geometry
dtypes: float64(1), geometry(1), int64(2), object(1)
memory usage: 5.9+ KB
None
<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 23369 entries, 0 to 23368
Data columns (total 35 columns):
 #   Column                     Non-Null Count  Dtype              
---  ------                     --------------  -----              
 0   OBJECTID                   23369 non-null  int64              
 1   STOP_ID                    23369 non-null  int64              
 2   TRANS_LINK_ID              23369 non-null  int64              
 

Let's clean up the bus_stops geodataframe.

In [None]:
# Preview the first five raw bus-stop records.
bus_stops_gdf.head(n=5)

In [26]:
# List the distinct status codes present in the stops layer.
bus_stops_gdf.STOP_STATUS.unique()

array(['ACT', 'CLO', 'INA', 'PLN'], dtype=object)

In [65]:
# Keep only the stops whose STOP_STATUS is 'ACT'; every other status is discarded.
is_active = bus_stops_gdf['STOP_STATUS'].eq('ACT')
bus_stops_gdf = bus_stops_gdf.loc[is_active]


In [66]:
# Drop the bus-stop attribute columns that are not used in the rest of the notebook.
# Each column is listed exactly once. STOP_STATUS is included because the frame
# has already been filtered down to 'ACT' stops.
columns_to_drop = [
    'CHANGE_NUM', 'MINOR_CHANGE_NUM', 'AUTH_CODE', 'AUTH_NAME',
    'ACCESSIBILITY_DECAL', 'ACCESSORY_SIGN', 'STOP_STATUS', 'AWNING',
    'BAY_CODE', 'BEARING_CODE', 'CURB', 'CURB_HEIGHT_FRONTDOOR', 'CURBPAINT',
    'CURBPAINTLENGTH', 'DB_MOD_DATE', 'FRDOORLANDING', 'FRDOORSIDEWALKWIDTH',
    'IMT_CONNECTORLIST', 'IN_SERVICE_FLAG', 'INFOSIGN_ANCHOR',
    'SIGN_MOUNTING_DIR', 'SIGNPOST', 'SIGNPOST_ANCHOR', 'SCHEDULE_HOLDER',
]
bus_stops_gdf = bus_stops_gdf.drop(columns=columns_to_drop)

bus_stops_gdf.head()

Unnamed: 0,OBJECTID,STOP_ID,TRANS_LINK_ID,STOP_TYPE,TRF_DISTRICT_CODE,BIKE_RACK,CURRENT_NEXT_CODE,CF_CROSS_STREETNAME,CF_DIST_FROM_INTERSECTION,CF_INTERSECTION_LOCCODE,...,YCOORD,YCOORD_OFFSET,ZIPCODE,ROUTE_LIST,ON_STREET_NAME,ROUTESIGN,ROUTESIGN_OWNER,NUM_SHELTERS,GISOBJID,geometry
0,1,99596,384121,REG,NW,NO,CURRENT,N 148th St,100.0,AT,...,271625.6,271634.0,98155,,ST Light Rail,Unknown,UNK,0,100016297.0,POINT (-122.32435 47.73493)
1,2,99597,384119,REG,NE,NO,CURRENT,NE 130th St,100.0,AT,...,267463.4,267464.7,98125,,ST Light Rail,Unknown,UNK,0,100016296.0,POINT (-122.32399 47.72350)
2,3,99598,384121,REG,NE,NO,CURRENT,NE 130th St,100.0,AT,...,267462.6,267461.3,98125,,ST Light Rail,Unknown,UNK,0,100016295.0,POINT (-122.32373 47.72350)
3,4,99599,381099,LAY,NE,NO,CURRENT,ST Light Rail,33.0,FS,...,260538.8,260538.5,98125,,ST Light Rail,Unknown,UNK,0,100016127.0,POINT (-122.32821 47.70446)
4,5,99600,381084,RL,NE,NO,CURRENT,Pedestrian Walk,128.0,NM,...,260009.7,260009.4,98125,599.0,Northgate Sta Rail,Unknown,ST,0,100016114.0,POINT (-122.32812 47.70301)


In [67]:
# Add a STOPS_IN_ZIPCODE column: the total number of bus stops in each stop's ZIP code.

# One row per ZIP code with the number of stops that fall in it.
stops_count_per_zipcode = (
    bus_stops_gdf.groupby('ZIPCODE').size().reset_index(name='STOPS_IN_ZIPCODE')
)

# Attach the count to every stop. Any STOPS_IN_ZIPCODE column left over from an
# earlier run of this cell is removed first; otherwise the merge would emit
# STOPS_IN_ZIPCODE_x / STOPS_IN_ZIPCODE_y instead of a single column.
bus_stops_gdf = (
    bus_stops_gdf
    .drop(columns=['STOPS_IN_ZIPCODE'], errors='ignore')
    .merge(stops_count_per_zipcode, on='ZIPCODE', how='left')
)


Now we'll clean up the bus_routes geodataframe

In [None]:
# Preview the first five raw bus-route records.
bus_routes_gdf.head(n=5)

In [68]:
# All bus routes are in service, so remove the columns listed below from the routes layer.
columns_to_drop = [
    'OBJECTID',
    'CHANGE_NUM',
    'MINOR_CHANGE_NUM',
    'CURRENT_NEXT_CODE',
    'IN_SERVICE_FLAG',
]
bus_routes_gdf = bus_routes_gdf.drop(columns_to_drop, axis='columns')

bus_routes_gdf.head(n=5)

Unnamed: 0,ROUTE_ID,LOCAL_EXPRESS_CODE,ROUTE_NUM,SHAPE_Length,geometry
0,100001,L,1,41571.633227,"MULTILINESTRING ((-122.31775 47.59920, -122.31..."
1,100002,L,10,22811.506576,"MULTILINESTRING ((-122.32680 47.61522, -122.32..."
2,100003,L,101,128228.499464,"MULTILINESTRING ((-122.21243 47.47169, -122.21..."
3,100048,L,153,73375.101802,"MULTILINESTRING ((-122.23280 47.38482, -122.23..."
4,100060,L,168,86790.958365,"MULTILINESTRING ((-122.03780 47.35797, -122.04..."


In [None]:
# NOTE(review): repeats the preview shown at the end of the column-drop cell; candidate for removal.
bus_routes_gdf.head(n=5)

I want to calculate the area of each zip code. zip_codes_gdf is in the EPSG:4326 coordinate system (latitude and longitude in degrees), which does not preserve distances or areas, so I'll first project it into UTM zone 10N (EPSG:32610), whose units are meters.


In [69]:
# Reproject the ZIP code polygons to UTM zone 10N (EPSG:32610).
zip_codes_gdf_projected = zip_codes_gdf.to_crs('EPSG:32610')

In [70]:
# Polygon area in the projected CRS's units (square meters for UTM).
zip_codes_gdf_projected['AREA_SQM'] = zip_codes_gdf_projected.geometry.area

In [72]:
# Build a plain ZIPCODE -> AREA_SQM lookup table, independent of the projected frame.
zip_area = zip_codes_gdf_projected[['ZIPCODE', 'AREA_SQM']].copy()

# Use string keys so the later join matches the string ZIPCODE column in bus_stops_gdf.
zip_area['ZIPCODE'] = zip_area['ZIPCODE'].astype(str)

# Collapse to exactly one row per ZIP code, summing the areas of any features that
# share a ZIPCODE. A lookup with repeated keys would duplicate bus-stop rows when it
# is left-merged onto bus_stops_gdf. min_count=1 keeps an all-NaN area as NaN
# instead of turning it into 0.
zip_area = zip_area.groupby('ZIPCODE', as_index=False)['AREA_SQM'].sum(min_count=1)

In [78]:
# Attach each ZIP code's area (AREA_SQM) to every bus stop in that ZIP code.

# The join key must be a string on both sides; zip_area['ZIPCODE'] is already str.
bus_stops_gdf['ZIPCODE'] = bus_stops_gdf['ZIPCODE'].astype(str)

# Left merge keeps every stop, including ones whose ZIP code has no polygon.
# Any AREA_SQM column left over from an earlier run of this cell is removed first;
# otherwise the merge would emit AREA_SQM_x / AREA_SQM_y and the density
# calculation that reads 'AREA_SQM' would fail.
bus_stops_gdf = (
    bus_stops_gdf
    .drop(columns=['AREA_SQM'], errors='ignore')
    .merge(zip_area, on='ZIPCODE', how='left')
)


In [79]:
# Check that STOPS_IN_ZIPCODE and AREA_SQM are now present on the stop records.
bus_stops_gdf.head(n=5)

Unnamed: 0,OBJECTID,STOP_ID,TRANS_LINK_ID,STOP_TYPE,TRF_DISTRICT_CODE,BIKE_RACK,CURRENT_NEXT_CODE,CF_CROSS_STREETNAME,CF_DIST_FROM_INTERSECTION,CF_INTERSECTION_LOCCODE,...,ZIPCODE,ROUTE_LIST,ON_STREET_NAME,ROUTESIGN,ROUTESIGN_OWNER,NUM_SHELTERS,GISOBJID,geometry,STOPS_IN_ZIPCODE,AREA_SQM
0,1,99596,384121,REG,NW,NO,CURRENT,N 148th St,100.0,AT,...,98155,,ST Light Rail,Unknown,UNK,0,100016297.0,POINT (-122.32435 47.73493),372,21461180.0
1,2,99597,384119,REG,NE,NO,CURRENT,NE 130th St,100.0,AT,...,98125,,ST Light Rail,Unknown,UNK,0,100016296.0,POINT (-122.32399 47.72350),499,16719150.0
2,3,99598,384121,REG,NE,NO,CURRENT,NE 130th St,100.0,AT,...,98125,,ST Light Rail,Unknown,UNK,0,100016295.0,POINT (-122.32373 47.72350),499,16719150.0
3,4,99599,381099,LAY,NE,NO,CURRENT,ST Light Rail,33.0,FS,...,98125,,ST Light Rail,Unknown,UNK,0,100016127.0,POINT (-122.32821 47.70446),499,16719150.0
4,5,99600,381084,RL,NE,NO,CURRENT,Pedestrian Walk,128.0,NM,...,98125,599.0,Northgate Sta Rail,Unknown,ST,0,100016114.0,POINT (-122.32812 47.70301),499,16719150.0


In [80]:
# Stop density of each stop's ZIP code: number of stops divided by ZIP code area (square meters).
bus_stops_gdf['STOPS_PER_SQM'] = bus_stops_gdf['STOPS_IN_ZIPCODE'].div(bus_stops_gdf['AREA_SQM'])

In [81]:
# Write both cleaned layers as GeoJSON files in the current working directory
# (stops first, then routes).
for gdf, filename in (
    (bus_stops_gdf, "bus_stops.geojson"),
    (bus_routes_gdf, "bus_routes.geojson"),
):
    gdf.to_file(filename, driver='GeoJSON')


In [82]:
# Output locations on the mounted Drive. These use the same '/content/drive/MyDrive'
# mount path as the input files so every Drive path in the notebook is spelled
# the same way.
bus_stops_path = '/content/drive/MyDrive/KingCountyTransit/KingCountyTransitData/bus_stops.geojson'
bus_routes_path = '/content/drive/MyDrive/KingCountyTransit/KingCountyTransitData/bus_routes.geojson'

# Save the cleaned GeoDataFrames next to the source data.
bus_stops_gdf.to_file(bus_stops_path, driver='GeoJSON')
bus_routes_gdf.to_file(bus_routes_path, driver='GeoJSON')
