# Example: CitiBike data

Adapted from Kelsey Jordahl
https://gist.github.com/kjordahl/5957573

In [2]:
import requests  
import json
from pyproj import Proj
from shapely.geometry import Point
import geopandas as gp
import fiona

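NYC borough boundaries downloaded from [Bytes of the Big Apple](http://www.nyc.gov/html/dcp/download/bytes/nybb_13a.zip). Note that the link points to the 13a release, while the next cell reads the shapefile from a local `nybb_15b/` folder; unzip the release you download into a folder with that name, or adjust the path in the cell.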

In [3]:
# Read the borough-boundary shapefile into a GeoDataFrame.
boros = gp.read_file('nybb_15b/nybb.shp')

Load real-time bike station data from the [CitiBike](http://citibikenyc.com) JSON API:

In [4]:
# Get the bike station feed and convert it to a dictionary.
endpoint_url = 'http://citibikenyc.com/stations/json'
# A timeout keeps the cell from hanging indefinitely if the server never answers.
response = requests.get(endpoint_url, timeout=30)
# Stop here on an HTTP error status rather than trying to parse an error page as JSON.
response.raise_for_status()
data = response.json()


In [5]:
# Build a GeoDataFrame from the station list held under 'stationBeanList',
# then preview the first few rows.
df = gp.GeoDataFrame(data=data['stationBeanList'])
df.head(n=5)

Unnamed: 0,altitude,availableBikes,availableDocks,city,id,landMark,lastCommunicationTime,latitude,location,longitude,postalCode,stAddress1,stAddress2,stationName,statusKey,statusValue,testStation,totalDocks
0,,9,28,,72,,2017-02-21 10:58:40 PM,40.767272,,-73.993929,,W 52 St & 11 Ave,,W 52 St & 11 Ave,1,In Service,False,39
1,,3,30,,79,,2017-02-21 11:00:17 PM,40.719116,,-74.006667,,Franklin St & W Broadway,,Franklin St & W Broadway,1,In Service,False,33
2,,14,13,,82,,2017-02-21 11:00:28 PM,40.711174,,-74.000165,,St James Pl & Pearl St,,St James Pl & Pearl St,1,In Service,False,27
3,,33,28,,83,,2017-02-21 11:00:38 PM,40.683826,,-73.976323,,Atlantic Ave & Fort Greene Pl,,Atlantic Ave & Fort Greene Pl,1,In Service,False,62
4,,13,26,,116,,2017-02-21 11:00:21 PM,40.741776,,-74.001497,,W 17 St & 8 Ave,,W 17 St & 8 Ave,1,In Service,False,39


In [6]:
# Each bike station occupies one row, so the row count is the number of stations.
df.shape[0]

665

In [7]:
# The station records carry longitude/latitude columns but no geometry column,
# so build one Point per station (x = longitude, y = latitude).
s = gp.GeoSeries(list(map(Point, df['longitude'], df['latitude'])))
df['geometry'] = s

# Declare the coordinates as EPSG:4326, then inspect the overall extent.
df.crs = {'init': 'epsg:4326', 'no_defs': True}
df.geometry.total_bounds

(-74.096936600000006,
 40.661063371900603,
 -73.929891100000006,
 40.804212999999997)

In [8]:
# Put the stations on the same CRS as the borough boundaries.
# Looking at the bounds afterwards is a quick way to confirm the coordinates changed.
df = df.to_crs(boros.crs)
df.geometry.total_bounds

(957370.14732175611,
 180120.27032614074,
 1003695.8507545569,
 232275.23054640222)

In [26]:
# Geometry objects support spatial predicates. For example, test which
# station points lie within the Manhattan polygon.

# Select the borough by name rather than by row position, so the cell keeps
# working if the shapefile lists the boroughs in a different order.
manhattan = boros.loc[boros.BoroName == 'Manhattan', 'geometry'].iloc[0]
# Boolean Series with one entry per station: True where the point is inside Manhattan.
in_mn = df.geometry.within(manhattan)
print(in_mn.sum(), 'stations in Manhattan')

366 stations in Manhattan


In [10]:
# Preview the first five station rows.
df.head(n=5)

Unnamed: 0,altitude,availableBikes,availableDocks,city,id,landMark,lastCommunicationTime,latitude,location,longitude,postalCode,stAddress1,stAddress2,stationName,statusKey,statusValue,testStation,totalDocks,geometry
0,,9,28,,72,,2017-02-21 10:58:40 PM,40.767272,,-73.993929,,W 52 St & 11 Ave,,W 52 St & 11 Ave,1,In Service,False,39,POINT (985931.706502895 218814.7571059909)
1,,3,30,,79,,2017-02-21 11:00:17 PM,40.719116,,-74.006667,,Franklin St & W Broadway,,Franklin St & W Broadway,1,In Service,False,33,POINT (982402.0068864136 201269.770346975)
2,,14,13,,82,,2017-02-21 11:00:28 PM,40.711174,,-74.000165,,St James Pl & Pearl St,,St James Pl & Pearl St,1,In Service,False,27,POINT (984204.131576321 198376.4207269395)
3,,33,28,,83,,2017-02-21 11:00:38 PM,40.683826,,-73.976323,,Atlantic Ave & Fort Greene Pl,,Atlantic Ave & Fort Greene Pl,1,In Service,False,62,POINT (990816.693171227 188413.5778061197)
4,,13,26,,116,,2017-02-21 11:00:21 PM,40.741776,,-74.001497,,W 17 St & 8 Ave,,W 17 St & 8 Ave,1,In Service,False,39,POINT (983835.0432398689 209525.6347344065)


# Your turn

You can read about the range of operations available in geopandas here: 
    
http://geopandas.org/index.html

Your assignment is to: 

1. Calculate how many stations are in each borough
2. Calculate how many bikes are currently available in each borough
3. Read about and try at least two new spatial or geometric operations (beyond what I've covered here). 

In [11]:
# Count the stations that fall inside each borough polygon.
# Iterating over the name and geometry columns together avoids a manual
# counter and does not depend on the frame's index labels being 0..n-1.
for boro_label, boro_shape in zip(boros.BoroName, boros.geometry):
    # Boolean Series with one entry per station: True where the point is within this borough.
    in_boroname = df.geometry.within(boro_shape)
    print(in_boroname.sum(), 'stations in', boro_label)

0 stations in Staten Island
236 stations in Brooklyn
13 stations in Queens
366 stations in Manhattan
0 stations in Bronx


In [12]:
# Number of rows in the borough table.
boros.shape[0]

5

In [23]:
# Display the borough table to see the borough names, their row order and geometries.
boros

Unnamed: 0,BoroCode,BoroName,Shape_Area,Shape_Leng,geometry
0,5,Staten Island,1623827000.0,330466.075042,(POLYGON ((970217.0223999023 145643.3322143555...
1,3,Brooklyn,1937597000.0,741185.900596,(POLYGON ((1021176.479003906 151374.7969970703...
2,4,Queens,3045168000.0,897040.298576,(POLYGON ((1029606.076599121 156073.8142089844...
3,1,Manhattan,636446700.0,358408.460709,(POLYGON ((981219.0557861328 188655.3157958984...
4,2,Bronx,1186973000.0,464400.198868,(POLYGON ((1012821.805786133 229228.2645874023...


In [40]:
# Total the bikes currently available at the stations inside each borough.
for boro_label, boro_shape in zip(boros.BoroName, boros.geometry):
    # Boolean mask over the stations: True where the point is within this borough.
    in_boroname = df.geometry.within(boro_shape)
    # Summing the masked column replaces the hand-written inner counting loop
    # and its manual counter resets.
    bikes = df.loc[in_boroname, 'availableBikes'].sum()
    print(bikes,'available in',boro_label)

0 available in Staten Island
2477 available in Brooklyn
140 available in Queens
3989 available in Manhattan
0 available in Bronx


In [13]:
# Number of entries in the most recently computed borough mask.
in_boroname.size

665

In [39]:
# Total bikes available at Manhattan stations: keep only the rows flagged by
# the in_mn mask and sum the column, instead of walking the rows with a counter.
a = df.loc[in_mn, 'availableBikes'].sum()
a

3989

In [28]:
# Display the Manhattan membership mask (a boolean Series, per the output below).
in_mn

0       True
1       True
2       True
3      False
4       True
5      False
6      False
7       True
8       True
9       True
10     False
11     False
12      True
13      True
14      True
15      True
16      True
17      True
18     False
19      True
20      True
21      True
22      True
23      True
24      True
25      True
26      True
27     False
28     False
29      True
       ...  
635    False
636    False
637    False
638    False
639    False
640    False
641    False
642    False
643    False
644    False
645    False
646    False
647    False
648    False
649     True
650     True
651     True
652     True
653    False
654    False
655     True
656     True
657     True
658     True
659     True
660     True
661    False
662     True
663     True
664     True
dtype: bool