In [1]:
# Learned how to use json to get the current citibke status
import requests
import json
import pandas as pd

# Public json url from Citibike
stations_url = 'https://feeds.citibikenyc.com/stations/stations.json'

# Request the response and parse the json
data = requests.get(stations_url).json()

In [2]:
# Show a sample of the json dicts
# The Citibike Station json returns a list of dicts
data['stationBeanList'][0]

{'id': 301,
 'stationName': 'E 2 St & Avenue B',
 'availableDocks': 46,
 'totalDocks': 58,
 'latitude': 40.72217444,
 'longitude': -73.98368779,
 'statusValue': 'In Service',
 'statusKey': 1,
 'availableBikes': 11,
 'stAddress1': 'E 2 St & Avenue B',
 'stAddress2': '',
 'city': '',
 'postalCode': '',
 'location': '',
 'altitude': '',
 'testStation': False,
 'lastCommunicationTime': '2019-07-19 10:12:02 PM',
 'landMark': ''}

In [3]:
# Process the list of dictionary into DataFrame
station_df = pd.DataFrame(data['stationBeanList'])

In [4]:
# Show the list of columns in the DataFrame
station_df.columns

Index(['altitude', 'availableBikes', 'availableDocks', 'city', 'id',
       'landMark', 'lastCommunicationTime', 'latitude', 'location',
       'longitude', 'postalCode', 'stAddress1', 'stAddress2', 'stationName',
       'statusKey', 'statusValue', 'testStation', 'totalDocks'],
      dtype='object')

In [5]:
# Preview the DataFrame
station_df.head(5)

Unnamed: 0,altitude,availableBikes,availableDocks,city,id,landMark,lastCommunicationTime,latitude,location,longitude,postalCode,stAddress1,stAddress2,stationName,statusKey,statusValue,testStation,totalDocks
0,,11,46,,301,,2019-07-19 10:12:02 PM,40.722174,,-73.983688,,E 2 St & Avenue B,,E 2 St & Avenue B,1,In Service,False,58
1,,8,20,,307,,2019-07-19 10:12:09 PM,40.714275,,-73.9899,,Canal St & Rutgers St,,Canal St & Rutgers St,1,In Service,False,30
2,,4,25,,312,,2019-07-19 10:11:11 PM,40.722055,,-73.989111,,Allen St & Stanton St,,Allen St & Stanton St,1,In Service,False,31
3,,19,15,,347,,2019-07-19 10:11:53 PM,40.728846,,-74.008591,,Greenwich St & W Houston St,,Greenwich St & W Houston St,1,In Service,False,35
4,,7,41,,358,,2019-07-19 10:12:39 PM,40.732916,,-74.007114,,Christopher St & Greenwich St,,Christopher St & Greenwich St,1,In Service,False,50


In [6]:
station_df = station_df[['id', 
                         'stationName',
                         'availableBikes', 
                         'availableDocks', 
                         'totalDocks',
                         'statusValue']]

In [7]:
station_df.sort_values(by='availableBikes', 
                             ascending=False)
station_df.head(10)

Unnamed: 0,id,stationName,availableBikes,availableDocks,totalDocks,statusValue
0,301,E 2 St & Avenue B,11,46,58,In Service
1,307,Canal St & Rutgers St,8,20,30,In Service
2,312,Allen St & Stanton St,4,25,31,In Service
3,347,Greenwich St & W Houston St,19,15,35,In Service
4,358,Christopher St & Greenwich St,7,41,50,In Service
5,426,West St & Chambers St,15,9,29,In Service
6,432,E 7 St & Avenue A,2,29,31,In Service
7,438,St Marks Pl & 1 Ave,7,38,47,In Service
8,487,E 20 St & FDR Drive,12,21,34,In Service
9,490,8 Ave & W 33 St,52,13,66,In Service


In [8]:
# Create a boolean array mask where there are no bikes
# Make sure the station is in service right now.
in_service_mask = (station_df['statusValue'] == 'In Service')

# Active station only
active_station = station_df[in_service_mask]

# Find out which station has no bike avaliable
zero_bike_mask = (active_station['availableBikes'] == 0)

# Apply the zero_bike_mask
zero_bike = active_station[zero_bike_mask]

# Sample the output
zero_bike.sample(5)

Unnamed: 0,id,stationName,availableBikes,availableDocks,totalDocks,statusValue
375,3152,3 Ave & E 71 St,0,42,42,In Service
430,3231,E 67 St & Park Ave,0,39,39,In Service
250,517,Pershing Square South,0,66,69,In Service
147,382,University Pl & E 14 St,0,32,33,In Service
77,281,Grand Army Plaza & Central Park S,0,66,66,In Service


# Question: How many stations are without bike?

In [9]:
# Get the count of stations without bikes
num_of_stations_without_bike = zero_bike['id'].count()
print('There are {0:,} stations without bike!'.format(
    num_of_stations_without_bike))

There are 75 stations without bike!


In [10]:
# Find out which station has no dock avaliable
zero_dock_mask = (active_station['availableDocks'] == 0)

# Apply the zero_dock_mask
zero_dock = active_station[zero_dock_mask]

zero_dock.sample(5)

Unnamed: 0,id,stationName,availableBikes,availableDocks,totalDocks,statusValue
759,3656,E 2 St & Avenue A,45,0,46,In Service
339,3109,Banker St & Meserole Ave,30,0,31,In Service
600,3453,Devoe St & Lorimer St,18,0,19,In Service
388,3166,Riverside Dr & W 72 St,41,0,41,In Service
148,383,Greenwich Ave & Charles St,38,0,39,In Service


# Question: How many stations are without a dock?

In [11]:
num_of_stations_without_dock = zero_dock['id'].count()
print('There are {0:,} stations without dock!'.format(
    num_of_stations_without_dock))

There are 59 stations without dock!


In [12]:
no_dock = num_of_stations_without_dock
no_bike = num_of_stations_without_bike
total = active_station.shape[0] - no_dock - no_bike

In [13]:
arr = pd.Series([no_dock, no_bike, total],
                dtype='int64',
                index=['No Dock', 'No Bike', 'Active (Has Both)'])

In [14]:
arr.plot(kind='bar', rot=0,
         ylim=(0, 900),
         title='Current Citibike System Health')

<matplotlib.axes._subplots.AxesSubplot at 0x11b761e10>

In [15]:
# Motivated to plot these station onto a map!

In [16]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap