In [19]:
"""Load data from data.cityofchicago.org

Note: You can request an app token from data.cityofchicago.org / socrata.com,
and thereby avoid API throttling; however, this didn't appear to be necessary
for this here.

"""
import requests


API_URI = 'https://data.cityofchicago.org/resource/{resource_id}'

API_DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S'


def get_data_url_info(resource_id, between=(), limit=1_000_000, api_uri=API_URI):
    f"""Construct API URL base and query parameters
    
    Returns tuple of URL base (str) and query parameters (dict).
    
    Arguments:
    
    resource_id: api resource identifier (str)
    between: start and end datetimes (tuple[datetime]) (optional)
    limit: maximum number of results to return (int) (default: 1 million)
    api_uri: api url base template (str) (default: {API_URI})
    
    """
    params = {
        '$limit': limit,
    }

    if between:
        params['$where'] = ("start_time between '{:{datetime_format}}' and '{:{datetime_format}}'"
                            .format(*between, datetime_format=API_DATETIME_FORMAT))

    return (
        api_uri.format(resource_id=resource_id),
        params,
    )


def request_data(*args, stream=True, **kwargs):
    """Make a request of the API
    
    Returns a Response.
    
    Arguments:
    
    [Same as get_data_url_info.]
    stream: load response content lazily, such that it can be retrieved
            by iter_content or iter_lines
    
    """
    (url, params) = get_data_url_info(*args, **kwargs)
    return requests.get(
        url,
        params=params,
        stream=stream,
    )

In [20]:
from datetime import datetime


DIVVY_RESOURCE_ID = 'fg6s-gzvg'


(url_base, url_params) = get_data_url_info(
    DIVVY_RESOURCE_ID,
    between=(
        datetime(2019, 7, 1),
        datetime(2019, 8, 1),
    )
)

In [21]:
url_base

'https://data.cityofchicago.org/resource/fg6s-gzvg'

In [22]:
url_params

{'$limit': 1000000,
 '$where': "start_time between '2019-07-01T00:00:00' and '2019-08-01T00:00:00'"}

In [23]:
response = request_data(
    DIVVY_RESOURCE_ID,
    between=(
        datetime(2019, 7, 1),
        datetime(2019, 8, 1),
    )
)

response

<Response [200]>

In [24]:
content_iterator = response.iter_lines()

content_iterator

<generator object Response.iter_lines at 0x79b91d93acf0>

In [25]:
next(content_iterator)

b'[{"trip_id":"23479388","start_time":"2019-07-01T00:00:27.000","stop_time":"2019-07-01T00:20:41.000","bike_id":"3591","trip_duration":"1214","from_station_id":"117","from_station_name":"Wilton Ave & Belmont Ave","to_station_id":"497","to_station_name":"Kimball Ave & Belmont Ave","user_type":"Subscriber","gender":"Male","birth_year":"1992","from_latitude":"41.94018","from_longitude":"-87.65304","from_location":{"type":"Point","coordinates":[-87.65304,41.94018]},"to_latitude":"41.939398","to_longitude":"-87.711561","to_location":{"type":"Point","coordinates":[-87.711561,41.939398]}}'

In [26]:
import urllib

url_qs = urllib.parse.urlencode(url_params)

url_qs

'%24limit=1000000&%24where=start_time+between+%272019-07-01T00%3A00%3A00%27+and+%272019-08-01T00%3A00%3A00%27'

In [27]:
import pandas as pd


df = pd.read_json(f'{url_base}?{url_qs}') 

In [28]:
df.head()

Unnamed: 0,trip_id,start_time,stop_time,bike_id,trip_duration,from_station_id,from_station_name,to_station_id,to_station_name,user_type,gender,birth_year,from_latitude,from_longitude,from_location,to_latitude,to_longitude,to_location
0,23479388,2019-07-01 00:00:27,2019-07-01 00:20:41,3591,1214,117,Wilton Ave & Belmont Ave,497,Kimball Ave & Belmont Ave,Subscriber,Male,1992.0,41.94018,-87.65304,"{'type': 'Point', 'coordinates': [-87.65304, 4...",41.939398,-87.711561,"{'type': 'Point', 'coordinates': [-87.711561, ..."
1,23479389,2019-07-01 00:01:16,2019-07-01 00:18:44,5353,1048,381,Western Ave & Monroe St,203,Western Ave & 21st St,Customer,,,41.879788,-87.686113,"{'type': 'Point', 'coordinates': [-87.68611280...",41.854109,-87.685838,"{'type': 'Point', 'coordinates': [-87.685838, ..."
2,23479390,2019-07-01 00:01:48,2019-07-01 00:27:42,6180,1554,313,Lakeview Ave & Fullerton Pkwy,144,Larrabee St & Webster Ave,Customer,,,41.925858,-87.638973,"{'type': 'Point', 'coordinates': [-87.638973, ...",41.921822,-87.64414,"{'type': 'Point', 'coordinates': [-87.64414, 4..."
3,23479391,2019-07-01 00:02:07,2019-07-01 00:27:10,5540,1503,313,Lakeview Ave & Fullerton Pkwy,144,Larrabee St & Webster Ave,Customer,,,41.925858,-87.638973,"{'type': 'Point', 'coordinates': [-87.638973, ...",41.921822,-87.64414,"{'type': 'Point', 'coordinates': [-87.64414, 4..."
4,23479392,2019-07-01 00:02:13,2019-07-01 00:22:26,6014,1213,168,Michigan Ave & 14th St,62,McCormick Place,Customer,,,41.864059,-87.623727,"{'type': 'Point', 'coordinates': [-87.623727, ...",41.851375,-87.618835,"{'type': 'Point', 'coordinates': [-87.61883464..."


In [31]:
df.set_index('trip_id', inplace=True)

df.head()

Unnamed: 0_level_0,start_time,stop_time,bike_id,trip_duration,from_station_id,from_station_name,to_station_id,to_station_name,user_type,gender,birth_year,from_latitude,from_longitude,from_location,to_latitude,to_longitude,to_location
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
23479388,2019-07-01 00:00:27,2019-07-01 00:20:41,3591,1214,117,Wilton Ave & Belmont Ave,497,Kimball Ave & Belmont Ave,Subscriber,Male,1992.0,41.94018,-87.65304,"{'type': 'Point', 'coordinates': [-87.65304, 4...",41.939398,-87.711561,"{'type': 'Point', 'coordinates': [-87.711561, ..."
23479389,2019-07-01 00:01:16,2019-07-01 00:18:44,5353,1048,381,Western Ave & Monroe St,203,Western Ave & 21st St,Customer,,,41.879788,-87.686113,"{'type': 'Point', 'coordinates': [-87.68611280...",41.854109,-87.685838,"{'type': 'Point', 'coordinates': [-87.685838, ..."
23479390,2019-07-01 00:01:48,2019-07-01 00:27:42,6180,1554,313,Lakeview Ave & Fullerton Pkwy,144,Larrabee St & Webster Ave,Customer,,,41.925858,-87.638973,"{'type': 'Point', 'coordinates': [-87.638973, ...",41.921822,-87.64414,"{'type': 'Point', 'coordinates': [-87.64414, 4..."
23479391,2019-07-01 00:02:07,2019-07-01 00:27:10,5540,1503,313,Lakeview Ave & Fullerton Pkwy,144,Larrabee St & Webster Ave,Customer,,,41.925858,-87.638973,"{'type': 'Point', 'coordinates': [-87.638973, ...",41.921822,-87.64414,"{'type': 'Point', 'coordinates': [-87.64414, 4..."
23479392,2019-07-01 00:02:13,2019-07-01 00:22:26,6014,1213,168,Michigan Ave & 14th St,62,McCormick Place,Customer,,,41.864059,-87.623727,"{'type': 'Point', 'coordinates': [-87.623727, ...",41.851375,-87.618835,"{'type': 'Point', 'coordinates': [-87.61883464..."


In [35]:
df.shape

(557315, 17)

In [33]:
df.start_time

trip_id
23479388   2019-07-01 00:00:27
23479389   2019-07-01 00:01:16
23479390   2019-07-01 00:01:48
23479391   2019-07-01 00:02:07
23479392   2019-07-01 00:02:13
                   ...        
24089800   2019-07-31 23:58:22
24089801   2019-07-31 23:58:42
24089802   2019-07-31 23:58:58
24089803   2019-07-31 23:59:17
24089804   2019-07-31 23:59:31
Name: start_time, Length: 557315, dtype: datetime64[ns]

In [36]:
df.start_time.min()

Timestamp('2019-07-01 00:00:27')

In [37]:
df.start_time.max()

Timestamp('2019-07-31 23:59:31')

In [38]:
df.trip_duration.median() / 60

14.316666666666666

In [39]:
df.birth_year.median()

1988.0

In [40]:
df.gender.value_counts()

Male      317333
Female    136327
Name: gender, dtype: int64

In [44]:
df.groupby('gender').birth_year.median()

gender
Female    1990.0
Male      1988.0
Name: birth_year, dtype: float64

In [45]:
df.groupby('gender').trip_duration.median()

gender
Female    881
Male      691
Name: trip_duration, dtype: int64

In [46]:
df.groupby('gender').trip_duration.mad()

gender
Female    1265.502526
Male      1015.173967
Name: trip_duration, dtype: float64

In [47]:
df.groupby('gender').trip_duration.plot()

gender
Female    AxesSubplot(0.125,0.11;0.775x0.77)
Male      AxesSubplot(0.125,0.11;0.775x0.77)
Name: trip_duration, dtype: object