# Data Gathering Notebook

This notebook is for gathering the data required for further analyses.

In [39]:
import pandas as pd
import json
import requests
import os
from datetime import datetime, timedelta

## Bike Ridership Data

The bike ridership data comes from data.seattle.gov and can be downloaded as a csv. The data represents the number of bike riders crossing the Fremont Bridge hourly. 

In the `data/raw/` directory, run the following command to download and rename the data for the bike ridership data across the Fremont Bridge:
```
wget https://data.seattle.gov/api/views/65db-xm6k/rows.csv?accessType=DOWNLOAD
mv rows.csv?accessType=DOWNLOAD Fremont_bridge.csv
```

In [3]:
#Read in the csv file as a pandas DataFrame
bike = pd.read_csv("../../data/raw/Fremont_bridge.csv")

In [13]:
#Get first date in Bike data
bike.iloc[0, 0]

'10/03/2012 12:00:00 AM'

In [21]:
#Reformat Date column to Datetime
bike['Date'] = pd.to_datetime(bike['Date'], format = "%m/%d/%Y %I:%M:%S %p")

In [22]:
bike.head()

Unnamed: 0,Date,Fremont Bridge Total,Fremont Bridge East Sidewalk,Fremont Bridge West Sidewalk
0,2012-10-03 00:00:00,13.0,4.0,9.0
1,2012-10-03 01:00:00,10.0,4.0,6.0
2,2012-10-03 02:00:00,2.0,1.0,1.0
3,2012-10-03 03:00:00,5.0,2.0,3.0
4,2012-10-03 04:00:00,7.0,6.0,1.0


## Weather Data

The weather data comes from the Dark Sky API, which provides up to 1000 API requests daily for free with a registered API key.

Dates for the Dark Sky API must be in this format: `[YYYY]-[MM]-[DD]T[HH]:[MM]:[SS]`

In [60]:
#Set start and end dates for API calling
start_date = datetime.fromisoformat('2012-10-03T12:00:00')
end_date = datetime.fromisoformat('2019-10-03T12:00:00')
#Set latitude/longitude for the Fremont Bridge (taken from Google Maps)
lat = "47.648170"
long = "-122.349640"

In [15]:
def get_keys(path):
    with open(path) as f:
        return json.load(f)

In [16]:
keys = get_keys("/Users/wvsharber/.secret/darksky_api.json")
api_key = keys['api_key']

In [49]:
start_date + timedelta(days=1)

datetime.datetime(2012, 10, 4, 12, 0)

In [50]:
start_date.isoformat()

'2012-10-03T12:00:00'

In [73]:
next_date = start_date
counter = 0
weather = pd.DataFrame()
url_template = "https://api.darksky.net/forecast/{}/{},{},{}?exclude=currently,minutely,hourly,alerts,flags"

In [94]:
counter = 0

In [95]:
while next_date <= end_date and counter <= 975:
    request_url = url_template.format(api_key,
                                      lat, #latitude
                                      long, #longitude
                                      next_date.isoformat())
    response = requests.get(request_url)
    if response.status_code == 200:
        response_dict = response.json()
        weather = weather.append(response_dict['daily']['data'][0], ignore_index = True)
        next_date += timedelta(days=1)
        counter += 1
    else:
        print(f"Failed at {next_date}")
        break

In [96]:
next_date

datetime.datetime(2019, 10, 4, 12, 0)

In [85]:
response.status_code

403

In [98]:
len(weather)

2557

In [97]:
import pickle

with open("../../data/raw/weather.pkl", 'wb') as handle:
    pickle.dump(weather, handle)

In [99]:
weather.tail()

Unnamed: 0,apparentTemperatureHigh,apparentTemperatureHighTime,apparentTemperatureLow,apparentTemperatureLowTime,apparentTemperatureMax,apparentTemperatureMaxTime,apparentTemperatureMin,apparentTemperatureMinTime,cloudCover,dewPoint,...,time,uvIndex,uvIndexTime,visibility,windBearing,windGust,windGustTime,windSpeed,precipAccumulation,ozone
2552,57.2,1569798000.0,39.22,1569851000.0,57.2,1569798000.0,44.08,1569763000.0,0.47,42.06,...,1569740000.0,3.0,1569787000.0,10.0,19.0,9.39,1569798000.0,4.66,,359.4
2553,60.14,1569881000.0,39.3,1569941000.0,60.14,1569881000.0,39.22,1569851000.0,0.07,36.46,...,1569827000.0,4.0,1569873000.0,10.0,13.0,7.9,1569885000.0,4.35,,341.0
2554,63.85,1569970000.0,41.19,1570019000.0,63.85,1569970000.0,39.3,1569941000.0,0.07,38.76,...,1569913000.0,4.0,1569960000.0,10.0,106.0,5.51,1569978000.0,2.92,,310.7
2555,65.89,1570054000.0,51.88,1570101000.0,65.89,1570054000.0,41.31,1570020000.0,0.53,41.73,...,1570000000.0,3.0,1570046000.0,10.0,171.0,5.52,1570059000.0,2.78,,296.4
2556,62.26,1570139000.0,45.92,1570197000.0,62.26,1570139000.0,51.56,1570169000.0,0.59,46.86,...,1570086000.0,3.0,1570133000.0,10.0,181.0,20.2,1570112000.0,7.55,,330.5


In [100]:
weather.columns

Index(['apparentTemperatureHigh', 'apparentTemperatureHighTime',
       'apparentTemperatureLow', 'apparentTemperatureLowTime',
       'apparentTemperatureMax', 'apparentTemperatureMaxTime',
       'apparentTemperatureMin', 'apparentTemperatureMinTime', 'cloudCover',
       'dewPoint', 'humidity', 'icon', 'moonPhase', 'precipIntensity',
       'precipIntensityMax', 'precipIntensityMaxTime', 'precipProbability',
       'precipType', 'pressure', 'summary', 'sunriseTime', 'sunsetTime',
       'temperatureHigh', 'temperatureHighTime', 'temperatureLow',
       'temperatureLowTime', 'temperatureMax', 'temperatureMaxTime',
       'temperatureMin', 'temperatureMinTime', 'time', 'uvIndex',
       'uvIndexTime', 'visibility', 'windBearing', 'windGust', 'windGustTime',
       'windSpeed', 'precipAccumulation', 'ozone'],
      dtype='object')