# Data Gathering Notebook

This notebook is for gathering the data required for further analyses.

In [39]:
import pandas as pd
import json
import requests
import os
from datetime import datetime, timedelta

## Bike Ridership Data

The bike ridership data comes from data.seattle.gov and can be downloaded as a csv. The data represents the number of bike riders crossing the Fremont Bridge hourly. 

In the `data/raw/` directory, run the following command to download and rename the data for the bike ridership data across the Fremont Bridge:
```
wget https://data.seattle.gov/api/views/65db-xm6k/rows.csv?accessType=DOWNLOAD
mv rows.csv?accessType=DOWNLOAD Fremont_bridge.csv
```

In [3]:
#Read in the csv file as a pandas DataFrame
bike = pd.read_csv("../../data/raw/Fremont_bridge.csv")

In [13]:
#Get first date in Bike data
bike.iloc[0, 0]

'10/03/2012 12:00:00 AM'

In [21]:
#Reformat Date column to Datetime
bike['Date'] = pd.to_datetime(bike['Date'], format = "%m/%d/%Y %I:%M:%S %p")

In [22]:
bike.head()

Unnamed: 0,Date,Fremont Bridge Total,Fremont Bridge East Sidewalk,Fremont Bridge West Sidewalk
0,2012-10-03 00:00:00,13.0,4.0,9.0
1,2012-10-03 01:00:00,10.0,4.0,6.0
2,2012-10-03 02:00:00,2.0,1.0,1.0
3,2012-10-03 03:00:00,5.0,2.0,3.0
4,2012-10-03 04:00:00,7.0,6.0,1.0


## Weather Data

The weather data comes from the Dark Sky API, which provides up to 1000 API requests daily for free with a registered API key.

Dates for the Dark Sky API must be in this format: `[YYYY]-[MM]-[DD]T[HH]:[MM]:[SS]`

In [60]:
#Set start and end dates for API calling
start_date = datetime.fromisoformat('2012-10-03T12:00:00')
end_date = datetime.fromisoformat('2019-10-03T12:00:00')
#Set latitude/longitude for the Fremont Bridge (taken from Google Maps)
lat = "47.648170"
long = "-122.349640"

In [15]:
def get_keys(path):
    with open(path) as f:
        return json.load(f)

In [16]:
keys = get_keys("/Users/wvsharber/.secret/darksky_api.json")
api_key = keys['api_key']

In [49]:
start_date + timedelta(days=1)

datetime.datetime(2012, 10, 4, 12, 0)

In [50]:
start_date.isoformat()

'2012-10-03T12:00:00'

In [73]:
next_date = start_date
counter = 0
weather = pd.DataFrame()
url_template = "https://api.darksky.net/forecast/{}/{},{},{}?exclude=currently,minutely,hourly,alerts,flags"

In [88]:
counter = 0

In [89]:
while next_date <= end_date and counter <= 975:
    request_url = url_template.format(api_key,
                                      lat, #latitude
                                      long, #longitude
                                      next_date.isoformat())
    response = requests.get(request_url)
    if response.status_code == 200:
        response_dict = response.json()
        weather = weather.append(response_dict['daily']['data'][0], ignore_index = True)
        next_date += timedelta(days=1)
        counter += 1
    else:
        print(f"Failed at {next_date}")
        break

In [93]:
next_date

datetime.datetime(2018, 2, 18, 12, 0)

In [85]:
response.status_code

403

In [90]:
len(weather)

1964

In [91]:
import pickle

with open("../../data/raw/weather.pkl", 'wb') as handle:
    pickle.dump(weather, handle)

In [92]:
weather.tail()

Unnamed: 0,apparentTemperatureHigh,apparentTemperatureHighTime,apparentTemperatureLow,apparentTemperatureLowTime,apparentTemperatureMax,apparentTemperatureMaxTime,apparentTemperatureMin,apparentTemperatureMinTime,cloudCover,dewPoint,...,temperatureMinTime,time,uvIndex,uvIndexTime,visibility,windBearing,windGust,windGustTime,windSpeed,precipAccumulation
1959,47.17,1518560000.0,32.7,1518604000.0,47.17,1518560000.0,30.26,1518536000.0,0.45,29.28,...,1518536000.0,1518509000.0,2.0,1518552000.0,10.0,159.0,11.15,1518592000.0,1.26,
1960,42.33,1518654000.0,37.59,1518696000.0,42.33,1518654000.0,32.7,1518604000.0,0.98,36.49,...,1518600000.0,1518595000.0,2.0,1518640000.0,7.931,199.0,14.9,1518603000.0,3.15,
1961,44.59,1518740000.0,38.1,1518796000.0,44.59,1518740000.0,37.59,1518696000.0,0.87,36.96,...,1518696000.0,1518682000.0,2.0,1518726000.0,10.0,181.0,7.8,1518731000.0,1.96,
1962,47.12,1518833000.0,39.67,1518879000.0,47.12,1518833000.0,38.1,1518796000.0,1.0,40.77,...,1518783000.0,1518768000.0,2.0,1518813000.0,7.154,187.0,12.7,1518811000.0,4.49,
1963,50.96,1518900000.0,32.97,1518956000.0,50.96,1518900000.0,36.28,1518937000.0,0.83,40.52,...,1518937000.0,1518854000.0,2.0,1518899000.0,4.893,199.0,24.76,1518891000.0,7.56,
