In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import plotly.graph_objects as go
import geopandas as gpd
%matplotlib inline 

## Ways of Getting Zipcodes with Lat, Lon Data
- To join on coordinates with the downloaded zip code data (Method 1), round latitude and longitude to the same number of decimal places in both the Divvy data and the zip code data.

### Method 1: Download Preprocessed Data

#### Data

- Description: US Zip Code Latitude & Longitude (filtered for IL)
- Source: https://public.opendatasoft.com/explore/dataset/us-zip-code-latitude-and-longitude/export/?refine.state=IL

### Method 2: Google Geocoding API
- To use the API, you need to obtain an API key.
- More about this: https://developers.google.com/maps/documentation/geocoding/get-api-key
- Please refer to 'get_zipcodes.py' for the complete code.

In [2]:
import googlemaps

#### Example of Using 'reverse_geocode' for a given point (41.88949182, -87.68821937)

In [None]:
# Build the Google Maps client. API_KEY must already exist in the kernel; it is not
# assigned in any of the visible cells — TODO confirm where it is set.
gmaps = googlemaps.Client(key=API_KEY)

# Reverse-geocode one sample (latitude, longitude) pair. The response is a list of
# candidate matches; later cells read element 0.
sample_point = (41.909396, -87.677692)
reverse_geocode_result = gmaps.reverse_geocode(sample_point)

In [43]:
# Repeat the lookup for the point named in the section heading. This rebinds
# reverse_geocode_result, replacing whatever an earlier cell stored in it.
reverse_geocode_result = gmaps.reverse_geocode((41.88949182, -87.68821937))

#### Data Stored under 'address_components'

In [44]:
# Structured parts of the first match: each entry carries 'long_name', 'short_name'
# and a 'types' list saying what the part is (route, locality, postal_code, ...).
reverse_geocode_result[0]['address_components']

[{'long_name': '2429', 'short_name': '2429', 'types': ['street_number']},
 {'long_name': 'West Hubbard Street',
  'short_name': 'W Hubbard St',
  'types': ['route']},
 {'long_name': 'West Town',
  'short_name': 'West Town',
  'types': ['neighborhood', 'political']},
 {'long_name': 'Chicago',
  'short_name': 'Chicago',
  'types': ['locality', 'political']},
 {'long_name': 'Cook County',
  'short_name': 'Cook County',
  'types': ['administrative_area_level_2', 'political']},
 {'long_name': 'Illinois',
  'short_name': 'IL',
  'types': ['administrative_area_level_1', 'political']},
 {'long_name': 'United States',
  'short_name': 'US',
  'types': ['country', 'political']},
 {'long_name': '60612', 'short_name': '60612', 'types': ['postal_code']},
 {'long_name': '1417', 'short_name': '1417', 'types': ['postal_code_suffix']}]

#### Extracting the Zipcode from the above

In [41]:
# Read the ZIP code from the component tagged 'postal_code' instead of slicing
# characters out of 'formatted_address', which only works when the address has
# exactly the "street, city, ST zip, country" layout.
# Evaluates to None when the first match carries no postal code.
next(
    (
        component['long_name']
        for component in reverse_geocode_result[0]['address_components']
        if 'postal_code' in component['types']
    ),
    None,
)

'60613'

In [None]:
def _component_by_type(components, wanted_type):
    """Return the 'long_name' of the first component tagged `wanted_type`, or None."""
    for component in components:
        if wanted_type in component['types']:
            return component['long_name']
    return None


info_dict = {'address': [], 'community': [], 'zipcode': []}

gmaps = googlemaps.Client(key=API_KEY)

# Walk the two coordinate columns in lockstep so each row's latitude is paired with
# that same row's longitude (nesting the loops would geocode every latitude against
# every longitude). This makes one API call per row of `divvy`.
# NOTE(review): if many rows share a station, de-duplicating the coordinate pairs
# first would cut the number of billable requests — confirm against the caller's needs.
coordinate_pairs = zip(divvy['FROM LATITUDE'].values, divvy['FROM LONGITUDE'].values)

for latitude, longitude in coordinate_pairs:
    reverse_geocode_result = gmaps.reverse_geocode((latitude, longitude))

    if not reverse_geocode_result:
        # No match for this point: pad every column so the lists stay the same length.
        for column in info_dict.values():
            column.append(None)
        continue

    best_match = reverse_geocode_result[0]
    components = best_match['address_components']

    # The street address is the text before the first comma of the formatted address.
    info_dict['address'].append(best_match['formatted_address'].split(',')[0])
    # Look fields up by their declared type; their position in the list varies with
    # how many components a result has. A missing field is recorded as None.
    info_dict['community'].append(_component_by_type(components, 'neighborhood'))
    info_dict['zipcode'].append(_component_by_type(components, 'postal_code'))
        


In [148]:
# Show the raw response currently bound to reverse_geocode_result.
reverse_geocode_result

[{'address_components': [{'long_name': '2002',
    'short_name': '2002',
    'types': ['street_number']},
   {'long_name': 'West Pierce Avenue',
    'short_name': 'W Pierce Ave',
    'types': ['route']},
   {'long_name': 'West Town',
    'short_name': 'West Town',
    'types': ['neighborhood', 'political']},
   {'long_name': 'Chicago',
    'short_name': 'Chicago',
    'types': ['locality', 'political']},
   {'long_name': 'Cook County',
    'short_name': 'Cook County',
    'types': ['administrative_area_level_2', 'political']},
   {'long_name': 'Illinois',
    'short_name': 'IL',
    'types': ['administrative_area_level_1', 'political']},
   {'long_name': 'United States',
    'short_name': 'US',
    'types': ['country', 'political']},
   {'long_name': '60622', 'short_name': '60622', 'types': ['postal_code']}],
  'formatted_address': '2002 W Pierce Ave, Chicago, IL 60622, USA',
  'geometry': {'location': {'lat': 41.9094, 'lng': -87.67769},
   'location_type': 'ROOFTOP',
   'viewport': {'

In [167]:
# Street address: the text before the first comma of the first match's formatted address.
reverse_geocode_result[0]['formatted_address'].split(',')[0]

'2002 W Pierce Ave'

In [158]:
# In this response the postal code sits at index 7. The position is not fixed:
# the loop above reads index 6 when a result has only seven components.
reverse_geocode_result[0]['address_components'][7]['long_name']

'60622'

In [170]:
# In this response index 2 is the component typed 'neighborhood', used as the community name.
reverse_geocode_result[0]['address_components'][2]['long_name']

'West Town'

## Ways of Getting Historical Weather Data

### Method 1: Web Scraping from the Website
- Not used because it required payment.

In [25]:
from bs4 import BeautifulSoup
import requests

In [27]:
# Lookup URL for one sample point; latitude and longitude go in the `lat` and `lng` query parameters.
page_link = 'https://www.melissa.com/v2/lookups/latlngzip4/index?lat=41.909396&lng=-87.677692'

In [28]:
# Fetch the page with a 5-second timeout so a stalled server cannot hang the notebook.
page_response = requests.get(page_link, timeout=5)

In [29]:
# Parse the raw response body using the "html.parser" backend.
page_content = BeautifulSoup(page_response.content, "html.parser")

In [81]:
# Collect every <td> whose class is 'text-left bold-text'; the following cell reads
# the first four of them as address, city, state and ZIP code.
result = page_content.find_all('td', {'class':'text-left bold-text'})

In [133]:
# Each output field comes from the <td> cell at a fixed position in `result`;
# keep the stripped text of that cell's first child in a one-element list.
field_positions = {'address': 0, 'city': 1, 'state': 2, 'zipcode': 3}
info = {
    field: [result[position].contents[0].strip()]
    for field, position in field_positions.items()
}

info

{'address': ['2001 W Pierce Ave'],
 'city': ['Chicago'],
 'state': ['IL'],
 'zipcode': ['60622-1947']}

#### From Station Location Weather Info

In [None]:
# URL template: the two placeholders receive latitude and longitude, in that order.
page_link = 'https://www.melissa.com/v2/lookups/latlngzip4/index?lat={}&lng={}'
info_dict = {'address':[], 'city':[], 'state':[], 'zipcode':[]}
field_names = list(info_dict)

# Pair each row's latitude with the same row's longitude (nested loops would request
# every latitude against every longitude). One HTTP request is made per row of `divvy`.
coordinate_pairs = zip(divvy['FROM LATITUDE'].values, divvy['FROM LONGITUDE'].values)

for latitude, longitude in coordinate_pairs:
    page_response = requests.get(page_link.format(latitude, longitude), timeout=5)
    page_content = BeautifulSoup(page_response.content, "html.parser")
    # Only the first four matching cells are used: address, city, state, ZIP code.
    result = page_content.find_all('td', {'class':'text-left bold-text'})[:4]

    if len(result) < len(field_names):
        # The page did not contain the expected cells (e.g. lookup refused or no match):
        # record None in every column so the lists stay aligned, then move on.
        for field in field_names:
            info_dict[field].append(None)
        continue

    for field, cell in zip(field_names, result):
        info_dict[field].append(cell.contents[0].strip())


### Method 2: Using 'DarkSky' 
- Dark Sky requires the query time to be passed as a UNIX timestamp.
    - A DataFrame containing UNIX timestamps was created for this purpose.
- Please refer to 'get_weather.py' for the complete code.
- More about this: https://darksky.net/dev

#### Create UNIX DataFrame 

    * pandas.date_range(start, end, freq)
    : Creates a DatetimeIndex of dates ranging from the start date to the end date.
    
    -> Input:
        start - (str) Starting date, i.e. '1/1/2018'
        end - (str) Ending date, i.e. '12/31/2018'
        freq - (str) Frequency of date data to be generated, i.e. 'D': daily
        

In [20]:
import datetime

In [29]:
# Daily DatetimeIndex running from 1/1/2018 through 12/31/2018, one entry per day.

date_rng = pd.date_range(
    start='1/1/2018',
    end='12/31/2018',
    freq='D',
)

In [30]:
# Wrap the DatetimeIndex built above in a DataFrame with a single column, 'date'.

unix_date = pd.DataFrame({'date': date_rng})

In [31]:
# Preview the first rows; at this point 'date' still holds calendar dates.
unix_date.head()

Unnamed: 0,date
0,2018-01-01
1,2018-01-02
2,2018-01-03
3,2018-01-04
4,2018-01-05


In [32]:
def timedate_to_utc(date):
    """Convert a datetime-like value to whole UNIX seconds.

    The value's tzinfo is replaced with UTC (its wall-clock fields are kept as they
    are, not shifted), and the resulting timestamp is truncated to an int.
    """
    stamped_as_utc = date.replace(tzinfo=datetime.timezone.utc)
    return int(stamped_as_utc.timestamp())


In [33]:
# Convert every date to UNIX seconds.
# The frame is rebuilt from `date_rng` rather than from `unix_date` itself, so
# re-running this cell gives the same result instead of failing on values that were
# already converted to integers. This also avoids DataFrame.applymap, which recent
# pandas releases deprecate.

unix_date = pd.DataFrame({'date': [timedate_to_utc(day) for day in date_rng]})

In [34]:
# Preview again: after the conversion 'date' holds integer UNIX seconds.
unix_date.head()

Unnamed: 0,date
0,1514764800
1,1514851200
2,1514937600
3,1515024000
4,1515110400


In [35]:
# Save the dataframe as .csv file.
# The path is relative, so the file lands in the kernel's working directory.
# to_csv is called with its defaults, so the row index is written next to the
# 'date' column (pandas' default is index=True).

unix_date.to_csv("unix_date.csv")

#### Example of the data 

In [52]:
import os

# Dark Sky request URL of the form /forecast/<key>/<latitude>,<longitude>,<unix time>.
# The secret key is read from the DARKSKY_API_KEY environment variable so it is not
# stored in, or shared with, the notebook. A missing variable raises KeyError here.
page_link = 'https://api.darksky.net/forecast/{}/41.88949182,-87.68821937,1514764800'.format(
    os.environ['DARKSKY_API_KEY']
)

In [53]:
# Fetch the response and fail loudly on an HTTP error status, instead of passing an
# error payload on to the cells that index into the decoded JSON.
http_response = requests.get(page_link, timeout=5)
http_response.raise_for_status()
page_response = http_response.json()

In [54]:
# The 'daily' section wraps a 'data' list of per-day records.
page_response['daily']

{'data': [{'time': 1514700000,
   'summary': 'Mostly cloudy throughout the day.',
   'icon': 'partly-cloudy-day',
   'sunriseTime': 1514726376,
   'sunsetTime': 1514759441,
   'moonPhase': 0.46,
   'precipIntensity': 0,
   'precipIntensityMax': 0,
   'precipProbability': 0,
   'temperatureHigh': 13.35,
   'temperatureHighTime': 1514750400,
   'temperatureLow': -7.73,
   'temperatureLowTime': 1514815200,
   'apparentTemperatureHigh': 3.18,
   'apparentTemperatureHighTime': 1514754000,
   'apparentTemperatureLow': -21.37,
   'apparentTemperatureLowTime': 1514815200,
   'dewPoint': -4,
   'humidity': 0.66,
   'pressure': 1035.25,
   'windSpeed': 5.47,
   'windGust': 11.71,
   'windGustTime': 1514750400,
   'windBearing': 298,
   'cloudCover': 0.54,
   'uvIndex': 1,
   'uvIndexTime': 1514743200,
   'visibility': 9.988,
   'temperatureMin': -0.62,
   'temperatureMinTime': 1514786400,
   'temperatureMax': 13.35,
   'temperatureMaxTime': 1514750400,
   'apparentTemperatureMin': -13.05,
   'ap

In [83]:
# Take the first entry of the 'daily' -> 'data' list: the record for the requested day.
# Note: this rebinds `info`, which an earlier cell used for the scraped address fields.
info = page_response['daily']['data'][0]

In [88]:
# Show the single day's record as a flat dict of weather fields.
info

{'time': 1514700000,
 'summary': 'Mostly cloudy throughout the day.',
 'icon': 'partly-cloudy-day',
 'sunriseTime': 1514726376,
 'sunsetTime': 1514759441,
 'moonPhase': 0.46,
 'precipIntensity': 0,
 'precipIntensityMax': 0,
 'precipProbability': 0,
 'temperatureHigh': 13.35,
 'temperatureHighTime': 1514750400,
 'temperatureLow': -7.73,
 'temperatureLowTime': 1514815200,
 'apparentTemperatureHigh': 3.18,
 'apparentTemperatureHighTime': 1514754000,
 'apparentTemperatureLow': -21.37,
 'apparentTemperatureLowTime': 1514815200,
 'dewPoint': -4,
 'humidity': 0.66,
 'pressure': 1035.25,
 'windSpeed': 5.47,
 'windGust': 11.71,
 'windGustTime': 1514750400,
 'windBearing': 298,
 'cloudCover': 0.54,
 'uvIndex': 1,
 'uvIndexTime': 1514743200,
 'visibility': 9.988,
 'temperatureMin': -0.62,
 'temperatureMinTime': 1514786400,
 'temperatureMax': 13.35,
 'temperatureMaxTime': 1514750400,
 'apparentTemperatureMin': -13.05,
 'apparentTemperatureMinTime': 1514786400,
 'apparentTemperatureMax': 3.18,
 'ap