In [1]:
import os
import json
import argparse
import pprint
import requests
import sys
import urllib
import pandas as pd
import geojson
import folium
import numpy as np
import bokeh.plotting as plt
from geopy.distance import vincenty
from urllib.error import HTTPError
from urllib.parse import quote
from urllib.parse import urlencode

In [2]:
def import_credentials(filename = 'credential.json'):
    '''
    Load the Yelp API credentials from a JSON file.

    Args:
        filename (str): Path of the JSON credentials file. May be a bare
            name in the working directory or any relative/absolute path.
    Returns:
        The decoded JSON content. obtain_bearer_token() reads the
        'CLIENT_ID' and 'CLIENT_SECRET' keys from it.
    Raises:
        IOError: filename does not point to an existing regular file.
    '''
    # os.path.isfile accepts any path; scanning os.listdir('.') only ever
    # matched bare names located in the current directory.
    if not os.path.isfile(filename):
        raise IOError('This API requires Yelp credentials to work')
    # The context manager closes the handle even if the JSON is malformed.
    with open(filename) as credentials_file:
        return json.load(credentials_file)
        
# Yelp API host, endpoint paths and OAuth grant type used by the request helpers.
API_HOST = 'https://api.yelp.com'
SEARCH_PATH = '/v3/businesses/search'
BUSINESS_PATH = '/v3/businesses/'  # Business ID will come after slash.
TOKEN_PATH = '/oauth2/token'
GRANT_TYPE = 'client_credentials'

# Loaded once at module level; obtain_bearer_token() reads this global.
creditials = import_credentials()
# Display only which keys were loaded. Echoing the dict itself would store
# the client secret in the notebook's saved output.
{key: '<redacted>' for key in creditials}

{'CLIENT_ID': '<redacted>', 'CLIENT_SECRET': '<redacted>'}

In [3]:
def obtain_bearer_token(host, path):
    '''
    Exchange the client credentials for an OAuth bearer token.

    The client ID and secret are read from the module-level ``creditials``
    dict and the grant type from ``GRANT_TYPE``; they are POSTed
    form-encoded to ``host + path``.

    Args:
        host (str): The domain host of the API.
        path (str): The path of the token endpoint after the domain.
    Returns:
        str: OAuth bearer token, obtained using client_id and client_secret.
    Raises:
        AssertionError: A credential value is empty.
        requests.exceptions.HTTPError: The token endpoint answered with a
            4xx/5xx status.
    '''
    url = '{0}{1}'.format(host, quote(path.encode('utf8')))
    assert creditials['CLIENT_ID'], "supply your client_id"
    assert creditials['CLIENT_SECRET'], "supply your CLIENT_SECRET"
    data = urlencode({
        'client_id': creditials['CLIENT_ID'],
        'client_secret': creditials['CLIENT_SECRET'],
        'grant_type': GRANT_TYPE
    })

    headers = {
        'content-type': 'application/x-www-form-urlencoded',
    }

    response = requests.request('POST', url, data = data, headers = headers)
    # Surface a rejected request as an HTTP error instead of letting it show
    # up as an unrelated KeyError on 'access_token' below.
    response.raise_for_status()
    bearer_token = response.json()['access_token']
    return bearer_token

In [4]:
def request(host, path, bearer_token, url_params=None):
    '''
    Send an authenticated GET request to the API and decode the JSON reply.

    Args:
        host (str): The domain host of the API.
        path (str): The path of the API after the domain.
        bearer_token (str): OAuth bearer token, obtained using client_id and client_secret.
        url_params (dict): An optional set of query parameters in the request.
    Returns:
        dict: The JSON response from the request. HTTP error statuses are
        NOT raised here: whatever JSON body the server sent is returned
        as-is (fetch_business checks it for an 'error' key).
    Raises:
        ValueError: The response body is not valid JSON (from response.json()).
    '''
    # Normalise a missing/empty parameter set to a dict. The original stored
    # this in a misspelled local that was never used.
    url_params = url_params or {}
    url = '{0}{1}'.format(host, quote(path.encode('utf8')))
    headers = {
        'Authorization': 'Bearer %s' % bearer_token
    }

    response = requests.request('GET', url, headers = headers, params = url_params)

    return response.json()

In [5]:
def get_business(bearer_token, business_id):
    """Look up a single business through the Business API.

    Args:
        bearer_token (str): OAuth bearer token sent with the request.
        business_id (str): The ID of the business to query; it is appended
            to BUSINESS_PATH to form the request path.
    Returns:
        dict: The JSON response from the request.
    """
    return request(API_HOST, BUSINESS_PATH + business_id, bearer_token)

In [6]:
# Authenticate once; fetch_business() reads this module-level token.
bearer_token = obtain_bearer_token(host=API_HOST, path=TOKEN_PATH)

In [7]:
def fetch_business(name, area='Toronto', manual_override = 0, country='CA'):
    '''
    Collect the listings of a chain in an area by probing business IDs.

    IDs are built as ``<name>-<area>`` for the first candidate and
    ``<name>-<area>-<i>`` for i = 2, 3, ... (both parts lower-cased, spaces
    replaced by dashes). Probing stops at the first ID for which the API
    returns an error AND the public yelp.com page answers 404; an API error
    on an ID whose page still exists is skipped.

    Args:
        name (str): Business/chain name.
        area (str): Area name used in the ID.
        manual_override: Unused; kept so existing callers keep working.
        country (str): Only listings whose ``location.country`` equals this
            code are kept.
    Returns:
        list: The API responses (dicts) of the matching listings; may be empty.

    Uses the module-level ``bearer_token`` for every API call.
    '''
    def in_country(business):
        # IDs are built from names only, so a hit may be located elsewhere.
        # A listing without location data counts as "not in the country".
        location = business.get('location') or {}
        return location.get('country') == country

    name = name.lower().replace(' ', '-')
    area = area.lower().replace(' ', '-')
    responses = []

    # First candidate carries no numeric suffix. It now goes through the same
    # country filter as the suffixed ones (previously it was kept unfiltered).
    response = get_business(bearer_token, "{0}-{1}".format(name, area))
    if 'error' not in response and in_country(response):
        responses.append(response)

    i = 2
    while True:
        bus_id = "{0}-{1}-{2}".format(name, area, i)
        response = get_business(bearer_token, bus_id)
        if 'error' in response:
            # Only a missing public page marks the end of the sequence.
            if requests.get('https://www.yelp.com/biz/' + bus_id).status_code == 404:
                break
        elif in_country(response):
            responses.append(response)
        i += 1
    print('End fetch business on:', "https://api.yelp.com/v3/businesses/" + bus_id)
    return responses

In [8]:
def frame(responses):
    '''
    Build a latitude/longitude DataFrame from a list of business responses.

    Each response must provide ``['coordinates']['latitude']`` and
    ``['coordinates']['longitude']``. The index of the result is named after
    the ``'name'`` of the first response, so an empty list raises IndexError.
    '''
    latitudes = []
    longitudes = []
    for business in responses:
        latitudes.append(business['coordinates']['latitude'])
    for business in responses:
        longitudes.append(business['coordinates']['longitude'])

    table = pd.DataFrame({'latitude': latitudes, 'longitude': longitudes})
    first_business = responses[0]
    table.index.name = first_business['name']
    return table

In [9]:
def coordinates(df):
    '''
    Return a folium map with one marker per row of df.

    The first two columns of each row are passed, in that order, as the
    marker location. The map itself is created at [43.659746, -79.393772]
    with a starting zoom of 12.
    '''
    city_map = folium.Map(location = [43.659746, -79.393772], zoom_start = 12)
    # index=False drops the index, so positions 0 and 1 are the first two columns.
    for record in df.itertuples(index = False):
        marker = folium.Marker([record[0], record[1]])
        marker.add_to(city_map)
    return city_map

In [10]:
def load_coordinates(filename='toronto.geojson'):
    '''
    Read a GeoJSON file and return all of its coordinates as a flat list.

    The coordinates of every entry under ``'features'`` are collected with
    ``geojson.utils.coords`` and each pair is returned with its first two
    components swapped, i.e. as ``(coord[1], coord[0])``.
    '''
    with open(filename) as handle:
        collection = geojson.loads(handle.read())
        collected = []
        for feature in collection['features']:
            collected.extend(geojson.utils.coords(feature))
        # Swap the order of each pair (presumably lon/lat -> lat/lon, the
        # order the sampling and distance helpers work with).
        return [(pair[1], pair[0]) for pair in collected]

In [11]:
def generate_sample_point(coordinate_list, n = 3000):
    '''
    Draw n random points from the bounding box of coordinate_list.

    coordinate_list holds (latitude, longitude) pairs. Each sample takes a
    uniform latitude between the smallest and largest latitude, then a
    uniform longitude between the smallest and largest longitude, using
    NumPy's global random generator. Returns a list of (lat, long) tuples.
    '''
    lats = [pair[0] for pair in coordinate_list]
    longs = [pair[1] for pair in coordinate_list]
    max_lat, max_long = max(lats), max(longs)
    min_lat, min_long = min(lats), min(longs)

    samples = []
    while len(samples) < n:
        # Latitude is drawn before longitude for every point.
        lat = np.random.uniform(min_lat, max_lat)
        lng = np.random.uniform(min_long, max_long)
        samples.append((lat, lng))
    return samples

In [12]:
def sample_point(n = 3000):
    '''
    Sample n random points from the bounding box of the default city file.

    Loads the coordinates with load_coordinates() (default filename) and
    hands them to generate_sample_point().
    '''
    city_coordinates = load_coordinates()
    return generate_sample_point(city_coordinates, n)

In [13]:
# Seed NumPy's global generator so the sampled cloud is identical on every
# Restart & Run All; generate_sample_point() draws from np.random.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
cloud_points = sample_point()

In [14]:
def get_minimum_distance(coordinate, coordinate_list):
    '''
    Return the smallest distance, in meters, from coordinate to any point of
    coordinate_list.

    Distances come from ``vincenty(...).meters``. The result is capped at
    10000000000, which is also what an empty coordinate_list yields.
    '''
    distances = [vincenty(candidate, coordinate).meters for candidate in coordinate_list]
    # The cap goes first so it wins ties and covers the empty case.
    return min([10000000000] + distances)

In [15]:
def average_distance(chain_name, cloud_points = cloud_points):
    """
    Mean distance from the sample points to their nearest store of a chain.

    The chain's locations are fetched with fetch_business() and turned into
    (latitude, longitude) pairs via frame(). For every point of cloud_points
    the distance to the closest location is taken, and the arithmetic mean
    of those distances is returned.
    """
    stores = frame(fetch_business(chain_name))
    store_points = list(zip(stores['latitude'], stores['longitude']))

    nearest = []
    for sample in cloud_points:
        nearest.append(get_minimum_distance(sample, store_points))
    return sum(nearest) / len(nearest)