In [1]:
import pandas as pd
import numpy as np
import folium
from sklearn.cluster import KMeans
import requests
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import json

In [2]:
with open('Parks Zones.geojson', 'r') as file_handle:    
    park_zones = json.load(file_handle)

In [3]:
list_of_parks = [i['properties']['propname'] for i in park_zones['features']]

In [4]:
len(list_of_parks)

588

In [5]:
len(list(set(list_of_parks)))

103

## Create a df with names of parks and center coordinates

In [6]:
parks_list_for_df = []
for item in park_zones['features']:
    park_name = item['properties']['propname']
    acres = item['properties']['acres']
    location = item['properties']['location']
    zipcode = item['properties']['zipcode']
    tmp_coord = np.array(item['geometry']['coordinates'][0][0])
#     print(tmp_coord.shape)
    coordinates = np.sum(tmp_coord, axis =0) / tmp_coord.shape[0]
    latitude= coordinates[1]
    longitude = coordinates[0]
#     print(coordinates.shape)
    parks_list_for_df.append([park_name, location, acres, zipcode, latitude, longitude])

In [7]:
parks_df = pd.DataFrame(parks_list_for_df, columns=['Park_name', 'Address', 'Acres', 'Zipcode', 'Latitude', 'Longitude'])

In [8]:
parks_df = parks_df[parks_df['Park_name'].notna()]

In [19]:
parks_df['Acres'].fillna(np.Inf, inplace=True)

In [20]:
parks_df['Acres'] = parks_df['Acres'].astype(float)

In [21]:
#We want to tackle smaller parks, since this is where there's a higher probability of not having an established coffee shop

In [22]:
five_smallest = parks_df[['Park_name','Acres']].groupby('Park_name').sum().sort_values(by='Acres').nsmallest(n=5,columns=['Acres'])

In [23]:
five_smallest_list = five_smallest.index.tolist()

In [30]:
#Get the average latitude/longitude per park to identify a "central point"
parks_to_check = parks_df[['Park_name', 'Latitude', 'Longitude']].groupby('Park_name').agg(np.mean).reset_index()

In [24]:
parks_df['To sample'] = parks_df['Park_name'].apply(lambda x: x in five_smallest_list)

In [25]:
parks_sample = parks_df[parks_df['To sample']]

In [26]:
parks_sample.head()

Unnamed: 0,Park_name,Address,Acres,Zipcode,Latitude,Longitude,To sample
30,Little Red Square,Bleecker St & Ave Of Americas,0.038576,10012,40.729376,-74.002105,True
78,O'Connors Tail,Broadway & 78 St,0.148286,11373,40.7449,-73.887671,True
327,Park Avenue Malls,"Park Ave, E96 St, E97 St",0.237877,10029,40.786897,-73.952294,True
530,Hempstead Bench Stretch,225 St & 104 Ave,0.381692,11429,40.710615,-73.729662,True
579,Downing Playground,Bleecker And Downing Sts,0.052782,10014,40.72982,-74.002387,True


## Foursquare login details, removed before committing the notebook

In [60]:
CLIENT_ID = '3JHGGHSJUJT3VSUZXAZ2M2X1TDQFT1R10RFJJBN02R2SJ34M' # your Foursquare ID
CLIENT_SECRET = 'D4D4J0RPXPVJ5JHQXI5HVMPJQVI54LCPNT4D4AIUBJLHWSGD' # your Foursquare Secret
VERSION = '20190221' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 3JHGGHSJUJT3VSUZXAZ2M2X1TDQFT1R10RFJJBN02R2SJ34M
CLIENT_SECRET:D4D4J0RPXPVJ5JHQXI5HVMPJQVI54LCPNT4D4AIUBJLHWSGD


In [83]:
import pdb

In [362]:
def get_summary_of_stats_by_venue_id(venue_id):
    url = 'https://api.foursquare.com/v2/venues/{}?&client_id={}&client_secret={}&v={}'.format(
            venue_id,
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION)
    results = requests.get(url).json()["response"]
    venue_stats=[]
    if 'venue' in results.keys():
        for item_to_retrieve in ['rating', 'likes']:
            if item_to_retrieve not in results['venue'].keys():
                venue_stats.append(None)
            else:
                if item_to_retrieve == 'likes':
                    venue_stats.append(results['venue'][item_to_retrieve]['count'])
                else:
                    venue_stats.append(results['venue'][item_to_retrieve])
    else:
        venue_stats = [None, None]
    return venue_stats

## Find nearby venues to a specific point on map

In [32]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, categories_list=[]):
    radius = 500
    LIMIT = 100
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)  
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        #check if category within categories list, if any provided
        if categories_list:
            filtered_results = [result for result in results if result['venue']['categories'][0]['name'] in categories_list]
            results = filtered_results
        
        # return only relevant information for each nearby venue that has a rating
        venues_list.append([
            (
            name, 
            lat, 
            lng, 
            v['venue']['name'],
            v['venue']['id'],
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],
            v['venue']['categories'][0]['name'])
            for v in results ])
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Park_name',
                  'Park Latitude', 
                  'Park Longitude', 
                  'Venue',
                  'Venue ID',
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [358]:
venues_nearby_df = getNearbyVenues(names=parks_sample['Park_name'], latitudes=parks_sample['Latitude'], longitudes=parks_sample['Longitude'], categories_list=['Coffee Shop', 'Café'])

Little Red Square
O'Connors Tail
Park Avenue Malls
Hempstead Bench Stretch
Downing Playground


In [359]:
venues_nearby_df

Unnamed: 0,Park_name,Park Latitude,Park Longitude,Venue,Venue ID,Venue Latitude,Venue Longitude,Venue Category
0,Little Red Square,40.729376,-74.002105,Banter,589c7ca37b43b441e3ac3ef8,40.727959,-74.001205,Café
1,Little Red Square,40.729376,-74.002105,Porto Rico Importing Co.,3fd66200f964a52059e61ee3,40.729483,-74.001722,Coffee Shop
2,Little Red Square,40.729376,-74.002105,Bluestone Lane,56acef5b498ec8e203a43bb7,40.730009,-74.002855,Café
3,Little Red Square,40.729376,-74.002105,Summers Juice & Coffee,562d0e34498e348022841311,40.72764,-74.000316,Café
4,Little Red Square,40.729376,-74.002105,west~bourne,5a1ed5c967e5f2347f77664b,40.726932,-74.002022,Café
5,O'Connors Tail,40.7449,-73.887671,969 nyc coffee,5803cb7738fa2724aedd7d02,40.747744,-73.88583,Café
6,Park Avenue Malls,40.786897,-73.952294,Da Capo,5328a418498e4be839240b62,40.787679,-73.953899,Café
7,Park Avenue Malls,40.786897,-73.952294,Bonjour Crêpes & Wine,529ba40e11d2e2aa2818ad9b,40.784543,-73.95199,Café
8,Park Avenue Malls,40.786897,-73.952294,Frenchy Cafe NYC,5a772c1625ecca5ea8cd01cd,40.789803,-73.948164,Café
9,Park Avenue Malls,40.786897,-73.952294,Corner Cafe and Bakery,49f0b229f964a52048691fe3,40.782785,-73.950983,Café


In [395]:
#Use the overall park list to try and identify what other venues are popular next to the "best performing" coffee shops
venues_nearby_all_parks_df = getNearbyVenues(names=parks_to_check['Park_name'], latitudes=parks_to_check['Latitude'], longitudes=parks_to_check['Longitude'])

Alley Pond Park
Aqueduct Walk
Astoria Park
Baisley Pond Park
Battery Park
Belt Parkway/Shore Parkway
Bloomingdale Park
Blue Heron Park Preserve
Broadway Malls
Bronx & Pelham Pkwy(2.5 Miles)
Bronx Park
Brooklyn Bridge Park
Brookville Park
Canarsie Park
Carl Schurz Park
Central Park
Claremont Park
Clove Lakes Park
Coney Island Beach & Boardwalk
Coney Island Boat Basin
Conference House Park
Crocheron Park
Crotona Park
Crotona Parkway Malls
Cunningham Park
Downing Playground
Dreier-Offerman Park
Dyker Beach Park
East River Esplanade
East River Waterfront Esplanade
Eastern Parkway
Emile Cavanaugh Triangle
FDR Boardwalk & Beach
Ferry Point Park
Flushing Meadows Park
Forest Park
Fort Greene Park
Fort Totten Park
Fort Tryon Park
Fort Washington Park
Franz Sigel Park
Fresh Kills Park
Geo Soilan Pk-Battery Pk City
Goodhue Park
Grand Central Parkway
Great Kills Park
Harlem River Park
Hempstead Bench Stretch
Herman A Macneil Park
Highbridge Park
Highland Park
Idlewild Park
Inwood Hill Park
Isham P

In [396]:
parks_with_caffes = venues_nearby_all_parks_df[venues_nearby_all_parks_df.apply(lambda x: ('Coffee' in x['Venue Category']) | ('Caf' in x['Venue Category']), axis=1)]['Park_name'].unique()

In [397]:
venues_nearby_all_parks_df['Park_with_cafe'] = venues_nearby_all_parks_df.apply(lambda x: x['Park_name'] in parks_with_caffes, axis=1)

In [398]:
venues_nearby_all_parks_df

Unnamed: 0,Park_name,Park Latitude,Park Longitude,Venue,Venue ID,Venue Latitude,Venue Longitude,Venue Category,Park_with_cafe
0,Alley Pond Park,40.748450,-73.745363,Double Dome Park,4dceb82dd164679b8d06a92f,40.747824,-73.744686,Baseball Field,False
1,Alley Pond Park,40.748450,-73.745363,Alley Pond Park Basketball Courts,4c55f7e619160f470fdf0fb4,40.746997,-73.745041,Basketball Court,False
2,Alley Pond Park,40.748450,-73.745363,Insignia,4f614744e4b011d850782b65,40.751210,-73.747938,Steakhouse,False
3,Alley Pond Park,40.748450,-73.745363,Alley Pond Softball Fields,4dbdbcca0437955ec0547698,40.752045,-73.744697,Athletics & Sports,False
4,Alley Pond Park,40.748450,-73.745363,Long Island Expressway at Exit 30,4d59bbb424466ea881857a9f,40.750906,-73.749218,Intersection,False
5,Aqueduct Walk,40.858241,-73.907932,Liberato,4c9fcdfc8afca09381840d16,40.858525,-73.903902,Spanish Restaurant,False
6,Aqueduct Walk,40.858241,-73.907932,Elsa La Reina Del Chicharrón (Elsa The Queen O...,4c6c82f56af58cfac5248717,40.858627,-73.903682,Spanish Restaurant,False
7,Aqueduct Walk,40.858241,-73.907932,Giovanni's Pizza,4c54850b4623be9ae52cebf2,40.860383,-73.906661,Pizza Place,False
8,Aqueduct Walk,40.858241,-73.907932,Bravo Supermarkets,4c63360ae1621b8d5a792553,40.860352,-73.906604,Grocery Store,False
9,Aqueduct Walk,40.858241,-73.907932,Wingstop,582097ccfa17e5344ac6984f,40.854093,-73.907899,Wings Joint,False


In [399]:
df_parks_cafe_not = venues_nearby_all_parks_df.groupby('Park_name')['Park_with_cafe'].any().reset_index()

In [400]:
mapping_dictionary = dict([(name, idx) for idx, name in enumerate(venues_nearby_all_parks_df['Venue Category'].unique())])

In [621]:
def is_cafe(string_input):
    return ('Coffee' in string_input) | ('Caf' in string_input)

In [542]:
venues_nearby_all_parks_df['Is_caffe'] = venues_nearby_all_parks_df['Venue Category'].apply(is_cafe)

In [547]:
top_rated_venues_caffe = venues_nearby_all_parks_df[['Park_name', 'Venue', 'Rating']][venues_nearby_all_parks_df['Is_caffe']].groupby('Park_name').max()

In [576]:
top_rated_venues_caffe.sort_values(by='Rating', ascending=False)[:5]

Unnamed: 0_level_0,Venue,Rating
Park_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Battery Park,Starbucks,9.2
Morningside Park,Up Coffee Co.,8.9
Starlight Park,Prospect Coffee Shop,8.4
Eastern Parkway,Jenny Coffe Shop #2,8.2
Washington Square Park,The Uncommons,8.0


In [591]:
top5_parks = top_rated_venues_caffe.sort_values(by='Rating', ascending=False)[:5].index.tolist()

In [577]:
#Let's learn what other best venues are close to the "best performing" caffes

In [578]:
def top_5_other_venues(list_of_objects):
    tmp_list = sorted(list_of_objects, reverse = True)
    return tmp_list[:5]

In [584]:
top_rated_venues_not_caffe = venues_nearby_all_parks_df[['Park_name', 'Venue Category', 'Rating']][~venues_nearby_all_parks_df['Is_caffe']].groupby('Park_name').agg(top_5_other_venues).reset_index()

In [597]:
top_rated_venues_not_caffe[top_rated_venues_not_caffe['Park_name'].apply(lambda x: x in top5_parks)]['Venue Category'].values

array([list(["Women's Store", 'Wine Shop', 'Wine Bar', 'Supplement Shop', 'Steakhouse']),
       list(['Yoga Studio', 'Wings Joint', 'Vegetarian / Vegan Restaurant', 'Thai Restaurant', 'Sushi Restaurant']),
       list(['Wine Shop', 'Wine Shop', 'Wine Bar', 'Wine Bar', 'Sports Bar']),
       list(['Video Game Store', 'Supplement Shop', 'Supermarket', 'Shopping Mall', 'Shoe Store']),
       list(['Yoga Studio', 'Wine Shop', 'Wine Bar', 'Wine Bar', 'Wine Bar'])],
      dtype=object)

In [599]:
#We can see that Women's Store, Yoga Studio and Wine shops/bars are common venues that are close to best performing
#caffes, we need to look for a park which has such amenities, but still doesn't have a coffee shop

In [603]:
parks_venues = venues_nearby_all_parks_df[~venues_nearby_all_parks_df['Park_with_cafe']].groupby('Park_name')['Venue Category'].apply(list)

In [608]:
def lookup_venues(list_of_venues):
    count_of_items = 0
    items_to_find = ["Women's Store", 'Wine Shop', 'Yoga Studio', "Wine Bar", 'Supplement Shop']
    for item in list_of_venues:
        if item in items_to_find:
            count_of_items += 1
    return count_of_items

In [612]:
parks_venues.apply(lambda x: lookup_venues(x)).sort_values(ascending=False)[:3]

Park_name
Spring Creek Park    3
Franz Sigel Park     2
Highbridge Park      1
Name: Venue Category, dtype: int64

In [613]:
#Spring Creek Park seems like the best candidate, which has popular amenities, that are similar to the ones, where the
# "best performing" coffee shops are

In [618]:
parks_df[parks_df['Park_name'] == 'Spring Creek Park'][['Park_name', 'Address','Latitude', 'Longitude']]

Unnamed: 0,Park_name,Address,Latitude,Longitude
411,Spring Creek Park,"Vandalia Ave, Gateway Dr, Erskine St, Belt Pkwy",40.651426,-73.874781


## Plot park candidates for the coffee shops

In [620]:
latitude = 40.651426
longitude =  -73.874781
# create map of Toronto using latitude and longitude values
map_NYC_with_caffes= folium.Map(location=[latitude, longitude], zoom_start=14)

# add markers to map
label = '{}, {}'.format('Spring Creek Park', 'Vandalia Ave, Gateway Dr, Erskine St, Belt Pkwy')
label = folium.Popup(label, parse_html=True)
folium.CircleMarker(
    [latitude, longitude],
    radius=5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='blue',
    fill_opacity=0.7).add_to(map_NYC_with_caffes)
map_NYC_with_caffes