In [1]:
import pandas as pd 
import matplotlib.pyplot as plt
import geopandas as gpd
import seaborn as sns
import os
import numpy as np

In [2]:
from shapely.geometry import Point, Polygon

In [3]:
# Apply seaborn's "darkgrid" style for the plots drawn in this notebook.
sns.set(style="darkgrid")

In [4]:
# Load Resources/data.csv into a DataFrame and preview the first rows.
# os.path.join keeps the relative path portable across operating systems.
filepath = os.path.join("Resources", "data.csv")
data = pd.read_csv(filepath)
data.head()

Unnamed: 0,neighbourhood
0,Waterfront Communities-The Island
1,Newtonbrook East
2,York University Heights
3,Dorset Park
4,Englemount-lawrence


In [5]:
# Load the neighbourhood boundary polygons from the shapefile.
nb = os.path.join('Resources', 'Neighbourhoods', "Neighbourhoods.shp")
regions = gpd.read_file(nb)

# Derive a clean, lower-case neighbourhood name from FIELD_7 by removing the
# trailing " (<id>)" suffix, e.g. "University (79)" -> "university".
# regex=True is passed explicitly: since pandas 2.0 the default is a literal
# match, which would silently leave the suffix in place. The raw string avoids
# the invalid "\(" escape-sequence warning.
regions['neighbourhood'] = (
    regions['FIELD_7']
    .str.replace(r' \(.+\)', '', regex=True)
    .str.lower()
)
regions.sample(5)

Unnamed: 0,FIELD_1,FIELD_2,FIELD_3,FIELD_4,FIELD_5,FIELD_6,FIELD_7,FIELD_8,FIELD_9,FIELD_10,FIELD_11,FIELD_12,FIELD_13,FIELD_14,FIELD_15,geometry,neighbourhood
59,2160,25886920,25926721,49885,79,79,University (79),University (79),,,-79.40118,43.662506,16492449,2687050.0,6872.849906,"POLYGON ((-79.40772 43.65648, -79.40847 43.658...",university
92,2193,25886711,25926754,49885,30,30,Brookhaven-Amesbury (30),Brookhaven-Amesbury (30),,,-79.485589,43.701326,16492977,6715562.0,12417.055559,"POLYGON ((-79.50296 43.69574, -79.50415 43.696...",brookhaven-amesbury
40,2141,25886730,25926702,49885,4,4,Rexdale-Kipling (4),Rexdale-Kipling (4),,,-79.566228,43.723725,16492145,4801397.0,9788.586534,"POLYGON ((-79.55512 43.71510, -79.55504 43.714...",rexdale-kipling
80,2181,25886914,25926742,49885,76,76,Bay Street Corridor (76),Bay Street Corridor (76),,,-79.385721,43.657511,16492785,3459075.0,9594.336045,"POLYGON ((-79.38752 43.65067, -79.38663 43.650...",bay street corridor
66,2167,25886870,25926728,49885,91,91,Weston-Pellam Park (91),Weston-Pellam Park (91),,,-79.460244,43.673962,16492561,2794057.0,7655.243605,"POLYGON ((-79.46005 43.66723, -79.46092 43.668...",weston-pellam park


In [6]:
# Save the regions table (including the derived 'neighbourhood' column) as CSV.
# The index is written too, since to_csv is called with its default arguments.
regions.to_csv('Resources/regions.csv')

In [7]:
# Load the Starbucks store locations (the preview shows ID, Lat and Long columns).
starbucks_df = pd.read_csv('Resources/Starbucks locations.csv')
starbucks_df.head()

Unnamed: 0,ID,Lat,Long
0,75921-104040,43.086574,-79.059356
1,3997-146205,43.077276,-79.082792
2,75525-35359,43.078906,-79.081879
3,75633-86381,43.079703,-79.082204
4,75790-96396,43.082189,-79.082469


In [8]:
# Assign every Starbucks store to the neighbourhood polygon that contains it.
# Stores that fall inside no polygon are left out of the result lists.
lngs = starbucks_df['Long']
lats = starbucks_df['Lat']

# Parallel result lists: one entry per (store, containing polygon) match.
neighbourhood_id = []
neighbourhood_name = []
lats_list = []
lngs_list = []

# Pull the polygon data out of the frame once. Iterating the columns directly
# avoids a label-based .loc lookup per store/polygon pair and does not depend
# on `regions` having a default 0..n-1 index.
region_rows = list(
    zip(regions['geometry'], regions['FIELD_6'], regions['neighbourhood'])
)

for lng, lat in zip(lngs, lats):
    # shapely points are (x, y), i.e. (longitude, latitude).
    point = Point(lng, lat)

    for poly, hood_id, hood_name in region_rows:
        if point.within(poly):
            neighbourhood_id.append(hood_id)
            neighbourhood_name.append(hood_name)
            lats_list.append(lat)
            lngs_list.append(lng)

In [9]:
# Combine the four parallel lists (coordinates, neighbourhood id and name)
# into a single DataFrame, one column per list.
starbucks_dict = {
    'Lat' : lats_list,
    'Long' : lngs_list,
    'Hood ID' : neighbourhood_id,
    'Neighbourhood' : neighbourhood_name
}

clean_starbucks = pd.DataFrame(starbucks_dict)

# Show the resulting table.
clean_starbucks

Unnamed: 0,Lat,Long,Hood ID,Neighbourhood
0,43.797471,-79.148805,131,rouge
1,43.743753,-79.216334,139,scarborough village
2,43.770374,-79.186485,136,west hill
3,43.816306,-79.293317,130,milliken
4,43.776324,-79.259292,127,bendale
...,...,...,...,...
173,43.766139,-79.476335,27,york university heights
174,43.726773,-79.451830,31,yorkdale-glen park
175,43.757568,-79.465829,27,york university heights
176,43.725807,-79.450222,31,yorkdale-glen park


In [10]:
# Count the stores per neighbourhood, most frequent first, as a two-column
# table: 'Neighbourhood' and 'Number of Stores'.
starbucks_val_counts = (
    clean_starbucks['Neighbourhood']
    .value_counts()
    .rename_axis('Neighbourhood')
    .reset_index(name='Number of Stores')
)

starbucks_val_counts

Unnamed: 0,Neighbourhood,Number of Stores
0,bay street corridor,26
1,waterfront communities-the island,19
2,islington-city centre west,8
3,church-yonge corridor,8
4,annex,7
...,...,...
67,clanton park,1
68,agincourt south-malvern west,1
69,eringate-centennial-west deane,1
70,edenbridge-humber valley,1


In [11]:
# Keep only the first store row seen for each neighbourhood, which leaves one
# representative row (and Hood ID) per neighbourhood.
is_repeat = clean_starbucks.duplicated(subset='Neighbourhood', keep='first')
clean_starbucks_df = clean_starbucks[~is_repeat]
clean_starbucks_df

Unnamed: 0,Lat,Long,Hood ID,Neighbourhood
0,43.797471,-79.148805,131,rouge
1,43.743753,-79.216334,139,scarborough village
2,43.770374,-79.186485,136,west hill
3,43.816306,-79.293317,130,milliken
4,43.776324,-79.259292,127,bendale
...,...,...,...,...
164,43.698454,-79.463205,108,briar hill-belgravia
167,43.665399,-79.471892,90,junction area
169,43.651357,-79.475875,88,high park north
170,43.725315,-79.454165,31,yorkdale-glen park


In [12]:
# Attach the per-neighbourhood representative row to the store counts, then
# discard that row's coordinates, which are not needed in the summary.
starbucks_final = (
    starbucks_val_counts
    .merge(clean_starbucks_df, on='Neighbourhood', how='inner')
    .drop(columns=['Lat', 'Long'])
)

In [13]:
# Rename 'Hood ID' to 'Hood_ID' and put the columns in their final order.
# The rename runs first and returns a new frame, which makes the cell safe to
# re-run: renaming a column that is already gone is a no-op, whereas selecting
# 'Hood ID' after a previous run would raise a KeyError. It also avoids an
# in-place rename on a column-subset frame.
starbucks_final = (
    starbucks_final
    .rename(columns={"Hood ID": "Hood_ID"})
    .loc[:, ['Hood_ID', 'Neighbourhood', 'Number of Stores']]
)

In [14]:
# Display the final per-neighbourhood store-count table.
starbucks_final

Unnamed: 0,Hood_ID,Neighbourhood,Number of Stores
0,76,bay street corridor,26
1,77,waterfront communities-the island,19
2,14,islington-city centre west,8
3,75,church-yonge corridor,8
4,95,annex,7
...,...,...,...
67,33,clanton park,1
68,128,agincourt south-malvern west,1
69,11,eringate-centennial-west deane,1
70,9,edenbridge-humber valley,1


In [1]:
# Write the final table to disk. The output folder is created first so the
# export does not fail on a fresh checkout where 'output_data' does not exist.
os.makedirs('output_data', exist_ok=True)
starbucks_final.to_csv('output_data/starbucks_final.csv')

SyntaxError: EOL while scanning string literal (<ipython-input-1-9e7d8971b660>, line 1)