In [1]:
import numpy as np
import pandas as pd

# Import SQL Alchemy
from sqlalchemy import create_engine

# Get the username and password from config file
from config import username, password

import requests
import os
import seaborn as sns
from psycopg2.extensions import register_adapter, AsIs

# Import API key
from config import yelp_api_key

# GeoPandas
# import geopandas as gpd
# from shapely.geometry import Point, Polygon

In [2]:
# Instead of running the cell below to import final tables using pandas read_csv, you can run this cell.
# %run Extract.ipynb
# %run Transform.ipynb

In [3]:
# final transformed restaurant table
restaurant_transformed = pd.read_csv('clean_data/restaurant.csv')
restaurant_transformed = restaurant_transformed.set_index('restaurant_id')

# final transformed neighbourhood_restaurant table
neighbourhood_restaurant_transformed = pd.read_csv('clean_data/neighbourhood_restaurant.csv')
neighbourhood_restaurant_transformed = neighbourhood_restaurant_transformed.set_index('neighbourhood_id')

# final transformed neighbourhood table
neighbourhood_transformed = pd.read_csv('clean_data/neighbourhood.csv')
neighbourhood_transformed = neighbourhood_transformed.set_index('neighbourhood_id')

# final ethnicity transformed table
ethnicity_transformed = pd.read_csv('clean_data/ethnicity.csv')
ethnicity_transformed = ethnicity_transformed.set_index('neighbourhood_id')

# final income transformed table
income_transformed = pd.read_csv('clean_data/income.csv')
income_transformed = income_transformed.set_index('neighbourhood_id')

# final crime transformed table
crime_transformed = pd.read_csv('clean_data/crime.csv')
crime_transformed = crime_transformed.set_index('neighbourhood_id')

# final yelp ratings table
yelp_rating_transformed = pd.read_csv('clean_data/yelp_ratings.csv')
yelp_rating_transformed = yelp_rating_transformed.set_index('restaurant_id')

### Table 1) Final Transformed Income Data

In [4]:
income_transformed.head()

Unnamed: 0_level_0,neighbourhood_name,median_income,average_income
neighbourhood_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,West Humber-Clairville,66241.0,76228.0
2,Mount Olive-Silverstone-Jamestown,49934.0,58605.0
3,Thistletown-Beaumond Heights,62042.0,73512.0
4,Rexdale-Kipling,56545.0,66781.0
5,Elms-Old Rexdale,50846.0,63201.0


### Table 2) Final Transformed Neighbourhood Data

In [5]:
neighbourhood_transformed

Unnamed: 0_level_0,neighbourhood_name
neighbourhood_id,Unnamed: 1_level_1
1,West Humber-Clairville
2,Mount Olive-Silverstone-Jamestown
3,Thistletown-Beaumond Heights
4,Rexdale-Kipling
5,Elms-Old Rexdale
...,...
136,West Hill
137,Woburn
138,Eglinton East
139,Scarborough Village


### Table 3) Final Transformed Crime Data

In [6]:
crime_transformed.head()

Unnamed: 0_level_0,neighbourhood_name,total_average_crime_rate
neighbourhood_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,West Humber-Clairville,507.32
2,Mount Olive-Silverstone-Jamestown,232.13
3,Thistletown-Beaumond Heights,236.5
4,Rexdale-Kipling,245.35
5,Elms-Old Rexdale,216.8


### Table 4) Final Transformed Ethnicity Data

In [7]:
ethnicity_transformed.head()

Unnamed: 0_level_0,neighbourhood_name,oceania_origins,asian_origins,north_american_aboriginal_origins,other_north_american_origins,latin_origins,european_origins,african_origins,caribbean_origins
neighbourhood_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
129,Agincourt North,10,24305,40,1345,470,3055,535,1445
128,Agincourt South-Malvern West,0,17955,105,1190,480,3770,625,1395
20,Alderwood,0,2055,305,2355,315,9135,215,350
95,Annex,140,6485,475,5255,765,21055,1040,750
42,Banbury-Don Mills,20,12025,230,3230,585,13435,990,815


### Table 5) Final Transformed Restaurant Data

In [8]:
restaurant_transformed.head()

Unnamed: 0_level_0,category,restaurant_name,price_range,latitude,longitude,neighbourhood_id,neighbourhood_name
restaurant_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Afghan,The Host,$11-30,43.669935,-79.395858,95,annex
2,Afghan,Aanch Modernist Indian Cuisine,$11-30,43.644708,-79.39067,77,waterfront communities-the island
3,Afghan,Silk Road Kabob House,Under $10,43.659816,-79.385591,76,bay street corridor
4,Afghan,Naan & Kabob,$11-30,43.669058,-79.3861,75,church-yonge corridor
5,Afghan,Afghan Cuisine,$11-30,43.70807,-79.341508,55,thorncliffe park


### Table 6) Final Transformed Neighbourhood_Restaurant Data

In [9]:
neighbourhood_restaurant_transformed.head()

Unnamed: 0_level_0,neighbourhood_name,number_of_restaurants
neighbourhood_id,Unnamed: 1_level_1,Unnamed: 2_level_1
76,bay street corridor,355
77,waterfront communities-the island,354
78,kensington-chinatown,280
75,church-yonge corridor,214
95,annex,180


### Table 7) Final Transformed Yelp Ratings Data

In [10]:
yelp_rating_transformed.head()

Unnamed: 0_level_0,restaurant_name,category,ratings,review_counts,zip_code
restaurant_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
e41TP5cXZqSrz50xCBJqZw,Insomnia Restaurant & Lounge,Lounges,4.0,923,M5S 1Y6
r_BrIgzYcwo1NAuG9dLbpg,Pai Northern Thai Kitchen,Thai,4.5,2895,M5H 3G8
Uq-GOs9_IqweUsB5MdII9w,Emma's Country Kitchen,Breakfast & Brunch,4.0,394,M6C 1B6
iGEvDk6hsizigmXhDKs2Vg,Seven Lives Tacos y Mariscos,Mexican,4.5,1323,M5T 2K1
-ICGmF2qUVKdvOehVNgPbg,Lamesa Filipino Kitchen,Filipino,4.0,352,M6C 1A9


### Create database connection to Neighborhoods_DB

In [11]:
# Establish Connection to neighborhood database
engine = create_engine(f'postgresql://postgres:postgres@localhost:5432/Neighborhoods_DB')
conn = engine.connect()

In [12]:
# Confirm tables
engine.table_names()

['restaurant',
 'neighbourhood_restaurant',
 'yelp_ratings',
 'neighbourhood',
 'income',
 'crime',
 'ethnicity']

In [13]:
# Function to solve the error, ProgrammingError: (psycopg2.ProgrammingError) can't adapt type 'numpy.int64'
def adapt_numpy_int64(np_int64):
    return AsIs(np_int64)

register_adapter(np.int64, adapt_numpy_int64)

### Load DataFrames into database

In [14]:
neighbourhood_transformed.to_sql(name='neighbourhood', con=engine, if_exists='append', index=True)

In [15]:
income_transformed.to_sql(name='income', con=engine, if_exists='append', index=True)

In [16]:
crime_transformed.to_sql(name='crime', con=engine, if_exists='append', index=True)

In [17]:
ethnicity_transformed.to_sql(name='ethnicity', con=engine, if_exists='append', index=True)

In [18]:
neighbourhood_restaurant_transformed.to_sql(name='neighbourhood_restaurant', con=engine, if_exists='append', index=True)

In [19]:
yelp_rating_transformed.to_sql(name='yelp_ratings', con=engine, if_exists='append', index=True)

### View Table data in Pandas

In [20]:
Neighbourhood_records = pd.read_sql("SELECT * FROM neighbourhood" , conn) 
print (f'Data in Neighbourhood table\n--------------------------------------\n{Neighbourhood_records.head(10)}')

Data in Neighbourhood table
--------------------------------------
   neighbourhood_id                 neighbourhood_name
0                 1             West Humber-Clairville
1                 2  Mount Olive-Silverstone-Jamestown
2                 3       Thistletown-Beaumond Heights
3                 4                    Rexdale-Kipling
4                 5                   Elms-Old Rexdale
5                 6      Kingsview Village-The Westway
6                 7   Willowridge-Martingrove-Richview
7                 8           Humber Heights-Westmount
8                 9           Edenbridge-Humber Valley
9                10                 Princess-Rosethorn


In [21]:
Income_records = pd.read_sql("SELECT * FROM income" , conn) 
print (f'Data in Income table\n--------------------------------------\n{Income_records.head(10)}')

Data in Income table
--------------------------------------
   neighbourhood_id                 neighbourhood_name  median_income  \
0                 1             West Humber-Clairville          66241   
1                 2  Mount Olive-Silverstone-Jamestown          49934   
2                 3       Thistletown-Beaumond Heights          62042   
3                 4                    Rexdale-Kipling          56545   
4                 5                   Elms-Old Rexdale          50846   
5                 6      Kingsview Village-The Westway          55454   
6                 7   Willowridge-Martingrove-Richview          63701   
7                 8           Humber Heights-Westmount          60410   
8                 9           Edenbridge-Humber Valley          71646   
9                10                 Princess-Rosethorn         114844   

   average_income  
0           76228  
1           58605  
2           73512  
3           66781  
4           63201  
5           7153

In [22]:
Crime_records = pd.read_sql("SELECT * FROM crime" , conn) 
print (f'Data in Crime table\n--------------------------------------\n{Crime_records.head(10)}')

Data in Crime table
--------------------------------------
   neighbourhood_id                 neighbourhood_name  \
0                 1             West Humber-Clairville   
1                 2  Mount Olive-Silverstone-Jamestown   
2                 3       Thistletown-Beaumond Heights   
3                 4                    Rexdale-Kipling   
4                 5                   Elms-Old Rexdale   
5                 6      Kingsview Village-The Westway   
6                 7   Willowridge-Martingrove-Richview   
7                 8           Humber Heights-Westmount   
8                 9           Edenbridge-Humber Valley   
9                10                 Princess-Rosethorn   

   total_average_crime_rate  
0                    507.32  
1                    232.13  
2                    236.50  
3                    245.35  
4                    216.80  
5                    131.05  
6                    163.98  
7                    123.32  
8                    131.97  
9 

In [23]:
Ethnicity_records = pd.read_sql("SELECT * FROM ethnicity" , conn) 
print (f'Data in Ethnicity table\n--------------------------------------\n{Ethnicity_records.head(10)}')

Data in Ethnicity table
--------------------------------------
   neighbourhood_id            neighbourhood_name  oceania_origins  \
0               129               Agincourt North               10   
1               128  Agincourt South-Malvern West                0   
2                20                     Alderwood                0   
3                95                         Annex              140   
4                42             Banbury-Don Mills               20   
5                34                Bathurst Manor                0   
6                76           Bay Street Corridor               60   
7                52               Bayview Village               45   
8                49         Bayview Woods-Steeles               15   
9                39          Bedford Park-Nortown               50   

   asian_origins  north_american_aboriginal_origins  \
0          24305                                 40   
1          17955                                105   
2

In [24]:
Restaurant_records = pd.read_sql("SELECT * FROM restaurant" , conn) 
print (f'Data in Restaurant table\n--------------------------------------\n{Restaurant_records.head(10)}')

Data in Restaurant table
--------------------------------------
Empty DataFrame
Columns: [restaurant_id, category, restaurant_name, price_range, latitude, longitude, neighbourhood_id, neighbourhood_name]
Index: []


In [25]:
Neighbourhood_Restaurant_records = pd.read_sql("SELECT * FROM neighbourhood_restaurant" , conn) 
print (f'Data in Neighbourhood Restaurant table\n--------------------------------------\n{Neighbourhood_Restaurant_records.head(10)}')

Data in Neighbourhood Restaurant table
--------------------------------------
   neighbourhood_id                 neighbourhood_name  number_of_restaurants
0                76                bay street corridor                    355
1                77  waterfront communities-the island                    354
2                78               kensington-chinatown                    280
3                75              church-yonge corridor                    214
4                95                              annex                    180
5                81                  trinity-bellwoods                    136
6                14         islington-city centre west                    120
7                70                    south riverdale                    106
8               130                           milliken                     97
9                80            palmerston-little italy                     91


In [26]:
Yelp_Ratings_records = pd.read_sql("SELECT * FROM yelp_ratings" , conn) 
print (f'Data in Yelp Ratings table\n--------------------------------------\n{Yelp_Ratings_records.head(10)}')

Data in Yelp Ratings table
--------------------------------------
            restaurant_id                 restaurant_name            category  \
0  e41TP5cXZqSrz50xCBJqZw    Insomnia Restaurant & Lounge             Lounges   
1  r_BrIgzYcwo1NAuG9dLbpg       Pai Northern Thai Kitchen                Thai   
2  Uq-GOs9_IqweUsB5MdII9w          Emma's Country Kitchen  Breakfast & Brunch   
3  iGEvDk6hsizigmXhDKs2Vg    Seven Lives Tacos y Mariscos             Mexican   
4  -ICGmF2qUVKdvOehVNgPbg         Lamesa Filipino Kitchen            Filipino   
5  nHFJtud7jWZhM9dHQ1eIRA        Buk Chang Dong Soon Tofu              Korean   
6  crstB-H5rOfbXhV8pX0e6g                    Ramen Isshin               Ramen   
7  uAAWlLdsoUf872F1FKiX1A  Bang Bang Ice Cream and Bakery            Desserts   
8  q9_gLvTNf11etVxbH7JY0Q                            Yasu            Japanese   
9  RtUvSWO_UZ8V3Wpj0n077w          KINKA IZAKAYA ORIGINAL            Japanese   

   ratings  review_counts zip_code  
0    