# Capstone Project Notebook
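This is the main working notebook for the capstone project: it scrapes Brisbane suburb names from Wikipedia, geocodes them, maps them with folium, and collects nearby venues from the Foursquare API.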

In [2]:
import json
import os
from datetime import datetime

# !pip install googlemaps
import googlemaps
import numpy as np
import pandas as pd

## Get Brisbane suburbs (neighbourhoods)

In [4]:
# !pip install bs4
# !pip install requests
from bs4 import BeautifulSoup as bs
import requests

In [5]:
# Download the Wikipedia list of Brisbane suburbs and parse it.
url = 'https://en.wikipedia.org/wiki/List_of_Brisbane_suburbs#City_of_Brisbane'
page = requests.get(url)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page.raise_for_status()

soup = bs(page.content, 'html.parser')
# Sanity check: shows the element with id "City_of_Brisbane" if the page
# still has the expected layout (None otherwise).
soup.find(id='City_of_Brisbane')

<span class="mw-headline" id="City_of_Brisbane">City of Brisbane</span>

In [6]:
# Gather the link text of every <a> found inside the class-less <p> tags.
p_elems = soup.find_all('p', class_='')
a_elems = [
    anchor.text
    for paragraph in p_elems
    for anchor in paragraph.find_all('a')
]

a_elems

['suburbs',
 'Brisbane',
 'Australian Bureau of Statistics',
 '[1]',
 'Bowen Hills',
 'Brisbane',
 'East Brisbane',
 'Fortitude Valley',
 'Herston',
 'Highgate Hill',
 'Kangaroo Point',
 'Kelvin Grove',
 'New Farm',
 'Newstead',
 'Paddington',
 'Petrie Terrace',
 'Red Hill',
 'South Brisbane',
 'Spring Hill',
 'Teneriffe',
 'West End',
 'Woolloongabba',
 'Albion',
 'Alderley',
 'Ascot',
 'Aspley',
 'Bald Hills',
 'Banyo',
 'Boondall',
 'Bracken Ridge',
 'Bridgeman Downs',
 'Brighton',
 'Brisbane Airport',
 'Carseldine',
 'Chermside',
 'Chermside West',
 'Clayfield',
 'Deagon',
 'Eagle Farm',
 'Everton Park',
 'Ferny Grove',
 'Fitzgibbon',
 'Gaythorne',
 'Geebung',
 'Gordon Park',
 'Grange',
 'Hamilton',
 'Hendra',
 'Kalinga',
 'Kedron',
 'Keperra',
 'Lutwyche',
 'McDowall',
 'Mitchelton',
 'Myrtletown',
 'Newmarket',
 'Northgate',
 'Nudgee',
 'Nudgee Beach',
 'Nundah',
 'Pinkenba',
 'Sandgate',
 'Shorncliffe',
 'Stafford',
 'Stafford Heights',
 'Taigum',
 'Virginia',
 'Wavell Heights',

### Clean suburb data

In [7]:
# The scraped list starts with links from the article's lead paragraph
# ('suburbs', 'Brisbane', 'Australian Bureau of Statistics', '[1]'); the
# suburb links begin at 'Bowen Hills'. Slicing from that entry rather than
# from a fixed offset keeps this cell idempotent: re-running it no longer
# strips four real suburbs each time.
FIRST_SUBURB = 'Bowen Hills'
a_elems = a_elems[a_elems.index(FIRST_SUBURB):]
print(len(a_elems))
a_elems

452


['Bowen Hills',
 'Brisbane',
 'East Brisbane',
 'Fortitude Valley',
 'Herston',
 'Highgate Hill',
 'Kangaroo Point',
 'Kelvin Grove',
 'New Farm',
 'Newstead',
 'Paddington',
 'Petrie Terrace',
 'Red Hill',
 'South Brisbane',
 'Spring Hill',
 'Teneriffe',
 'West End',
 'Woolloongabba',
 'Albion',
 'Alderley',
 'Ascot',
 'Aspley',
 'Bald Hills',
 'Banyo',
 'Boondall',
 'Bracken Ridge',
 'Bridgeman Downs',
 'Brighton',
 'Brisbane Airport',
 'Carseldine',
 'Chermside',
 'Chermside West',
 'Clayfield',
 'Deagon',
 'Eagle Farm',
 'Everton Park',
 'Ferny Grove',
 'Fitzgibbon',
 'Gaythorne',
 'Geebung',
 'Gordon Park',
 'Grange',
 'Hamilton',
 'Hendra',
 'Kalinga',
 'Kedron',
 'Keperra',
 'Lutwyche',
 'McDowall',
 'Mitchelton',
 'Myrtletown',
 'Newmarket',
 'Northgate',
 'Nudgee',
 'Nudgee Beach',
 'Nundah',
 'Pinkenba',
 'Sandgate',
 'Shorncliffe',
 'Stafford',
 'Stafford Heights',
 'Taigum',
 'Virginia',
 'Wavell Heights',
 'Wilston',
 'Windsor',
 'Wooloowin',
 'Zillmere',
 'Acacia Ridge',


## Move suburb data into dataframe and get coordinates

In [8]:
# One row per scraped suburb name, held in a single 'Suburbs' column.
df = pd.DataFrame({'Suburbs': a_elems})
df.head()

Unnamed: 0,Suburbs
0,Bowen Hills
1,Brisbane
2,East Brisbane
3,Fortitude Valley
4,Herston


In [9]:
# Create the Google Maps client in this cell so it also runs on a fresh
# kernel (no earlier cell defines `gmaps`). The API key is read from the
# GOOGLE_MAPS_API_KEY environment variable rather than stored in the notebook.
gmaps = googlemaps.Client(key=os.environ['GOOGLE_MAPS_API_KEY'])

latitudes = []
longitudes = []

for suburb in df['Suburbs']:
    # Qualify the query with the city name (presumably to disambiguate
    # suburb names that also exist elsewhere).
    name = suburb + ', Brisbane'
    result = gmaps.geocode(name)
    # geocode() returns a list of matches; the first one is used.
    # NOTE(review): an empty result list raises IndexError here.
    location = result[0]['geometry']['location']
    latitudes.append(location['lat'])
    longitudes.append(location['lng'])

In [10]:
# Attach the geocoded coordinates as two new columns, in row order.
df = df.assign(Latitude=latitudes, Longitude=longitudes)
df.head()

Unnamed: 0,Suburbs,Latitude,Longitude
0,Bowen Hills,-27.443194,153.038389
1,Brisbane,-27.469771,153.025124
2,East Brisbane,-27.482,153.051
3,Fortitude Valley,-27.4565,153.0345
4,Herston,-27.445149,153.020689


## Visualize suburbs

In [11]:
# !conda install -c conda-forge folium
import folium

In [16]:
# Base map centred on Brisbane.
brisbane_lat = -27.4698
brisbane_long = 153.0251
map_brisbane = folium.Map(location=[brisbane_lat, brisbane_long], zoom_start=9)

# One circle marker per suburb, with the suburb name as the popup text.
for lat, long, suburb in zip(df['Latitude'], df['Longitude'], df['Suburbs']):
    # parse_html is a Popup option; it is not a CircleMarker parameter,
    # so it is passed only here.
    label = folium.Popup('{}, Brisbane'.format(suburb), parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_brisbane)

In [17]:
# map_brisbane

In [20]:
# Suburb boundary GeoJSON used for the choropleth layer.
# Remote alternative (presumably where the local file came from - confirm):
# geojson_url = 'https://data.gov.au/geoserver/qld-suburb-locality-boundaries-psma-administrative-boundaries/wfs?request=GetFeature&typeName=ckan_6bedcb55_1b1f_457b_b092_58e88952e9f0&outputFormat=json'
# NOTE(review): machine-specific absolute path; point this at your own copy
# of the file before running.
geojson_url = 'C:\\Users\\ogaboga\\PycharmProjects\\IBM_DS_Capstone\\cleaned_brisbane.geojson'
brisbane_boundaries = geojson_url

choropleth = folium.Choropleth(
    geo_data=brisbane_boundaries,
    name='choropleth',
    data=df,
    # First column is the join key, second column is the value being shaded.
    columns=['Suburbs', 'Latitude'],
    # NOTE(review): key_on is not set. It looks like folium needs it to join
    # `data` to the GeoJSON features; set it to the feature property that
    # holds the suburb name (verify the property name in the file), e.g.:
    #     key_on='feature.properties.NAME_2',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    # The shaded value is the 'Latitude' column, so label the legend with it.
    legend_name='Latitude',
    highlight=True,
    line_color='black'
).add_to(map_brisbane)

map_brisbane

## Use Foursquare API to retrieve venues

In [25]:
# Foursquare credentials are read from the environment so they are never
# stored in the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel; a missing variable
# raises KeyError here rather than failing later inside an API call.
CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID'] # your Foursquare ID
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET'] # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # value sent as the `limit` query parameter

### Test Foursquare API with one suburb

In [47]:
# Select the Woolloongabba row once and read both coordinates from it.
woolloongabba = df[df['Suburbs'] == 'Woolloongabba']
test_lat = woolloongabba['Latitude'].iloc[0]
test_long = woolloongabba['Longitude'].iloc[0]

# Build the explore request for a 500 radius around that point.
# NOTE: the displayed URL embeds the API credentials; clear this cell's
# output before sharing the notebook.
explore_args = (CLIENT_ID, CLIENT_SECRET, VERSION, test_lat, test_long, 500, LIMIT)
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(*explore_args)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=<CLIENT_ID>&client_secret=<CLIENT_SECRET>&v=20180605&ll=-27.4856,153.0291&radius=500&limit=100'

In [49]:
# Call the explore endpoint and show the venue groups from the decoded JSON.
response = requests.get(url)
results = response.json()
results['response']['groups']

[{'type': 'Recommended Places',
  'name': 'recommended',
  'items': [{'reasons': {'count': 0,
     'items': [{'summary': 'This spot is popular',
       'type': 'general',
       'reasonName': 'globalInteractionReason'}]},
    'venue': {'id': '4b058733f964a520f28322e3',
     'name': 'Morrison Hotel',
     'location': {'address': '640 Stanley St.',
      'lat': -27.485460005586383,
      'lng': 153.0308582066337,
      'labeledLatLngs': [{'label': 'display',
        'lat': -27.485460005586383,
        'lng': 153.0308582066337}],
      'distance': 174,
      'postalCode': '4102',
      'cc': 'AU',
      'city': 'Woolloongabba',
      'state': 'QLD',
      'country': 'Australia',
      'formattedAddress': ['640 Stanley St.',
       'Woolloongabba QLD 4102',
       'Australia']},
     'categories': [{'id': '4bf58dd8d48988d116941735',
       'name': 'Bar',
       'pluralName': 'Bars',
       'shortName': 'Bar',
       'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/nightlife/pub_'

## Get venues for all suburbs

In [50]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect Foursquare 'explore' venues around each named location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Iterated together with ``zip``; each triple is one location to query.
    radius : int, default 500
        Sent as the ``radius`` query parameter of the explore request.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Suburb',
        'Suburb Latitude', 'Suburb Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude' and 'Venue Category'. When no venues are collected
        (including empty input) the frame is empty but still has these columns.

    Notes
    -----
    Reads the module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and
    ``LIMIT`` settings, and prints each name as it is processed.
    """
    columns = [
        'Suburb',
        'Suburb Latitude',
        'Suburb Longitude',
        'Venue',
        'Venue Latitude',
        'Venue Longitude',
        'Venue Category'
    ]
    rows = []

    for name, lat, long in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, long),
            'radius': radius,
            'limit': LIMIT,
        }
        response = requests.get('https://api.foursquare.com/v2/venues/explore', params=params)
        items = response.json()['response']['groups'][0]['items']

        for item in items:
            venue = item['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                long,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # A venue with an empty category list gets None instead of
                # raising IndexError.
                categories[0]['name'] if categories else None,
            ))

    # Passing the column names at construction keeps the result well-formed
    # even when `rows` is empty.
    return pd.DataFrame(rows, columns=columns)

In [51]:
# Fetch venues for every suburb in df (each suburb name is printed as it is processed).
brisbane_venues = getNearbyVenues(df['Suburbs'], df['Latitude'], df['Longitude'])

Bowen Hills
Brisbane
East Brisbane
Fortitude Valley
Herston
Highgate Hill
Kangaroo Point
Kelvin Grove
New Farm
Newstead
Paddington
Petrie Terrace
Red Hill
South Brisbane
Spring Hill
Teneriffe
West End
Woolloongabba
Albion
Alderley
Ascot
Aspley
Bald Hills
Banyo
Boondall
Bracken Ridge
Bridgeman Downs
Brighton
Brisbane Airport
Carseldine
Chermside
Chermside West
Clayfield
Deagon
Eagle Farm
Everton Park
Ferny Grove
Fitzgibbon
Gaythorne
Geebung
Gordon Park
Grange
Hamilton
Hendra
Kalinga
Kedron
Keperra
Lutwyche
McDowall
Mitchelton
Myrtletown
Newmarket
Northgate
Nudgee
Nudgee Beach
Nundah
Pinkenba
Sandgate
Shorncliffe
Stafford
Stafford Heights
Taigum
Virginia
Wavell Heights
Wilston
Windsor
Wooloowin
Zillmere
Acacia Ridge
Algester
Annerley
Archerfield
Burbank
Calamvale
Coopers Plains
Darra
Doolandella
Drewvale
Durack
Dutton Park
Eight Mile Plains
Ellen Grove
Fairfield
Forest Lake
Greenslopes
Heathwood
Holland Park
Holland Park West
Inala
Karawatha
Kuraby
Larapinta
MacGregor
Mackenzie
Mansfield

In [52]:
# Display the venue table returned by getNearbyVenues.
brisbane_venues

Unnamed: 0,Suburb,Suburb Latitude,Suburb Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Bowen Hills,-27.443194,153.038389,Di Bella Coffee HQ,-27.443332,153.038579,Coffee Shop
1,Bowen Hills,-27.443194,153.038389,Mrs Luu's - Vietnamese Canteen,-27.441523,153.040499,Vietnamese Restaurant
2,Bowen Hills,-27.443194,153.038389,Merlo,-27.442231,153.040401,Coffee Shop
3,Bowen Hills,-27.443194,153.038389,Golden Pig,-27.442530,153.042760,Asian Restaurant
4,Bowen Hills,-27.443194,153.038389,Bowen Hills YMCA,-27.442802,153.038893,Gym
...,...,...,...,...,...,...,...
2117,Woodford,-26.955000,152.778000,CJ's Pastries,-26.954490,152.777592,Bakery
2118,Woodford,-26.955000,152.778000,Jalla's Cafe,-26.953645,152.777856,Café
2119,Woodford,-26.955000,152.778000,Woodford Hotel,-26.952398,152.777416,Pub
2120,Woodford,-26.955000,152.778000,Woolworths Woodford,-26.951886,152.777963,Supermarket


In [53]:
# Count the non-null entries in each column, grouped by suburb.
brisbane_venues.groupby('Suburb').count()

Unnamed: 0_level_0,Suburb Latitude,Suburb Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Suburb,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Albany Creek,16,16,16,16,16,16
Albion,14,14,14,14,14,14
Alderley,5,5,5,5,5,5
Alexandra Hills,5,5,5,5,5,5
Algester,1,1,1,1,1,1
...,...,...,...,...,...,...
Yarrabilba,2,2,2,2,2,2
Yeerongpilly,5,5,5,5,5,5
Yeronga,10,10,10,10,10,10
Yugar,1,1,1,1,1,1


In [55]:
# Report how many distinct values the 'Venue Category' column holds.
n_categories = len(brisbane_venues['Venue Category'].unique())
print('Unique venue categories: {}'.format(n_categories))

Unique venue categories: 252
