<H1>CAPSTONE PROJECT- BATTLE OF THE NEIGHBOURHOODS</H1>

<H3>IMPORTING THE REQUIRED LIBRARIES</H3>

In [1]:
import numpy as np # library to handle data in a vectorized manner

import lxml.html as lh

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


<h3>READING THE DATASET</h3>

In [2]:
# Load the world-cities table from the working directory and show its
# (rows, columns) size.
df1 = pd.read_csv("worldcities.csv")
df1.shape

(12959, 11)

In [3]:
# List the column labels of the freshly loaded table.
df1.columns

Index(['city', 'city_ascii', 'lat', 'lng', 'country', 'iso2', 'iso3',
       'admin_name', 'capital', 'population', 'id'],
      dtype='object')

<h3>DROPPING THE UNNECESSARY COLUMNS</h3>

In [4]:
# Remove the columns the analysis never reads, then display what is left.
columns_to_discard = [
    'city_ascii',
    'iso2',
    'iso3',
    'admin_name',
    'capital',
    'population',
    'id',
]
df1 = df1.drop(labels=columns_to_discard, axis=1)
df1.columns

Index(['city', 'lat', 'lng', 'country'], dtype='object')

<h3>ACCESSING ONLY THE CANADA DETAILS</h3>

In [5]:
# Keep only the rows whose country is exactly 'Canada'.
is_canada = df1['country'].eq('Canada')
df1 = df1.loc[is_canada]

In [6]:
# Preview the first rows of the Canada-only table.
df1.head()

Unnamed: 0,city,lat,lng,country
5199,Selkirk,50.15,-96.8833,Canada
5200,Trepassey,46.737,-53.3633,Canada
5201,Schefferville,54.8,-66.8167,Canada
5202,Whitehorse,60.7167,-135.05,Canada
5203,Trout River,49.4837,-58.1166,Canada


<h3>CREATING MAP AND SUPERIMPOSING CITIES</h3>

In [7]:
# Base map with a fixed centre and zoom level.
map_kl = folium.Map(location=[55.585901, -105.750596], zoom_start=3)

# Place one blue circle marker per city; clicking it shows the city name.
city_points = zip(df1['lat'], df1['lng'], df1['city'])
for city_lat, city_lng, city_name in city_points:
    city_popup = folium.Popup('{}'.format(city_name), parse_html=True)
    city_marker = folium.CircleMarker(
        [city_lat, city_lng],
        radius=5,
        popup=city_popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    )
    city_marker.add_to(map_kl)

# Last expression of the cell: render the map.
map_kl

<h3>USING FOURSQUARE API</h3>

In [8]:
# Foursquare credentials are read from the environment so that they are never
# stored in (or committed with) the notebook. Export FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# SECURITY: an earlier revision of this cell contained the credentials in
# plain text; treat those values as leaked and rotate them.
import os
import warnings

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
VERSION = '20180605'  # Foursquare API version

# Warn right here, where the cause is obvious, instead of letting the first
# API call fail with a less helpful error.
if not CLIENT_ID or not CLIENT_SECRET:
    warnings.warn(
        'FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
        'Foursquare API requests will be rejected.'
    )

<H3>GETTING DETAILS WITHIN 500 METER RADIUS</H3>

In [9]:
radius = 500  # search radius around each city's coordinates (sent as the `radius` parameter)
LIMIT = 10    # maximum number of venues requested per city (sent as the `limit` parameter)

explore_url = "https://api.foursquare.com/v2/venues/explore"
venue_columns = ['city', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

venues = []

for lat1, lng1, neighborhood in zip(df1['lat'], df1['lng'], df1['city']):

    # let requests build and escape the query string instead of formatting it by hand
    query = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lat1, lng1),
        'radius': radius,
        'limit': LIMIT,
    }

    # make the GET request; the timeout keeps one stalled connection from
    # hanging the whole loop
    response = requests.get(explore_url, params=query, timeout=30)
    # fail with the HTTP error itself (bad credentials, quota, ...) rather
    # than with a KeyError while digging through an error payload
    response.raise_for_status()

    # a city with nothing nearby may come back without any group
    groups = response.json().get('response', {}).get('groups') or []
    results = groups[0].get('items', []) if groups else []

    # keep only the relevant information for each nearby venue
    for item in results:
        venue = item['venue']
        categories = venue.get('categories') or []
        if not categories:
            # a venue without a category cannot be used by the category
            # analysis that follows, and indexing [0] would raise IndexError
            continue
        venues.append((
            neighborhood,
            lat1,
            lng1,
            venue['name'],
            venue['location']['lat'],
            venue['location']['lng'],
            categories[0]['name']))

# passing the column names to the constructor also works when no venue at all
# was returned (assigning .columns on an empty frame raises a length mismatch)
venues_df = pd.DataFrame(venues, columns=venue_columns)

print(venues_df.shape)
venues_df.head()

(615, 7)


Unnamed: 0,city,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Selkirk,50.15,-96.8833,Enterprise Rent-A-Car,50.151446,-96.88159,Rental Car Location
1,Selkirk,50.15,-96.8833,Snak Shak,50.148854,-96.881052,Burger Joint
2,Trepassey,46.737,-53.3633,Trepassey Hotel,46.737694,-53.363645,Hotel
3,Schefferville,54.8,-66.8167,Au Shack,54.798532,-66.81406,Gym
4,Schefferville,54.8,-66.8167,Café Blabla,54.800873,-66.822312,Diner


In [10]:
# Display the collected venues table; display.max_rows was set to None in the
# import cell, so pandas does not elide any rows here.
venues_df

Unnamed: 0,city,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Selkirk,50.15,-96.8833,Enterprise Rent-A-Car,50.151446,-96.88159,Rental Car Location
1,Selkirk,50.15,-96.8833,Snak Shak,50.148854,-96.881052,Burger Joint
2,Trepassey,46.737,-53.3633,Trepassey Hotel,46.737694,-53.363645,Hotel
3,Schefferville,54.8,-66.8167,Au Shack,54.798532,-66.81406,Gym
4,Schefferville,54.8,-66.8167,Café Blabla,54.800873,-66.822312,Diner
5,Whitehorse,60.7167,-135.05,Klondike Rib & Salmon Barbecue,60.720301,-135.052947,Seafood Restaurant
6,Whitehorse,60.7167,-135.05,Baked,60.719988,-135.050889,Café
7,Whitehorse,60.7167,-135.05,Dirty Northern Public House,60.719821,-135.051134,Pub
8,Whitehorse,60.7167,-135.05,Starbucks,60.719552,-135.052333,Coffee Shop
9,Whitehorse,60.7167,-135.05,G And P Steakhouse,60.719306,-135.053775,Steakhouse


<h3>CHECKING HOW MANY VALUES WERE RETURNED FOR EACH CITY IN THE DATASET</h3>

In [11]:
# Per city, count the non-null entries of every column, i.e. how many venues
# were returned for that city.
venues_df.groupby(by="city").count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Abbotsford,4,4,4,4,4,4
Amos,4,4,4,4,4,4
Arctic Bay,2,2,2,2,2,2
Baddeck,10,10,10,10,10,10
Baie-Comeau,3,3,3,3,3,3
Baker Lake,3,3,3,3,3,3
Banff,10,10,10,10,10,10
Barrie,10,10,10,10,10,10
Belleville,10,10,10,10,10,10
Biggar,2,2,2,2,2,2


<h3>UNIQUE CATEGORIES THAT CAN BE CURATED</h3>

In [12]:
# Distinct venue categories, in order of first appearance.
unique_categories = venues_df['VenueCategory'].unique()

print('There are {} uniques categories.'.format(len(unique_categories)))
# Show at most the first 50 of them.
unique_categories[:50]

There are 162 uniques categories.


array(['Rental Car Location', 'Burger Joint', 'Hotel', 'Gym', 'Diner',
       'Seafood Restaurant', 'Café', 'Pub', 'Coffee Shop', 'Steakhouse',
       'Tourist Information Center', 'New American Restaurant',
       'Historic Site', 'Restaurant', 'Fast Food Restaurant',
       'Gas Station', 'Home Service', 'Motel', 'Moving Target',
       'Photography Studio', 'Bakery', 'Grocery Store',
       'Food & Drink Shop', 'Skating Rink', 'Food Truck',
       'Mobile Phone Shop', 'Video Store', 'Convenience Store',
       'Pharmacy', 'Thrift / Vintage Store', 'Movie Theater',
       'Liquor Store', 'Auto Garage', 'Trail', 'Baseball Field',
       'Athletics & Sports', 'Italian Restaurant', 'Music Store',
       'Vegetarian / Vegan Restaurant', 'Park',
       'Construction & Landscaping', 'Sporting Goods Shop',
       'Airport Terminal', 'Hockey Arena', 'Gym / Fitness Center',
       'Candy Store', 'American Restaurant', 'Electronics Store',
       'Bistro', 'Inn'], dtype=object)

<h3>CHECKING IF THE RESULT CONTAINS HOTEL</h3>

In [13]:
# True when at least one returned venue is categorised exactly as "Hotel".
"Hotel" in set(venues_df['VenueCategory'])

True

<H3>ANALYZING EACH CITY</H3>

In [14]:
# One indicator column per venue category (empty prefix, so the column name is
# the category name itself).
kl_onehot = pd.get_dummies(
    venues_df[['VenueCategory']],
    prefix="",
    prefix_sep="",
)

# Attach the city of every venue; the new column lands at the far right.
kl_onehot['city'] = venues_df['city']

# Rotate the column order so that the last column comes first.
column_order = [*kl_onehot.columns[-1:], *kl_onehot.columns[:-1]]
kl_onehot = kl_onehot[column_order]

print(kl_onehot.shape)
kl_onehot.head()

(615, 163)


Unnamed: 0,city,Accessories Store,Airport,Airport Terminal,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Bay,Bed & Breakfast,Beer Store,Bike Shop,Bistro,Boat or Ferry,Bookstore,Border Crossing,Bowling Alley,Breakfast Spot,Brewery,Buffet,Burger Joint,Bus Station,Business Service,Café,Cajun / Creole Restaurant,Camera Store,Candy Store,Casino,Chinese Restaurant,Clothing Store,Coffee Shop,Comedy Club,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Dog Run,Donut Shop,Eastern European Restaurant,Electronics Store,Farmers Market,Fast Food Restaurant,Flower Shop,Food & Drink Shop,Food Truck,Football Stadium,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,Gas Station,Gastropub,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Hardware Store,Historic Site,History Museum,Hockey Arena,Home Service,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Inn,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Lake,Lawyer,Liquor Store,Lounge,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Mobile Phone Shop,Mobility Store,Molecular Gastronomy Restaurant,Motel,Mountain,Movie Theater,Moving Target,Multiplex,Museum,Music Store,Music Venue,Nail Salon,National Park,New American Restaurant,Nightclub,Noodle House,Optical Shop,Park,Performing Arts Venue,Pet Store,Pharmacy,Photography Studio,Pier,Pizza Place,Platform,Playground,Plaza,Pub,Rental Car Location,Restaurant,River,Rock Club,Sandwich Place,Seafood Restaurant,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,Smoothie Shop,South American Restaurant,Southern / Soul Food Restaurant,Spa,Sporting Goods Shop,Stables,State / Provincial Park,Steakhouse,Supermarket,Sushi 
Restaurant,Tapas Restaurant,Thai Restaurant,Theater,Thrift / Vintage Store,Tourist Information Center,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Selkirk,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Selkirk,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Trepassey,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Schefferville,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Schefferville,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


<h3>GROUPING THE RESULT</h3>

In [15]:
# Average the indicator columns within each city, so every value becomes the
# share of that city's venues falling into the category.
per_city_means = kl_onehot.groupby("city").mean()
# Turn the city index back into an ordinary column.
kl_grouped = per_city_means.reset_index()

print(kl_grouped.shape)
kl_grouped

(143, 163)


Unnamed: 0,city,Accessories Store,Airport,Airport Terminal,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Bay,Bed & Breakfast,Beer Store,Bike Shop,Bistro,Boat or Ferry,Bookstore,Border Crossing,Bowling Alley,Breakfast Spot,Brewery,Buffet,Burger Joint,Bus Station,Business Service,Café,Cajun / Creole Restaurant,Camera Store,Candy Store,Casino,Chinese Restaurant,Clothing Store,Coffee Shop,Comedy Club,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Dog Run,Donut Shop,Eastern European Restaurant,Electronics Store,Farmers Market,Fast Food Restaurant,Flower Shop,Food & Drink Shop,Food Truck,Football Stadium,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,Gas Station,Gastropub,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Hardware Store,Historic Site,History Museum,Hockey Arena,Home Service,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Inn,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Lake,Lawyer,Liquor Store,Lounge,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Mobile Phone Shop,Mobility Store,Molecular Gastronomy Restaurant,Motel,Mountain,Movie Theater,Moving Target,Multiplex,Museum,Music Store,Music Venue,Nail Salon,National Park,New American Restaurant,Nightclub,Noodle House,Optical Shop,Park,Performing Arts Venue,Pet Store,Pharmacy,Photography Studio,Pier,Pizza Place,Platform,Playground,Plaza,Pub,Rental Car Location,Restaurant,River,Rock Club,Sandwich Place,Seafood Restaurant,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,Smoothie Shop,South American Restaurant,Southern / Soul Food Restaurant,Spa,Sporting Goods Shop,Stables,State / Provincial Park,Steakhouse,Supermarket,Sushi 
Restaurant,Tapas Restaurant,Thai Restaurant,Theater,Thrift / Vintage Store,Tourist Information Center,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Abbotsford,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Amos,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Arctic Bay,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Baddeck,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Baie-Comeau,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Baker Lake,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Banff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0
7,Barrie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0
8,Belleville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Biggar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# Number of cities whose hotel share is strictly positive.
int((kl_grouped["Hotel"] > 0).sum())

27

<h3>CREATING A NEW DATAFRAME FOR HOTELS ONLY</h3>

In [17]:
# Two-column view of the grouped table: the city and its hotel share.
kl_mall = kl_grouped.loc[:, ["city", "Hotel"]]
kl_mall.head()

Unnamed: 0,city,Hotel
0,Abbotsford,0.0
1,Amos,0.0
2,Arctic Bay,0.5
3,Baddeck,0.1
4,Baie-Comeau,0.0


<h3>CLUSTERING</h3>

<H5>USING K MEANS</H5>

In [18]:
kclusters = 3  # number of clusters to fit

# Feature matrix for clustering: everything except the city label. The column
# is dropped by keyword because pandas 2.x no longer accepts `axis` as a
# positional argument of DataFrame.drop.
kl_clustering = kl_mall.drop(columns=["city"])

# run k-means clustering; n_init is pinned to 10 so the fit does not depend on
# the installed scikit-learn version's default for that parameter
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(kl_clustering)

# check cluster labels generated for the first rows of the dataframe
kmeans.labels_[0:10]

array([0, 0, 1, 0, 0, 1, 0, 0, 0, 0], dtype=int32)

In [19]:
# New frame: the city / hotel-share table plus the k-means label of each row.
kl_merged = kl_mall.assign(**{"Cluster Labels": kmeans.labels_})
kl_merged.head()

Unnamed: 0,city,Hotel,Cluster Labels
0,Abbotsford,0.0,0
1,Amos,0.0,0
2,Arctic Bay,0.5,1
3,Baddeck,0.1,0
4,Baie-Comeau,0.0,0


In [20]:
# Look up every city in df1 (indexed by city name) and append the remaining
# df1 columns to the clustered table.
city_lookup = df1.set_index("city")
kl_merged = kl_merged.join(city_lookup, on="city")
kl_merged.head()

Unnamed: 0,city,Hotel,Cluster Labels,lat,lng,country
0,Abbotsford,0.0,0,49.0504,-122.3,Canada
1,Amos,0.0,0,48.5666,-78.1167,Canada
2,Arctic Bay,0.5,1,73.0333,-85.1666,Canada
3,Baddeck,0.1,0,46.1,-60.754,Canada
4,Baie-Comeau,0.0,0,49.2227,-68.158,Canada


<h3>SORTING THE CLUSTERS</h3>

In [22]:
# Order the rows by cluster label so that each cluster's cities sit together.
kl_merged = kl_merged.sort_values(by="Cluster Labels")
kl_merged

Unnamed: 0,city,Hotel,Cluster Labels,lat,lng,country
0,Abbotsford,0.0,0,49.0504,-122.3,Canada
102,Rankin Inlet,0.0,0,62.8167,-92.0953,Canada
101,Québec,0.0,0,46.84,-71.2456,Canada
100,Quesnel,0.0,0,52.9837,-122.4833,Canada
99,Prince Rupert,0.0,0,54.3167,-130.33,Canada
98,Prince George,0.0,0,53.9167,-122.7667,Canada
97,Prince Albert,0.2,0,53.2,-105.75,Canada
96,Powell River,0.0,0,49.8837,-124.55,Canada
95,Port-Menier,0.0,0,49.8226,-64.348,Canada
94,Port Hardy,0.0,0,50.7171,-127.5,Canada


<h3>VISUALIZING THE RESULTING CLUSTERS</h3>

In [23]:
map_clusters = folium.Map(location=[55.585901, -105.750596], zoom_start=3)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(kl_merged['lat'], kl_merged['lng'], kl_merged['city'], kl_merged['Cluster Labels']):
    # cluster labels are 0-based, so they index the palette directly
    # (the former `cluster-1` only worked through negative-index wraparound)
    cluster_color = rainbow[cluster]
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

<h3>EXAMINING THE CLUSTERS</h3>

<H5>CLUSTER 0</H5>

In [24]:
# Rows assigned to cluster 0.
in_cluster_0 = kl_merged['Cluster Labels'].eq(0)
kl_merged[in_cluster_0]

Unnamed: 0,city,Hotel,Cluster Labels,lat,lng,country
0,Abbotsford,0.0,0,49.0504,-122.3,Canada
102,Rankin Inlet,0.0,0,62.8167,-92.0953,Canada
101,Québec,0.0,0,46.84,-71.2456,Canada
100,Quesnel,0.0,0,52.9837,-122.4833,Canada
99,Prince Rupert,0.0,0,54.3167,-130.33,Canada
98,Prince George,0.0,0,53.9167,-122.7667,Canada
97,Prince Albert,0.2,0,53.2,-105.75,Canada
96,Powell River,0.0,0,49.8837,-124.55,Canada
95,Port-Menier,0.0,0,49.8226,-64.348,Canada
94,Port Hardy,0.0,0,50.7171,-127.5,Canada


<h5>CLUSTER 1</h5>

In [25]:
# Rows assigned to cluster 1.
in_cluster_1 = kl_merged['Cluster Labels'].eq(1)
kl_merged[in_cluster_1]

Unnamed: 0,city,Hotel,Cluster Labels,lat,lng,country
2,Arctic Bay,0.5,1,73.0333,-85.1666,Canada
5,Baker Lake,0.666667,1,64.317,-96.0167,Canada
19,Chesterfield Inlet,0.5,1,63.3383,-90.7001,Canada
28,Creston,0.333333,1,49.1,-116.5167,Canada
55,Jasper,0.7,1,52.8833,-118.0834,Canada
22,Churchill,0.5,1,58.766,-94.166,Canada


<H5>CLUSTER 2</H5>

In [26]:
# Rows assigned to cluster 2.
in_cluster_2 = kl_merged['Cluster Labels'].eq(2)
kl_merged[in_cluster_2]

Unnamed: 0,city,Hotel,Cluster Labels,lat,lng,country
30,Dawson City,1.0,2,64.0666,-139.4167,Canada
129,Trepassey,1.0,2,46.737,-53.3633,Canada
15,Camrose,1.0,2,53.0167,-112.8166,Canada
109,Sandspit,1.0,2,53.2404,-131.8333,Canada
120,Stephenville,1.0,2,48.5504,-58.5666,Canada


<h3>OBSERVATIONS:</h3>