In [77]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import openpyxl
from opencage.geocoder import OpenCageGeocode #how we get the lat/long for our dataset
%matplotlib inline 

### We will scape using pandas and then utilize the len function to find our dataframe

In [78]:
#read in the zipcode data
path = 'http://ciclt.net/sn/clt/capitolimpact/gw_ziplist.aspx?ClientCode=capitolimpact&State=oh&StName=Ohio&StFIPS=39&FIPS=39049'
df = pd.read_html(path)

In [79]:
#find the table to scrape, table is df[2] is the table we are looking for
len(df)

4

In [80]:
df_columbus = df[2]
df_columbus.head()

Unnamed: 0,Zip Code,City,County
0,43002,Amlin,Franklin County
1,43004,Blacklick,Franklin County
2,43016,Dublin,Franklin County
3,43017,Dublin,Franklin County
4,43026,Hilliard,Franklin County


### We need to add the state into the dataframe in order to get our lat/long

In [81]:
#We need to add a column 'State' to get our lat/long
df_columbus.insert(2, 'State', True) #splice, column, add value
df_columbus['State'] = df_columbus['State'].replace([True],'OH') #Converting boolean value to state
df_columbus.head(2) #check to make sure everything loaded correctly

Unnamed: 0,Zip Code,City,State,County
0,43002,Amlin,OH,Franklin County
1,43004,Blacklick,OH,Franklin County


In [82]:
#Repeat the above steps for West Palm Beach
path_2 = 'http://www.ciclt.net/sn/clt/capitolimpact/gw_ziplist.aspx?zip=334&stfips=12&state=fl&stname=Florida'
df1 = pd.read_html(path_2)
df_wpalm = df1[2]
df_wpalm.insert(2, 'State', True)
df_wpalm['State'] = df_wpalm['State'].replace([True], 'FL')
df_wpalm.head(2)

Unnamed: 0,Zip Code,City,State,County
0,33401,West Palm Beach,FL,Palm Beach County
1,33402,West Palm Bch,FL,Palm Beach County


In [83]:
print("Columbus: ", df_columbus.shape)
print(20 * '-')
print("West Palm: ", df_wpalm.shape)

Columbus:  (52, 4)
--------------------
West Palm:  (52, 4)


### check our data to ensure we do not having anything missing or null values that my hinder our analysis

In [84]:
df_columbus.info()
print(35 * '-')
df_wpalm.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Zip Code  52 non-null     int64 
 1   City      52 non-null     object
 2   State     52 non-null     object
 3   County    52 non-null     object
dtypes: int64(1), object(3)
memory usage: 1.8+ KB
-----------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Zip Code  52 non-null     int64 
 1   City      52 non-null     object
 2   State     52 non-null     object
 3   County    52 non-null     object
dtypes: int64(1), object(3)
memory usage: 1.8+ KB


In [85]:
key = '86a0203a94634c8aafb080ffb8b03aaa'  # get api key from:  https://opencagedata.com

geocoder = OpenCageGeocode(key)

In [86]:
#Adding lat/lon to the Columbus dataframe 
list_lat = []   # create empty lists

list_long = []

for index, row in df_columbus.iterrows(): # iterate over rows in dataframe



    City = row['City']
    State = row['State']       
    query = str(City)+','+str(State)

    results = geocoder.geocode(query)   
    lat = results[0]['geometry']['lat']
    long = results[0]['geometry']['lng']

    list_lat.append(lat)
    list_long.append(long)

	
# create new columns from lists    

df_columbus['Latitude'] = list_lat   

df_columbus['Longitude'] = list_long
df_columbus.head(2)

Unnamed: 0,Zip Code,City,State,County,Latitude,Longitude
0,43002,Amlin,OH,Franklin County,40.076401,-83.180799
1,43004,Blacklick,OH,Franklin County,39.995619,-82.811286


In [87]:
#Adding the lat/lon to the West Palm data set
list_lat = []   # create empty lists

list_long = []

for index, row in df_wpalm.iterrows(): # iterate over rows in dataframe

    City = row['City']
    State = row['State']       
    query = str(City)+','+str(State)

    results = geocoder.geocode(query)   
    lat = results[0]['geometry']['lat']
    long = results[0]['geometry']['lng']

    list_lat.append(lat)
    list_long.append(long)

# create new columns from lists    

df_wpalm['Latitude'] = list_lat   

df_wpalm['Longitude'] = list_long
df_wpalm.head(2)

Unnamed: 0,Zip Code,City,State,County,Latitude,Longitude
0,33401,West Palm Beach,FL,Palm Beach County,26.715364,-80.053294
1,33402,West Palm Bch,FL,Palm Beach County,26.715364,-80.053294


In [88]:
df_columbus.info()
print(40 * "-")
df_wpalm.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Zip Code   52 non-null     int64  
 1   City       52 non-null     object 
 2   State      52 non-null     object 
 3   County     52 non-null     object 
 4   Latitude   52 non-null     float64
 5   Longitude  52 non-null     float64
dtypes: float64(2), int64(1), object(3)
memory usage: 2.6+ KB
----------------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Zip Code   52 non-null     int64  
 1   City       52 non-null     object 
 2   State      52 non-null     object 
 3   County     52 non-null     object 
 4   Latitude   52 non-null     float64
 5   Longitude  52 non-null     float64
dtypes: float64(2), int64(1), object(3)
memory usage:

### We can go ahead and drop the state column as we no longer need this for our analysis

In [89]:
#Dropping State from dataframe
df_wpalm.drop('State', axis='columns', inplace=True)
df_wpalm.head()

Unnamed: 0,Zip Code,City,County,Latitude,Longitude
0,33401,West Palm Beach,Palm Beach County,26.715364,-80.053294
1,33402,West Palm Bch,Palm Beach County,26.715364,-80.053294
2,33402,West Palm Beach,Palm Beach County,26.715364,-80.053294
3,33403,Lake Park,Palm Beach County,26.800215,-80.066163
4,33403,North Palm Beach,Palm Beach County,26.823794,-80.055927


In [90]:
#Dropping State from dataframe
df_columbus.drop('State', axis='columns', inplace=True)
df_columbus.head()

Unnamed: 0,Zip Code,City,County,Latitude,Longitude
0,43002,Amlin,Franklin County,40.076401,-83.180799
1,43004,Blacklick,Franklin County,39.995619,-82.811286
2,43016,Dublin,Franklin County,40.099229,-83.114077
3,43017,Dublin,Franklin County,40.099229,-83.114077
4,43026,Hilliard,Franklin County,40.033814,-83.159611


In [91]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [92]:
CLIENT_ID = '24BSYOBXKYDSF4KVTMA1LFBBSCKN2AXJ3VLK10X35NH30YMP' # your Foursquare ID
CLIENT_SECRET = '52YTX4UFSH5HYRZU30XZQUD0QWWKDE2P1PCIYNLH1KLBDRFP' # your Foursquare Secret
ACCESS_TOKEN = 'RISIBPSXX41HKVNKOR11POZR0CQCDD4L125IFS3HRA0WEFGI' # your FourSquare Access Token
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 24BSYOBXKYDSF4KVTMA1LFBBSCKN2AXJ3VLK10X35NH30YMP
CLIENT_SECRET:52YTX4UFSH5HYRZU30XZQUD0QWWKDE2P1PCIYNLH1KLBDRFP


In [93]:
print('The dataframe has {} counties and {} neighborhoods.'.format(
        len(df_columbus['County'].unique()) + len(df_wpalm['County'].unique()),
        df_columbus.shape[0] + df_wpalm.shape[0]
    )
)

The dataframe has 2 counties and 104 neighborhoods.


In [94]:
#Let's first explore Columbus, Ohio
address = 'columbus, ohio'
geolocator = Nominatim(user_agent="cbuspalm_explorer")
location = geolocator.geocode(address)
c_latitude = location.latitude
c_longitude = location.longitude
print('The geographical coordinate of Columbus are {}, {}.'.format(c_latitude,c_longitude))

The geographical coordinate of Columbus are 39.9622601, -83.0007065.


In [95]:
map_columbus = folium.Map(location=[c_latitude, c_longitude], zoom_start =12)

for lat, lng, county, neighborhood in zip(df_columbus['Latitude'], df_columbus['Longitude'],
                                           df_columbus['County'], df_columbus['City']):
    label = '{}, {}'.format(neighborhood, county)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    [lat, lng],
    radius=5,
    popup=label,
    color='green',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(map_columbus)
    
map_columbus

In [96]:
#Let's explore West Palm Beach
address = 'West Palm Beach, FL'
geolocator = Nominatim(user_agent="cbuspalm_explorer")
location = geolocator.geocode(address)
w_latitude = location.latitude
w_longitude = location.longitude
print('The geographical coordinate of West Palm Beach are {}, {}.'.format(w_latitude,w_longitude))

The geographical coordinate of West Palm Beach are 26.715364, -80.0532942.


In [97]:
map_wpalm = folium.Map(location=[w_latitude, w_longitude], zoom_start =12)

for lat, lng, county, neighborhood in zip(df_wpalm['Latitude'], df_wpalm['Longitude'],
                                           df_wpalm['County'], df_wpalm['City']):
    label = '{}, {}'.format(neighborhood, county)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    [lat, lng],
    radius=5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(map_wpalm)
    
map_wpalm

Reviewing both maps, you can see some initial between Columbus and West Palm. The neighborhoods are spreadout with plenty of locations that fall within the county of each location. What I found interesting is that both Columbus and West Palm offer a Reliever airport and International airport.

In [98]:
stats = 'https://www.census.gov/quickfacts/columbuscityohio'
df_c_stat = pd.read_html(stats)
print(len(df_c_stat))

4


In [99]:
df_c = df_c_stat[1]
df_c.rename(columns={'Population': 'Columbus, OH Population', 'Unnamed: 1': 'Statistics'}, inplace=True)
df_c['Statistics'] = df_c['Statistics'].str.replace('', '')
df_c['Statistics'] = df_c['Statistics'].str.replace('X', '')
nan_value = float("NaN")
df_c.replace("", nan_value, inplace=True)
df_c.dropna(subset = ["Statistics"], inplace=True)

In [100]:
stats_w = 'https://www.census.gov/quickfacts/fact/table/westpalmbeachcityflorida,palmbeachcountyflorida/PST045219'
df_w_stat = pd.read_html(stats_w)
df_w = df_w_stat[1]
df_w.drop('Unnamed: 2', axis='columns', inplace=True)


In [101]:
df_w.rename(columns={'Population': 'West Palm Beach, FL Population', 'Unnamed: 1': 'Statistics'}, inplace=True)
df_w['Statistics'] = df_w['Statistics'].str.replace('', '')
df_w['Statistics'] = df_w['Statistics'].str.replace('X', '')
nan_value = float("NaN")
df_w.replace("", nan_value, inplace=True)
df_w.dropna(subset = ["Statistics"], inplace=True)

In [102]:
from itertools import chain, cycle
from IPython.display import display_html

In [103]:
def display_side_by_side(*args, titles=cycle([''])):
    html_str=''
    for df, title in zip(args, chain(titles, cycle(['</br>']))):
        html_str+='<th style="text-align:center"><td style="vertical-align:top">'
        html_str+=f'<h3>{title}</h3>'
        html_str+=df.to_html().replace('table', 'table style = "display:inline"')
        html_str+='</td></th>'
    display_html(html_str, raw=True)

In [104]:
display_side_by_side(df_c, df_w, titles=['Columbus', 'West Palm Beach'])

Unnamed: 0,"Columbus, OH Population",Statistics
0,"Population estimates, July 1, 2019, (V2019)",898553
1,"Population estimates base, April 1, 2010, (V2019)",789018
2,"Population, percent change - April 1, 2010 (estimates base) to July 1, 2019, (V2019)",13.9%
3,"Population, Census, April 1, 2010",787033
6,"Persons under 5 years, percent",7.3%
7,"Persons under 18 years, percent",22.5%
8,"Persons 65 years and over, percent",10.2%
9,"Female persons, percent",51.1%
11,"White alone, percent",58.6%
12,"Black or African American alone, percent(a)",29.0%

Unnamed: 0,"West Palm Beach, FL Population",Statistics
0,"Population estimates, July 1, 2019, (V2019)",111955
1,"Population estimates base, April 1, 2010, (V2019)",100665
2,"Population, percent change - April 1, 2010 (estimates base) to July 1, 2019, (V2019)",11.2%
3,"Population, Census, April 1, 2010",99919
6,"Persons under 5 years, percent",6.1%
7,"Persons under 18 years, percent",18.4%
8,"Persons 65 years and over, percent",18.6%
9,"Female persons, percent",52.1%
11,"White alone, percent",58.2%
12,"Black or African American alone, percent(a)",34.4%


## Demographic information:
There's a lot of similiarities between West Palm and Columbus.  The median income, persons in poverty, civilian labor force, Per capital Income in the past 12 months are all pretty much inline and similar to both cities.  The interesting find in this comparison is related to median home value and mortage payment. The median homevalue of housing in West Palm is about 100,000 dollars more than Columbus, yet the West Palm median mortage payment for that premium is only costing home owners an extra 317.00. Upon running a basic mortgage calculation on todays rate, the difference in mortage payment between 150k and 250k should be closer to 600.00. One assumption we can make is that housing values appreciate more in West Palm, yet West Palm is less dense in population than Columbus.

In [105]:
#columbus, ohio 
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    c_latitude, 
    c_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=24BSYOBXKYDSF4KVTMA1LFBBSCKN2AXJ3VLK10X35NH30YMP&client_secret=52YTX4UFSH5HYRZU30XZQUD0QWWKDE2P1PCIYNLH1KLBDRFP&v=20180604&ll=39.9622601,-83.0007065&radius=500&limit=100'

In [106]:
#west palm beach, florida
LIMIT = 100
radius = 500
w_url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    w_latitude, 
    w_longitude, 
    radius, 
    LIMIT)
w_url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=24BSYOBXKYDSF4KVTMA1LFBBSCKN2AXJ3VLK10X35NH30YMP&client_secret=52YTX4UFSH5HYRZU30XZQUD0QWWKDE2P1PCIYNLH1KLBDRFP&v=20180604&ll=26.715364,-80.0532942&radius=500&limit=100'

In [107]:
#Columbus results
c_results = requests.get(url).json()
c_results

{'meta': {'code': 200, 'requestId': '60b689ce022ce918355229c1'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'},
    {'name': '$-$$$$', 'key': 'price'}]},
  'headerLocation': 'Uptown District',
  'headerFullLocation': 'Uptown District, Columbus',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 48,
  'suggestedBounds': {'ne': {'lat': 39.966760104500004,
    'lng': -82.99484636702641},
   'sw': {'lat': 39.9577600955, 'lng': -83.0065666329736}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b0aafe2f964a520642623e3',
       'name': 'Café Brioso',
       'location': {'address': '14 E Gay St',
        'crossStreet': 'at N. High Street',
        'lat': 39.96363995001144,
        'lng': -83.0

In [108]:
#West Palm reesults
w_results = requests.get(w_url).json()
w_results

{'meta': {'code': 200, 'requestId': '60b689cec4b8bb7cbc19310b'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'},
    {'name': '$-$$$$', 'key': 'price'}]},
  'headerLocation': 'Downtown West Palm Beach',
  'headerFullLocation': 'Downtown West Palm Beach, West Palm Beach',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 71,
  'suggestedBounds': {'ne': {'lat': 26.719864004500007,
    'lng': -80.0482658226132},
   'sw': {'lat': 26.710863995499995, 'lng': -80.05832257738679}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '50f04681e4b055ea02528174',
       'name': 'Field of Greens',
       'location': {'address': '412 Clematis St',
        'lat': 26.71334497190468,
        'lng': -80.0540981

In [109]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else: 
        return categories_list[0]['name']

In [110]:
#Columbus
venues = c_results['response']['groups'][0]['items']

c_nearby_venues = pd.json_normalize(venues)

filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
c_nearby_venues = c_nearby_venues.loc[:, filtered_columns]
c_nearby_venues['venue.categories'] = c_nearby_venues.apply(get_category_type, axis=1)

c_nearby_venues.columns = [col.split(".")[-1] for col in c_nearby_venues.columns]

c_nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Café Brioso,Coffee Shop,39.96364,-83.000568
1,Si Señor!,Latin American Restaurant,39.963314,-82.998769
2,Ohio Theatre,Theater,39.960281,-82.99909
3,Moonlight Market,Arts & Crafts Store,39.963803,-82.999985
4,"Hotel LeVeque, Autograph Collection",Hotel,39.962333,-83.002168


In [111]:
#West Palm
venues = w_results['response']['groups'][0]['items']
w_nearby_venues = pd.json_normalize(venues)

filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
w_nearby_venues = w_nearby_venues.loc[:, filtered_columns]
w_nearby_venues['venue.categories'] = w_nearby_venues.apply(get_category_type, axis=1)

w_nearby_venues.columns = [col.split('.')[-1]for col in w_nearby_venues.columns]
w_nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Field of Greens,Salad Place,26.713345,-80.054098
1,Palm Beach Photographic Centre,Hobby Shop,26.713454,-80.054604
2,Rocco's Tacos and Tequila Bar,Mexican Restaurant,26.713306,-80.051618
3,Roxy's Pub,Pub,26.713622,-80.052328
4,Middle East Bakery,Middle Eastern Restaurant,26.718091,-80.052806


In [112]:
print('{} Columnbus venues were returned by Foursquare. '.format(c_nearby_venues.shape[0]))
print(24 * '-')
print('{} West Palm venues were returned by Foursquare. '.format(w_nearby_venues.shape[0]))

48 Columnbus venues were returned by Foursquare. 
------------------------
71 West Palm venues were returned by Foursquare. 


In [113]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['City', 
                  'City Latitude', 
                  'City Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [114]:
cbus_venues = getNearbyVenues(names=df_columbus['City'], latitudes=df_columbus['Latitude'],
                                  longitudes=df_columbus['Longitude'])

Amlin
Blacklick
Dublin
Dublin
Hilliard
New Albany
Reynoldsburg
Westerville
Columbus
Worthington
Westerville
Brice
Canal Winchester
Galloway
Darbydale
Grove City
Urbancrest
Groveport
Harrisburg
Columbus
Columbus
Columbus
Columbus
Columbus
Columbus
Columbus
Obetz
Bexley
Columbus
Columbus
Columbus
Columbus
Columbus
Whitehall
Columbus
Columbus
Columbus
Columbus
Columbus
Columbus
Columbus
Upper Arlington
Columbus
Upper Arlington
Columbus
Columbus
Columbus
Columbus
Columbus
Lincoln Village Fin Unit
Columbus
Columbus


In [115]:
print(cbus_venues.shape)
cbus_venues.head()

(1622, 7)


Unnamed: 0,City,City Latitude,City Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Amlin,40.076401,-83.180799,NiDovi Pizza & BBQ,40.076947,-83.180631,BBQ Joint
1,Amlin,40.076401,-83.180799,Amlin Carryout,40.076775,-83.180678,Pizza Place
2,Amlin,40.076401,-83.180799,Chapman Drainage & Basement Repair,40.079369,-83.182673,Home Service
3,Amlin,40.076401,-83.180799,The Breakfast Bar,40.078124,-83.175978,American Restaurant
4,Blacklick,39.995619,-82.811286,Train Stop,39.99673,-82.81066,Light Rail Station


In [116]:
wpalm_venues = getNearbyVenues(names=df_wpalm['City'], latitudes=df_wpalm['Latitude'], 
                               longitudes=df_wpalm['Longitude'])

West Palm Beach
West Palm Bch
West Palm Beach
Lake Park
North Palm Beach
Palm Beach Gardens
Riviera Beach
West Palm Beach
Singer Island
Palm Beach Shores
Riviera Beach
West Palm Beach
West Palm Beach
Cloud Lake
Glen Ridge
Lake Clarke Shores
Palm Springs
West Palm Beach
Riviera Beach
West Palm Beach
Juno Beach
Lake Park
North Palm Beach
Palm Beach Gardens
West Palm Beach
Haverhill
West Palm Beach
Lake Park
North Palm Beach
Palm Beach Gardens
West Palm Beach
Royal Palm Beach
West Palm Beach
Palm Beach Gardens
Royal Palm Beach
West Palm Beach
Green Acres
Greenacres
West Palm Beach
Royal Plm Beach
Wellington
West Palm Beach
Green Acres
Greenacres
Haverhill
West Palm Beach
West Palm Beach
Haverhill
West Palm Beach
Palm Beach Gardens
West Palm Beach
Riviera Beach


In [117]:
print(wpalm_venues.shape)
wpalm_venues.head()

(1563, 7)


Unnamed: 0,City,City Latitude,City Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,West Palm Beach,26.715364,-80.053294,Field of Greens,26.713345,-80.054098,Salad Place
1,West Palm Beach,26.715364,-80.053294,Palm Beach Photographic Centre,26.713454,-80.054604,Hobby Shop
2,West Palm Beach,26.715364,-80.053294,Rocco's Tacos and Tequila Bar,26.713306,-80.051618,Mexican Restaurant
3,West Palm Beach,26.715364,-80.053294,Roxy's Pub,26.713622,-80.052328,Pub
4,West Palm Beach,26.715364,-80.053294,Middle East Bakery,26.718091,-80.052806,Middle Eastern Restaurant


In [118]:
cbus_venues.groupby('City').count()

Unnamed: 0_level_0,City Latitude,City Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Amlin,4,4,4,4,4,4
Bexley,2,2,2,2,2,2
Blacklick,5,5,5,5,5,5
Brice,5,5,5,5,5,5
Canal Winchester,1,1,1,1,1,1
Columbus,1344,1344,1344,1344,1344,1344
Darbydale,2,2,2,2,2,2
Dublin,52,52,52,52,52,52
Galloway,4,4,4,4,4,4
Grove City,10,10,10,10,10,10


In [119]:
print("There are {} unique venue categories in Columbus".format(len(cbus_venues['Venue Category'].unique())))

There are 109 unique venue categories in Columbus


In [120]:
wpalm_venues.groupby('City').count()

Unnamed: 0_level_0,City Latitude,City Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Cloud Lake,10,10,10,10,10,10
Glen Ridge,3,3,3,3,3,3
Green Acres,2,2,2,2,2,2
Greenacres,2,2,2,2,2,2
Haverhill,9,9,9,9,9,9
Juno Beach,12,12,12,12,12,12
Lake Clarke Shores,4,4,4,4,4,4
Lake Park,36,36,36,36,36,36
North Palm Beach,12,12,12,12,12,12
Palm Beach Gardens,5,5,5,5,5,5


In [121]:
print("There are {} unique venue cateogories in West Palm".format(len(wpalm_venues['Venue Category'].unique())))

There are 103 unique venue cateogories in West Palm


Columbus and West Palm Beach have a similar unqiue venue count, Columbus having just 5 more than West Palm Beach.

In [122]:
#one hot encoding 
cbus_onehot = pd.get_dummies(cbus_venues[['Venue Category']], prefix="",prefix_sep="")
#cbus_onehot.drop(['Neighborhood'], axis=1, inplace=True) --> was already dropped, didn't need this
cbus_onehot.insert(loc=0, column='City', value=cbus_venues['City'])
print(cbus_onehot.shape)
cbus_onehot.head()

(1622, 110)


Unnamed: 0,City,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Bakery,Bank,Bar,Bookstore,Breakfast Spot,Brewery,Business Service,Café,Capitol Building,Chinese Restaurant,Coffee Shop,Concert Hall,Construction & Landscaping,Convenience Store,Convention Center,Cosmetics Shop,Cuban Restaurant,Deli / Bodega,Dessert Shop,Diner,Discount Store,Dive Bar,Donut Shop,Event Space,Fabric Shop,Farmers Market,Fast Food Restaurant,Flower Shop,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden Center,Gas Station,Gastropub,Gay Bar,Gift Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hardware Store,Health Food Store,Hobby Shop,Home Service,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Insurance Office,Intersection,Irish Pub,Italian Restaurant,Jewelry Store,Latin American Restaurant,Leather Goods Store,Library,Light Rail Station,Mexican Restaurant,Mobile Phone Shop,Neighborhood,New American Restaurant,Optical Shop,Park,Pharmacy,Pizza Place,Playground,Plaza,Pool,Post Office,Pub,Public Art,Record Shop,Rental Car Location,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shipping Store,Shopping Mall,Skate Park,Smoke Shop,Spa,Spiritual Center,Sporting Goods Shop,Sports Bar,Steakhouse,Supermarket,Sushi Restaurant,Taco Place,Tapas Restaurant,Thai Restaurant,Theater,Trail,Video Store,Whisky Bar,Wine Shop,Winery,Wings Joint,Yoga Studio
0,Amlin,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Amlin,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Amlin,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Amlin,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Blacklick,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [123]:
cbus_grouped = cbus_onehot.groupby('City').mean().reset_index()
cbus_grouped.head()

Unnamed: 0,City,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Bakery,Bank,Bar,Bookstore,Breakfast Spot,Brewery,Business Service,Café,Capitol Building,Chinese Restaurant,Coffee Shop,Concert Hall,Construction & Landscaping,Convenience Store,Convention Center,Cosmetics Shop,Cuban Restaurant,Deli / Bodega,Dessert Shop,Diner,Discount Store,Dive Bar,Donut Shop,Event Space,Fabric Shop,Farmers Market,Fast Food Restaurant,Flower Shop,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden Center,Gas Station,Gastropub,Gay Bar,Gift Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hardware Store,Health Food Store,Hobby Shop,Home Service,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Insurance Office,Intersection,Irish Pub,Italian Restaurant,Jewelry Store,Latin American Restaurant,Leather Goods Store,Library,Light Rail Station,Mexican Restaurant,Mobile Phone Shop,Neighborhood,New American Restaurant,Optical Shop,Park,Pharmacy,Pizza Place,Playground,Plaza,Pool,Post Office,Pub,Public Art,Record Shop,Rental Car Location,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shipping Store,Shopping Mall,Skate Park,Smoke Shop,Spa,Spiritual Center,Sporting Goods Shop,Sports Bar,Steakhouse,Supermarket,Sushi Restaurant,Taco Place,Tapas Restaurant,Thai Restaurant,Theater,Trail,Video Store,Whisky Bar,Wine Shop,Winery,Wings Joint,Yoga Studio
0,Amlin,0.25,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bexley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Blacklick,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Brice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Canal Winchester,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [124]:
wpalm_onehot = pd.get_dummies(wpalm_venues[['Venue Category']], prefix="", prefix_sep="")
#wpalm_onehot.drop(['Neighborhood'], axis=1, inplace=True)
wpalm_onehot.insert(loc=0, column='City', value=wpalm_venues['City'])
print(wpalm_onehot.shape)
wpalm_onehot.head()

(1563, 104)


Unnamed: 0,City,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Automotive Shop,Bank,Bar,Beach,Bike Shop,Bistro,Board Shop,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Business Service,Café,Candy Store,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cuban Restaurant,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Donut Shop,Farmers Market,Fast Food Restaurant,Food,Food & Drink Shop,Fountain,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store,Gastropub,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health & Beauty Service,Hobby Shop,Home Service,Hotel,Hotel Pool,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Juice Bar,Karaoke Bar,Lake,Lawyer,Lounge,Market,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Motel,Movie Theater,Music Venue,Nail Salon,Nightclub,Park,Pet Store,Pharmacy,Pizza Place,Poke Place,Pool,Pub,Rental Car Location,Resort,Restaurant,Road,Roof Deck,Salad Place,Sandwich Place,Science Museum,Sculpture Garden,Seafood Restaurant,Shipping Store,South American Restaurant,Spa,Sporting Goods Shop,Steakhouse,Supplement Shop,Sushi Restaurant,Theater,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio,Zoo Exhibit
0,West Palm Beach,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,West Palm Beach,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,West Palm Beach,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,West Palm Beach,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,West Palm Beach,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [125]:
wpalm_grouped = wpalm_onehot.groupby('City').mean().reset_index()
wpalm_grouped.head()

Unnamed: 0,City,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Automotive Shop,Bank,Bar,Beach,Bike Shop,Bistro,Board Shop,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Business Service,Café,Candy Store,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cuban Restaurant,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Donut Shop,Farmers Market,Fast Food Restaurant,Food,Food & Drink Shop,Fountain,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store,Gastropub,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health & Beauty Service,Hobby Shop,Home Service,Hotel,Hotel Pool,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Juice Bar,Karaoke Bar,Lake,Lawyer,Lounge,Market,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Motel,Movie Theater,Music Venue,Nail Salon,Nightclub,Park,Pet Store,Pharmacy,Pizza Place,Poke Place,Pool,Pub,Rental Car Location,Resort,Restaurant,Road,Roof Deck,Salad Place,Sandwich Place,Science Museum,Sculpture Garden,Seafood Restaurant,Shipping Store,South American Restaurant,Spa,Sporting Goods Shop,Steakhouse,Supplement Shop,Sushi Restaurant,Theater,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio,Zoo Exhibit
0,Cloud Lake,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Glen Ridge,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333
2,Green Acres,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Greenacres,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Haverhill,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0


In [126]:
#Print the top 5 most common venues for each Neighborhood
num_top_venues = 5

for i in cbus_grouped['City']:
    print('*****'+i+('*****'))
    temp = cbus_grouped[cbus_grouped['City'] == i].T.reset_index()
    temp.columns = ['venue', 'frequency']
    temp = temp.iloc[1:]
    temp['frequency'] = temp['frequency'].astype(float)
    temp = temp.round({'frequency': 2})
    print(temp.sort_values('frequency',ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

*****Amlin*****
                     venue  frequency
0      American Restaurant       0.25
1             Home Service       0.25
2                BBQ Joint       0.25
3              Pizza Place       0.25
4  New American Restaurant       0.00


*****Bexley*****
                     venue  frequency
0                     Park        1.0
1      American Restaurant        0.0
2  New American Restaurant        0.0
3              Record Shop        0.0
4               Public Art        0.0


*****Blacklick*****
                venue  frequency
0  Seafood Restaurant        0.2
1                Pool        0.2
2  Light Rail Station        0.2
3    Business Service        0.2
4       Garden Center        0.2


*****Brice*****
            venue  frequency
0  Discount Store        0.2
1  Hardware Store        0.2
2   Grocery Store        0.2
3     Pizza Place        0.2
4            Park        0.2


*****Canal Winchester*****
                     venue  frequency
0               Skate Park    

In [127]:
num_top_venuewp = 5

for i in wpalm_grouped['City']:
    print('*****'+i+('*****'))
    temp1 = wpalm_grouped[wpalm_grouped['City']==i].T.reset_index()
    temp1.columns = ['venue', 'frequency']
    temp1 = temp1[1:]
    temp1['frequency'] = temp1['frequency'].astype(float)
    temp1=temp1.round({'frequency': 2})
    print(temp1.sort_values('frequency', ascending=False).reset_index(drop=True).head(num_top_venuewp))
    print('\n')

*****Cloud Lake*****
                 venue  frequency
0  American Restaurant        0.1
1          Coffee Shop        0.1
2           Steakhouse        0.1
3           Restaurant        0.1
4  Rental Car Location        0.1


*****Glen Ridge*****
                venue  frequency
0         Zoo Exhibit       0.33
1      Science Museum       0.33
2  Miscellaneous Shop       0.33
3                Pool       0.00
4         Pizza Place       0.00


*****Green Acres*****
                       venue  frequency
0       Gym / Fitness Center        1.0
1        American Restaurant        0.0
2  Middle Eastern Restaurant        0.0
3                Pizza Place        0.0
4                   Pharmacy        0.0


*****Greenacres*****
                 venue  frequency
0   Chinese Restaurant        1.0
1  American Restaurant        0.0
2                 Pool        0.0
3          Pizza Place        0.0
4             Pharmacy        0.0


*****Haverhill*****
                 venue  frequency
0      

In [152]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [185]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['City']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted_c = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted_c['City'] = cbus_grouped['City']

for ind in np.arange(cbus_grouped.shape[0]):
    neighborhoods_venues_sorted_c.iloc[ind, 1:] = return_most_common_venues(cbus_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted_c.head()

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Amlin,American Restaurant,BBQ Joint,Pizza Place,Home Service,Asian Restaurant,Garden Center,Fabric Shop,Farmers Market,Fast Food Restaurant,Art Gallery
1,Bexley,Park,Yoga Studio,Furniture / Home Store,Donut Shop,Event Space,Fabric Shop,Farmers Market,Fast Food Restaurant,Flower Shop,Food & Drink Shop
2,Blacklick,Business Service,Pool,Seafood Restaurant,Garden Center,Light Rail Station,Fried Chicken Joint,Donut Shop,Event Space,Fabric Shop,Farmers Market
3,Brice,Hardware Store,Grocery Store,Pizza Place,Park,Discount Store,Yoga Studio,French Restaurant,Donut Shop,Event Space,Fabric Shop
4,Canal Winchester,Skate Park,Yoga Studio,Diner,Dive Bar,Donut Shop,Event Space,Fabric Shop,Farmers Market,Fast Food Restaurant,Flower Shop


In [186]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['City']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted_w = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted_w['City'] = wpalm_grouped['City']

for ind in np.arange(wpalm_grouped.shape[0]):
    neighborhoods_venues_sorted_w.iloc[ind, 1:] = return_most_common_venues(wpalm_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted_w.head()

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Cloud Lake,Hotel,Steakhouse,Coffee Shop,Gym / Fitness Center,Hotel Pool,Lawyer,Miscellaneous Shop,Rental Car Location,Restaurant,American Restaurant
1,Glen Ridge,Zoo Exhibit,Miscellaneous Shop,Science Museum,Construction & Landscaping,Cosmetics Shop,Cuban Restaurant,Deli / Bodega,Department Store,Dessert Shop,Diner
2,Green Acres,Gym / Fitness Center,Zoo Exhibit,Food,Cosmetics Shop,Cuban Restaurant,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
3,Greenacres,Chinese Restaurant,Zoo Exhibit,Construction & Landscaping,Cosmetics Shop,Cuban Restaurant,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store
4,Haverhill,Yoga Studio,Cosmetics Shop,Intersection,Food,Cuban Restaurant,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store


In [187]:
#set cluster number
kclusters = 5

cbus_grouped_clustering = cbus_grouped.drop(['City'], 1)
#run k-means clsutering
kmeans = KMeans(n_clusters = kclusters, random_state=0).fit(cbus_grouped_clustering)
#check cluster labels generated
kmeans.labels_[0:10]

array([1, 3, 1, 1, 2, 1, 0, 1, 1, 1], dtype=int32)

In [188]:
kclusters = 5
wpalm_grouped_clustering = wpalm_grouped.drop(['City'], 1)
kmeans_w = KMeans(n_clusters = kclusters, random_state=0).fit(wpalm_grouped_clustering)
kmeans_w.labels_[0:10]

array([0, 4, 2, 1, 0, 0, 0, 0, 0, 3], dtype=int32)

In [189]:
#add cluster labels
neighborhoods_venues_sorted_c.insert(0, 'Cluster Labels', kmeans.labels_)

cbus_merged = df_columbus

#merge toronto_group with toronto_data to add latitude/longitude for each neighborhood
cbus_merged = cbus_merged.join(neighborhoods_venues_sorted_c.set_index('City'), on='City')


In [190]:
cbus_merged.head()

Unnamed: 0,Zip Code,City,County,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,43002,Amlin,Franklin County,40.076401,-83.180799,1,American Restaurant,BBQ Joint,Pizza Place,Home Service,Asian Restaurant,Garden Center,Fabric Shop,Farmers Market,Fast Food Restaurant,Art Gallery
1,43004,Blacklick,Franklin County,39.995619,-82.811286,1,Business Service,Pool,Seafood Restaurant,Garden Center,Light Rail Station,Fried Chicken Joint,Donut Shop,Event Space,Fabric Shop,Farmers Market
2,43016,Dublin,Franklin County,40.099229,-83.114077,1,Pizza Place,Bar,Café,Italian Restaurant,Restaurant,Yoga Studio,Spa,Convention Center,French Restaurant,Gift Shop
3,43017,Dublin,Franklin County,40.099229,-83.114077,1,Pizza Place,Bar,Café,Italian Restaurant,Restaurant,Yoga Studio,Spa,Convention Center,French Restaurant,Gift Shop
4,43026,Hilliard,Franklin County,40.033814,-83.159611,1,Ice Cream Shop,Diner,Bank,Pub,Playground,Park,Donut Shop,Event Space,Mexican Restaurant,Salon / Barbershop


In [191]:
# create map
map_clusters_c = folium.Map(location=[c_latitude, c_longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(cbus_merged['Latitude'], cbus_merged['Longitude'], cbus_merged['City'], cbus_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters_c)
       
map_clusters_c

In [192]:
#add cluster labels
neighborhoods_venues_sorted_w.insert(0, 'Cluster Labels', kmeans_w.labels_)

wpalm_merged = df_wpalm

#merge toronto_group with toronto_data to add latitude/longitude for each neighborhood
wpalm_merged = wpalm_merged.join(neighborhoods_venues_sorted_w.set_index('City'), on='City')


In [193]:
wpalm_merged.head()

Unnamed: 0,Zip Code,City,County,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,33401,West Palm Beach,Palm Beach County,26.715364,-80.053294,0,Bar,Pizza Place,French Restaurant,Gastropub,Coffee Shop,Farmers Market,Park,Bank,Music Venue,Sandwich Place
1,33402,West Palm Bch,Palm Beach County,26.715364,-80.053294,0,Bar,Pizza Place,French Restaurant,Gastropub,Coffee Shop,Farmers Market,Park,Bank,Music Venue,Sandwich Place
2,33402,West Palm Beach,Palm Beach County,26.715364,-80.053294,0,Bar,Pizza Place,French Restaurant,Gastropub,Coffee Shop,Farmers Market,Park,Bank,Music Venue,Sandwich Place
3,33403,Lake Park,Palm Beach County,26.800215,-80.066163,0,Music Venue,Art Gallery,Athletics & Sports,Gastropub,Market,Nail Salon,Café,Home Service,Arts & Crafts Store,Vietnamese Restaurant
4,33403,North Palm Beach,Palm Beach County,26.823794,-80.055927,0,Comedy Club,Harbor / Marina,Beach,Spa,Zoo Exhibit,Food,Cuban Restaurant,Deli / Bodega,Department Store,Dessert Shop


In [194]:
# create map
map_clusters_w = folium.Map(location=[w_latitude, w_longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(wpalm_merged['Latitude'], wpalm_merged['Longitude'], wpalm_merged['City'], wpalm_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters_w)
       
map_clusters_w

## Let's compare the dominant clusters.  Columbus cluster 1 vs West Palm cluster 0

In [263]:
c_cluster_1 = cbus_merged.loc[cbus_merged['Cluster Labels']==1, cbus_merged.columns[[1]+ list(range(5, cbus_merged.shape[1]))]]
c_cluster_1

Unnamed: 0,City,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Amlin,1,American Restaurant,BBQ Joint,Pizza Place,Home Service,Asian Restaurant,Garden Center,Fabric Shop,Farmers Market,Fast Food Restaurant,Art Gallery
1,Blacklick,1,Business Service,Pool,Seafood Restaurant,Garden Center,Light Rail Station,Fried Chicken Joint,Donut Shop,Event Space,Fabric Shop,Farmers Market
2,Dublin,1,Pizza Place,Bar,Café,Italian Restaurant,Restaurant,Yoga Studio,Spa,Convention Center,French Restaurant,Gift Shop
3,Dublin,1,Pizza Place,Bar,Café,Italian Restaurant,Restaurant,Yoga Studio,Spa,Convention Center,French Restaurant,Gift Shop
4,Hilliard,1,Ice Cream Shop,Diner,Bank,Pub,Playground,Park,Donut Shop,Event Space,Mexican Restaurant,Salon / Barbershop
5,New Albany,1,Restaurant,Pizza Place,Bank,New American Restaurant,Furniture / Home Store,Gas Station,Breakfast Spot,Pharmacy,Concert Hall,Intersection
6,Reynoldsburg,1,Pizza Place,Bank,Fast Food Restaurant,Chinese Restaurant,Gym / Fitness Center,Italian Restaurant,Sandwich Place,Convenience Store,Construction & Landscaping,Video Store
7,Westerville,1,American Restaurant,Ice Cream Shop,Gift Shop,Gastropub,Pizza Place,Pharmacy,Park,Library,Hobby Shop,Health Food Store
8,Columbus,1,Park,Theater,Coffee Shop,Hotel,American Restaurant,Sandwich Place,Café,Gym / Fitness Center,Gym,Public Art
9,Worthington,1,Library,Ice Cream Shop,Grocery Store,Bakery,Bank,Sandwich Place,Pharmacy,Fried Chicken Joint,Event Space,Fabric Shop


In [201]:
wp_cluster_0 = wpalm_merged.loc[wpalm_merged['Cluster Labels']==0, wpalm_merged.columns[[1]+ list(range(5, wpalm_merged.shape[1]))]]
wp_cluster_0

Unnamed: 0,City,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,West Palm Beach,0,Bar,Pizza Place,French Restaurant,Gastropub,Coffee Shop,Farmers Market,Park,Bank,Music Venue,Sandwich Place
1,West Palm Bch,0,Bar,Pizza Place,French Restaurant,Gastropub,Coffee Shop,Farmers Market,Park,Bank,Music Venue,Sandwich Place
2,West Palm Beach,0,Bar,Pizza Place,French Restaurant,Gastropub,Coffee Shop,Farmers Market,Park,Bank,Music Venue,Sandwich Place
3,Lake Park,0,Music Venue,Art Gallery,Athletics & Sports,Gastropub,Market,Nail Salon,Café,Home Service,Arts & Crafts Store,Vietnamese Restaurant
4,North Palm Beach,0,Comedy Club,Harbor / Marina,Beach,Spa,Zoo Exhibit,Food,Cuban Restaurant,Deli / Bodega,Department Store,Dessert Shop
6,Riviera Beach,0,Fast Food Restaurant,Health & Beauty Service,Harbor / Marina,Coffee Shop,Business Service,Zoo Exhibit,Cuban Restaurant,Deli / Bodega,Department Store,Dessert Shop
7,West Palm Beach,0,Bar,Pizza Place,French Restaurant,Gastropub,Coffee Shop,Farmers Market,Park,Bank,Music Venue,Sandwich Place
8,Singer Island,0,Seafood Restaurant,Café,American Restaurant,Grocery Store,Food & Drink Shop,Mexican Restaurant,Dessert Shop,Convenience Store,Clothing Store,Burger Joint
9,Palm Beach Shores,0,Pool,Seafood Restaurant,Resort,Beach,Cocktail Bar,Bar,Convenience Store,Fast Food Restaurant,Cuban Restaurant,Deli / Bodega
10,Riviera Beach,0,Fast Food Restaurant,Health & Beauty Service,Harbor / Marina,Coffee Shop,Business Service,Zoo Exhibit,Cuban Restaurant,Deli / Bodega,Department Store,Dessert Shop


In [270]:
print("COLUMBUS")
print(c_cluster_1['1st Most Common Venue'].value_counts())
print(50 * '-')
print("WEST PALM")
print(wp_cluster_0['1st Most Common Venue'].value_counts())

COLUMBUS
Park                    28
Pizza Place              4
American Restaurant      3
Ice Cream Shop           2
Hardware Store           2
Business Service         2
Library                  1
Fast Food Restaurant     1
Restaurant               1
Arts & Crafts Store      1
Name: 1st Most Common Venue, dtype: int64
--------------------------------------------------
WEST PALM
Bar                     19
Fast Food Restaurant     4
Yoga Studio              3
Comedy Club              3
Music Venue              3
Seafood Restaurant       2
Pharmacy                 2
Pizza Place              1
Hotel                    1
Pool                     1
Harbor / Marina          1
Food & Drink Shop        1
Coffee Shop              1
Name: 1st Most Common Venue, dtype: int64


In [271]:
print("COLUMBUS")
print(c_cluster_1['2nd Most Common Venue'].value_counts())
print(50 * '-')
print("WEST PALM")
print(wp_cluster_0['2nd Most Common Venue'].value_counts())

COLUMBUS
Theater             28
Ice Cream Shop       4
Bar                  2
Gym                  1
Pizza Place          1
Pool                 1
Diner                1
Sandwich Place       1
BBQ Joint            1
Gas Station          1
Grocery Store        1
Bank                 1
Brewery              1
Insurance Office     1
Name: 2nd Most Common Venue, dtype: int64
--------------------------------------------------
WEST PALM
Pizza Place                   19
Health & Beauty Service        4
Art Gallery                    3
Harbor / Marina                3
Cosmetics Shop                 3
Grocery Store                  2
American Restaurant            1
Seafood Restaurant             1
Business Service               1
Steakhouse                     1
Café                           1
Roof Deck                      1
Construction & Landscaping     1
Movie Theater                  1
Name: 2nd Most Common Venue, dtype: int64


In [272]:
print("COLUMBUS")
print(c_cluster_1['3rd Most Common Venue'].value_counts())
print(50 * '-')
print("WEST PALM")
print(wp_cluster_0['3rd Most Common Venue'].value_counts())

COLUMBUS
Coffee Shop             28
Pizza Place              3
Gift Shop                2
Bank                     2
Café                     2
Sports Bar               1
Spa                      1
Fast Food Restaurant     1
Mexican Restaurant       1
Seafood Restaurant       1
Deli / Bodega            1
Convenience Store        1
Grocery Store            1
Name: 3rd Most Common Venue, dtype: int64
--------------------------------------------------
WEST PALM
French Restaurant       19
Harbor / Marina          4
Beach                    3
Athletics & Sports       3
Intersection             3
Fast Food Restaurant     2
Resort                   1
Karaoke Bar              1
American Restaurant      1
Gym                      1
Lake                     1
Bike Shop                1
Food                     1
Coffee Shop              1
Name: 3rd Most Common Venue, dtype: int64


In [273]:
print("COLUMBUS")
print(c_cluster_1['4th Most Common Venue'].value_counts())
print(50 * '-')
print("WEST PALM")
print(wp_cluster_0['4th Most Common Venue'].value_counts())

COLUMBUS
Hotel                      28
Gastropub                   2
Italian Restaurant          2
Gym                         1
Park                        1
Breakfast Spot              1
Fast Food Restaurant        1
Bakery                      1
Chinese Restaurant          1
New American Restaurant     1
Garden Center               1
Bar                         1
Pub                         1
Food Truck                  1
Dive Bar                    1
Home Service                1
Name: 4th Most Common Venue, dtype: int64
--------------------------------------------------
WEST PALM
Gastropub               22
Coffee Shop              4
Spa                      3
Food                     3
Bank                     2
Zoo Exhibit              1
Beach                    1
Gym / Fitness Center     1
Ice Cream Shop           1
Lake                     1
Board Shop               1
Cosmetics Shop           1
Grocery Store            1
Name: 4th Most Common Venue, dtype: int64


In [274]:
print("COLUMBUS")
print(c_cluster_1['5th Most Common Venue'].value_counts())
print(50 * '-')
print("WEST PALM")
print(wp_cluster_0['5th Most Common Venue'].value_counts())

COLUMBUS
American Restaurant       28
Pizza Place                3
Restaurant                 2
Discount Store             1
Fabric Shop                1
Theater                    1
Yoga Studio                1
Gym / Fitness Center       1
Playground                 1
Taco Place                 1
Furniture / Home Store     1
Asian Restaurant           1
Bank                       1
Hardware Store             1
Light Rail Station         1
Name: 5th Most Common Venue, dtype: int64
--------------------------------------------------
WEST PALM
Coffee Shop             19
Zoo Exhibit              4
Cuban Restaurant         4
Business Service         4
Market                   3
Pizza Place              2
Fast Food Restaurant     1
Hotel Pool               1
Cocktail Bar             1
Café                     1
Indian Restaurant        1
Food & Drink Shop        1
Name: 5th Most Common Venue, dtype: int64


# Conclusion and interesting findings in clustering
I decided to look at the two most dominant clusters for each area, Cluster 1 in Columbus and Cluster 0 in West Palm. The data is interesting as it fits with each area.  Columbus is more of a business hub, so you find more 