## I. Scrape page for getting postal codes

Import Libraries

In [1]:
import requests
import lxml.html as lh
import pandas as pd

Scrape Table Cells

In [2]:
url='https://www.kprichitoregalos.com.co/codigo-postal/'
#Create a handle, page, to handle the contents of the website
page = requests.get(url)
#Store the contents of the website under doc
doc = lh.fromstring(page.content)
#Parse data that are stored between <tr>..</tr> of HTML
tr_elements = doc.xpath('//tr')

Check the length of the first 10 rows

In [3]:
[len(T) for T in tr_elements[:10]]

[3, 3, 3, 3, 3, 3, 3, 3, 3, 3]

Parse Table Header

In [4]:
tr_elements = doc.xpath('//tr')
#Create empty list
col=[]
i=0
#For each row, store each first element (header) and an empty list
for t in tr_elements[0]:
    i+=1
    name=t.text_content()
    print ('%d:"%s"' % (i,name))
    col.append((name,[]))

1:"CODIGO POSTAL"
2:"LOCALIDAD"
3:"BARRIOS CONTENIDOS EN EL CODIGO POSTAL"


Name of columns

In [5]:
col

[('CODIGO POSTAL', []),
 ('LOCALIDAD', []),
 ('BARRIOS CONTENIDOS EN EL CODIGO POSTAL', [])]

Creating Pandas DataFrame

In [6]:
#Since out first row is the header, data is stored on the second row onwards
for j in range(1,len(tr_elements)):
    #T is our j'th row
    T=tr_elements[j]
    
    #If row is not of size 3, the //tr data is not from our table 
    if len(T)!=3:
        break
    
    #i is the index of our column
    i=0
    
    #Iterate through each element of the row
    for t in T.iterchildren():
        data=t.text_content() 
        #Check if row is empty
        if i>0:
        #Convert any numerical value to integers
            try:
                data=int(data)
            except:
                pass
        #Append the data to the empty list of the i'th column
        col[i][1].append(data)
        #Increment i for the next column
        i+=1

Check if all columns have the same number of rows

In [7]:
[len(C) for (title,C) in col]

[63, 63, 63]

Create the DataFrame with postal codes of Bogota

In [8]:
Dict={title:column for (title,column) in col}
df=pd.DataFrame(Dict)
df.head(10)

Unnamed: 0,CODIGO POSTAL,LOCALIDAD,BARRIOS CONTENIDOS EN EL CODIGO POSTAL
0,111511,Antonio Nariño,"Caracas, Ciudad Berna, Ciudad Jardín Sur, Edua..."
1,111211,Barrios Unidos,"Alcázares Norte, Doce de Octubre, Entrerríos, ..."
2,111221,Barrios Unidos,"Alcázares, Baquero, Benjamín Herrera, Colombia..."
3,110711,Bosa,"Betania, Brasil, Brasilia, Cañaveralejo, Cañav..."
4,110721,Bosa,"Bosa Nova, Bosa Nova El Porvenir, Chicó Sur, C..."
5,110731,Bosa,"Antonia Santos, Argelia II, Bosa, El Jardín, G..."
6,110741,Bosa,"Andalucía II, Cementerio Jardínes Apogeo, Char..."
7,111711,Candelaria,"Belén, Centro Administrativo, Egipto, La Cated..."
8,110211,Chapinero,"Esperanza, Páramo, Páramo I, Páramo II, Páramo..."
9,110221,Chapinero,"Antiguo Country, Bellavista, Chicó Norte, Chic..."


Number of rows and columns of dataframe

In [9]:
df.shape

(63, 3)

## II.  Get latitude and longitude coordinates of each neighborhood

In [10]:
# Install packages
!pip install pgeocode

Collecting pgeocode
  Using cached https://files.pythonhosted.org/packages/f4/1e/d35805c59f167751fccb3fc7093fb2e45ee1e9e4a057e5d74da926ef9518/pgeocode-0.2.0-py2.py3-none-any.whl
Installing collected packages: pgeocode
Successfully installed pgeocode-0.2.0
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


##### Explore Dataset

Filter the codes of Colombia and then Bogota

In [11]:
import pgeocode
bog = pgeocode.Nominatim('CO')

In [12]:
# Try with the postal codes of Bogota
bog=bog.query_postal_code(['111511', '111211', '111221', '110711', '110721', '110731', '110741', '111711', '110211', '110221', '110231', '111911', '111921', '111931', '111941', '111951', '111961', '111971', '111011', '111021', '111031', '111041', '111051', '111061', '111071', '110911', '110921', '110931', '110811', '110821', '110831', '110841', '110851', '110861', '110871', '110881', '111411', '111611', '111621', '111631', '111811', '111821', '111831', '111841', '110411', '110421', '110431', '110441', '110311', '110321', '111111', '111121', '111131', '111141', '111151', '111156', '111161', '111166', '111171', '111176', '112011', '112021', '112031', '112041', '111311', '111321', '110611', '110621', '110111', '110121', '110131', '110141', '110151', '110511', '110521', '110531', '110541', '110551','110571'])
bog.head()

Unnamed: 0,postal_code,country code,place_name,state_name,state_code,county_name,county_code,community_name,community_code,latitude,longitude,accuracy
0,111511,CO,Antonio Nariño,"Bogota, D.C.",34,Bogotá D.C.,11001,,,4.5889,-74.103,4
1,111211,CO,Barrios Unidos,"Bogota, D.C.",34,Bogotá D.C.,11001,,,4.677,-74.0692,4
2,111221,CO,Barrios Unidos,"Bogota, D.C.",34,Bogotá D.C.,11001,,,4.6617,-74.078,4
3,110711,CO,Bosa,"Bogota, D.C.",34,Bogotá D.C.,11001,,,4.6369,-74.1885,4
4,110721,CO,Bosa,"Bogota, D.C.",34,Bogotá D.C.,11001,,,4.628,-74.2055,4


Change the names of variables

In [13]:
df_codes=df.rename(columns = {'BARRIOS CONTENIDOS EN EL CODIGO POSTAL':'Neighbourhood', 'CODIGO POSTAL':'postal_code', 'LOCALIDAD':'Borough'})
df_codes.head()

Unnamed: 0,postal_code,Borough,Neighbourhood
0,111511,Antonio Nariño,"Caracas, Ciudad Berna, Ciudad Jardín Sur, Edua..."
1,111211,Barrios Unidos,"Alcázares Norte, Doce de Octubre, Entrerríos, ..."
2,111221,Barrios Unidos,"Alcázares, Baquero, Benjamín Herrera, Colombia..."
3,110711,Bosa,"Betania, Brasil, Brasilia, Cañaveralejo, Cañav..."
4,110721,Bosa,"Bosa Nova, Bosa Nova El Porvenir, Chicó Sur, C..."


Merge two data bases

In [14]:
final_base = pd.merge(df_codes, bog, how = "left", on = "postal_code")
final_base.head()

Unnamed: 0,postal_code,Borough,Neighbourhood,country code,place_name,state_name,state_code,county_name,county_code,community_name,community_code,latitude,longitude,accuracy
0,111511,Antonio Nariño,"Caracas, Ciudad Berna, Ciudad Jardín Sur, Edua...",CO,Antonio Nariño,"Bogota, D.C.",34,Bogotá D.C.,11001,,,4.5889,-74.103,4
1,111211,Barrios Unidos,"Alcázares Norte, Doce de Octubre, Entrerríos, ...",CO,Barrios Unidos,"Bogota, D.C.",34,Bogotá D.C.,11001,,,4.677,-74.0692,4
2,111221,Barrios Unidos,"Alcázares, Baquero, Benjamín Herrera, Colombia...",CO,Barrios Unidos,"Bogota, D.C.",34,Bogotá D.C.,11001,,,4.6617,-74.078,4
3,110711,Bosa,"Betania, Brasil, Brasilia, Cañaveralejo, Cañav...",CO,Bosa,"Bogota, D.C.",34,Bogotá D.C.,11001,,,4.6369,-74.1885,4
4,110721,Bosa,"Bosa Nova, Bosa Nova El Porvenir, Chicó Sur, C...",CO,Bosa,"Bogota, D.C.",34,Bogotá D.C.,11001,,,4.628,-74.2055,4


In [15]:
final_base.shape

(63, 14)

Keep only variables of interes

In [16]:
final_base=final_base.drop(['country code', 'place_name', 'state_name', 'state_code', 'county_name', 'county_code', 'community_name', 'community_code', 'accuracy'], axis=1)

Finally, we get the data to do maps

In [17]:
final_base=final_base.rename(columns = {'latitude':'Latitude', 'longitude':'Longitude', 'postal_code':'Postcode'})
final_base.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,111511,Antonio Nariño,"Caracas, Ciudad Berna, Ciudad Jardín Sur, Edua...",4.5889,-74.103
1,111211,Barrios Unidos,"Alcázares Norte, Doce de Octubre, Entrerríos, ...",4.677,-74.0692
2,111221,Barrios Unidos,"Alcázares, Baquero, Benjamín Herrera, Colombia...",4.6617,-74.078
3,110711,Bosa,"Betania, Brasil, Brasilia, Cañaveralejo, Cañav...",4.6369,-74.1885
4,110721,Bosa,"Bosa Nova, Bosa Nova El Porvenir, Chicó Sur, C...",4.628,-74.2055


## III. Explore and cluster the neighborhoods in Bogota

First, let's download all the dependencies that we will need

In [18]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done


  current version: 4.5.11
  latest version: 4.8.2

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /home/nbuser/anaconda3_501

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    conda-4.8.2                |           py36_0         3.0 MB  conda-forge
    conda-package-handling-1.6.0|   py36h516909a_1         941 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         4.0 MB

The following NEW packages will be INSTALLED:

    conda-package-handling: 1.6.0-py36h516909a_1 conda-forge
    geographiclib:          1.50-

pyrsistent-0.15.7    | 89 KB     | ##################################### | 100% 
scikit-image-0.14.2  | 24.0 MB   | ##################################### | 100% 
ca-certificates-2019 | 145 KB    | ##################################### | 100% 
soupsieve-1.9.4      | 58 KB     | ##################################### | 100% 
wurlitzer-2.0.0      | 12 KB     | ##################################### | 100% 
branca-0.4.0         | 26 KB     | ##################################### | 100% 
scikit-learn-0.20.1  | 5.7 MB    | ##################################### | 100% 
vincent-0.4.4        | 28 KB     | ##################################### | 100% 
_anaconda_depends-20 | 5 KB      | ##################################### | 100% 
folium-0.5.0         | 45 KB     | ##################################### | 100% 
altair-4.0.1         | 575 KB    | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done
Libraries imported.


### a. Explore Dataset

In [19]:
base_bogota = final_base
base_bogota.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,111511,Antonio Nariño,"Caracas, Ciudad Berna, Ciudad Jardín Sur, Edua...",4.5889,-74.103
1,111211,Barrios Unidos,"Alcázares Norte, Doce de Octubre, Entrerríos, ...",4.677,-74.0692
2,111221,Barrios Unidos,"Alcázares, Baquero, Benjamín Herrera, Colombia...",4.6617,-74.078
3,110711,Bosa,"Betania, Brasil, Brasilia, Cañaveralejo, Cañav...",4.6369,-74.1885
4,110721,Bosa,"Bosa Nova, Bosa Nova El Porvenir, Chicó Sur, C...",4.628,-74.2055


### b. Create a map of Bogota with neighborhoods superimposed on top

In order to define an instance of the geocoder, we need to define a user_agent. We will name our agent <em>m_explorer</em>, as shown below.

In [20]:
address = 'Bogota, BOG'

geolocator = Nominatim(user_agent="m_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Bogota are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Bogota are 4.700912450000001, -74.14606074859216.


In [21]:
# create map of Bogotá using latitude and longitude values
map_bogota = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(base_bogota['Latitude'], base_bogota['Longitude'], base_bogota['Borough'], base_bogota['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_bogota)  
    
map_bogota

Next, we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them.

### c. Define Foursquare Credentials and Version

In [22]:
CLIENT_ID = 'QY2KJJE52HKB4B5W2ZQRC1PYQZQAJIHRDLATVHSDURMAY0XU' # your Foursquare ID
CLIENT_SECRET = 'DCM2JYNYQUUCJMQQJT2KTYIZHGCSOY5GMRAPULQTNM3BZL33' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: QY2KJJE52HKB4B5W2ZQRC1PYQZQAJIHRDLATVHSDURMAY0XU
CLIENT_SECRET:DCM2JYNYQUUCJMQQJT2KTYIZHGCSOY5GMRAPULQTNM3BZL33


### d. Let's explore neighborhoods in our dataframe

Get the neighborhood's name

In [23]:
base_bogota.loc[0, 'Neighbourhood']

'Caracas, Ciudad Berna, Ciudad Jardín Sur, Eduardo Frey, La Fragua, La Fraguita, La Hortúa, Policarpa, Restrepo, Restrepo Occidental, San Antonio, Santander, Santander Sur, Sena, Sevilla, Villa Mayor Oriental'

Get the neighborhood's latitude and longitude values

In [24]:
neighborhood_latitude = base_bogota.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = base_bogota.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = base_bogota.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Caracas, Ciudad Berna, Ciudad Jardín Sur, Eduardo Frey, La Fragua, La Fraguita, La Hortúa, Policarpa, Restrepo, Restrepo Occidental, San Antonio, Santander, Santander Sur, Sena, Sevilla, Villa Mayor Oriental are 4.5889, -74.10300000000002.


### e. Now, let's get the top 100 venues that are in Borough within a radius of 500 meters

First, let's create the GET request URL. Name your URL url.

In [25]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=QY2KJJE52HKB4B5W2ZQRC1PYQZQAJIHRDLATVHSDURMAY0XU&client_secret=DCM2JYNYQUUCJMQQJT2KTYIZHGCSOY5GMRAPULQTNM3BZL33&v=20180605&ll=4.5889,-74.10300000000002&radius=500&limit=100'

Send the GET request and examine the resutls

In [26]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e4f71d3949393001b649b65'},
 'response': {'headerLocation': 'Restrepo',
  'headerFullLocation': 'Restrepo, Bogotá',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 24,
  'suggestedBounds': {'ne': {'lat': 4.593400004500004,
    'lng': -74.09849395292447},
   'sw': {'lat': 4.584399995499996, 'lng': -74.10750604707557}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4eac5ce830f823f86eb3a5df',
       'name': 'Pasteleria Guernika',
       'location': {'address': 'Cr 19 15-19 S',
        'lat': 4.586891048300204,
        'lng': -74.10022794127197,
        'labeledLatLngs': [{'label': 'display',
          'lat': 4.586891048300204,
          'lng': -74.10022794127197}],
        'distance': 380,
        'cc': 'CO',
        '

From the Foursquare lab in the previous module, we know that all the information is in the items key. Before we proceed, let's borrow the get_category_type function from the Foursquare lab

In [27]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

Now we are ready to clean the json and structure it into a pandas dataframe

In [28]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Pasteleria Guernika,Pie Shop,4.586891,-74.100228
1,sport body,Gym,4.587048,-74.099812
2,Kokoriko,Fried Chicken Joint,4.58653,-74.100282
3,Wenbrulee,Restaurant,4.585942,-74.099846
4,Farmatodo,Convenience Store,4.585889,-74.102457


And how many venues were returned by Foursquare?

In [29]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

24 venues were returned by Foursquare.


## V. Explore Neighborhoods in Bogota

Let's create a function to repeat the same process to all the neighborhoods in Bogota

In [30]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

Now write the code to run the above function on each neighborhood and create a new dataframe called bogota_venues

In [31]:
bogota_venues = getNearbyVenues(names=base_bogota['Neighbourhood'],
                                   latitudes=base_bogota['Latitude'],
                                   longitudes=base_bogota['Longitude']
                                  )

Caracas, Ciudad Berna, Ciudad Jardín Sur, Eduardo Frey, La Fragua, La Fraguita, La Hortúa, Policarpa, Restrepo, Restrepo Occidental, San Antonio, Santander, Santander Sur, Sena, Sevilla, Villa Mayor Oriental
Alcázares Norte, Doce de Octubre, Entrerríos, Escuela Militar, Jorge Eliecer Gaitán, Juan XXIII, La Aurora, La Castellana, La Libertad, La Patria, Los Andes, Metrópolis, Once de Noviembre, Polo Club, Rionegro, San Felipe, San Fernando Occidental, Santa So
Alcázares, Baquero, Benjamín Herrera, Colombia, Concepción Norte, El Rosario, José Joaquín Vargas, La Esperanza, La Merced Norte, La Paz, Muequeta, Parque Distrital Salitre, Parque Popular Salitre, Popular Modelo, Quinta Mutis, Rafael Uribe, San Fernando
Betania, Brasil, Brasilia, Cañaveralejo, Cañaveralejo Rural, Chicalá, El Corzo, El Corzo Rural, El Danubio Azul, El Portal del Brasil, Escocia, Escocia, La Cabaña, La Libertad, Osorio X, Osorio X Urbano, Osorio XXIII, Parcela El Porvenir, San Martín
Bosa Nova, Bosa Nova El Porveni

Aguas Claras, Buenos Aires, Calvo Sur, El Triángulo, Hoya San Cristóbal, La María, Las Brisas, Los Laureles I, Modelo Sur, Molino Rural, Molinos de Oriente, Monte Carlo, Nariño Sur, Primero de Mayo, Quinta Ramos, San Cristóbal Sur, San Javier, Santa Ana S
Atenas Sur, Bello Horizonte, Córdoba, Granada Sur, Las Mercedes, Los Alpes, Montebello, Ranajal, San Blas, San Blas II, San Isidro, San Pedro, San Vicente, Santa Inés Sur, Suramérica, Veinte de Julio, Villa de Los Alapes, Villa de Los Alapes I
Altamira, Altos del Poblado, Altos del Zipa, Altos del Zuque, Barcelona Sur, Bellavista Sur, Bosque de Los Alpes, La Arboleda, La Gloria Occidental, La Gloria Oriental, La Victoria, Las Guacamayas, Las Lomas, Moralba, Puente Colorado, Quindío
Canadá o Guira, Chiguaza, Ciudad Londres I, El Paraíso, El Pinar, Juan Rey (La Paz), La Belleza, Las Gaviotas, Los Libertadores, Nueva Delhí, Nueva Gloria, San Rafael Usme, Santa Rita Sur Oriental, Villabel, Yomasa
Bosque Izquierdo, Hoya Teusaca, La Alameda

Let's check the size of the resulting dataframe

In [32]:
print(bogota_venues.shape)
bogota_venues.head()

(347, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Caracas, Ciudad Berna, Ciudad Jardín Sur, Edua...",4.5889,-74.103,Pasteleria Guernika,4.586891,-74.100228,Pie Shop
1,"Caracas, Ciudad Berna, Ciudad Jardín Sur, Edua...",4.5889,-74.103,sport body,4.587048,-74.099812,Gym
2,"Caracas, Ciudad Berna, Ciudad Jardín Sur, Edua...",4.5889,-74.103,Kokoriko,4.58653,-74.100282,Fried Chicken Joint
3,"Caracas, Ciudad Berna, Ciudad Jardín Sur, Edua...",4.5889,-74.103,Wenbrulee,4.585942,-74.099846,Restaurant
4,"Caracas, Ciudad Berna, Ciudad Jardín Sur, Edua...",4.5889,-74.103,Farmatodo,4.585889,-74.102457,Convenience Store


Let's check how many venues were returned for each neighborhood

In [33]:
bogota_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Alamos, El Cedro, El Madrigal, El Muelle, Garcés Navas, Garcés Navas Oriental, Los Ángeles, San Antonio Engativá, Santa Mónica, Villa Amalia, Villa del Mar, Villa Sagrario",1,1,1,1,1,1
"Alcalá, Alqueria, Autopista Sur, Galán, La Camelia, Ospina Pérez, Ospina Pérez Sur, Provivienda Norte, San Eusebio, Tejar",8,8,8,8,8,8
"Alcázares Norte, Doce de Octubre, Entrerríos, Escuela Militar, Jorge Eliecer Gaitán, Juan XXIII, La Aurora, La Castellana, La Libertad, La Patria, Los Andes, Metrópolis, Once de Noviembre, Polo Club, Rionegro, San Felipe, San Fernando Occidental, Santa So",4,4,4,4,4,4
"Alcázares, Baquero, Benjamín Herrera, Colombia, Concepción Norte, El Rosario, José Joaquín Vargas, La Esperanza, La Merced Norte, La Paz, Muequeta, Parque Distrital Salitre, Parque Popular Salitre, Popular Modelo, Quinta Mutis, Rafael Uribe, San Fernando",6,6,6,6,6,6
"Almirante Colon, Altos de Chozica, Andes Norte, Ciudad Hunza, Club de Los Lagartos, Julio Flórez, Los Naranjos, Niza Norte, Niza Suba, Niza Sur, Potosí, Santa Rosa, Vereda Suba Naranjos",5,5,5,5,5,5
"Alqueria La Fragua, Alqueria La Fragua Norte, Boita, La Campiña, Las Delicias, Nueva York, Provivienda, Provivienda Occidental, Renania Urapanes, Santa Catalina, Timiza, Timiza A, Timiza B, Timiza C, Tundama",7,7,7,7,7,7
"Altos de Suba, Atenas, Britalia, Cantagallo, Casa Blanca Suba I, Casablanca Suba, Delmonte, El Plan, Escuela de Carabineros, Gilmar, Granada Norte, Iragua, Mazuren, Portales del Norte, Prado Pinzón, San José V Sector, Santa Helena, Suba Cerros, Suba Urban",12,12,12,12,12,12
"Andalucía II, Cementerio Jardínes Apogeo, Charles de Gaulle, El Retazo, Gualoche, José Antonio Galán, José María Carbonel, La Estación Bosa, Nueva Granada Bosa, Olarte, San Pablo Bosa, Villa Anny I, Villa Anny II, Villa del Río",3,3,3,3,3,3
"Antiguo Country, Bellavista, Chicó Norte, Chicó Norte II Sector, Chicó Norte III Sector, El Bagazal, El Chicó, El Nogal, El Refugio, El Retiro, Espartillal, La Cabrera, Lago Gaitán, Los Rosales, Porciuncula, Seminario",4,4,4,4,4,4
"Antonia Santos, Argelia II, Bosa, El Jardín, Gran Colombiano, Islandia, Jiménez de Quesada, Los Laureles, Paso Ancho, San Bernardino I, San Diego Bosa",1,1,1,1,1,1


Let's find out how many unique categories can be curated from all the returned venues

In [34]:
print('There are {} uniques categories.'.format(len(bogota_venues['Venue Category'].unique())))

There are 117 uniques categories.


## VI. Analyze Each Neighborhood

In [35]:
# one hot encoding
bogota_onehot = pd.get_dummies(bogota_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
bogota_onehot['Neighborhood'] = bogota_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [bogota_onehot.columns[-1]] + list(bogota_onehot.columns[:-1])
bogota_onehot = bogota_onehot[fixed_columns]

bogota_onehot.head()

Unnamed: 0,Wings Joint,Airport,American Restaurant,Arcade,Arepa Restaurant,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,Auto Garage,BBQ Joint,Bakery,Bar,Bed & Breakfast,Beer Garden,Bookstore,Bowling Alley,Breakfast Spot,Buffet,Building,Burger Joint,Bus Stop,Café,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop,Donut Shop,Drugstore,Falafel Restaurant,Fast Food Restaurant,Food,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Gastropub,Golf Course,Gym,Gym / Fitness Center,Gymnastics Gym,Historic Site,History Museum,Hostel,Hot Dog Joint,Hotel,Ice Cream Shop,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Miscellaneous Shop,Mobile Phone Shop,Motorcycle Shop,Mountain,Movie Theater,Multiplex,Museum,Music Store,Music Venue,Nature Preserve,Neighborhood,Nightclub,Notary,Other Great Outdoors,Paper / Office Supplies Store,Park,Performing Arts Venue,Pharmacy,Pie Shop,Pizza Place,Playground,Plaza,Pool,Pub,Restaurant,Rock Club,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shop & Service,Shopping Mall,Skate Park,Snack Place,Soccer Field,Soup Place,South American Restaurant,Spa,Sporting Goods Shop,Sports Bar,Supermarket,Sushi Restaurant,Swiss Restaurant,Theater,Theme Park,Theme Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Winery
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Caracas, Ciudad Berna, Ciudad Jardín Sur, Edua...",0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Caracas, Ciudad Berna, Ciudad Jardín Sur, Edua...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Caracas, Ciudad Berna, Ciudad Jardín Sur, Edua...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Caracas, Ciudad Berna, Ciudad Jardín Sur, Edua...",0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Caracas, Ciudad Berna, Ciudad Jardín Sur, Edua...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


And let's examine the new dataframe size

In [36]:
bogota_onehot.shape

(347, 117)

Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [37]:
bogota_grouped = bogota_onehot.groupby('Neighborhood').mean().reset_index()
bogota_grouped

Unnamed: 0,Neighborhood,Wings Joint,Airport,American Restaurant,Arcade,Arepa Restaurant,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,Auto Garage,BBQ Joint,Bakery,Bar,Bed & Breakfast,Beer Garden,Bookstore,Bowling Alley,Breakfast Spot,Buffet,Building,Burger Joint,Bus Stop,Café,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop,Donut Shop,Drugstore,Falafel Restaurant,Fast Food Restaurant,Food,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Gastropub,Golf Course,Gym,Gym / Fitness Center,Gymnastics Gym,Historic Site,History Museum,Hostel,Hot Dog Joint,Hotel,Ice Cream Shop,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Miscellaneous Shop,Mobile Phone Shop,Motorcycle Shop,Mountain,Movie Theater,Multiplex,Museum,Music Store,Music Venue,Nature Preserve,Nightclub,Notary,Other Great Outdoors,Paper / Office Supplies Store,Park,Performing Arts Venue,Pharmacy,Pie Shop,Pizza Place,Playground,Plaza,Pool,Pub,Restaurant,Rock Club,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shop & Service,Shopping Mall,Skate Park,Snack Place,Soccer Field,Soup Place,South American Restaurant,Spa,Sporting Goods Shop,Sports Bar,Supermarket,Sushi Restaurant,Swiss Restaurant,Theater,Theme Park,Theme Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Winery
0,"Alamos, El Cedro, El Madrigal, El Muelle, Garc...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alcalá, Alqueria, Autopista Sur, Galán, La Cam...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Alcázares Norte, Doce de Octubre, Entrerríos, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Alcázares, Baquero, Benjamín Herrera, Colombia...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Almirante Colon, Altos de Chozica, Andes Norte...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Alqueria La Fragua, Alqueria La Fragua Norte, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.142857,0.0,0.142857,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857
6,"Altos de Suba, Atenas, Britalia, Cantagallo, C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.083333,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Andalucía II, Cementerio Jardínes Apogeo, Char...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Antiguo Country, Bellavista, Chicó Norte, Chic...",0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Antonia Santos, Argelia II, Bosa, El Jardín, G...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Let's confirm the new size

In [38]:
bogota_grouped.shape

(47, 117)

Let's print each neighborhood along with the top 10 most common venues

In [73]:
num_top_venues = 10

for hood in bogota_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = bogota_grouped[bogota_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Alamos, El Cedro, El Madrigal, El Muelle, Garcés Navas, Garcés Navas Oriental, Los Ángeles, San Antonio Engativá, Santa Mónica, Villa Amalia, Villa del Mar, Villa Sagrario----
                           venue  freq
0                    Pizza Place   1.0
1                      Multiplex   0.0
2                       Pie Shop   0.0
3                       Pharmacy   0.0
4          Performing Arts Venue   0.0
5                           Park   0.0
6  Paper / Office Supplies Store   0.0
7           Other Great Outdoors   0.0
8                         Notary   0.0
9                      Nightclub   0.0


----Alcalá, Alqueria, Autopista Sur, Galán, La Camelia, Ospina Pérez, Ospina Pérez Sur, Provivienda Norte, San Eusebio, Tejar----
                           venue  freq
0                   Burger Joint  0.38
1                    Gaming Cafe  0.12
2            Fried Chicken Joint  0.12
3           Fast Food Restaurant  0.12
4                      BBQ Joint  0.12
5            Sporting Goo

                           venue  freq
0                     Playground  0.33
1                          Trail  0.33
2                    Golf Course  0.33
3                      Multiplex  0.00
4                       Pharmacy  0.00
5          Performing Arts Venue  0.00
6                           Park  0.00
7  Paper / Office Supplies Store  0.00
8           Other Great Outdoors  0.00
9                         Notary  0.00


----Barcelona, Batallón Caldas, Centro Industrial, Colón, Comuneros, Cundinamarca, El Ejido, Estación Central, Gorgonzola, Industrial Centenario, La Florida Occidental, La Pradera, La Trinidad, Los Ejidos, Ortezal, Pensilvania, Primavera Occidental----
             venue  freq
0        BBQ Joint  0.17
1             Pool  0.17
2            Hotel  0.17
3    Historic Site  0.17
4              Bar  0.17
5           Notary  0.17
6      Wings Joint  0.00
7      Music Venue  0.00
8  Nature Preserve  0.00
9        Nightclub  0.00


----Barlovento, El Ensueño, El Mirador 

                           venue  freq
0             Mexican Restaurant  0.33
1            Fried Chicken Joint  0.33
2                    Wings Joint  0.00
3                         Museum  0.00
4                       Pie Shop  0.00
5                       Pharmacy  0.00
6          Performing Arts Venue  0.00
7                           Park  0.00
8  Paper / Office Supplies Store  0.00
9           Other Great Outdoors  0.00


----Calandaima, Campo Hermoso, El Paraíso, El Paraíso Bosa, Galán, Galán Rural, Osorio III, Patio Bonito, Patio Bonito II, Patio Bonito III, Tairona----
                           venue  freq
0                    Supermarket  0.33
1                   Burger Joint  0.33
2                  Shopping Mall  0.33
3                    Wings Joint  0.00
4                         Museum  0.00
5                       Pharmacy  0.00
6          Performing Arts Venue  0.00
7                           Park  0.00
8  Paper / Office Supplies Store  0.00
9           Other Great Ou

#### Let's put that into a pandas dataframe

First, let's write a function to sort the venues in descending order

In [62]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood

In [74]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = bogota_grouped['Neighborhood']

for ind in np.arange(bogota_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(bogota_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Alamos, El Cedro, El Madrigal, El Muelle, Garc...",Pizza Place,Food,Concert Hall,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop
1,"Alcalá, Alqueria, Autopista Sur, Galán, La Cam...",Burger Joint,Gaming Cafe,Fried Chicken Joint,BBQ Joint,Sporting Goods Shop,Fast Food Restaurant,Department Store,Dessert Shop,Food,Cupcake Shop
2,"Alcázares Norte, Doce de Octubre, Entrerríos, ...",Restaurant,Furniture / Home Store,Mexican Restaurant,Music Venue,Winery,Fast Food Restaurant,Construction & Landscaping,Convenience Store,Creperie,Cultural Center
3,"Alcázares, Baquero, Benjamín Herrera, Colombia...",Soccer Field,Clothing Store,Fried Chicken Joint,Auto Garage,Motorcycle Shop,Creperie,Cultural Center,Cupcake Shop,Department Store,Food & Drink Shop
4,"Almirante Colon, Altos de Chozica, Andes Norte...",Park,Other Great Outdoors,Gym,Bus Stop,Scenic Lookout,Winery,Falafel Restaurant,Convenience Store,Creperie,Cultural Center


## VII. Cluster Neighborhoods

Run k-means to cluster the neighborhood into 5 clusters

In [75]:
# set number of clusters
kclusters = 5

bogota_grouped_clustering = bogota_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(bogota_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 1, 1, 1, 1, 1, 1, 1, 1, 3], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

But first, I am going to change the name of one column

In [76]:
base_bogota=base_bogota.rename(columns = {'Neighbourhood':'Neighborhood'})

In [77]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

bogota_merged = base_bogota

# merge bogota_grouped with bogota_data to add latitude/longitude for each neighborhood
bogota_merged = bogota_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

bogota_merged # check the last columns!

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,111511,Antonio Nariño,"Caracas, Ciudad Berna, Ciudad Jardín Sur, Edua...",4.5889,-74.103,1.0,Restaurant,Clothing Store,Seafood Restaurant,BBQ Joint,Miscellaneous Shop,Pie Shop,Pizza Place,Convenience Store,Department Store,Sandwich Place
1,111211,Barrios Unidos,"Alcázares Norte, Doce de Octubre, Entrerríos, ...",4.677,-74.0692,1.0,Restaurant,Furniture / Home Store,Mexican Restaurant,Music Venue,Winery,Fast Food Restaurant,Construction & Landscaping,Convenience Store,Creperie,Cultural Center
2,111221,Barrios Unidos,"Alcázares, Baquero, Benjamín Herrera, Colombia...",4.6617,-74.078,1.0,Soccer Field,Clothing Store,Fried Chicken Joint,Auto Garage,Motorcycle Shop,Creperie,Cultural Center,Cupcake Shop,Department Store,Food & Drink Shop
3,110711,Bosa,"Betania, Brasil, Brasilia, Cañaveralejo, Cañav...",4.6369,-74.1885,4.0,Shopping Mall,Movie Theater,Food & Drink Shop,Winery,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store
4,110721,Bosa,"Bosa Nova, Bosa Nova El Porvenir, Chicó Sur, C...",4.628,-74.2055,3.0,Construction & Landscaping,Winery,Food & Drink Shop,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop,Donut Shop
5,110731,Bosa,"Antonia Santos, Argelia II, Bosa, El Jardín, G...",4.6116,-74.1978,3.0,Bar,Winery,Food Truck,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop,Donut Shop
6,110741,Bosa,"Andalucía II, Cementerio Jardínes Apogeo, Char...",4.6017,-74.1796,1.0,Coffee Shop,Restaurant,Mexican Restaurant,Food,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store
7,111711,Candelaria,"Belén, Centro Administrativo, Egipto, La Cated...",4.5966,-74.0721,1.0,Café,Restaurant,Hostel,Latin American Restaurant,Italian Restaurant,Coffee Shop,Breakfast Spot,Burger Joint,History Museum,Vegetarian / Vegan Restaurant
8,110211,Chapinero,"Esperanza, Páramo, Páramo I, Páramo II, Páramo...",4.6687,-74.0263,1.0,Mountain,Food & Drink Shop,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop,Donut Shop
9,110221,Chapinero,"Antiguo Country, Bellavista, Chicó Norte, Chic...",4.6639,-74.0448,1.0,Hotel,Art Gallery,Gym,Athletics & Sports,Food & Drink Shop,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store


There is no information for some neighborhoods.
For this reason, only process the cells that have an assigned cluster. Ignore cells with a Cluster Label that is NaN.

In [79]:
bogota_merged_2 = bogota_merged.dropna()

Reset index

In [80]:
bogota_merged_2 = bogota_merged_2.reset_index(drop=True)
bogota_merged_2.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,111511,Antonio Nariño,"Caracas, Ciudad Berna, Ciudad Jardín Sur, Edua...",4.5889,-74.103,1.0,Restaurant,Clothing Store,Seafood Restaurant,BBQ Joint,Miscellaneous Shop,Pie Shop,Pizza Place,Convenience Store,Department Store,Sandwich Place
1,111211,Barrios Unidos,"Alcázares Norte, Doce de Octubre, Entrerríos, ...",4.677,-74.0692,1.0,Restaurant,Furniture / Home Store,Mexican Restaurant,Music Venue,Winery,Fast Food Restaurant,Construction & Landscaping,Convenience Store,Creperie,Cultural Center
2,111221,Barrios Unidos,"Alcázares, Baquero, Benjamín Herrera, Colombia...",4.6617,-74.078,1.0,Soccer Field,Clothing Store,Fried Chicken Joint,Auto Garage,Motorcycle Shop,Creperie,Cultural Center,Cupcake Shop,Department Store,Food & Drink Shop
3,110711,Bosa,"Betania, Brasil, Brasilia, Cañaveralejo, Cañav...",4.6369,-74.1885,4.0,Shopping Mall,Movie Theater,Food & Drink Shop,Winery,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store
4,110721,Bosa,"Bosa Nova, Bosa Nova El Porvenir, Chicó Sur, C...",4.628,-74.2055,3.0,Construction & Landscaping,Winery,Food & Drink Shop,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop,Donut Shop
5,110731,Bosa,"Antonia Santos, Argelia II, Bosa, El Jardín, G...",4.6116,-74.1978,3.0,Bar,Winery,Food Truck,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop,Donut Shop
6,110741,Bosa,"Andalucía II, Cementerio Jardínes Apogeo, Char...",4.6017,-74.1796,1.0,Coffee Shop,Restaurant,Mexican Restaurant,Food,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store
7,111711,Candelaria,"Belén, Centro Administrativo, Egipto, La Cated...",4.5966,-74.0721,1.0,Café,Restaurant,Hostel,Latin American Restaurant,Italian Restaurant,Coffee Shop,Breakfast Spot,Burger Joint,History Museum,Vegetarian / Vegan Restaurant
8,110211,Chapinero,"Esperanza, Páramo, Páramo I, Páramo II, Páramo...",4.6687,-74.0263,1.0,Mountain,Food & Drink Shop,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop,Donut Shop
9,110221,Chapinero,"Antiguo Country, Bellavista, Chicó Norte, Chic...",4.6639,-74.0448,1.0,Hotel,Art Gallery,Gym,Athletics & Sports,Food & Drink Shop,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store


Finally, let's visualize the resulting clusters

In [81]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(bogota_merged_2['Latitude'], bogota_merged_2['Longitude'], bogota_merged_2['Neighborhood'], bogota_merged_2['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## VIII. Examine Clusters

We examine each cluster and determine the discriminating venue categories that distinguish each cluster.

#### Cluster 1

In [83]:
bogota_merged_2.loc[bogota_merged_2['Cluster Labels'] == 0, bogota_merged_2.columns[[1] + list(range(5, bogota_merged_2.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,Puente Aranda,0.0,Park,Pizza Place,Restaurant,Mexican Restaurant,Winery,Fast Food Restaurant,Construction & Landscaping,Convenience Store,Creperie,Cultural Center
34,Rafael Uribe Uribe,0.0,Park,Winery,Food,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop
41,Suba,0.0,Park,Auto Garage,Winery,Food & Drink Shop,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop
43,Suba,0.0,Coffee Shop,Park,Food,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop


#### Cluster 2

In [84]:
bogota_merged_2.loc[bogota_merged_2['Cluster Labels'] == 1, bogota_merged_2.columns[[1] + list(range(5, bogota_merged_2.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Antonio Nariño,1.0,Restaurant,Clothing Store,Seafood Restaurant,BBQ Joint,Miscellaneous Shop,Pie Shop,Pizza Place,Convenience Store,Department Store,Sandwich Place
1,Barrios Unidos,1.0,Restaurant,Furniture / Home Store,Mexican Restaurant,Music Venue,Winery,Fast Food Restaurant,Construction & Landscaping,Convenience Store,Creperie,Cultural Center
2,Barrios Unidos,1.0,Soccer Field,Clothing Store,Fried Chicken Joint,Auto Garage,Motorcycle Shop,Creperie,Cultural Center,Cupcake Shop,Department Store,Food & Drink Shop
6,Bosa,1.0,Coffee Shop,Restaurant,Mexican Restaurant,Food,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store
7,Candelaria,1.0,Café,Restaurant,Hostel,Latin American Restaurant,Italian Restaurant,Coffee Shop,Breakfast Spot,Burger Joint,History Museum,Vegetarian / Vegan Restaurant
8,Chapinero,1.0,Mountain,Food & Drink Shop,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop,Donut Shop
9,Chapinero,1.0,Hotel,Art Gallery,Gym,Athletics & Sports,Food & Drink Shop,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store
11,Ciudad Bolivar,1.0,Park,Department Store,Burger Joint,Winery,Food,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Dessert Shop
12,Ciudad Bolivar,1.0,Music Store,Winery,Food,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop
13,Engativa,1.0,Pizza Place,Shop & Service,Multiplex,Shopping Mall,Toy / Game Store,Gym / Fitness Center,Bar,Bakery,Pub,Drugstore


#### Cluster 3

In [85]:
bogota_merged_2.loc[bogota_merged_2['Cluster Labels'] == 2, bogota_merged_2.columns[[1] + list(range(5, bogota_merged_2.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,Engativa,2.0,Pizza Place,Food,Concert Hall,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop


#### Cluster 4

In [86]:
bogota_merged_2.loc[bogota_merged_2['Cluster Labels'] == 3, bogota_merged_2.columns[[1] + list(range(5, bogota_merged_2.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Bosa,3.0,Construction & Landscaping,Winery,Food & Drink Shop,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop,Donut Shop
5,Bosa,3.0,Bar,Winery,Food Truck,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop,Donut Shop
28,Kennedy,3.0,Construction & Landscaping,Bar,Food Truck,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store,Dessert Shop,Donut Shop


#### Cluster 5

In [87]:
bogota_merged_2.loc[bogota_merged_2['Cluster Labels'] == 4, bogota_merged_2.columns[[1] + list(range(5, bogota_merged_2.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Bosa,4.0,Shopping Mall,Movie Theater,Food & Drink Shop,Winery,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store
10,Ciudad Bolivar,4.0,Movie Theater,Ice Cream Shop,Shopping Mall,Winery,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop,Department Store
27,Kennedy,4.0,Burger Joint,Supermarket,Shopping Mall,Winery,Food,Construction & Landscaping,Convenience Store,Creperie,Cultural Center,Cupcake Shop
29,Los Martires,4.0,Shopping Mall,Department Store,Mobile Phone Shop,Mexican Restaurant,BBQ Joint,Sporting Goods Shop,Restaurant,Shop & Service,Skate Park,Clothing Store


In [82]:
bogota_merged_2.shape

(47, 16)