In [1]:
import pandas as pd
import numpy as np

import folium # map rendering library

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests

# import k-means from clustering stage
from sklearn.cluster import KMeans

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

## Data Preparation

#### Create the Dataframe

In [2]:
sf_zipcode_list = [
    {"zipcode": 94102 , "Neighborhood": "Hayes Valley, Tenderloin, North of Market"},
    {"zipcode": 94103 , "Neighborhood": "South of Market"},
    {"zipcode": 94104 , "Neighborhood": "Financial District"},
    {"zipcode": 94105 , "Neighborhood": "Embarcadero South"},
    {"zipcode": 94107 , "Neighborhood": "Portrero Hill"},
    {"zipcode": 94108 , "Neighborhood": "Chinatown"},
    {"zipcode": 94109 , "Neighborhood": "Polk Hill, Nob Hill, Russian Hill"},
    {"zipcode": 94110 , "Neighborhood": "Mission District, Inner Mission, Bernal Heights"},
    {"zipcode": 94111 , "Neighborhood": "Embarcadero North"},
    {"zipcode": 94112 , "Neighborhood": "Ingleside"},
    {"zipcode": 94114 , "Neighborhood": "Castro, Noe Valley"},
    {"zipcode": 94115 , "Neighborhood": "Pacific Heights, Western Addition, Japantown"},
    {"zipcode": 94116 , "Neighborhood": "Outter Sunset, Parkside, Forest Hill"},
    {"zipcode": 94117 , "Neighborhood": "Haight District, Cole Valley"},
    {"zipcode": 94118 , "Neighborhood": "Richmond District, Inner Richmond"},
    {"zipcode": 94121 , "Neighborhood": "Outter Richmond"},
    {"zipcode": 94123 , "Neighborhood": "Cow Hollow, Marina District"},
    {"zipcode": 94124 , "Neighborhood": "Bayview"},
    {"zipcode": 94127 , "Neighborhood": "St Francis Wood, Miraloma, West Portal"},
    {"zipcode": 94122 , "Neighborhood": "Sunset District"},
    {"zipcode": 94129 , "Neighborhood": "Presidio"},
    {"zipcode": 94131 , "Neighborhood": "Twin Peaks, Glen Park"},
    {"zipcode": 94132 , "Neighborhood": "Lake Merced"},
    {"zipcode": 94133 , "Neighborhood": "North beach, Fishermans Wharf, Chinatown"},
    {"zipcode": 94134 , "Neighborhood": "Visitacion Valley, Sunnydale"}
]

In [3]:
sf_zipcode = pd.DataFrame(data=sf_zipcode_list)
sf_zipcode

Unnamed: 0,zipcode,Neighborhood
0,94102,"Hayes Valley, Tenderloin, North of Market"
1,94103,South of Market
2,94104,Financial District
3,94105,Embarcadero South
4,94107,Portrero Hill
5,94108,Chinatown
6,94109,"Polk Hill, Nob Hill, Russian Hill"
7,94110,"Mission District, Inner Mission, Bernal Heights"
8,94111,Embarcadero North
9,94112,Ingleside


In [4]:
sf_zipcode.shape

(25, 2)

#### Get the Latitude and Longitude

In [5]:
geocode = pd.read_csv("us-zip-code-latitude-and-longitude.csv", sep = ";")
geocode = geocode.drop(columns=['City','State','Timezone','Daylight savings time flag','geopoint'])
geocode.columns = ["zipcode","Latitude","Longitude"]
geocode.head()

Unnamed: 0,zipcode,Latitude,Longitude
0,94175,37.784827,-122.727802
1,94160,37.784827,-122.727802
2,94164,37.784827,-122.727802
3,94131,37.741797,-122.4378
4,94114,37.758434,-122.43512


#### Merge 2 data into 1

In [6]:
sf_df = pd.merge(sf_zipcode,
                 geocode[["zipcode","Latitude","Longitude"]],
                 on='zipcode')

In [7]:
sf_df

Unnamed: 0,zipcode,Neighborhood,Latitude,Longitude
0,94102,"Hayes Valley, Tenderloin, North of Market",37.779329,-122.41915
1,94103,South of Market,37.772329,-122.41087
2,94104,Financial District,37.791728,-122.4019
3,94105,Embarcadero South,37.789228,-122.3957
4,94107,Portrero Hill,37.766529,-122.39577
5,94108,Chinatown,37.792678,-122.40793
6,94109,"Polk Hill, Nob Hill, Russian Hill",37.792778,-122.42188
7,94110,"Mission District, Inner Mission, Bernal Heights",37.74873,-122.41545
8,94111,Embarcadero North,37.798228,-122.40027
9,94112,Ingleside,37.720931,-122.44241


In [8]:
sf_df.shape

(25, 4)

## Cluster the neighborhoods in San Francisco, CA

In [9]:
address = 'San Francisco'

geolocator = Nominatim(user_agent="San Francisco")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of San Francisco are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of San Francisco are 37.7790262, -122.4199061.


#### Visualize SF the neighborhoods

In [10]:
# create map of SF using latitude and longitude values
map_sf = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, label in zip(sf_df['Latitude'], sf_df['Longitude'], sf_df['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_sf)  
    
map_sf

#### Explore the nearby venues in each neighborhood

In [11]:
#Define Foursquare Credentials and Version
CLIENT_ID =  # your Foursquare ID
CLIENT_SECRET =  # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [12]:
LIMIT = 100
radius = 500

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [13]:
sf_venues = getNearbyVenues(names=sf_df['Neighborhood'],
                                   latitudes=sf_df['Latitude'],
                                   longitudes=sf_df['Longitude']
                                  )

Hayes Valley, Tenderloin, North of Market
South of Market
Financial District
Embarcadero South
Portrero Hill
Chinatown
Polk Hill, Nob Hill, Russian Hill
Mission District, Inner Mission, Bernal Heights
Embarcadero North
Ingleside
Castro, Noe Valley
Pacific Heights, Western Addition, Japantown
Outter Sunset, Parkside, Forest Hill
Haight District, Cole Valley
Richmond District, Inner Richmond
Outter Richmond
Cow Hollow, Marina District
Bayview
St Francis Wood, Miraloma, West Portal
Sunset District
Presidio
Twin Peaks, Glen Park
Lake Merced
North beach, Fishermans Wharf, Chinatown
Visitacion Valley, Sunnydale


In [14]:
print(sf_venues.shape)
sf_venues.head()

(1419, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Hayes Valley, Tenderloin, North of Market",37.779329,-122.41915,Louise M. Davies Symphony Hall,37.777976,-122.420157,Concert Hall
1,"Hayes Valley, Tenderloin, North of Market",37.779329,-122.41915,War Memorial Opera House,37.778601,-122.420816,Opera House
2,"Hayes Valley, Tenderloin, North of Market",37.779329,-122.41915,San Francisco Ballet,37.77858,-122.420798,Dance Studio
3,"Hayes Valley, Tenderloin, North of Market",37.779329,-122.41915,Herbst Theater,37.779548,-122.420953,Concert Hall
4,"Hayes Valley, Tenderloin, North of Market",37.779329,-122.41915,War Memorial Court,37.779042,-122.420971,Park


In [15]:
sf_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bayview,26,26,26,26,26,26
"Castro, Noe Valley",82,82,82,82,82,82
Chinatown,82,82,82,82,82,82
"Cow Hollow, Marina District",77,77,77,77,77,77
Embarcadero North,100,100,100,100,100,100
Embarcadero South,82,82,82,82,82,82
Financial District,100,100,100,100,100,100
"Haight District, Cole Valley",39,39,39,39,39,39
"Hayes Valley, Tenderloin, North of Market",95,95,95,95,95,95
Ingleside,34,34,34,34,34,34


In [16]:
print('There are {} uniques categories.'.format(len(sf_venues['Venue Category'].unique())))

There are 251 uniques categories.


#### Analyze each neighborhood

In [17]:
# one hot encoding
sf_onehot = pd.get_dummies(sf_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
sf_onehot['Neighborhood'] = sf_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [sf_onehot.columns[-1]] + list(sf_onehot.columns[:-1])
sf_onehot = sf_onehot[fixed_columns]

sf_onehot.head()

Unnamed: 0,Yoga Studio,ATM,Acai House,Accessories Store,Adult Boutique,African Restaurant,Alternative Healer,American Restaurant,Antique Shop,Argentinian Restaurant,...,Tunnel,Turkish Restaurant,Tuscan Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Wings Joint
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
sf_onehot.shape

(1419, 251)

In [19]:
sf_grouped = sf_onehot.groupby('Neighborhood').mean().reset_index()
sf_grouped

Unnamed: 0,Neighborhood,Yoga Studio,ATM,Acai House,Accessories Store,Adult Boutique,African Restaurant,Alternative Healer,American Restaurant,Antique Shop,...,Tunnel,Turkish Restaurant,Tuscan Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Wings Joint
0,Bayview,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Castro, Noe Valley",0.02439,0.0,0.0,0.0,0.012195,0.0,0.0,0.012195,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.012195,0.0
2,Chinatown,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,...,0.0,0.0,0.0,0.0,0.012195,0.012195,0.0,0.0,0.0,0.0
3,"Cow Hollow, Marina District",0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.025974,0.0,...,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.038961,0.012987,0.0
4,Embarcadero North,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.03,0.0,0.0
5,Embarcadero South,0.012195,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.0,0.0
6,Financial District,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0
7,"Haight District, Cole Valley",0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0
8,"Hayes Valley, Tenderloin, North of Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021053,0.0,...,0.0,0.0,0.0,0.0,0.021053,0.010526,0.0,0.031579,0.0,0.0
9,Ingleside,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0


In [20]:
sf_grouped.shape

(25, 251)

#### Top 10 most common venues in each neighborhood

In [21]:
num_top_venues = 10

for hood in sf_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = sf_grouped[sf_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bayview----
                             venue  freq
0  Southern / Soul Food Restaurant  0.15
1               Mexican Restaurant  0.12
2               Light Rail Station  0.08
3                           Bakery  0.08
4                          Theater  0.04
5                        BBQ Joint  0.04
6                      Bus Station  0.04
7                           Garden  0.04
8               Chinese Restaurant  0.04
9                              Gym  0.04


----Castro, Noe Valley----
               venue  freq
0            Gay Bar  0.10
1    Thai Restaurant  0.05
2        Coffee Shop  0.05
3     Scenic Lookout  0.04
4  Indian Restaurant  0.04
5        Yoga Studio  0.02
6              Plaza  0.02
7          Pet Store  0.02
8      Deli / Bodega  0.02
9  Convenience Store  0.02


----Chinatown----
                 venue  freq
0                Hotel  0.10
1          Coffee Shop  0.06
2               Bakery  0.05
3          Men's Store  0.04
4      Bubble Tea Shop  0.04
5   Chinese R

In [22]:
# save the most common venues into a pandas dataframe
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = sf_grouped['Neighborhood']

for ind in np.arange(sf_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(sf_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bayview,Southern / Soul Food Restaurant,Mexican Restaurant,Light Rail Station,Bakery,Theater,Park,Plaza,Dumpling Restaurant,Coffee Shop,Chinese Restaurant
1,"Castro, Noe Valley",Gay Bar,Thai Restaurant,Coffee Shop,Indian Restaurant,Scenic Lookout,Yoga Studio,Pet Store,Deli / Bodega,Café,Convenience Store
2,Chinatown,Hotel,Coffee Shop,Bakery,Tea Room,Chinese Restaurant,Men's Store,Italian Restaurant,Bubble Tea Shop,Sushi Restaurant,Dim Sum Restaurant
3,"Cow Hollow, Marina District",French Restaurant,Italian Restaurant,Wine Bar,Gym / Fitness Center,Sandwich Place,Sushi Restaurant,Deli / Bodega,Park,Cosmetics Shop,Coffee Shop
4,Embarcadero North,Food Truck,Scenic Lookout,Café,Coffee Shop,Men's Store,Italian Restaurant,Wine Bar,New American Restaurant,Park,Cosmetics Shop


## Cluster Neighborhoods

In [38]:
# set number of clusters
kclusters = 5

sf_grouped_clustering = sf_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(sf_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 4, 4, 4, 4, 0, 4, 4, 4, 2,
       3, 0, 1], dtype=int32)

In [39]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

sf_merged = sf_df

# merge sf_grouped with sf_data to add latitude/longitude for each neighborhood
sf_merged = sf_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

# dropna()
sf_merged = sf_merged.dropna()

sf_merged['Cluster Labels'] = sf_merged['Cluster Labels'].astype(int)

sf_merged.head() # check the last columns!

Unnamed: 0,zipcode,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,94102,"Hayes Valley, Tenderloin, North of Market",37.779329,-122.41915,4,Coffee Shop,Hotel,Theater,Wine Bar,Café,Mexican Restaurant,Boutique,Cocktail Bar,Poke Place,Park
1,94103,South of Market,37.772329,-122.41087,4,Nightclub,Gay Bar,Cocktail Bar,Motorcycle Shop,Thai Restaurant,Sushi Restaurant,Bar,Restaurant,Coffee Shop,Food Truck
2,94104,Financial District,37.791728,-122.4019,4,Coffee Shop,Food Truck,Men's Store,Japanese Restaurant,Italian Restaurant,Cocktail Bar,Café,Sushi Restaurant,Sandwich Place,Gym
3,94105,Embarcadero South,37.789228,-122.3957,4,Coffee Shop,Food Truck,Café,Sandwich Place,Art Gallery,Gym,Salad Place,New American Restaurant,Lounge,Street Food Gathering
4,94107,Portrero Hill,37.766529,-122.39577,0,Breakfast Spot,Coffee Shop,Deli / Bodega,Wine Shop,Café,Park,French Restaurant,Gym / Fitness Center,Bookstore,Rock Club


In [40]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(sf_merged['Latitude'], sf_merged['Longitude'], sf_merged['Neighborhood'], sf_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters

#### Cluster 1

In [41]:
sf_merged.loc[sf_merged['Cluster Labels'] == 0, sf_merged.columns[[1] + list(range(5, sf_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Portrero Hill,Breakfast Spot,Coffee Shop,Deli / Bodega,Wine Shop,Café,Park,French Restaurant,Gym / Fitness Center,Bookstore,Rock Club
7,"Mission District, Inner Mission, Bernal Heights",Mexican Restaurant,Coffee Shop,Pizza Place,Grocery Store,Gym / Fitness Center,Dive Bar,Deli / Bodega,Park,Dry Cleaner,Food & Drink Shop
21,"Twin Peaks, Glen Park",Park,Trail,Grocery Store,Athletics & Sports,Coffee Shop,Korean Restaurant,Outdoors & Recreation,Playground,Shopping Mall,Dim Sum Restaurant
23,"North beach, Fishermans Wharf, Chinatown",Coffee Shop,Pizza Place,Café,Park,Italian Restaurant,Bakery,Deli / Bodega,Chinese Restaurant,Trail,Yoga Studio


#### Cluster 2

In [42]:
sf_merged.loc[sf_merged['Cluster Labels'] == 1, sf_merged.columns[[1] + list(range(5, sf_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
24,"Visitacion Valley, Sunnydale",Garden,Baseball Field,Trail,Park,Wings Joint,Fish Market,Fast Food Restaurant,Field,Filipino Restaurant,Flower Shop


#### Cluster 3

In [43]:
sf_merged.loc[sf_merged['Cluster Labels'] == 2, sf_merged.columns[[1] + list(range(5, sf_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,"St Francis Wood, Miraloma, West Portal",Garden,Bus Line,Wings Joint,Frozen Yogurt Shop,French Restaurant,Fountain,Food Truck,Food Stand,Food Court,Food & Drink Shop


#### Cluster 4

In [44]:
sf_merged.loc[sf_merged['Cluster Labels'] == 3, sf_merged.columns[[1] + list(range(5, sf_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,Sunset District,Chinese Restaurant,Pharmacy,Electronics Store,Café,Liquor Store,Playground,Food Stand,Food Court,Food & Drink Shop,Eye Doctor


#### Cluster 5

In [45]:
sf_merged.loc[sf_merged['Cluster Labels'] == 4, sf_merged.columns[[1] + list(range(5, sf_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Hayes Valley, Tenderloin, North of Market",Coffee Shop,Hotel,Theater,Wine Bar,Café,Mexican Restaurant,Boutique,Cocktail Bar,Poke Place,Park
1,South of Market,Nightclub,Gay Bar,Cocktail Bar,Motorcycle Shop,Thai Restaurant,Sushi Restaurant,Bar,Restaurant,Coffee Shop,Food Truck
2,Financial District,Coffee Shop,Food Truck,Men's Store,Japanese Restaurant,Italian Restaurant,Cocktail Bar,Café,Sushi Restaurant,Sandwich Place,Gym
3,Embarcadero South,Coffee Shop,Food Truck,Café,Sandwich Place,Art Gallery,Gym,Salad Place,New American Restaurant,Lounge,Street Food Gathering
5,Chinatown,Hotel,Coffee Shop,Bakery,Tea Room,Chinese Restaurant,Men's Store,Italian Restaurant,Bubble Tea Shop,Sushi Restaurant,Dim Sum Restaurant
6,"Polk Hill, Nob Hill, Russian Hill",Grocery Store,Gym / Fitness Center,Sushi Restaurant,Wine Bar,Vietnamese Restaurant,Deli / Bodega,Gym,Diner,Italian Restaurant,Coffee Shop
8,Embarcadero North,Food Truck,Scenic Lookout,Café,Coffee Shop,Men's Store,Italian Restaurant,Wine Bar,New American Restaurant,Park,Cosmetics Shop
9,Ingleside,Pizza Place,Mexican Restaurant,Vietnamese Restaurant,Sandwich Place,Bus Station,Japanese Restaurant,Food Truck,Gas Station,Furniture / Home Store,Metro Station
10,"Castro, Noe Valley",Gay Bar,Thai Restaurant,Coffee Shop,Indian Restaurant,Scenic Lookout,Yoga Studio,Pet Store,Deli / Bodega,Café,Convenience Store
11,"Pacific Heights, Western Addition, Japantown",Bakery,Café,Cosmetics Shop,Spa,Chinese Restaurant,Pizza Place,Boutique,Yoga Studio,Arts & Crafts Store,Bubble Tea Shop
