## Battle of Neighborhoods 

In [1]:
import numpy as np

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

#import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

import geopandas as gp
import geopy
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from geopy.point import Point

import tqdm
from tqdm._tqdm_notebook import tqdm_notebook

print('Libraries imported.')

Libraries imported.


In [4]:
latlong_link = 'https://public.opendatasoft.com/explore/dataset/us-zip-code-latitude-and-longitude/download/?format=csv&refine.state=CA&timezone=America/Los_Angeles&lang=en&use_labels_for_header=true&csv_separator=%3B'
df_latlong = pd.read_csv(latlong_link, delimiter = ';')
df_latlong.to_pickle('latlong')
df_latlong.head()

Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint
0,92232,Calexico,CA,33.026203,-115.284581,-8,1,"33.026203,-115.284581"
1,93227,Goshen,CA,36.357151,-119.425371,-8,1,"36.357151,-119.425371"
2,93234,Huron,CA,36.209815,-120.0847,-8,1,"36.209815,-120.0847"
3,93529,June Lake,CA,37.765218,-119.07769,-8,1,"37.765218,-119.07769"
4,93761,Fresno,CA,36.746375,-119.639658,-8,1,"36.746375,-119.639658"


In [5]:
df_latlong = pd.read_pickle('latlong')

In [6]:
df_income = pd.read_excel(r"C:\Users\Wu Fam\Desktop\PostGradLearning\IBM_DS\ADSC\zipcode and income.xlsx")
df_income.rename(columns={"Zip Code": "Zip"}, inplace=True)
df_income.head()

Unnamed: 0,Zip,Community,Estimated Median Income
0,90001,"Los Angeles (South Los Angeles), Florence-Graham",35660
1,90002,"Los Angeles (Southeast Los Angeles, Watts)",34000
2,90003,"Los Angeles (South Los Angeles, Southeast Los ...",34397
3,90004,"Los Angeles (Hancock Park, Rampart Village, Vi...",46581
4,90005,"Los Angeles (Hancock Park, Koreatown, Wilshire...",32461


In [241]:
df_final = df_latlong.join(df_income.set_index('Zip'), on='Zip')
df_final.rename(columns={"Estimated Median Income":"Income"}, inplace=True)
df_final.drop(['State','Timezone','Daylight savings time flag','geopoint','Community'], axis = 1, inplace=True)
df_final.dropna(inplace=True)
df_final.head()

Unnamed: 0,Zip,City,Latitude,Longitude,Income
7,90038,Los Angeles,34.089459,-118.3285,36996.0
8,90063,Los Angeles,34.045161,-118.1865,44121.0
23,90301,Inglewood,33.955913,-118.35868,42100.0
60,90220,Compton,33.890566,-118.23666,54014.0
63,91302,Calabasas,34.133513,-118.66464,122967.0


In [8]:
latitude = 34.052235
longitude = -118.243683
#https://www.latlong.net/place/los-angeles-ca-usa-1531.html

In [156]:
map_LA = folium.Map(location=[latitude, longitude], zoom_start=8)
# add markers to map
for lat, lng, label in zip(df_final['Latitude'], df_final['Longitude'], df_final['City']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_LA)  
    
map_LA

In [10]:
df_clustering = df_final.drop(['City','State','Zip'], axis=1)
df_clustering.head()

Unnamed: 0,Latitude,Longitude,Income
7,34.089459,-118.3285,36996.0
8,34.045161,-118.1865,44121.0
23,33.955913,-118.35868,42100.0
60,33.890566,-118.23666,54014.0
63,34.133513,-118.66464,122967.0


In [72]:
k_means = KMeans(init="k-means++", n_clusters=10, n_init=200)

In [73]:
k_means.fit(df_clustering)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=10, n_init=200, n_jobs=None, precompute_distances='auto',
       random_state=None, tol=0.0001, verbose=0)

In [75]:
k_means_labels = k_means.labels_
labels = k_means.labels_

In [76]:
k_means_cluster_centers = k_means.cluster_centers_

In [77]:
df_clustering["Labels"] = labels

In [78]:
df_processed = df_clustering.groupby('Labels').mean()
df_processed

Unnamed: 0_level_0,Latitude,Longitude,Income
Labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,34.074314,-118.23818,41572.903846
1,34.075709,-118.561669,124335.454545
2,34.057189,-118.21649,79536.046512
3,34.084199,-118.21053,66417.423077
4,34.009337,-118.349854,153243.25
5,34.07864,-118.236614,90983.392857
6,33.991374,-118.262772,28850.882353
7,34.079136,-118.20054,53591.040816
8,34.127172,-118.360335,103772.666667
9,34.075756,-118.492255,182842.0


In [79]:
df_processed['geom'] = df_processed['Latitude'].map(str) + ',' + df_processed['Longitude'].map(str)

In [80]:
locator = Nominatim(user_agent="myGeocoder", timeout=10)

In [81]:
df_processed['geom']

Labels
0    34.074313826923074,-118.23817999999999
1     34.07570936363636,-118.56166927272729
2     34.05718893023256,-118.21648979069772
3     34.08419874999999,-118.21052999999996
4           34.00933675,-118.34985400000001
5      34.07863982142858,-118.2366143214286
6    33.991373764705884,-118.26277247058822
7              34.07913595918367,-118.20054
8     34.12717238095239,-118.36033504761905
9                     34.075756,-118.492255
Name: geom, dtype: object

In [82]:
location=[]

for i in range(10):
    location.append(locator.reverse(df_processed['geom'][i]))

In [242]:
df_location = pd.DataFrame(location, columns=['Address','Coordinates'])
location_strings = df_location['Address'].str.split(',', n=0, expand=True)
location_strings

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,Solano Avenue,Elysian Park,Los Angeles,Los Angeles County,California,90012,United States of America,,
1,1545,Palisades Circle,Pacific Palisades,Los Angeles,Los Angeles County,California,90272,United States of America,
2,I-10 Metro ExpressLanes,Brooklyn Heights,Boyle Heights,Los Angeles,Los Angeles County,California,90033,United States of America,
3,267,Avenue 33,Lincoln Heights,Los Angeles,Los Angeles County,California,90031,United States of America,
4,4343,Don Diablo Drive,Baldwin Hills/Crenshaw,Los Angeles,Los Angeles County,California,90008,United States of America,
5,841,Solano Avenue,Elysian Park,Los Angeles,Los Angeles County,California,90012,United States of America,
6,Los Angeles Academy Middle School,644,East 56th Street,South Park,Los Angeles,Los Angeles County,California,90011,United States of America
7,2842,Eva Terrace,Lincoln Heights,Los Angeles,Los Angeles County,California,90031,United States of America,
8,7572,Mulholland Drive,Hollywood Hills West,Los Angeles,Los Angeles County,California,90046,United States of America,
9,960,North Kenter Avenue,Westgate Heights,Brentwood,Los Angeles,Los Angeles County,California,90049,United States of America


In [106]:
def squeeze_nan(x):
    original_columns = x.index.tolist()

    squeezed = x.dropna()[::-1]
    squeezed.index = [original_columns[n] for n in range(squeezed.count())]

    return squeezed.reindex(original_columns, fill_value=np.nan)

In [148]:
df_cities = location_strings.apply(squeeze_nan, axis=1)
df_cities.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,United States of America,90012,California,Los Angeles County,Los Angeles,Elysian Park,Solano Avenue,,
1,United States of America,90272,California,Los Angeles County,Los Angeles,Pacific Palisades,Palisades Circle,1545,
2,United States of America,90033,California,Los Angeles County,Los Angeles,Boyle Heights,Brooklyn Heights,I-10 Metro ExpressLanes,
3,United States of America,90031,California,Los Angeles County,Los Angeles,Lincoln Heights,Avenue 33,267,
4,United States of America,90008,California,Los Angeles County,Los Angeles,Baldwin Hills/Crenshaw,Don Diablo Drive,4343,


In [149]:
df_cities.columns = ['US','Zip','State','LAC','LA','City','Street','spec1','spec2']

In [150]:
df_cities.drop(['US','State','LAC','LA','Street','spec1','spec2'], axis=1, inplace=True)

In [190]:
df_cities['Zip']=df_cities['Zip'].astype(int)

In [197]:
df_final['Zip']=df_final['Zip'].astype(int)

In [205]:
df = df_final.merge(df_cities, on='Zip', how='left')
df.dropna(axis=0, inplace=True)
df.drop(columns=['City_x','State'], axis=1, inplace=True)

In [211]:
df.rename(columns={'City_y':'Neighborhood'}, inplace=True)

In [212]:
df

Unnamed: 0,Zip,Latitude,Longitude,Income,Neighborhood
13,90011,34.007063,-118.25868,33824.0,South Park
14,90008,34.009754,-118.33705,36641.0,Baldwin Hills/Crenshaw
16,90033,34.050411,-118.21195,31683.0,Boyle Heights
36,90049,34.067409,-118.47528,121671.0,Brentwood
145,90012,34.061611,-118.23944,38786.0,Elysian Park
146,90012,34.061611,-118.23944,38786.0,Elysian Park
154,90046,34.098908,-118.36241,65990.0,Hollywood Hills West
181,90272,34.050505,-118.53374,180962.0,Pacific Palisades
235,90031,34.07871,-118.2161,41126.0,Lincoln Heights
236,90031,34.07871,-118.2161,41126.0,Lincoln Heights


In [213]:
CLIENT_ID = '' # your Foursquare ID
CLIENT_SECRET = '' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

In [214]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [215]:
LA_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

 South Park
 Baldwin Hills/Crenshaw
 Boyle Heights
 Brentwood
 Elysian Park
 Elysian Park
 Hollywood Hills West
 Pacific Palisades
 Lincoln Heights
 Lincoln Heights


In [216]:
LA_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Baldwin Hills/Crenshaw,40,40,40,40,40,40
Boyle Heights,11,11,11,11,11,11
Brentwood,3,3,3,3,3,3
Elysian Park,174,174,174,174,174,174
Hollywood Hills West,50,50,50,50,50,50
Lincoln Heights,24,24,24,24,24,24
Pacific Palisades,7,7,7,7,7,7
South Park,8,8,8,8,8,8


In [217]:
print('There are {} uniques categories.'.format(len(LA_venues['Venue Category'].unique())))

There are 101 uniques categories.


In [218]:
# one hot encoding
LA_onehot = pd.get_dummies(LA_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
LA_onehot['Neighborhood'] = LA_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [LA_onehot.columns[-1]] + list(LA_onehot.columns[:-1])
LA_onehot = LA_onehot[fixed_columns]

LA_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Art Gallery,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Bookstore,Breakfast Spot,Bubble Tea Shop,Burger Joint,Bus Line,Café,Cajun / Creole Restaurant,Campground,Cheese Shop,Chinese Restaurant,Clothing Store,Coffee Shop,Comedy Club,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Discount Store,Donut Shop,Fast Food Restaurant,Filipino Restaurant,Flea Market,Food,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Grocery Store,Gym,Gym / Fitness Center,Hardware Store,Health & Beauty Service,High School,Historic Site,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Kids Store,Lingerie Store,Liquor Store,Lounge,Mediterranean Restaurant,Mexican Restaurant,Mobile Phone Shop,Monument / Landmark,Movie Theater,Nail Salon,New American Restaurant,Nightclub,Noodle House,Paper / Office Supplies Store,Pedestrian Plaza,Pharmacy,Pizza Place,Plaza,Ramen Restaurant,Record Shop,Recreation Center,Restaurant,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shipping Store,Shoe Store,Shopping Mall,Shopping Plaza,Snack Place,South American Restaurant,Southern / Soul Food Restaurant,Spa,Speakeasy,State / Provincial Park,Supplement Shop,Sushi Restaurant,Taco Place,Taiwanese Restaurant,Tea Room,Thai Restaurant,Theater,Thrift / Vintage Store,Trail,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wings Joint
0,South Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,South Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,South Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,South Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,South Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [219]:
LA_onehot.shape

(317, 102)

In [220]:
LA_grouped = LA_onehot.groupby('Neighborhood').mean().reset_index()
LA_grouped

Unnamed: 0,Neighborhood,Accessories Store,Art Gallery,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Bookstore,Breakfast Spot,Bubble Tea Shop,Burger Joint,Bus Line,Café,Cajun / Creole Restaurant,Campground,Cheese Shop,Chinese Restaurant,Clothing Store,Coffee Shop,Comedy Club,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Discount Store,Donut Shop,Fast Food Restaurant,Filipino Restaurant,Flea Market,Food,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Grocery Store,Gym,Gym / Fitness Center,Hardware Store,Health & Beauty Service,High School,Historic Site,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Kids Store,Lingerie Store,Liquor Store,Lounge,Mediterranean Restaurant,Mexican Restaurant,Mobile Phone Shop,Monument / Landmark,Movie Theater,Nail Salon,New American Restaurant,Nightclub,Noodle House,Paper / Office Supplies Store,Pedestrian Plaza,Pharmacy,Pizza Place,Plaza,Ramen Restaurant,Record Shop,Recreation Center,Restaurant,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shipping Store,Shoe Store,Shopping Mall,Shopping Plaza,Snack Place,South American Restaurant,Southern / Soul Food Restaurant,Spa,Speakeasy,State / Provincial Park,Supplement Shop,Sushi Restaurant,Taco Place,Taiwanese Restaurant,Tea Room,Thai Restaurant,Theater,Thrift / Vintage Store,Trail,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wings Joint
0,Baldwin Hills/Crenshaw,0.025,0.0,0.0,0.0,0.0,0.025,0.025,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.025,0.0,0.0,0.025,0.025,0.0,0.05,0.0,0.0,0.0,0.025,0.075,0.0,0.0,0.025,0.0,0.025,0.025,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.025,0.025,0.0,0.0,0.0,0.0,0.0,0.025,0.05,0.0,0.0,0.0,0.05,0.025,0.0,0.0,0.0,0.025,0.0,0.0,0.025,0.0,0.0,0.025,0.0,0.0,0.025,0.0,0.0,0.05,0.0,0.025,0.0,0.025,0.025,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.025
1,Boyle Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.181818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.090909,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.181818,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Brentwood,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Elysian Park,0.0,0.011494,0.011494,0.011494,0.0,0.034483,0.0,0.022989,0.022989,0.0,0.011494,0.011494,0.0,0.022989,0.011494,0.0,0.0,0.195402,0.0,0.022989,0.0,0.0,0.0,0.0,0.0,0.011494,0.011494,0.0,0.0,0.0,0.011494,0.011494,0.0,0.011494,0.011494,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.011494,0.011494,0.0,0.011494,0.011494,0.011494,0.0,0.011494,0.0,0.011494,0.0,0.0,0.0,0.0,0.011494,0.0,0.091954,0.0,0.011494,0.0,0.0,0.0,0.0,0.011494,0.0,0.011494,0.0,0.011494,0.022989,0.011494,0.011494,0.011494,0.0,0.034483,0.0,0.022989,0.0,0.0,0.011494,0.0,0.011494,0.011494,0.0,0.011494,0.011494,0.0,0.0,0.0,0.0,0.011494,0.022989,0.011494,0.0,0.0,0.0,0.0,0.0,0.08046,0.0
4,Hollywood Hills West,0.0,0.0,0.02,0.0,0.02,0.0,0.06,0.0,0.0,0.02,0.0,0.02,0.02,0.0,0.0,0.0,0.02,0.02,0.0,0.06,0.02,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.02,0.0,0.02,0.0,0.0,0.04,0.0,0.02,0.02,0.02,0.0,0.04,0.02,0.0,0.02,0.0,0.0,0.0,0.06,0.02,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.02,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0
5,Lincoln Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0
6,Pacific Palisades,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.142857,0.0,0.0,0.0,0.0
7,South Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.375,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [221]:
LA_grouped.shape

(8, 102)

In [222]:
num_top_venues = 5

for hood in LA_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = LA_grouped[LA_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---- Baldwin Hills/Crenshaw----
                             venue  freq
0             Fast Food Restaurant  0.08
1                 Department Store  0.05
2               Mexican Restaurant  0.05
3  Southern / Soul Food Restaurant  0.05
4                   Sandwich Place  0.05


---- Boyle Heights----
                venue  freq
0  Mexican Restaurant  0.18
1          Taco Place  0.18
2  Seafood Restaurant  0.09
3     Thai Restaurant  0.09
4         Pizza Place  0.09


---- Brentwood----
               venue  freq
0       Home Service  0.33
1      Historic Site  0.33
2         Campground  0.33
3  Accessories Store  0.00
4          Nightclub  0.00


---- Elysian Park----
                   venue  freq
0     Chinese Restaurant  0.20
1     Mexican Restaurant  0.09
2  Vietnamese Restaurant  0.08
3                 Bakery  0.03
4         Sandwich Place  0.03


---- Hollywood Hills West----
              venue  freq
0              Bank  0.06
1          Pharmacy  0.06
2       Coffee Shop  0.06


In [223]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [225]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = LA_grouped['Neighborhood']

for ind in np.arange(LA_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(LA_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Baldwin Hills/Crenshaw,Fast Food Restaurant,Sandwich Place,Mexican Restaurant,Lingerie Store,Department Store,Southern / Soul Food Restaurant,Wings Joint,Hardware Store,Paper / Office Supplies Store,New American Restaurant
1,Boyle Heights,Mexican Restaurant,Taco Place,Pharmacy,Sandwich Place,Seafood Restaurant,Coffee Shop,Burger Joint,Pizza Place,Thai Restaurant,Gym / Fitness Center
2,Brentwood,Home Service,Historic Site,Campground,Wings Joint,Frozen Yogurt Shop,Discount Store,Donut Shop,Fast Food Restaurant,Filipino Restaurant,Flea Market
3,Elysian Park,Chinese Restaurant,Mexican Restaurant,Vietnamese Restaurant,Sandwich Place,Bakery,Plaza,Seafood Restaurant,Café,Tea Room,Bar
4,Hollywood Hills West,Coffee Shop,Pharmacy,Bank,Sushi Restaurant,Movie Theater,Gym / Fitness Center,Liquor Store,Fast Food Restaurant,Grocery Store,Nail Salon
5,Lincoln Heights,Taco Place,Grocery Store,Mexican Restaurant,Pharmacy,Thrift / Vintage Store,Seafood Restaurant,Fast Food Restaurant,Bank,Frozen Yogurt Shop,Discount Store
6,Pacific Palisades,Gym,Deli / Bodega,Trail,Scenic Lookout,Theater,High School,State / Provincial Park,Wings Joint,Flea Market,Filipino Restaurant
7,South Park,Fast Food Restaurant,Ice Cream Shop,Discount Store,Pizza Place,Mexican Restaurant,Fried Chicken Joint,Wings Joint,Frozen Yogurt Shop,Donut Shop,Filipino Restaurant


In [229]:
results = df.merge(neighborhoods_venues_sorted, on='Neighborhood', how='left')
results.drop(columns={'Zip','Latitude','Longitude'}, inplace=True)

In [240]:
results = results.drop_duplicates()

In [238]:
results.sort_values(by='Income', ascending=False)

Unnamed: 0,Income,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,180962.0,Pacific Palisades,Gym,Deli / Bodega,Trail,Scenic Lookout,Theater,High School,State / Provincial Park,Wings Joint,Flea Market,Filipino Restaurant
3,121671.0,Brentwood,Home Service,Historic Site,Campground,Wings Joint,Frozen Yogurt Shop,Discount Store,Donut Shop,Fast Food Restaurant,Filipino Restaurant,Flea Market
6,65990.0,Hollywood Hills West,Coffee Shop,Pharmacy,Bank,Sushi Restaurant,Movie Theater,Gym / Fitness Center,Liquor Store,Fast Food Restaurant,Grocery Store,Nail Salon
8,41126.0,Lincoln Heights,Taco Place,Grocery Store,Mexican Restaurant,Pharmacy,Thrift / Vintage Store,Seafood Restaurant,Fast Food Restaurant,Bank,Frozen Yogurt Shop,Discount Store
4,38786.0,Elysian Park,Chinese Restaurant,Mexican Restaurant,Vietnamese Restaurant,Sandwich Place,Bakery,Plaza,Seafood Restaurant,Café,Tea Room,Bar
1,36641.0,Baldwin Hills/Crenshaw,Fast Food Restaurant,Sandwich Place,Mexican Restaurant,Lingerie Store,Department Store,Southern / Soul Food Restaurant,Wings Joint,Hardware Store,Paper / Office Supplies Store,New American Restaurant
0,33824.0,South Park,Fast Food Restaurant,Ice Cream Shop,Discount Store,Pizza Place,Mexican Restaurant,Fried Chicken Joint,Wings Joint,Frozen Yogurt Shop,Donut Shop,Filipino Restaurant
2,31683.0,Boyle Heights,Mexican Restaurant,Taco Place,Pharmacy,Sandwich Place,Seafood Restaurant,Coffee Shop,Burger Joint,Pizza Place,Thai Restaurant,Gym / Fitness Center
