In [8]:
# install geopy & folium
!conda install -c conda-forge geopy --yes
!conda install -c conda-forge folium=0.5.0 --yes

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          90 KB

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0   conda-forge
    geopy:         1.20.0-py_0 conda-forge


Downloading and Extracting Packages
geopy-1.20.0         | 57 KB     | ##################################### | 100% 
geographiclib-1.49   | 32 KB     | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done
Solving environ

In [9]:
import numpy as np

import pandas as pd 
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder # to get coordinates

import requests 
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import folium 

print("Libraries imported sucessfully.")

Libraries imported sucessfully.


In [12]:
# send the GET request
data = requests.get("https://en.wikipedia.org/wiki/Raleigh,_North_Carolina_neighborhoods").text
# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(data, 'html.parser')
print(soup.prettify())


<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Raleigh, North Carolina neighborhoods - Wikipedia
  </title>
  <script>
   document.documentElement.className=document.documentElement.className.replace(/(^|\s)client-nojs(\s|$)/,"$1client-js$2");RLCONF={"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"Raleigh,_North_Carolina_neighborhoods","wgTitle":"Raleigh, North Carolina neighborhoods","wgCurRevisionId":855137498,"wgRevisionId":855137498,"wgArticleId":17266626,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Lists of neighborhoods in U.S. cities","Neighborhoods in Raleigh, North Carolina"],"wgBreakFrames":!1,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","Ma

In [52]:
# create a list to store neighborhood data
neighborhoodList = []
# append the data into the list
areaList = []
soup.find_all("li")
for area in soup.find_all("li"):
    areaText = area.find("li").text
    areaList.append(areaText)

#create a new DataFrame from the list
raleigh_df = pd.DataFrame({"Neighborhood": neighborhoodList})

raleigh_df.head()


[<li class="toclevel-1 tocsection-1"><a href="#Inside_the_Beltline"><span class="tocnumber">1</span> <span class="toctext">Inside the Beltline</span></a></li>,
 <li class="toclevel-1 tocsection-2"><a href="#North_Raleigh"><span class="tocnumber">2</span> <span class="toctext">North Raleigh</span></a></li>,
 <li class="toclevel-1 tocsection-3"><a href="#West_Raleigh_and_Southwest_Raleigh"><span class="tocnumber">3</span> <span class="toctext">West Raleigh and Southwest Raleigh</span></a></li>,
 <li class="toclevel-1 tocsection-4"><a href="#South_and_East_Raleigh"><span class="tocnumber">4</span> <span class="toctext">South and East Raleigh</span></a></li>,
 <li>Anderson Heights</li>,
 <li>Avent West</li>,
 <li>Belvidere Park</li>,
 <li><a href="/wiki/Battery_Heights_Historic_District" title="Battery Heights Historic District">Battery Heights</a></li>,
 <li><a href="/wiki/Bloomsbury_Historic_District" title="Bloomsbury Historic District">Bloomsbury</a></li>,
 <li><a class="mw-redirect" h

In [27]:
# define a function to get coordinates
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Raleigh, North Carolina'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [None]:
# call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(neighborhood) for neighborhood in raleigh_df["Neighborhood"].tolist() ]
coords

In [None]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
# merge the coordinates into the original dataframe
raleigh_df['Latitude'] = df_coords['Latitude']
raleigh_df['Longitude'] = df_coords['Longitude']

# check the neighborhoods and the coordinates
print(raleigh_df.shape)
raleigh_df

In [None]:
# save the DataFrame as CSV file
raleigh_df.to_csv("raleigh_df.csv", index=False)


In [None]:
# get the coordinates of Kuala Lumpur
address = 'Raleigh, North Carolina'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate {}, {}.'.format(latitude, longitude))

In [None]:
# create map of Toronto using latitude and longitude values
map_raleigh = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(raleigh_df['Latitude'], raleigh_df['Longitude'], raleigh_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_raleigh)  
    
map_raleigh


In [None]:
# save the map as HTML file
map_raleigh.save('map_raleigh.html')

In [None]:
# define Foursquare Credentials and Version
CLIENT_ID = 'your Foursquare ID' # your Foursquare ID
CLIENT_SECRET = 'your Foursquare Secret' # your Foursquare Secret
VERSION = '2019xxxx' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

In [None]:
radius = 2000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(raleigh_df['Latitude'], raleigh_df['Longitude'], raleigh_df['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [None]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

In [None]:
venues_df.groupby(["Neighborhood"]).count()

In [None]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))
# print out the list of categories
venues_df['VenueCategory'].unique()[:50]

In [None]:

# check if the results contain "Shopping center"
"Neighborhood" in venues_df['VenueCategory'].unique()

In [None]:

# one hot encoding
raleigh_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
raleigh_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [raleigh_onehot.columns[-1]] + list(raleigh_onehot.columns[:-1])
raleigh_onehot = raleigh_onehot[fixed_columns]

print(raleigh_onehot.shape)
raleigh_onehot.head()

raleigh_grouped = raleigh_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(raleigh_grouped.shape)
raleigh_grouped



In [None]:

len(raleigh_grouped[raleigh_grouped["Shopping Mall"] > 0])
raleigh_mall = raleigh_grouped[["Neighborhoods","Shopping Mall"]]

raleigh_mall.head()

In [None]:
# set number of clusters
kclusters = 3

raleigh_clustering = raleigh_mall.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(raleigh_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

In [None]:
#create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
raleigh_merged = raleigh_mall.copy()

# add clustering labels
raleigh_merged["Cluster Labels"] = kmeans.labels_

raleigh_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
raleigh_merged.head()


# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
raleigh_merged = raleigh_merged.join(raleigh_df.set_index("Neighborhood"), on="Neighborhood")

print(raleigh_merged.shape)
raleigh_merged.head() 

In [None]:
# sort the results by Cluster Labels
raleigh_merged.sort_values(["Cluster Labels"], inplace=True)
raleigh_merged


In [None]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(kl_merged['Latitude'], raleigh_merged['Longitude'], raleigh_merged['Neighborhood'], raleigh_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [None]:
map_clusters.save('map_clusters.html')

In [None]:
raleigh_merged.loc[raleigh_merged['Cluster Labels'] == 2]