### Importing Libraries including BeautifulSoup which can read and transform HTML tables to Pandas Dataframes

In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

__ We will read the URL and put in a pandas dataframe. If there is more than one table, different dataframes are created and hence we are using dfs.. please note extra s there __

In [2]:
res = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(res.content,'lxml')
table = soup.find_all('table')[0] 
dfs = pd.read_html(str(table))


In [3]:
# We are just reading the first table and then setting first row as column and rest as rows
df = dfs[0]
df.columns = df.iloc[0]
df = df[1:]
df

Unnamed: 0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M5A,Downtown Toronto,Regent Park
7,M6A,North York,Lawrence Heights
8,M6A,North York,Lawrence Manor
9,M7A,Queen's Park,Not assigned
10,M8A,Not assigned,Not assigned


__ We will capture only records where Borough is anything other than 'Not assigned' and then resetting the index using a method other than reset_index  __

In [4]:
df = df[df['Borough'] != 'Not assigned']
df.index = np.arange(0, len(df))

#### We will not set the Neighbourhood to the Borough's value of that row if it is Not assigned. Else, we will keep it as is.

In [5]:
df.loc[df['Neighbourhood'] == 'Not assigned', 'Neighbourhood'] = df['Borough']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


#### We will not groupBy with Post Code and join all other columns with a comma
##### Then, we will define a function and read only upto first Comma for the Borough Column so that it does not contain duplicate values

In [6]:
df = df.groupby('Postcode', as_index=False).agg(lambda x: ', '.join(x))

def returnUpComma(boroughValue):
    if(boroughValue.find(',')==-1):
        return boroughValue
    else:
        return boroughValue.split(",")[0]

df['Borough'] = df['Borough'].apply(lambda x: returnUpComma(x))
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


### Printing the shape of the dataframe

In [7]:
df.shape

(103, 3)

## Validating by checking one the records from the Question i.e Problem Statement from Coursera itself

In [8]:
df[df['Postcode'] == 'M4B']

Unnamed: 0,Postcode,Borough,Neighbourhood
35,M4B,East York,"Woodbine Gardens, Parkview Hill"


### Reading the Latitude & Longitude from the External CSV file now

In [9]:
import pandas as pd
import io
import requests
url="http://cocl.us/Geospatial_data"
geodatacontent=requests.get(url).content
geodatadf=pd.read_csv(io.StringIO(geodatacontent.decode('utf-8')))
geodatadf.rename(columns={'Postal Code':'Postcode'}, inplace=True)
geodatadf

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


### Now, merging the Latitude & Longitude columns into main DF

In [10]:
mergeddf = pd.merge(left=df,right=geodatadf)
mergeddf

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [11]:
mergeddf.shape

(103, 5)

In [12]:
torontodf= mergeddf[mergeddf['Borough'].str.contains('Toronto')]
torontodf

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
45,M4P,Central Toronto,Davisville North,43.712751,-79.390197
46,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
47,M4S,Central Toronto,Davisville,43.704324,-79.38879
48,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
49,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049


In [13]:
torontodf.shape

(38, 5)

In [15]:
import folium
from geopy.geocoders import Nominatim
address = 'Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [24]:
# create map of Tornto  using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, Postcode, Borough, Neighbourhood in zip(torontodf['Latitude'], torontodf['Longitude'],
                                                       torontodf['Postcode'], torontodf['Borough'],
                                                       torontodf['Neighbourhood']):
# label = '{}, {}, {}'.format(Postcode, Borough, Neighbourhood)
    label = '{}, {}'.format(Postcode, Neighbourhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        #parse_html=False
        ).add_to(map_toronto)

map_toronto

In [17]:
# Import request 
import requests

In [18]:
#Setup Four square API 
CLIENT_ID = '4XABAN0SWQBF5IFGMH3AEQUZ4VWELOHIKQEKAVWH40POBUHT' # your Foursquare ID
CLIENT_SECRET = 'M5UM1SWPWU0Q25OILNLX1VJWEESUGKA43WQPVM0KJ503MMSN' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Credentials Setup done')

Credentials Setup done


In [19]:
#Define radius to search near by place and count of total location 
radius = 500
limit = 100

In [20]:
# Function to fetch the data from Fours sqare API 

def getNearbyVenues(codes, names, latitudes, longitudes, radius=500):
    venues_list = []
    for code, name, lat, lng in zip(codes, names, latitudes, longitudes):
        print(code, name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}\
            &v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            limit)

        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']



        # return only relevant information for each nearby venue
        venues_list.append([(
            code,
            name,
            lat,
            lng,
            #            v['venue']['code'],
            v['venue']['name'],
            v['venue']['location']['lat'],
            v['venue']['location']['lng'],
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Postal Code',
                             'Borough',
                             'Latitude',
                             'Longitude',
                             'Venue',
                             'Venue Latitude',
                             'Venue Longitude',
                             'Venue Category']

    return (nearby_venues)

In [26]:
# Details of nearby places for addtional research 
toronto_venues = getNearbyVenues(codes=torontodf['Postcode'],
                                  names=torontodf['Borough'],
                                  latitudes=torontodf['Latitude'],
                                  longitudes=torontodf['Longitude']
                                   )


print(toronto_venues.shape)
print(toronto_venues.head())



M4E East Toronto
M4K East Toronto
M4L East Toronto
M4M East Toronto
M4N Central Toronto
M4P Central Toronto
M4R Central Toronto
M4S Central Toronto
M4T Central Toronto
M4V Central Toronto
M4W Downtown Toronto
M4X Downtown Toronto
M4Y Downtown Toronto
M5A Downtown Toronto
M5B Downtown Toronto
M5C Downtown Toronto
M5E Downtown Toronto
M5G Downtown Toronto
M5H Downtown Toronto
M5J Downtown Toronto
M5K Downtown Toronto
M5L Downtown Toronto
M5N Central Toronto
M5P Central Toronto
M5R Central Toronto
M5S Downtown Toronto
M5T Downtown Toronto
M5V Downtown Toronto
M5W Downtown Toronto
M5X Downtown Toronto
M6G Downtown Toronto
M6H West Toronto
M6J West Toronto
M6K West Toronto
M6P West Toronto
M6R West Toronto
M6S West Toronto
M7Y East Toronto
(1693, 8)
  Postal Code       Borough   Latitude  Longitude  \
0         M4E  East Toronto  43.676357 -79.293031   
1         M4E  East Toronto  43.676357 -79.293031   
2         M4E  East Toronto  43.676357 -79.293031   
3         M4E  East Toronto  43.6

In [25]:
# Grouping by post code and borough and printing count
toronto_venues.groupby(['Postal Code', 'Borough']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Postal Code,Borough,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
M4E,East Toronto,4,4,4,4,4,4
M4K,East Toronto,43,43,43,43,43,43
M4L,East Toronto,18,18,18,18,18,18
M4M,East Toronto,39,39,39,39,39,39
M4N,Central Toronto,3,3,3,3,3,3
M4P,Central Toronto,9,9,9,9,9,9
M4R,Central Toronto,20,20,20,20,20,20
M4S,Central Toronto,34,34,34,34,34,34
M4T,Central Toronto,3,3,3,3,3,3
M4V,Central Toronto,15,15,15,15,15,15
