# Capstone Final Project

## Objective: To find a suitable neighbourhood for rent in Bangalore, based on 3 factors

#### 1. Proximity to Office
#### 2. Nearby Facilities
#### 3. Affordibility

In [1]:
pip install folium

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install geopy

Note: you may need to restart the kernel to use updated packages.


In [3]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


## 1. Defined User Preferences

In [6]:
office_address ='murugeshpalaya, Bangalore'
Proximity_from_office = 6.5 #in km
User_preference = ['Pub','Indian Restaurant','American Restaurant','Badminton Court','Pizza Place','Lounge','Gym','Burrito Place','Brewery','Breakfast Spot','Clothing Store','Convenience Store','Department Store','Diner','Fast Food Restaurant','Hospital','Italian Restaurant','Liquor Store','Market','Mexican Restaurant','Middle Eastern Restaurant','Movie Theater','Multiplex','Nightclub','North Indian Restaurant','Park','Playground','Salad Place','Shopping Mall','Steakhouse']
User_preference_short=['Pub','Badminton Court','Pizza Place','Park','Gym','Brewery','Department Store','Hospital','Diner','Liquor Store','Salad Place']
Proximity_weight = 3
Facility_weight = 2
Affordibility_weight = 1

## 2. Find Office Location

In [7]:
geolocator = Nominatim(user_agent="foursquare_agent")
location_office = geolocator.geocode(office_address)
latitude_office = location_office.latitude
longitude_office = location_office.longitude
print('The geograpical coordinate of Office is {}, {}.'.format(latitude_office, longitude_office))

The geograpical coordinate of Office is 12.9561284, 77.6575713.


#### Visualize Office Location

In [8]:
import folium
# create map of New York using latitude and longitude values
map_bangalore_full = folium.Map(location=[latitude_office, longitude_office], zoom_start=12)

# add markers to map
label = 'Office Address, Murugeshpalaya'
label = folium.Popup(label, parse_html=True)
folium.CircleMarker(
    [latitude_office, longitude_office],
    radius=10,
    popup=label,
    color='green',
    fill=True,
    fill_color='green',
    fill_opacity=1,
    parse_html=False).add_to(map_bangalore_full)
    
map_bangalore_full

## 3. Find Neighbourhood close to office

#### 2.1 Upload all neighbourhoods of Bangalore with Postal Code

In [9]:

#mytable = soup.find_all('table')
import requests
url = requests.get('https://www.mapsofindia.com/pincode/india/karnataka/bangalore/').text

from bs4 import BeautifulSoup
soup = BeautifulSoup(url,'lxml')
#print(soup.prettify())

mytable = soup.find('table',{'class':''})

In [10]:
#Create array to hold the data we extract
postal_code = []
borough = []
neighbourhood = []

#for table in mytable:
rows = mytable.find_all('tr')
    
for row in rows:
    cells = row.find_all('td')
    if len(cells) > 1:
        #postal_code = cells[0]
        postal_code.append(cells[1].text.strip())

        #borough = cells[1]
        borough.append(cells[0].text.strip())

df = pd.DataFrame()
df['PostalCode'] = postal_code
df['Borough'] = borough
df.drop(df[df['PostalCode']=='Pincode'].index, inplace = True) 
df.head()

Unnamed: 0,PostalCode,Borough
1,560063,A F station yelahanka
2,560030,Adugodi
3,560034,Agara
4,560007,Agram
5,560007,Air Force hospital


### 3.2 Find latitude and longitude information

In [11]:
i=1
for postalcode in df['PostalCode']:
    address = 'Bangalore '+ str(postalcode)
    geolocator = Nominatim(user_agent="foursquare_agent")
    location = geolocator.geocode(address)
    if (location!=None):
        df.loc[i,'Latitude'] = location.latitude
        df.loc[i,'Longitude'] = location.longitude
        #print('The geograpical coordinate of Office is {}, {}.'.format(latitude, longitude))
    i=i+1

#### Visualize localities

In [12]:
import folium
# create map of New York using latitude and longitude values
map_bangalore_full = folium.Map(location=[latitude_office, longitude_office], zoom_start=12)

# add markers to map
for lat, lng, borough in zip(df['Latitude'], df['Longitude'], df['Borough']):
    label = '{}'.format(borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_bangalore_full)  

label = 'Office Address, Murugeshpalaya'
label = folium.Popup(label, parse_html=True)
folium.CircleMarker(
    [latitude_office, longitude_office],
    radius=10,
    popup=label,
    color='green',
    fill=True,
    fill_color='green',
    fill_opacity=1,
    parse_html=False).add_to(map_bangalore_full)
    
map_bangalore_full

### 3.3 Find distance from office

In [13]:
# Python 3 program to calculate Distance Between Two Points on Earth 
from math import radians, cos, sin, asin, sqrt 
def distance(lat1, lat2, lon1, lon2): 
      
    # The math module contains a function named 
    # radians which converts from degrees to radians. 
    lon1 = radians(lon1) 
    lon2 = radians(lon2) 
    lat1 = radians(lat1) 
    lat2 = radians(lat2) 
       
    # Haversine formula  
    dlon = lon2 - lon1  
    dlat = lat2 - lat1 
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
  
    c = 2 * asin(sqrt(a))  
     
    # Radius of earth in kilometers. Use 3956 for miles 
    r = 6371
       
    # calculate the result 
    return(c * r) 

In [14]:
for i, row in df.iterrows():
    df.loc[i,'Distance'] = distance(df.loc[i]['Latitude'],latitude_office,df.loc[i]['Longitude'],longitude_office)
df.head()

Unnamed: 0,PostalCode,Borough,Latitude,Longitude,Distance
1,560063,A F station yelahanka,12.898725,77.639322,6.682357
2,560030,Adugodi,12.941862,77.602382,6.187522
3,560034,Agara,12.928178,77.635982,3.890116
4,560007,Agram,12.968553,77.630313,3.260862
5,560007,Air Force hospital,12.968553,77.630313,3.260862


### 3.4 Dropping locations whose distance is greater than user defined criteria

In [15]:
import folium
# create map of New York using latitude and longitude values
map_bangalore_full = folium.Map(location=[latitude_office, longitude_office], zoom_start=12)

# add markers to map
for lat, lng, borough, dist in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Distance']):
    label = '{}'.format(borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_bangalore_full) 
    label2 = '{}'.format(dist)+'km'
    label2 = folium.Popup(dist, parse_html=True)
    #folium.PolyLine(locations=[[latitude_office, longitude_office], [lat, lng]], color='blue',popup=dist,weight=.7, opacity=.7).add_to(map_bangalore_full)

label = 'Office Address, Murugeshpalaya'
label = folium.Popup(label, parse_html=True)
folium.CircleMarker(
    [latitude_office, longitude_office],
    radius=10,
    popup=label,
    color='green',
    fill=True,
    fill_color='green',
    fill_opacity=1,
    parse_html=False).add_to(map_bangalore_full)
    
folium.Circle(location=[latitude_office, longitude_office], popup='Point 1A', fill_color='#000', radius=1000 * Proximity_from_office, weight=2, color="#000").add_to(map_bangalore_full)

map_bangalore_full

In [16]:
bangalore_data=df[df['Distance']<6].reset_index(drop=True)

#### After filtering

In [17]:
import folium
# create map of New York using latitude and longitude values
map_bangalore_full = folium.Map(location=[latitude_office, longitude_office], zoom_start=12)

# add markers to map
for lat, lng, borough, dist in zip(bangalore_data['Latitude'], bangalore_data['Longitude'], bangalore_data['Borough'], bangalore_data['Distance']):
    label = '{}'.format(borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_bangalore_full) 
    label2 = '{}'.format(dist)+'km'
    label2 = folium.Popup(dist, parse_html=True)
    #folium.PolyLine(locations=[[latitude_office, longitude_office], [lat, lng]], color='blue',popup=dist,weight=.7, opacity=.7).add_to(map_bangalore_full)

label = 'Office Address, Murugeshpalaya'
label = folium.Popup(label, parse_html=True)
folium.CircleMarker(
    [latitude_office, longitude_office],
    radius=10,
    popup=label,
    color='green',
    fill=True,
    fill_color='green',
    fill_opacity=1,
    parse_html=False).add_to(map_bangalore_full)
    
folium.Circle(location=[latitude_office, longitude_office], popup='Point 1A', fill_color='#000', radius=1000 * Proximity_from_office, weight=2, color="#000").add_to(map_bangalore_full)

map_bangalore_full

## 4 Explore venues in those shorlisted neighbourhoods

In [18]:
CLIENT_ID = 'ONOOXEKNAJ1TGGGEJT0FQVF5WNPZVQH2I0JAA2ACPTE3K4ER' # your Foursquare ID
CLIENT_SECRET = 'HAJ3ODQXGK3I0LCU5KLCNGOVIBVFYSUXHUQ1XAY2L0GFUIEX' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: ONOOXEKNAJ1TGGGEJT0FQVF5WNPZVQH2I0JAA2ACPTE3K4ER
CLIENT_SECRET:HAJ3ODQXGK3I0LCU5KLCNGOVIBVFYSUXHUQ1XAY2L0GFUIEX


In [19]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [20]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [21]:

# type your answer here
bangalore_venues = getNearbyVenues(names=bangalore_data['Borough'],
                                   latitudes=bangalore_data['Latitude'],
                                   longitudes=bangalore_data['Longitude']
                                  )

Agara
Agram
Air Force hospital
Austin Town
Bangalore Air port
Bangalore Sub fgn post
Basaveshwaranagar
Bellandur
C.V.raman nagar
Cmp Centre and school
Doddanekkundi
Domlur
Doorvaninagar
H.A.l ii stage
Hsr Layout
Hulsur Bazaar
Indiranagar
Indiranagar Com. complex
K H b colony
Kamakshipalya
Kendriya Sadan
Koramangala
Koramangala I block
Koramangala Vi bk
Krishnarajapuram R s
Kundalahalli
Marathahalli Colony
Museum Road
Nal
New Thippasandra
Ramamurthy Nagar
Rameshnagar
Richmond Town
Sivan Chetty gardens
Someswarapura
St. john's medical college
Vimapura
Viveknagar
Yemalur


In [22]:
print(bangalore_venues.shape)
bangalore_venues.head()

(798, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Agara,12.928178,77.635982,Cafe Thulp,12.928999,77.635208,Burger Joint
1,Agara,12.928178,77.635982,iService,12.926713,77.633485,Mobile Phone Shop
2,Agara,12.928178,77.635982,Barbeque Nation,12.92549,77.637101,BBQ Joint
3,Agara,12.928178,77.635982,La Traviata,12.925106,77.635812,Italian Restaurant
4,Agara,12.928178,77.635982,Natural's,12.930501,77.633056,Ice Cream Shop


## 5. Rate venues according to User's Preference List

In [23]:
bangalore_venues['rating']=0
for index,row in bangalore_venues.iterrows():
    if bangalore_venues.loc[index]['Venue Category'] in User_preference_short:
        #print("Yes, 'at' found in List : " , bangalore_venues.loc[index]['Venue Category'])
        bangalore_venues.loc[index,'rating']=1
        

In [24]:
bangalore_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,rating
0,Agara,12.928178,77.635982,Cafe Thulp,12.928999,77.635208,Burger Joint,0
1,Agara,12.928178,77.635982,iService,12.926713,77.633485,Mobile Phone Shop,0
2,Agara,12.928178,77.635982,Barbeque Nation,12.92549,77.637101,BBQ Joint,0
3,Agara,12.928178,77.635982,La Traviata,12.925106,77.635812,Italian Restaurant,0
4,Agara,12.928178,77.635982,Natural's,12.930501,77.633056,Ice Cream Shop,0


### 5.1 Sum across the localities

In [25]:
a = ['Neighborhood','rating']
choices_match = bangalore_venues[a]
choices_match = choices_match.groupby('Neighborhood').sum()   
choices_match

Unnamed: 0_level_0,rating
Neighborhood,Unnamed: 1_level_1
Agara,2
Agram,1
Air Force hospital,1
Austin Town,0
Bangalore Air port,1
Bangalore Sub fgn post,8
Basaveshwaranagar,0
Bellandur,0
C.V.raman nagar,1
Cmp Centre and school,8


In [26]:
#Merge with distance column
distance_choice = pd.merge(bangalore_data, choices_match, how='left', left_on='Borough',right_on='Neighborhood')
distance_choice.head()

Unnamed: 0,PostalCode,Borough,Latitude,Longitude,Distance,rating
0,560034,Agara,12.928178,77.635982,3.890116,2.0
1,560007,Agram,12.968553,77.630313,3.260862,1.0
2,560007,Air Force hospital,12.968553,77.630313,3.260862,1.0
3,560047,Austin Town,12.966216,77.614254,4.826111,0.0
4,560017,Bangalore Air port,12.955934,77.658272,0.078934,1.0


## 6. Upload rent for different regions

In [None]:
rent = []
borough = []

#mytable = soup.find_all('table')
import requests
url = requests.get('https://www.makaan.com/price-trends/property-rates-for-rent-in-bangalore').text

from bs4 import BeautifulSoup
soup = BeautifulSoup(url,'lxml')
#print(soup.prettify())

mytable = soup.find('table',{'class':'tbl'})
#for table in mytable:
rows = mytable.find_all('tr')
    
for row in rows:
    cells = row.find_all('td')
    if len(cells) > 1:
        #postal_code = cells[0]
        rent.append((cells[4].text.strip()).replace(',',''))
        #rent1.append(cells[4].text.strip())
        #borough = cells[1]
        borough.append(cells[0].text.strip())

for i in range(84):
    url = requests.get('https://www.makaan.com/price-trends/property-rates-for-rent-in-bangalore?page='+str(i+2)).text
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(url,'lxml')

    mytable = soup.find('table',{'class':'tbl'})
    #for table in mytable:
    rows = mytable.find_all('tr')

    for row in rows:
        cells = row.find_all('td')
        if len(cells) > 1:
            #postal_code = cells[0]
            rent.append((cells[4].text.strip()).replace(',',''))
            #rent1.append(cells[4].text.strip())
            #borough = cells[1]
            borough.append(cells[0].text.strip())


In [None]:
#Create array to hold the data we extract
df1 = pd.DataFrame()
df1['Borough'] = borough
df1['Rent'] = rent
df1.drop(df1[df1['Rent']=='-'].index, inplace = True) 
df1

In [None]:
#Some modifications to match data with Pin Code Data (extra space modification, extra letter modification)
df1.loc[56,'Borough'] = 'C.V.raman nagar'
df1.loc[97,'Borough'] = 'Doddanekkundi'
df1.loc[424,'Borough'] = 'Doorvaninagar'
df1.loc[579,'Borough'] = 'H.A.l ii stage'
df1.loc[0,'Borough'] = 'Hsr Layout'
df1.loc[1512,'Borough'] = 'K H b colony'
df1.loc[704,'Borough'] = 'Koramangala Vi bk'
df1.loc[13,'Borough'] = 'Marathahalli Colony'
df1.loc[787,'Borough'] = 'Sivan Chetty gardens'
df1.loc[509,'Borough'] = 'Vimapura'
df1.loc[405,'Borough'] = 'Viveknagar'

## 7. Add a distance column and rent column and matching score with user venue preference in a single table

In [None]:
distance_choice_rent = pd.merge(distance_choice, df1, how='left', left_on='Borough',right_on='Borough')
distance_choice_rent

In [None]:
#Subsitute NA Rating values with 0
distance_choice_rent['rating'] = distance_choice_rent['rating'].fillna(0)

#Subsitute NA Rent values with average rent?
distance_choice_rent.dropna(subset=["Rent"], inplace = True) 
#average_rent = distance_choice_rent.mean()
#distance_choice_rent['Rent'] = distance_choice_rent['Rent'].fillna(average_rent)

## 8. Define a metric for ranking

In [None]:
def normalize(df):
    normalized_df=(df-df.min())/df.std()
    return normalized_df

distance_choice_rent['Final Rating'] = normalize(-Proximity_weight * normalize(distance_choice_rent['Distance']) + Facility_weight * normalize(distance_choice_rent['rating']) + Affordibility_weight*normalize(distance_choice_rent['Rent'].astype(float)))

In [None]:
distance_choice_rent.sort_values(by=['Final Rating'], inplace=True, ascending=False)

## 9. Final Ranking Table

In [None]:
distance_choice_rent

## 10. Visualize preffered location with final ranking data (radius of circle)

In [None]:
import folium
# create map of New York using latitude and longitude values
map_bangalore_full = folium.Map(location=[latitude_office, longitude_office], zoom_start=13)

# add markers to map
for lat, lng, borough, rate in zip(distance_choice_rent['Latitude'], distance_choice_rent['Longitude'], distance_choice_rent['Borough'], distance_choice_rent['Final Rating']):
    label = '{}'.format(borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=rate*4+1,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_bangalore_full) 
    label2 = '{}'.format(dist)+'km'
    label2 = folium.Popup(dist, parse_html=True)
    #folium.PolyLine(locations=[[latitude_office, longitude_office], [lat, lng]], color='blue',popup=dist,weight=.7, opacity=.7).add_to(map_bangalore_full)

label = 'Office Address, Murugeshpalaya'
label = folium.Popup(label, parse_html=True)
folium.CircleMarker(
    [latitude_office, longitude_office],
    radius=10,
    popup=label,
    color='green',
    fill=True,
    fill_color='green',
    fill_opacity=1,
    parse_html=False).add_to(map_bangalore_full)
    
#folium.Circle(location=[latitude_office, longitude_office], popup='Point 1A', fill_color='#000', radius=1000 * Proximity_from_office, weight=2, color="#000").add_to(map_bangalore_full)

map_bangalore_full