## Get places using google maps

In [1]:
import googlemaps
import numpy as np
import pandas as pd
import math
import json

In [2]:
#Open a google maps client

YourApiKey = 'AIzaS.................' #provide your Google Maps API key here  

gm = googlemaps.Client(YourApiKey)

In [3]:
## Create a grid in Lat Lon space


def meters_to_lat(meters):
    """
    Convert meters to a crude latitude-longitude measure, 
    using the unit longitude distance at the equator
    """
    return meters/111.2e3

def meters_to_long_melCBD(meters):
    """
    Convert meters to a crude longitude measure in melbourne cbd, 
    using the unit longitude distance at the equator
    """
    return (4/3.16) * (meters/111.2e3)

def lat_to_meters(la):
    """
    Convert meters to a crude latitude-longitude measure, 
    using the unit longitude distance at the equator
    """
    return la * 111.2e3

def long_melCBD_to_meters(lon):
    """
    Convert meters to a crude longitude measure in melbourne cbd, 
    using the unit longitude distance at the equator
    """
    return lon * 111.2e3 * (3.16/4)

def create_grids(ulLatLon, lrLatLon, meters_spacing = 400., shape=()):
    """
    This function creates a set of cells defined by a rectangle, and spacing in meters.
    It return the coordinates of the cell centers - 
        which are the points where we conduct our places_nearby() searches
        
    Note that the overall cell soze will be slightly smaller meters_spacing paramter, 
    as I use : nx = int(np.ceil((lrLatLon[1] - ulLatLon[1])/lldist))
               xs = np.linspace(ulLatLon[1], lrLatLon[1], nx)
    Alternatively a shape tuple can be provide, eg (3, 5) defining the size of the grid. 
 
    
    """
    if not shape:
        lldist = meters_to_lat(meters_spacing)
        print ('lldist:', lldist)
        lldist_long = meters_to_long_melCBD(meters_spacing)
        print ('lldist_long:', lldist_long)
        #nx = int(abs(np.ceil((lrLatLon[1] - ulLatLon[1])/lldist_long)))
        nx = int(np.ceil(abs((lrLatLon[1] - ulLatLon[1])/lldist_long)))
        print ('nx:', nx)
        #ny = int(abs(np.ceil((lrLatLon[0] - ulLatLon[0])/lldist)))
        ny = int(np.ceil(abs((lrLatLon[0] - ulLatLon[0])/lldist)))
        print ('ny:', ny)
        xs, lldist_x = np.linspace(ulLatLon[1], lrLatLon[1], nx, endpoint=False, retstep=True)
        #print 'xs:', xs
        ys, lldist_y =  np.linspace(ulLatLon[0], lrLatLon[0], ny, endpoint=False, retstep=True)
        #print 'ys:', ys
        if nx == 1:
            xs = []
            xs.append(ulLatLon[1])
            lldist_x = lrLatLon[1] - ulLatLon[1]
        if ny == 1:
            ys = []
            ys.append(ulLatLon[0])
            lldist_y = lrLatLon[0] - ulLatLon[0]
            
    else:
        #lldist = meters_to_lat(meters_spacing)
        #xs, lldist_x = np.linspace(ulLatLon[1], lrLatLon[1], shape[0], endpoint=False, retstep=True)
        #ys, lldist_y =  np.linspace(ulLatLon[0], lrLatLon[0], shape[1], endpoint=False, retstep=True)
        xs, lldist_x = np.linspace(ulLatLon[1], lrLatLon[1], shape[1], endpoint=False, retstep=True)
        ys, lldist_y =  np.linspace(ulLatLon[0], lrLatLon[0], shape[0], endpoint=False, retstep=True)
        #nx = shape[0]
        nx = shape[1]
        if nx == 1:
            xs = []
            xs.append(ulLatLon[1])
            lldist_x = lrLatLon[1] - ulLatLon[1]
        #ny = shape[1]
        ny = shape[0]
        if ny == 1:
            ys = []
            ys.append(ulLatLon[0])
            lldist_y = lrLatLon[0] - ulLatLon[0]
    X, Y = np.meshgrid(xs , ys) #Thes are coordinates of the cell vertices
    subX, subY = np.copy(X), np.copy(Y)
    #subX = subX[:-1, :-1] + lldist/2. #These give the points at the centre of the cells
    #subY = subY[:-1, :-1] + lldist/2.
    subX = subX + lldist_x/2. #These give the points at the centre of the cells
    subY = subY + lldist_y/2.
    return subX, subY, lldist_x, lldist_y

In [4]:

y = -37.8091677
x = 144.9684903
places  = gm.places_nearby(keyword='cafe', location=(y,x), radius=5)#new_spacing*math.sqrt(2) )
df = pd.DataFrame(columns=['name', 'lat', 'lon', 'rating','place_id'])
RecordsAll = []
RecordsAll.append(places['results'])  
for cell in RecordsAll:
    for place in cell:
        #print(place)
        name = place['name'].encode('utf-8')
        lat = place['geometry']['location']['lat']
        lon = place['geometry']['location']['lng']
        place_id=place['place_id'].encode('utf-8')
        try:
            rating = place['rating'] #Not all places have ratings, so use try - except
        except:
            rating = ''
        series = pd.Series([name, lat, lon, rating,place_id], index=['name', 'lat', 'lon', 'rating','place_id'])
        df = df.append(series,  ignore_index=True)
print(name, lat, lon, rating)
df.head()
df2 = df.drop_duplicates('place_id')
df2.shape
print (df2)
#output to csv
df2.to_csv('results-xx.csv')

b'Jack Sprat Cafe' -37.8086019 144.9684545 4.4
                              name        lat         lon rating  \
0          b'Beyond Internet cafe' -37.809524  144.966885      0   
1   b'The League of Honest Coffee' -37.809243  144.968448    4.5   
2               b'65 Degrees Cafe' -37.808741  144.968724    4.4   
3                  b'Barista Cafe' -37.809500  144.969702    4.2   
4                b'Blu Point Cafe' -37.808391  144.966355    4.6   
5                 b"B & B 's Cafe" -37.808845  144.968812      2   
6               b'THE J Walk Cafe' -37.810329  144.967674      4   
7                b'Global Kitchen' -37.808602  144.968455    3.9   
8                b'Cafe Caledonia' -37.810713  144.968668    4.6   
9               b'No Vacancy Cafe' -37.810375  144.965973    3.8   
10             b'Traveller Coffee' -37.811417  144.971246    4.7   
11              b'Jack Sprat Cafe' -37.808602  144.968455    4.4   

                          place_id  
0   b'ChIJR96FVslC1moRErMxY6o1l

In [5]:
##########
#In this block we search through a grid and extract records cel-by-cell. 
#If the records query in anny cell maxes out (i.e. hits 20 record), 
#we create a finer grid and search again
########## -37.8091677,144.9684903      #melbourne university -37.80004479999999,144.9600488
##
#https://maps.googleapis.com/maps/api/geocode/json?address=Melbourne%20University&key=AIza.........
###


ULCRN = (-37.80004479999999+meters_to_lat(1000), 144.9600488-meters_to_long_melCBD(1000)) #Approx melbourne CBD limits
LRCRN = (-37.80004479999999-meters_to_lat(1000), 144.9600488+meters_to_long_melCBD(1000))



spacing = 200. #intial spacing of cells in our grid
X, Y, lldist_x, lldist_y = create_grids(ULCRN, LRCRN, meters_spacing = spacing, shape=()) #create grids; meters_spacing no use 
print ('lldist_x1: ', lldist_x)
print ('lldist_y1: ', lldist_y)
orig_shape = Y.shape
print ('orig_shape: ',orig_shape)
max_levels = 2000. #this paramter will limit the number of refinement levels 
lengths = [21]  #A dummy parameter, ultimately we try to reduce this below 20, meaning we have 
                #will have returned all results for each searhc area
fac = 1.        #This is the scaling factor for our grid cell size and search radius

indexMatrixDictList = []
#indexMatrixDictParent = {}
#indexMatrixDictNow = {}
isfirstrun = True
needFurtherDivision = True
RecordsAll = []

while needFurtherDivision and (len(lengths) == 0 or max(lengths) >= 20) and fac <= max_levels:
    needFurtherDivision = False
    print('##Iteration at grid level: ' + str(int(fac)))
    new_shape = tuple(np.array(orig_shape)*fac)
    print ('new_shape: ',new_shape)
    new_spacing = spacing/fac
    #print('##Search radius in meters: ' + str(new_spacing))
    #records = []
    lengths = []
    X, Y, lldist_x, lldist_y = create_grids(ULCRN, LRCRN, meters_spacing = new_spacing, shape=new_shape)#create grids; meters_spacing no use -Hz
    print ('lldist_x: ', lldist_x)
    print ('lldist_y: ', lldist_y)
    longmeters = long_melCBD_to_meters(abs(lldist_x))
    print ('longmeters: ',longmeters)
    latmeters  = lat_to_meters(abs(lldist_y))
    print ('latmeters: ',latmeters)
    radius_r = 0.5 * math.sqrt(longmeters*longmeters + latmeters*latmeters)#+2
    radius_r = math.ceil(radius_r)+20
    print ('radius_r: ', radius_r)
    print('##Search radius in meters: ' + str(radius_r))
    print ('new_spacing: ', new_spacing)
    #print(X.shape)
    if isfirstrun:
        isfirstrun = False
        indexMatrixDictNow = {}
        for index, value in np.ndenumerate(X): #loop through cells of current grid            
            print (index)
            y = Y[index]
            x = X[index]
            #print(x,y, new_spacing)
            places  = gm.places_nearby(keyword='supermarket', location=(y,x), radius=radius_r)#, type='grocery_or_supermarket')#,type='cafe')#new_spacing*math.sqrt(2) )
            #places=gm.places_autocomplete_query("supermarket near Melbourne University", location=(y,x), radius=radius_r)
            #places=gm.places("supermarket near Melbourne University", location=(y,x), radius=radius_r)#,type="grocery_or_supermarket")
            #records.append(places['results'])
            print(len(places['results']))
            print ("places", places)
            if(len(places['results']) >= 20):
                needFurtherDivision = True
                indexMatrixDictNow[index] = 1 #1 for need further division
                lengths.append(len(places['results']))
            else:
                indexMatrixDictNow[index] = -1 #-1 for no need further division
                RecordsAll.append(places['results'])               
        
        indexMatrixDictList.append(indexMatrixDictNow)
        
    else:
        isfirstrun = False        
        indexMatrixDictParent = indexMatrixDictList[-1]
        indexMatrixDictNow = {}
        for index, value in indexMatrixDictParent.iteritems():
            a = index[0]
            b = index[1]
            V_2a = 2 * a
            V_2b = 2 * b
            V_2a_1 = 2 * a + 1
            V_2b_1 = 2 * b + 1
            index00 = (V_2a,V_2b)
            index01 = (V_2a,V_2b_1)
            index10 = (V_2a_1,V_2b)
            index11 = (V_2a_1,V_2b_1)
            if value < 0:
                indexMatrixDictNow[index00] = -1
                indexMatrixDictNow[index01] = -1
                indexMatrixDictNow[index10] = -1
                indexMatrixDictNow[index11] = -1
            else:
                indexMatrixDictNow[index00] = 1
                indexMatrixDictNow[index01] = 1
                indexMatrixDictNow[index10] = 1
                indexMatrixDictNow[index11] = 1                
        
        for index, value in np.ndenumerate(X): #loop through cells of current grid
            #print index,
            if indexMatrixDictNow[index] > 0:
                y = Y[index]
                x = X[index]
                #print(x,y, new_spacing)
                places  = gm.places_nearby(keyword='supermarket', location=(y,x), radius=radius_r)#, type='grocery_or_supermarket')#,type='cafe')#new_spacing*math.sqrt(2) )
                #places=gm.places_autocomplete_query("supermarket near Melbourne University",  location=(y,x), radius=radius_r)
                #places=gm.places( "supermarket near Melbourne University", location=(y,x), radius=radius_r)#,type="grocery_or_supermarket")
                #records.append(places['results'])
                print (index)
                print(len(places['results']))
                if(len(places['results']) >= 20):
                    needFurtherDivision = True
                    indexMatrixDictNow[index] = 1
                    lengths.append(len(places['results']))
                else:
                    indexMatrixDictNow[index] = -1
                    RecordsAll.append(places['results'])               
        
        indexMatrixDictList.append(indexMatrixDictNow)
        
    fac *= 2 #Increase the grid refinment factor
print('##finished')
import datetime
t=datetime.datetime.now().strftime("%Y-%m-%d %H-%M-%S")

lldist: 0.0017985611510791368
lldist_long: 0.0022766596849102994
nx: 11
ny: 11
lldist_x1:  0.002069690622647613
lldist_y1:  -0.0016350555918904774
orig_shape:  (11, 11)
##Iteration at grid level: 1
new_shape:  (11.0, 11.0)
lldist_x:  0.002069690622647613
lldist_y:  -0.0016350555918904774
longmeters:  181.8181818183475
latmeters:  181.8181818182211
radius_r:  149
##Search radius in meters: 149
new_spacing:  200.0
(0, 0)




0
places {'html_attributions': [], 'results': [], 'status': 'ZERO_RESULTS'}
(0, 1)
0
places {'html_attributions': [], 'results': [], 'status': 'ZERO_RESULTS'}
(0, 2)
0
places {'html_attributions': [], 'results': [], 'status': 'ZERO_RESULTS'}
(0, 3)
0
places {'html_attributions': [], 'results': [], 'status': 'ZERO_RESULTS'}
(0, 4)
0
places {'html_attributions': [], 'results': [], 'status': 'ZERO_RESULTS'}
(0, 5)
0
places {'html_attributions': [], 'results': [], 'status': 'ZERO_RESULTS'}
(0, 6)
0
places {'html_attributions': [], 'results': [], 'status': 'ZERO_RESULTS'}
(0, 7)
0
places {'html_attributions': [], 'results': [], 'status': 'ZERO_RESULTS'}
(0, 8)
2
places {'html_attributions': [], 'results': [{'geometry': {'location': {'lat': -37.7927456, 'lng': 144.9695322}, 'viewport': {'northeast': {'lat': -37.79139577010728, 'lng': 144.9708820298927}, 'southwest': {'lat': -37.79409542989272, 'lng': 144.9681823701073}}}, 'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/shopping-71

In [6]:
#records
#import json

#RecordsAll1=[]
#with open(t+'_supermarketset.json', 'w') as f:
#    json.dump(RecordsAll1, f)
print (places['results'])


[{'geometry': {'location': {'lat': -37.8098337, 'lng': 144.9711351}, 'viewport': {'northeast': {'lat': -37.80845542010729, 'lng': 144.9724179798928}, 'southwest': {'lat': -37.81115507989273, 'lng': 144.9697183201073}}}, 'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/shopping-71.png', 'id': 'd74c9bcee86a0cf6a01080a1d691f771874715d3', 'name': 'IGA X-press', 'opening_hours': {'open_now': True}, 'photos': [{'height': 2448, 'html_attributions': ['<a href="https://maps.google.com/maps/contrib/108024793198560180034/photos">Mick MSS Photography</a>'], 'photo_reference': 'CmRaAAAAWoi2tROkOO1iatTvtYEccUIwrZR8RLayliY-guqjwRdk33R12GAKByZnxW4hukbM_kRSmmnWql5A_Cv_KkovY8BjbcyVikmlXe-8gzi5gBuGCxFxNImmGcZqALssftykEhDjPDzrnjve-14GXmWPI-o8GhR5UoCEKon7IEB3MpJVd1mnaa3vEA', 'width': 3264}], 'place_id': 'ChIJt8vcQM9C1moRMNcMXOrTiI8', 'plus_code': {'compound_code': '5XRC+3F Melbourne, Victoria, Australia', 'global_code': '4RJ65XRC+3F'}, 'rating': 4.2, 'reference': 'ChIJt8vcQM9C1moRMNcMXOrTiI8', 's

In [7]:
RecordidAll={}
count=0
for cell in RecordsAll:
    for place in cell:

        place_id=place['place_id'].encode('utf-8')
        if place_id not in RecordidAll.keys():
            RecordidAll[place_id]=place
        else:
            print (place_id)
            count=count+1
print (count)
RecordsAll1=RecordidAll.values()
with open(t+'_supermarket.json', 'w') as f:
    json.dump(list(RecordsAll1), f)

b'ChIJTbth3ChD1moRPzGE3uUQMD4'
b'ChIJTbth3ChD1moRPzGE3uUQMD4'
b'ChIJ4cVgqixd1moRCnQEULE-5tM'
b'ChIJ4cVgqixd1moRCnQEULE-5tM'
b'ChIJ4cVgqixd1moRCnQEULE-5tM'
b'ChIJ4cVgqixd1moRCnQEULE-5tM'
b'ChIJTbth3ChD1moRPzGE3uUQMD4'
b'ChIJHbHg3yhD1moRtigMFWQSBaY'
b'ChIJTbth3ChD1moRPzGE3uUQMD4'
b'ChIJTbth3ChD1moRPzGE3uUQMD4'
b'ChIJ4cVgqixd1moRCnQEULE-5tM'
b'ChIJ4cVgqixd1moRCnQEULE-5tM'
b'ChIJ4cVgqixd1moRCnQEULE-5tM'
b'ChIJ4cVgqixd1moRCnQEULE-5tM'
b'ChIJ4cVgqixd1moRCnQEULE-5tM'
b'ChIJPwxYiNZC1moRfdORY6zd3fU'
b'ChIJTbth3ChD1moRPzGE3uUQMD4'
b'ChIJHbHg3yhD1moRtigMFWQSBaY'
b'ChIJPwxYiNZC1moRfdORY6zd3fU'
b'ChIJTbth3ChD1moRPzGE3uUQMD4'
b'ChIJHbHg3yhD1moRtigMFWQSBaY'
b'ChIJPwxYiNZC1moRfdORY6zd3fU'
b'ChIJTbth3ChD1moRPzGE3uUQMD4'
b'ChIJHbHg3yhD1moRtigMFWQSBaY'
b'ChIJ4bv-Ojtd1moR0Fxso3VWbdk'
b'ChIJk1rXOy5d1moR46D0zicVgOI'
b'ChIJk1rXOy5d1moR46D0zicVgOI'
b'ChIJk1rXOy5d1moR46D0zicVgOI'
b'ChIJ4cVgqixd1moRCnQEULE-5tM'
b'ChIJ4cVgqixd1moRCnQEULE-5tM'
b'ChIJk1rXOy5d1moR46D0zicVgOI'
b'ChIJ4cVgqixd1moRCnQEULE-5tM'
b'ChIJ4c

In [8]:
y,x

(-37.80822007795944, 144.97039725311325)

### Parse data with Pandas

In [9]:
#import datetime
#datetime.datetime.now().strftime("%Y-%m-%d %H-%M-%S")

with open('data.json', 'w') as f:
    json.dump(places, f)

In [10]:
df = pd.DataFrame(columns=['name', 'lat', 'lon', 'rating','price_level','place_id','types'])
for cell in RecordsAll:
    for place in cell:
        #print(place)
        name = place['name'].encode('utf-8')
        lat = place['geometry']['location']['lat']
        lon = place['geometry']['location']['lng']
        place_id=place['place_id'].encode('utf-8')
        types=place['types']
        rating=place.get('rating','')
        price_level=place.get('price_level','')
        #address=place['formatted_address'].encode('utf-8')
        '''try:
            rating = place['rating'] #Not all places have ratings, so use try - except
            price_level=place['price_level']
        except:
            rating = ''
            price_level= '''''
        series = pd.Series([name, lat, lon, rating,price_level,place_id,types], index=['name', 'lat', 'lon', 'rating',
                                                                                       'price_level','place_id','types'])
        df = df.append(series,  ignore_index=True)
print(name, lat, lon, rating,price_level,place_id,types)

b'Laguna Oriental Supermarket' -37.8102716 144.966642 4.3  b'ChIJt9TRxMtC1moR-oKhszjxbDA' ['grocery_or_supermarket', 'store', 'point_of_interest', 'food', 'establishment']


In [11]:
#Take a quick look at our data frame
df.head()

Unnamed: 0,name,lat,lon,rating,price_level,place_id,types
0,b'IGA Supermarket',-37.792746,144.969532,3.2,,b'ChIJTbth3ChD1moRPzGE3uUQMD4',"[supermarket, grocery_or_supermarket, store, p..."
1,b'IGA Supermarket',-37.792744,144.969535,0.0,,b'ChIJHbHg3yhD1moRtigMFWQSBaY',"[supermarket, grocery_or_supermarket, store, p..."
2,b'IGA Supermarket',-37.792746,144.969532,3.2,,b'ChIJTbth3ChD1moRPzGE3uUQMD4',"[supermarket, grocery_or_supermarket, store, p..."
3,b'IGA Supermarket',-37.792746,144.969532,3.2,,b'ChIJTbth3ChD1moRPzGE3uUQMD4',"[supermarket, grocery_or_supermarket, store, p..."
4,b'FoodWorks Melbourne University',-37.796862,144.960401,1.9,,b'ChIJ4cVgqixd1moRCnQEULE-5tM',"[supermarket, grocery_or_supermarket, store, p..."


In [12]:
#Drop duplicates
df2 = df.drop_duplicates('place_id')
df2.shape

(24, 7)

In [13]:
#output to csv
df2.to_csv(t+'_supermarket.csv')