In [1]:
import json
import os

import pandas as pd
import requests

# Define a small client class for the Google Places web-service API (Find Place and Place Details)

In [107]:
class GooglePlaces(object):
    """Small client for two Google Places web-service endpoints.

    Every request is authenticated with the API key given at construction
    time and is sent with a timeout, so an unresponsive server cannot block
    the notebook indefinitely.
    """

    # Default number of seconds to wait for a response before giving up.
    DEFAULT_TIMEOUT = 10

    def __init__(self, apiKey, timeout=DEFAULT_TIMEOUT):
        """Store the credentials and the per-request timeout.

        Parameters
        ----------
        apiKey : str
            Key sent as the ``key`` query parameter of every request.
        timeout : float, optional
            Seconds passed to ``requests.get`` as ``timeout``.
        """
        super(GooglePlaces, self).__init__()
        self.apiKey = apiKey
        self.timeout = timeout

    def _get_json(self, endpoint_url, params):
        """Send a GET request and return the decoded JSON body.

        Raises ``requests.HTTPError`` for a 4xx/5xx response, so a failed
        request surfaces as an HTTP error rather than as a JSON decoding
        error on an error page.
        """
        res = requests.get(endpoint_url, params=params, timeout=self.timeout)
        res.raise_for_status()
        return json.loads(res.content)

    def search_places(self, name, inputtype):
        """Query the ``findplacefromtext`` endpoint.

        Parameters
        ----------
        name : str
            Text sent as the ``input`` query parameter.
        inputtype : str
            Value sent as the ``inputtype`` query parameter
            (the notebook uses ``"textquery"``).

        Returns
        -------
        dict
            The decoded JSON response.
        """
        endpoint_url = "https://maps.googleapis.com/maps/api/place/findplacefromtext/json"
        params = {
            'input': name,
            'inputtype': inputtype,
            'key': self.apiKey
        }
        return self._get_json(endpoint_url, params)

    def get_place_details(self, place_id, fields):
        """Query the ``details`` endpoint for one place.

        Parameters
        ----------
        place_id : str
            Identifier of the place to look up.
        fields : iterable of str
            Field names to request; joined with commas into the ``fields``
            query parameter.

        Returns
        -------
        dict
            The decoded JSON response.
        """
        endpoint_url = "https://maps.googleapis.com/maps/api/place/details/json"
        params = {
            # NOTE(review): current Place Details documentation names this
            # parameter `place_id`; the original spelling is kept unchanged
            # here - confirm that `placeid` is still accepted.
            'placeid': place_id,
            'fields': ",".join(fields),
            'key': self.apiKey
        }
        return self._get_json(endpoint_url, params)


##### Load the Vancouver Art facilities data

In [65]:
# Directory that holds the input CSV. The default is the original author's
# location; set the POI_DATA_DIR environment variable to run the notebook on
# another machine without editing this cell.
poi_data_dir = os.environ.get("POI_DATA_DIR", "/Users/yuxuancui/Desktop/MDS/data599/r5r")
poi = pd.read_csv(os.path.join(poi_data_dir, "vancouver_facilities.csv"))

In [72]:
# Facility names used as the search text for the place lookup.
poi_names = poi.loc[:, "Facility_Name"]

In [220]:
# Read the Google Places API key from the environment instead of storing it in
# the notebook: a key kept in source is readable by anyone who has the file.
# Export GOOGLE_PLACES_API_KEY before starting the kernel. Any key that was
# previously committed here should be revoked and replaced.
# With the variable unset the key is empty and the API requests are expected
# to be rejected rather than silently using a shared credential.
apiKey = os.environ.get("GOOGLE_PLACES_API_KEY", "")

In [222]:
def get_pid(poi_name,apiKey):
    """
    Look up a Google place_id for every facility name.

    One `search_places` request (input type "textquery") is sent per name and
    the first returned candidate is used.

    Parameters
    ----------
    poi_name : sequence of str (list or pandas Series)
        Facility names to search for.
    apiKey : str
        Google API key used to build the `GooglePlaces` client.

    Returns
    -------
    pandas.DataFrame
        One row per input name with the columns
        - poi_name: the name that was searched,
        - names: the same name, or "can not find" when there was no candidate,
        - pid: the place_id of the first candidate, or 0 when there was none.
    """
    api = GooglePlaces(apiKey)
    pid = []
    names = []
    for name in poi_name:
        places = api.search_places(name, "textquery")
        # A payload without a "candidates" key is treated like an empty
        # candidate list instead of raising KeyError.
        candidates = places.get("candidates") or []
        if candidates:
            pid.append(candidates[0]["place_id"])
            names.append(name)
        else:
            # 0 is the "not found" marker that later cells filter on.
            pid.append(0)
            names.append("can not find")

    return pd.DataFrame({"poi_name": poi_name, "names": names, "pid": pid})



In [223]:
# Resolve every facility name to a place id (get_pid sends one search request per name).
pid = get_pid(poi_name=poi_names, apiKey=apiKey)

In [248]:
# Field names requested from the place details endpoint for every place.
fields = [
    "name",
    "formatted_address",
    "rating",
    "user_ratings_total",
]

In [297]:
def get_info(pid,fields,api=None):
    """
    Fetch the name, rating and number of ratings for each place id.

    Parameters
    ----------
    pid : iterable of str
        Place ids, e.g. the non-zero values of the `pid` column returned by
        `get_pid`.
    fields : iterable of str
        Field names to request from the place details endpoint.
    api : object, optional
        Client exposing `get_place_details(place_id, fields)`. When omitted,
        a `GooglePlaces` client is built from the module-level `apiKey`.

    Returns
    -------
    pandas.DataFrame
        One row per input id, in input order, with the columns
        - Name: the place name (missing when the response has no result),
        - Rating: the rating, or the string "no data" when absent,
        - Total_Review: the number of user ratings, or 0 when absent,
        - Pid: the place id that was queried.
    """
    if api is None:
        # Build the client here rather than reading an `api` variable that
        # only exists if some earlier cell happened to create it.
        api = GooglePlaces(apiKey)

    # Materialise once so the loop and the "Pid" column use the same values
    # in the same order.
    place_ids = list(pid)

    names = []
    ratings = []
    reviews_num = []
    for place_id in place_ids:
        details = api.get_place_details(place_id, fields)
        # A payload without "result" still yields a row, so the output stays
        # aligned with the input ids.
        place = details.get("result") or {}
        names.append(place.get("name"))
        ratings.append(place.get("rating", "no data"))
        reviews_num.append(place.get("user_ratings_total", 0))

    return pd.DataFrame({
        "Name": names,
        "Rating": ratings,
        "Total_Review": reviews_num,
        "Pid": place_ids,
    })





In [298]:
# get_pid stores 0 for names without a match; drop those entries because
# sending 0 as a place id would produce an invalid request.
pid_list = [place_id for place_id in pid["pid"] if place_id != 0]
result = get_info(pid_list, fields)

In [300]:
# Display the table returned by get_info (columns: Name, Rating, Total_Review, Pid).
result

Unnamed: 0,Name,Rating,Total_Review,Pid
0,39 Service Battalion (12 Company) Richmond,5,7,ChIJhwQHwzh1hlQRJdE3ZdQPI9I
1,15th Field Artillery Regiment (RCA),5,4,ChIJpdEUnLBzhlQR0ome6cv5el8
2,ABC Preschool Academy,no data,0,ChIJF3ZMao7RhVQRYMRn09PBo34
3,Agriforest Bio-Technologies Ltd,4.9,8,ChIJQfsQ7fOLfVMRcUmdvbgiS-Y
4,Agassiz Library,5,8,ChIJNaGyI2sThFQRQ33H2ji42y4
...,...,...,...,...
413,Woodward's 43,4.3,108,ChIJo8K5knlxhlQRBkyzjcK92Bg
414,Vancouver Island Regl Library - Woss Branch,5,1,ChIJlb5JzHEAY1QRgjcJ_n2Bbrk
415,Yarrow Library,5,5,ChIJa5aUHc9HhFQRkikK61az_Lk
416,"York Theatre, The Cultch",4.5,306,ChIJQY5X1xVxhlQRXcRkTyGbCbs


Merge the two datasets so that each `poi_name` from vancouver_facilities is paired with the place name, rating and review count returned by Google for its place id.


In [316]:
# get_info names its id column "Pid" while get_pid names it "pid". Align the
# key name before joining; otherwise merge(on='pid') raises KeyError because
# `result` has no "pid" column.
# The left join keeps every facility, including those whose pid is 0 (no
# match), which end up with missing Name/Rating/Total_Review values.
merged = (
    pid.merge(result.rename(columns={"Pid": "pid"}), on="pid", how="left")
    .drop_duplicates()
    .reset_index(drop=True)
)

In [319]:
# Display the merged table with the searched name first, followed by the matched name, place id and rating columns.
merged[["poi_name","Name","pid","Rating","Total_Review"]]

Unnamed: 0,poi_name,Name,pid,Rating,Total_Review
0,12 Service Battalion Museum,39 Service Battalion (12 Company) Richmond,ChIJhwQHwzh1hlQRJdE3ZdQPI9I,5,7.0
1,15th Field Artillery Regiment Museum And Archives,15th Field Artillery Regiment (RCA),ChIJpdEUnLBzhlQR0ome6cv5el8,5,4.0
2,221A Artist Run Centre,,0,,
3,7302754 Canada Inc,,0,,
4,Abc Heritage Preschool And Child Care,ABC Preschool Academy,ChIJF3ZMao7RhVQRYMRn09PBo34,no data,0.0
...,...,...,...,...,...
440,Woss Branch,Vancouver Island Regl Library - Woss Branch,ChIJlb5JzHEAY1QRgjcJ_n2Bbrk,5,1.0
441,Yarrow Library,Yarrow Library,ChIJa5aUHc9HhFQRkikK61az_Lk,5,5.0
442,York Theatre,"York Theatre, The Cultch",ChIJQY5X1xVxhlQRXcRkTyGbCbs,4.5,306.0
443,Yuk Yuk's Comedy Club,Yuk Yuk's Comedy Club Vancouver,ChIJuycZ1-dzhlQRSZsBQvvwklo,4.4,782.0


In [322]:
# Directory that receives the merged table. The default is the original
# author's location; set the POI_OUTPUT_DIR environment variable to write
# somewhere else without editing this cell.
output_dir = os.environ.get(
    "POI_OUTPUT_DIR", "/Users/yuxuancui/Desktop/MDS/data599/google-reviews-arts"
)
# index=True writes the row index as the first, unnamed CSV column.
merged.to_csv(os.path.join(output_dir, "google_reviews_poi.csv"), index=True)