In [None]:
from selenium import webdriver
from time import sleep
import json
import pprint as pp

In [None]:
# Scrape paginated reviews by repeatedly clicking the "next" button.
def get_reviews(url, traveller_type, max_page, wait_time=1):
    """Collect reviews from a review listing page using a Chrome WebDriver.

    Opens ``url``, clicks the traveller-type filter whose element id ends with
    ``traveller_type``, then walks the result pages through the "next" link,
    reading every ``.review-container`` block found on each page.

    NOTE(review): this uses the Selenium 3-style ``find_element_by_*`` helpers
    and a positional driver path; confirm the pinned Selenium version still
    provides them.

    Args:
        url: Address of the review page to open.
        traveller_type: Suffix appended to the filter element id that gets
            clicked (e.g. ``"Solo"``).
        max_page: Highest page number to process; scraping stops as soon as
            the current page number is greater than this value.
        wait_time: Seconds to sleep before reading each page.

    Returns:
        A list of dicts, one per review block, with the keys ``"username"``,
        ``"rating"``, ``"title"``, ``"description"`` and ``"date"``.

    Raises:
        Any error raised while starting the browser, applying the filter or
        reading a page propagates to the caller. The browser is always closed
        before the error leaves this function.
    """
    # Imported locally so the block is self-contained; selenium is already a
    # dependency of this notebook.
    from selenium.common.exceptions import WebDriverException

    driver = None
    reviews_data = []

    #######################################################################################################################
    # Inner functions
    #######################################################################################################################

    def initialize():
        """Start Chrome and load the target page."""
        nonlocal driver
        driver = webdriver.Chrome("../drivers/chromedriver.exe")
        driver.get(url)

    def select_traveller_type():
        """Click the filter control matching ``traveller_type``."""
        driver.find_element_by_css_selector(
            '#taplc_location_review_filter_controls_0_filterSegment_' + traveller_type
        ).click()

    def extract_data():
        """Read review blocks page by page until the limit or the last page."""
        print("Start data extraction.")
        while True:
            # Wait for page loading
            sleep(wait_time)

            # Check if current page is within the searching scope
            current_page = driver.find_elements_by_css_selector(".pageNum.current")[0].get_attribute("data-page-number")
            if int(current_page) > max_page:
                break
            print("Processing page " + current_page + "...")

            # Scan all review blocks
            for review in driver.find_elements_by_css_selector('.review-container'):
                username = review.find_element_by_css_selector('.scrname').text
                rating = get_rating(review.find_element_by_css_selector('.rating span').get_attribute('class'))
                title = review.find_element_by_css_selector('.noQuotes').text
                description = review.find_element_by_css_selector('.partial_entry').text
                date = review.find_element_by_css_selector('.ratingDate').get_attribute('title')

                reviews_data.append({
                    "username": username,
                    "rating": rating,
                    "title": title,
                    "description": description,
                    "date": date
                })

            # A missing or unclickable next button means there are no more
            # pages. Only Selenium errors are treated that way, so that
            # KeyboardInterrupt and unrelated bugs are not silently swallowed.
            try:
                driver.find_element_by_css_selector('#taplc_location_reviews_list_0 .nav.next.taLnk').click()
            except WebDriverException:
                break

        print("Done data extraction.")

    def close_driver():
        """Quit the browser if it was started (named to avoid shadowing ``exit``)."""
        if driver is not None:
            driver.quit()

    def get_rating(rating_class):
        """Map a class string containing ``bubble_<n>0`` to the integer ``n``.

        Returns ``None`` when none of ``bubble_10`` .. ``bubble_50`` occurs in
        ``rating_class``.
        """
        for i in range(1, 6):
            if ('bubble_' + str(i) + '0') in rating_class:
                return i
        return None

    #######################################################################################################################

    # try/finally guarantees the browser process is released even when the
    # page layout changes and one of the selectors above raises.
    try:
        initialize()
        select_traveller_type()
        extract_data()
    finally:
        close_driver()
    return reviews_data

In [None]:
# Scrape the 'Singapore Zoo' attraction: Solo-traveller reviews, at most 2 pages.
traveller_type = "Solo"
url = "https://www.tripadvisor.com.sg/Attraction_Review-g294265-d324542-Reviews-Singapore_Zoo-Singapore.html"
all_reviews = get_reviews(
    url=url,
    traveller_type=traveller_type,
    max_page=2,
)

In [None]:
# Write the scraped reviews to a JSON file in the current working directory.
with open('data_tripadvisor.json', 'w') as out_file:
    json.dump(all_reviews, out_file, indent=4, sort_keys=True)