## This is an example of web scraping using the Selenium package. We will be gathering the reviews of a vaporizer sold on the website "vaporfi". Selenium is used together with Chromedriver here because the page has pop-ups and add-ons that only appear when you click on or hover over page elements, so a real browser session is needed to handle them.

In [1]:
import json
import subprocess as sp
import shlex
import requests
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.touch_actions import TouchActions
from bs4 import BeautifulSoup
import time

## Product page whose reviews are scraped in this notebook
url = 'https://www.vaporfi.com/vaporesso-podstick-starter-kit/'

In [2]:
## This will be the function for collecting all reviews on a given page.
def collect_reviews(driver, url, reviews):
    """Load ``url`` and append the text of every review found on it to ``reviews``.

    Steps:
        i) dismiss pop-ups (best effort),
        ii) scroll down to the review section and click the review tab to open it,
        iii) parse the page source with BS4 and collect the review text.

    Args:
        driver: Selenium WebDriver used to load and interact with the page.
        url: Address of the page to scrape.
        reviews: List the review texts are appended to (mutated in place).

    Returns:
        Tuple ``(driver, reviews, soup)``, where ``soup`` is the BeautifulSoup
        parse of the page source captured after the review tab was clicked.

    Raises:
        selenium.common.exceptions.TimeoutException: if the review tab does
            not become clickable within 10 seconds.
    """
    ## Imported locally so the function does not rely on a new top-level import
    from selenium.common.exceptions import WebDriverException

    ## Initiate driver w/ url
    driver.get(url)
    wait = WebDriverWait(driver, 10)

    ## Close pop-up ads. Both pop-ups are optional, so a missing, late or
    ## unclickable element is ignored -- but only Selenium errors are caught,
    ## so KeyboardInterrupt and genuine programming errors still surface.
    try:
        driver.find_element(By.ID, "yes").click()
    except WebDriverException:
        pass
    try:
        ## Wait for the pop-up's close button before trying to dismiss it
        wait.until(EC.element_to_be_clickable((By.ID, 'ltkpopup-close-button')))
        driver.find_element(By.CLASS_NAME, "ltkpopup-close").click()
    except WebDriverException:
        pass

    ## Scroll down to the reviews section
    driver.execute_script("window.scrollTo(0, 600)")
    ## Click tab to open up list of reviews (the wait returns the clickable element)
    review_tab = wait.until(EC.element_to_be_clickable((By.ID, 'tab_review_tabbed')))
    review_tab.click()
    driver.execute_script("window.scrollTo(0, 1500)")

    ## Collect review text
    soup = BeautifulSoup(driver.page_source, 'lxml')

    ## The first 'content-review' div is skipped, as in the original code.
    ## NOTE(review): presumably it is not a customer review -- confirm against the page.
    reviews.extend(
        rev.get_text() for rev in soup.find_all('div', class_='content-review')[1:]
    )

    driver.execute_script("window.scrollTo(0, 2200)")

    return driver, reviews, soup

In [4]:
## Finally, we need a wrapper to loop through all the pages and call our collect_reviews function on each page.
def collect_all_reviews(driver_path, url, pagecount = 1, max_pages=5, reviews = None):
    """Collect the reviews of a product by paging through its review pages.

    Calls ``collect_reviews`` on ``url`` and then on each following review
    page until there is no enabled "next" link or ``max_pages`` is reached.

    Args:
        driver_path: Path to the chromedriver executable.
        url: Product page address; follow-up pages are requested by adding a
            ``yoReviewsPage`` query parameter to it.
        pagecount: Number assigned to the first page that is loaded.
        max_pages: Paging stops once ``pagecount`` reaches this value.
        reviews: Optional list to append to. A new list is created when
            omitted, so repeated calls do not share results.

    Returns:
        The list of collected review texts.
    """
    ## A fresh list per call: a mutable default would be shared between calls
    if reviews is None:
        reviews = []

    ## Initialize driver
    driver = webdriver.Chrome(driver_path)
    try:
        ## Call collect reviews function on the first page
        driver, reviews, soup = collect_reviews(driver, url, reviews)

        ## Append the page parameter correctly even if url already has a query string
        separator = '&' if '?' in url else '?'

        ## Ensure the current page is not beyond the "max_pages" parameter
        while pagecount < max_pages:
            ## Ensure a next page exists; a missing link or attribute means "no more pages"
            next_link = soup.find('a', {'rel': 'next'})
            if next_link is None or next_link.get('aria-disabled') != 'false':
                break
            ## Flip to the next page, call collect_reviews to gather reviews
            pagecount += 1
            page_url = '{base}{sep}yoReviewsPage={page}'.format(
                base=url, sep=separator, page=pagecount)
            driver, reviews, soup = collect_reviews(driver, page_url, reviews)
    finally:
        ## Always shut the browser down, even if scraping failed part-way
        driver.quit()

    return reviews


## Scrape the product page with the local chromedriver binary and display the collected review texts
reviews = collect_all_reviews('drivers/chromedriver', 'https://www.vaporfi.com/vaporesso-podstick-starter-kit/')
reviews

[' I love this Vaporesso Pod Stick. I was just recently learned about the pod stick and now that I have it, I am most impressed. It is compact, very easy to fill, and a great smooth vape. Holds the charge longer than I thought it would and the batteries seem to be lasting quite awhile. I recommend this product. ',
 ' I found the Vaporesso PodStick a very pleasurable experience. Easy to use and refill. I like the way it feels in my hand. The fact that you have 3 options of power intake makes it my top choice. Also a great choice if your a first time user. ',
 ' I’m really enjoying these ',
 ' Good using this. Excellent features. ',
 ' Awsome........... ',
 ' These vape well. Easy to change pods. I haven’t had any leakage. ',
 ' Very nice ',
 " I love how simple and effective this device is. Has a very long battery life and charges extremely fast! The pods also last at least 10 refills. Dont fill pods up too much or you'll have some backsplash. ",
 ' Very good,just what I wanted ',
 " I 

In [30]:
## Count the collected reviews (25 at the time this notebook was run)
len(reviews)

25