## Objective

- We are already able to scrape data from multiple pages by opening a new browser instance for each page.
- That approach is time-consuming because of the number of browser tabs it opens.
- Instead, here we are going to open a single instance of the browser and reuse it to navigate to multiple pages.

In [1]:
# Imports
import selenium
from selenium.webdriver import Chrome,ChromeOptions
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException,WebDriverException
from selenium.webdriver.chrome.options import Options
import pandas as pd
from tqdm import trange

def _page_url(review_url, page):
    '''Return `review_url` with a `page=<page>` query parameter appended.'''
    # Start the query string with '?' when the URL has none yet; otherwise chain with '&'.
    separator = '&' if '?' in review_url else '?'
    return f'{review_url}{separator}page={page}'


def get_review(user_url, pages=6):
    '''Extracts reviews from a user given `flipkart` product page and returns a `pandas dataframe`.

    A single headless Chrome instance is started and reused to visit every
    review page, instead of opening a new browser for each page.

    Parameters
    -----------
    user_url: URL of the Flipkart product page whose reviews should be extracted.
    pages: Number of review pages to visit, starting from page 1. Defaults to 6.
        Scraping stops early when a page shows no review blocks within 10 seconds.

    Returns
    -------
    A dataframe with the columns `Review_Title`, `Review_Text`, `Review_Rating`,
    `Upvote`, `Downvote` and `Num_Photos`; the last four are cast to `int`.

    Raises
    ------
    ValueError: if `user_url` is not a Flipkart URL or `pages` is smaller than 1.

    Side effects
    ------------
    Sets the module-level global `product_name` to the product title shown on
    the first review page.

    Example
    -------
    >>> df=get_review("https://www.flipkart.com/redmi-8-ruby-red-64-gb/p/itmef9ed5039fca6?pid=MOBFKPYDCVSCZBYR",10)'''
    global product_name

    # Validate before launching a browser so bad input fails fast and cheaply.
    if 'flipkart' not in user_url:
        raise ValueError(f"Expected a Flipkart product URL, got: {user_url!r}")
    if pages < 1:
        raise ValueError(f"pages must be at least 1, got: {pages!r}")

    # The review listing lives under /product-reviews/ instead of /p/.
    review_url = user_url.replace('/p/', '/product-reviews/')

    # Browser Options
    options = Options()
    options.add_argument("--headless")
    options.add_argument('start-maximized')

    review_block_xpath = "//div[@class='col _390CkK _1gY8H-']"
    columns = ['Review_Title', 'Review_Text', 'Review_Rating', 'Upvote', 'Downvote', "Num_Photos"]
    # One tuple per review keeps all fields of a review together.
    rows = []

    # Driver essential to run automated chrome window
    driver = webdriver.Chrome(options=options)
    try:
        # Extracting pages of review
        for page in range(1, pages + 1):
            # Navigate the same browser window to the next review page.
            driver.get(_page_url(review_url, page))

            # Wait until at least one review block is present; a page without
            # any is treated as the end of the available reviews.
            try:
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, review_block_xpath))
                )
            except TimeoutException:
                break

            # Click on all "read more" buttons so the full review text is rendered.
            for read_more in driver.find_elements(By.CLASS_NAME, '_1EPkIx'):
                driver.execute_script("return arguments[0].scrollIntoView();", read_more)
                # Scroll back up a little after bringing the button into view, before clicking it.
                driver.execute_script("window.scrollBy(0, -150);")
                read_more.click()

            # Get the product name once, from the first page.
            if page == 1:
                product_name = driver.find_element(By.XPATH, "//div[@class='o9Xx3p _1_odLJ']").text

            # Extracting contents
            for block in driver.find_elements(By.XPATH, review_block_xpath):
                rows.append((
                    block.find_element(By.XPATH, ".//p[@class='_2xg6Ul']").text,
                    block.find_element(By.XPATH, ".//div[@class='qwjRop']").text,
                    block.find_element(By.XPATH, ".//div[@class='hGSR34 E_uFuv'or @class='hGSR34 _1x2VEC E_uFuv' or @class='hGSR34 _1nLEql E_uFuv']").text,
                    block.find_element(By.XPATH, ".//div[@class='_2ZibVB']").text,
                    block.find_element(By.XPATH, ".//div[@class='_2ZibVB _1FP7V7']").text,
                    len(block.find_elements(By.XPATH, ".//div[@class='_3Z21tn _2wWSCV']")),
                ))
    finally:
        # Always end the browser session, even when scraping fails midway.
        driver.quit()

    # Creating df of reviews
    df = pd.DataFrame(data=rows, columns=columns)

    # Handling dtypes of Review_Rating,Upvote,Downvote
    for column in ['Review_Rating', 'Upvote', 'Downvote', 'Num_Photos']:
        df[column] = df[column].astype("int")
    # Return dataframe
    return df
# Scrape the reviews of a sample Flipkart product listing and preview the first rows.
product_url = "https://www.flipkart.com/realme-6-comet-blue-64-gb/p/itm212944b2e7fb0?pid=MOBFPCX7F9NBPRRT&lid=LSTMOBFPCX7F9NBPRRTSHD9FQ&marketplace=FLIPKART&srno=b_1_1&otracker=nmenu_sub_Electronics_0_realme%206&fm=organic&iid=a762e160-29ab-438e-af3f-0d7f835e6267.MOBFPCX7F9NBPRRT.SEARCH&ppt=browse&ppn=browse&ssid=42nao3jn9s0000001587003767986"
df = get_review(product_url)
df.head()

Unnamed: 0,Review_Title,Review_Text,Review_Rating,Upvote,Downvote,Num_Photos
0,Wonderful,Best mid range phone.. and worth every penny!,5,5637,592,3
1,Just wow!,best phone under this price 👌👍👍,5,2757,341,2
2,Simply awesome,Amazing.... phone I'm totally certified this p...,5,869,99,1
3,Nice product,awesome camera performance,4,1615,203,5
4,Wonderful,Great product\nwith android 10\nBattery Full c...,4,732,86,3
