In [19]:
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import time
import re
import requests
from time import sleep

* **requests: Allows sending HTTP requests to interact with web APIs.**
* **selenium: Automates web browsers, enabling interaction with web elements to scrape data.**
* **By: Used to locate elements on a web page.**
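* **NoSuchElementException: Raised by Selenium when an element cannot be located; it is used to detect a product without reviews and the last page of reviews. (TimeoutException is not imported or handled in this notebook.)**
* **sleep: Pauses for a fixed time after each "Next" click so the following page of reviews can load. (Dynamic waiting with WebDriverWait / EC is not used in this notebook.)**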

In [20]:
def scrape_reviews(url, output_file_path, chromedriver_path, max_pages=100, page_load_delay=10):
    """Scrape the review texts of a product page and save them to a CSV file.

    Opens ``url`` in Chrome, follows the "see more reviews" link, then walks
    the paginated review list, collecting the text of every review element
    until there is no "Next" link or ``max_pages`` pages have been read.

    Parameters
    ----------
    url : str
        Address of the product page to open.
    output_file_path : str
        Destination of the CSV file; an existing file is overwritten.
    chromedriver_path : str
        Path to the ChromeDriver executable handed to ``Service``.
    max_pages : int, optional
        Upper bound on the number of review pages to read (default 100).
    page_load_delay : int or float, optional
        Seconds to pause after clicking "Next" so the following page can
        load (default 10).

    Returns
    -------
    tuple or None
        ``(reviews, df)`` where ``reviews`` is the list of review texts and
        ``df`` is a DataFrame with a single ``'Reviews'`` column, or ``None``
        when the page has no "see more reviews" link.

    Notes
    -----
    The browser is closed in a ``finally`` clause, so it is shut down on
    every exit path, including unexpected errors (failed navigation,
    intercepted click, keyboard interrupt). Such errors still propagate to
    the caller.
    """
    # Create the ChromeDriver service and a Chrome session bound to it
    service = Service(chromedriver_path)
    driver = webdriver.Chrome(service=service)

    try:
        # Open the provided URL
        driver.get(url)
        driver.maximize_window()

        # A missing link to the full review list means there is nothing to scrape
        try:
            more_review_button = driver.find_element(By.XPATH, "(//a[@class='a-link-emphasis a-text-bold'])")
        except NoSuchElementException:
            print("The Product has no Reviews.")
            return None  # Return None if no reviews were scraped
        more_review_button.click()

        reviews = []
        for page_index in range(max_pages):
            print('Scraping page', page_index + 1)
            review_elements = driver.find_elements(By.XPATH, "//span[@class='a-size-base review-text review-text-content']")
            reviews.extend(element.text for element in review_elements)

            # Only the lookup is guarded: a missing "Next" link marks the last page,
            # while a failing click is a real error and must not be silenced
            try:
                next_button = driver.find_element(By.XPATH, "//li[@class='a-last']/a")
            except NoSuchElementException:
                print("No more pages to scrape. Stopping.")
                break
            next_button.click()
            sleep(page_load_delay)
    finally:
        # Always release the browser, whatever happened above
        driver.quit()

    # Create DataFrame in Pandas
    df = pd.DataFrame({'Reviews': reviews})

    # Export DataFrame to CSV; 'w' mode overwrites the file if it already exists
    df.to_csv(output_file_path, index=False, mode='w', encoding='utf-8-sig')

    # Return the list of extracted reviews together with the DataFrame
    return reviews, df

* **Driver setup: scrape_reviews creates the Chrome WebDriver itself from the given ChromeDriver path; there is no separate initialize_driver function in this notebook.**
* **ChromeDriver: Required executable to control the Chrome browser.**

* **scrape_reviews: Main function to scrape reviews from the given URL.**
* **Review Extraction: Reviews are extracted from the review elements and stored in a list.**
* **Pagination Handling: The script attempts to click the "Next" button to load more reviews, handling up to 100 pages.**
* **Error Handling: Catches NoSuchElementException when the reviews link or the "Next" button is missing, and closes the browser before returning.**

In [21]:
def send_reviews_to_api(reviews, api_endpoint, timeout=60):
    """Post scraped reviews to an API endpoint and report the outcome.

    Parameters
    ----------
    reviews : list
        Review texts, sent as the JSON body ``{'reviews': reviews}``; must be
        JSON-serialisable.
    api_endpoint : str
        URL that receives the POST request.
    timeout : float or tuple, optional
        Seconds to wait for the server (default 60), forwarded to
        ``requests.post``. Without a timeout ``requests`` waits indefinitely,
        so a stalled endpoint would hang the notebook cell.

    Returns
    -------
    str
        ``"Done!"`` when the server answers with status 200, otherwise a
        message describing the failure (unexpected status code or a
        request-level error such as a connection failure or timeout).
    """
    try:
        response = requests.post(api_endpoint, json={'reviews': reviews}, timeout=timeout)
    except requests.RequestException as e:
        # Connection errors and timeouts are all RequestException subclasses
        print(f"An error occurred while sending data to the API: {e}")
        return "Failed to send data due to a network error"

    if response.status_code != 200:
        failure_message = f"Failed to send data. Status code: {response.status_code}"
        print(failure_message)
        return failure_message

    try:
        print("Sentiment analysis results:", response.json())
    except requests.RequestException as e:
        # Recent requests versions raise a RequestException subclass for an
        # undecodable body; keep reporting it the way the caller already expects
        print(f"An error occurred while sending data to the API: {e}")
        return "Failed to send data due to a network error"
    return "Done!"

* **send_reviews_to_api: Sends the collected reviews to the specified API endpoint.**
* **POST Request: Sends the reviews in JSON format to the API.**
* **Error Handling: Catches network-related exceptions and prints appropriate messages.**

In [22]:
# Demo run: scrape one Amazon.eg product page and store its reviews in a CSV file.
# The address is assembled from adjacent string literals purely for readability;
# the resulting value is a single URL string.
url = (
    "https://www.amazon.eg/"
    "%D9%85%D9%88%D8%A8%D8%A7%D9%8A%D9%84-%D8%B3%D8%A7%D9%85%D8%B3%D9%88%D9%86%D8%AC-"
    "%D8%AC%D8%A7%D9%84%D8%A7%D9%83%D8%B3%D9%8A-%D8%A7%D9%86%D8%AF%D8%B1%D9%88%D9%8A%D8%AF-"
    "%D8%A8%D8%B4%D8%B1%D9%8A%D8%AD%D8%AA%D9%8A%D9%86"
    "/dp/B0BXNW1SP4/ref=pd_rhf_dp_s_pd_crcbs_d_sccl_2_8/258-0173424-6573400"
    "?pd_rd_w=lYI8C"
    "&content-id=amzn1.sym.61158c5e-d6f1-4356-a455-f35d0aca508d"
    "&pf_rd_p=61158c5e-d6f1-4356-a455-f35d0aca508d"
    "&pf_rd_r=GM12CPF4AK6E2C52C998"
    "&pd_rd_wg=mPU7D"
    "&pd_rd_r=454410d4-c8fe-43d5-b15b-d93177218b57"
    "&pd_rd_i=B0BXNW1SP4"
    "&th=1"
)
output_file_path = "Reviews.csv"  # CSV written by the scraper
chromedriver_path = "chromedriver.exe"  # ChromeDriver executable, resolved relative to the working directory

# The call is the cell's last expression, so its return value is displayed as the cell output
scrape_reviews(
    url=url,
    output_file_path=output_file_path,
    chromedriver_path=chromedriver_path,
)


Scraping page 1
Scraping page 2
Scraping page 3
Scraping page 4
Scraping page 5
Scraping page 6
Scraping page 7
Scraping page 8
No more pages to scrape. Stopping.


(['جيد',
  'موبايل ممتاز جدا\nشركة سامسونج معروفة طبعا\nيكفي إنك هتاخد تحديثات 4سنوات لنظام التشغيل والسنة الخامسة للأمان\nالكاميرا يعتمد عليها جدا\nأنا اشتريته في عرض كويس جدا من أمازون\nأنصح به\nشكرا أمازون',
  'شكرا امازون علي سياسة الارجاع\nاشتريت الموبايل وقمت بارجاعه ليس إلا لحجمه الصغير فقط',
  'انا طلبت تليفون سامسونج a54 واللى جالى ف الطرد باور بنك انكر والتليفون سعره 13600\nوكلمت خدمه العملاء رفضت ترجعلى فلوسي أو التليفون اللى انا طلبته ودى اكبر دليل على أنها شركه نصابه',
  'هو أفضل من فى الفئة السعرية الخاص به\nلا يدعم غير الهاندفرى بلج اند بلاى',
  'نصيحة عامة لجميع الاصدقاء\nليس هناك فرق بين فيتنامى ولا هندي\nكله يندرج تحت الشركة المصنعة سامسونج.\nالموبيل لقد اشتريته ولم اقوم بفتح العلبة من باب الامانة وتركتها مغلفة كما هى.لاننى قمت بشراءه اثناء فترة العرض بسعر ١٣٥٩٩.\nفقمت بعمل جوله للسؤال عليه ف المحلات لمعرفة سعره وجدت صدمه انه بثمن ١٢٠٠٠ ل ١٢٧٠٠.\nوعندها اخذت القرار كما هو لم افتح العلبة وقمت بارجاعة من باب الامانة.\nونادرا جداااا ما اقوم بارجاع المنتجات التى اقوم بشرا