In [24]:
import csv
from time import sleep
from datetime import datetime
from random import random
from selenium.common import exceptions
from selenium import webdriver


class AmazonScraper:

    def __init__(self) -> None:
        pass


    def generate_filename(self, search_term):
        timestamp = datetime.now().strftime("%Y%m%d%H%S%M")
        stem = path = '_'.join(search_term.split(' '))
        filename = stem + '_' + timestamp + '.csv'
        return filename


    def save_data_to_csv(self, record, filename, new_file=False):
        header = ['description', 'price', 'rating', 'review_count', 'url']
        if new_file:
            with open(filename, 'w', newline='', encoding='utf-8') as f:
                writer = csv.writer(f)
                writer.writerow(header)
        else:
            with open(filename, 'a+', newline='', encoding='utf-8') as f:
                writer = csv.writer(f)
                writer.writerow(record)


    def create_webdriver(self):
        driver = webdriver.Firefox(executable_path=r'C:\\Program Files (x86)\\Geckodriver\\geckodriver.exe')
        return driver


    def generate_url(self, search_term, page):
        base_template = 'https://www.amazon.in/s?k={}&ref=nb_sb_noss_2'
        search_term = search_term.replace(' ', '+')
        stem = base_template.format(search_term)
        url_template = stem + '&page={}'
        if page == 1:
            return stem
        else:
            return url_template.format(page)


    def extract_card_data(self, card):
        description = card.find_element_by_xpath('.//h2/a').text.strip()
        url = card.find_element_by_xpath('.//h2/a').get_attribute('href')
        try:
            price = card.find_element_by_xpath('.//span[@class="a-price-whole"]').text
        except exceptions.NoSuchElementException:
            return
        try:
            temp = card.find_element_by_xpath('.//span[contains(@aria-label, "out of")]')
            rating = temp.get_attribute('aria-label')
        except exceptions.NoSuchElementException:
            rating = ""
        try:
            temp = card.find_element_by_xpath('.//span[contains(@aria-label, "out of")]/following-sibling::span')
            review_count = temp.get_attribute('aria-label')
        except exceptions.NoSuchElementException:
            review_count = ""
        return description, price, rating, review_count, url


    def collect_product_cards_from_page(self, driver):
        cards = driver.find_elements_by_xpath('//div[@data-component-type="s-search-result"]')
        return cards


    def sleep_for_random_interval(self):
        time_in_seconds = random() * 2
        sleep(time_in_seconds)


    def run(self, search_term): 
        """Run the Amazon webscraper"""
        filename = self.generate_filename(search_term)
        self.save_data_to_csv(None, filename, new_file=True)  # initialize a new file
        driver = self.create_webdriver()
        num_records_scraped = 0

        for page in range(1, 41):  # max of 40 pages
            # load the next page
            search_url = self.generate_url(search_term, page)
            print(search_url)
            driver.get(search_url)
            print('TIMEOUT while waiting for page to load')

            # extract product data
            cards = self.collect_product_cards_from_page(driver)
            for card in cards:
                record = self.extract_card_data(card)
                if record:
                    self.save_data_to_csv(record, filename)
                    num_records_scraped += 1
            self.sleep_for_random_interval()

        # shut down and report results
        driver.quit()
        print(f"Scraped {num_records_scraped:,d} for the search term: {search_term}")

if __name__ == '__main__':
    search_term = 'laptop'
    webscraper = AmazonScraper()
    webscraper.run(search_term)

https://www.amazon.in/s?k=laptop&ref=nb_sb_noss_2
TIMEOUT while waiting for page to load
https://www.amazon.in/s?k=laptop&ref=nb_sb_noss_2&page=2
TIMEOUT while waiting for page to load
https://www.amazon.in/s?k=laptop&ref=nb_sb_noss_2&page=3
TIMEOUT while waiting for page to load
https://www.amazon.in/s?k=laptop&ref=nb_sb_noss_2&page=4
TIMEOUT while waiting for page to load
https://www.amazon.in/s?k=laptop&ref=nb_sb_noss_2&page=5
TIMEOUT while waiting for page to load
https://www.amazon.in/s?k=laptop&ref=nb_sb_noss_2&page=6
TIMEOUT while waiting for page to load
https://www.amazon.in/s?k=laptop&ref=nb_sb_noss_2&page=7
TIMEOUT while waiting for page to load
https://www.amazon.in/s?k=laptop&ref=nb_sb_noss_2&page=8
TIMEOUT while waiting for page to load
https://www.amazon.in/s?k=laptop&ref=nb_sb_noss_2&page=9
TIMEOUT while waiting for page to load
https://www.amazon.in/s?k=laptop&ref=nb_sb_noss_2&page=10
TIMEOUT while waiting for page to load
https://www.amazon.in/s?k=laptop&ref=nb_sb_nos