In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [None]:
#Function to extract Product Name
def get_title(soup):

    try:
        # Outer Tag Object
        title = soup.find("span", attrs={"id":'productTitle'})
        
        # Inner NavigatableString Object
        title_value = title.text

        # Title as a string value
        title_string = title_value.strip()

    except AttributeError:
        title_string = ""

    return title_string

#Function to extract Product Price
def get_price(soup):

    try:
        pri_str =  soup.find("span", attrs = {"class" : "a-price-whole"})
        price = "₹" + pri_str.text[:-1]

    except AttributeError:

        price = ""

    return price

#Function to extract Rating
def get_rating(soup):

    try:
        rating = soup.find("i", attrs={'class':'a-icon a-icon-star a-star-4-5'}).string.strip()
    
    except AttributeError:
        try:
            rating = soup.find("span", attrs={'class':'a-icon-alt'}).string.strip()
        except:
            rating = ""	

    return rating

#Function to extract Number of Reviews
def get_review_count(soup):
    try:
        review_count = soup.find("span", attrs={'id':'acrCustomerReviewText'}).string.strip()

    except AttributeError:
        review_count = ""	

    return review_count

#Function to get Description
def get_description(soup):
    try:
        description =  soup.find("ul" , attrs = {"class" : "a-unordered-list a-vertical a-spacing-mini"}).text.strip()
    
    except:
        description = ""
        
    return description

#Function to get Manufacturer
def get_manufacturer(soup):
    try:
        manufacturer = soup.find_all("td", attrs = {"class" : "a-size-base prodDetAttrValue"})[1].string.strip()[1:]
    except :
        try:
            manufacturer = soup.find("div", attrs = {"id" : "merchant-info"}).find_all("span")[1].text
        except:
            manufacturer = ""
            
    if "Kilograms" in manufacturer:
        manufacturer = ""
    
    return manufacturer
    

In [97]:
if __name__ == '__main__':
    #dictionary for the dataframe
    d = {"title" : [], "price" : [], "rating" : [], "reviews" : [], "Link" : [], "description" : [], "manufacturer" : [], "ASIN" : []}
    
    for page in range(1,11):
        #Headers for request
        HEADERS = ({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36', 'Accept-Language' : 'en-US,en;q=0.5'})

        #URL assignment
        URL = 'https://www.amazon.in/s?k=bags&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%2C283&ref=sr_pg_' + str(page)

        #HTTP Request
        webpage = requests.get(URL, headers = HEADERS)

        # Soup Object containing all data
        soup = BeautifulSoup(webpage.content, "html.parser")

        # Fetch links as List of Tag Objects
        links = soup.find_all("a", attrs = {"class" : "a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal"})

        # Store the links
        links_list = []

        # Loop for extracting links from Tag Objects
        for link in links:
                links_list.append("https://www.amazon.in" + link.get('href'))
        d['Link'].extend(links_list)

        full_asin = []
        try:
            for i in range(201):
                full_asin.append(soup.find('div',{'data-index':i})['data-asin'])
        except:
            full_asin.append('')

        d['ASIN'].extend(full_asin[2:])




        # Loop for extracting product details from each link 
        for link in links_list:
            new_webpage = requests.get(link, headers=HEADERS)

            new_soup = BeautifulSoup(new_webpage.content, "html.parser")

            # Function calls to display all necessary product information
            d['title'].append(get_title(new_soup))
            d['price'].append(get_price(new_soup))
            d['rating'].append(get_rating(new_soup))
            d['reviews'].append(get_review_count(new_soup))
            d['description'].append(get_description(new_soup))
            d['manufacturer'].append(get_manufacturer(new_soup))

    s1 = pd.Series(d['title'], name='Title')
    s2 = pd.Series(d['price'], name='price')
    s3 = pd.Series(d['rating'], name='rating')
    s4 = pd.Series(d['reviews'], name='reviews')
    s5 = pd.Series(d['Link'], name = 'Link')
    s6 = pd.Series(d['description'], name='description')
    s7 = pd.Series(d['manufacturer'], name='manufacturer')
    s8 = pd.Series(d['ASIN'], name='ASIN')

    amazon_df = pd.concat([s1,s2,s3,s4,s5,s6,s7,s8], axis=1)
    amazon_df['Title'].replace('', np.nan, inplace=True)
    amazon_df = amazon_df.dropna(subset=['Title'])
    amazon_df.to_csv("amazon_data.csv", header=True, index=False)


    

In [98]:
amazon_df

Unnamed: 0,Title,price,rating,reviews,Link,description,manufacturer,ASIN
0,NORTH ZONE 30 ltrs ( Cms) backpack(INVALID DAT...,₹599.00,3.7 out of 5 stars,33 ratings,https://www.amazon.in/sspa/click?ie=UTF8&spc=M...,KEY FEATURES - Black Colour Laptop Backpack fo...,SB NTERPRISES,B0BH8S9QGH
1,DYBBUK Lightweight school bags Backpacks for B...,₹664,3.6 out of 5 stars,19 ratings,https://www.amazon.in/sspa/click?ie=UTF8&spc=M...,COMPATIBLE WITH 15.6 INCH LAPTOP”: The Northzo...,SB NTERPRISES,B0BK4VL6P9
2,American Tourister 32 Ltrs Black Casual Backpa...,"₹1,199.00",4.1 out of 5 stars,"51,380 ratings",https://www.amazon.in/American-Tourister-AMT-S...,"Laptop Compatibility: No, Strap Type: Adjustab...",,B07CJCGM1M
3,ADISA 15.6 inch Laptop Backpack Office Bag Col...,₹499.00,3.9 out of 5 stars,463 ratings,https://www.amazon.in/ADISA-Laptop-Backpack-Of...,Material: Water Resistant Light-Weight Polyest...,Appario Retail Private Ltd,B09TPX22NF
4,Skybags Brat Black 46 Cms Casual Backpack,₹669.00,4.1 out of 5 stars,"3,373 ratings",https://www.amazon.in/Skybags-Brat-Black-Casua...,Combination of functional & safety features in...,RetailEZ Pvt Ltd,B08Z1HHHTD
...,...,...,...,...,...,...,...,...
335,Half Moon Large 37L Laptop Bag Backpack for me...,₹849.00,3.9 out of 5 stars,"2,129 ratings",https://www.amazon.in/Half-Moon-Backpack-Lugga...,SPACIOUS AND LIGHTWEIGHT: Light weight and spa...,Half_Moon,
336,Bennett™ Mystic Formal Business Briefcase Bag ...,₹512.00,4.1 out of 5 stars,"4,862 ratings",https://www.amazon.in/Bennett-Mystic-Shoulder-...,PREMIUM QUALITY MATERIAL: This laptop computer...,"Texcoco Enterprises, 9958166528, Texcoco Enter...",
337,ADISA Laptop Backpack 31 Ltrs,₹499,4.0 out of 5 stars,"8,992 ratings",https://www.amazon.in/ADISA-BP004-Weight-Casua...,Adjustable Shoulder Handles / Two Bottle holde...,Appario Retail Private Ltd,
338,DYBBUK Lightweight school bags Backpacks for B...,₹664.00,3.6 out of 5 stars,19 ratings,https://www.amazon.in/sspa/click?ie=UTF8&spc=M...,COMPATIBLE WITH 15.6 INCH LAPTOP”: The Northzo...,SB NTERPRISES,
