<a href="https://colab.research.google.com/github/vibhash506/amazon_webscrapping/blob/main/amazon_product_webscrapping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import requests
from bs4 import BeautifulSoup

# Function to extract image source of the product
def get_img_src(soup):
    """Return the URL of the main product image, or "" when it is missing.

    Looks up the ``<img>`` nested inside the ``div`` whose id is
    ``imgTagWrapperId`` and reads its ``src`` attribute.

    Args:
        soup: Parsed page (a BeautifulSoup object or anything with a
            compatible ``find`` method).

    Returns:
        The ``src`` value as a string, or an empty string if the wrapper
        div, the image tag, or the ``src`` attribute is absent.
    """
    try:
        img_tag = soup.find("div", id="imgTagWrapperId").find("img")
        return img_tag["src"]
    except (AttributeError, TypeError, KeyError):
        # AttributeError: wrapper div not found (find returned None).
        # TypeError: wrapper found but it contains no <img> (None["src"]).
        # KeyError: <img> present but it has no src attribute.
        return ""

# Function to extract brand of the product
def get_brand(soup):
    """Return the product's brand name, or "" when no byline is present.

    The brand is read from the ``<a id="bylineInfo">`` element. Only a
    leading "Brand: " / "Visit the " / "Visit " and a trailing " Store" are
    removed, so brand names that themselves contain "the " or "Store"
    (e.g. "Breathe Right") are left intact.

    Args:
        soup: Parsed page (a BeautifulSoup object or anything with a
            compatible ``find`` method).

    Returns:
        The cleaned brand name, or an empty string if the byline element
        is missing.
    """
    try:
        byline = soup.find("a", {"id": "bylineInfo"}).text.strip()
    except AttributeError:
        # find() returned None: the page has no byline element.
        return ""

    # Strip at most one known prefix; longest first so "Visit the " wins
    # over "Visit ".
    for prefix in ("Brand: ", "Visit the ", "Visit "):
        if byline.startswith(prefix):
            byline = byline[len(prefix):]
            break

    suffix = " Store"
    if byline.endswith(suffix):
        byline = byline[:-len(suffix)]

    return byline.strip()


def scrapBrand(soup):
    """Print the product's brand, or a notice when it cannot be found.

    Kept as the printing counterpart of ``get_brand``; it returns None.

    Args:
        soup: Parsed page (a BeautifulSoup object or anything with a
            compatible ``find`` method).
    """
    brand_name = get_brand(soup)
    if brand_name:
        print("Brand:", brand_name)
    else:
        print("Brand not Mantioned")


# Function to extract price of the product
def get_price(soup):
    """Return the displayed price as a string, or "Currently Unavailable".

    Two lookups are tried in order:

    1. The ``aok-relative`` div inside ``corePriceDisplay_desktop_feature_div``,
       combining the ``a-price-symbol``, ``a-price-whole`` and (when present)
       ``a-price-fraction`` spans.
    2. The first ``<span class="a-offscreen">`` on the page, used verbatim.

    Args:
        soup: Parsed page (a BeautifulSoup object or anything with a
            compatible ``find`` method).

    Returns:
        The price text (e.g. "$19.99"), or "Currently Unavailable" when no
        price can be located, the fallback text is empty, or it is the
        "Page 1 of 1" placeholder.
    """
    unavailable = "Currently Unavailable"

    core = soup.find("div", {"id": "corePriceDisplay_desktop_feature_div"})
    price_box = None
    if core is not None:
        price_box = core.find("div", {"class": "aok-relative"})

    if price_box is not None:
        whole = price_box.find("span", {"class": "a-price-whole"})
        symbol = price_box.find("span", {"class": "a-price-symbol"})
        if whole is not None and symbol is not None:
            amount = whole.text.strip()
            fraction = price_box.find("span", {"class": "a-price-fraction"})
            fraction_text = fraction.text.strip() if fraction is not None else ""
            if fraction_text:
                # NOTE(review): assumes the whole-part text normally ends with
                # the locale's decimal separator ("19." / "19,") - confirm
                # against live markup. Insert "." only if it is absent.
                if amount[-1:] not in (".", ","):
                    amount += "."
                amount += fraction_text
            else:
                # No fractional part: drop a dangling separator ("19." -> "19").
                amount = amount.rstrip(".,")
            return f"{symbol.text.strip()}{amount}"

    # Fallback: first screen-reader price span on the page.
    offscreen = soup.find("span", {"class": "a-offscreen"})
    text = offscreen.text.strip() if offscreen is not None else ""
    if not text or text == "Page 1 of 1":
        return unavailable
    return text

# Function to extract title of the product
def get_title(soup):
    """Return the product title with surrounding whitespace removed.

    The title is taken from the ``<span>`` whose id is ``productTitle`` and
    whose class is ``a-size-large product-title-word-break``. An empty
    string is returned when that lookup raises ``AttributeError`` (for
    example because no such span exists).
    """
    selector = {
        "id": "productTitle",
        "class": "a-size-large product-title-word-break",
    }
    try:
        return soup.find("span", attrs=selector).text.strip()
    except AttributeError:
        return ""

# Main function
if __name__ == '__main__':
    # Browser-like headers sent with every request.
    HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3', 'Accept-Language': 'en-US, en;q=0.5'}

    # Seconds to wait for a response; without a timeout requests.get can
    # block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 10

    # List of webpage URLs
    URLS = [
        "https://www.amazon.com.au/dp/B01BIRVL6A",
        "https://www.amazon.com.au/dp/B00544A9SK",
        "https://www.amazon.com.au/dp/B00NKJZE4K",
        "https://www.amazon.com.au/dp/B08XNS9BFS",
        "https://www.amazon.ca/dp/B0CP9QSBKR",
        "https://www.amazon.co.uk/dp/B0CMCSYHZM",
        "https://www.amazon.ca/dp/B0CP8H1C8L",
        "https://www.amazon.com/dp/B0CQXMJ66Y",
        "https://www.amazon.ca/dp/B0CP9PVRHD",
        "https://www.amazon.com/dp/B0CRCRLT5T",
        "https://www.amazon.com.mx/dp/B0CJRMJGPJ",
        "https://www.amazon.de/-/en/dp/B0CQK87DQ5",
        "https://www.amazon.de/-/en/dp/B0CQK5GH9M",
        "https://www.amazon.com.tr/dp/B0CSG29SGB",
        "https://www.amazon.com/dp/B0CH5BMM7M",
        "https://www.amazon.de/-/en/dp/B0CQD7BLRG",
        "https://www.amazon.de/-/en/dp/B0CQD3JRDZ",
        "https://www.amazon.de/-/en/dp/B0CQD5QQR6",
    ]

    # Fetch each page in turn; a failure on one URL is reported and the
    # loop moves on to the next instead of aborting the whole run.
    for index, url in enumerate(URLS, start=1):
        print(f"{index}. {url}")

        try:
            response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Covers connection errors, timeouts, invalid URLs, etc.
            print(f"Request failed: {exc}")
            print()
            continue

        if response.status_code != 200:
            # Include the status so blocks/throttling are distinguishable
            # from a genuinely missing page.
            print(f"Page not found (HTTP {response.status_code})")
            print()
            continue

        # Soup Object containing all data
        soup = BeautifulSoup(response.content, "html.parser")

        # Print the extracted data in the desired format
        print(f"Title Name: {get_title(soup)}")
        print(f"Price: {get_price(soup)}")
        # scrapBrand prints the brand line itself (it does not return it).
        scrapBrand(soup)
        print(f"Image URL: {get_img_src(soup)}")
        print()


1. https://www.amazon.com.au/dp/B01BIRVL6A
Title Name: Lemsip Max Lemon Cold and Flu Decongestant Hot Drink (Pack of 10)
Price: Currently Unavailable
Brand: Lemsip
Image URL: https://m.media-amazon.com/images/I/A1JANUL33bL.__AC_SX300_SY300_QL70_ML2_.jpg

2. https://www.amazon.com.au/dp/B00544A9SK
Title Name: Disprin Original Fasting Acting Pain Relief Tablets (Count of 24) (33234)
Price: Currently Unavailable
Brand: Disprin
Image URL: https://m.media-amazon.com/images/I/81Y03aKsYJL.__AC_SX300_SY300_QL70_ML2_.jpg

3. https://www.amazon.com.au/dp/B00NKJZE4K
Title Name: Gaviscon Dual Action Chewable Peppermint Heartburn & Indigestion Relief Tablets (Count of 16)
Price: Currently Unavailable
Brand: Gaviscon
Image URL: https://m.media-amazon.com/images/I/819w-FS5HIL.__AC_SX300_SY300_QL70_ML2_.jpg

4. https://www.amazon.com.au/dp/B08XNS9BFS
Title Name: Strepfen Intensive Honey & Lemon Throat Lozenges 16 Pack
Price: Currently Unavailable
Brand: Strepfen
Image URL: https://m.media-amazon.com/i