<center><b>Web Scraping Amazon Products Data Using Selenium</b></center>

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Launch one Chrome session that is shared by every scrape in this notebook.
# NOTE(review): '--ignore-certificate-errors' turns off TLS certificate
# validation for the whole session -- confirm it is really required here.
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
s = Service("chromedriver.exe")  # driver binary is resolved relative to the working directory
driver = webdriver.Chrome(options=options, service=s)

# Column accumulators: scrape_product_info() appends to these lists and they
# are later turned into DataFrame columns.
# Product-level fields.
name_list, price_list, availability_list, brand_list = [], [], [], []
description_list, best_seller_of, rating_list = [], [], []
# Review-level fields (user_name_list is declared but currently not filled).
user_name_list, review_list, star_list, date_list = [], [], [], []

def _first_text(locator, found_label=None, error_label=None, timeout=10):
    """Return the stripped text of the first element matching ``locator``.

    Waits up to ``timeout`` seconds for the element to be present on the page
    currently loaded in the module-level ``driver``.

    Args:
        locator: ``(By.<strategy>, value)`` tuple identifying the element.
        found_label: When given, a successful lookup prints
            ``"<found_label>: <text>"``.
        error_label: When given, a failed lookup prints
            ``"Error scraping <error_label>: <exception>"``. When ``None`` the
            field is treated as optional and a failure is silent.
        timeout: Maximum number of seconds to wait for the element.

    Returns:
        The element text with surrounding whitespace removed, or ``""`` when
        the lookup failed.
    """
    try:
        text = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located(locator)
        ).text.strip()
    except Exception as e:  # deliberately not a bare except: Ctrl-C must still interrupt
        if error_label is not None:
            print(f"Error scraping {error_label}: {e}")
        return ""
    if found_label is not None:
        print(f"{found_label}: {text}")
    return text


def _all_texts(locator, error_label, read=None, timeout=10):
    """Return the stripped text of every element matching ``locator``.

    Args:
        locator: ``(By.<strategy>, value)`` tuple identifying the elements.
        error_label: Used in the ``"Error scraping <error_label>: ..."``
            message printed when the lookup fails.
        read: Optional callable mapping an element to its raw string; the
            element's ``.text`` is used when omitted.
        timeout: Maximum number of seconds to wait for the elements.

    Returns:
        A list of stripped strings, or an empty list when the lookup failed.
    """
    try:
        elements = WebDriverWait(driver, timeout).until(
            EC.presence_of_all_elements_located(locator)
        )
        if read is None:
            return [element.text.strip() for element in elements]
        return [read(element).strip() for element in elements]
    except Exception as e:
        print(f"Error scraping {error_label}: {e}")
        return []


def scrape_product_info(url):
    """Scrape one product page and append its rows to the module-level lists.

    Loads ``url`` in the shared ``driver``, reads the product-level fields
    (name, price, availability, brand, description, best-seller text, rating)
    and the review-level fields (review text, star rating, date) found on the
    page. Every lookup is best-effort: a field that cannot be found becomes
    an empty string instead of aborting the scrape.

    One row is appended per review-level entry. The three review-level lists
    are padded with ``""`` to a common length, so every global column grows by
    exactly the same number of rows and rows of later products stay aligned.
    A product without any review-level data still contributes a single row.

    Args:
        url: Address of the product page to load.

    Returns:
        None. Results are appended to ``name_list``, ``price_list``,
        ``availability_list``, ``brand_list``, ``description_list``,
        ``best_seller_of``, ``rating_list``, ``review_list``, ``star_list``
        and ``date_list``.
    """
    driver.get(url)

    name = _first_text(
        (By.ID, "productTitle"),
        found_label="Scraping product",
        error_label="product name",
    )
    price = _first_text(
        (By.CLASS_NAME, "a-price-whole"),
        found_label="Scraping price",  # was mislabelled "Scraping product"
        error_label="product price",
    )

    # Fixed pause kept from the original flow; presumably lets the rest of the
    # page finish rendering before the optional fields are read -- TODO confirm.
    time.sleep(2)

    # Optional product-level fields: missing ones are silently left blank.
    availability = _first_text((By.ID, "availability"))
    brand = _first_text((By.ID, "bylineInfo"))
    description = _first_text((By.ID, "productDescription"))
    # NOTE(review): "a-list-item" is a very generic class; this takes the first
    # match on the page -- verify it really is the best-seller entry.
    best_seller = _first_text((By.CLASS_NAME, "a-list-item"))
    rating = _first_text((By.ID, "acrPopover"))

    # Review-level fields.
    review_texts = _all_texts(
        (By.CSS_SELECTOR, '.review-text-content span'), "reviews"
    )
    star_texts = _all_texts(
        (By.CSS_SELECTOR, '.review-rating .a-icon-alt'),
        "star ratings",
        # innerHTML instead of .text, as in the original; presumably the
        # label is not rendered visibly -- TODO confirm.
        read=lambda element: element.get_attribute("innerHTML"),
    )
    date_texts = _all_texts((By.CSS_SELECTOR, '.review-date'), "review dates")

    # Pad the three review-level lists to one common length (at least one row)
    # so every column below is extended by the same number of entries.
    row_count = max(len(review_texts), len(star_texts), len(date_texts), 1)
    for per_review in (review_texts, star_texts, date_texts):
        per_review.extend([""] * (row_count - len(per_review)))

    # Product-level values are repeated once per review row.
    name_list.extend([name] * row_count)
    price_list.extend([price] * row_count)
    availability_list.extend([availability] * row_count)
    brand_list.extend([brand] * row_count)
    description_list.extend([description] * row_count)
    best_seller_of.extend([best_seller] * row_count)
    rating_list.extend([rating] * row_count)
    review_list.extend(review_texts)
    star_list.extend(star_texts)
    date_list.extend(date_texts)

# Product pages to scrape, visited in this order. Every entry is its own
# string literal followed by a comma: adjacent literals without a comma are
# silently concatenated by Python, so keep the commas when adding URLs.
product_urls = [
    "https://www.amazon.com/BIC-Wite-Out-Correct-Correction-2-Count/dp/B010DS3X20/ref=sr_1_7?_encoding=UTF8&content-id=amzn1.sym.7acbca95-6949-4783-9691-9ac9df8021d9&keywords=school+supplies&pd_rd_r=c89c2794-89b0-4de7-b758-55ce73dcf567&pd_rd_w=7HxTD&pd_rd_wg=wghpT&pf_rd_p=7acbca95-6949-4783-9691-9ac9df8021d9&pf_rd_r=S0R50HDNDFPZW49434F3&qid=1707246080&sr=8-7",
    "https://www.amazon.com/Dove-Nourishing-Secrets-Hydration-Refreshing/dp/B08BTG8NQC/ref=sr_1_27?crid=2ATST9B94UK2W&keywords=Shampoo&qid=1707282207&sprefix=shampoo%2Caps%2C979&sr=8-27&th=1",
    "https://www.amazon.com/LEGO-Easter-Rabbits-Display-288pcs/dp/B09RQ67B4T/ref=sr_1_6?_encoding=UTF8&content-id=amzn1.sym.05648f74-03c2-4db2-860d-48b2281825cb&dib=eyJ2IjoiMSJ9.QvTY7CsfdDfhAYNXexYkdS772_h9gMxiBbMF-0KAXwVr4ScR8cuJjmpac7qbW9i8x3OberUMj8aFmSC_5R-zFALyYQmFTF5xYtaRfzzRbJTSCercMiHAgx3IA_6lpp401CWj59HtLoqJLx4d2c9oHtQXldbFBRfvNWhITljy3rbNXv-2uY7PlHoZTdYwvjtjmtNud51lL3PixXgwi-LTEHs7GHKpIyyUpbiFi0qyPs_0y6Zd98YTiSPB6r7KebgexxJi557Q6_WZaRsveOI6YWRnDfwiHyPj0OhIKfmDGAw.mIGUcdbdCKRYypRdXCznDh6Xo65AyCLWRvP1ew8gFhc&dib_tag=se&keywords=easter&pd_rd_r=ac67e150-1fbe-4cc4-a80c-1dd6441aa8ce&pd_rd_w=auPnK&pd_rd_wg=629Yt&pf_rd_p=05648f74-03c2-4db2-860d-48b2281825cb&pf_rd_r=NWXM7F3KRNM5FVYBMH35&qid=1710663165&sr=8-6",
    "https://www.amazon.com/Paris-Makeup-True-Natural-Glow-Illuminator-Highlighter-Day-Radiant-Glow/dp/B074PTZCNX/?_encoding=UTF8&pd_rd_w=zu11I&content-id=amzn1.sym.9929d3ab-edb7-4ef5-a232-26d90f828fa5&pf_rd_p=9929d3ab-edb7-4ef5-a232-26d90f828fa5&pf_rd_r=NWXM7F3KRNM5FVYBMH35&pd_rd_wg=XGwQy&pd_rd_r=cefd8a18-9e93-40cf-ba0b-dbb84850e8a0&ref_=pd_gw_crs_zg_bs_3760911",
    "https://www.amazon.com/Stanley-IceFlow-Stainless-Steel-Tumbler/dp/B0CT4BB651/?_encoding=UTF8&pd_rd_w=v8Gr7&content-id=amzn1.sym.9929d3ab-edb7-4ef5-a232-26d90f828fa5&pf_rd_p=9929d3ab-edb7-4ef5-a232-26d90f828fa5&pf_rd_r=NWXM7F3KRNM5FVYBMH35&pd_rd_wg=XGwQy&pd_rd_r=cefd8a18-9e93-40cf-ba0b-dbb84850e8a0&ref_=pd_gw_crs_zg_bs_284507&th=1",
    "https://www.amazon.com/Gloria-Vanderbilt-Classic-Tapered-Scottsdale/dp/B01MFEU2WU/ref=sr_1_47?_encoding=UTF8&content-id=amzn1.sym.b0c3902d-ae70-4b80-8f54-4d0a3246745a&crid=1TZCO6ZC2HZVA&dib=eyJ2IjoiMSJ9.hFh-Jxg336nLrZPJBIAXycrJVczYq1ckza0nuSxsI-2IbvVzPhfalhq71j2JQd5QMDjrCcNJ5Lvs7YlNuR-lDJOUZr5YkmXwwcGWDtRG6GExEwZackefgdAlwqGQJ1Q8qFMnTO9AqXnAbCgwueF2H9vBO2G5K2S5r_-hhzVfOt4ljghI7sm3vpWx-xZwQUZzes07aJr4wkWhmRq9BJTwReVkiGUpYo-ODK4vurx7WHkxNEn9gW8HZbOqtCJ5kz3wdCl1rkiiPbAfq1wtajxZYJiCvY58SwuDby00cDD48v4.howTQnlZ-XpaNU5bgkpEC1Lknt6siHp5y84DzlvUduM&dib_tag=se&keywords=Jeans&pd_rd_r=c48d46c2-77d5-45cc-9f79-40c3980269d3&pd_rd_w=zt1gV&pd_rd_wg=lq1RH&pf_rd_p=b0c3902d-ae70-4b80-8f54-4d0a3246745a&pf_rd_r=EWQGJ6G3Y7QEN32ZRVHN&qid=1710664688&refinements=p_36%3A-5000&rnid=2941120011&s=apparel&sprefix=jeans%2Caps%2C155&sr=1-47",
    "https://www.amazon.com/Gibson-Elite-Mayfair-Embossed-Dinnerware/dp/B09VYJ4GZR/ref=sr_1_3?_encoding=UTF8&content-id=amzn1.sym.c0480761-6b7c-400b-bca5-28ff417248d1&crid=IBML6MYDLJ4A&dib=eyJ2IjoiMSJ9.A-CZgww1amaMUjLGWoY-wdMs-EBdEdZgQDDFEpKDwn13uOQ1FVpuyBn36ztxIQJ28fdyri4zcUVna6NOPimTeLHLXWL6x-kjgNWWmoR6EPMZaevIAKmYoZFhGUfMT73igQeOw_aaAYGZGb9r6_1ICyyx6amWYT5WbgIPS8bmfR-WEU1un3l-xlNnb5-2DD57X1f98T4FmdXq-Pmsv83OnshLS3o5MEVPZTTxjJgbGVys8Q-SHw8sw2EMMmmx6PMF1A-6NACyRn8teREaFGICQLO5j83uzVWvDXKEoh556Ec.oXw_oId9ouOBYEDn21JKCD3WPwRaDIaoBJfZB_GZ7-s&dib_tag=se&keywords=Dinnerware%2B%26%2Baccessories&pd_rd_r=ac67e150-1fbe-4cc4-a80c-1dd6441aa8ce&pd_rd_w=eHoVN&pd_rd_wg=629Yt&pf_rd_p=c0480761-6b7c-400b-bca5-28ff417248d1&pf_rd_r=NWXM7F3KRNM5FVYBMH35&qid=1710664397&sprefix=dinnerware%2B%26%2Baccessorie%2Caps%2C190&sr=8-3&th=1",
    "https://www.amazon.com/dp/B09XMY6XL7?ref=emc_s_m_5_i_n",
    "https://www.amazon.com/dp/B08TQTT8LK/ref=sspa_dk_detail_3?psc=1&pd_rd_i=B08TQTT8LK&pd_rd_w=JVSQw&content-id=amzn1.sym.0d1092dc-81bb-493f-8769-d5c802257e94&pf_rd_p=0d1092dc-81bb-493f-8769-d5c802257e94&pf_rd_r=Y3ZWK6HQ17JKG9WYV7VX&pd_rd_wg=VwvV6&pd_rd_r=9207fb06-04c3-4245-8a5d-ce3509bc8749&s=beauty&sp_csd=d2lkZ2V0TmFtZT1zcF9kZXRhaWwy",
    "https://www.amazon.com/Amazon-Essentials-Slim-Fit-Long-Sleeve-Burgundy/dp/B07BJLD528/ref=sr_1_11_ffob_sspa?crid=YDB8F1FZJO73&keywords=mens%2Bshirts&qid=1707277283&sprefix=men%2Bshirts%2Caps%2C557&sr=8-11-spons&sp_csd=d2lkZ2V0TmFtZT1zcF9tdGY&th=1&psc=1",
    "https://www.amazon.com/Pleasures-Estee-Lauder-Women-Parfum/dp/B000C1Z5AW/ref=sr_1_27?crid=31Q23U2P909H5&keywords=perfumes&qid=1707278983&sprefix=perfume%2Caps%2C394&sr=8-27&th=1",
    "https://www.amazon.com/Protector-Compatible-MIL-Grade-Protection-Anti-Yellowing/dp/B09YTN9WKC/ref=sr_1_13?crid=1RBC0XBZ3R2M1&keywords=Phone%2Bcover&qid=1707280837&sprefix=phone%2Bcover%2Caps%2C458&sr=8-13&th=1",
    "https://www.amazon.com/Crayola-Washable-Glitter-Exclusive-Stocking/dp/B07BYWS5XW/ref=sr_1_8?crid=1RXL6VWLZWVVJ&keywords=paints+for+kids&qid=1707280992&sprefix=paints%2Caps%2C465&sr=8-8",
    "https://www.amazon.com/AmazonBasics-Pre-sharpened-Wood-Cased-Pencils/dp/B071JM699P/ref=sr_1_6?_encoding=UTF8&content-id=amzn1.sym.7acbca95-6949-4783-9691-9ac9df8021d9&keywords=school%2Bsupplies&pd_rd_r=c89c2794-89b0-4de7-b758-55ce73dcf567&pd_rd_w=7HxTD&pd_rd_wg=wghpT&pf_rd_p=7acbca95-6949-4783-9691-9ac9df8021d9&pf_rd_r=S0R50HDNDFPZW49434F3&qid=1707246080&sr=8-6&th=1",
    "https://www.amazon.com/Erase-Markers-Chisel-Assorted-Colors/dp/B0004F7GUI/ref=sr_1_3?_encoding=UTF8&content-id=amzn1.sym.7acbca95-6949-4783-9691-9ac9df8021d9&keywords=school+supplies&pd_rd_r=c89c2794-89b0-4de7-b758-55ce73dcf567&pd_rd_w=7HxTD&pd_rd_wg=wghpT&pf_rd_p=7acbca95-6949-4783-9691-9ac9df8021d9&pf_rd_r=S0R50HDNDFPZW49434F3&qid=1707246080&sr=8-3",
    "https://www.amazon.com/Razer-Ornata-Gaming-Keyboard-Low-Profile/dp/B09X6GJ691/ref=sr_1_1?_encoding=UTF8&content-id=amzn1.sym.12129333-2117-4490-9c17-6d31baf0582a&keywords=gaming%2Bkeyboard&pd_rd_r=c89c2794-89b0-4de7-b758-55ce73dcf567&pd_rd_w=IcB4v&pd_rd_wg=wghpT&pf_rd_p=12129333-2117-4490-9c17-6d31baf0582a&pf_rd_r=S0R50HDNDFPZW49434F3&qid=1707246274&sr=8-1&th=1",
    "https://www.amazon.com/SWAROVSKI-Crystal-Evil-Gold-Plated-Necklace/dp/B016ROFMKU/ref=sr_1_16?_encoding=UTF8&content-id=amzn1.sym.b4114be9-6d3d-4aed-8b31-fcbf38a83486&crid=3J1F4CEQCXT22&keywords=jewelry&pd_rd_r=c89c2794-89b0-4de7-b758-55ce73dcf567&pd_rd_w=6L4Xd&pd_rd_wg=wghpT&pf_rd_p=b4114be9-6d3d-4aed-8b31-fcbf38a83486&pf_rd_r=S0R50HDNDFPZW49434F3&qid=1707246353&sprefix=jewelry%2Caps%2C152&sr=8-16",
    "https://www.amazon.com/dp/B07PHQBSBF/ref=sspa_dk_detail_1?pd_rd_i=B07PHQBSBF&pd_rd_w=6Ct16&content-id=amzn1.sym.eb7c1ac5-7c51-4df5-ba34-ca810f1f119a&pf_rd_p=eb7c1ac5-7c51-4df5-ba34-ca810f1f119a&pf_rd_r=75FNGSA0MQNA094XCNQQ&pd_rd_wg=kcly9&pd_rd_r=2b12db13-ad64-4bb7-912c-1369b16e72f8&s=shoes&sp_csd=d2lkZ2V0TmFtZT1zcF9kZXRhaWw&th=1&psc=1",
    "https://www.amazon.com/l-f-Monochromatic-Multi-Cheeks-Dazzling/dp/B088W7L534/?_encoding=UTF8&pd_rd_w=9GnU7&content-id=amzn1.sym.9929d3ab-edb7-4ef5-a232-26d90f828fa5&pf_rd_p=9929d3ab-edb7-4ef5-a232-26d90f828fa5&pf_rd_r=S0R50HDNDFPZW49434F3&pd_rd_wg=UaY03&pd_rd_r=c2749e68-b887-408c-adf8-a3220db5d4b0&ref_=pd_gw_crs_zg_bs_3760911",
    "https://www.amazon.com/wet-wild-MegaGlo-Dual-Ended-Contour/dp/B01D8BV4VG/ref=pd_ci_mcx_pspc_dp_d_2_t_1?pd_rd_w=ayIzT&content-id=amzn1.sym.568f3b6b-5aad-4bfd-98ee-d827f03151e4&pf_rd_p=568f3b6b-5aad-4bfd-98ee-d827f03151e4&pf_rd_r=JD7B3Y5J77TT1FZ9X9Q6&pd_rd_wg=vu0QE&pd_rd_r=593dd586-b76a-46af-aa57-97d27c233ffd&pd_rd_i=B01D8BV4VG",
    "https://www.amazon.com/dp/B0CMCL6F7Z/ref=syn_sd_onsite_desktop_0?ie=UTF8&psc=1&pf_rd_p=d77a94d7-221a-4129-af34-3c16ad136bb7&pf_rd_r=Z0FCYXGWF4GTE2S0T4AN&pd_rd_wg=wJGRE&pd_rd_w=ehdBd&pd_rd_r=556746d3-d098-4b13-b710-f8d8037d29c7&aref=cVtWTlGvs3",
    "https://www.amazon.com/Cetaphil-Baby-Soothing-Colloidal-Hypoallergenic/dp/B07ZZTTDWP/?_encoding=UTF8&pd_rd_w=kW7nU&content-id=amzn1.sym.0f185419-5eda-46ce-8658-4e251c4fa1c4&pf_rd_p=0f185419-5eda-46ce-8658-4e251c4fa1c4&pf_rd_r=78D90DF417VYHHTZ0ME9&pd_rd_wg=vpRAg&pd_rd_r=6783a495-1ef3-4900-a0c5-8393233ec2b6&ref_=pd_hp_d_btf_exports_top_sellers_unrec",
    "https://www.amazon.com/King-Sloth-Kings-Sin-4/dp/1728289750/?_encoding=UTF8&pd_rd_w=9g5BA&content-id=amzn1.sym.9929d3ab-edb7-4ef5-a232-26d90f828fa5&pf_rd_p=9929d3ab-edb7-4ef5-a232-26d90f828fa5&pf_rd_r=78D90DF417VYHHTZ0ME9&pd_rd_wg=vpRAg&pd_rd_r=6783a495-1ef3-4900-a0c5-8393233ec2b6&ref_=pd_hp_d_btf_crs_zg_bs_283155",
    "https://www.amazon.com/Under-Armour-Womens-Distant-High-Vis/dp/B0BZWTWVPZ/ref=sr_1_59_sspa?_encoding=UTF8&content-id=amzn1.sym.3a233c37-ffc8-403c-b9df-0a8759916b7a&crid=1ZVS48U85MWPY&dib=eyJ2IjoiMSJ9.phUnC94D0o6Dwlc85q0cFcHrk7U5wFLzjpdtwhDH3q9pQ9fi0Y2f3-0ul1uudE-1EquPtoz3qbeobm2lXeo8NWTpEh4mtc58iRbleoEuHq6U1bZZZVf3fmQB9GliPcMH4lDKkcu3gIMAi1GkO18yXRh2qQ4V5fi4-1JLTyOGX5Zjwuhml22W6A77cDZP6smZhxFnwwwbEQfRlcU1xMUOjy2oWzGWuyjx-vDTsT69eBuUc2C5VcDQ3fA6_mXeRcI6_tkP1g-X8UU4b0riwHQCdXP5YKwh-IUb3jz5LzPLuu4.GYRTWLdyb9nOwwL9NyhfvwRdwVj1hEMEaduFy_13moo&dib_tag=se&keywords=women%2Bfootwear&pd_rd_r=2394271b-62ae-4cb3-a26f-444946992807&pd_rd_w=cMSZm&pd_rd_wg=jfqLh&pf_rd_p=3a233c37-ffc8-403c-b9df-0a8759916b7a&pf_rd_r=J76DYJR901YRPCRFVYXT&qid=1716202526&refinements=p_36%3A-5000&rnid=2661611011&sprefix=Women%2BFootw%2Caps%2C286&sr=8-59-spons&sp_csd=d2lkZ2V0TmFtZT1zcF9idGY&th=1&psc=1",
    # Add more URLs as needed
]

# Scrape every product page. The browser is shut down in ``finally`` so that a
# failure part-way through the run does not leave a Chrome process behind.
# Nothing below needs the browser, so it is safe to close it before exporting.
try:
    for url in product_urls:
        scrape_product_info(url)
finally:
    driver.quit()

# CSV column name -> module-level list that scrape_product_info() appended to.
data = {
    'Name': name_list,
    'Price': price_list,
    'Availability': availability_list,
    'Brand': brand_list,
    'Description': description_list,
    'Category': best_seller_of,
    'Rating': rating_list,
    'Review': review_list,
    'Star': star_list,
    'Date': date_list
}

# pandas requires equally long columns: pad each list in place with '' up to
# the longest one. Every exported column takes part in the maximum (the
# original calculation left out 'Category' and 'Rating').
max_len = max(len(column) for column in data.values())
for column in data.values():
    column.extend([''] * (max_len - len(column)))

# Build the table and write it out without the index column.
df = pd.DataFrame(data)
df.to_csv('Amazon_reviews_dataset.csv', index=False)

# Print success message
print("Data saved to 'Amazon_reviews_dataset.csv'")


Error scraping product name: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF65CA91502+60802]
	(No symbol) [0x00007FF65CA0AC02]
	(No symbol) [0x00007FF65C8C7CE4]
	(No symbol) [0x00007FF65C916D4D]
	(No symbol) [0x00007FF65C916E1C]
	(No symbol) [0x00007FF65C95CE37]
	(No symbol) [0x00007FF65C93ABBF]
	(No symbol) [0x00007FF65C95A224]
	(No symbol) [0x00007FF65C93A923]
	(No symbol) [0x00007FF65C908FEC]
	(No symbol) [0x00007FF65C909C21]
	GetHandleVerifier [0x00007FF65CD9411D+3217821]
	GetHandleVerifier [0x00007FF65CDD60B7+3488055]
	GetHandleVerifier [0x00007FF65CDCF03F+3459263]
	GetHandleVerifier [0x00007FF65CB4B846+823494]
	(No symbol) [0x00007FF65CA15F9F]
	(No symbol) [0x00007FF65CA10EC4]
	(No symbol) [0x00007FF65CA11052]
	(No symbol) [0x00007FF65CA018A4]
	BaseThreadInitThunk [0x00007FF8E3D07344+20]
	RtlUserThreadStart [0x00007FF8E4C826B1+33]

Scraping product: 17.
Scraping product: Dove Nourishing Secrets Hydrating Shampoo Coconut and Hydration 6 Count for Daily Use Dry Hair Shampoo Wit