In [1]:
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Accumulator for the scrape: one dictionary of fields per laptop
laptop_data = []

# Start the Chrome session that Selenium drives for every page load
driver = webdriver.Chrome()

# Titles of the specification rows to capture from each product page.
# The order here is the order in which the spec keys are added to each record.
key_spec_names = [
    # General
    "Product Name",
    "Brand",
    "Year of Release",
    "Color Category",
    # Display
    "Display Type",
    "Screen Size",
    "Screen Resolution",
    "Touch Screen",
    # Software
    "Operating System",
    "Voice Assistant Built-in",
    # Storage and memory
    "Storage Type",
    "Total Storage Capacity",
    "System Memory (RAM)",
    "Type of Memory (RAM)",
    # Battery
    "Battery Type",
    "Battery Life (up to)",
    # Graphics
    "Graphics Type",
    "GPU Brand",
    "Graphics",
    # Processor
    "Processor Brand",
    "Processor Model",
    "Number of CPU Cores",
    "CPU Base Clock Frequency",
    # Physical features and ports
    "Product Weight",
    "Backlit Keyboard",
    "Front Facing Camera Video Resolution",
    "Number of HDMI Outputs (Total)",
]

# Crawl the listing pages, open every laptop's product page, and append one
# dictionary of scraped fields per laptop to laptop_data.
#
# The crawl runs inside try/finally so the browser is always closed, and a
# page that fails to load in time is skipped instead of aborting the whole run
# (which would discard everything collected so far, since the CSV is only
# written after the loop).

# One waiter (10 second timeout) reused for every page load
wait = WebDriverWait(driver, 10)

try:
    # Iterate over each listing page (1 through 69)
    for page_num in range(1, 70):
        print(f"Processing page {page_num}")
        # Construct the URL for the page: page 1 uses the category URL, later
        # pages use the search URL with the page number in the `cp` parameter
        if page_num == 1:
            url = 'https://www.bestbuy.com/site/laptop-computers/all-laptops/pcmcat138500050001.c?id=pcmcat138500050001'
        else:
            url = f'https://www.bestbuy.com/site/searchpage.jsp?_dyncharset=UTF-8&browsedCategory=pcmcat138500050001&cp={page_num}&id=pcat17071&iht=n&ks=960&list=y&sc=Global&st=categoryid%24pcmcat138500050001&type=page&usc=All%20Categories'

        # Connect to the webpage that lists all laptops
        driver.get(url)

        # Wait for the links to be loaded; move on if none appear in time
        try:
            wait.until(EC.presence_of_element_located((By.CLASS_NAME, "image-link")))
        except TimeoutException:
            print(f"Timed out waiting for laptop links on page {page_num}; skipping page")
            continue

        # Find all links to individual laptops on the page
        laptop_links = driver.find_elements(by=By.CLASS_NAME, value="image-link")

        # Collect the hrefs up front, before navigating away from the listing
        # page, and drop links that carry no href at all
        laptop_urls = [
            href
            for href in (link.get_attribute('href') for link in laptop_links)
            if href
        ]

        # Iterate over each laptop URL (named so it does not clobber `url`)
        for laptop_url in laptop_urls:
            # Go to the laptop's individual page
            driver.get(laptop_url)

            # Wait for the page to load; skip this laptop if it never does
            try:
                wait.until(EC.presence_of_element_located((By.CLASS_NAME, "sku-title")))
            except TimeoutException:
                print(f"Timed out loading {laptop_url}; skipping laptop")
                continue

            # Parse the rendered page
            soup = BeautifulSoup(driver.page_source, 'html.parser')

            # Initialize a dictionary to store the laptop specs
            laptop_specs = {}

            # Price: "Not available" when either the price container or the
            # visible price span is missing
            price_element = soup.find("div", class_="priceView-hero-price priceView-customer-price")
            price_span = None
            if price_element is not None:
                price_span = price_element.find("span", attrs={"aria-hidden": "true"})
            laptop_specs["Price"] = price_span.text if price_span is not None else "Not available"

            # Get review stats element
            review_element = soup.find("li", class_="ugc-stat mr-100 pr-100 customer-review-stats mb-150 mb-lg-100")

            # Both review fields default to None when they cannot be extracted
            stars = None
            num_reviews = None
            if review_element is not None:
                # Extract stars; non-numeric text is treated as "no rating"
                stars_element = review_element.find("span", class_="ugc-c-review-average font-weight-medium order-1")
                if stars_element is not None:
                    try:
                        stars = float(stars_element.text)
                    except ValueError:
                        stars = None

                # Extract number of reviews, stripping the label, parentheses
                # and thousands separators around the count
                num_reviews_element = review_element.find("span", class_="c-reviews order-2")
                if num_reviews_element is not None:
                    num_reviews = (
                        num_reviews_element.text
                        .replace(' Reviews', '')
                        .replace('(', '')
                        .replace(')', '')
                        .replace(',', '')
                        .strip()
                    )

            laptop_specs["Stars"] = stars
            laptop_specs["Number of Reviews"] = num_reviews

            # Initialize a dictionary to store the values of the key specs
            key_specs = dict.fromkeys(key_spec_names)

            # Iterate over all specification list items
            for item in soup.find_all("li", class_="list-item"):
                # Find the title of the current item
                title_element = item.find("div", class_="title-container v-fw-medium v-p-right-xs col-xs-6")
                if title_element is None:
                    continue

                # Only keep rows whose title is one of the key specs
                title = title_element.text.strip()
                if title not in key_specs:
                    continue

                # Extract its value; a row without a value cell leaves the
                # spec as None instead of raising AttributeError
                value_element = item.find("div", class_="row-value v-fw-regular col-xs-6")
                if value_element is not None:
                    key_specs[title] = value_element.text.strip()

            # Merge laptop_specs and key_specs
            laptop_specs.update(key_specs)

            # Append laptop_specs to laptop_data
            laptop_data.append(laptop_specs)
finally:
    # Close the Selenium WebDriver, even if the crawl stopped on an error
    driver.quit()

# Build a table with one row per scraped laptop dictionary
df = pd.DataFrame(data=laptop_data)

# Write the table to disk as comma-separated values, without the row index
df.to_csv(path_or_buf='laptops.csv', sep=',', index=False)


Processing page 1
Processing page 2
Processing page 3
Processing page 4
Processing page 5
Processing page 6
Processing page 7
Processing page 8
Processing page 9
Processing page 10
Processing page 11
Processing page 12
Processing page 13
Processing page 14
Processing page 15
Processing page 16
Processing page 17
Processing page 18
Processing page 19
Processing page 20
Processing page 21
Processing page 22
Processing page 23
Processing page 24
Processing page 25
Processing page 26
Processing page 27
Processing page 28
Processing page 29
Processing page 30
Processing page 31
Processing page 32
Processing page 33
Processing page 34
Processing page 35
Processing page 36
Processing page 37
Processing page 38
Processing page 39
Processing page 40
Processing page 41
Processing page 42
Processing page 43
Processing page 44
Processing page 45
Processing page 46
Processing page 47
Processing page 48
Processing page 49
Processing page 50
Processing page 51
Processing page 52
Processing page 53
Pr

In [2]:
# Load the saved laptop table back from the CSV file and display it
df = pd.read_csv('laptops.csv', sep=',')
df

Unnamed: 0,Price,Stars,Number of Reviews,Product Name,Brand,Year of Release,Color Category,Display Type,Screen Size,Screen Resolution,...,GPU Brand,Graphics,Processor Brand,Processor Model,Number of CPU Cores,CPU Base Clock Frequency,Product Weight,Backlit Keyboard,Front Facing Camera Video Resolution,Number of HDMI Outputs (Total)
0,$149.99,4.2,1926,"14"" Laptop - Intel Celeron - 4GB Memory - 64GB...",HP,2022.0,Silver,LCD,14 inches,1366 x 768 (HD),...,Intel,Intel UHD Graphics 600,Intel,Intel Celeron,4-core (quad-core),1.1 gigahertz,3.24 pounds,No,,1.0
1,$749.99,4.6,384,"ENVY 2-in-1 14"" Full HD Touch-Screen Laptop - ...",HP,2023.0,Silver,,14 inches,1920 x 1080 (Full HD),...,Intel,Intel Iris Xe Graphics,Intel,Intel 13th Generation Core i7,10-core,5 gigahertz,3.35 pounds,Yes,,1.0
2,$429.99,4.7,693,"Ideapad 3i 15.6"" FHD Touch Laptop - Core i5-11...",Lenovo,2022.0,Gray,LCD,15.6 inches,1920 x 1080 (Full HD),...,Intel,Intel Iris Xe Graphics,Intel,Intel 11th Generation Core i5,4-core (quad-core),4.2 gigahertz,3.74 pounds,No,720p,1.0
3,$649.99,4.5,1810,"15.6"" Touch-Screen Laptop - Intel Core i7 - 16...",HP,2021.0,Silver,LED,15.6 inches,1920 x 1080 (Full HD),...,Intel,Intel Iris Xe Graphics,Intel,Intel 11th Generation Core i7,4-core (quad-core),4.7 gigahertz,3.75 pounds,No,720p,1.0
4,$149.00,4.4,266,"14"" Chromebook - Intel Celeron - 4GB Memory - ...",HP,2022.0,Silver,,14 inches,1366 x 768 (HD),...,Intel,Intel UHD Graphics 600,Intel,Intel Celeron,4-core (quad-core),2.6 gigahertz,3.35 pounds,No,720p,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1343,Not available,,,"Galaxy Book3 Pro 360 2-in-1 16"" 3K AMOLED Touc...",Samsung,2023.0,Brown,OLED,16 inches,2880 x 1800,...,Intel,Intel Iris Xe Graphics,Intel,Intel 13th Generation Core i7,12-core,5 gigahertz,3.66 pounds,Yes,1080p,1.0
1344,$177.99,,,"14.0"" Laptop - Intel Celeron N4020 - 4GB Memor...",ASUS,2022.0,Rose gold,,14 inches,1360 x 768 (HD),...,Intel,Intel UHD Graphics,Intel,Intel Pentium,,,2.87 pounds,No,,
1345,Not available,,,"Galaxy Book3 Ultra 16"" 3K AMOLED Laptop - Inte...",Samsung,2023.0,Gray,OLED,16 inches,2880 x 1800,...,NVIDIA,NVIDIA GeForce RTX 4050,Intel,Intel 13th Generation Core i7,14-core,5 gigahertz,3.95 pounds,Yes,1080p,1.0
1346,"$1,325.99",,,"Victus 16.1"" Gaming Laptop - Intel Core i7-12...",HP,2022.0,Silver,LED,16.1 inches,1920 x 1080 (Full HD),...,NVIDIA,NVIDIA GeForce RTX 3060,Intel,Intel 12th Generation Core i7,,,5.44 pounds,Yes,720p,1.0
