## **Install dependencies**

In [1]:
!pip install selenium webdriver-manager pandas numpy matplotlib seaborn pyodbc



## **Import libraries + Setup Selenium**

In [2]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

## **Create Selenium driver**

In [3]:
chrome_options = Options()
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.page_load_strategy = 'eager'

prefs = {"profile.managed_default_content_settings.images": 2}
chrome_options.add_experimental_option("prefs", prefs)

chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36")

# Driver Start
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)

driver.set_page_load_timeout(30)

print("Optimized Browser Opened!")

Optimized Browser Opened!


## **Category 1 Sports & Outdoor**

In [None]:
category_url = "https://www.banggood.com/Wholesale-Sports-and-Outdoors-ca-6001.html?bid=210702&from=nav"
product_links = []

print(f"Going to category page: {category_url}")

# --- FIX: Timeout Handling ---
try:
    driver.get(category_url)
except TimeoutException:
    print("Page load took too long! Continuing anyway (Eager mode is working)...")
    driver.execute_script("window.stop();") 

time.sleep(5) 

# === SCROLLING LOGIC ===
print("Scrolling to load items...")
last_height = driver.execute_script("return document.body.scrollHeight")
for _ in range(3): 
    try:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    except Exception as e:
        print(f"Scroll error (ignoring): {e}")
        break

try:
    print("Extracting links...")
    
    all_links = driver.find_elements(By.TAG_NAME, "a")
    
    for a in all_links:
        try:
            href = a.get_attribute("href")
            if href and "-p-" in href and ".html" in href:
                if href not in product_links:
                    product_links.append(href)
        except:
            continue 

    print(f"Method 1 Found: {len(product_links)}")

    if len(product_links) == 0:
        print("Method 1 yielded 0 links. Trying CSS selectors...")
        potential_selectors = [".product-list a.title", ".p-wrap a", "ul.goodlist a", ".list-view a.title"]
        
        for selector in potential_selectors:
            elements = driver.find_elements(By.CSS_SELECTOR, selector)
            if elements:
                for elem in elements:
                    try:
                        href = elem.get_attribute("href")
                        if href and "-p-" in href and href not in product_links:
                            product_links.append(href)
                    except:
                        continue
                if len(product_links) > 0:
                    break

    print(f"Total Unique Links Found: {len(product_links)}")

except Exception as e:
    print(f"Error during extraction: {e}")

print(f"First 5 Links: {product_links[:5]}")

Going to category page: https://www.banggood.com/Wholesale-Sports-and-Outdoors-ca-6001.html?bid=210702&from=nav
Scrolling to load items...
Extracting links...
Method 1 Found: 94
Total Unique Links Found: 94
First 5 Links: ['https://www.banggood.com/MO-FLY-P-51D-Mustang-1-or-16-Scale-685mm-Wingspan-2_4GHz-10CH-Built-in-Gyro-3D-or-6G-Switchable-One-Key-Aerobatics-Brushless-EPP-FPV-RC-Airplane-Glider-BNF-or-RTF-Compatible-DSM-SBUS-p-2040504.html?bid=88259&utm_design=132', 'https://www.banggood.com/POPDEER-PD-JA5-5000A-Jump-Starter-with-Air-Compressor-150-PSI-Tire-Inflator-12V-37Wh-Power-Bank-PD30W-Type-C-Quick-Charge-LCD-Display-Flashlight-Storage-Case-Suitable-for-10L-Gas-or-8L-Diesel-Engines-p-2036291.html?bid=86368&cur_warehouse=CZ&ID=6287830', 'https://www.banggood.com/BlitzWolf-BW-TRV16-WiFi-Smart-Thermostatic-Radiator-Valve-Intelligent-Thermostat-Temperature-Controller-Programmable-Set-Schedules-APP-and-Voice-Control-Work-with-Alexa-Google-Home-p-2041704.html?bid=76741&cur_warehouse

In [None]:
scraped_data = []
links_to_scrape = product_links

print(f"Starting extraction for {len(links_to_scrape)} products...")

for i, url in enumerate(links_to_scrape):
    # --- 1. Page Load with Retry ---
    for attempt in range(3):
        try:
            driver.get(url)
            if "site can’t be reached" in driver.title:
                raise Exception("Page load failed")
            break 
        except Exception:
            time.sleep(2)
            if attempt == 2:
                print(f"[{i+1}] Failed to load URL.")

    try:

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight / 2);")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

        product_info = {"URL": url, "Category": "Sports & Outdoors"}

        try:
            title = driver.find_element(By.CSS_SELECTOR, ".product-title-text").text
            product_info['Name'] = title
        except:
            try:
                title = driver.find_element(By.TAG_NAME, "h1").text
            except:
                product_info['Name'] = "N/A"

        # --- 2. Price ---
        try:
            try:
                price = driver.find_element(By.CSS_SELECTOR, ".main-price").text
            except:
                price = driver.find_element(By.CSS_SELECTOR, ".current-price").text
            
            product_info['Price'] = price
        except:
            product_info['Price'] = "0.00"

        # --- 3. Rating (XPath Method) ---
        try:
            rating = driver.find_element(By.CSS_SELECTOR, ".review-score").text
        except:
            try:
                rating = driver.find_element(By.XPATH, "//span[contains(@class, 'score')]").text
            except:
                try:

                    rating = driver.find_element(By.CSS_SELECTOR, ".star-num").text
                except:
                    rating = "0.0" 
        
        product_info['Rating'] = rating

        # --- 4. Reviews (XPath Text Method) ---
        try:
            review_text = driver.find_element(By.XPATH, "//*[contains(text(), 'Reviews') and contains(@class, 'num')]").text
            import re
            reviews = re.findall(r'\d+', review_text)
            product_info['Reviews'] = reviews[0] if reviews else "0"
        except:
            try:
                review_text = driver.find_element(By.CSS_SELECTOR, ".review-num").text
                import re
                reviews = re.findall(r'\d+', review_text)
                product_info['Reviews'] = reviews[0] if reviews else "0"
            except:
                product_info['Reviews'] = "0"

        print(f"[{i+1}] Name: {product_info['Name'][:10]}... | Price: {product_info['Price']} | Rate: {product_info['Rating']} | Rev: {product_info['Reviews']} | URL: {product_info['URL'][:20]}...")
        scraped_data.append(product_info)

    except Exception as e:
        print(f"[{i+1}] Skipped: {e}")
        continue

print("Extraction Complete!")

KeyboardInterrupt: 

In [None]:

df = pd.DataFrame(scraped_data)
try:
    df['Price'] = df['Price'].str.replace('US', '').str.replace('$', '').str.strip()
except:
    pass

csv_name = "banggood_sports_&_outdoor_data.csv"
df.to_csv(csv_name, index=False)

print(f"Data Saved Successfully to {csv_name}")
print(df.head())

Data Saved Successfully to banggood_sports_&_outdoor_data.csv
                                                 URL           Category  \
0  https://www.banggood.com/MO-FLY-P-51D-Mustang-...  Sports & Outdoors   
1  https://www.banggood.com/POPDEER-PD-JA5-5000A-...  Sports & Outdoors   
2  https://www.banggood.com/BlitzWolf-BW-TRV16-Wi...  Sports & Outdoors   
3  https://www.banggood.com/BlitzWolf-BW-AG1-Pro-...  Sports & Outdoors   
4  https://www.banggood.com/BlitzWolf-BW-FYE17-bl...  Sports & Outdoors   

                                                Name   Price Rating Reviews  
0  MO-FLY P-51D Mustang 1/16 Scale 685mm Wingspan...  135.99              4  
1                                                                         0  
2                                                                         0  
3  BlitzWolf BW-AG1 Pro AI Smart Glasses with HD ...   52.99              4  
4  BlitzWolf® BW-FYE17 bluetooth Earphone 2-in-1 ...   29.99              0  


## **Category 2 Automobiles-and-Motorcycles**

In [4]:
category_url = "https://www.banggood.com/Wholesale-Automobiles-and-Motorcycles-ca-4001.html?bid=210704&from=nav"
product_links = []

print(f"Going to category page: {category_url}")


try:
    driver.get(category_url)
except TimeoutException:
    print("Page load took too long! Continuing anyway (Eager mode is working)...")
    driver.execute_script("window.stop();") 

time.sleep(5)

print("Scrolling to load items...")
last_height = driver.execute_script("return document.body.scrollHeight")
for _ in range(3):
    try:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    except Exception as e:
        print(f"Scroll error (ignoring): {e}")
        break

try:
    print("Extracting links...")
    
    all_links = driver.find_elements(By.TAG_NAME, "a")
    
    for a in all_links:
        try:
            href = a.get_attribute("href")
            if href and "-p-" in href and ".html" in href:
                if href not in product_links:
                    product_links.append(href)
        except:
            continue

    print(f"Method 1 Found: {len(product_links)}")

    if len(product_links) == 0:
        print("Method 1 yielded 0 links. Trying CSS selectors...")
        potential_selectors = [".product-list a.title", ".p-wrap a", "ul.goodlist a", ".list-view a.title"]
        
        for selector in potential_selectors:
            elements = driver.find_elements(By.CSS_SELECTOR, selector)
            if elements:
                for elem in elements:
                    try:
                        href = elem.get_attribute("href")
                        if href and "-p-" in href and href not in product_links:
                            product_links.append(href)
                    except:
                        continue
                if len(product_links) > 0:
                    break

    print(f"Total Unique Links Found: {len(product_links)}")

except Exception as e:
    print(f"Error during extraction: {e}")

print(f"First 5 Links: {product_links[:5]}")

Going to category page: https://www.banggood.com/Wholesale-Automobiles-and-Motorcycles-ca-4001.html?bid=210704&from=nav
Scrolling to load items...
Extracting links...
Method 1 Found: 60
Total Unique Links Found: 60
First 5 Links: ['https://www.banggood.com/SEALIGHT-S2S-H7-Pair-Car-Front-LED-Headlight-Car-Headlamp-Hi-or-Low-Beam-Super-Brightness-Light-Bulb-6500K-Cool-White-p-2008375.html?rmmds=AutomobilesFlashDeals&cur_warehouse=CN&ID=0', 'https://www.banggood.com/12V-or-24V-Car-Heater-Fan-Fast-Heat-Truck-or-Excavator-Interior-Heating-Auto-Electric-Heater-Cooling-Auto-Windshield-Defroster-Defogger-Demister-p-2023894.html?rmmds=AutomobilesFlashDeals&cur_warehouse=CN&ID=495546287830', 'https://www.banggood.com/Rain-Shoe-Covers-Waterproof-Anti-Slip-Rainproof-Motorcycle-Shoe-Covers-High-Long-Tube-Waterproof-Shoe-Covers-Non-Slip-Thickened-Outdoor-Riding-Sandproof-Snowproof-p-2013254.html?rmmds=AutomobilesFlashDeals&cur_warehouse=CN&ID=521822', 'https://www.banggood.com/250W-PD-Car-Charger-QC

In [5]:
scraped_data = []
links_to_scrape = product_links

print(f"Starting extraction for {len(links_to_scrape)} products...")

for i, url in enumerate(links_to_scrape):
    for attempt in range(3):
        try:
            driver.get(url)
            if "site can’t be reached" in driver.title:
                raise Exception("Page load failed")
            break 
        except Exception:
            time.sleep(2)
            if attempt == 2:
                print(f"[{i+1}] Failed to load URL.")

    try:
        # --- FIX: Deep Scroll to trigger Reviews ---
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight / 2);")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2) 
        product_info = {"URL": url, "Category": "Sports & Outdoors"}

        # --- 1. Product Name ---
        try:
            title = driver.find_element(By.CSS_SELECTOR, ".product-title-text").text
            product_info['Name'] = title
        except:
            try:
                title = driver.find_element(By.TAG_NAME, "h1").text
            except:
                product_info['Name'] = "N/A"

        # --- 2. Price ---
        try:
            try:
                price = driver.find_element(By.CSS_SELECTOR, ".main-price").text
            except:
                price = driver.find_element(By.CSS_SELECTOR, ".current-price").text
            
            product_info['Price'] = price
        except:
            product_info['Price'] = "0.00"

        # --- 3. Rating (XPath Method) ---
        try:
            rating = driver.find_element(By.CSS_SELECTOR, ".review-score").text
        except:
            try:
                rating = driver.find_element(By.XPATH, "//span[contains(@class, 'score')]").text
            except:
                try:
                    rating = driver.find_element(By.CSS_SELECTOR, ".star-num").text
                except:
                    rating = "0.0" 
        
        product_info['Rating'] = rating

        # --- 4. Reviews (XPath Text Method) ---
        try:
            review_text = driver.find_element(By.XPATH, "//*[contains(text(), 'Reviews') and contains(@class, 'num')]").text
            import re
            reviews = re.findall(r'\d+', review_text)
            product_info['Reviews'] = reviews[0] if reviews else "0"
        except:
            try:
                review_text = driver.find_element(By.CSS_SELECTOR, ".review-num").text
                import re
                reviews = re.findall(r'\d+', review_text)
                product_info['Reviews'] = reviews[0] if reviews else "0"
            except:
                product_info['Reviews'] = "0"

        print(f"[{i+1}] Name: {product_info['Name'][:10]}... | Price: {product_info['Price']} | Rate: {product_info['Rating']} | Rev: {product_info['Reviews']} | URL: {product_info['URL'][:20]}...")
        scraped_data.append(product_info)

    except Exception as e:
        print(f"[{i+1}] Skipped: {e}")
        continue

print("Extraction Complete!")

Starting extraction for 60 products...
[1] Name: SEALIGHT S... | Price: £14.70 | Rate:  | Rev: 31 | URL: https://www.banggood...
[2] Name: 12V/24V Ca... | Price: £9.28 | Rate:  | Rev: 2 | URL: https://www.banggood...
[3] Name: Rain Shoe ... | Price: £7.73 | Rate:  | Rev: 10 | URL: https://www.banggood...
[4] Name: 250W PD Ca... | Price: £4.64 | Rate:  | Rev: 70 | URL: https://www.banggood...
[5] Name: 5 Inch Sin... | Price: £35.60 | Rate:  | Rev: 13 | URL: https://www.banggood...
[6] Name: Car Rear V... | Price: £12.38 | Rate:  | Rev: 0 | URL: https://www.banggood...
[7] Name: AZDOME GS6... | Price: £85.15 | Rate:  | Rev: 1 | URL: https://www.banggood...
[8] Name: 4G GPS Car... | Price: £16.71 | Rate:  | Rev: 2 | URL: https://www.banggood...
[9] Name: 4000/5000/... | Price: £21.67 | Rate:  | Rev: 253 | URL: https://www.banggood...
[10] Name: VCDS 24.7 ... | Price: £13.55 | Rate:  | Rev: 19 | URL: https://www.banggood...
[11] Name: SEALIGHT S... | Price: £12.84 | Rate:  | Rev: 17 | URL:

In [9]:
df = pd.DataFrame(scraped_data)

try:
    df['Price'] = df['Price'].str.replace('US', '').str.replace('$', '').str.strip()
except:
    pass

csv_name = "banggood_automobiles_&_motorcycles_data.csv"
df.to_csv(csv_name, index=False)

print(f"Data Saved Successfully to {csv_name}")
print(df.head())

Data Saved Successfully to banggood_automobiles_&_motorcycles_data.csv
                                                 URL           Category  \
0  https://www.banggood.com/Ninkear-S14-Laptop-14...  Sports & Outdoors   
1  https://www.banggood.com/Wanbo-Vali-1-Smart-Gi...  Sports & Outdoors   
2  https://www.banggood.com/Blitzwolf-V10-3-inch-...  Sports & Outdoors   
3  https://www.banggood.com/BlitzWolf-BW-CML2-Air...  Sports & Outdoors   
4  https://www.banggood.com/AULA-F75-80-Keys-Tri-...  Sports & Outdoors   

                                                Name    Price Rating Reviews  
0  Ninkear S14 Laptop 14 Inch 2.2K Screen AMD Ryz...  £479.97             13  
1  [Netfilx Certified]Wanbo Vali 1 Smart Gimbal P...  £193.53              9  
2  Blitzwolf V10 3" LCD Projector 120-Inch Screen...   £54.18             28  
3  BlitzWolf® BW-CML2 Air Monitor Light Bar Touch...    £9.28            442  
4  AULA F75 80 Keys Tri-mode Customized Mechanica...   £46.44              2  


## **Category 3 Electronics**

In [None]:
category_url = "https://www.banggood.com/Wholesale-Electronics-ca-2001.html?bid=210705&from=nav"
product_links = []

print(f"Going to category page: {category_url}")

# --- FIX: Timeout Handling ---
try:
    driver.get(category_url)
except TimeoutException:
    print("Page load took too long! Continuing anyway (Eager mode is working)...")
    driver.execute_script("window.stop();") 

time.sleep(5) 

# === SCROLLING LOGIC ===
print("Scrolling to load items...")
last_height = driver.execute_script("return document.body.scrollHeight")
for _ in range(3):
    try:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    except Exception as e:
        print(f"Scroll error (ignoring): {e}")
        break

# === EXTRACT LINKS (Robust Method) ===
try:
    print("Extracting links...")
    
    all_links = driver.find_elements(By.TAG_NAME, "a")
    
    for a in all_links:
        try:
            href = a.get_attribute("href")
            if href and "-p-" in href and ".html" in href:
                if href not in product_links:
                    product_links.append(href)
        except:
            continue 

    print(f"Method 1 Found: {len(product_links)}")
    if len(product_links) == 0:
        print("Method 1 yielded 0 links. Trying CSS selectors...")
        potential_selectors = [".product-list a.title", ".p-wrap a", "ul.goodlist a", ".list-view a.title"]
        
        for selector in potential_selectors:
            elements = driver.find_elements(By.CSS_SELECTOR, selector)
            if elements:
                for elem in elements:
                    try:
                        href = elem.get_attribute("href")
                        if href and "-p-" in href and href not in product_links:
                            product_links.append(href)
                    except:
                        continue
                if len(product_links) > 0:
                    break

    print(f"Total Unique Links Found: {len(product_links)}")

except Exception as e:
    print(f"Error during extraction: {e}")

print(f"First 5 Links: {product_links[:5]}")

Going to category page: https://www.banggood.com/Wholesale-Electronics-ca-2001.html?bid=210705&from=nav
Scrolling to load items...
Extracting links...
Method 1 Found: 149
Total Unique Links Found: 149
First 5 Links: ['https://www.banggood.com/EU-DirectFLSUN-T1-Max-3D-Printer-1000mm-or-s-Printing-Speed-0_1mm-Precision-300mmx297mm-Build-Volume-Auto-Leveling-Intuitive-Easy-to-Use-Compatible-with-PLA-or-PETG-or-TPU-p-2043277.html?bid=88145&akmClientCountry=HK&cur_warehouse=CZ&ID=47184', 'https://www.banggood.com/EU-or-US-DirectLONGER-RAY5-40W-Laser-Engraver-with-Air-Assist-Kit-High-Precision-Laser-Cutter-and-Engraver-for-Wood-Metal-Acrylic-Cylindrical-Objects-Tumblers-p-2041656.html?bid=87999&cur_warehouse=CZ&ID=47184', 'https://www.banggood.com/EU-or-US-Direct-ATOMSTACK-Swift-7W-Portable-Laser-Engraver-300x300mm-10000mm-or-min-DIY-Laser-Cut-Engraving-Machine-for-Wood-and-Metal-Dark-Acrylic-Glass-Metal-p-2042356.html?bid=87998&akmClientCountry=HK&cur_warehouse=CZ&ID=628783047184', 'https:/

In [None]:
scraped_data = []
links_to_scrape = product_links

print(f"Starting extraction for {len(links_to_scrape)} products...")

for i, url in enumerate(links_to_scrape):
    # --- 1. Page Load with Retry ---
    for attempt in range(3):
        try:
            driver.get(url)
            if "site can’t be reached" in driver.title:
                raise Exception("Page load failed")
            break 
        except Exception:
            time.sleep(2)
            if attempt == 2:
                print(f"[{i+1}] Failed to load URL.")

    try:
        # --- FIX: Deep Scroll to trigger Reviews ---
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight / 2);")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2) 

        product_info = {"URL": url, "Category": "Sports & Outdoors"}

        # --- 1. Product Name ---
        try:
            title = driver.find_element(By.CSS_SELECTOR, ".product-title-text").text
            product_info['Name'] = title
        except:
            try:
                title = driver.find_element(By.TAG_NAME, "h1").text
            except:
                product_info['Name'] = "N/A"

        # --- 2. Price ---
        try:
            try:
                price = driver.find_element(By.CSS_SELECTOR, ".main-price").text
            except:
                price = driver.find_element(By.CSS_SELECTOR, ".current-price").text
            
            product_info['Price'] = price
        except:
            product_info['Price'] = "0.00"

        # --- 3. Rating (XPath Method) ---
        try:
            rating = driver.find_element(By.CSS_SELECTOR, ".review-score").text
        except:
            try:
                rating = driver.find_element(By.XPATH, "//span[contains(@class, 'score')]").text
            except:
                try:
                    rating = driver.find_element(By.CSS_SELECTOR, ".star-num").text
                except:
                    rating = "0.0"
        
        product_info['Rating'] = rating

        # --- 4. Reviews (XPath Text Method) ---
        try:
            review_text = driver.find_element(By.XPATH, "//*[contains(text(), 'Reviews') and contains(@class, 'num')]").text
            import re
            reviews = re.findall(r'\d+', review_text)
            product_info['Reviews'] = reviews[0] if reviews else "0"
        except:
            try:
                review_text = driver.find_element(By.CSS_SELECTOR, ".review-num").text
                import re
                reviews = re.findall(r'\d+', review_text)
                product_info['Reviews'] = reviews[0] if reviews else "0"
            except:
                product_info['Reviews'] = "0"

        print(f"[{i+1}] Name: {product_info['Name'][:10]}... | Price: {product_info['Price']} | Rate: {product_info['Rating']} | Rev: {product_info['Reviews']} | URL: {product_info['URL'][:20]}...")
        scraped_data.append(product_info)

    except Exception as e:
        print(f"[{i+1}] Skipped: {e}")
        continue

print("Extraction Complete!")

Starting extraction for 149 products...
[1] Name: [EU Direct... | Price: US$699.00 | Rate:  | Rev: 0 | URL: https://www.banggood...
[2] Name: [EU/US Dir... | Price: US$0.00 | Rate:  | Rev: 0 | URL: https://www.banggood...
[3] Name: [EU/US Dir... | Price: US$0.00 | Rate:  | Rev: 0 | URL: https://www.banggood...
[4] Name: 4MP HD WiF... | Price: US$44.99 | Rate:  | Rev: 0 | URL: https://www.banggood...
[5] Name: ABBREE AR-... | Price: US$6.99 | Rate:  | Rev: 5 | URL: https://www.banggood...
[6] Name: Chameleon ... | Price: US$19.99 | Rate:  | Rev: 4 | URL: https://www.banggood...
[7] Name: Laser Engr... | Price: US$9.99 | Rate:  | Rev: 0 | URL: https://www.banggood...
[8] Name: PGST PG-10... | Price: US$42.99 | Rate:  | Rev: 7 | URL: https://www.banggood...
[9] Name: 0.5-470MHz... | Price: US$52.99 | Rate:  | Rev: 3 | URL: https://www.banggood...
[10] Name: FNIRSI-FNB... | Price: US$35.99 | Rate:  | Rev: 222 | URL: https://www.banggood...
[11] Name: RANA Z Axi... | Price: US$16.99 | Rate:

In [None]:
df = pd.DataFrame(scraped_data)

try:
    df['Price'] = df['Price'].str.replace('US', '').str.replace('$', '').str.strip()
except:
    pass

csv_name = "electronics_data.csv"
df.to_csv(csv_name, index=False)

print(f"Data Saved Successfully to {csv_name}")
print(df.head())

Data Saved Successfully to electronics_data.csv
                                                 URL           Category  \
0  https://www.banggood.com/EU-DirectFLSUN-T1-Max...  Sports & Outdoors   
1  https://www.banggood.com/EU-or-US-DirectLONGER...  Sports & Outdoors   
2  https://www.banggood.com/EU-or-US-Direct-ATOMS...  Sports & Outdoors   
3  https://www.banggood.com/4MP-HD-WiFi-Solar-Pow...  Sports & Outdoors   
4  https://www.banggood.com/ABBREE-AR-771S-SMA-Fe...  Sports & Outdoors   

                                                Name   Price Rating Reviews  
0  [EU Direct]FLSUN T1 Max 3D Printer 1000mm/s Pr...  699.00              0  
1  [EU/US Direct]LONGER RAY5 40W Laser Engraver w...    0.00              0  
2  [EU/US Direct] ATOMSTACK Swift 7W Portable Las...    0.00              0  
3  4MP HD WiFi Solar Powered Camera PTZ Night Vis...   44.99              0  
4  ABBREE AR-771S SMA Female Universal Soft Anten...    6.99              5  


## **Category 4 Computers, Video & Games**

In [None]:
category_url = "https://www.banggood.com/Wholesale-Computers-and-Office-ca-5001.html?bid=210707&from=nav"
product_links = []

print(f"Going to category page: {category_url}")

# --- FIX: Timeout Handling ---
try:
    driver.get(category_url)
except TimeoutException:
    print("Page load took too long! Continuing anyway (Eager mode is working)...")
    driver.execute_script("window.stop();") 

time.sleep(5) 
# === SCROLLING LOGIC ===
print("Scrolling to load items...")
last_height = driver.execute_script("return document.body.scrollHeight")
for _ in range(3):
    try:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    except Exception as e:
        print(f"Scroll error (ignoring): {e}")
        break


try:
    print("Extracting links...")
    
    all_links = driver.find_elements(By.TAG_NAME, "a")
    
    for a in all_links:
        try:
            href = a.get_attribute("href")
            if href and "-p-" in href and ".html" in href:
                if href not in product_links:
                    product_links.append(href)
        except:
            continue 

    print(f"Method 1 Found: {len(product_links)}")

    if len(product_links) == 0:
        print("Method 1 yielded 0 links. Trying CSS selectors...")
        potential_selectors = [".product-list a.title", ".p-wrap a", "ul.goodlist a", ".list-view a.title"]
        
        for selector in potential_selectors:
            elements = driver.find_elements(By.CSS_SELECTOR, selector)
            if elements:
                for elem in elements:
                    try:
                        href = elem.get_attribute("href")
                        if href and "-p-" in href and href not in product_links:
                            product_links.append(href)
                    except:
                        continue
                if len(product_links) > 0:
                    break

    print(f"Total Unique Links Found: {len(product_links)}")

except Exception as e:
    print(f"Error during extraction: {e}")

print(f"First 5 Links: {product_links[:5]}")

Going to category page: https://www.banggood.com/Wholesale-Computers-and-Office-ca-5001.html?bid=210707&from=nav
Scrolling to load items...
Extracting links...
Method 1 Found: 62
Total Unique Links Found: 62
First 5 Links: ['https://www.banggood.com/Ninkear-S14-Laptop-14-Inch-2_2K-Screen-AMD-Ryzen-5-7535HS-16GB-DDR5-1TB-SSD-1kg-Weight-60Wh-Battery-WIFI-6-Fingerprint-Unlocking-Backlit-Keyboard-Narrow-Bezel-Notebook-p-2039524.html?bid=87410', 'https://www.banggood.com/Wanbo-Vali-1-Smart-Gimbal-Projector-200-Rotating-1080P-FHD-900-ANSI-Lumens-Android-TV-11-Auto-Focus-Keystone-Correction-Home-Cinema-Entertainment-Streaming-Player-p-2036721.html?bid=87080', 'https://www.banggood.com/Blitzwolf-V10-3-inch-LCD-Projector-120-Inch-Screen-Wireless-Cast-Screen-5G-WIFI-Dual-Bluetooth5_1-Electric-Focus-UPandDown-Keystone-Correction-Home-Theater-Outdoor-Movie-Lights-Outdoor-p-2023431.html?rmmds=ComputersFlashDeals&cur_warehouse=CN&ID=477576325077', 'https://www.banggood.com/BlitzWolf-BW-CML2-Air-Moni

In [8]:
scraped_data = []
links_to_scrape = product_links

print(f"Starting extraction for {len(links_to_scrape)} products...")

for i, url in enumerate(links_to_scrape):
    # --- 1. Page Load with Retry ---
    for attempt in range(3):
        try:
            driver.get(url)
            if "site can’t be reached" in driver.title:
                raise Exception("Page load failed")
            break 
        except Exception:
            time.sleep(2)
            if attempt == 2:
                print(f"[{i+1}] Failed to load URL.")

    try:
     
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight / 2);")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

        product_info = {"URL": url, "Category": "Sports & Outdoors"}

        # --- 1. Product Name ---
        try:
            title = driver.find_element(By.CSS_SELECTOR, ".product-title-text").text
            product_info['Name'] = title
        except:
            try:
                title = driver.find_element(By.TAG_NAME, "h1").text
            except:
                product_info['Name'] = "N/A"

        # --- 2. Price ---
        try:
            try:
                price = driver.find_element(By.CSS_SELECTOR, ".main-price").text
            except:
                price = driver.find_element(By.CSS_SELECTOR, ".current-price").text
            
            product_info['Price'] = price
        except:
            product_info['Price'] = "0.00"

        # --- 3. Rating (XPath Method) ---
        try:
            # Method 1: Class based
            rating = driver.find_element(By.CSS_SELECTOR, ".review-score").text
        except:
            try:
                rating = driver.find_element(By.XPATH, "//span[contains(@class, 'score')]").text
            except:
                try:
                    rating = driver.find_element(By.CSS_SELECTOR, ".star-num").text
                except:
                    rating = "0.0"
        
        product_info['Rating'] = rating

        # --- 4. Reviews (XPath Text Method) ---
        try:
            review_text = driver.find_element(By.XPATH, "//*[contains(text(), 'Reviews') and contains(@class, 'num')]").text
            import re
            reviews = re.findall(r'\d+', review_text)
            product_info['Reviews'] = reviews[0] if reviews else "0"
        except:
            try:
                review_text = driver.find_element(By.CSS_SELECTOR, ".review-num").text
                import re
                reviews = re.findall(r'\d+', review_text)
                product_info['Reviews'] = reviews[0] if reviews else "0"
            except:
                product_info['Reviews'] = "0"

        print(f"[{i+1}] Name: {product_info['Name'][:10]}... | Price: {product_info['Price']} | Rate: {product_info['Rating']} | Rev: {product_info['Reviews']} | URL: {product_info['URL'][:20]}...")
        scraped_data.append(product_info)

    except Exception as e:
        print(f"[{i+1}] Skipped: {e}")
        continue

print("Extraction Complete!")

Starting extraction for 62 products...
[1] Name: Ninkear S1... | Price: £479.97 | Rate:  | Rev: 13 | URL: https://www.banggood...
[2] Name: [Netfilx C... | Price: £193.53 | Rate:  | Rev: 9 | URL: https://www.banggood...
[3] Name: Blitzwolf ... | Price: £54.18 | Rate:  | Rev: 28 | URL: https://www.banggood...
[4] Name: BlitzWolf®... | Price: £9.28 | Rate:  | Rev: 442 | URL: https://www.banggood...
[5] Name: AULA F75 8... | Price: £46.44 | Rate:  | Rev: 2 | URL: https://www.banggood...
[6] Name: Bakeey PB-... | Price: £8.51 | Rate:  | Rev: 873 | URL: https://www.banggood...
[7] Name: GY156SP 15... | Price: £166.44 | Rate:  | Rev: 0 | URL: https://www.banggood...
[8] Name: Lenovo Thi... | Price: £6.96 | Rate:  | Rev: 2 | URL: https://www.banggood...
[9] Name: [Netflix C... | Price: £89.03 | Rate:  | Rev: 6 | URL: https://www.banggood...
[10] Name: BlitzWolf®... | Price: £27.86 | Rate:  | Rev: 26 | URL: https://www.banggood...
[11] Name: AULA F75 8... | Price: £47.22 | Rate:  | Rev: 170 | 

In [10]:
df = pd.DataFrame(scraped_data)

try:
    df['Price'] = df['Price'].str.replace('US', '').str.replace('$', '').str.strip()
except:
    pass

csv_name = "banggood_computer_videos_games_data.csv"
df.to_csv(csv_name, index=False)

print(f"Data Saved Successfully to {csv_name}")
print(df.head())

Data Saved Successfully to banggood_computer_videos_games_data.csv
                                                 URL           Category  \
0  https://www.banggood.com/Ninkear-S14-Laptop-14...  Sports & Outdoors   
1  https://www.banggood.com/Wanbo-Vali-1-Smart-Gi...  Sports & Outdoors   
2  https://www.banggood.com/Blitzwolf-V10-3-inch-...  Sports & Outdoors   
3  https://www.banggood.com/BlitzWolf-BW-CML2-Air...  Sports & Outdoors   
4  https://www.banggood.com/AULA-F75-80-Keys-Tri-...  Sports & Outdoors   

                                                Name    Price Rating Reviews  
0  Ninkear S14 Laptop 14 Inch 2.2K Screen AMD Ryz...  £479.97             13  
1  [Netfilx Certified]Wanbo Vali 1 Smart Gimbal P...  £193.53              9  
2  Blitzwolf V10 3" LCD Projector 120-Inch Screen...   £54.18             28  
3  BlitzWolf® BW-CML2 Air Monitor Light Bar Touch...    £9.28            442  
4  AULA F75 80 Keys Tri-mode Customized Mechanica...   £46.44              2  


## **Category 5 Wholesale Men Women Clothing**

In [11]:
category_url = "https://www.banggood.com/Wholesale-Men-and-Womens-Clothing-ca-18941.html?bid=210711&from=nav"
product_links = []

print(f"Going to category page: {category_url}")

try:
    driver.get(category_url)
except TimeoutException:
    print("Page load took too long! Continuing anyway (Eager mode is working)...")
    driver.execute_script("window.stop();") 

time.sleep(5)

# === SCROLLING LOGIC ===
print("Scrolling to load items...")
last_height = driver.execute_script("return document.body.scrollHeight")
for _ in range(3):
    try:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    except Exception as e:
        print(f"Scroll error (ignoring): {e}")
        break

# === EXTRACT LINKS (Robust Method) ===
try:
    print("Extracting links...")
    
    all_links = driver.find_elements(By.TAG_NAME, "a")
    
    for a in all_links:
        try:
            href = a.get_attribute("href")
            if href and "-p-" in href and ".html" in href:
                if href not in product_links:
                    product_links.append(href)
        except:
            continue 

    print(f"Method 1 Found: {len(product_links)}")

    if len(product_links) == 0:
        print("Method 1 yielded 0 links. Trying CSS selectors...")
        potential_selectors = [".product-list a.title", ".p-wrap a", "ul.goodlist a", ".list-view a.title"]
        
        for selector in potential_selectors:
            elements = driver.find_elements(By.CSS_SELECTOR, selector)
            if elements:
                for elem in elements:
                    try:
                        href = elem.get_attribute("href")
                        if href and "-p-" in href and href not in product_links:
                            product_links.append(href)
                    except:
                        continue
                if len(product_links) > 0:
                    break

    print(f"Total Unique Links Found: {len(product_links)}")

except Exception as e:
    print(f"Error during extraction: {e}")

print(f"First 5 Links: {product_links[:5]}")

Going to category page: https://www.banggood.com/Wholesale-Men-and-Womens-Clothing-ca-18941.html?bid=210711&from=nav
Scrolling to load items...
Extracting links...
Method 1 Found: 43
Total Unique Links Found: 43
First 5 Links: ['https://www.banggood.com/4pcs-Mens-Ice-Silk-Mesh-Boxer-Black-Briefs,Summer-Thin-Style-Breathable-Comfy-Boxer-Trunks,Elastic-Sports-Shorts,Mens-Casual-Underwear-Daily-Bottom-Wear-p-2018692.html?cur_warehouse=CN&rmmds=MenFlashdeal', 'https://www.banggood.com/4pcs-Mens-Ice-Silk-Cool-Boxer-Briefs,Mesh-Breathable-Soft-Comfy-Stretchy-Boxer-Trunks,Sports-Trunks,Multicolor-Mens-Underwear-p-2018491.html?cur_warehouse=CN&rmmds=MenFlashdeal', 'https://www.banggood.com/Mens-Quick-Dry-Cotton-Linen-Shorts-Summer-Breathable-Stripe-Gym-Running-Pocket-Cargo-Beach-Casual-Five-point-Pants-Lightweight-Comfortable-p-2018689.html?cur_warehouse=CN&rmmds=MenFlashdeal', 'https://www.banggood.com/4pcs-Mens-Ice-Silk-Mesh-Boxer-Briefs,Summer-Thin-Style-Breathable-Comfy-Boxer-Trunks,Elasti

In [12]:
scraped_data = []
links_to_scrape = product_links

print(f"Starting extraction for {len(links_to_scrape)} products...")

for i, url in enumerate(links_to_scrape):
    # --- 1. Page Load with Retry ---
    for attempt in range(3):
        try:
            driver.get(url)
            if "site can’t be reached" in driver.title:
                raise Exception("Page load failed")
            break 
        except Exception:
            time.sleep(2)
            if attempt == 2:
                print(f"[{i+1}] Failed to load URL.")

    try:
        # --- FIX: Deep Scroll to trigger Reviews ---
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight / 2);")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

        product_info = {"URL": url, "Category": "Sports & Outdoors"}

        # --- 1. Product Name ---
        try:
            title = driver.find_element(By.CSS_SELECTOR, ".product-title-text").text
            product_info['Name'] = title
        except:
            try:
                title = driver.find_element(By.TAG_NAME, "h1").text
            except:
                product_info['Name'] = "N/A"

        # --- 2. Price ---
        try:
            try:
                price = driver.find_element(By.CSS_SELECTOR, ".main-price").text
            except:
                price = driver.find_element(By.CSS_SELECTOR, ".current-price").text
            
            product_info['Price'] = price
        except:
            product_info['Price'] = "0.00"

        # --- 3. Rating (XPath Method) ---
        try:
            # Method 1: Class based
            rating = driver.find_element(By.CSS_SELECTOR, ".review-score").text
        except:
            try:
                rating = driver.find_element(By.XPATH, "//span[contains(@class, 'score')]").text
            except:
                try:
                    rating = driver.find_element(By.CSS_SELECTOR, ".star-num").text
                except:
                    rating = "0.0"
        
        product_info['Rating'] = rating

        # --- 4. Reviews (XPath Text Method) ---
        try:
            review_text = driver.find_element(By.XPATH, "//*[contains(text(), 'Reviews') and contains(@class, 'num')]").text
            import re
            reviews = re.findall(r'\d+', review_text)
            product_info['Reviews'] = reviews[0] if reviews else "0"
        except:
            try:
                review_text = driver.find_element(By.CSS_SELECTOR, ".review-num").text
                import re
                reviews = re.findall(r'\d+', review_text)
                product_info['Reviews'] = reviews[0] if reviews else "0"
            except:
                product_info['Reviews'] = "0"

        print(f"[{i+1}] Name: {product_info['Name'][:10]}... | Price: {product_info['Price']} | Rate: {product_info['Rating']} | Rev: {product_info['Reviews']} | URL: {product_info['URL'][:20]}...")
        scraped_data.append(product_info)

    except Exception as e:
        print(f"[{i+1}] Skipped: {e}")
        continue

print("Extraction Complete!")

Starting extraction for 43 products...
[1] Name: 4pcs Men's... | Price: £5.80 | Rate:  | Rev: 71 | URL: https://www.banggood...
[2] Name: 4pcs Men's... | Price: £10.06 | Rate:  | Rev: 15 | URL: https://www.banggood...
[3] Name: Men's Quic... | Price: £8.51 | Rate:  | Rev: 4 | URL: https://www.banggood...
[4] Name: 4pcs Men's... | Price: £7.97 | Rate:  | Rev: 13 | URL: https://www.banggood...
[5] Name: 6pcs Men's... | Price: £10.06 | Rate:  | Rev: 12 | URL: https://www.banggood...
[6] Name: 4Pcs Men's... | Price: £9.51 | Rate:  | Rev: 12 | URL: https://www.banggood...
[7] Name: 4 Pcs Men'... | Price: £11.60 | Rate:  | Rev: 8 | URL: https://www.banggood...
[8] Name: INCERUN Me... | Price: £16.25 | Rate:  | Rev: 2 | URL: https://www.banggood...
[9] Name: ChArmkpR M... | Price: £7.73 | Rate:  | Rev: 5 | URL: https://www.banggood...
[10] Name: Mens Ethni... | Price: £10.06 | Rate:  | Rev: 3 | URL: https://www.banggood...
[11] Name: INCERUN Me... | Price: £11.60 | Rate:  | Rev: 0 | URL: http

In [13]:
df = pd.DataFrame(scraped_data)

try:
    df['Price'] = df['Price'].str.replace('US', '').str.replace('$', '').str.strip()
except:
    pass

csv_name = "banggood_men_women_clothing_data.csv"
df.to_csv(csv_name, index=False)

print(f"Data Saved Successfully to {csv_name}")
print(df.head())

Data Saved Successfully to banggood_men_women_clothing_data.csv
                                                 URL           Category  \
0  https://www.banggood.com/4pcs-Mens-Ice-Silk-Me...  Sports & Outdoors   
1  https://www.banggood.com/4pcs-Mens-Ice-Silk-Co...  Sports & Outdoors   
2  https://www.banggood.com/Mens-Quick-Dry-Cotton...  Sports & Outdoors   
3  https://www.banggood.com/4pcs-Mens-Ice-Silk-Me...  Sports & Outdoors   
4  https://www.banggood.com/6pcs-Mens-Ice-Silk-Co...  Sports & Outdoors   

                                                Name   Price Rating Reviews  
0  4pcs Men's Ice Silk Mesh Boxer Black Briefs, S...   £5.80             71  
1  4pcs Men's Ice Silk Cool Boxer Briefs, Mesh Br...  £10.06             15  
2  Men's Quick Dry Cotton Linen Shorts Summer Bre...   £8.51              4  
3  4pcs Men's Ice Silk Mesh Boxer Briefs, Summer ...   £7.97             13  
4  6pcs Men's Ice Silk Cool Soft Seamless Comfy B...  £10.06             12  
