In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
from datetime import datetime
from selenium.webdriver.chrome.service import Service
import re

# Initialize Chrome driver with Service
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run in headless mode (no GUI)
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

# List of URLs to scrape
urls = [
    "https://www.action.com/nl-nl/search/?q=choco+moment",
    "https://www.action.com/nl-nl/search/?q=snacks+of+the+world",    
    "https://www.action.com/nl-nl/search/?q=natural+happiness",
    "https://www.action.com/fr-fr/search/?q=choco+moment",
    "https://www.action.com/fr-fr/search/?q=snacks+of+the+world",
    "https://www.action.com/fr-fr/search/?q=natural+happiness",
    "https://www.action.com/de-de/search/?q=choco+moment",
    "https://www.action.com/de-de/search/?q=snacks+of+the+world",
    "https://www.action.com/de-de/search/?q=natural+happiness",
    "https://www.action.com/de-de/search/?q=natural+happiness",
    "https://www.action.com/pl-pl/search/?q=choco+moment",
    "https://www.action.com/pl-pl/search/?q=snacks+of+the+world",
    "https://www.action.com/pl-pl/search/?q=natural+happiness"
    
]

# List to store all product data
all_products = []

# Loop through all URLs
for url in urls:
    print(f"Scraping data from {url}")
    
    # Extract the country code from the URL
    country = url.split("https://www.action.com/")[1].split("/")[0][:2]

    driver.get(url)
    time.sleep(8)  # Increase sleep time to allow for page load

    # Accept cookies if the popup appears
    try:
        accept_button = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.ID, "CybotCookiebotDialogBodyLevelButtonLevelOptinDeclineAll"))
        )
        accept_button.click()
        time.sleep(3)  # Allow time for cookie banner to disappear
    except Exception:
        print("No cookies popup found.")
    
    time.sleep(5)
    
    # Parse the page source with BeautifulSoup
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # List to store extracted product information for this URL
    products = []

    # Loop through all product elements
    for product_card in soup.find_all('a', {'data-testid': 'product-card-link'}):
        # Extract product title
        title_tag = product_card.find('span', {'data-testid': 'product-card-title'})
        title = title_tag.get_text(strip=True) if title_tag else 'Title not found'

        # Extract product description
        description_tag = product_card.find('span', {'data-testid': 'product-card-description'})
        description = description_tag.get_text(strip=True) if description_tag else 'Description not found'

        # Extract price whole part
        price_whole_tag = product_card.find('span', {'data-testid': 'product-card-price-whole'})
        price_whole = price_whole_tag.get_text(strip=True) if price_whole_tag else '0'

        # Extract price fractional part
        price_fractional_tag = product_card.find('span', {'data-testid': 'product-card-price-fractional'})
        price_fractional = price_fractional_tag.get_text(strip=True) if price_fractional_tag else '00'

        # Combine whole and fractional prices
        price = f"{price_whole}.{price_fractional}"

        # Extract price per kilo (if available)
        priceperkilo_tag = product_card.find('span', {'data-testid': 'product-card-price-description'})
        priceperkilo = priceperkilo_tag.get_text(strip=True) if priceperkilo_tag else 'Weight not found'

        # Extract product code from image URL
        product_code = 'Code not found'
        href = product_card.get('href', '')
        
        match = re.search(r"/p/(\d+)/", href)
        if match:
            product_code = match.group(1)




        # Store product details
        products.append({
            'Product Code': product_code,
            'Product Title': title,
            'Description': description,
            'Price (€)': price,
            'Country': country,
            'Price Per Kilo': priceperkilo
        })

    # Add the current URL and timestamp to each product
    timestamp = datetime.now().strftime('%Y-%m-%d')
    for product in products:
        product['Timestamp'] = timestamp

    # Add the products for this URL to the overall list
    all_products.extend(products)

# Convert to DataFrame
df = pd.DataFrame(all_products)

# Save to Excel file
excel_filename = 'Action_data.xlsx'
df.to_excel(excel_filename, index=False, engine='openpyxl')
print(f"Data has been successfully saved to {excel_filename}")

# Close the driver
driver.quit()

Scraping data from https://www.action.com/nl-nl/search/?q=choco+moment
Scraping data from https://www.action.com/nl-nl/search/?q=snacks+of+the+world
No cookies popup found.
Scraping data from https://www.action.com/nl-nl/search/?q=natural+happiness
No cookies popup found.
Scraping data from https://www.action.com/fr-fr/search/?q=choco+moment
No cookies popup found.
Scraping data from https://www.action.com/fr-fr/search/?q=snacks+of+the+world
No cookies popup found.
Scraping data from https://www.action.com/fr-fr/search/?q=natural+happiness
No cookies popup found.
Scraping data from https://www.action.com/de-de/search/?q=choco+moment
No cookies popup found.
Scraping data from https://www.action.com/de-de/search/?q=snacks+of+the+world
No cookies popup found.
Scraping data from https://www.action.com/de-de/search/?q=natural+happiness
No cookies popup found.
Scraping data from https://www.action.com/de-de/search/?q=natural+happiness
No cookies popup found.
Scraping data from https://www.ac