In [None]:
# Upgrade setuptools first, then install the packages listed in requirements.txt.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install --upgrade setuptools
%pip install -r requirements.txt

In [None]:
from seleniumwire import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import json
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from seleniumwire.utils import decode

In [None]:
# Scrape configuration
folder = "superzoo"        # directory the JSON export is written into
fileName = "dogsProducts"  # export file name, without the .json extension
website = "https://www.superzoo.cl/perro"  # first page opened by the browser

In [None]:
# Browser setup: a selenium-wire Chrome session, so the network traffic of each
# visited page can be inspected later through `driver.requests`.
chrome_options = Options()
chrome_options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})
chrome_options.add_argument('--auto-open-devtools-for-tabs')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')

# `disable_encoding` is a selenium-wire option, not a Chrome command-line switch:
# passed as '--disable_encoding' to Chrome it had no effect. Given to selenium-wire
# it asks servers for uncompressed responses.
driver = webdriver.Chrome(
    options=chrome_options,
    seleniumwire_options={'disable_encoding': True},
)

driver.execute_cdp_cmd("Network.enable", {})

driver.get(website)

In [None]:
# Accumulators filled by the crawl:
#   products   -> one dict per product tile found on the category pages
#   categories -> one {'url', 'title'} dict per category tile
products, categories = [], []

In [None]:
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
import time
import random

def scan_products():
    """Append one record per product tile of the current page to ``products``.

    Waits (through the notebook-level ``wait``) until at least one
    ``.product-tile`` element is present, then reads the image URL, product URL,
    brand and description of every tile. Any exception raised by the wait or by
    an element lookup propagates to the caller.
    """
    def _nested(root, outer_css, inner_css):
        # Two-step lookup: first the container inside `root`, then the element inside it.
        container = root.find_element(By.CSS_SELECTOR, outer_css)
        return container.find_element(By.CSS_SELECTOR, inner_css)

    tile_locator = (By.CSS_SELECTOR, '.product-tile')
    tiles = wait.until(EC.presence_of_all_elements_located(tile_locator))
    for tile in tiles:
        image_url = _nested(tile, '.image-container', 'img').get_attribute('src')
        product_url = _nested(tile, '.pdp-link', '.link').get_attribute('href')
        brand = _nested(tile, '.pdp-brand', '.product-brand').text
        description = _nested(tile, '.pdp-link', '.text-base').text
        record = {
            'image_url': image_url,
            'product_url': product_url,
            'brand': brand,
            'description': description,
        }
        products.append(record)

def go_to_next_page(max_stale_retries=3):
    """Click the "show more" button once to load the next batch of products.

    Relies on the notebook-level ``driver`` and ``wait`` objects.

    Args:
        max_stale_retries: How many times to look the button up again after its
            reference went stale while scrolling to it or clicking it.

    Returns:
        bool: True when the button was clicked and the clicked element was then
        detached from the DOM; False when no button shows up within the wait
        timeout (end of pagination) or when the stale retries are exhausted.

    Raises:
        TimeoutException: If the clicked button does not go stale within the
            wait timeout.
    """
    # Imported here so the function does not depend on another cell's imports.
    from selenium.common.exceptions import TimeoutException

    button_locator = (By.CSS_SELECTOR, '.show-more .text-center .btn')

    # Bounded loop instead of unbounded recursion on stale references.
    for _ in range(max_stale_retries + 1):
        try:
            next_button = wait.until(EC.presence_of_element_located(button_locator))
        except (NoSuchElementException, TimeoutException):
            # WebDriverWait.until signals a missing element with TimeoutException,
            # so this is the branch that actually detects the last page.
            print("Botón 'next' no encontrado, final de la paginación.")
            return False

        try:
            driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
            driver.execute_script("arguments[0].click();", next_button)
            wait.until(EC.staleness_of(next_button))
        except StaleElementReferenceException:
            print("El botón 'next' ya no es válido en el DOM.")
            time.sleep(2)
            continue

        # Random pause between page loads.
        time.sleep(random.uniform(3, 10))
        return True

    return False

def go_to_category(category):
    """Open a category page, expand its pagination, then scan its products.

    Args:
        category: Mapping with a ``'title'`` (used for the progress message) and
            a ``'url'`` (opened in the browser).

    Pagination and scanning are best-effort: an error in either step is printed
    and does not propagate, so one failing category does not abort the crawl.
    The products are scanned exactly once, after pagination has stopped.
    """
    print(f"Category {category['title']} ...")
    driver.get(category['url'])

    # Keep clicking "show more" until it reports that there is nothing left.
    try:
        while go_to_next_page():
            pass
    except Exception as e:
        print(f"Error inesperado: {e}")

    # Scan once, whatever stopped the pagination. Guarded separately so a scan
    # failure is neither retried (which would append duplicates of the tiles
    # already read) nor allowed to escape.
    try:
        scan_products()
    except Exception as e:
        print(f"Error inesperado: {e}")

# Shared explicit wait (30 s timeout) read by the helper functions defined above.
wait = WebDriverWait(driver, 30)

# Read every category tile present on the currently loaded page.
category_locator = (By.CSS_SELECTOR, '.category-item')
category_items = wait.until(EC.presence_of_all_elements_located(category_locator))

for item in category_items:
    category_url = item.find_element(By.CSS_SELECTOR, 'a').get_attribute('href')
    category_tile = item.find_element(By.CSS_SELECTOR, '.category-tile')
    category_title = category_tile.find_element(By.CSS_SELECTOR, 'p').text
    categories.append({'url': category_url, 'title': category_title})

# Crawl the categories one at a time, pausing 5-10 s between them.
for category in categories:
    go_to_category(category)
    pause_seconds = random.uniform(5, 10)
    time.sleep(pause_seconds)
    
                

In [None]:
# Dump everything accumulated in `products` so far for a quick visual check.
print(products)

In [None]:
def _variation_product(request):
    """Return the ``product`` dict of a Product-Variation JSON response, else None.

    None is returned when the request has no response, is not for a superzoo
    URL, is not JSON, cannot be decoded or parsed, reports a different
    ``action``, or carries no ``product`` object.
    """
    response = request.response
    if not response or "superzoo" not in request.url:
        return None

    # Substring match so values such as 'application/json; charset=utf-8' are accepted.
    content_type = response.headers.get('Content-Type') or ''
    if 'application/json' not in content_type.lower():
        return None

    try:
        body = json.loads(decode(response.body, response.headers.get('Content-Encoding', 'identity')))
    except ValueError:
        # Undecodable or malformed body: ignore this response instead of aborting the run.
        return None

    if not isinstance(body, dict) or body.get('action') != "Product-Variation":
        return None

    variation = body.get('product')
    return variation if isinstance(variation, dict) else None


for product in products:
    # selenium-wire keeps every captured request until it is cleared. Without this
    # reset each product would rescan the traffic of all earlier pages and could
    # pick up another product's data.
    del driver.requests

    driver.get(product['product_url'])
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.carousel-item')))

    for request in driver.requests:
        variation = _variation_product(request)
        if variation is None:
            continue

        print (request.url)
        product['ean'] = variation.get('ean')
        product['name'] = variation.get('productName')
        # Keep the brand scraped from the listing when the response has none.
        product['brand'] = variation.get('brand') or product.get('brand')
        product['product_weight'] = variation.get('productWeight')
        product['item_category'] = variation.get('itemCategory')

        # `or {}` also covers keys that are present but null.
        price_info = variation.get('price') or {}
        sales_price = price_info.get('sales') or {}
        if sales_price:
            product['price'] = sales_price.get('value')
            product['currency'] = sales_price.get('currency')

In [None]:
# Print the records again, now including any fields added from the product pages.
print(products)

In [None]:
# Imported here so this cell does not depend on another cell's imports.
from pathlib import Path

# Write the scraped records as indented UTF-8 JSON to <folder>/<fileName>.json.
output_dir = Path(folder)
# open() does not create missing directories, so make sure the target folder exists.
output_dir.mkdir(parents=True, exist_ok=True)

with open(output_dir / f"{fileName}.json", 'w', encoding='utf-8') as json_file:
    json.dump(products, json_file, ensure_ascii=False, indent=4)

print("Archivo JSON generado correctamente.")

In [None]:
# Close the browser and end the WebDriver session.
driver.quit()