In [2]:
import json
import re
import pandas as pd
from pathlib import Path

# -------------------------------
# Helpers
# -------------------------------

def strip_html(s):
    """Remove HTML tags from ``s`` and return clean, single-spaced text.

    Args:
        s: Text that may contain HTML markup. Falsy values (``None``, ``""``,
            ``0``) yield an empty string; other non-string values are
            converted with ``str()`` before stripping.

    Returns:
        The text with every ``<...>`` tag removed, runs of whitespace
        collapsed to a single space, and no leading/trailing whitespace.
    """
    if not s:
        return ""
    # Each tag is swapped for a space so adjacent words do not get glued
    # together ("a<br>b" -> "a b").
    without_tags = re.sub(r"<[^>]*>", " ", str(s))
    # The substitution leaves double spaces around inline tags; split/join
    # normalizes every whitespace run and trims both ends in one step.
    return " ".join(without_tags.split())

def parse_price(s):
    """Parse a price into a float, tolerating currency symbols and separators.

    Both "1,234.56" (comma thousands, dot decimal) and "1.234,56" (dot
    thousands, comma decimal) are understood: when both separators appear,
    the one that occurs last is taken as the decimal mark. A value with
    several dots and no comma ("1.240.000") is read as dot-grouped thousands.
    A value containing only commas keeps the historical behavior of treating
    them as thousands separators ("1,234" -> 1234.0).

    Args:
        s: Price as a string, or an already-numeric value. Falsy values
            (``None``, ``""``, ``0``) yield ``None``.

    Returns:
        The price as ``float``, or ``None`` when nothing parseable is found.
    """
    if not s:
        return None
    if not isinstance(s, str):
        # Numeric payload fields would make re.sub raise TypeError; convert
        # them directly instead.
        try:
            return float(s)
        except (TypeError, ValueError):
            return None

    cleaned = re.sub(r"[^\d,.\-]", "", s)
    if not cleaned:
        return None

    last_comma = cleaned.rfind(",")
    last_dot = cleaned.rfind(".")
    if last_comma != -1 and last_dot != -1:
        if last_comma > last_dot:
            # "1.234,56": dots group thousands, the comma is the decimal mark.
            cleaned = cleaned.replace(".", "").replace(",", ".")
        else:
            # "1,234.56": commas group thousands.
            cleaned = cleaned.replace(",", "")
    elif cleaned.count(".") > 1:
        # "1.240.000": more than one dot cannot be a decimal mark.
        cleaned = cleaned.replace(".", "")
    else:
        cleaned = cleaned.replace(",", "")

    try:
        return float(cleaned)
    except ValueError:
        return None

def first_image_url(p: dict):
    """Pick the main image URL of a product record.

    A truthy ``"ProductImage"`` value wins. Otherwise the first entry of the
    ``"ImageList"`` list is inspected and the first item of its ``"ImageURL"``
    list is returned. ``None`` is returned when neither source yields a URL.
    """
    direct = p.get("ProductImage")
    if direct:
        return direct

    gallery = p.get("ImageList") or []
    if not (gallery and isinstance(gallery, list)):
        return None

    # A falsy first entry (e.g. None) is treated as an empty mapping.
    candidates = (gallery[0] or {}).get("ImageURL") or []
    if candidates and isinstance(candidates, list):
        return candidates[0]
    return None

def cargar_json(path: Path):
    """Read and decode a UTF-8 JSON file, best effort.

    Returns the decoded JSON value. If the file is missing, or any other
    error occurs while opening or decoding it, a message is printed and
    ``None`` is returned instead of raising.
    """
    try:
        with path.open("r", encoding="utf-8") as handle:
            contenido = json.load(handle)
    except FileNotFoundError:
        print(f"⚠️ Archivo no encontrado: {path}")
    except Exception as exc:
        # Deliberately broad: one unreadable file must not abort the batch.
        print(f"❌ Error leyendo {path}: {exc}")
    else:
        return contenido
    return None

def records_desde_payload_asus(payload):
    """Normalize a decoded JSON payload into a list of product dicts.

    A single dict is wrapped in a one-element list; a list is filtered down
    to its dict items; anything else (including ``None``) gives ``[]``.
    """
    if isinstance(payload, dict):
        return [payload]
    if isinstance(payload, list):
        return list(filter(lambda item: isinstance(item, dict), payload))
    return []

def inferir_categoria_desde_filename(path: Path) -> str:
    """Derive a readable category name from a dump's file name.

    The first "ASUS-" occurrence is removed from the file stem and the
    remaining hyphens become spaces, e.g.::

        'ASUS-notebooks.json'         -> 'notebooks'
        'ASUS-tarjetas-graficas.json' -> 'tarjetas graficas'
    """
    without_brand = path.stem.replace("ASUS-", "", 1)
    words = without_brand.split("-")
    return " ".join(words).strip()

def transform_asus(p: dict, tienda: str, categoria_principal: str | None = None):
    """Map one raw ASUS product record onto the unified product row schema.

    Args:
        p: Raw product dict as found in the ASUS JSON dumps.
        tienda: Store label copied into the row as-is.
        categoria_principal: Optional top-level category; when falsy, the
            record's "Level1Path" is used, and "ASUS" as the last resort.

    Returns:
        A dict with the keys nombre, tienda, categoria_principal, precio,
        moneda, simbolo, estrellas, reviews, descripcion, especificaciones,
        categoria, categoria_path, url, imagen, sku and id.
    """
    # Name: tag-stripped "Name" first, then the raw model / category names.
    nombre = strip_html(p.get("Name")) or p.get("SalesModelName") or p.get("CategoryName")

    # Take the first price field, in priority order, that parses to a truthy
    # number; if none does, the result of the last attempt is kept.
    precio = None
    for campo in ("Price", "RegularPrice", "SortPrice"):
        precio = parse_price(p.get(campo))
        if precio:
            break

    # Rating: only converted when present; unconvertible values become None.
    raw_stars = p.get("ReviewStar")
    try:
        estrellas = None if raw_stars is None else float(raw_stars)
    except (TypeError, ValueError):
        estrellas = None

    descripcion = strip_html(p.get("ModelSpec")) or strip_html(p.get("SubTagline")) or ""

    # Category breadcrumb: keep only the levels that are actually filled in.
    niveles = (p.get("Level1Path"), p.get("Level2Path"), p.get("Level3Path"))
    lvl1, lvl2, lvl3 = niveles
    categoria_path = list(filter(None, niveles))
    categoria_legible = p.get("CategoryName") or lvl3 or lvl2 or lvl1

    fila = {
        "nombre": nombre,
        "tienda": tienda,
        "categoria_principal": categoria_principal or lvl1 or "ASUS",
        "precio": precio,
        # These ASUS payloads carry no currency code or symbol.
        "moneda": None,
        "simbolo": None,
        "estrellas": estrellas,
        "reviews": p.get("ReviewCount"),
        "descripcion": descripcion,
        # Placeholder: no specs block is mapped yet.
        "especificaciones": {},
        "categoria": categoria_legible,
        "categoria_path": categoria_path,
        "url": p.get("ProductURL") or p.get("ProductCardURL"),
        "imagen": first_image_url(p),
        "sku": p.get("PartNo") or p.get("ProductID"),
        "id": p.get("ProductID") or p.get("RealProductID"),
    }
    return fila

# -------------------------------
# Proceso para tu carpeta 'datos/asus'
# -------------------------------

def procesar_asus_folder(
    carpeta="datos/asus",
    patron="ASUS-*.json",
    tienda="ASUS",
    guardar_csv="asus_unificado.csv",
    guardar_parquet=None
):
    """Build one DataFrame from every ASUS JSON dump in a folder.

    Args:
        carpeta: Folder that holds the JSON dumps.
        patron: Glob pattern selecting the files inside ``carpeta``.
        tienda: Store label written to every row.
        guardar_csv: CSV output path; falsy to skip writing a CSV.
        guardar_parquet: Parquet output path; falsy to skip writing Parquet.

    Returns:
        A DataFrame with one row per product record found, files processed
        in sorted path order.

    Raises:
        FileNotFoundError: If no file in ``carpeta`` matches ``patron``.
    """
    json_paths = sorted(Path(carpeta).glob(patron))
    if len(json_paths) == 0:
        raise FileNotFoundError(f"No se encontraron archivos {patron} en {carpeta}")

    rows = []
    for json_path in json_paths:
        records = records_desde_payload_asus(cargar_json(json_path))
        if not records:
            print(f"ℹ️ Sin productos en {json_path.name}")
            continue

        # The main category comes from the file name, not from the payload.
        file_category = inferir_categoria_desde_filename(json_path)
        rows.extend(
            transform_asus(record, tienda=tienda, categoria_principal=file_category)
            for record in records
        )

    df = pd.DataFrame(rows)

    if guardar_csv:
        csv_target = Path(guardar_csv)
        csv_target.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(guardar_csv, index=False, encoding="utf-8")
        print(f"💾 CSV guardado: {guardar_csv} ({len(df)} filas)")

    if guardar_parquet:
        parquet_target = Path(guardar_parquet)
        parquet_target.parent.mkdir(parents=True, exist_ok=True)
        df.to_parquet(guardar_parquet, index=False)
        print(f"💾 Parquet guardado: {guardar_parquet} ({len(df)} filas)")

    return df

In [3]:
# Unify every ASUS-*.json dump under datos/asus into one table and save it as CSV.
df_asus = procesar_asus_folder(
    "datos/asus",
    patron="ASUS-*.json",
    tienda="ASUS",
    guardar_csv="datos/asus/productos.csv",
    guardar_parquet=None,
)

💾 CSV guardado: datos/asus/productos.csv (479 filas)


In [4]:
df_asus

Unnamed: 0,nombre,tienda,categoria_principal,precio,moneda,simbolo,estrellas,reviews,descripcion,especificaciones,categoria,categoria_path,url,imagen,sku,id
0,ROG Delta S Animate,ASUS,auriculares audio,,,,0.0,0,Auriculares gaming ligeros USB-C ® con pantal...,{},ROG Delta S Animate,"[Headsets & Audio, Headsets, USB Headsets]",https://rog.asus.com/ar/headsets-audio/headset...,https://dlcdnwebimgs.asus.com/gain/459EC147-E7...,90YH037M-B2UA00,R_90YH037M-B2UA00
1,ROG Delta White Edition,ASUS,auriculares audio,,,,0.0,0,Auriculares RGB para juegos con Hi-Res ESS Qua...,{},ROG Delta White Edition,"[Headsets & Audio, Headsets, USB Headsets]",https://rog.asus.com/ar/headsets-audio/headset...,https://dlcdnwebimgs.asus.com/gain/5A3E1722-D3...,90YH02HW-B2UA00,R_90YH02HW-B2UA00
2,ROG Delta S Wireless,ASUS,auriculares audio,,,,0.0,0,Auriculares inalámbricos ligeros para juegos c...,{},ROG Delta S Wireless,"[Headsets & Audio, Headsets, Wireless Headsets]",https://rog.asus.com/ar/headsets-audio/headset...,https://dlcdnwebimgs.asus.com/gain/F223AA9E-DC...,90YH03IW-B3UA00,R_90YH03IW-B3UA00
3,ROG Cetra II Core Moonlight White,ASUS,auriculares audio,,,,0.0,0,Auriculares in-ear para gaming ROG Cetra II Co...,{},ROG Cetra II Core Moonlight White,"[Headsets & Audio, In-ear headphone]",https://rog.asus.com/ar/headsets-audio/in-ear-...,https://dlcdnwebimgs.asus.com/gain/A822B517-F1...,90YH0360-B2UA00,R_90YH0360-B2UA00
4,ROG Delta S Core,ASUS,auriculares audio,,,,0.0,0,"Auriculares ligeros para juegos de 3,5 mm con ...",{},ROG Delta S Core,"[Headsets & Audio, Headsets, 3.5mm Headsets]",https://rog.asus.com/ar/headsets-audio/headset...,https://dlcdnwebimgs.asus.com/gain/6AA18038-50...,90YH03JC-B1UA00,R_90YH03JC-B1UA00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
474,ROG Strix Flare II Animate Teclado Gamer,ASUS,teclados,,,,0.0,0,Teclado mecánico para juegos ROG Strix Flare I...,{},ROG Strix Flare II Animate,"[Keyboards, Keyboards, Aura RGB]",https://rog.asus.com/ar/keyboards/keyboards/au...,https://dlcdnwebimgs.asus.com/gain/7001B47D-BD...,90MP02E6-BKUA01,R_90MP02E6-BKUA01
475,Teclado Gamer ROG Strix Scope II,ASUS,teclados,,,,0.0,0,Teclado gamer ROG Strix Scope II con switches ...,{},ROG Strix Scope II,"[Keyboards, Keyboards, Aura RGB]",https://rog.asus.com/ar/keyboards/keyboards/au...,https://dlcdnwebimgs.asus.com/gain/B742E991-0F...,90MP036A-BKAA01,R_90MP036A-BKAA01
476,TUF Gaming K3 Gen II,ASUS,teclados,,,,4.5,100,El teclado ASUS TUF Gaming K3 Gen II incorpora...,{},Alámbricos,"[Accessories, Keyboards, TUF-Gaming]",https://www.asus.com/ar/accessories/keyboards/...,https://dlcdnwebimgs.asus.com/gain/93739532-e9...,26434,26434
477,TUF Gaming K3,ASUS,teclados,,,,4.5,100,Teclado mecánico ASUS TUF Gaming K3 RGB con te...,{},Alámbricos,"[Accessories, Keyboards, TUF-Gaming]",https://www.asus.com/ar/accessories/keyboards/...,https://dlcdnwebimgs.asus.com/gain/17b38f2d-d2...,14852,14852


In [None]:
# Load the Lenovo product table from its CSV export.
df_lenovo = pd.read_csv(filepath_or_buffer='datos/lenovo/productos.csv')

Unnamed: 0,nombre,tienda,precio,moneda,simbolo,estrellas,reviews,descripcion,especificaciones,categoria,categoria_path,sku,url,imagen
0,"IdeaCentre 3 AIO Gen 6 (23.8"" AMD)",Lenovo,1240000.01,ARS,$,4.6,77.0,"<ul style=""list-style-type: disc;""><li><p>Eleg...",{'Procesador': 'Procesador AMD Ryzen™ 7 7730U ...,Computadoras de Escritorio,"['ROOTPMI', 'desktops', 'ideacentre', 'aio-300...",F0G1010SAR_f31ab1f1b6026-4f65-9ed9-0142293b720b,/p/computadoras-de-escritorio/ideacentre/serie...,https://p1-ofp.static.pub//medias/23389938226_...
1,"IdeaCentre i AIO Gen 9 (23.8"" Intel)",Lenovo,1360000.0,ARS,$,4.5,123.0,"<ul style=""list-style-type: disc;""><li><p>Máxi...",{'Procesador': 'Procesador Intel® Core™ i5-134...,Computadoras de Escritorio,"['ROOTPMI', 'desktops', 'ideacentre', 'aio-500...",F0HN00NHAR_f31ab1f1b6026-4f65-9ed9-0142293b720b,/p/computadoras-de-escritorio/ideacentre/serie...,https://p1-ofp.static.pub/medias/26091470103_I...
2,"IdeaCentre AIO Gen 9 (23.8"" AMD)",Lenovo,1429000.01,ARS,$,4.6,40.0,"<ul style=""list-style-type: disc;""><li><p>Rend...",{'Procesador': 'Procesador AMD Ryzen™ 5 7535HS...,Computadoras de Escritorio,"['ROOTPMI', 'desktops', 'ideacentre', 'aio-500...",F0HR008QAR_f31ab1f1b6026-4f65-9ed9-0142293b720b,/p/computadoras-de-escritorio/ideacentre/serie...,https://p1-ofp.static.pub/medias/26091470103_I...
3,"IdeaCentre i AIO Gen 9 (27"" Intel)",Lenovo,1450000.0,ARS,$,4.6,269.0,"<ul style=""list-style-type: disc;""><li><p>Rend...",{'Procesador': 'Procesador Intel® Core™ i5-134...,Computadoras de Escritorio,"['ROOTPMI', 'desktops', 'ideacentre', 'aio-500...",F0HM00S6AR_f31ab1f1b6026-4f65-9ed9-0142293b720b,/p/computadoras-de-escritorio/ideacentre/serie...,https://p1-ofp.static.pub/medias/26091470103_I...
4,"ThinkCentre Neo 50a AIO Gen 5 (23.8"" Intel)",Lenovo,1589999.01,ARS,$,4.8,61.0,"<ul style=""list-style-type: disc;""><li><p><spa...",{'Procesador': 'Procesador Intel® Core™ i5-134...,Computadoras de Escritorio,"['ROOTPMI', 'desktops', 'thinkcentre', 'thinkc...",12SD002DAS_f31ab1f1b6026-4f65-9ed9-0142293b720b,/p/computadoras-de-escritorio/thinkcentre/thin...,https://p1-ofp.static.pub//medias/26146349384_...


In [10]:
# Stack both stores' tables, order rows by store then product name, and
# renumber the index from 0.
df = (
    pd.concat([df_asus, df_lenovo])
    .sort_values(by=['tienda', 'nombre'])
    .reset_index(drop=True)
)

  df = pd.concat([df_asus, df_lenovo]).sort_values(by=['tienda', 'nombre']).reset_index(drop=True)


In [12]:
# Remove the 'categoria_principal' column from the combined table.
# errors='ignore' makes the cell idempotent: re-running it after the column is
# already gone no longer raises KeyError.
df = df.drop(columns='categoria_principal', errors='ignore')

In [14]:
# Persist the combined ASUS + Lenovo table, without the index column.
df.to_csv(path_or_buf='datos/productos-asus-lenovo.csv', index=False)