# Colab: 11번가 첫화면 상품 리스트 수집 (Selenium)


In [None]:
# Required Python packages (quiet install)
!pip -q install selenium webdriver-manager pandas bs4

# Install Google Chrome (for Colab)
# NOTE(review): this installs google-chrome-stable, while the scraping cells set
# binary_location to "/usr/bin/chromium-browser" — confirm that path exists on the runtime.
!apt-get update -y
!apt-get install -y wget gnupg unzip
!wget -q https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
# If dpkg fails, fall back to `apt-get -fy install`.
!dpkg -i google-chrome-stable_current_amd64.deb || apt-get -fy install


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.6/9.6 MB[0m [31m72.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m499.2/499.2 kB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
Hit:1 https://cli.github.com/packages stable InRelease
Hit:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:4 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:8 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:9 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRel

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options

In [None]:


import os, time, re, csv, itertools
import pandas as pd
from urllib.parse import urljoin, urlparse
from bs4 import BeautifulSoup

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

HOME = "https://www.11st.co.kr/"

# ── 1) Browser setup ────────────────────────────────────────────
# Command-line flags for the headless Chrome session, applied in this order.
chrome_options = Options()
for _flag in (
    "--headless=new",
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--lang=ko-KR",
    "--window-size=1400,2200",
    "--disable-blink-features=AutomationControlled",
):
    chrome_options.add_argument(_flag)
chrome_options.binary_location = "/usr/bin/chromium-browser"

# Module-level driver shared by the helpers and the script below.
driver = webdriver.Chrome(options=chrome_options)

def human_scroll(max_steps=8, pause=1.0):
    """Scroll the page down gradually so lazily loaded cards get a chance to render.

    Uses the module-level ``driver``. Scrolling stops early only when the
    viewport has reached the bottom of the document AND the document height
    did not change since the previous step. Checking the height alone would
    stop after two steps on any page whose height is stable, long before the
    lower sections were ever scrolled into view.

    Args:
        max_steps: Upper bound on the number of scroll steps.
        pause: Seconds to sleep after each step.
    """
    height_js = (
        "return Math.max(document.body.scrollHeight,"
        " document.documentElement.scrollHeight);"
    )
    # 2px tolerance absorbs fractional scroll offsets.
    at_bottom_js = (
        "return window.innerHeight + window.pageYOffset >= "
        "Math.max(document.body.scrollHeight,"
        " document.documentElement.scrollHeight) - 2;"
    )
    last_h = 0
    for _ in range(max_steps):
        driver.execute_script("window.scrollBy(0, window.innerHeight*0.9);")
        time.sleep(pause)
        h = driver.execute_script(height_js)
        if driver.execute_script(at_bottom_js) and h == last_h:
            break
        last_h = h

def abs_url(href: str) -> str:
    """Resolve *href* to an absolute URL against ``HOME``.

    Returns:
        ``""`` for an empty/None href; ``https:`` prepended for
        protocol-relative (``//host/...``) values; the href unchanged when it
        already carries a scheme (``https:``, ``javascript:``, ...); otherwise
        the href joined onto ``HOME``. The last case covers both ``/path`` and
        relative forms such as ``products/1`` or ``?q=1``, which used to be
        returned unresolved.
    """
    if not href:
        return ""
    if href.startswith("//"):
        return "https:" + href
    if urlparse(href).scheme:
        # Already absolute (or a non-HTTP scheme): leave untouched.
        return href
    return urljoin(HOME, href)

def extract_text(el):
    """Return the element's text with whitespace runs collapsed, or None if empty/missing."""
    if el:
        # Join text nodes with single spaces, then squeeze any remaining runs.
        collapsed = re.sub(r"\s{2,}", " ", el.get_text(" ", strip=True))
        if collapsed:
            return collapsed
    return None

def pick_first(soup, selectors):
    """Return the first truthy ``soup.select_one(sel)`` result, trying selectors in order; None if none match."""
    # Lazy generator: selectors after the first hit are never evaluated.
    candidates = (soup.select_one(sel) for sel in selectors)
    return next((hit for hit in candidates if hit), None)

# ── 2) Navigate and scroll ──────────────────────────────────────
# Imported here so this step is self-contained; selenium is already a dependency of the cell.
from selenium.common.exceptions import TimeoutException

try:
    driver.get(HOME)

    # Wait for <body> to exist, then give the page a moment to settle.
    # A timeout is tolerated: we still try to scroll and parse whatever loaded.
    try:
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
        time.sleep(1.0)
    except TimeoutException:
        pass

    # Wait for a main layout element (header/nav/main/#wrap) as a "page is up" signal.
    try:
        WebDriverWait(driver, 12).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "header, nav, main, #wrap"))
        )
    except TimeoutException:
        pass

    # Scroll slowly so each section gets loaded.
    human_scroll(max_steps=10, pause=1.0)

    # ── 3) Capture the rendered page ────────────────────────────
    html = driver.page_source
finally:
    # Always release the browser, even if navigation or scrolling failed.
    driver.quit()

# Parse the captured HTML with the built-in parser.
soup = BeautifulSoup(html, "html.parser")

# ── 4) Candidate selectors for product cards (several patterns, since class names change often) ──
# * 11st product URLs mostly look like "/products/<digits>" → first-pass filter on href
candidate_anchor_selectors = [
    # Product anchors in card layouts
    "a[href*='/products/']",
    # Fallback pattern just in case (promo/deal links, etc.)
    # NOTE: the de-duplication step below keeps only hrefs containing "/products/".
    "a[href*='prd']",
]

# Title candidates, tried in order
title_selectors = [
    ".c_card_tit", ".prd_name", ".title", ".name", "[class*='title']",
    "img[alt]", "strong", "span"
]

# Price candidates, tried in order
price_selectors = [
    ".price", ".c_prd_price", "[class*='price']", ".num", ".value"
]

# Image candidates, tried in order
image_selectors = [
    "img",
    "source"  # inside a <picture> tag
]

# ── 5) Collect anchors and de-duplicate ─────────────────────────
anchors = []
for sel in candidate_anchor_selectors:
    anchors += soup.select(sel)

# De-duplicate by absolute URL; when a URL repeats, the later anchor replaces the earlier one.
uniq = {}
for a in anchors:
    href = a.get("href") or ""
    full = abs_url(href) if href else ""
    # Keep product detail links only.
    if href and "/products/" in full:
        uniq[full] = a

# A "title" made only of digits, punctuation and % (e.g. "10%") is a badge, not a product name.
numeric_only = re.compile(r"[\d.,%\s]+")
# A numeric token, optionally followed by a percent sign (discount rate).
price_token = re.compile(r"(\d[\d,\.]*)(\s*%)?")

product_rows = []
for link, a in uniq.items():
    # Title: look inside the anchor first, then in up to 4 ancestors.
    title = extract_text(pick_first(a, title_selectors))
    if not title:
        for p in itertools.islice(a.parents, 4):
            title = extract_text(pick_first(p, title_selectors))
            if title:
                break

    # Image element: inside the anchor first, then in up to 3 ancestors.
    img_el = pick_first(a, image_selectors)
    if not img_el:
        for p in itertools.islice(a.parents, 3):
            img_el = pick_first(p, image_selectors)
            if img_el:
                break

    img_url = None
    if img_el:
        # Lazy-load attributes are checked before `src`: earlier runs saved
        # "noimg_*.gif" URLs, presumably because `src` held a placeholder.
        # `srcset` covers <source> elements, which carry no `src`.
        for attr in ("data-src", "data-original", "data-lazy", "src", "srcset"):
            raw = (img_el.get(attr) or "").strip()
            if not raw:
                continue
            if attr == "srcset":
                # First candidate URL; descriptors ("2x", "300w") follow after whitespace.
                raw = raw.split()[0].rstrip(",")
            img_url = abs_url(raw)
            break

    # Price element: inside the anchor first, then in up to 3 ancestors.
    price_el = pick_first(a, price_selectors)
    if not price_el:
        for p in itertools.islice(a.parents, 3):
            price_el = pick_first(p, price_selectors)
            if price_el:
                break

    price = None
    if price_el:
        txt = price_el.get_text(" ", strip=True)
        # Take the first number that is NOT a percentage, so "20% 12,900원"
        # yields 12900 rather than the discount rate.
        for m in price_token.finditer(txt):
            if not m.group(2):
                price = m.group(1).replace(",", "")
                break

    # Fall back to the image alt text when the title is missing or is just a number/percent badge.
    if img_el:
        alt = (img_el.get("alt") or "").strip()
        if alt and (not title or numeric_only.fullmatch(title)):
            title = alt

    product_rows.append({
        "title": title,
        "price": price,
        "link": link,
        "image": img_url
    })

# Drop rows without a title and keep only the first row per (title, link) pair.
cleaned = []
seen = set()
for row in product_rows:
    key = (row["title"], row["link"])
    if row["title"] and key not in seen:
        seen.add(key)
        cleaned.append(row)

# utf-8-sig writes a BOM at the start of the file.
df = pd.DataFrame(cleaned)
df.to_csv("11st_home_products.csv", index=False, encoding="utf-8-sig")
print("완료! 저장 파일: 11st_home_products.csv (행:", len(df), ")")
# Last expression of the cell: the notebook renders the first 10 rows.
df.head(10)


완료! 저장 파일: 11st_home_products.csv (행: 203 )


Unnamed: 0,title,price,link,image
0,생선숙편 꼬치어묵 8개입*4팩+소스 30g*4팩,20.0,https://www.11st.co.kr/products/3638677525,https://c.011st.com/img/prd_size/noimg_300.gif
1,10%,,https://www.11st.co.kr/products/1883525698,
2,구달 여름 잡티케어 NEW 청귤세럼 외 클리오,55.0,https://www.11st.co.kr/products/1730891888,https://cdn.011st.com/11dims/resize/720x360/qu...
3,맘스럽 딥클린 액체세제 2.5LX4개 외 대용량 5종모음,42.0,https://www.11st.co.kr/products/8089212004,https://cdn.011st.com/11dims/resize/720x360/qu...
4,카카오7%+[33%]네파키즈 새학기 추천템 & 썸머 클리어런스 역시즌/책가방/여름시...,33.0,https://www.11st.co.kr/products/4621593388,https://c.011st.com/img/prd_size/noimg_300.gif
5,30%,,https://www.11st.co.kr/products/1646360167,
6,[내일도착] 반식엔 구수한 메밀냉면 물 비빔 10인분(면10+육수5+양념5),42.0,https://www.11st.co.kr/products/8462320975,https://c.011st.com/img/prd_size/noimg_300.gif
7,단1회 착유 통참깨 진심 참기름 350ml*2개,25.0,https://www.11st.co.kr/products/7777544898,https://c.011st.com/img/prd_size/noimg_300.gif
8,Sony WH-CH720N 노이즈캔슬링 블루투스 헤드폰 블랙,38.0,https://www.11st.co.kr/products/6315984662,https://c.011st.com/img/prd_size/noimg_394x198...
9,20%,,https://www.11st.co.kr/products/8192785126,


- 킥스타터

In [None]:
# =========================================================
# Colab: Kickstarter Discover 수집 (0건 방지 개선판)
# - 쿠키/개인정보 배너 자동 허용
# - 충분한 대기/스크롤
# - 다중 선택자로 안전 파싱
# =========================================================

import re, time, itertools, pandas as pd, sys, os
from urllib.parse import urljoin
from bs4 import BeautifulSoup

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Base used by abs_url() to resolve site-relative links.
BASE = "https://www.kickstarter.com"
# Discover listing that gets scraped.
DISCOVER = "https://www.kickstarter.com/discover/advanced?sort=magic&country=US&lang=en"  # language pinned to en

# Defaults for human_scroll(): number of scroll steps and seconds between them.
SCROLL_STEPS = 14
SCROLL_PAUSE = 1.2
# Timeout in seconds for the initial <body> wait.
WAIT_SEC = 30

def human_scroll(driver, steps=SCROLL_STEPS, pause=SCROLL_PAUSE):
    """Scroll *driver*'s page down step by step so lazily loaded cards render.

    Each step scrolls by at least 700px (or 90% of the viewport). Scrolling
    stops early only when the viewport has reached the bottom AND the document
    height did not change since the previous step. Checking the height alone
    would stop after two steps on a page whose height is stable, before the
    lower cards were scrolled into view.

    Args:
        driver: Selenium WebDriver with the target page loaded.
        steps: Upper bound on the number of scroll steps.
        pause: Seconds to sleep after each step.
    """
    height_js = (
        "return Math.max(document.body.scrollHeight,"
        " document.documentElement.scrollHeight);"
    )
    # 2px tolerance absorbs fractional scroll offsets.
    at_bottom_js = (
        "return window.innerHeight + window.pageYOffset >= "
        "Math.max(document.body.scrollHeight,"
        " document.documentElement.scrollHeight) - 2;"
    )
    last_h = 0
    for _ in range(steps):
        driver.execute_script("window.scrollBy(0, Math.max(700, window.innerHeight*0.9));")
        time.sleep(pause)
        h = driver.execute_script(height_js)
        if driver.execute_script(at_bottom_js) and h == last_h:
            break
        last_h = h

def pick_first(soup, selectors):
    """Try each CSS selector in order and return the first truthy ``select_one`` match, else None."""
    return next(
        (match for match in (soup.select_one(sel) for sel in selectors) if match),
        None,
    )

def text(el):
    """Return the element's text joined with single spaces and stripped, or None for a missing element."""
    if not el:
        return None
    return el.get_text(" ", strip=True)

def abs_url(href: str):
    """Resolve *href* to an absolute URL against ``BASE``.

    Returns:
        None for an empty/None href; ``https:`` prepended for protocol-relative
        (``//host/...``) values; the href unchanged when it already carries a
        scheme; otherwise the href joined onto ``BASE``. The last case covers
        both ``/path`` and relative forms such as ``projects/x`` that used to
        be returned unresolved.
    """
    # Local import: the cell's own import line only brings in urljoin.
    from urllib.parse import urlparse

    if not href:
        return None
    if href.startswith("//"):
        return "https:" + href
    if urlparse(href).scheme:
        return href
    return urljoin(BASE, href)

# --- Browser options (regular desktop UA + headless) ---
chrome_options = Options()
for _flag in (
    "--headless=new",
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--lang=en-US",
    "--window-size=1400,2400",
    "--disable-blink-features=AutomationControlled",
    ("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
     "(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"),
):
    chrome_options.add_argument(_flag)
chrome_options.binary_location = "/usr/bin/chromium-browser"
# Module-level driver used by click_if_exists() and the script below.
driver = webdriver.Chrome(options=chrome_options)

def click_if_exists(by, sel, timeout=5):
    """Click the element located by ``(by, sel)`` once it becomes clickable.

    Uses the module-level ``driver``.

    Args:
        by: Selenium locator strategy (e.g. ``By.CSS_SELECTOR``).
        sel: Locator value.
        timeout: Seconds to wait for the element to become clickable.

    Returns:
        True if the element was clicked; False if it never became clickable or
        the click failed with a Selenium error (timeout, intercepted click,
        stale element, invalid selector, ...).
    """
    # Local import keeps the helper self-contained. Only Selenium errors are
    # treated as "not there"; KeyboardInterrupt and programming errors propagate.
    from selenium.common.exceptions import WebDriverException

    try:
        el = WebDriverWait(driver, timeout).until(EC.element_to_be_clickable((by, sel)))
        el.click()
    except WebDriverException:
        return False
    return True

try:
    # Local import so this step is self-contained; selenium is already a dependency of the cell.
    from selenium.common.exceptions import TimeoutException

    driver.get(DISCOVER)

    # 1) Wait for the page body.
    WebDriverWait(driver, WAIT_SEC).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
    time.sleep(1.0)

    # 2) Dismiss the cookie/privacy banner (try several text/button patterns).
    #   - region/cookie wording: "Accept all", "I accept", "Agree", "동의"
    #   - some variants carry a data-test-id instead
    cookie_btn_candidates = [
        (By.CSS_SELECTOR, "[data-test-id='cookie-banner-accept']"),
        (By.XPATH, "//button[contains(., 'Accept all')]"),
        (By.XPATH, "//button[contains(., 'I accept')]"),
        (By.XPATH, "//button[contains(., 'Agree')]"),
        (By.XPATH, "//button[contains(., '동의')]"),
        (By.XPATH, "//div[contains(@class, 'cookie') or contains(@class,'consent')]//button"),
    ]
    for by, sel in cookie_btn_candidates:
        if click_if_exists(by, sel, timeout=3):
            time.sleep(0.8)
            break

    # 3) Wait until the first cards are present (several patterns, first hit wins).
    card_wait_patterns = [
        (By.CSS_SELECTOR, "[data-test-id='project-card']"),
        (By.CSS_SELECTOR, "a[data-test-id='project-card-title']"),
        (By.CSS_SELECTOR, "a[href*='/projects/']"),
    ]
    ok = False
    for by, sel in card_wait_patterns:
        try:
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((by, sel)))
        except TimeoutException:
            continue
        ok = True
        break
    if not ok:
        # Not fatal: parsing is still attempted, but make the likely 0-row cause visible.
        print("⚠️ 프로젝트 카드가 대기 시간 내에 나타나지 않았습니다. 그대로 파싱을 시도합니다.")

    # 4) Scroll to load more cards.
    human_scroll(driver)

    # 5) Parse the rendered DOM.
    soup = BeautifulSoup(driver.page_source, "html.parser")

finally:
    # The page source is already captured in `soup`, so the browser can be
    # released here; a failure while quitting must not mask the real error.
    try:
        driver.quit()
    except Exception:
        pass

# --- Card selectors (all patterns are tried and their matches merged) ---
card_selectors = [
    "[data-test-id='project-card']",
    "[data-test-id*='project-card']",
    "div[class*='ProjectCard']",
    "li[class*='project-card']",
    "div[data-project]"  # fallback
]

# The same element can match several selectors. De-duplicate by object
# identity: each DOM element is a single Tag object in the parsed tree, so
# id() is exact. Keying on the first 500 characters of the markup could drop
# distinct cards that share a long common prefix. Content-level duplicates are
# handled later when the DataFrame is de-duplicated on (title, link).
uniq_cards = []
seen_ids = set()
for sel in card_selectors:
    for c in soup.select(sel):
        if id(c) in seen_ids:
            continue
        seen_ids.add(id(c))
        uniq_cards.append(c)
cards = uniq_cards

# --- Selectors for fields inside a card (each list is tried in order, first match wins) ---
# Title anchor; its href is also used as the project link.
title_selectors_in = [
    "a[data-test-id='project-card-title']",
    "a[class*='project-title']",
    "a[class*='project-card']",
    "a[href*='/projects/']"
]
# Creator name
creator_selectors = [
    "a[data-test-id='project-card-creator-name']",
    "a[href*='/profile/']",
    "[class*='creator'] a",
]
# Short description
blurb_selectors = [
    "[data-test-id='project-card-blurb']",
    "p[class*='blurb']",
    "p[class*='description']",
]
# Funding percentage (text, or an element exposing aria-valuenow)
percent_selectors = [
    "[data-test-id='percentage-raised']",
    "[aria-valuenow]"
]
# Amount pledged
pledged_selectors = [
    "[data-test-id='pledged']",
    "span[class*='pledged']",
]
# Funding goal
goal_selectors = [
    "[data-test-id='goal']",
    "span[class*='goal']",
]
# Project location
location_selectors = [
    "[data-test-id='location']",
    "span[class*='location']",
]
# Project category
category_selectors = [
    "[data-test-id='category']",
    "span[class*='category']",
]

# Currency symbol followed by an amount; compiled once instead of once per card.
money_re = re.compile(r"([\$\€\£₩]\s?[\d,\.]+)")

rows = []
for card in cards:
    # Title anchor doubles as the project link.
    a = pick_first(card, title_selectors_in)
    title = text(a)
    link = abs_url(a.get("href")) if a and a.has_attr("href") else None

    img = None
    img_el = pick_first(card, ["img", "picture source"])
    if img_el:
        for attr in ("src", "data-src", "data-original", "data-lazy", "srcset"):
            # split() tolerates leading whitespace/newlines in the attribute,
            # where split(" ") would yield an empty first token and stop the search.
            parts = (img_el.get(attr) or "").split()
            if not parts:
                continue
            first = parts[0]
            if attr == "srcset":
                # Candidates are comma-separated; drop a trailing comma after the URL.
                first = first.rstrip(",")
            img = abs_url(first)
            break

    creator = text(pick_first(card, creator_selectors))
    blurb = text(pick_first(card, blurb_selectors))

    # Percent funded: prefer aria-valuenow, else parse "NN%" from the text.
    percent = None
    pct_el = pick_first(card, percent_selectors)
    if pct_el:
        if pct_el.has_attr("aria-valuenow"):
            percent = pct_el.get("aria-valuenow")
        else:
            m = re.search(r"(\d+)\s*%", pct_el.get_text(" ", strip=True))
            if m:
                percent = m.group(1)

    # Pledged / goal: the currency amount if one is found, otherwise the raw text.
    pledged = None
    p_el = pick_first(card, pledged_selectors)
    if p_el:
        p_txt = p_el.get_text(" ", strip=True)
        m = money_re.search(p_txt)
        pledged = m.group(1) if m else p_txt

    goal = None
    g_el = pick_first(card, goal_selectors)
    if g_el:
        g_txt = g_el.get_text(" ", strip=True)
        m = money_re.search(g_txt)
        goal = m.group(1) if m else g_txt

    location = text(pick_first(card, location_selectors))
    category = text(pick_first(card, category_selectors))

    # Fall back to the image alt text when no title text was found.
    if not title and img_el and img_el.get("alt"):
        title = img_el.get("alt").strip()

    # Skip elements that yielded neither a title nor a link.
    if not (title or link):
        continue

    rows.append({
        "title": title, "link": link, "creator": creator, "blurb": blurb,
        "percent": percent, "pledged": pledged, "goal": goal,
        "location": location, "category": category, "image": img
    })

# Explicit columns keep the frame well-formed when `rows` is empty. Without
# them an empty DataFrame has no columns, drop_duplicates(subset=...) raises
# KeyError, and the 0-row debug branch below is never reached.
columns = [
    "title", "link", "creator", "blurb", "percent",
    "pledged", "goal", "location", "category", "image",
]
df = pd.DataFrame(rows, columns=columns).drop_duplicates(subset=["title", "link"], keep="first")

# Debugging aid: when nothing was extracted, save the parsed HTML for inspection.
if df.empty:
    with open("/content/kickstarter_debug.html", "w", encoding="utf-8") as f:
        f.write(str(soup))
    print("⚠️ 0건입니다. 디버깅용 HTML을 저장했습니다: /content/kickstarter_debug.html")

df.to_csv("kickstarter_discover.csv", index=False, encoding="utf-8-sig")
print("완료! 저장 파일: kickstarter_discover.csv (행:", len(df), ")")
# Last expression of the cell: the notebook renders the first 10 rows.
df.head(10)


완료! 저장 파일: kickstarter_discover.csv (행: 12 )


Unnamed: 0,title,link,creator,blurb,percent,pledged,goal,location,category,image
0,Sea Glass Dice: Two-Tone Frosted Dice for RPGs,https://www.kickstarter.com/projects/dicedunge...,Dice Dungeons,,,,,,,https://i.kickstarter.com/assets/050/337/285/5...
1,The Beginning After the End Print Editions,https://www.kickstarter.com/projects/rocketshi...,Rocketship,,,,,,,https://i.kickstarter.com/assets/050/248/478/c...
2,ExoTerra : The Giant Mech Campaign You've Been...,https://www.kickstarter.com/projects/wyldegame...,WyldeGames,,,,,,,https://i.kickstarter.com/assets/050/084/784/0...
3,Priest Series: The Sacred & Sinful Editions,https://www.kickstarter.com/projects/thesierra...,Sierra Simone,,,,,,,https://i.kickstarter.com/assets/050/216/690/0...
4,Linogy: Reinvent AA/AAA Batteries & Chargers,https://www.kickstarter.com/projects/linogy/li...,Linogy,,,,,,,https://i.kickstarter.com/assets/050/509/888/7...
5,Doom Guard: The Supers vs. Cthulhu Board Game,https://www.kickstarter.com/projects/545820095...,Shane Hensley,,,,,,,https://i.kickstarter.com/assets/050/132/694/b...
6,Loteria Tarot®,https://www.kickstarter.com/projects/playninea...,Dreamstar Farms,,,,,,,https://i.kickstarter.com/assets/050/450/308/a...
7,LUMA Projection Arts Festival 2025,https://www.kickstarter.com/projects/lumafesti...,Team LUMA,,,,,,,https://i.kickstarter.com/assets/050/200/491/8...
8,From Desk to Wilderness – The Only Cable You’l...,https://www.kickstarter.com/projects/aohi/from...,AOHi,,,,,,,https://i.kickstarter.com/assets/050/331/772/2...
9,The Book of Jusko,https://www.kickstarter.com/projects/151096620...,Joe Pruett,,,,,,,https://i.kickstarter.com/assets/050/488/104/7...



- Colab: 오늘의집(ohou.se) 첫화면 상품 수집


In [None]:



import re, time, itertools, os, pandas as pd
from urllib.parse import urljoin
from bs4 import BeautifulSoup

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# BASE is used by abs_url() to resolve site-relative links; HOME is the page that gets loaded.
BASE = "https://ohou.se/"
HOME = "https://ohou.se/"

SCROLL_STEPS = 12     # increase to load more (e.g. 20~30)
SCROLL_PAUSE  = 1.1   # seconds to wait between scroll steps
# Timeout in seconds for the initial <body> wait.
WAIT_SEC      = 25

# ---------- 유틸 ----------
def human_scroll(driver, steps=SCROLL_STEPS, pause=SCROLL_PAUSE):
    """Scroll *driver*'s page down step by step so lazily loaded cards render.

    Each step scrolls by at least 700px (or 90% of the viewport). Scrolling
    stops early only when the viewport has reached the bottom AND the document
    height did not change since the previous step. Checking the height alone
    would stop after two steps on a page whose height is stable, before the
    lower cards were scrolled into view.

    Args:
        driver: Selenium WebDriver with the target page loaded.
        steps: Upper bound on the number of scroll steps.
        pause: Seconds to sleep after each step.
    """
    height_js = (
        "return Math.max(document.body.scrollHeight,"
        " document.documentElement.scrollHeight);"
    )
    # 2px tolerance absorbs fractional scroll offsets.
    at_bottom_js = (
        "return window.innerHeight + window.pageYOffset >= "
        "Math.max(document.body.scrollHeight,"
        " document.documentElement.scrollHeight) - 2;"
    )
    last_h = 0
    for _ in range(steps):
        driver.execute_script("window.scrollBy(0, Math.max(700, window.innerHeight*0.9));")
        time.sleep(pause)
        h = driver.execute_script(height_js)
        if driver.execute_script(at_bottom_js) and h == last_h:
            break
        last_h = h

def abs_url(href: str):
    """Resolve *href* to an absolute URL against ``BASE``.

    Returns:
        None for an empty/None href; ``https:`` prepended for protocol-relative
        (``//host/...``) values; the href unchanged when it already carries a
        scheme; otherwise the href joined onto ``BASE``. The last case covers
        both ``/path`` and relative forms such as ``productions/1`` that used
        to be returned unresolved.
    """
    # Local import: the cell's own import line only brings in urljoin.
    from urllib.parse import urlparse

    if not href:
        return None
    if href.startswith("//"):
        return "https:" + href
    if urlparse(href).scheme:
        return href
    return urljoin(BASE, href)

def text(el):
    """Return the element's space-joined, stripped text; None when the element is missing."""
    if el:
        return el.get_text(" ", strip=True)
    return None

def pick_first(soup, selectors):
    """Return the first truthy ``soup.select_one`` result over *selectors* (in order), or None."""
    hits = (soup.select_one(sel) for sel in selectors)
    # filter(None, ...) skips falsy results lazily, so later selectors run only if needed.
    return next(filter(None, hits), None)

# ---------- Browser options ----------
chrome_options = Options()
for _flag in (
    "--headless=new",
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--lang=ko-KR",
    "--window-size=1400,2400",
    "--disable-blink-features=AutomationControlled",
    ("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
     "(KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"),
):
    chrome_options.add_argument(_flag)
chrome_options.binary_location = "/usr/bin/chromium-browser"
# Module-level driver used by click_if_exists() and the script below.
driver = webdriver.Chrome(options=chrome_options)

def click_if_exists(by, sel, timeout=4):
    """Click the element located by ``(by, sel)`` once it becomes clickable.

    Uses the module-level ``driver``.

    Args:
        by: Selenium locator strategy (e.g. ``By.XPATH``).
        sel: Locator value.
        timeout: Seconds to wait for the element to become clickable.

    Returns:
        True if the element was clicked; False if it never became clickable or
        the click failed with a Selenium error (timeout, intercepted click,
        stale element, invalid selector, ...).
    """
    # Local import keeps the helper self-contained. Only Selenium errors are
    # treated as "not there"; KeyboardInterrupt and programming errors propagate.
    from selenium.common.exceptions import WebDriverException

    try:
        el = WebDriverWait(driver, timeout).until(EC.element_to_be_clickable((by, sel)))
        el.click()
    except WebDriverException:
        return False
    return True

try:
    # Local import so this step is self-contained; selenium is already a dependency of the cell.
    from selenium.common.exceptions import TimeoutException

    driver.get(HOME)

    # 1) Wait for the page body.
    WebDriverWait(driver, WAIT_SEC).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
    time.sleep(1.0)

    # 2) Dismiss cookie/notice banners (wording and structure change often, so try several patterns).
    cookie_candidates = [
        (By.XPATH, "//button[contains(., '동의') or contains(., '수락') or contains(., '확인')]"),
        (By.CSS_SELECTOR, "button#accept-all, button.cookie-accept, .cookie button"),
        (By.XPATH, "//div[contains(@class,'cookie') or contains(@class,'consent')]//button"),
    ]
    for by, sel in cookie_candidates:
        if click_if_exists(by, sel, timeout=3):
            time.sleep(0.5)
            break

    # Close app-install / notification style popups; every pattern is tried, not just the first hit.
    close_btn_candidates = [
        (By.XPATH, "//button[contains(., '닫기') or contains(., 'Close') or contains(., '취소')]"),
        (By.CSS_SELECTOR, "button[aria-label='닫기'], button[aria-label='close'], .modal button")
    ]
    for by, sel in close_btn_candidates:
        click_if_exists(by, sel, timeout=2)

    # 3) Wait for main content (a <main> element or a grid/card container); first hit wins.
    grid_wait = [
        (By.CSS_SELECTOR, "main"),
        (By.CSS_SELECTOR, "[class*='Grid'], [class*='Card'], [data-testid*='card']"),
    ]
    for by, sel in grid_wait:
        try:
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((by, sel)))
        except TimeoutException:
            # Not fatal: try the next pattern, then parse whatever is there.
            continue
        break

    # 4) Scroll to load more cards.
    human_scroll(driver)

    # 5) Parse the rendered DOM.
    soup = BeautifulSoup(driver.page_source, "html.parser")

finally:
    # Always release the browser; a failure while quitting must not mask the real error.
    try:
        driver.quit()
    except Exception:
        pass

# ---------- Card detection ----------
# Example ohou.se product detail link patterns:
#  - /store/products/<id>  (new store)
#  - /productions/<id>     (legacy / specific areas)
#  - /store/category, /projects etc. also exist, but only products are collected here
anchor_selectors = [
    "a[href*='/store/products/']",
    "a[href^='/productions/']",
]
anchors = []
for sel in anchor_selectors:
    anchors += soup.select(sel)

# De-duplicate by absolute URL; when a URL repeats, the later anchor replaces the earlier one.
uniq = {}
for a in anchors:
    href = a.get("href")
    if not href:
        continue
    link = abs_url(href)
    # Keep product detail pages only.
    if "/store/products/" in link or "/productions/" in link:
        uniq[link] = a

# Selector lists for the inner fields (title / price / image); each list is tried in order.
# NOTE(review): saved output shows titles such as "5.0" and "4.9", so one of the
# generic title selectors appears to match a rating element — confirm against the live DOM.
title_selectors = [
    "[class*='title']", "span[title]", "strong", "h3", "h4", "a[title]"
]
price_selectors = [
    "[class*='price']", "span[class*='Price']", "span[class*='cost']", "strong[class*='price']",
    "div[class*='price'] span", "div[class*='sale'] span"
]
image_selectors = [
    "img", "picture source"
]

# A "title" made only of digits, punctuation and % (e.g. "5.0", "31%") is a rating or badge, not a name.
numeric_only = re.compile(r"[\d.,%\s]+")
# A numeric token, optionally followed by a percent sign (discount rate).
price_token = re.compile(r"(\d[\d,\.]*)(\s*%)?")

rows = []
for link, a in uniq.items():
    # Title: inside the anchor first, then in up to 4 ancestors.
    title = None
    title_el = pick_first(a, title_selectors)
    if not title_el:
        for p in itertools.islice(a.parents, 0, 4):
            title_el = pick_first(p, title_selectors)
            if title_el:
                break
    if title_el:
        title = text(title_el)

    # A numeric-only title is kept only as a last resort (see the fallbacks below).
    weak_title = bool(title) and bool(numeric_only.fullmatch(title))

    anchor_title = (a.get("title") or "").strip()
    if anchor_title and (not title or weak_title):
        title = anchor_title
        weak_title = False

    # Image (lazy-load attributes and srcset considered).
    img = None
    img_el = pick_first(a, image_selectors)
    if not img_el:
        for p in itertools.islice(a.parents, 0, 3):
            img_el = pick_first(p, image_selectors)
            if img_el:
                break
    if img_el:
        for attr in ("src", "data-src", "data-original", "data-lazy", "srcset"):
            # split() tolerates leading whitespace/newlines in the attribute,
            # where split(" ") would yield an empty first token and stop the search.
            parts = (img_el.get(attr) or "").split()
            if not parts:
                continue
            first = parts[0]
            if attr == "srcset":
                # Candidates are comma-separated; drop a trailing comma after the URL.
                first = first.rstrip(",")
            img = abs_url(first)
            break
        # Prefer the image alt text over a missing or numeric-only title.
        alt = (img_el.get("alt") or "").strip()
        if alt and (not title or weak_title):
            title = alt

    # Price: inside the anchor first, then in up to 3 ancestors.
    price = None
    price_el = pick_first(a, price_selectors)
    if not price_el:
        for p in itertools.islice(a.parents, 0, 3):
            price_el = pick_first(p, price_selectors)
            if price_el:
                break
    if price_el:
        raw = price_el.get_text(" ", strip=True)
        # Take the first number that is NOT a percentage, so "31% 46,800"
        # yields 46800 rather than the discount rate.
        for m in price_token.finditer(raw.replace("원", "")):
            if not m.group(2):
                price = m.group(1).replace(",", "")
                break

    # Minimum requirement kept from the original: a title or a link must exist.
    if not (title or link):
        continue

    rows.append({
        "title": title,
        "price": price,
        "link": link,
        "image": img
    })

# Clean-up: drop rows whose title is missing, then de-duplicate on (title, link).
df = pd.DataFrame(rows)
if not df.empty:
    with_title = df.dropna(subset=["title"])
    df = with_title.drop_duplicates(subset=["title", "link"], keep="first")

# When nothing survived, save the parsed HTML so selectors can be inspected.
if df.empty:
    with open("/content/ohou_debug.html", "w", encoding="utf-8") as f:
        f.write(str(soup))
    print("⚠️ 0건입니다. /content/ohou_debug.html 로 저장했어요. 선택자/배너 처리/스크롤 수를 조정해 보세요.")

df.to_csv("ohou_home_products.csv", index=False, encoding="utf-8-sig")
print("완료! 저장 파일: ohou_home_products.csv (행:", len(df), ")")
# Last expression of the cell: the notebook renders the first 10 rows.
df.head(10)


완료! 저장 파일: ohou_home_products.csv (행: 12 )


Unnamed: 0,title,price,link,image
0,5.0,46800,https://ohou.se/productions/3331388/selling,https://image.ohousecdn.com/i/bucketplace-v2-d...
1,5.0,31,https://ohou.se/productions/2431508/selling,https://image.ohousecdn.com/i/bucketplace-v2-d...
2,4.9,62,https://ohou.se/productions/1590911/selling,https://image.ohousecdn.com/i/bucketplace-v2-d...
3,4.8,61,https://ohou.se/productions/412486/selling,https://image.ohousecdn.com/i/bucketplace-v2-d...
4,5.0,19,https://ohou.se/productions/3530968/selling,https://image.ohousecdn.com/i/bucketplace-v2-d...
5,5.0,45,https://ohou.se/productions/2640103/selling,https://image.ohousecdn.com/i/bucketplace-v2-d...
6,5.0,34,https://ohou.se/productions/3299687/selling,https://prs.ohousecdn.com/apne2/any/uploads/pr...
7,4.7,55,https://ohou.se/productions/512512/selling,https://image.ohousecdn.com/i/bucketplace-v2-d...
8,4.9,21,https://ohou.se/productions/1008902/selling,https://image.ohousecdn.com/i/bucketplace-v2-d...
9,4.9,74200,https://ohou.se/productions/2415114/selling,https://image.ohousecdn.com/i/bucketplace-v2-d...
