In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time, json, re

# Product page that this cell scrapes.
url = "http://www.10x10.co.kr/shopping/category_prd.asp?itemid=5710730"

# Launch Chrome with a maximized window.
options = Options()
for chrome_flag in ("--start-maximized",):
    options.add_argument(chrome_flag)
driver = webdriver.Chrome(options=options)

# Open the page, then pause for a fixed two seconds before any element lookup.
driver.get(url)
time.sleep(2)

# ✅ 유틸 함수
def get_text(xpath):
    """Return the stripped text of the first element matching *xpath*.

    The lookup goes through the module-level ``driver``.

    Args:
        xpath: XPath expression identifying the element.

    Returns:
        The element's ``.text`` with surrounding whitespace removed, or ``""``
        when no element matches or the element went stale while being read.

    Raises:
        Any other Selenium error (for example a closed browser session) is
        left to propagate instead of being reported as an empty field.
    """
    # Imported here so the helper carries its own dependency.
    from selenium.common.exceptions import (
        NoSuchElementException,
        StaleElementReferenceException,
    )

    try:
        return driver.find_element(By.XPATH, xpath).text.strip()
    except (NoSuchElementException, StaleElementReferenceException):
        return ""

def get_attribute(xpath, attr):
    """Return attribute *attr* of the first element matching *xpath*.

    The lookup goes through the module-level ``driver``.

    Args:
        xpath: XPath expression identifying the element.
        attr: Name of the attribute to read (for example ``"src"``).

    Returns:
        The attribute value, or ``""`` when the element is missing, went
        stale, or does not carry the attribute at all.

    Raises:
        Any other Selenium error (for example a closed browser session) is
        left to propagate instead of being reported as an empty field.
    """
    # Imported here so the helper carries its own dependency.
    from selenium.common.exceptions import (
        NoSuchElementException,
        StaleElementReferenceException,
    )

    try:
        value = driver.find_element(By.XPATH, xpath).get_attribute(attr)
    except (NoSuchElementException, StaleElementReferenceException):
        return ""
    # Selenium reports an absent attribute as None; normalise it so callers
    # always receive a string, matching the missing-element case.
    return value if value is not None else ""

# Product fields. Each lookup uses an absolute XPath under #contentWrap;
# get_text / get_attribute return "" when the element is not found.
from selenium.common.exceptions import WebDriverException

brand_name = get_text('//*[@id="contentWrap"]/div[1]/div[2]/div[1]/div[1]/form[2]/div[3]/p[1]/a[2]/span')
product_name = get_text('//*[@id="contentWrap"]/div[1]/div[2]/div[1]/div[1]/form[2]/div[3]/h2/p')
thumbnail_url = get_attribute('//*[@id="contentWrap"]/div[1]/div[1]/div[1]/div/div/div/p[1]/div/img', 'src')

# Price: dl[2] is tried first and dl[1] is the fallback (original author's
# note: discounted price first, regular price otherwise).
raw_price = (
    get_text('//*[@id="contentWrap"]/div[1]/div[2]/div[1]/div[1]/form[2]/div[4]/dl[2]/dd/strong')
    or get_text('//*[@id="contentWrap"]/div[1]/div[2]/div[1]/div[1]/form[2]/div[4]/dl[1]/dd/strong')
)
# Keep only the leading token: everything before the first whitespace or "[".
price = re.split(r'\s|\[', raw_price)[0]

# Option list: text of every <option>, skipping blanks and entries that
# contain "선택" (presumably the "please select" placeholder; verify on the page).
options_list = []
try:
    option_elements = driver.find_elements(By.XPATH, '//*[@id="contentWrap"]/div[1]/div[2]/div[1]/div[1]/form[2]/div[8]/dl/dd/p/select/option')
    for opt in option_elements:
        val = opt.text.strip()
        if val and "선택" not in val:
            options_list.append(val)
except WebDriverException as e:
    # Options are best effort: keep whatever was read, but say why it stopped
    # instead of discarding the error silently.
    print("[옵션 수집 오류]", e)

# Review collection: page through the review list, expand collapsed reviews,
# and gather unique non-empty texts until `max_reviews` is reached or no
# further page link is available.
from selenium.common.exceptions import WebDriverException

reviews = []
seen = set()  # review texts already stored, used for de-duplication
max_reviews = 100
max_stalled_rounds = 5  # consecutive passes without a new review before giving up
stalled_rounds = 0
review_block_xpath = '//*[@id="lyEvalContAll"]/div/div/a'

while len(reviews) < max_reviews:
    time.sleep(1)
    count_before = len(reviews)

    # 1. Click every a.talkShort link (original note: this opens the
    #    corresponding talkMore row).
    try:
        expand_btns = driver.find_elements(By.CSS_SELECTOR, 'a.talkShort')
    except WebDriverException:
        expand_btns = []
    for btn in expand_btns:
        try:
            driver.execute_script("arguments[0].click();", btn)
            time.sleep(0.1)
        except WebDriverException:
            # One button that cannot be clicked must not stop the others.
            continue

    # 2. Read the review bodies from the expanded talkMore rows.
    try:
        review_elements = driver.find_elements(
            By.XPATH, '//*[@id="lyEvalContAll"]/table/tbody/tr[@class="talkMore"]/td/div/div[@class="customerReview"]/div[@class="comment"]/div[@class="textArea"]/p'
        )
        for p in review_elements:
            text = p.text.strip()
            if text and text not in seen:
                seen.add(text)
                reviews.append(text)
                if len(reviews) >= max_reviews:
                    break
    except Exception as e:
        print("[리뷰 수집 오류]", e)
        break

    # Stop after several consecutive passes without anything new; otherwise a
    # pager whose click never takes effect would keep this loop running forever.
    if len(reviews) == count_before:
        stalled_rounds += 1
        if stalled_rounds >= max_stalled_rounds:
            print("[페이지 종료] 더 이상 새로운 리뷰 없음.")
            break
    else:
        stalled_rounds = 0

    # 3. Move to the next review page (or the next block of pages).
    try:
        a_tags = driver.find_elements(By.XPATH, review_block_xpath)
        current_index = -1
        for i, tag in enumerate(a_tags):
            # get_attribute returns None for links without a class attribute.
            if "current" in (tag.get_attribute("class") or ""):
                current_index = i
                break

        # No current marker, or the current link is the last one: finished.
        if current_index == -1 or current_index + 1 >= len(a_tags):
            print("[페이지 종료] 더 이상 다음 페이지 없음.")
            break

        next_tag = a_tags[current_index + 1]
        next_text = next_tag.text.strip()

        # Follow the link only when it is a page number or the "next page" link.
        if re.match(r'^\d+$', next_text) or "다음 페이지" in next_text:
            driver.execute_script("arguments[0].click();", next_tag)
            time.sleep(1)

        else:
            print("[페이지 종료] 다음 버튼 없음.")
            break

    except Exception as e:
        print(f"[페이지 이동 오류] {e}")
        break
    

# Assemble the scraped fields into one record. The two category values are
# fixed strings, not read from the page.
data = {
    "brand_name": brand_name,
    "product_name": product_name,
    "category_main": "디자인문구",
    "category_sub": "플래너",
    "options": ", ".join(options_list),
    "product_url": url,
    "thumbnail_url": thumbnail_url,
    "review_content": reviews,
    "price": price
}

# Write the record as UTF-8 JSON. The browser is closed in `finally` so a
# failed write (permissions, disk full, ...) does not leave Chrome running.
try:
    with open("product_5710730_final.json", "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    print(f"\n✅ 크롤링 완료: 리뷰 {len(reviews)}개 수집됨")
finally:
    driver.quit()


[페이지 종료] 마지막 페이지에서 새로운 리뷰 없음.

✅ 크롤링 완료: 리뷰 0개 수집됨


-----

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time, json, re, os

# Load previously saved results first, before any browser is started, so an
# unreadable save file cannot leave an orphaned Chrome process behind.
save_path = "tenbyten_products.json"
if os.path.exists(save_path):
    with open(save_path, "r", encoding="utf-8") as f:
        all_data = json.load(f)
    # Later code appends to all_data; fail now rather than after crawling.
    if not isinstance(all_data, list):
        raise ValueError(
            f"{save_path} must contain a JSON list, got {type(all_data).__name__}"
        )
else:
    all_data = []

# Launch Chrome with a maximized window.
options = Options()
options.add_argument("--start-maximized")
driver = webdriver.Chrome(options=options)

# Category listing page (original note: design stationery > planner).
category_url = "https://www.10x10.co.kr/shopping/category_main.asp?disp=101"
driver.get(category_url)
time.sleep(2)

# Scroll to the bottom up to 20 times, stopping as soon as the document
# height no longer grows between two scrolls.
SCROLL_PAUSE_TIME = 1
last_height = driver.execute_script("return document.body.scrollHeight")

for _ in range(20):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(SCROLL_PAUSE_TIME)
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height


# Collect up to MAX_PRODUCTS unique product URLs from the listing page.
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
)

MAX_PRODUCTS = 100
# Positions to probe; more than MAX_PRODUCTS because some positions carry no
# product link.
MAX_CARD_INDEX = 150

product_urls = []
seen_urls = set()  # O(1) duplicate check; product_urls keeps page order

# NOTE(review): the tag name "alticle" (sic) is kept exactly as written. The
# path looks copied from browser dev tools, so it presumably mirrors the
# site's own markup; confirm against the live DOM before "correcting" it.
for i in range(1, MAX_CARD_INDEX + 1):
    xpath = f'//*[@id="cateSubLst"]/section/div[2]/div/div[2]/alticle[{i}]/div[2]/p[2]/a'
    try:
        a_tag = driver.find_element(By.XPATH, xpath)
        href = a_tag.get_attribute("href")
    except (NoSuchElementException, StaleElementReferenceException):
        # No usable link at this position; move on to the next one. Other
        # errors (for example a closed browser) are allowed to surface.
        continue
    if href and "itemid=" in href and href not in seen_urls:
        seen_urls.add(href)
        product_urls.append(href)
    if len(product_urls) >= MAX_PRODUCTS:
        break

print(f"[INFO] 상품 URL 수집 완료: {len(product_urls)}개")



# ✅ 상품 상세 정보 수집 함수
def crawl_product_detail(url, max_reviews=100):
    """Open a product page and return its scraped details.

    Uses the module-level ``driver``. Missing page elements produce empty
    strings rather than errors; reviews and pagination are best effort.

    Args:
        url: Product page URL to load.
        max_reviews: Upper bound on the number of review texts collected.

    Returns:
        A dict with the keys ``brand_name``, ``product_name``,
        ``category_main``, ``category_sub``, ``options`` (comma-joined
        string), ``product_url``, ``thumbnail_url``, ``review_content``
        (list of strings) and ``price``. The two category values are fixed
        strings, not read from the page.

    Raises:
        Selenium errors other than the handled lookup failures (for example a
        closed browser session) propagate to the caller.
    """
    # Imported here so the function carries its own dependency.
    from selenium.common.exceptions import (
        NoSuchElementException,
        StaleElementReferenceException,
        WebDriverException,
    )

    lookup_errors = (NoSuchElementException, StaleElementReferenceException)

    driver.get(url)
    time.sleep(2)

    def get_text(xpath):
        """Stripped text of the first match, or "" if it cannot be read."""
        try:
            return driver.find_element(By.XPATH, xpath).text.strip()
        except lookup_errors:
            return ""

    def get_attribute(xpath, attr):
        """Attribute of the first match, or "" if absent or unreadable."""
        try:
            value = driver.find_element(By.XPATH, xpath).get_attribute(attr)
        except lookup_errors:
            return ""
        # Selenium reports an absent attribute as None.
        return value if value is not None else ""

    brand_name = get_text('//*[@id="contentWrap"]/div[1]/div[2]/div[1]/div[1]/form[2]/div[3]/p[1]/a[2]/span')
    product_name = get_text('//*[@id="contentWrap"]/div[1]/div[2]/div[1]/div[1]/form[2]/div[3]/h2/p')
    thumbnail_url = get_attribute('//*[@id="contentWrap"]/div[1]/div[1]/div[1]/div/div/div/p[1]/div/img', 'src')
    # dl[2] is tried first, dl[1] is the fallback.
    raw_price = (
        get_text('//*[@id="contentWrap"]/div[1]/div[2]/div[1]/div[1]/form[2]/div[4]/dl[2]/dd/strong')
        or get_text('//*[@id="contentWrap"]/div[1]/div[2]/div[1]/div[1]/form[2]/div[4]/dl[1]/dd/strong')
    )
    # Keep only the part before the first whitespace or "[".
    price = re.split(r'\s|\[', raw_price)[0]

    # Options: text of every <option>, skipping blanks and entries containing
    # "선택" (presumably the "please select" placeholder; verify on the page).
    options_list = []
    try:
        option_elements = driver.find_elements(By.XPATH, '//*[@id="contentWrap"]/div[1]/div[2]/div[1]/div[1]/form[2]/div[8]/dl/dd/p/select/option')
        for opt in option_elements:
            val = opt.text.strip()
            if val and "선택" not in val:
                options_list.append(val)
    except StaleElementReferenceException:
        # The select was re-rendered while being read; keep what was collected.
        pass

    # Reviews: collect unique non-empty texts page by page.
    reviews = []
    seen = set()
    last_review_count = 0

    while len(reviews) < max_reviews:
        time.sleep(1)

        review_elements = driver.find_elements(By.CSS_SELECTOR, "p.review-contents")
        for p in review_elements:
            text = p.text.strip()
            if text and text not in seen:
                seen.add(text)
                reviews.append(text)
                if len(reviews) >= max_reviews:
                    break

        # A pass that added nothing means there is nothing more to read.
        if len(reviews) == last_review_count:
            print("[페이지 종료] 더 이상 새로운 리뷰 없음.")
            break
        last_review_count = len(reviews)

        # Go to the next page; any WebDriver failure here ends pagination but
        # keeps the reviews gathered so far.
        try:
            next_btn = driver.find_element(By.CSS_SELECTOR, ".pagination a.next:not(.disabled)")
            driver.execute_script("arguments[0].click();", next_btn)
        except WebDriverException:
            print("[페이지 종료] 다음 버튼 없음 또는 비활성화.")
            break

    return {
        "brand_name": brand_name,
        "product_name": product_name,
        "category_main": "디자인문구",
        "category_sub": "다이어리/플래너",
        "options": ", ".join(options_list),
        "product_url": url,
        "thumbnail_url": thumbnail_url,
        "review_content": reviews,
        "price": price
    }

# Crawl every product that is not already saved, writing a checkpoint after
# each one. The browser is always closed at the end.
from selenium.common.exceptions import (
    InvalidSessionIdException,
    NoSuchWindowException,
)

# URLs already present in the save file; non-dict entries (e.g. None) are ignored.
done_urls = {d.get("product_url") for d in all_data if isinstance(d, dict)}
tmp_path = save_path + ".tmp"

try:
    for url in product_urls:
        if url in done_urls:
            continue
        try:
            print(f"[INFO] 크롤링 중: {url}")
            data = crawl_product_detail(url)
            if data:  # store only non-empty results
                all_data.append(data)
                done_urls.add(url)
                # Write to a temporary file and swap it in, so an interrupted
                # write cannot truncate the existing checkpoint.
                with open(tmp_path, "w", encoding="utf-8") as f:
                    json.dump(all_data, f, ensure_ascii=False, indent=2)
                os.replace(tmp_path, save_path)
        except (InvalidSessionIdException, NoSuchWindowException) as e:
            # The browser is gone; every remaining URL would fail the same way.
            print(f"[ERROR] {url}: {e}")
            break
        except Exception as e:
            print(f"[ERROR] {url}: {e}")
finally:
    driver.quit()



InvalidSessionIdException: Message: invalid session id: session deleted as the browser has closed the connection
from disconnected: not connected to DevTools
  (Session info: chrome=135.0.7049.115)
Stacktrace:
	GetHandleVerifier [0x00007FF6A7D9EFA5+77893]
	GetHandleVerifier [0x00007FF6A7D9F000+77984]
	(No symbol) [0x00007FF6A7B691BA]
	(No symbol) [0x00007FF6A7B54F15]
	(No symbol) [0x00007FF6A7B79F04]
	(No symbol) [0x00007FF6A7BEEACF]
	(No symbol) [0x00007FF6A7C0E972]
	(No symbol) [0x00007FF6A7BE6F03]
	(No symbol) [0x00007FF6A7BB0328]
	(No symbol) [0x00007FF6A7BB1093]
	GetHandleVerifier [0x00007FF6A8057B6D+2931725]
	GetHandleVerifier [0x00007FF6A8052132+2908626]
	GetHandleVerifier [0x00007FF6A80700F3+3031443]
	GetHandleVerifier [0x00007FF6A7DB91EA+184970]
	GetHandleVerifier [0x00007FF6A7DC086F+215311]
	GetHandleVerifier [0x00007FF6A7DA6EC4+110436]
	GetHandleVerifier [0x00007FF6A7DA7072+110866]
	GetHandleVerifier [0x00007FF6A7D8D479+5401]
	BaseThreadInitThunk [0x00007FFAEAB97374+20]
	RtlUserThreadStart [0x00007FFAEAF9CC91+33]


In [38]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time, json, re, os

# Load previously saved results first, before any browser is started, so an
# unreadable save file cannot leave an orphaned Chrome process behind.
save_path = "tenbyten_products.json"
if os.path.exists(save_path):
    with open(save_path, "r", encoding="utf-8") as f:
        all_data = json.load(f)
    # Later code appends to all_data; fail now rather than after crawling.
    if not isinstance(all_data, list):
        raise ValueError(
            f"{save_path} must contain a JSON list, got {type(all_data).__name__}"
        )
else:
    all_data = []

# Launch Chrome with a maximized window.
options = Options()
options.add_argument("--start-maximized")
driver = webdriver.Chrome(options=options)

# Category listing page.
category_url = "https://www.10x10.co.kr/shopping/category_main.asp?disp=101"
driver.get(category_url)
time.sleep(2)

# Scroll to the bottom 20 times, one second apart, with no early exit.
for _ in range(20):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(1)

# Gather product page URLs (at most 100, no duplicates, in page order) from
# the anchors inside the product cards.
product_urls = []
anchors = driver.find_elements(By.CSS_SELECTOR, "article.renewal-product-container a")
for anchor in anchors:
    # Checked before reading the next anchor, so no anchor is touched once
    # the limit has been reached.
    if len(product_urls) >= 100:
        break
    target = anchor.get_attribute("href")
    if not target or "itemid=" not in target:
        continue
    if target in product_urls:
        continue
    product_urls.append(target)

print(f"[INFO] 상품 URL 수집 완료: {len(product_urls)}개")

# ✅ 상품 상세 정보 수집 함수
def crawl_product_detail(url, max_reviews=100):
    """Open a product page and return its scraped details.

    Uses the module-level ``driver``. Missing page elements produce empty
    strings rather than errors; review collection is best effort and never
    prevents the other fields from being returned.

    Args:
        url: Product page URL to load.
        max_reviews: Upper bound on the number of review texts collected.

    Returns:
        A dict with the keys ``brand_name``, ``product_name``,
        ``category_main``, ``category_sub``, ``options`` (comma-joined
        string), ``product_url``, ``thumbnail_url``, ``review_content``
        (list of strings) and ``price``. The two category values are fixed
        strings, not read from the page.

    Raises:
        Selenium errors raised while loading the page or reading the product
        fields, other than the handled lookup failures, propagate to the caller.
    """
    # Imported here so the function carries its own dependency.
    from selenium.common.exceptions import (
        NoSuchElementException,
        StaleElementReferenceException,
        WebDriverException,
    )

    lookup_errors = (NoSuchElementException, StaleElementReferenceException)

    driver.get(url)
    time.sleep(2)

    def get_text(xpath):
        """Stripped text of the first match, or "" if it cannot be read."""
        try:
            return driver.find_element(By.XPATH, xpath).text.strip()
        except lookup_errors:
            return ""

    def get_attribute(xpath, attr):
        """Attribute of the first match, or "" if absent or unreadable."""
        try:
            value = driver.find_element(By.XPATH, xpath).get_attribute(attr)
        except lookup_errors:
            return ""
        # Selenium reports an absent attribute as None.
        return value if value is not None else ""

    brand_name = get_text('//*[@id="contentWrap"]/div[1]/div[2]/div[1]/div[1]/form[2]/div[3]/p[1]/a[2]/span')
    product_name = get_text('//*[@id="contentWrap"]/div[1]/div[2]/div[1]/div[1]/form[2]/div[3]/h2/p')
    thumbnail_url = get_attribute('//*[@id="contentWrap"]/div[1]/div[1]/div[1]/div/div/div/p[1]/div/img', 'src')
    # dl[2] is tried first, dl[1] is the fallback.
    raw_price = get_text('//*[@id="contentWrap"]/div[1]/div[2]/div[1]/div[1]/form[2]/div[4]/dl[2]/dd/strong') or \
                get_text('//*[@id="contentWrap"]/div[1]/div[2]/div[1]/div[1]/form[2]/div[4]/dl[1]/dd/strong')
    # Keep only the part before the first whitespace or "[".
    price = re.split(r'\s|\[', raw_price)[0]

    # Options: text of every <option>, skipping blanks and entries containing
    # "선택" (presumably the "please select" placeholder; verify on the page).
    options_list = []
    try:
        option_elements = driver.find_elements(By.XPATH, '//*[@id="contentWrap"]/div[1]/div[2]/div[1]/div[1]/form[2]/div[8]/dl/dd/p/select/option')
        for opt in option_elements:
            val = opt.text.strip()
            if val and "선택" not in val:
                options_list.append(val)
    except StaleElementReferenceException:
        # The select was re-rendered while being read; keep what was collected.
        pass

    # Reviews: collect unique non-empty texts page by page.
    reviews = []
    seen = set()
    last_count = 0
    try:
        while len(reviews) < max_reviews:
            time.sleep(1)
            # Click every a.talkShort link so collapsed reviews are expanded.
            for btn in driver.find_elements(By.CSS_SELECTOR, "a.talkShort"):
                try:
                    driver.execute_script("arguments[0].click();", btn)
                except WebDriverException:
                    continue

            review_elements = driver.find_elements(By.CSS_SELECTOR, "#lyEvalContAll p")
            for r in review_elements:
                text = r.text.strip()
                if text and text not in seen:
                    seen.add(text)
                    reviews.append(text)
                    if len(reviews) >= max_reviews:
                        break

            # A pass that added nothing means there is nothing more to read.
            if len(reviews) == last_count:
                print("[리뷰 종료] 마지막 페이지 도달.")
                break
            last_count = len(reviews)

            # Next page: click only when the link's markup contains "href".
            try:
                next_btn = driver.find_element(By.CSS_SELECTOR, ".pagination a.next:not(.disabled)")
                # get_attribute may return None; treat that as "no href".
                if "href" in (next_btn.get_attribute("outerHTML") or ""):
                    driver.execute_script("arguments[0].click();", next_btn)
                else:
                    break
            except WebDriverException:
                break
    except WebDriverException as e:
        # Keep the reviews gathered so far, but report why collection stopped.
        print("[리뷰 수집 실패]", e)

    return {
        "brand_name": brand_name,
        "product_name": product_name,
        "category_main": "디자인문구",
        "category_sub": "다이어리/플래너",
        "options": ", ".join(options_list),
        "product_url": url,
        "thumbnail_url": thumbnail_url,
        "review_content": reviews,
        "price": price
    }

# Crawl every product that is not already saved, writing a checkpoint after
# each one. The browser is closed even if the loop is interrupted.
from selenium.common.exceptions import (
    InvalidSessionIdException,
    NoSuchWindowException,
)

# URLs already present in the save file; non-dict entries are ignored.
done_urls = {d.get("product_url") for d in all_data if isinstance(d, dict)}
tmp_path = save_path + ".tmp"

try:
    for url in product_urls:
        if url in done_urls:
            continue
        try:
            print(f"[INFO] 크롤링 중: {url}")
            data = crawl_product_detail(url)
            all_data.append(data)
            done_urls.add(url)
            # Write to a temporary file and swap it in, so an interrupted
            # write cannot truncate the existing checkpoint.
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(all_data, f, ensure_ascii=False, indent=2)
            os.replace(tmp_path, save_path)
        except (InvalidSessionIdException, NoSuchWindowException) as e:
            # The browser is gone; every remaining URL would fail the same way.
            print(f"[ERROR] {url}: {e}")
            break
        except Exception as e:
            print(f"[ERROR] {url}: {e}")
finally:
    driver.quit()

print(f"\n✅ 크롤링 완료: 총 {len(all_data)}개 상품 수집됨")


[INFO] 상품 URL 수집 완료: 0개

✅ 크롤링 완료: 총 7개 상품 수집됨
