In [19]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time, json, re

# Driver setup: launch Chrome with a maximized window.
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
driver = webdriver.Chrome(options=options)

# Open a product page that is known to have reviews.
url = (
    "https://www.lotteon.com/p/product/bundle/LO2457950278"
    "?sitmNo=&mall_no=1&dp_infw_cd=CASFC08090000&areaCode=CAS"
)
driver.get(url)
# Fixed 2-second pause after navigation, before any element is queried.
time.sleep(2)

# 안전 추출 함수
def get_text_safe(selector):
    """Return the stripped text of the first element matching a CSS selector.

    Uses the module-level ``driver``.

    Args:
        selector: CSS selector passed to ``driver.find_element``.

    Returns:
        The element's ``.text`` with surrounding whitespace removed, or ``""``
        when the lookup or the text read raises an ordinary exception.
    """
    try:
        return driver.find_element(By.CSS_SELECTOR, selector).text.strip()
    except Exception:
        # Best-effort lookup: any ordinary failure (e.g. no matching element)
        # yields "". Catching Exception instead of using a bare except lets
        # KeyboardInterrupt / SystemExit propagate so a run can be interrupted.
        return ""

def get_attribute_safe(selector, attr):
    """Return an attribute of the first element matching a CSS selector.

    Uses the module-level ``driver``.

    Args:
        selector: CSS selector passed to ``driver.find_element``.
        attr: Name of the attribute to read via ``get_attribute``.

    Returns:
        The attribute value, or ``""`` when the lookup raises an ordinary
        exception or the attribute read returns ``None``.
    """
    try:
        value = driver.find_element(By.CSS_SELECTOR, selector).get_attribute(attr)
    except Exception:
        # Best-effort lookup; Exception (not a bare except) keeps
        # KeyboardInterrupt / SystemExit propagating.
        return ""
    # Normalise a None result to "" so every "not available" case looks the same.
    return "" if value is None else value

# Scrape one product page and save it as JSON. Everything runs inside
# try/finally so the browser session is closed even if an unexpected error
# interrupts the scrape.
try:
    # Basic product information.
    brand_name = get_text_safe("a.pd-widget1__product-seller-item")
    product_name = get_text_safe("h2.pd-widget1__product-name")
    product_url = driver.current_url
    thumbnail_url = get_attribute_safe(".productVisualWrap img", "src")

    # Price: read the element's textContent and keep only the digits.
    try:
        price_elem = driver.find_element(By.XPATH, '//*[@id="stickyTopParent"]/div[2]/div[2]/div/div/div[2]/span')
        raw_price = price_elem.get_attribute("textContent")
        price = re.sub(r"[^\d]", "", raw_price)
    except Exception:
        price = ""

    # Options: click the selectResult box, then join the non-empty <li> texts.
    # NOTE: this rebinds `options`, which held the ChromeOptions object earlier.
    options = ""
    try:
        driver.find_element(By.CSS_SELECTOR, "div.selectResult").click()
        time.sleep(1)
        li_elements = driver.find_elements(By.CSS_SELECTOR, "ul.selectLists li")
        options = ", ".join([li.text.strip().replace("\n", " ") for li in li_elements if li.text.strip()])
    except Exception:
        # Best-effort: if the option box cannot be opened, options stays "".
        pass

    # Reviews: read the review texts currently rendered, then page forward with
    # the "next" button (no review tab is clicked here).
    review_content = []
    review_limit = 100
    previous_page_texts = None
    try:
        while len(review_content) < review_limit:
            review_spans = driver.find_elements(
                By.CSS_SELECTOR, "#reviewMain .reviewList .texting"
            )
            page_texts = []
            for span in review_spans:
                text = span.text.strip().replace("\n", " ")
                if text:
                    page_texts.append(text)

            # Stop when the page shows no reviews or is identical to the page
            # just read: the "next" click did not advance, and continuing
            # would either loop forever or append the same reviews again.
            if not page_texts or page_texts == previous_page_texts:
                break
            previous_page_texts = page_texts
            review_content.extend(page_texts[: review_limit - len(review_content)])
            if len(review_content) >= review_limit:
                break

            # Click the "next page" button.
            try:
                next_btn = driver.find_element(By.CSS_SELECTOR, ".pagination .next")
                driver.execute_script("arguments[0].click();", next_btn)
                time.sleep(2)
            except Exception:
                break  # no next button (or the click failed): stop paging
    except Exception as e:
        print("[리뷰 수집 오류]", e)

    # Assemble the result record.
    result = {
        "brand_name": brand_name,
        "product_name": product_name,
        "category_main": "스포츠/레저",
        "category_sub": "의류",
        "options": options,
        "product_url": product_url,
        "thumbnail_url": thumbnail_url,
        "review_content": review_content,
        "price": price
    }

    # Save as UTF-8 JSON, keeping non-ASCII text readable.
    with open("lotteon_checked_product.json", "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
finally:
    driver.quit()
print("[완료] 리뷰 있는 상품 수집 완료.")


[완료] 리뷰 있는 상품 수집 완료.


-------

In [27]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time, json

# 1. Driver setup: launch Chrome with a maximized window.
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
driver = webdriver.Chrome(options=options)

TARGET_COUNT = 100    # stop once this many product URLs have been collected
MAX_EMPTY_PAGES = 3   # give up after this many consecutive pages with no new URL
# <ul> holding the product <li> items, and the "next page" link beside it.
LIST_XPATH = '//*[@id="content"]/div/section/div/section[3]/div/div/div[2]/div[1]/ul'
NEXT_BTN_XPATH = '//*[@id="content"]/div/section/div/section[3]/div/div/div[2]/div[2]/a[@class="srchPaginationNext"]'

product_urls = []
page = 1

# try/finally guarantees the browser is closed even if the run is interrupted.
try:
    # Clothing category listing URL.
    url = "https://www.lotteon.com/csearch/render/category?render=nqapi&platform=pc&collection_id=9&login=Y&u9=navigate&u8=FC08090000&mallId=1"
    driver.get(url)
    time.sleep(3)

    # 2. Collect product URLs (up to TARGET_COUNT).
    empty_pages = 0
    while len(product_urls) < TARGET_COUNT:
        print(f"[페이지 {page}] 상품 수집 중... 현재까지 {len(product_urls)}개")
        count_before = len(product_urls)

        # Probe exactly the <li> items present on the page instead of a fixed
        # index range, so no item is missed and no nonexistent index is tried.
        try:
            item_count = len(driver.find_elements(By.XPATH, f"{LIST_XPATH}/li"))
        except Exception:
            item_count = 0

        for i in range(1, item_count + 1):
            try:
                item_xpath = f"{LIST_XPATH}/li[{i}]"

                # Skip advertised products (items carrying an ad-flag tooltip button).
                if driver.find_elements(By.XPATH, f'{item_xpath}//*[contains(@id, "s-adflag-tooltip__button")]'):
                    continue

                # Collect the product link, ignoring duplicates.
                a_tag = driver.find_element(By.XPATH, f"{item_xpath}/div/div/a")
                href = a_tag.get_attribute("href")

                if href and href not in product_urls:
                    product_urls.append(href)
                    print(f"{len(product_urls)}: {href}")

                if len(product_urls) >= TARGET_COUNT:
                    break
            except Exception:
                # Best-effort per item: an item without the expected link is skipped.
                continue

        if len(product_urls) >= TARGET_COUNT:
            break

        # Bound the loop: if the "next" link keeps existing but pages stop
        # yielding new URLs, stop instead of clicking forever.
        if len(product_urls) == count_before:
            empty_pages += 1
            if empty_pages >= MAX_EMPTY_PAGES:
                print("새 상품이 없는 페이지가 연속되어 수집을 중단합니다")
                break
        else:
            empty_pages = 0

        # Click through to the next page.
        try:
            next_btn = driver.find_element(By.XPATH, NEXT_BTN_XPATH)
            driver.execute_script("arguments[0].click();", next_btn)
            page += 1
            time.sleep(3)
        except Exception:
            print("다음 페이지 없음 또는 버튼 클릭 실패")
            break

    # 3. Save the collected URLs as UTF-8 JSON.
    with open("lotteon_clothes_product_urls.json", "w", encoding="utf-8") as f:
        json.dump(product_urls, f, ensure_ascii=False, indent=2)
finally:
    driver.quit()
print(f"✅ 총 {len(product_urls)}개 상품 URL 수집 완료")


[페이지 1] 상품 수집 중... 현재까지 0개
1: https://www.lotteon.com/p/product/LO2475783368?sitmNo=LO2475783368_2475783373&mall_no=1&dp_infw_cd=CASFC08090000&areaCode=CAS
2: https://www.lotteon.com/p/product/LO2475783913?sitmNo=LO2475783913_2475783914&mall_no=1&dp_infw_cd=CASFC08090000&areaCode=CAS
3: https://www.lotteon.com/p/product/LO2466996770?sitmNo=LO2466996770_2466996771&mall_no=1&dp_infw_cd=CASFC08090000&areaCode=CAS
4: https://www.lotteon.com/p/product/bundle/LO2467590000?sitmNo=&mall_no=1&dp_infw_cd=CASFC08090000&areaCode=CAS
5: https://www.lotteon.com/p/product/LO2467450606?sitmNo=LO2467450606_2467450607&mall_no=1&dp_infw_cd=CASFC08090000&areaCode=CAS
6: https://www.lotteon.com/p/product/bundle/LO2457950278?sitmNo=&mall_no=1&dp_infw_cd=CASFC08090000&areaCode=CAS
7: https://www.lotteon.com/p/product/LO2466944701?sitmNo=LO2466944701_2466944702&mall_no=1&dp_infw_cd=CASFC08090000&areaCode=CAS
8: https://www.lotteon.com/p/product/LO2105660626?sitmNo=LO2105660626_2105660628&mall_no=1&dp_infw_cd=

-----

In [14]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time, json, re

# Driver setup: launch Chrome with a maximized window.
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
driver = webdriver.Chrome(options=options)

TARGET_COUNT = 100    # stop once this many product URLs have been collected
MAX_EMPTY_PAGES = 3   # give up after this many consecutive pages with no new URL
# <ul> holding the product <li> items, and the "next page" link beside it.
LIST_XPATH = '//*[@id="content"]/div/section/div/section[3]/div/div/div[2]/div[1]/ul'
NEXT_BTN_XPATH = '//*[@id="content"]/div/section/div/section[3]/div/div/div[2]/div[2]/a[@class="srchPaginationNext"]'

# Open the category listing page (category code FC08220000).
# NOTE(review): the original comment called this the clothing category, but the
# records built later in this cell are labelled as fishing — confirm the category.
category_url = "https://www.lotteon.com/csearch/render/category?render=nqapi&platform=pc&collection_id=9&login=Y&u9=navigate&u8=FC08220000&mallId=1"
driver.get(category_url)
time.sleep(1)

product_urls = []
page = 1

# Collect up to TARGET_COUNT product URLs.
empty_pages = 0
while len(product_urls) < TARGET_COUNT:
    print(f"[페이지 {page}] 상품 수집 중... 현재까지 {len(product_urls)}개")
    count_before = len(product_urls)

    # Probe exactly the <li> items present on the page instead of a fixed
    # index range, so no item is missed and no nonexistent index is tried.
    try:
        item_count = len(driver.find_elements(By.XPATH, f"{LIST_XPATH}/li"))
    except Exception:
        item_count = 0

    for i in range(1, item_count + 1):
        try:
            item_xpath = f"{LIST_XPATH}/li[{i}]"

            # Skip items carrying an ad-flag tooltip button.
            if driver.find_elements(By.XPATH, f'{item_xpath}//*[contains(@id, "s-adflag-tooltip__button")]'):
                continue

            a_tag = driver.find_element(By.XPATH, f"{item_xpath}/div/div/a")
            href = a_tag.get_attribute("href")
            if href and href not in product_urls:
                product_urls.append(href)
                print(f"{len(product_urls)}: {href}")
            if len(product_urls) >= TARGET_COUNT:
                break
        except Exception:
            # Best-effort per item: an item without the expected link is skipped.
            continue

    if len(product_urls) >= TARGET_COUNT:
        break

    # Bound the loop: if the "next" link keeps existing but pages stop
    # yielding new URLs, stop instead of clicking forever.
    if len(product_urls) == count_before:
        empty_pages += 1
        if empty_pages >= MAX_EMPTY_PAGES:
            print("새 상품이 없는 페이지가 연속되어 수집을 중단합니다")
            break
    else:
        empty_pages = 0

    # Move to the next page.
    try:
        next_btn = driver.find_element(By.XPATH, NEXT_BTN_XPATH)
        driver.execute_script("arguments[0].click();", next_btn)
        page += 1
        time.sleep(1)
    except Exception:
        print("다음 페이지 없음 또는 클릭 실패")
        break

# Per-product detail records are accumulated here.
results = []

def get_text_safe(selector):
    """Return the stripped text of the first element matching a CSS selector.

    Uses the module-level ``driver``.

    Args:
        selector: CSS selector passed to ``driver.find_element``.

    Returns:
        The element's ``.text`` with surrounding whitespace removed, or ``""``
        when the lookup or the text read raises an ordinary exception.
    """
    try:
        return driver.find_element(By.CSS_SELECTOR, selector).text.strip()
    except Exception:
        # Best-effort lookup: any ordinary failure (e.g. no matching element)
        # yields "". Catching Exception instead of using a bare except lets
        # KeyboardInterrupt / SystemExit propagate so a run can be interrupted.
        return ""

def get_attribute_safe(selector, attr):
    """Return an attribute of the first element matching a CSS selector.

    Uses the module-level ``driver``.

    Args:
        selector: CSS selector passed to ``driver.find_element``.
        attr: Name of the attribute to read via ``get_attribute``.

    Returns:
        The attribute value, or ``""`` when the lookup raises an ordinary
        exception or the attribute read returns ``None``.
    """
    try:
        value = driver.find_element(By.CSS_SELECTOR, selector).get_attribute(attr)
    except Exception:
        # Best-effort lookup; Exception (not a bare except) keeps
        # KeyboardInterrupt / SystemExit propagating.
        return ""
    # Normalise a None result to "" so every "not available" case looks the same.
    return "" if value is None else value

# Visit every collected product URL, scrape its details, and save all records.
# try/finally guarantees the browser is closed even if the run is interrupted.
review_limit = 100  # maximum number of review texts kept per product
try:
    for idx, url in enumerate(product_urls):
        print(f"\n[{idx+1}/{len(product_urls)}] 상품 정보 수집 중: {url}")
        driver.get(url)
        time.sleep(2)

        brand_name = get_text_safe("a.pd-widget1__product-seller-item")
        product_name = get_text_safe("h2.pd-widget1__product-name")
        product_url = driver.current_url
        thumbnail_url = get_attribute_safe(".productVisualWrap img", "src")

        # Price: take the element's visible text and keep only the digits.
        try:
            price_elem = driver.find_element(By.XPATH, '//*[@id="stickyTopParent"]/div[2]/section[2]/div/dl/dd/strong')
            price = re.sub(r"[^\d]", "", price_elem.text)
        except Exception:
            price = ""

        # Options: click the selectResult box, then join the non-empty <li> texts.
        options = ""
        try:
            driver.find_element(By.CSS_SELECTOR, "div.selectResult").click()
            time.sleep(1)
            li_elements = driver.find_elements(By.CSS_SELECTOR, "ul.selectLists li")
            options = ", ".join([li.text.strip().replace("\n", " ") for li in li_elements if li.text.strip()])
        except Exception:
            # Best-effort: if the option box cannot be opened, options stays "".
            pass

        # Reviews: read the rendered review texts, then page forward.
        review_content = []
        previous_page_texts = None
        try:
            while len(review_content) < review_limit:
                spans = driver.find_elements(By.CSS_SELECTOR, "#reviewMain .reviewList .texting")
                page_texts = []
                for span in spans:
                    txt = span.text.strip().replace("\n", " ")
                    if txt:
                        page_texts.append(txt)

                # Stop when the page shows no reviews or is identical to the
                # page just read: the "next" click did not advance, and
                # continuing would loop forever or append the same reviews again.
                if not page_texts or page_texts == previous_page_texts:
                    break
                previous_page_texts = page_texts
                review_content.extend(page_texts[: review_limit - len(review_content)])
                if len(review_content) >= review_limit:
                    break

                try:
                    next_btn = driver.find_element(By.CSS_SELECTOR, ".pagination .next")
                    driver.execute_script("arguments[0].click();", next_btn)
                    time.sleep(2)
                except Exception:
                    break  # no next button (or the click failed): stop paging
        except Exception as e:
            print("[리뷰 수집 오류]", e)

        results.append({
            "brand_name": brand_name,
            "product_name": product_name,
            "category_main": "스포츠/레저",
            "category_sub": "낚시",
            "options": options,
            "product_url": product_url,
            "thumbnail_url": thumbnail_url,
            "review_content": review_content,
            "price": price
        })

    # Save all records as UTF-8 JSON, keeping non-ASCII text readable.
    with open("lotteon_fishing.json", "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
finally:
    driver.quit()
print(f"\n✅ 총 {len(results)}개 상품 수집 및 저장 완료")


[페이지 1] 상품 수집 중... 현재까지 0개
1: https://www.lotteon.com/p/product/LO2265393700?sitmNo=LO2265393700_2265393701&mall_no=1&dp_infw_cd=CASFC08220000&areaCode=CAS
2: https://www.lotteon.com/p/product/PD49705118?mall_no=1&dp_infw_cd=CASFC08220000&areaCode=CAS
3: https://www.lotteon.com/p/product/LO2440926975?sitmNo=LO2440926975_2440926976&mall_no=1&dp_infw_cd=CASFC08220000&areaCode=CAS
4: https://www.lotteon.com/p/product/LO2416285256?sitmNo=LO2416285256_2416285257&mall_no=1&dp_infw_cd=CASFC08220000&areaCode=CAS
5: https://www.lotteon.com/p/product/LO2347793447?sitmNo=LO2347793447_2347793449&mall_no=1&dp_infw_cd=CASFC08220000&areaCode=CAS
6: https://www.lotteon.com/p/product/LO2011245954?sitmNo=LO2011245954_2011245955&mall_no=1&dp_infw_cd=CASFC08220000&areaCode=CAS
7: https://www.lotteon.com/p/product/LO2011312817?sitmNo=LO2011312817_2011312818&mall_no=1&dp_infw_cd=CASFC08220000&areaCode=CAS
8: https://www.lotteon.com/p/product/LO2384666733?sitmNo=LO2384666733_2384666734&mall_no=1&dp_infw_cd=C