In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
from io import StringIO
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
import uuid
import os

# 셀레니움으로 번개장터 키워드 아이템 크롤링

In [None]:
# Start a Selenium-driven Chrome session and maximize its window.
driver = webdriver.Chrome()
driver.maximize_window() 

In [None]:
# Navigate to the site root; base_url is also the prefix used to build the
# search URL in the next cell.
base_url = "https://m.bunjang.co.kr/"
driver.get(base_url)

## 키워드 검색

In [None]:
# Option 1: go straight to page 1 of the search results by passing the
# keyword in the `q` query parameter.
driver.get(base_url + "search/products?q=유모차 스토케&page=1")

In [None]:
# Option 2: type the keyword into the page's search input and submit it.
# NOTE(review): the selector is built from generated-looking class names
# (sc-...); presumably these change between site builds — confirm before reuse.
input_box_css = "#root > div > div > div.sc-dfVpRl.fxYqEy > div.sc-gzOgki.ciqSeg > div.sc-iyvyFf.kPWKwK > div.sc-jKJlTe.keyjxL > div.sc-eNQAEJ.voMyM > input"
input_box = driver.find_element(By.CSS_SELECTOR, input_box_css)
# A single space is sent before the keyword; the reason is not evident from
# the code (possibly to activate the input) — TODO confirm.
input_box.send_keys(" ")
input_box.send_keys("유모차 스토케")
# Pressing ENTER submits the search.
input_box.send_keys(Keys.ENTER)

## 각 페이지별 아이템 리스트

In [None]:
# Read the total number of matching items displayed on the results page.
import re

item_count_css = "#root > div > div > div:nth-child(4) > div > div.sc-hRmvpr.jtLTMQ > div > div.sc-cZBZkQ.ckPglo > span.sc-ecaExY.jPSzJz"
# First bound to the raw label text ...
item_count = driver.find_element(By.CSS_SELECTOR, item_count_css).text
# ... then rebound to an int after dropping every non-digit character.
item_count = int(re.sub("[^0-9]", "", item_count))
item_count

In [None]:
# Collect the <a> elements of the product cards on the current results page;
# their href attributes are used later to open the detail pages.
item_card_css = "#root > div > div > div:nth-child(4) > div > div.sc-gacfCG.QBPXM > div > div > a"
item_card_list = driver.find_elements(By.CSS_SELECTOR, item_card_css)
len(item_card_list)

In [None]:
# Estimate the number of result pages from the total item count and the
# number of cards found on the current page.
# NOTE(review): assumes every page holds as many cards as this one — confirm.
import math

items_per_page = len(item_card_list)
# Guard against an empty card list: dividing by zero would raise
# ZeroDivisionError, so fall back to a single page in that case.
if 0 < items_per_page < item_count:
    page_count = math.ceil(item_count / items_per_page)
else:
    page_count = 1
page_count

In [None]:
# Check whether a result card is an advertisement.
is_ad_css = "div.sc-gVyKpa.kXoKNq > span.sc-cpmKsF.buJhvy"
try:
    # Probes the 15th card from the end; this indexing raises IndexError
    # (not handled here) when fewer than 15 cards were collected.
    el = item_card_list[-15].find_element(By.CSS_SELECTOR, is_ad_css)
    # "광고" is Korean for "advertisement".
    is_ad = el.text == "광고"
except NoSuchElementException:
    # The badge element is absent from this card, so it is not flagged as an ad.
    is_ad = False

is_ad

In [None]:
# Check whether the first result card is marked as sold.
is_completed_css = "div > div > div > img"
try:
    el = item_card_list[0].find_element(By.CSS_SELECTOR, is_completed_css)
    # The matched image's alt text is compared with "판매 완료" (Korean for "sold").
    is_completed = el.get_attribute('alt') == "판매 완료"
except NoSuchElementException:
    # No matching image inside the card, so it is treated as still on sale.
    is_completed = False

is_completed

In [None]:
from datetime import datetime, timedelta
import re

def parse_relative_time(text: str, now: datetime | None = None) -> datetime:
    """
    한국어 상대 시간을 실제 datetime으로 변환
    ex) "1분 전", "2시간 전", "3일 전", "3달 전"
    """
    if now is None:
        now = datetime.now()

    match = re.match(r"(\d+)(분|시간|일|달) 전", text.strip())
    if not match:
        raise ValueError(f"지원하지 않는 형식: {text}")

    value = int(match.group(1))
    unit = match.group(2)

    if unit == "분":
        return now - timedelta(minutes=value)
    elif unit == "시간":
        return now - timedelta(hours=value)
    elif unit == "일":
        return now - timedelta(days=value)
    elif unit == "달":
        # "달" → 30일로 가정
        return now - timedelta(days=value * 30)
    else:
        raise ValueError(f"알 수 없는 단위: {unit}")


# Usage example. The sample outputs below are what gets printed when `now`
# is 2025-08-19 15:30:00.
now = datetime.now()
print(parse_relative_time("1분 전", now))   # 1 minute ago  -> 2025-08-19 15:29:00
print(parse_relative_time("2시간 전", now)) # 2 hours ago   -> 2025-08-19 13:30:00
print(parse_relative_time("3일 전", now))   # 3 days ago    -> 2025-08-16 15:30:00
print(parse_relative_time("3달 전", now))   # 3 months ago  -> 2025-05-21 15:30:00 (3 * 30 days)

In [None]:
# Upload time of the first card, read as a relative label
# (e.g. "1분 전", "2시간 전", "3일 전", "3달 전") and converted to a datetime.

uploaded_date_css = "div.sc-iGPElx.iPIFfd > div.sc-gtfDJT.hZQMHB > div.sc-fOICqy.bgZehm"
uploaded_date = item_card_list[0].find_element(By.CSS_SELECTOR, uploaded_date_css)
# Rebind the name from the WebElement to the parsed datetime.
uploaded_date = parse_relative_time(uploaded_date.text)

## 아이템 상세 정보

In [None]:
# Open the detail page of the first product card via its href attribute.
driver.get(item_card_list[0].get_attribute("href"))

In [None]:
# Scrape the fields of the currently open product detail page.

# Title
title_css = "#root > div > div > div.Productsstyle__Wrapper-sc-13cvfvh-0.eVEUVR > div.Productsstyle__ProductPageTop-sc-13cvfvh-1.WbLlq > div > div.Productsstyle__ProductContentWrapper-sc-13cvfvh-8.jGywBa > div > div.Productsstyle__ProductSummaryWrapper-sc-13cvfvh-11.iDkwQU > div > div:nth-child(1) > div.ProductSummarystyle__Basic-sc-oxz0oy-2.ifrXrN > div.ProductSummarystyle__Name-sc-oxz0oy-3.dZBHcg"
title = driver.find_element(By.CSS_SELECTOR, title_css).text

# Price: keep only the digits of the displayed text (the result stays a str).
price_css = "#root > div > div > div.Productsstyle__Wrapper-sc-13cvfvh-0.eVEUVR > div.Productsstyle__ProductPageTop-sc-13cvfvh-1.WbLlq > div > div.Productsstyle__ProductContentWrapper-sc-13cvfvh-8.jGywBa > div > div.Productsstyle__ProductSummaryWrapper-sc-13cvfvh-11.iDkwQU > div > div:nth-child(1) > div.ProductSummarystyle__Basic-sc-oxz0oy-2.ifrXrN > div.ProductSummarystyle__PriceWrapper-sc-oxz0oy-4.dTIDFF > div"
price = driver.find_element(By.CSS_SELECTOR, price_css).text
price = re.sub("[^0-9]", "", price)

# Product condition
condition_css = "#root > div > div > div.Productsstyle__Wrapper-sc-13cvfvh-0.eVEUVR > div.Productsstyle__ProductPageTop-sc-13cvfvh-1.WbLlq > div > div.Productsstyle__ProductContentWrapper-sc-13cvfvh-8.jGywBa > div > div.Productsstyle__ProductSummaryWrapper-sc-13cvfvh-11.iDkwQU > div > div:nth-child(1) > div:nth-child(2) > div:nth-child(2) > div:nth-child(1) > div.ProductSummarystyle__Value-sc-oxz0oy-21.eLyjky"
condition = driver.find_element(By.CSS_SELECTOR, condition_css).text

# Product description text
detail_css = "#root > div > div > div.Productsstyle__Wrapper-sc-13cvfvh-0.eVEUVR > div.Productsstyle__ProductPageTop-sc-13cvfvh-1.WbLlq > div > div.Productsstyle__ProductBottom-sc-13cvfvh-14.fxuPQD > div.Productsstyle__ProductInfoContent-sc-13cvfvh-13.jzEavb > div > div.ProductInfostyle__Wrapper-sc-ql55c8-0.gPJVxW > div.ProductInfostyle__Description-sc-ql55c8-2.hWujk > div.ProductInfostyle__DescriptionContent-sc-ql55c8-3.eJCiaL > p"
detail = driver.find_element(By.CSS_SELECTOR, detail_css).text


In [None]:
import requests

def download_image(url: str, save_path: str, chunk_size: int = 1024) -> bool:
    """Download the image at ``url`` and write it to ``save_path``.

    The body is streamed to disk in chunks. Any failure is reported with
    ``print`` and signalled through the return value instead of an exception,
    so a caller looping over many images is not interrupted.

    Args:
        url: Image URL.
        save_path: Destination file path.
        chunk_size: Number of bytes requested per streamed chunk (default 1024).

    Returns:
        True when the file was written, False on a non-200 response or on any
        error raised while requesting or writing.
    """
    try:
        # With stream=True the connection is only returned to the pool once
        # the body is fully consumed or the response is closed; the context
        # manager closes it on every path, including non-200 and errors.
        with requests.get(url, stream=True, timeout=10) as resp:
            if resp.status_code != 200:
                print(f"다운로드 실패: HTTP {resp.status_code}")
                return False

            with open(save_path, "wb") as f:
                for chunk in resp.iter_content(chunk_size):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)
    except Exception as e:
        # Deliberately broad: this is a best-effort helper that must not raise.
        print(f"오류 발생: {e}")
        return False

    print(f"다운로드 완료: {save_path}")
    return True


# Usage example: download one product image to a relative path
# (resolved against the current working directory).
url = "https://media.bunjang.co.kr/product/345010693_1_1754091876_w480.jpg"
download_image(url, "bungae_image.jpg")


In [None]:
# Download every product image found on the open detail page.
image_list_css = "#root > div > div > div.Productsstyle__Wrapper-sc-13cvfvh-0.eVEUVR > div.Productsstyle__ProductPageTop-sc-13cvfvh-1.WbLlq > div > div.Productsstyle__ProductContentWrapper-sc-13cvfvh-8.jGywBa > div > div.Productsstyle__ProductImageWrapper-sc-13cvfvh-10.cXRuyi > div > div.sc-kLIISr.gWGEJy > div > img"
image_list = driver.find_elements(By.CSS_SELECTOR, image_list_css)

# A fresh random UUID names both the per-item folder and the image files.
uid = uuid.uuid4()
image_path = "../../data/raw/" + str(uid)
os.makedirs(image_path, exist_ok=True)

# Files are saved as bungaejangter_<uid>_<index>.jpg in page order.
for index, image in enumerate(image_list):
    url = image.get_attribute('src')
    download_image(url, f"{image_path}/bungaejangter_{uid}_{index}.jpg")

In [None]:
# Bundle every field scraped in the cells above into a single record and
# start the list of collected records with it.
item = dict(
    title=title,
    detail=detail,
    condition=condition,
    uploaded_date=uploaded_date,
    is_completed=is_completed,
    price=price,
)
items = [item]
print(item)

In [None]:
# Build a DataFrame from the list of collected item dicts.
df = pd.DataFrame(items)

In [None]:
# Save the collected rows as CSV.
# NOTE(review): to_csv is called with its defaults here, so the DataFrame
# index is written as an extra first column, whereas the final crawl cell
# passes index=False — confirm the two are not meant to share one file.
df.to_csv("../../data/raw/bungaejangter.csv")

## 최종 크롤링 코드

In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
from io import StringIO
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from dataclasses import dataclass, asdict
import time
import os
import csv
from tools.bungaejangter_item_crawler import get_item_data, get_keyword_item_count, get_keyword_page_count, get_page_keyword_item_list

# Start a Selenium-driven Chrome session and maximize its window.
driver = webdriver.Chrome()
driver.maximize_window() 

# Pause for two seconds before continuing (presumably to let the browser
# finish starting — TODO confirm).
time.sleep(2)
    
# Site root used to build search URLs, and the output locations for the crawl.
# NOTE(review): data_base_path is an absolute, machine-specific Windows path.
base_url = "https://m.bunjang.co.kr/"
data_base_path = "C:/Potenup/SecondHanded-Strollers-PredictedPrice/data/raw/"
csv_path = f"{data_base_path}bungaejangter.csv"

In [None]:
# Crawl every keyword: walk all result pages, open each item's detail page,
# merge list-level and detail-level fields, and append the rows to csv_path.
from urllib.parse import quote

keyword_list = ["유모차 스토케"]

for keyword in keyword_list:
    # Open page 1 of the search results. The keyword is percent-encoded so
    # spaces, non-ASCII text and reserved characters such as "&" or "#"
    # cannot corrupt the query string.
    print("============================")
    driver.get(base_url + f"search/products?q={quote(keyword, safe='')}&page=1")
    print("키워드: ", keyword)

    # Total number of results and the number of result pages to walk.
    item_count = get_keyword_item_count(driver, keyword)
    print("전체 아이템 수: ", item_count)
    page_count = get_keyword_page_count(driver, keyword, item_count)
    print("페이지수: ", page_count)

    # Collect every item of every page (pages are 1-based).
    item_data = []
    for page_number in range(1, page_count + 1):
        page_item_list = get_page_keyword_item_list(driver, keyword, page_number)
        print(f'페이지 {page_number}: {len(page_item_list)}')

        for index, item in enumerate(page_item_list):
            link = item['link']
            if item['is_completed']:
                # Sold items are opened with the original=1 flag. Links may
                # already carry a query string (e.g. "...?q=..."), in which
                # case the flag must be joined with "&" rather than a second "?".
                separator = "&" if "?" in link else "?"
                link += f"{separator}original=1"
            print(f"페이지 {page_number} 아이템 {index} : {link}")

            # Merge the detail-page fields into the list-level record.
            detail_item_data = get_item_data(driver, link)
            item.update(detail_item_data)
            item_data.append(item)

            print(item)
        print(f'페이지 {page_number} 완료')

    print("============================")
    print("결과")
    for item in item_data:
        print(item)

    # Nothing collected: skip the write. Writing an empty frame would create
    # a header-less file that every later append would extend without a header.
    if not item_data:
        continue

    df = pd.DataFrame(item_data)
    # Write the header only when the file is new (or exists but is empty);
    # mode="a" creates the file when it does not exist yet.
    write_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
    df.to_csv(csv_path, mode="a", index=False, header=write_header, encoding="utf-8", quoting=csv.QUOTE_ALL)

## csv 데이터 확인

In [None]:
# Reload the crawled CSV for inspection; with on_bad_lines="warn" malformed
# rows produce a warning instead of aborting the read.
df = pd.read_csv(r"C:/Potenup/SecondHanded-Strollers-PredictedPrice/data/raw/bungaejangter.csv", encoding="utf-8", engine="python", on_bad_lines="warn")

In [16]:
# Preview the first rows of the loaded data.
df.head()

Unnamed: 0,keyword,is_completed,uploaded_date,link,location,id,title,detail,condition,price
0,유모차 스토케,False,2025-08-20 07:23:57.618948,https://m.bunjang.co.kr/products/330635984?q=%...,,84163eb3-68b8-48b6-8e02-f1bad2ee4a03,스토케 트레일즈 유모차,출고가 150만원 이었습니다\n사진 그대로 구매시 30만원\n겨울용 패딩시트 제외시...,사용감 적음,200000
1,유모차 스토케,False,2025-08-20 07:23:57.643101,https://m.bunjang.co.kr/products/317908347?q=%...,,89cd44b0-e83b-4b62-8b30-bae711ffefb2,스토케 트레일즈 유모차입니다,스토케 트레일즈 유모차입니다\n\n디럭스형이고요\n2년도 채 쓰지 않았습니다\n\n...,사용감 없음,100000
2,유모차 스토케,False,2025-08-19 15:23:57.719904,https://m.bunjang.co.kr/products/350453616?q=%...,,80ed4765-ddb3-4fdf-af4a-83ae87babb2b,스토케 익스플로리X 유모차,스토케 익스플로리 X 유모차 판매합니다\n그레이컬러입니다\n하자는 사진 올린것처럼 ...,사용감 적음,400000
3,유모차 스토케,False,2025-08-19 16:23:57.753230,https://m.bunjang.co.kr/products/345010693?q=%...,,fb8546a4-91aa-49e2-a9a5-06b1edb48963,(미개봉새상품) 스토케 요요3 블랙/블랙프레임 유모차,스토케 요요3 유모차 블랙프레임 블랙색상이고\n현대백화점 무역점에서 주문했고\n7...,새 상품,688000
4,유모차 스토케,False,2025-08-19 19:23:57.815557,https://m.bunjang.co.kr/products/276030707?q=%...,,64ee5316-358d-44e4-8dd8-98246852c5d5,노르웨이산 최고급 명품 럭셔리 디럭스 유모차 스토케,노르웨이산 최고급 명품 럭셔리 디럭스 유모차 스토케 수도권 전철역 전지역 및 대전...,사용감 없음,70000


In [12]:
# (row count, column count) of the loaded data.
df.shape

(19, 10)