### <mark>**✅Crawling_Smartstore**</mark>

##### **Library**

In [1]:
# selenium import
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service

# library import
from bs4 import BeautifulSoup
from dateutil import parser
import time
import pandas as pd
import platform
import os
from time import sleep
from glob import glob
from tqdm import tqdm
from datetime import datetime

##### **Accept**

In [2]:
# Detect the host platform so the matching chromedriver binary can be located.
os_name = platform.system().lower()
architecture = platform.machine()

# Only Windows builds carry the ".exe" suffix. macOS keeps looking for the
# plain binary only; any other system (e.g. Linux) prefers the plain binary
# and falls back to the ".exe" name the original code searched for.
if os_name == 'darwin':
    driver_filenames = ('chromedriver',)
elif os_name == 'windows':
    driver_filenames = ('chromedriver.exe',)
else:
    driver_filenames = ('chromedriver', 'chromedriver.exe')

# Search ../driver recursively and take the first match.
driver_path = None
for driver_filename in driver_filenames:
    driver_candidates = glob(f'../driver/**/{driver_filename}', recursive=True)
    if driver_candidates:
        driver_path = driver_candidates[0]
        break

# Fail with an actionable message instead of a bare IndexError.
if driver_path is None:
    raise FileNotFoundError(
        f"No chromedriver binary ({' or '.join(driver_filenames)}) found under '../driver'."
    )

In [3]:
# Check the driver file's permissions and repair them if needed.
# macOS and Windows follow exactly the same steps, so one code path serves both.
driver_permissions = os.stat(driver_path).st_mode
owner_can_execute = bool(driver_permissions & 0o100)
if not owner_can_execute:
    # Add the execute bit for user, group, and others.
    os.chmod(driver_path, driver_permissions | 0o111)

##### **Chrome Options**

In [4]:
# Point Selenium at the chromedriver binary located earlier.
service = Service(executable_path=driver_path)

# Command-line switches handed to Chrome, applied in the listed order.
# '--headless' is not in the list, so Chrome is not started in headless mode.
chrome_options = webdriver.ChromeOptions()
for chrome_flag in (
    '--disable-gpu',
    '--disable-dev-shm-usage',
    '--no-sandbox',
    '--blink-settings=imagesEnabled=false',
    '--disable-blink-features=AutomationControlled',
):
    chrome_options.add_argument(chrome_flag)

##### **Define**

In [5]:
# Product page that the browser opens and whose reviews are collected.
url = 'https://brand.naver.com/cheiljedang/products/11145638507'

In [6]:
# Start Chrome with the configured service/options and open the product page.
driver = webdriver.Chrome(service=service, options=chrome_options)
driver.get(url)

# Reload the page once, then pause 3 seconds before touching the DOM.
# NOTE(review): presumably the refresh works around an incomplete first load — confirm.
driver.refresh()
time.sleep(3)

##### **Page Loading & Parsing**

In [7]:
# Zoom the page out to 10% before scrolling.
driver.execute_script("document.body.style.zoom='10%'")

# Scroll to the bottom repeatedly until the document height stops changing.
last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(1)  # wait one second after each scroll

    # Re-read the height; an unchanged value means the bottom was reached.
    new_height = driver.execute_script("return document.body.scrollHeight")
    height_changed = new_height != last_height
    last_height = new_height
    if not height_changed:
        break

In [8]:
# Parse the current page source; `data` collects one dict per scraped review.
soup = BeautifulSoup(driver.page_source, 'html.parser')
data = []

In [9]:
# CSS selectors: `review` matches one review container; the others are
# evaluated relative to such a container.
review = '#REVIEW div._1McWUwk15j'
# NOTE: `id` shadows the builtin id(); the name is kept because the crawl
# loop reads it.
id = 'strong._2L3vDiadT9'
created = 'span._2L3vDiadT9'
rate = 'em'
content = '._3z6gI4oI6l'

# Element whose text is later converted to an integer and divided into pages.
count_selector = '#content > div > div.z7cS6-TO7X > div._27jmWaPaKy > ul > li:nth-child(2) > a > span'
count_element = soup.select_one(count_selector)
if count_element is None:
    # select_one returns None when nothing matches; say so explicitly rather
    # than failing with "'NoneType' object has no attribute 'text'".
    raise RuntimeError(
        f'Count element not found with selector {count_selector!r}; '
        'the page layout or class names may have changed.'
    )
count = count_element.text

# <title> element of the page; its text is used later to name the CSV file.
selector_title = soup.select_one('title')

In [10]:
# Convert the scraped count text to an integer, dropping thousands separators.
# Despite its name, `page_int` holds the item count that is later divided into pages.
page_int = int(count.replace(',', ''))

def calculate_page_no(page_int, page_size=20):
    """Return how many pages are needed to show ``page_int`` items.

    Args:
        page_int: Total number of items (an integer item count, not a page count).
        page_size: Items shown per page. Defaults to 20, the value the
            original implementation hard-coded.

    Returns:
        ``page_int`` divided by ``page_size``, rounded up to a whole page.

    Raises:
        ValueError: If ``page_size`` is not positive.
    """
    if page_size <= 0:
        raise ValueError(f'page_size must be positive, got {page_size}')
    # Ceiling division using integers only: -(-a // b) == ceil(a / b).
    return -(-page_int // page_size)

# Number of pages the crawl loop iterates over.
page_no = calculate_page_no(page_int)

##### **Crawling**

In [11]:
# Walk the pages one by one, appending every review on each page to `data`.
for page in tqdm(range(1, page_no+1), desc="현재 페이지", unit="page", leave=False):
    # Wait up to 20 seconds until review elements are present in the DOM.
    WebDriverWait(driver, 20).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, review))
    )

    # Fixed extra pause before reading the page source.
    time.sleep(2)

    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')

    for selector in soup.select(review):
        id_selector = selector.select_one(id).text
        # NOTE(review): dateutil guesses the field order of all-numeric dates
        # (month-first by default) — confirm this matches the site's format.
        created_selector = parser.parse(selector.select_one(created).text)
        rate_selector = selector.select_one(rate).text
        content_selector = selector.select_one(content).text

        data.append({
            'ID': id_selector,
            'RATE': rate_selector,
            'CREATED': created_selector,
            'REVIEW': content_selector
        })

    if page < page_no:
        try:
            next_page_element = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, f'#REVIEW  a[data-shp-contents-id="{page + 1}"]'))
            )
            # Click through JavaScript rather than WebElement.click().
            driver.execute_script("arguments[0].click();", next_page_element)

            time.sleep(2)

        except Exception as e:
            # Still best-effort (keep what was collected and stop), but report
            # where and why instead of ending the crawl silently.
            tqdm.write(
                f'Stopped after page {page}/{page_no}: could not open page {page + 1} '
                f'({type(e).__name__}: {e})'
            )
            break

                                                              

##### **To CSV**

In [12]:
# Fix the column set up front so the frame still has a CREATED column when no
# reviews were collected (an empty list otherwise yields a column-less frame
# and the next line raises KeyError).
result_df = pd.DataFrame(data, columns=['ID', 'RATE', 'CREATED', 'REVIEW'])
# Render each parsed date as zero-padded day-month-two-digit-year text.
result_df['CREATED'] = result_df['CREATED'].apply(lambda x: x.strftime('%d-%m-%y'))

In [13]:
# Close the current browser window, then quit the driver.
driver.close()
driver.quit()

In [14]:
# Build the CSV file name from the page title: the first two space-separated
# words when the title has at least three, otherwise the whole title.
split_text = selector_title.text.split(" ")
has_three_or_more_words = len(split_text) >= 3
file_name = " ".join(split_text[:2]) if has_three_or_more_words else selector_title.text

In [15]:
# Today's month and day as four digits (MMDD); used as the CSV file-name prefix.
today_str = datetime.today().strftime("%m%d")

In [16]:
# Build the path with os.path.join: a hard-coded backslash path is only a
# directory path on Windows; on macOS it is one literal file name.
output_dir = os.path.join('..', 'data', 'review')
# Create the target directory if it does not exist yet.
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, f'{today_str}_{file_name}.csv')
# utf-8-sig writes a BOM at the start of the file.
result_df.to_csv(output_path, index=False, encoding='utf-8-sig')