# In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv
import re
import time

# In [2]:
# Configure the Chrome browser options
def configure_chrome_options():
    """Build the Chrome ``Options`` object used by the scraper.

    Returns:
        An ``Options`` instance with headless mode, GPU disabled, the sandbox
        disabled, and a desktop-browser user-agent override.
    """
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36"
    )

    chrome_options = Options()
    # Remove this line if the browser window should be visible.
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    # Chrome's switch is `--user-agent=<value>`. The previous form
    # (`User-Agent="..."`) used the wrong switch name and embedded literal
    # quotes in the value, so the override was not applied reliably.
    chrome_options.add_argument(f"--user-agent={user_agent}")
    return chrome_options

# Initialize the WebDriver
def initialize_driver():
    """Create and return a Chrome WebDriver.

    The browser options come from ``configure_chrome_options()`` and the
    driver binary path from ``ChromeDriverManager().install()``.
    """
    # `options` is listed first so the options are built before the driver
    # binary is resolved, matching the original call order.
    return webdriver.Chrome(
        options=configure_chrome_options(),
        service=Service(ChromeDriverManager().install()),
    )

# Visit the product page and scrape its review data
def fetch_comments_and_ratings(driver, url, timeout=10):
    """Scrape every review comment and star rating from one product page.

    Opens *url*, clicks the product-review tab, reads the total page count
    from the pagination text, then walks each review page collecting the text
    of every ``p.Comment`` element and the ``score`` attribute of every
    ``div.RatingStarGroup`` element.

    Args:
        driver: Selenium WebDriver used for all page interaction.
        url: Product detail page to open.
        timeout: Seconds to wait for each expected element (default 10,
            the value that was previously hard-coded).

    Returns:
        A ``(comments_data, ratings_data)`` tuple. Each member is a list of
        single-item lists (``[[text], ...]`` and ``[[score], ...]``), the
        shape ``save_to_csv`` expects.

    Raises:
        selenium.common.exceptions.TimeoutException: if the review tab, the
            pagination area, a page link, or the review containers do not
            appear within *timeout* seconds.
    """
    # Imported here so the function is self-contained with respect to the
    # file-level import block.
    from selenium.common.exceptions import TimeoutException

    settle_seconds = 3  # pause used after a click while the review list renders
    wait = WebDriverWait(driver, timeout)

    driver.get(url)

    # Wait for the product-review tab and click it.
    review_tab = wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "ul.vendordetailmenu li.goodsCommendLi span"))
    )
    review_tab.click()

    time.sleep(settle_seconds)

    # Read the total page count from the pagination text ("頁數<current>/<total>").
    pagination_spans = wait.until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.pageArea span'))
    )
    # Scan every span instead of indexing [1], and fall back to a single page
    # when no span matches, rather than crashing with IndexError/AttributeError.
    total_pages = 1
    for span in pagination_spans:
        found = re.search(r'頁數(\d+)/(\d+)', span.text)
        if found:
            total_pages = int(found.group(2))
            break

    comments_data = []
    ratings_data = []

    def grab_comments_and_ratings_from_page():
        """Append the comments and ratings of the page currently displayed."""
        # NOTE(review): comments and ratings are collected with independent
        # selectors, so the two lists only line up if every review has both
        # a p.Comment and a div.RatingStarGroup - confirm against the page.
        wait.until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.CommentContainer'))
        )
        comments_data.extend(
            [element.text]
            for element in driver.find_elements(By.CSS_SELECTOR, 'p.Comment')
        )

        wait.until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.InfoContainer'))
        )
        ratings_data.extend(
            [element.get_attribute('score')]
            for element in driver.find_elements(By.CSS_SELECTOR, 'div.RatingStarGroup')
        )

    # First page.
    grab_comments_and_ratings_from_page()

    # Pages 2..total_pages.
    for page in range(2, total_pages + 1):
        # Remember an element of the page being left: the presence waits in
        # the grab helper are already satisfied by the old page's containers,
        # so without an extra wait the old content could be scraped again.
        previous_containers = driver.find_elements(By.CSS_SELECTOR, 'div.CommentContainer')

        page_link = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, f'div.pageArea dd[pageidx="{page}"] a'))
        )
        page_link.click()

        if previous_containers:
            try:
                WebDriverWait(driver, settle_seconds).until(
                    EC.staleness_of(previous_containers[0])
                )
            except TimeoutException:
                # Best effort: if the site updates the list in place the old
                # element never goes stale; the wait then acts as a fixed
                # pause and scraping continues.
                pass

        grab_comments_and_ratings_from_page()

    return comments_data, ratings_data

# Save the comments and ratings to a CSV file
def save_to_csv(comments_data, ratings_data, filename):
    """Write paired comments and ratings to *filename* as CSV.

    Args:
        comments_data: Sequence of one-item sequences; item 0 is the comment.
        ratings_data: Sequence of one-item sequences; item 0 is the rating.
        filename: Destination path; an existing file is overwritten.

    The file is UTF-8 with a BOM and starts with a ``Comment,Rating`` header.
    Rows are paired positionally with ``zip``, so entries beyond the shorter
    of the two inputs are not written.
    """
    header = ["Comment", "Rating"]
    # Lazy generator: rows are unwrapped from their nested lists as written.
    rows = (
        [comment_cell[0], rating_cell[0]]
        for comment_cell, rating_cell in zip(comments_data, ratings_data)
    )

    with open(filename, mode='w', newline='', encoding='utf-8-sig') as out:
        sheet = csv.writer(out)
        sheet.writerow(header)
        sheet.writerows(rows)

# In [5]:
# Main execution flow
def main():
    """Scrape the reviews of every listed product and save them to one CSV.

    Starts one browser, fetches comments and ratings for each URL in turn,
    closes the browser, and writes the combined rows to
    ``comments_and_ratings.csv``.
    """
    # Product pages to scrape.
    # NOTE(review): the first and last URLs carry the same i_code (12694615),
    # so that product's reviews may be collected twice - confirm intent.
    urls = [
        'https://www.momoshop.com.tw/goods/GoodsDetail.jsp?i_code=12694615&Area=search&mdiv=403&oid=1_8&cid=index&kw=%E5%AE%B6%E9%9B%BB',
        'https://www.momoshop.com.tw/goods/GoodsDetail.jsp?i_code=13479586&str_category_code=2134500170&mdiv=2134500000-bt_1_017_01-bt_1_017_01_e31&ctype=B&sourcePageType=4',
        'https://www.momoshop.com.tw/goods/GoodsDetail.jsp?i_code=13554701&mdiv=2169500000-bt_1_016_01-bt_1_016_01_e11&ctype=B&sourcePageType=4',
        'https://www.momoshop.com.tw/goods/GoodsDetail.jsp?i_code=12694615&Area=search&mdiv=403&oid=1_2&cid=index&kw=%E6%B0%A3%E7%82%B8%E9%8D%8B'
      ]

    # Accumulators for the reviews of all products.
    all_comments_data = []
    all_ratings_data = []

    # Start the browser.
    driver = initialize_driver()
    try:
        for url in urls:
            # Fetch this product's comments and ratings and add them to the totals.
            comments_data, ratings_data = fetch_comments_and_ratings(driver, url)
            all_comments_data.extend(comments_data)
            all_ratings_data.extend(ratings_data)
    finally:
        # Always close the browser, even when scraping raises; otherwise the
        # Chrome process is left running.
        driver.quit()

    # Write all comments and ratings to the CSV file.
    save_to_csv(all_comments_data, all_ratings_data, "comments_and_ratings.csv")

# Run the main function only when this file is executed as a script.
if __name__ == "__main__":
    main()