In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import csv

# Automatically scroll down the page to load YouTube comments
def scroll_to_load_comments(driver, scroll_pause_time=3, max_scrolls=350):
    """Scroll to the bottom of the page until its height stops growing.

    After every scroll the document height is measured again; the loop
    ends as soon as a scroll leaves the height unchanged, or after
    ``max_scrolls`` attempts, whichever comes first.

    Args:
        driver: Object exposing ``execute_script`` (used to run the
            scroll and height-query JavaScript snippets).
        scroll_pause_time: Seconds to sleep after each scroll before the
            height is measured again.
        max_scrolls: Upper bound on the number of scroll attempts.
    """
    def page_height():
        # Current total height of the document, as reported by the page.
        return driver.execute_script("return document.documentElement.scrollHeight")

    previous_height = page_height()

    for _ in range(max_scrolls):
        # Jump to the bottom of the page, then pause before re-measuring.
        driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
        time.sleep(scroll_pause_time)

        current_height = page_height()
        if current_height == previous_height:
            # The scroll did not grow the page: stop.
            return
        previous_height = current_height

def get_comments_csv(video_url, output_file, max_comments=6422, *,
                     scroll_pause_time=3, max_scrolls=350):
    """Scrape comments from a video page and write them to a CSV file.

    Opens ``video_url`` in headless Chrome, scrolls the page with
    ``scroll_to_load_comments``, collects the author, text and vote-count
    elements by CSS selector, and writes them to ``output_file`` with the
    columns ``author``, ``text`` and ``votes``.

    Args:
        video_url: URL of the page to open.
        output_file: Path of the CSV file to create (overwritten if it
            already exists).
        max_comments: Maximum number of rows to write. Zero or a negative
            value writes only the header.
        scroll_pause_time: Seconds to wait after each scroll; forwarded
            to ``scroll_to_load_comments``.
        max_scrolls: Maximum number of scroll attempts; forwarded to
            ``scroll_to_load_comments``.

    Returns:
        None. The result is the CSV file written to ``output_file``.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    driver = webdriver.Chrome(options=options)
    # Everything after the browser starts runs under try/finally so the
    # browser is shut down even when loading, scraping or writing fails.
    try:
        driver.get(video_url)
        # Fixed pause before scrolling (presumably to let the page finish
        # its initial load).
        time.sleep(5)

        scroll_to_load_comments(
            driver,
            scroll_pause_time=scroll_pause_time,
            max_scrolls=max_scrolls,
        )

        comment_elements = driver.find_elements(By.CSS_SELECTOR, '#content-text')
        author_elements = driver.find_elements(By.CSS_SELECTOR, '#author-text span')
        vote_elements = driver.find_elements(By.CSS_SELECTOR, '#vote-count-middle')

        with open(output_file, 'w', encoding='utf-8', newline='') as f:
            fieldnames = ['author', 'text', 'votes']
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()

            # NOTE: the three element lists are collected independently and
            # paired purely by position; zip() stops at the shortest list,
            # so differing list lengths can truncate or misalign rows.
            rows = zip(author_elements, comment_elements, vote_elements)
            for written, (author, comment, vote) in enumerate(rows):
                # Check the limit before writing so that a limit of zero
                # (or less) produces a header-only file.
                if written >= max_comments:
                    break
                writer.writerow({
                    'author': author.text.strip(),
                    'text': comment.text.strip(),
                    'votes': vote.text.strip(),
                })
    finally:
        driver.quit()

if __name__ == "__main__":
    # Script entry point: scrape one video's comments into a CSV file.
    # The path is relative to the current working directory.
    output_file = '../data/youtube_comment.csv'
    video_url = "https://www.youtube.com/watch?v=wtvo-n3ovZo"
    get_comments_csv(
        video_url=video_url,
        output_file=output_file,
        max_comments=5905,
    )
