-
Notifications
You must be signed in to change notification settings - Fork 0
/
youtubeComments.py
62 lines (55 loc) · 2.03 KB
/
youtubeComments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env python3
# miscellaneous
import sys
import time
#import youtubesentiment
# dynamic web scraping
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
options = webdriver.ChromeOptions()
options.add_argument('headless')
# static webscraping
from bs4 import BeautifulSoup
# sentiment
from wordcloud import WordCloud, STOPWORDS
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# plotting
import matplotlib.pyplot as plt
class YouTubeComments:
chromeDriver = webdriver.Chrome("./chromedriver.exe",
options = options)
def __init__(self,**kwargs):
for key in kwargs:
val = kwargs[key]
if key == "url":
exec("self.{} = \"{}\"".format(key,val))
else:
exec("self.{} = {}".format(key,val))
def scrape(self):
"""
Scrapes youtube comments
"""
def scroll():
"""scrolls through youtube page"""
for i in range(0,self.bottom,self.increment):
prompt = 'window.scrollTo({}, {});'.format(i,i+self.increment)
YouTubeComments.chromeDriver.execute_script(prompt)
time.sleep(1)
YouTubeComments.chromeDriver.get(self.url)
scroll()
ytHTML = BeautifulSoup(YouTubeComments.chromeDriver.page_source,"lxml")
wait = WebDriverWait(YouTubeComments.chromeDriver, 7)
comments = []
for comment in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "#comment #content-text"))):
comments.append(comment.text)
self.comments = comments
def runCommentScrape(url="https://www.youtube.com/watch?v=OtRuEhm9Eoo"):
try:
ytComments = YouTubeComments(url=url,bottom = 5000,increment = 350)
ytComments.scrape()
return ytComments.comments
except Exception as e:
return "Error"+str(e)