-
Notifications
You must be signed in to change notification settings - Fork 0
/
polly_init.py
40 lines (31 loc) · 1.34 KB
/
polly_init.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from collections import namedtuple
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

from base import process_and_add_if_not_exists
# Record describing one channel to scrape: listing-page URL, a
# username/password pair, and a category value.
Podcast = namedtuple("Podcast", ["url", "username", "password", "category"])

# Amazing Polly 53.9K
# NOTE(review): the credential below is hard-coded in the source file;
# consider loading it from the environment or a config file instead.
p = Podcast(
    url="https://rumble.com/user/AmazingPolly/",
    username="Polly",
    password="38e0f5f3ef49c677f3d47ca5e03c05b3",
    category=13,
)
def scrape_rumble_most_recent_videos_first_page(
    url, category, username, password, timeout=30
):
    """Fetch a Rumble listing page and hand one video URL to the importer.

    The page at *url* is downloaded and parsed, every
    ``<li class="video-listing-entry">`` element is collected, and the
    entries are walked in reverse document order.  The first entry that
    carries a usable ``<a class="video-item--a" href=...>`` link is turned
    into an absolute ``https://rumble.com`` URL and passed to
    ``process_and_add_if_not_exists``; the walk then stops, so at most one
    video is processed per call.

    Args:
        url: Address of the listing page to scrape.
        category: Forwarded unchanged to ``process_and_add_if_not_exists``.
        username: Forwarded unchanged to ``process_and_add_if_not_exists``.
        password: Forwarded unchanged to ``process_and_add_if_not_exists``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: If the download fails or
            exceeds *timeout*.
    """
    print(f"scrape_rumble_most_recent_videos_first_page : {url}")

    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would hang the whole script.
    response = requests.get(url, timeout=timeout)

    # Parse the raw response bytes and collect the video list items.
    soup = BeautifulSoup(response.content, "html.parser")
    video_entries = soup.find_all("li", class_="video-listing-entry")

    for video_entry in reversed(video_entries):
        print(f"*** processing {video_entry}")

        # find() returns None when the anchor is absent; skip such entries
        # instead of failing on None["href"].
        anchor = video_entry.find("a", class_="video-item--a")
        href = anchor.get("href") if anchor is not None else None
        if not href:
            continue

        # urljoin handles site-relative paths as well as hrefs that are
        # already absolute, unlike plain string concatenation.
        video_url = urljoin("https://rumble.com", href)
        process_and_add_if_not_exists(video_url, category, username, password)

        # Only one entry is handled per call (unchanged from the original).
        break
# Run the scrape for the single channel configured in `p`.
# NOTE: the scrape function has no return statement, so `video_json`
# ends up bound to None.
video_json = scrape_rumble_most_recent_videos_first_page(
    url=p.url,
    category=p.category,
    username=p.username,
    password=p.password,
)