Skip to content

Commit b2e9937

Browse files
authored
Merge pull request #242 from M786453/master
Youtube Playlist Info Scraper
2 parents f84a12d + 3f54aee commit b2e9937

File tree

4 files changed

+208
-0
lines changed

4 files changed

+208
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ More information on contributing and the general code of conduct for discussion
122122
| Word to PDF | [Word to PDF](https://github.com/DhanushNehru/Python-Scripts/tree/master/Word%20to%20PDF%20converter) | A Python script to convert an MS Word file to a PDF file. |
123123
| Youtube Downloader | [Youtube Downloader](https://github.com/DhanushNehru/Python-Scripts/tree/master/Youtube%20Downloader) | Downloads any video from [YouTube](https://youtube.com) in video or audio format!
124124
| Pigeonhole Sort | [Algorithm](https://github.com/DhanushNehru/Python-Scripts/tree/master/PigeonHole) | the pigeonhole sort algorithm to sort your arrays efficiently!
125+
| Youtube Playlist Info Scraper | [Youtube Playlist Info Scraper](https://github.com/DhanushNehru/Python-Scripts/tree/master/Youtube%20Playlist%20Info%20Scraper) | This Python module retrieves information about a YouTube playlist in JSON format using the playlist link.
125126

126127
## Gitpod
127128

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
"""
2+
This module provides functionalities for YouTube Playlist.
3+
"""
4+
5+
import requests
6+
from bs4 import BeautifulSoup
7+
import json
8+
9+
class Playlist:

    """
    This class provides methods to perform operations for a given YouTube Playlist.

    The playlist data is read from the ``ytInitialData`` JSON object that is
    embedded in a ``<script>`` element of the page behind the playlist link.
    """

    # Seconds to wait for the HTTP response; without a timeout a stalled
    # connection would block info() forever.
    _REQUEST_TIMEOUT = 10

    def __init__(self, playlist_link):

        """
        Initializes the playlist with a playlist link.

        Parameters:
            playlist_link (str): Url of YouTube Playlist
        """

        self.playlist_link = playlist_link

    def info(self):

        """
        Download the playlist page and extract the playlist information.

        Errors (network failure, unexpected page layout, invalid JSON) are
        printed and whatever was collected up to that point is returned, so
        the result may be an empty or partial dict.

        Returns:
            dict: Information about given Playlist. Keys: "title",
                "totalVideos", "channelName", "playlistUrl" and, when the
                playlist lists its entries, "videos" (list of dicts with
                "title", "duration", "id") and "duration" (HH:MM:SS).
        """

        info = {}

        try:

            headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.6422.112 Safari/537.36"}

            response = requests.get(url=self.playlist_link, headers=headers, timeout=self._REQUEST_TIMEOUT)

            soup = BeautifulSoup(response.text, 'html.parser')

            for script in soup.find_all('script'):

                script_text = script.text

                if not script_text.startswith("var ytInitialData"):
                    continue

                # The script has the form "var ytInitialData = {...};".
                # Take everything after the first "=" and drop the trailing
                # semicolon instead of relying on fixed slice offsets.
                _, _, payload = script_text.partition("=")

                data_dict = json.loads(payload.strip().rstrip(";"))

                playlist = data_dict["contents"]["twoColumnWatchNextResults"]["playlist"]["playlist"]

                info["title"] = playlist.get("title", "")

                info["totalVideos"] = playlist.get("totalVideos", "")

                if "ownerName" in playlist:
                    info["channelName"] = playlist["ownerName"].get("simpleText", "")
                else:
                    info["channelName"] = ""

                info["playlistUrl"] = playlist.get("playlistShareUrl", "")

                if "contents" in playlist:

                    info["videos"] = []

                    for entry in playlist["contents"]:

                        video = entry.get("playlistPanelVideoRenderer")

                        # Skip entries that are not video renderers rather
                        # than aborting the whole scrape on a KeyError.
                        if video is None:
                            continue

                        video_data = {}

                        if "title" in video:
                            video_data["title"] = video["title"].get("simpleText", "")
                        else:
                            video_data["title"] = ""

                        if "lengthText" in video:
                            video_data["duration"] = video["lengthText"].get("simpleText", "")
                        else:
                            video_data["duration"] = ""

                        video_data["id"] = video.get("videoId", "")

                        info["videos"].append(video_data)  # Update info with video

                    info["duration"] = self.__calculatePlaylistDuration(info["videos"])

                break  # Target Element Found; Break loop

        except Exception as error:
            # Best-effort scraper: report the problem and return what we have.
            print("Error in info():", error)

        return info

    def __calculatePlaylistDuration(self, videos_data):

        """
        Calculate total playlist duration by aggregating the duration of all videos present in playlist.

        Each video's "duration" is expected as "SS", "MM:SS" or "HH:MM:SS".
        Videos whose duration is missing, empty or not numeric are ignored so
        that one bad entry does not zero the total.

        Parameters:
            videos_data (list): List of videos' data (dicts with a "duration" key)

        Returns:
            str: Total duration of Playlist Videos in format -> HH:MM:SS
        """

        total_seconds = 0

        for video in videos_data or ():

            if not isinstance(video, dict):
                continue

            parts = str(video.get("duration", "")).split(":")

            if not 1 <= len(parts) <= 3:
                continue

            try:
                numbers = [int(part) for part in parts]
            except ValueError:
                continue

            # Fold "H:M:S" / "M:S" / "S" into seconds, most significant first.
            video_seconds = 0
            for number in numbers:
                video_seconds = video_seconds * 60 + number

            total_seconds += video_seconds

        # Normalise from the total number of seconds so that carries from
        # seconds to minutes and from minutes to hours are both applied.
        minutes, seconds = divmod(total_seconds, 60)
        hours, minutes = divmod(minutes, 60)

        return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
## YouTube Playlist Info Scraper
2+
3+
This Python module retrieves information about a YouTube playlist in JSON format using the playlist link.
4+
5+
### Usage:
6+
7+
Install dependencies:
8+
9+
pip install -r requirements.txt
10+
11+
Import module:
12+
13+
from Playlist import Playlist
14+
15+
Create Object:
16+
17+
playlist = Playlist("PLAYLIST_LINK_HERE") # Example: https://www.youtube.com/watch?v=_t2GVaQasRY&list=PLeo1K3hjS3uu_n_a__MI_KktGTLYopZ12
18+
19+
Retrieve Playlist Info:
20+
21+
info = playlist.info()
22+
print(info)
23+
24+
### Output Format:
25+
26+
```
27+
{
28+
"title": ...,
29+
"totalVideos": ...,
30+
"channelName": ...,
31+
"playlistUrl": ...,
32+
"duration": ...,
33+
"videos": [
34+
{
35+
"title": ...,
36+
"duration": ...,
37+
"id": ...
38+
}
39+
,
40+
.
41+
.
42+
.
43+
],
44+
}
45+
```
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
requests
2+
bs4

0 commit comments

Comments
 (0)