import json
import os
import re

import yt_dlp


def sanitize_filename(name, max_length=100):
    """
    Sanitize a filename by removing disallowed characters, replacing spaces
    with underscores, and truncating it if it exceeds max_length.
    """
    name = re.sub(r'[\\/*?:"<>|]', '', name)
    name = name.replace(' ', '_')
    return name[:max_length]


def download_comments_info_json(url: str, top_comments: str = 'all', replies: str = 'all'):
    """
    Fetch comments (and replies, if specified) for the given video URL.

    Returns a dict of the form {'comments': [...]} together with the video title.
    """
    # yt-dlp's youtube extractor reads max_comments as a list of
    # [max-comments, max-parents, max-replies, max-replies-per-thread];
    # limit parent comments and replies while leaving the total unbounded.
    if top_comments == 'all' and replies == 'all':
        max_comments = ['all']
    else:
        max_comments = ['all', str(top_comments), str(replies)]

    ydl_opts = {
        'getcomments': True,
        'skip_download': True,
        'writesubtitles': True,
        'writeautomaticsub': True,
        'extractor_args': {
            'youtube': {
                'max_comments': max_comments,
                # Extractor-arg values must be lists of strings when passed
                # through the Python API.
                'comment_sort': ['top'],
            }
        },
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=False)
        comments_info = {'comments': info.get('comments') or []}
        video_title = info.get('title', 'video')

    return comments_info, video_title


def main(urls_file, base_directory):
    """
    Read video URLs from a file, download comments and replies for each,
    and save them as JSON files named after the video titles.
    """
    # Ensure the output directory exists
    os.makedirs(base_directory, exist_ok=True)

    with open(urls_file, 'r', encoding='utf-8') as file:
        urls = file.readlines()

    for url in urls:
        url = url.strip()
        if not url:
            continue  # skip blank lines

        comments_info, video_title = download_comments_info_json(url)

        # Sanitize the title and build the output path
        filename = sanitize_filename(video_title) + '.json'
        output_path = os.path.join(base_directory, filename)

        # Save the comment data to a JSON file
        with open(output_path, 'w', encoding='utf-8') as fh:
            json.dump(comments_info, fh, indent=2)

        print(f"Comments and replies saved successfully as {output_path}")


if __name__ == '__main__':
    # Paths
    urls_file = 'C:\\Users\\v1\\Documents\\sample.txt'
    base_directory = 'C:\\Users\\v1\\Documents\\saampleout'

    # Execute the main function
    main(urls_file, base_directory)
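
# ---------------------------------------------------------------------------
# Illustrative usage sketch (kept as comments so the script's behaviour is
# unchanged). The URL below is a placeholder and the exact comment fields
# depend on the installed yt-dlp version, so treat this as an example under
# those assumptions rather than part of the tool itself.
#
# sample.txt is expected to hold one video URL per line, e.g.:
#   https://www.youtube.com/watch?v=VIDEO_ID_1
#   https://www.youtube.com/watch?v=VIDEO_ID_2
#
# To inspect a single video's comments without writing any files:
#
#   comments_info, title = download_comments_info_json(
#       'https://www.youtube.com/watch?v=VIDEO_ID_1',
#       top_comments='20', replies='5',
#   )
#   print(title, len(comments_info['comments']))
#   for c in comments_info['comments'][:5]:
#       # yt-dlp comment entries typically include 'author', 'text' and
#       # 'like_count', but treat every field as optional.
#       print(c.get('author'), '->', c.get('text'))
# ---------------------------------------------------------------------------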