import json
import os
import re

import yt_dlp


def sanitize_filename(name, max_length=100):
    """
    Sanitize a filename by removing disallowed characters, replacing spaces
    with underscores, and truncating it if it exceeds max_length.
    """
    name = re.sub(r'[\\/*?:"<>|]', '', name)
    name = name.replace(' ', '_')
    return name[:max_length]


def download_comments_info_json(url: str, top_comments: str = 'all', replies: str = 'all'):
    """
    Fetch comments (and replies, if specified) for the given video URL.

    Returns a dict of the form {'comments': [...]} together with the video title.
    """
    # yt-dlp's youtube extractor reads max_comments as a list of
    # [max-comments, max-parents, max-replies, max-replies-per-thread];
    # limit parent comments and replies while leaving the total unbounded.
    if top_comments == 'all' and replies == 'all':
        max_comments = ['all']
    else:
        max_comments = ['all', str(top_comments), str(replies)]

    ydl_opts = {
        'getcomments': True,
        'skip_download': True,
        'writesubtitles': True,
        'writeautomaticsub': True,
        'extractor_args': {
            'youtube': {
                'max_comments': max_comments,
                # Extractor-arg values must be lists of strings when passed
                # through the Python API.
                'comment_sort': ['top'],
            }
        },
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=False)
        comments_info = {'comments': info.get('comments') or []}
        video_title = info.get('title', 'video')

    return comments_info, video_title


def main(urls_file, base_directory):
    """
    Read video URLs from a file, download comments and replies for each,
    and save them as JSON files named after the video titles.
    """
    # Ensure the output directory exists
    os.makedirs(base_directory, exist_ok=True)

    with open(urls_file, 'r', encoding='utf-8') as file:
        urls = file.readlines()

    for url in urls:
        url = url.strip()
        if not url:
            continue  # skip blank lines

        comments_info, video_title = download_comments_info_json(url)

        # Sanitize the title and build the output path
        filename = sanitize_filename(video_title) + '.json'
        output_path = os.path.join(base_directory, filename)

        # Save the comment data to a JSON file
        with open(output_path, 'w', encoding='utf-8') as fh:
            json.dump(comments_info, fh, indent=2)

        print(f"Comments and replies saved successfully as {output_path}")


if __name__ == '__main__':
    # Paths
    urls_file = 'C:\\Users\\v1\\Documents\\sample.txt'
    base_directory = 'C:\\Users\\v1\\Documents\\saampleout'

    # Execute the main function
    main(urls_file, base_directory)
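
# ---------------------------------------------------------------------------
# Illustrative usage sketch (kept as comments so the script's behaviour is
# unchanged). The URL below is a placeholder and the exact comment fields
# depend on the installed yt-dlp version, so treat this as an example under
# those assumptions rather than part of the tool itself.
#
# sample.txt is expected to hold one video URL per line, e.g.:
#   https://www.youtube.com/watch?v=VIDEO_ID_1
#   https://www.youtube.com/watch?v=VIDEO_ID_2
#
# To inspect a single video's comments without writing any files:
#
#   comments_info, title = download_comments_info_json(
#       'https://www.youtube.com/watch?v=VIDEO_ID_1',
#       top_comments='20', replies='5',
#   )
#   print(title, len(comments_info['comments']))
#   for c in comments_info['comments'][:5]:
#       # yt-dlp comment entries typically include 'author', 'text' and
#       # 'like_count', but treat every field as optional.
#       print(c.get('author'), '->', c.get('text'))
# ---------------------------------------------------------------------------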