In [1]:
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow, Flow
from google.auth.transport.requests import Request
from google.auth.exceptions import RefreshError
import json
from google.oauth2.credentials import Credentials
import os
from pathlib import Path
from elasticsearch import Elasticsearch
import json
import pandas as pd

In [2]:
class YouTubeAPIOauthConstants:
    """Constants shared by the YouTube Data API OAuth helpers."""

    # Name of the JSON file in which the OAuth token is stored.
    TOKEN_FILE = 'credentials.json'
    # Service name and version handed to the API client builder.
    API_SERVICE_NAME, API_VERSION = 'youtube', 'v3'
    # OAuth scopes requested when authorizing.
    SCOPES = ["https://www.googleapis.com/auth/youtube.force-ssl"]

In [3]:
class YouTubeAPIAuth:
    """Authenticate against the YouTube Data API and build an API client.

    Credentials are persisted as JSON (see ``__credentials_to_dict``) so they
    can be loaded again by ``authenticate_from_credentials`` or reused by
    ``authenticate_from_client_secrets_file``.
    """
    __TOKEN_FILE = YouTubeAPIOauthConstants.TOKEN_FILE
    __API_SERVICE_NAME = YouTubeAPIOauthConstants.API_SERVICE_NAME
    __API_VERSION = YouTubeAPIOauthConstants.API_VERSION
    __SCOPES = YouTubeAPIOauthConstants.SCOPES

    def __init__(self):
        self.__credentials_path = None
        self.__client_secrets_file = None
        self.__credentials = None

    def authenticate_from_client_secrets_file(self, client_secrets_file: str,
                                              credentials_path: str = ''):
        """Authenticate using a client secrets file and return an API client.

        Parameters
        ----------
        client_secrets_file: str
            Path to the OAuth client secrets file; it must exist.
        credentials_path: str
            Path of an existing credentials file to reuse. When empty or when
            the path does not exist, the default location in the user's home
            directory is used instead.

        Raises
        ------
        ValueError
            If ``client_secrets_file`` is empty or does not exist.
        TypeError
            If ``client_secrets_file`` is not a string.
        """
        self.__verify_client_secret_file(client_secrets_file)
        self.__client_secrets_file = client_secrets_file
        self.__credentials_path = self.__resolve_credentials_path(credentials_path)
        return self.__from_client_secrets_file()

    def authenticate_from_credentials(self, credentials_path: str):
        """Authenticate using a stored credentials file and return an API client.

        Raises
        ------
        ValueError
            If the path is empty, does not exist or is not a file.
        TypeError
            If the path is not a string.
        """
        if not credentials_path:
            raise ValueError('The credentials file path has to be provided.')
        if not isinstance(credentials_path, str):
            raise TypeError('The credentials file should be a string.')
        if not os.path.exists(credentials_path):
            raise ValueError('The credentials file path has to exist!')
        if not Path(credentials_path).is_file():
            raise ValueError('The credentials path must be a file.')
        # Remember where the credentials came from so that a refreshed token
        # can be written back to the same file.
        self.__credentials_path = credentials_path
        self.__credentials = self.__load_credentials(credentials_path)
        return self.__from_credentials()

    def __verify_client_secret_file(self, client_secrets_file: str) -> None:
        """Verify the client secret file."""
        if not client_secrets_file:
            raise ValueError('The clients secret file path has to be provided.')
        if not isinstance(client_secrets_file, str):
            raise TypeError('The clients secret file should be a string.')
        if not os.path.exists(client_secrets_file):
            raise ValueError(f'The path {client_secrets_file} does not exist!')

    def __get_default_credentials_path(self):
        """Generate the default api token file location."""
        current_user_home_dir = os.path.expanduser('~')
        return os.path.join(current_user_home_dir, self.__TOKEN_FILE)

    def __resolve_credentials_path(self, credentials_path: str) -> str:
        """Return ``credentials_path`` if it exists, else the default location."""
        if not credentials_path or not os.path.exists(credentials_path):
            return self.__get_default_credentials_path()
        return credentials_path

    def __load_credentials(self, credentials_path: str) -> Credentials:
        """Create credentials from the JSON stored at ``credentials_path``."""
        with open(credentials_path, "r") as credentials_file:
            return Credentials(**json.load(credentials_file))

    def __save_credentials(self) -> None:
        """Write the current credentials as JSON to the credentials path."""
        credentials_dict = self.__credentials_to_dict(self.__credentials)
        with open(self.__credentials_path, "w") as credentials_file:
            json.dump(credentials_dict, credentials_file)

    def __build_client(self):
        """Build the API client with the current credentials."""
        return build(self.__API_SERVICE_NAME, self.__API_VERSION,
                     credentials=self.__credentials)

    def __from_client_secrets_file(self):
        """Reuse, refresh or newly obtain credentials, then build the client."""
        if os.path.exists(self.__credentials_path):
            self.__credentials = self.__load_credentials(self.__credentials_path)
        credentials = self.__credentials
        if not credentials or not credentials.valid:
            if credentials and credentials.expired and credentials.refresh_token:
                credentials.refresh(Request())
            else:
                # No reusable credentials: run the local-server consent flow.
                flow = InstalledAppFlow.from_client_secrets_file(
                    self.__client_secrets_file, self.__SCOPES)
                self.__credentials = flow.run_local_server(port=0)
            self.__save_credentials()
        return self.__build_client()

    def __from_credentials(self):
        """Refresh the loaded credentials when possible, then build the client."""
        credentials = self.__credentials
        if credentials and not credentials.valid:
            if credentials.expired and credentials.refresh_token:
                credentials.refresh(Request())
                # Only rewrite the file when the token actually changed.
                self.__save_credentials()
        return self.__build_client()

    def generate_credentials(self, client_secrets_file: str, credentials_path: str = ''):
        """Run a copy/paste authorization flow and store the credentials.

        Prints the authorization URL, reads the authorization code from
        standard input and writes the resulting credentials as JSON. The
        destination follows the same rule as
        ``authenticate_from_client_secrets_file``.
        """
        self.__verify_client_secret_file(client_secrets_file)
        self.__client_secrets_file = client_secrets_file
        self.__credentials_path = self.__resolve_credentials_path(credentials_path)
        # NOTE(review): this uses the out-of-band redirect URI; confirm that
        # the OAuth client in use still permits it.
        flow = Flow.from_client_secrets_file(
            self.__client_secrets_file,
            scopes=self.__SCOPES,
            redirect_uri='urn:ietf:wg:oauth:2.0:oob')
        auth_url, _ = flow.authorization_url(prompt='consent')

        print('Please go to this URL: {}'.format(auth_url))
        code = input('Enter the authorization code: ')
        flow.fetch_token(code=code)
        self.__credentials = flow.credentials
        self.__save_credentials()

    def __credentials_to_dict(self, credentials: Credentials) -> dict:
        """Convert credentials to a dict for easy work with Flask."""
        return dict(
            token=credentials.token,
            refresh_token=credentials.refresh_token,
            token_uri=credentials.token_uri,
            client_id=credentials.client_id,
            client_secret=credentials.client_secret,
            scopes=credentials.scopes
        )

In [4]:
class YouTubeSearchQuery:
    """A query to pass to the search resource.

    The query string must be a non-empty ``str``; this is enforced both when
    the object is created and whenever ``query_string`` is reassigned.
    """

    def __init__(self, query_string: str):
        # Go through the property setter so that construction applies the
        # same validation as later assignment.
        self.query_string = query_string

    @property
    def query_string(self) -> str:
        """The validated query string."""
        return self.__query_string

    @query_string.setter
    def query_string(self, query_str: str) -> None:
        """Set the query string.

        Raises
        ------
        ValueError
            If ``query_str`` is empty or otherwise falsy.
        TypeError
            If ``query_str`` is not a string.
        """
        if not query_str:
            raise ValueError('The query string has to be provided')
        if not isinstance(query_str, str):
            raise TypeError('The query string has to be a string')
        self.__query_string = query_str

In [5]:
class YouTubeVideoSearchQuery(YouTubeSearchQuery):
    """A search query for videos; currently identical to ``YouTubeSearchQuery``."""

In [6]:
class YouTubeSearchType:
    """Names of the resource types a search can be restricted to."""

    VIDEO, CHANNEL, PLAYLIST = 'video', 'channel', 'playlist'

In [7]:
class YouTubeSearch:
    """Base class for the YouTube search helpers."""

    # Class-private defaults (name-mangled, so subclasses cannot read them
    # through ``self.__MAX_RESULTS`` / ``self.__REGION_CODE``).
    __MAX_RESULTS, __REGION_CODE = 10, 'US'

    def __init__(self, *args, **kwargs):
        """Accept any arguments and ignore them."""

In [8]:
class VideoStat:
    """A single video statistic value."""

    def __init__(self, stat: int):
        self.__stat = stat

    # stat setter
    def stat(self):
        """Placeholder for the stat setter; not implemented, returns ``None``."""
        pass

    # stat getter
    def get_stat(self):
        """Return the stored statistic."""
        return self.__stat

    def __format_stat(self):
        """Placeholder for stat formatting; not implemented, returns ``None``."""
        pass

In [9]:
class YouTubeVideoStats:
    """View, like and comment counts of a video, stored as integers."""

    def __init__(self, viewCount: int, likeCount: int, commentCount: int):
        # Each count is passed through ``int`` before it is stored.
        self.__view_count, self.__like_count, self.__comment_count = (
            int(viewCount),
            int(likeCount),
            int(commentCount),
        )

    def get_video_stats(self):
        """Return the three counts as a dictionary."""
        return dict(
            view_count=self.__view_count,
            like_count=self.__like_count,
            comment_count=self.__comment_count,
        )

In [10]:
class YouTubeVideoDetails:
    """Descriptive details of a YouTube video.

    ``thumbnails`` is a mapping from a size name (``default``, ``medium``,
    ``high``, ``standard``, ``maxres``) to a mapping that holds a ``url``.
    """

    # Sizes are tried in this order; the first one present wins.
    __THUMBNAIL_SIZES = ('default', 'medium', 'high', 'standard', 'maxres')

    def __init__(self, id: str, channelId: str, title: str, channelTitle: str,
                 description: str, thumbnails: dict, tags: list[str], duration: str, licensedContent: bool):
        self.__id = id
        self.__channel_id = channelId
        self.__title = title
        self.__channel_title = channelTitle
        self.__description = description
        self.__thumbnails = thumbnails
        self.__tags = tags
        self.__duration = duration
        self.__licensed_content = licensedContent

    def get_video_details(self):
        """Return all details as a dictionary with a single thumbnail URL."""
        video_details = {
            'id': self.__id,
            'channel_id': self.__channel_id,
            'title': self.__title,
            'channel_title': self.__channel_title,
            'description': self.__description,
            'thumbnail': self.get_video_thumbnail(),
            'tags': self.__tags,
            'duration': self.__duration,
            'licensed_content': self.__licensed_content
        }
        return video_details

    def get_video_thumbnail(self):
        """Return the URL of the first available thumbnail, or ``''`` if none."""
        if self.__thumbnails:
            for size in self.__THUMBNAIL_SIZES:
                thumbnail = self.__thumbnails.get(size)
                if thumbnail:
                    return thumbnail.get('url')
        return ''

    def get_video_title(self):
        """Return the video title."""
        return self.__title

    def get_video_id(self):
        """Return the video id."""
        return self.__id

    def get_video_description(self):
        """Return the video description."""
        return self.__description

    def get_video_duration(self):
        """Return the duration exactly as it was given to the constructor."""
        return self.__duration

    def get_video_tags(self):
        """Return the video tags, or an empty list when there are none."""
        if self.__tags:
            return self.__tags
        return []

    def get_channel_id(self):
        """Return the id of the channel that owns the video."""
        return self.__channel_id

    def get_channel_title(self):
        """Return the title of the channel that owns the video."""
        return self.__channel_title

In [11]:
class YouTubeComment:
    """A top-level YouTube comment together with its author details."""

    def __init__(self, id: str, videoId: str, totalReplyCount: str, textDisplay: str,
                authorDisplayName: str, authorProfileImageUrl: str, authorChannelId: str,
                likeCount: str, publishedAt: str, updatedAt: str):
        self.__id = id
        self.__video_id = videoId
        self.__total_reply_count = totalReplyCount
        self.__text_display = textDisplay
        self.__author_display_name = authorDisplayName
        self.__author_profile_image_url = authorProfileImageUrl
        self.__author_channel_id = authorChannelId
        # The like count is the only field converted on the way in.
        self.__like_count = int(likeCount)
        self.__published_at = publishedAt
        self.__updated_at = updatedAt

    def get_comment(self):
        """Return the comment as a dictionary keyed by the constructor's names."""
        comment = dict(
            id=self.__id,
            videoId=self.__video_id,
            totalReplyCount=self.__total_reply_count,
            textDisplay=self.__text_display,
            authorDisplayName=self.__author_display_name,
            authorProfileImageUrl=self.__author_profile_image_url,
            authorChannelId=self.__author_channel_id,
            likeCount=self.__like_count,
            publishedAt=self.__published_at,
            updatedAt=self.__updated_at
        )
        return comment

    def get_comment_text(self):
        """Return the display text of the comment."""
        return self.__text_display

    def __str__(self):
        return self.get_comment_text()

    def __repr__(self):
        # Built from adjacent literals: a backslash continuation inside the
        # string would put the next line's indentation into the output.
        return (
            f"YouTubeComment(id='{self.__id}', videoId='{self.__video_id}', "
            f"totalReplyCount={self.__total_reply_count})"
        )

In [12]:
class YouTubeCommentThread:
    """Fetches the top-level comments of a single video."""

    def __init__(self, video_id: str):
        self.__video_id = video_id

    def get_video_comments(self, youtube_client):
        """Get the top level comments for a video.

        Returns a list of ``YouTubeComment`` objects built from one
        ``commentThreads().list`` call (only the returned page is used).
        """
        return [
            self.__create_comment(comment)
            for comment in self.__find_comments(youtube_client)
        ]

    def __generate_basic_info_params(self):
        """Return the request parameters for the comment thread listing."""
        basic_info_params = dict(
            videoId=self.__video_id,
            part='snippet,replies'
        )
        return basic_info_params

    def __find_comments(self, youtube_client):
        """Find the video comments."""
        basic_info_params = self.__generate_basic_info_params()
        search_request = youtube_client.commentThreads().list(
                **basic_info_params
            )
        search_response = search_request.execute()
        return self.__parse_comments(search_response)

    def __create_comment(self, comment_details):
        """Build a ``YouTubeComment`` from one parsed comment dictionary."""
        return YouTubeComment(**comment_details)

    def __parse_comments(self, search_response):
        """Flatten the response into dictionaries of ``YouTubeComment`` arguments.

        A response without ``items`` yields an empty list, and a comment
        without ``authorChannelId`` gets an empty string for that field
        instead of raising ``KeyError``.
        """
        comments = []
        for item in search_response.get('items', []):
            thread_snippet = item['snippet']
            comment_snippet = thread_snippet['topLevelComment']['snippet']
            # Treated as optional so one comment lacking it does not abort
            # the parsing of the whole response.
            author_channel = comment_snippet.get('authorChannelId') or {}
            comments.append({
                'id': item['id'],
                'videoId': thread_snippet['videoId'],
                'totalReplyCount': thread_snippet['totalReplyCount'],
                'textDisplay': comment_snippet['textDisplay'],
                'authorDisplayName': comment_snippet['authorDisplayName'],
                'authorProfileImageUrl': comment_snippet['authorProfileImageUrl'],
                'authorChannelId': author_channel.get('value', ''),
                'likeCount': comment_snippet['likeCount'],
                'publishedAt': comment_snippet['publishedAt'],
                'updatedAt': comment_snippet['updatedAt']
            })
        return comments

In [93]:
class YouTubeVideo:
    """A YouTube Video.

    Wraps the parsed details and statistics of a video and, on construction,
    also fetches its top-level comments and its channel through the given
    API client.
    """
    def __init__(self, video_details, youtube_client):
        """Build the video from parsed details.

        Parameters
        ----------
        video_details: dict
            Must hold a ``'details'`` and a ``'statistics'`` mapping whose keys
            match the ``YouTubeVideoDetails`` / ``YouTubeVideoStats`` constructors.
        youtube_client:
            API client used to fetch the comments and the channel.
        """
        self.__youtube_client = youtube_client
        self.__video_stats = self.__create_video_stats(video_details)
        self.__video_details = self.__create_video_details(video_details)
        self.__video_top_level_comments = self.__set_video_comments()
        self.__channel = self.__set_video_channel()

    def set_youtube_client(self, youtube_client):
        """Replace the API client used for later fetches."""
        self.__youtube_client = youtube_client

    def get_video_stats_details(self):
        """Return the details and statistics dictionaries in one mapping."""
        return {
            'details': self.get_video_details(),
            'statistics': self.get_video_stats(),
        }

    def __set_video_comments(self):
        """Fetch and return the top-level comments (does not store them)."""
        youtube_commenthread = YouTubeCommentThread(self.get_video_id())
        return youtube_commenthread.get_video_comments(self.__youtube_client)

    def __set_video_channel(self):
        """Fetch and return the video's channel (does not store it)."""
        # FindChannel is not defined in this part of the notebook; it is
        # expected to be available by the time a video is created.
        find_channel = FindChannel(self.__youtube_client)
        return find_channel.find_channel_by_id(self.get_channel_id())

    def get_video_comments(self):
        """Return the top-level comments, fetching them again if none are held."""
        if not self.__video_top_level_comments:
            # Keep the fetched value; the helper only returns it.
            self.__video_top_level_comments = self.__set_video_comments()
        return self.__video_top_level_comments

    def get_video_channel(self):
        """Return the video's channel, fetching it again if it is not held."""
        if not self.__channel:
            self.__channel = self.__set_video_channel()
        return self.__channel

    def __create_video_stats(self, video_details: dict):
        """Build the statistics object from ``video_details['statistics']``."""
        return YouTubeVideoStats(**video_details['statistics'])

    def __create_video_details(self, video_details: dict):
        """Build the details object from ``video_details['details']``."""
        return YouTubeVideoDetails(**video_details['details'])

    def get_video_stats(self):
        return self.__video_stats.get_video_stats()

    def get_video_details(self):
        return self.__video_details.get_video_details()

    def get_video_id(self):
        return self.__video_details.get_video_id()

    def get_video_title(self):
        return self.__video_details.get_video_title()

    def get_video_description(self):
        return self.__video_details.get_video_description()

    def get_video_tags(self):
        return self.__video_details.get_video_tags()

    def get_channel_id(self):
        return self.__video_details.get_channel_id()

    def get_channel_title(self):
        return self.__video_details.get_channel_title()

    def get_video_thumbnail(self):
        return self.__video_details.get_video_thumbnail()

    def get_video_channel_thumbnail(self):
        """Return the thumbnail of the video's channel."""
        return self.get_video_channel().get_channel_thumbnail()

    def to_dict(self):
        """Return a flat dictionary of the video and channel fields."""
        return {
            'video_id': self.get_video_id(),
            'video_title': self.get_video_title(),
            'video_description': self.get_video_description(),
            'video_thumbnail': self.get_video_thumbnail(),
            'video_tags': self.get_video_tags(),
            'channel_id': self.get_channel_id(),
            'channel_title': self.get_channel_title(),
            'channel_thumbnail': self.get_video_channel_thumbnail()
        }

    def to_csv(self):
        """Not implemented yet."""
        pass

    def __str__(self):
        return f'{self.get_video_title()} from {self.get_channel_title()}'

    def __repr__(self) -> str:
        return f"YouTubeVideo(id={self.get_video_id()}, video_title={self.get_video_title()})"

In [14]:
class YouTubeVideoCollection:
    """An ordered collection of videos that can be exported in several forms."""

    # Columns of the frame built by ``to_pandas``, in order.
    __PANDAS_COLUMNS = (
        'video_id',
        'video_title',
        'video_description',
        'video_thumbnail',
        'video_tags',
        'channel_id',
        'channel_title',
    )

    def __init__(self):
        self.__youtube_videos = []
        self.__es_client = None

    def get_youtube_videos(self):
        """Return the list of collected videos."""
        return self.__youtube_videos

    # Annotations are strings so the class does not need ``YouTubeVideo`` to
    # exist at the moment this cell is executed.
    def add_video(self, video: 'YouTubeVideo'):
        """Append one video."""
        self.__youtube_videos.append(video)

    def add_videos(self, videos: 'list[YouTubeVideo]'):
        """Append every video of ``videos`` in order."""
        self.__youtube_videos.extend(videos)

    def save_to_database(self):
        """Not implemented yet."""
        pass

    def save_to_elasticsearch(self, index_name, elastic_search_host):
        """Index every video's ``to_dict()`` under its video id.

        The Elasticsearch client is created on first use and then reused.
        """
        self.__create_es_client(elastic_search_host)
        for video in self.__youtube_videos:
            video_details = video.to_dict()
            self.__es_client.index(index=index_name, document=video_details, id=video_details['video_id'])

    def __delete_index(self, index_name):
        """Delete the index, ignoring 400 and 404 responses."""
        self.__es_client.indices.delete(index=index_name, ignore=[400, 404])

    def __create_index(self, index_name):
        """Create the index."""
        self.__es_client.indices.create(index = index_name)

    def __create_es_client(self, es_host):
        """Create the Elasticsearch client unless one already exists."""
        if not self.__es_client:
            self.__es_client = Elasticsearch(hosts=[es_host])

    def save_to_csv(self):
        """Not implemented yet."""
        pass

    def save_to_json(self, file=''):
        """Write the videos' ``to_dict()`` output as a JSON list.

        Parameters
        ----------
        file: str
            Destination path; ``'file.json'`` is used when empty.
        """
        if not file:
            file = 'file.json'
        videos = [video.to_dict() for video in self.__youtube_videos]
        with open(file, 'w') as file_path:
            # The target file has to be passed to ``json.dump``.
            json.dump(videos, file_path)

    def to_pandas(self):
        """Return a ``pandas.DataFrame`` with one row per video.

        The frame has the columns listed in ``__PANDAS_COLUMNS``; the channel
        thumbnail that ``to_dict()`` also provides is not included.
        """
        videos = [video.to_dict() for video in self.__youtube_videos]
        data = {
            column: [video[column] for video in videos]
            for column in self.__PANDAS_COLUMNS
        }
        return pd.DataFrame(data)

In [15]:
class FindVideo:
    """Looks up a single video by id and wraps it in a ``YouTubeVideo``."""

    def __init__(self, youtube_client):
        """Store the API client used for the lookups."""
        self.__youtube_client = youtube_client

    def __generate_basic_info_params(self, video_id: str):
        """Return the request parameters for looking up ``video_id``."""
        basic_info_params = dict(
            id=video_id,
            part='snippet,contentDetails,statistics'
        )
        return basic_info_params

    def find_video(self, video_id: str):
        """Find the video.

        Raises
        ------
        IndexError
            If the response contains no video.
        """
        basic_info_params = self.__generate_basic_info_params(video_id)
        search_request = self.__youtube_client.videos().list(
                **basic_info_params
            )
        search_response = search_request.execute()
        parsed_response = self.__parse_video_details(search_response)
        return YouTubeVideo(parsed_response, self.__youtube_client)

    def __parse_video_details(self, video_details: dict):
        """Parse the video details.

        Only the first item of the response is used. Statistics that are
        missing from the response are reported as ``0`` so that one absent
        counter does not make the whole video unusable.

        Returns
        -------
        parsed_video_details: dict
            A dictionary of the YouTube video details, with a ``'details'``
            and a ``'statistics'`` mapping.

        Raises
        ------
        IndexError
            If the response has no items.
        """
        items = video_details['items']
        if not items:
            # Same exception type as indexing an empty list, clearer message.
            raise IndexError('The response does not contain any video.')
        item = items[0]
        snippet = item['snippet']
        content_details = item['contentDetails']
        statistics = item.get('statistics') or {}
        return {
            'details': {
                'id': item['id'],
                'channelId': snippet['channelId'],
                'title': snippet['title'],
                'channelTitle': snippet['channelTitle'],
                'description': snippet['description'],
                'thumbnails': snippet['thumbnails'],
                # Tags are optional; an absent or empty value becomes [].
                'tags': snippet.get('tags') or [],
                'duration': content_details['duration'],
                'licensedContent': content_details['licensedContent'],
            },
            'statistics': {
                'viewCount': statistics.get('viewCount', 0),
                'likeCount': statistics.get('likeCount', 0),
                'commentCount': statistics.get('commentCount', 0),
            },
        }

In [52]:
class VideoSearch(YouTubeSearch):
    """Search YouTube for videos and collect what the query searches find.

    Videos returned by ``search_videos`` and ``search_related_videos`` are
    also accumulated in a ``YouTubeVideoCollection`` (see ``get_videos``).
    """
    # Redeclared here because the base class's values are name-mangled and
    # therefore not reachable as ``self.__MAX_RESULTS`` from this class.
    __MAX_RESULTS = 10
    __REGION_CODE = 'US'

    def __init__(self, youtube_client):
        self.__youtube_client = youtube_client
        self.__type = YouTubeSearchType.VIDEO
        # Empty-string placeholder until ``search_videos`` sets a real query.
        self.__query = ''
        self.__videos = YouTubeVideoCollection()

    def __get_query(self):
        """Return the current query string ('' before the first search)."""
        # The placeholder is a plain string without ``query_string``.
        return getattr(self.__query, 'query_string', self.__query)

    def get_videos(self):
        """Return the collection of videos accumulated so far."""
        return self.__videos

    def basic_info(self):
        """Return the request parameters of a basic query search."""
        return self.__generate_basic_info_params()

    def advanced_info(self):
        """Not implemented yet."""
        pass

    def all_info(self):
        """Not implemented yet."""
        pass

    def __generate_basic_info_params(self):
        """Parameters for a query search."""
        basic_info_params = dict(
            part='id',
            type=self.__type,
            q=self.__get_query(),
            maxResults=self.__MAX_RESULTS,
            regionCode=self.__REGION_CODE
        )
        return basic_info_params

    def __generate_basic_info_params_for_related_video(self, video_id):
        """Parameters for a search of videos related to ``video_id``."""
        # NOTE(review): confirm the search endpoint still accepts
        # ``relatedToVideoId`` before relying on this.
        basic_info_params = dict(
            part='id',
            type=self.__type,
            relatedToVideoId=video_id,
            maxResults=self.__MAX_RESULTS,
            regionCode=self.__REGION_CODE
        )
        return basic_info_params

    def __generate_basic_info_params_for_most_popular_video_by_region(self,
                region_code):
        """Parameters for the most popular videos of a region."""
        basic_info_params = dict(
            part='id',
            chart='mostPopular',
            regionCode=region_code
        )
        return basic_info_params

    def __generate_basic_info_params_for_most_popular_video_by_category(self,
                category_id):
        """Parameters for the most popular videos of a category."""
        basic_info_params = dict(
            part='id',
            chart='mostPopular',
            videoCategoryId=category_id
        )
        return basic_info_params

    def search_videos(self, query_string: str, next_page_token=None):
        """Search videos matching ``query_string``.

        Parameters
        ----------
        query_string: str
            The text to search for.
        next_page_token:
            Page token of the page to fetch; omitted for the first page.

        Returns
        -------
        tuple
            ``(previous_page_token, next_page_token, videos)``; a missing
            token is reported as ``''``.
        """
        self.__query = YouTubeSearchQuery(query_string)
        basic_info_params = self.__generate_basic_info_params()
        if next_page_token:
            basic_info_params['pageToken'] = next_page_token
        search_request = self.__youtube_client.search().list(
            **basic_info_params
        )
        search_response = search_request.execute()
        return self.__parse_basic_response(search_response)

    def __parse_basic_response(self, search_response):
        """Fetch every video of a search response and add it to the collection."""
        previous_page_token = search_response.get('prevPageToken', '')
        next_page_token = search_response.get('nextPageToken', '')
        finder = FindVideo(self.__youtube_client)
        videos = [
            finder.find_video(video_result['id']['videoId'])
            for video_result in search_response['items']
        ]
        self.__videos.add_videos(videos)
        return (previous_page_token, next_page_token, videos)

    def __parse_basic_response_popular_videos(self, search_response, youtube_client=None):
        """Fetch every video listed in a most-popular response.

        ``youtube_client`` is used for the per-video lookups when given,
        otherwise the client passed to the constructor is used.
        """
        finder = FindVideo(youtube_client or self.__youtube_client)
        videos = []
        for video_result in search_response['items']:
            resource_id = video_result['id']
            # Accept both a plain id and an id object carrying ``videoId``.
            if isinstance(resource_id, dict):
                resource_id = resource_id['videoId']
            videos.append(finder.find_video(resource_id))
        return videos

    def search_related_videos(self, youtube_video):
        """Search videos related to ``youtube_video``.

        Returns ``(previous_page_token, next_page_token, videos)``.
        """
        basic_info_params = self.__generate_basic_info_params_for_related_video(youtube_video.get_video_id())
        search_request = self.__youtube_client.search().list(
            **basic_info_params
        )
        search_response = search_request.execute()
        return self.__parse_basic_response(search_response)

    def search_most_popular_videos_by_region(self, region_code, youtube_client, search_type='basic'):
        """Return the most popular videos of ``region_code``.

        Only ``search_type='basic'`` is supported; any other value returns an
        empty list.
        """
        videos = []
        if search_type == 'basic':
            basic_info_params = self.__generate_basic_info_params_for_most_popular_video_by_region(region_code)
            search_request = youtube_client.videos().list(
                **basic_info_params
            )
            search_response = search_request.execute()
            videos = self.__parse_basic_response_popular_videos(search_response, youtube_client)
        return videos

    def search_most_popular_videos_by_category(self, category_id, youtube_client, search_type='basic'):
        """Return the most popular videos of ``category_id``.

        This is best-effort: if the request or the per-video lookups fail, an
        empty list is returned. Only ``search_type='basic'`` is supported;
        any other value also returns an empty list.
        """
        videos = []
        if search_type == 'basic':
            basic_info_params = self.__generate_basic_info_params_for_most_popular_video_by_category(category_id)
            search_request = youtube_client.videos().list(
                **basic_info_params
            )
            try:
                search_response = search_request.execute()
                videos = self.__parse_basic_response_popular_videos(search_response, youtube_client)
            except Exception:
                # Still best-effort, but KeyboardInterrupt and SystemExit
                # are no longer swallowed.
                videos = []
        return videos

In [17]:
class YouTubeVideoCategory:
    """A video category made up of an id and a title."""

    def __init__(self, id, title):
        self.__id, self.__title = id, title

    def get_id(self):
        """Return the category id."""
        return self.__id

    def get_title(self):
        """Return the category title."""
        return self.__title

    def __str__(self):
        # The string form is just the title.
        title = self.get_title()
        return title

    def __repr__(self):
        text = f"YouTubeVideoCategory(id='{self.__id}', title='{self.__title}')"
        return text

In [18]:
class SearchYouTubeVideoCategories:
    """Fetches the video categories of a region when it is created."""

    def __init__(self, youtube_client, region_code='us'):
        self.__youtube_client = youtube_client
        self.__region_code = region_code
        # The categories are requested immediately and kept for later reads.
        self.__youtube_video_categories = self.__search_video_categories()

    def __generate_basic_info_params(self):
        """Return the request parameters for the category listing."""
        return {
            'part': 'snippet',
            'regionCode': self.__region_code,
        }

    def __search_video_categories(self):
        """Request the categories and return them as category objects."""
        request_params = self.__generate_basic_info_params()
        request = self.__youtube_client.videoCategories().list(**request_params)
        response = request.execute()
        return self.__parse_categories(response)

    def __parse_categories(self, search_response):
        """Turn each response item into a ``YouTubeVideoCategory``."""
        return [
            YouTubeVideoCategory(id=entry['id'], title=entry['snippet']['title'])
            for entry in search_response['items']
        ]

    def get_youtube_video_categories(self):
        """Return the categories fetched at construction time."""
        return self.__youtube_video_categories

In [19]:
class PlaylistItem:
    """One entry of a playlist, pointing at a video."""

    def __init__(self, id, publishedAt, channelId, title, description, thumbnails,
                channelTitle, position, videoId, videoOwnerChannelTitle,
                videoOwnerChannelId):
        self.__id = id
        self.__publishedAt = publishedAt
        self.__channelId = channelId
        self.__title = title
        self.__description = description
        self.__thumbnails = thumbnails
        self.__channelTitle = channelTitle
        self.__position = int(position)
        self.__videoId = videoId
        self.__videoOwnerChannelTitle = videoOwnerChannelTitle
        self.__videoOwnerChannelId = videoOwnerChannelId
        # Filled in by the first ``get_video`` call.
        self.__video = None

    def get_video(self, youtube_client):
        """Return the video this item points at, looking it up only once.

        Parameters
        ----------
        youtube_client:
            API client used for the lookup on the first call; later calls
            return the stored video and ignore it.
        """
        if not self.__video:
            # FindVideo is constructed with the client and searches by id.
            self.__video = FindVideo(youtube_client).find_video(self.__videoId)
        return self.__video

In [20]:
class PlayList:
    """A YouTube playlist with lazily fetched items and videos."""

    # ``standard`` stays the preferred size; the others are fallbacks.
    __THUMBNAIL_SIZES = ('standard', 'maxres', 'high', 'medium', 'default')

    def __init__(self, id, channelId, title, description, thumbnails, channelTitle,
                itemCount, player):
        self.__id = id
        self.__channelId = channelId
        self.__title = title
        self.__description = description
        self.__thumbnails = thumbnails
        self.__channelTitle = channelTitle
        self.__itemCount = itemCount
        self.__player = player
        self.__playlist_items = []
        self.__videos = []

    def get_playlist_items(self, youtube_client):
        """Return the playlist items, fetching them on first use.

        Only the page returned by a single ``playlistItems().list`` call is
        used.
        """
        if not self.__playlist_items:
            basic_info_params = self.__generate_basic_info_params()
            search_request = youtube_client.playlistItems().list(
                **basic_info_params
            )
            search_response = search_request.execute()
            parsed_response = self.__parse_playlist_items(search_response)
            self.__playlist_items = [PlaylistItem(**item) for item in parsed_response]
        return self.__playlist_items

    def get_videos(self, youtube_client):
        """Return the videos of the playlist items, fetching them on first use."""
        if not self.__videos:
            for playlist_item in self.get_playlist_items(youtube_client):
                self.__videos.append(playlist_item.get_video(youtube_client))
        return self.__videos

    def __generate_basic_info_params(self):
        """Return the request parameters for listing this playlist's items."""
        basic_info_params = dict(
            part='id,snippet,contentDetails',
            playlistId=self.__id
        )
        return basic_info_params

    def __parse_playlist_items(self, search_response):
        """Flatten the response into dictionaries of ``PlaylistItem`` arguments.

        The two ``videoOwner*`` fields default to ``''`` when an item does not
        carry them, so one such item does not abort the whole listing.
        """
        playlist_items = []
        for item in search_response['items']:
            snippet = item['snippet']
            playlist_items.append({
                'id': item['id'],
                'publishedAt': snippet['publishedAt'],
                'channelId': snippet['channelId'],
                'title': snippet['title'],
                'description': snippet['description'],
                'thumbnails': snippet['thumbnails'],
                'channelTitle': snippet['channelTitle'],
                'position': snippet['position'],
                'videoId': snippet['resourceId']['videoId'],
                'videoOwnerChannelTitle': snippet.get('videoOwnerChannelTitle', ''),
                'videoOwnerChannelId': snippet.get('videoOwnerChannelId', ''),
            })
        return playlist_items

    def get_playlist_thumbnail(self):
        """Return a thumbnail URL, or ``''`` when no thumbnail is available.

        The ``standard`` size is preferred; if it is missing the other sizes
        are tried instead of raising ``KeyError``.
        """
        thumbnails = self.__thumbnails or {}
        for size in self.__THUMBNAIL_SIZES:
            thumbnail = thumbnails.get(size)
            if thumbnail and thumbnail.get('url'):
                return thumbnail['url']
        return ''

    def get_playlist_title(self):
        """Return the playlist title."""
        return self.__title

    def get_playlist_channel_title(self):
        """Return the title of the channel that owns the playlist."""
        return self.__channelTitle

In [21]:
class FindPlaylist:
    """Fetch a single playlist by id and wrap it in a ``PlayList``."""

    def __init__(self, playlist_id: str):
        """Remember the id of the playlist to look up."""
        self.__playlist_id = playlist_id

    def __generate_basic_info_params(self):
        """Return the keyword arguments for the ``playlists().list`` call."""
        return {
            'id': self.__playlist_id,
            'part': 'id,snippet,contentDetails,player',
        }

    def find_playlist(self, youtube_client):
        """Request the playlist and return it as a ``PlayList``.

        Parameters
        ----------
        youtube_client:
            Client whose ``playlists().list(...).execute()`` is called.

        Returns
        -------
        PlayList
            Built from the fields extracted from the first response item.
        """
        request_params = self.__generate_basic_info_params()
        response = youtube_client.playlists().list(**request_params).execute()
        return PlayList(**self.__parse_playlist(response))

    def __parse_playlist(self, search_response):
        """Extract the ``PlayList`` constructor fields from the first item.

        Raises ``IndexError`` when the ``items`` list is empty and
        ``KeyError`` when an expected field is missing.
        """
        first = search_response['items'][0]
        playlist_id = first['id']
        snippet = first['snippet']
        return {
            'id': playlist_id,
            'channelId': snippet['channelId'],
            'title': snippet['title'],
            'description': snippet['description'],
            'thumbnails': snippet['thumbnails'],
            'channelTitle': snippet['channelTitle'],
            'itemCount': first['contentDetails']['itemCount'],
            'player': first['player']['embedHtml'],
        }

In [22]:
class PlaylistSearch(YouTubeSearch):
    """Search for playlists matching a query string."""

    __MAX_RESULTS = 10
    # Not referenced by any request built in this class.
    __REGION_CODE = 'US'

    def __init__(self, query_string: str):
        """Store the search type and wrap the query string."""
        self.__type = YouTubeSearchType.PLAYLIST
        self.__query = YouTubeSearchQuery(query_string)

    def __get_query(self):
        """Return the raw query string held by the query object."""
        return self.__query.query_string

    def basic_info(self):
        """Return the keyword arguments used for a basic search request."""
        basic_info_params = self.__generate_basic_info_params()
        return basic_info_params

    def advanced_info(self):
        """Placeholder: not implemented, returns ``None``."""
        pass

    def all_info(self):
        """Placeholder: not implemented, returns ``None``."""
        pass

    def __generate_basic_info_params(self):
        """Build the keyword arguments for ``search().list``."""
        basic_info_params = dict(
            part='id',
            type=self.__type,
            q=self.__get_query(),
            maxResults=self.__MAX_RESULTS,
        )
        return basic_info_params

    def search_playlist(self, youtube_client, search_type='basic'):
        """Search for playlists and fetch each match as a playlist object.

        Parameters
        ----------
        youtube_client:
            Client used for the search request and for every follow-up
            playlist lookup.
        search_type: str
            Only ``'basic'`` is implemented.

        Returns
        -------
        list
            One ``FindPlaylist(...).find_playlist(...)`` result per matching
            playlist id. Any other ``search_type`` yields an empty list.
        """
        # Bound up front: this used to be assigned only inside the 'basic'
        # branch, so any other search_type failed with UnboundLocalError at
        # the return statement.
        play_lists = []
        if search_type == 'basic':
            basic_info_params = self.__generate_basic_info_params()
            search_request = youtube_client.search().list(
                **basic_info_params
            )
            search_response = search_request.execute()
            playlist_ids = self.__parse_playlists(search_response)
            play_lists = [FindPlaylist(playlist_id).find_playlist(youtube_client)
                          for playlist_id in playlist_ids]
        return play_lists

    def __parse_playlists(self, search_response):
        """Return the playlist id of every item in the search response."""
        return [item['id']['playlistId'] for item in search_response['items']]
        

In [94]:
class YouTubeChannel:
    """Holds the details of one YouTube channel."""

    def __init__(self, id, title, description, customUrl, publishedAt, thumbnails, statistics):
        """Store the channel fields exactly as they are passed in."""
        self.__id = id
        self.__title = title
        self.__description = description
        self.__customUrl = customUrl
        self.__publishedAt = publishedAt
        self.__thumbnails = thumbnails
        self.__statistics = statistics

    def get_channel_id(self):
        """Return the channel id."""
        return self.__id

    def get_channel_title(self):
        """Return the channel title."""
        return self.__title

    def get_channel_thumbnail(self):
        """Return the URL of the first available thumbnail.

        Sizes are tried in the order default, medium, high, standard,
        maxres. An empty string is returned when no thumbnails are stored or
        none of those sizes has an entry.
        """
        if not self.__thumbnails:
            return ''
        for size in ('default', 'medium', 'high', 'standard', 'maxres'):
            candidate = self.__thumbnails.get(size)
            if candidate:
                # The first non-empty size wins, even if it carries no 'url'.
                return candidate.get('url')
        return ''

    def __repr__(self) -> str:
        channel_id = self.get_channel_id()
        channel_title = self.get_channel_title()
        return f"YouTubeChannel(id={channel_id}, channel_title={channel_title})"

In [95]:
class FindChannel:
    """Look up channels through a YouTube API client."""

    def __init__(self, youtube_client):
        """Keep the client used for all channel requests."""
        self.__youtube_client = youtube_client

    def __generate_basic_info_params(self, channel_id):
        """Build the keyword arguments for ``channels().list``."""
        basic_info_params = dict(
            id=channel_id,
            part='id,snippet,contentDetails,contentOwnerDetails,statistics,topicDetails',
        )
        return basic_info_params

    def find_channel_by_name(self):
        """Placeholder: not implemented, returns ``None``."""
        pass

    def find_channel_by_id(self, channel_id):
        """Fetch the channel with the given id.

        Parameters
        ----------
        channel_id:
            Id passed as the ``id`` argument of ``channels().list``.

        Returns
        -------
        YouTubeChannel
            Built from the first item of the response.

        Raises
        ------
        IndexError
            If the response's ``items`` list is empty.
        KeyError
            If the response has no ``items`` key or the item lacks a
            required field.
        """
        basic_info_params = self.__generate_basic_info_params(channel_id)
        search_request = self.__youtube_client.channels().list(
            **basic_info_params
        )
        search_response = search_request.execute()
        parsed_response = self.__parse_channel(search_response)
        return YouTubeChannel(**parsed_response)

    def __parse_channel(self, search_response):
        """Extract the ``YouTubeChannel`` constructor fields from the first item.

        ``customUrl`` and the three statistics counters are read with
        ``.get`` and become ``None`` when absent, rather than raising
        ``KeyError``. Presumably channels without a custom URL or with a
        hidden subscriber count omit them - confirm against the API reference.
        """
        item = search_response['items'][0]
        snippet = item['snippet']
        statistics = item.get('statistics') or {}
        return {
            'id': item['id'],
            'title': snippet['title'],
            'description': snippet['description'],
            'customUrl': snippet.get('customUrl'),
            'publishedAt': snippet['publishedAt'],
            'thumbnails': snippet['thumbnails'],
            'statistics': {
                'viewCount': statistics.get('viewCount'),
                'subscriberCount': statistics.get('subscriberCount'),
                'videoCount': statistics.get('videoCount'),
            },
        }

In [29]:
# class YouTube:
#     def __init__(self):
#         self.__youtube_api_auth = YouTubeAPIAuth()
#         self.__youtube_client = None
#         self.__video_categories = []
        
#     def get_credentials_path(self):
#         return self.__youtube_api_auth.get_credentials_path()
        
#     def authenticate_from_client_secrets_file(self, client_secrets_file: str, 
#                 credentials_path: str = ''):
#         self.__youtube_client = self.__youtube_api_auth.authenticate_from_client_secrets_file(client_secrets_file, credentials_path)
#         return self.__youtube_client
    
#     def authenticate_from_credentials(self, credentials_path: str):
#         self.__youtube_client = self.__youtube_api_auth.authenticate_from_credentials(credentials_path)
#         return self.__youtube_client
    
#     def generate_credentials(self, client_secrets_file: str, credentials_path: str = ''):
#         self.__youtube_api_auth.generate_credentials(client_secrets_file, credentials_path)
        
#     def get_youtube(self):
#         return self.__youtube_client
    
#     def search_videos(self, query_string: str) -> list[str]:
#         try:
#             videos = VideoSearch().search_video(query_string, self.__youtube_client)
#         except HttpError as e:
#             raise QuotasExceededException('You have exceeded your daily quota limit.')
#         return videos
    
#     def find_related_videos(self, youtube_video):
#         related_videos = VideoSearch().search_related_videos(youtube_video, self.__youtube_client)
#         return related_videos
    
#     def find_most_popular_videos_by_region(self, region_code):
#         try:
#             most_popular_videos_by_region = VideoSearch().search_most_popular_videos_by_region(
#                 region_code, self.__youtube_client)
#         except HttpError as e:
#             raise QuotasExceededException('You have exceeded your daily quota limit.')
#         return most_popular_videos_by_region
    
#     def search_playlists(self, query_string: str) -> list[str]:
#         try:
#             playlists = PlaylistSearch(query_string).search_playlist(self.__youtube_client)
#         except HttpError as e:
#             raise QuotasExceededException('You have exceeded your daily quota limit.')
#         return playlists
    
#     def search_channels(self, query_string: str) -> list[str]:
#         try:
#             channels = ChannelSearch(query_string).search_channels(self.__youtube_client)
#         except HttpError as e:
#             raise QuotasExceededException('You have exceeded your daily quota limit.')
#         return channels
    
#     def get_video_categories(self):
#         if not self.__video_categories:
#             try:
#                 self.__video_categories = SearchYouTubeVideoCategories(self.__youtube_client).get_youtube_video_categories()
#             except HttpError as e:
#                 raise QuotasExceededException('You have exceeded your daily quota limit.')
#         return self.__video_categories
    
#     def find_most_popular_videos_by_category(self, category_id):
#         try:
#             most_popular_videos_by_category = VideoSearch().search_most_popular_videos_by_category(
#                 category_id, self.__youtube_client)
#         except HttpError as e:
#             raise QuotasExceededException('You have exceeded your daily quota limit.')
#         return most_popular_videos_by_category
    
#     def find_video_by_url(self, video_url: str):
#         """Get a specific video given the video url."""
#         video_id = self.__get_video_id(video_url)
#         return self.find_video_by_id(video_id)
        
#     def find_video_by_id(self, video_id: str):
#         """Find a video by id."""
#         try:
#             youtube_video = FindVideo(video_id).find_video(self.__youtube_client)
#         except HttpError as e:
#             raise QuotasExceededException('You have exceeded your daily quota limit.')
#         return youtube_video
    
#     @staticmethod
#     def __get_video_id(video_url: str) -> str:
#         """Get vdeo ID from video url"""
#         if not video_url:
#             raise ValueError('The video_ur has to be provided.')
#         if not isinstance(video_url, str):
#             raise TypeError('Te video_url has to be a string.')
#         if '=' not in video_url:
#             url_format = 'https://www.youtube.com/watch?v=Dqdu-FsBk0s'
#             raise ValueError(f'The video_url should be of the format "{url_format}"')
#         video_url = video_url.split('=')[1]
#         return video_url

In [96]:
class YouTube:
    """Entry point that ties together authentication, search and lookups."""

    MAX_RESULTS = 20
    REGION_CODE = 'us'

    def __init__(self):
        """Create the auth helper; no client exists until authentication."""
        self.__youtube_api_auth = YouTubeAPIAuth()
        self.__youtube_client = None
        self.__video_categories = []
        # Filled in by search_videos(). Declared here so the attributes exist
        # before the first search has run.
        self.__video_search = None
        self.__previous_page_token = None
        self.__next_page_token = None

    def get_credentials_path(self):
        """Return the credentials path reported by the auth helper."""
        return self.__youtube_api_auth.get_credentials_path()

    def authenticate_from_client_secrets_file(self, client_secrets_file: str,
                                              credentials_path: str = ''):
        """Authenticate from a client-secrets file and keep the client."""
        self.__youtube_client = self.__youtube_api_auth.authenticate_from_client_secrets_file(
            client_secrets_file, credentials_path)
        return self.__youtube_client

    def authenticate_from_credentials(self, credentials_path: str):
        """Authenticate from stored credentials and keep the client."""
        self.__youtube_client = self.__youtube_api_auth.authenticate_from_credentials(credentials_path)
        return self.__youtube_client

    def generate_credentials(self, client_secrets_file: str, credentials_path: str = ''):
        """Delegate credential generation to the auth helper."""
        self.__youtube_api_auth.generate_credentials(client_secrets_file, credentials_path)

    def get_youtube(self):
        """Return the current client (``None`` before authentication)."""
        return self.__youtube_client

    def get_video_categories(self):
        """Return the video categories, fetching them while the cache is empty."""
        if not self.__video_categories:
            self.__video_categories = SearchYouTubeVideoCategories(
                self.__youtube_client).get_youtube_video_categories()
        return self.__video_categories

    def search_videos(self, query_string: str) -> list:
        """Run a video search and return the videos it found.

        The ``VideoSearch`` instance and both page tokens are kept on this
        object; ``get_video_collections`` reads the stored search.
        """
        self.__video_search = VideoSearch(self.__youtube_client)
        (self.__previous_page_token,
         self.__next_page_token,
         videos) = self.__video_search.search_videos(query_string)
        return videos

    def get_video_collections(self):
        """Return ``get_videos()`` of the most recent ``search_videos`` call.

        Raises
        ------
        AttributeError
            If ``search_videos`` has not been called yet. The exception type
            matches what this call raised before the attribute was declared
            in ``__init__``; only the message is new.
        """
        if self.__video_search is None:
            raise AttributeError(
                'No video search has been run yet; call search_videos() first.')
        return self.__video_search.get_videos()

    def find_related_videos(self, youtube_video):
        """Return the videos related to ``youtube_video`` (page tokens dropped)."""
        video_search = VideoSearch(self.__youtube_client)
        _, _, related_videos = video_search.search_related_videos(youtube_video)
        return related_videos

    # NOTE(review): the two methods below build VideoSearch() without a client
    # and pass the client to the search method instead, whereas search_videos
    # and find_related_videos pass the client to the constructor. VideoSearch
    # is defined outside this cell - confirm which call style it supports.
    def find_most_popular_videos_by_region(self, region_code):
        """Return the most popular videos for ``region_code``."""
        most_popular_videos_by_region = VideoSearch().search_most_popular_videos_by_region(
            region_code, self.__youtube_client)
        return most_popular_videos_by_region

    def find_most_popular_videos_by_category(self, category_id):
        """Return the most popular videos for ``category_id``."""
        most_popular_videos_by_category = VideoSearch().search_most_popular_videos_by_category(
            category_id, self.__youtube_client)
        return most_popular_videos_by_category

    def find_video_by_url(self, video_url: str):
        """Get a specific video given the video url."""
        video_id = self.__get_video_id(video_url)
        return self.find_video_by_id(video_id)

    def find_video_by_id(self, video_id: str):
        """Find a video by id."""
        return FindVideo(self.__youtube_client).find_video(video_id)

    @staticmethod
    def __get_video_id(video_url: str) -> str:
        """Extract the video id from a watch URL.

        Parameters
        ----------
        video_url: str
            A URL such as ``https://www.youtube.com/watch?v=Dqdu-FsBk0s``.
            Extra query parameters (``&t=10s``, ``&list=...``) are ignored.

        Raises
        ------
        ValueError
            If ``video_url`` is empty or contains no ``=``.
        TypeError
            If ``video_url`` is not a string.

        Returns
        -------
        str
            The value of the ``v`` query parameter.
        """
        from urllib.parse import parse_qs, urlparse

        if not video_url:
            raise ValueError('The video_url has to be provided.')
        if not isinstance(video_url, str):
            raise TypeError('The video_url has to be a string.')
        if '=' not in video_url:
            url_format = 'https://www.youtube.com/watch?v=Dqdu-FsBk0s'
            raise ValueError(f'The video_url should be of the format "{url_format}"')
        video_ids = parse_qs(urlparse(video_url).query).get('v')
        if video_ids:
            return video_ids[0]
        # No usable 'v' parameter: keep the previous behaviour (text after the
        # first '=') so inputs that were accepted before are still accepted.
        return video_url.split('=')[1]

In [54]:
# Authenticate against the YouTube API. The client-secrets path defaults to
# the original machine-specific location and can be overridden through the
# YOUTUBE_CLIENT_SECRETS_FILE environment variable, so the notebook can run
# elsewhere without editing this cell.
youtube = YouTube()
client_secrets_file = os.environ.get(
    'YOUTUBE_CLIENT_SECRETS_FILE',
    '/home/lyle/Downloads/python_learning_site.json',
)
youtube_client = youtube.authenticate_from_client_secrets_file(client_secrets_file)

In [48]:
videos = youtube.search_videos('intermediate python programming')

In [49]:
videos

[YouTubeVideo(video_details={}),
 YouTubeVideo(video_details={}),
 YouTubeVideo(video_details={}),
 YouTubeVideo(video_details={}),
 YouTubeVideo(video_details={}),
 YouTubeVideo(video_details={}),
 YouTubeVideo(video_details={}),
 YouTubeVideo(video_details={}),
 YouTubeVideo(video_details={}),
 YouTubeVideo(video_details={})]

In [39]:
videos[0].to_dict()

{'video_id': 'HGOBQPFzWKo',
 'video_title': 'Intermediate Python Programming Course',
 'video_description': 'Take your Python skills to the next level with this intermediate Python course. First, you will get a review of basic concepts such as lists, strings, and dictionaries, but with an emphasis on some lesser known capabilities. Then, you will learn more advanced topics such as threading, multiprocessing, context managers, generators, and more.\n\n💻 Code: https://github.com/python-engineer/python-engineer-notebooks/tree/master/advanced-python\n\n🎥 Course from Patrick Loeber. Check out his channel: https://www.youtube.com/channel/UCbXgNpp0jedKWcQiULLbDTA\n\n🔗 Written Tutorials from Patrick: https://www.python-engineer.com/courses/advancedpython/\n\n⭐️ Course Contents ⭐️\n⌨️ (0:00:00) Intro\n⌨️ (0:00:56) Lists\n⌨️ (0:16:30) Tuples\n⌨️ (0:29:49) Dictionaries\n⌨️ (0:42:40) Sets\n⌨️ (0:58:44) Strings\n⌨️ (1:22:50) Collections\n⌨️ (1:36:43) Itertools\n⌨️ (1:51:50) Lambda Functions\n⌨️ (2:

In [44]:
search = VideoSearch(youtube_client)
related_videos = search.search_related_videos(videos[0])

In [45]:
related_videos

('',
 'CAoQAA',
 [YouTubeVideo(video_details={}),
  YouTubeVideo(video_details={}),
  YouTubeVideo(video_details={}),
  YouTubeVideo(video_details={}),
  YouTubeVideo(video_details={}),
  YouTubeVideo(video_details={}),
  YouTubeVideo(video_details={}),
  YouTubeVideo(video_details={}),
  YouTubeVideo(video_details={})])

In [50]:
related_videos = youtube.find_related_videos(videos[0])

In [51]:
related_videos

('',
 'CAoQAA',
 [YouTubeVideo(video_details={}),
  YouTubeVideo(video_details={}),
  YouTubeVideo(video_details={}),
  YouTubeVideo(video_details={}),
  YouTubeVideo(video_details={}),
  YouTubeVideo(video_details={}),
  YouTubeVideo(video_details={}),
  YouTubeVideo(video_details={})])

In [56]:
youtube.find_video_by_id(videos[0].get_video_id())

YouTubeVideo(video_details={})

In [57]:
videos[0].get_video_id()

'HGOBQPFzWKo'

In [59]:
videos[0].to_dict()

{'video_id': 'HGOBQPFzWKo',
 'video_title': 'Intermediate Python Programming Course',
 'video_description': 'Take your Python skills to the next level with this intermediate Python course. First, you will get a review of basic concepts such as lists, strings, and dictionaries, but with an emphasis on some lesser known capabilities. Then, you will learn more advanced topics such as threading, multiprocessing, context managers, generators, and more.\n\n💻 Code: https://github.com/python-engineer/python-engineer-notebooks/tree/master/advanced-python\n\n🎥 Course from Patrick Loeber. Check out his channel: https://www.youtube.com/channel/UCbXgNpp0jedKWcQiULLbDTA\n\n🔗 Written Tutorials from Patrick: https://www.python-engineer.com/courses/advancedpython/\n\n⭐️ Course Contents ⭐️\n⌨️ (0:00:00) Intro\n⌨️ (0:00:56) Lists\n⌨️ (0:16:30) Tuples\n⌨️ (0:29:49) Dictionaries\n⌨️ (0:42:40) Sets\n⌨️ (0:58:44) Strings\n⌨️ (1:22:50) Collections\n⌨️ (1:36:43) Itertools\n⌨️ (1:51:50) Lambda Functions\n⌨️ (2:

In [60]:
videos[0].get_video_comments()

[YouTubeComment(id='UgxhsgT0TB1L46coSCl4AaABAg', videoId='HGOBQPFzWKo',         totalReplyCount=0),
 YouTubeComment(id='UgzWip4i-I18FHGnrF14AaABAg', videoId='HGOBQPFzWKo',         totalReplyCount=0),
 YouTubeComment(id='UgzlYA9XcmFy3H4zvC14AaABAg', videoId='HGOBQPFzWKo',         totalReplyCount=0),
 YouTubeComment(id='UgwfYg96MRWRasIgZ994AaABAg', videoId='HGOBQPFzWKo',         totalReplyCount=0),
 YouTubeComment(id='UgzitDv1zd0IQ99hV2F4AaABAg', videoId='HGOBQPFzWKo',         totalReplyCount=0),
 YouTubeComment(id='Ugx8BIsFuOhfPCazerh4AaABAg', videoId='HGOBQPFzWKo',         totalReplyCount=0),
 YouTubeComment(id='UgzHNSC1BbAZxp-repR4AaABAg', videoId='HGOBQPFzWKo',         totalReplyCount=0),
 YouTubeComment(id='UgwMoSVbkHwom2S6g2R4AaABAg', videoId='HGOBQPFzWKo',         totalReplyCount=0),
 YouTubeComment(id='UgzJtNQPGQMbjAvINUB4AaABAg', videoId='HGOBQPFzWKo',         totalReplyCount=0),
 YouTubeComment(id='UgxbxWOQHIOCFM8Jfdh4AaABAg', videoId='HGOBQPFzWKo',         totalReplyCount=0),


In [61]:
comments = videos[0].get_video_comments()

In [62]:
comments[0]

YouTubeComment(id='UgxhsgT0TB1L46coSCl4AaABAg', videoId='HGOBQPFzWKo',         totalReplyCount=0)

In [63]:
comments[0].get_comment()

{'id': 'UgxhsgT0TB1L46coSCl4AaABAg',
 'videoId': 'HGOBQPFzWKo',
 'totalReplyCount': 0,
 'textDisplay': 'Does this course cover all the basics of python for beginners',
 'authorDisplayName': 'Bright Gideon',
 'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AL5GRJUBcnV-vueWK8-nAb3zYjUO2c8InZqui4_rbVI5Wg74eW6gAog0vlYRmGltHR2q=s48-c-k-c0x00ffffff-no-rj',
 'authorChannelId': 'UCblhWhFuUQ7S11XHhKpzzMg',
 'likeCount': 0,
 'publishedAt': '2023-03-29T11:16:23Z',
 'updatedAt': '2023-03-29T11:16:23Z'}

In [64]:
comments = [comment.get_comment() for comment in videos[0].get_video_comments()]
comments

[{'id': 'UgxhsgT0TB1L46coSCl4AaABAg',
  'videoId': 'HGOBQPFzWKo',
  'totalReplyCount': 0,
  'textDisplay': 'Does this course cover all the basics of python for beginners',
  'authorDisplayName': 'Bright Gideon',
  'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AL5GRJUBcnV-vueWK8-nAb3zYjUO2c8InZqui4_rbVI5Wg74eW6gAog0vlYRmGltHR2q=s48-c-k-c0x00ffffff-no-rj',
  'authorChannelId': 'UCblhWhFuUQ7S11XHhKpzzMg',
  'likeCount': 0,
  'publishedAt': '2023-03-29T11:16:23Z',
  'updatedAt': '2023-03-29T11:16:23Z'},
 {'id': 'UgzWip4i-I18FHGnrF14AaABAg',
  'videoId': 'HGOBQPFzWKo',
  'totalReplyCount': 0,
  'textDisplay': 'Lists ✓<br>Tuples ✓<br>Dictionary ×<br>Sets ×',
  'authorDisplayName': 'Castro',
  'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AL5GRJXV5DpixDFefvsGv5M5LVwYACab6MmOy0nY8OV1Ng=s48-c-k-c0x00ffffff-no-rj',
  'authorChannelId': 'UCCEidLinyfexs-UyNiCNiew',
  'likeCount': 0,
  'publishedAt': '2023-03-29T10:17:29Z',
  'updatedAt': '2023-03-30T21:49:18Z'},
 {'id': 'UgzlYA9XcmFy3H

In [305]:
from sqlalchemy_utils import database_exists
import os

In [306]:
def create_db_conn_string(POSTGRES_HOST, POSTGRES_PORT, POSTGRES_USER,
                          POSTGRES_PASSWORD, POSTGRES_DB) -> str:
    """Create the database connection string.

    Builds a ``postgresql://`` URL from the given settings. The user name and
    password are percent-encoded so that characters such as ``@``, ``/`` or
    ``:`` cannot be mistaken for URL delimiters. Values made up only of
    letters, digits and ``-._~`` are left unchanged.

    Parameters
    ----------
    POSTGRES_HOST:
        Host name of the database server.
    POSTGRES_PORT:
        Port of the database server.
    POSTGRES_USER:
        User name to connect as.
    POSTGRES_PASSWORD:
        Password for that user.
    POSTGRES_DB:
        Name of the database.

    Returns
    -------
    db_connection_string: str
        The database connection string
    """
    from urllib.parse import quote

    # safe='' also encodes '/', which quote() would otherwise leave as is.
    user = quote(str(POSTGRES_USER), safe='')
    password = quote(str(POSTGRES_PASSWORD), safe='')
    return f"postgresql://{user}:{password}@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}"


def check_if_database_exists(db_connection_string: str) -> bool:
    """Tell whether the database behind a connection URL exists.

    Parameters
    ----------
    db_connection_string: str
        The database URL to check.

    Raises
    ------
    ValueError:
        If ``db_connection_string`` is empty (or otherwise falsy), or if it
        is not a string.

    Returns
    -------
    db_exists: bool
        The result of ``database_exists`` for the given URL.
    """
    # The falsy check runs first, so None, '' and 0 all get the "null value"
    # message. Only truthy non-strings reach the type message.
    if not db_connection_string:
        raise ValueError("The db_connection_string cannot be a null value.")
    elif not isinstance(db_connection_string, str):
        raise ValueError("The db_connection_string has to be string")
    return database_exists(db_connection_string)


In [307]:
# Connection settings. Each value is read from the environment variable of
# the same name when it is set. The fallbacks are the local development
# values this notebook was written with and should not be used elsewhere.
POSTGRES_HOST = os.environ.get('POSTGRES_HOST', 'localhost')
POSTGRES_PORT = int(os.environ.get('POSTGRES_PORT', 5432))
POSTGRES_USER = os.environ.get('POSTGRES_USER', 'lyle')
POSTGRES_PASSWORD = os.environ.get('POSTGRES_PASSWORD', 'lyle')
POSTGRES_DB = os.environ.get('POSTGRES_DB', 'python-learning-app')
check_if_database_exists(create_db_conn_string(POSTGRES_HOST, POSTGRES_PORT,
                        POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_DB))

True

In [308]:
from typing import List
from typing import Optional
from sqlalchemy import ForeignKey
from sqlalchemy import String, Text
from sqlalchemy.orm import DeclarativeBase
from sqlalchemy.orm import Mapped
from sqlalchemy.orm import mapped_column
from sqlalchemy.orm import relationship

In [309]:
class Base(DeclarativeBase):
    pass

In [310]:
from sqlalchemy import create_engine

# Build the connection URL from the POSTGRES_* settings and create the engine.
# echo=True makes the engine log the SQL it runs; those are the
# "INFO sqlalchemy.engine.Engine" lines in the cell outputs below.
db_conn = create_db_conn_string(POSTGRES_HOST, POSTGRES_PORT, 
                    POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_DB)
engine = create_engine(db_conn, echo=True)

In [311]:
class Channel(Base):
    """ORM mapping for the ``youtube_channel`` table."""

    __tablename__ = "youtube_channel"

    # Primary key of the channel row.
    channel_id: Mapped[str] = mapped_column(primary_key=True)
    channel_name: Mapped[str] = mapped_column(String(250))
    channel_thumbnail: Mapped[str] = mapped_column(String(250))

    # Videos that point back at this channel through Video.channel.
    videos: Mapped[List["Video"]] = relationship(
        back_populates="channel", cascade="all, delete-orphan")

    def __repr__(self) -> str:
        # !r already adds quotes; the extra literal quotes that used to wrap
        # each value produced output like channel_id=''abc''.
        return f"Channel(channel_id={self.channel_id!r}, channel_name={self.channel_name!r})"
        

class Comment(Base):
    """ORM mapping for the ``youtube_comment`` table."""

    __tablename__ = "youtube_comment"

    # Primary key of the comment row.
    comment_id: Mapped[str] = mapped_column(primary_key=True)
    comment_text: Mapped[str] = mapped_column(Text())
    comment_author_name: Mapped[str] = mapped_column(String(250))
    comment_author_thumbnail: Mapped[str] = mapped_column(String(250))

    # Foreign key to the video the comment belongs to, plus the ORM link.
    video_id: Mapped[str] = mapped_column(ForeignKey("youtube_video.video_id"))
    video: Mapped["Video"] = relationship(back_populates="comments")

    def __repr__(self) -> str:
        # !r already adds quotes; the extra literal quotes that used to wrap
        # each value produced output like comment_id=''abc''.
        return f"Comment(comment_id={self.comment_id!r}, comment_text={self.comment_text!r})"
        
class Video(Base):
    """ORM mapping for the ``youtube_video`` table."""

    __tablename__ = "youtube_video"

    # Primary key of the video row.
    video_id: Mapped[str] = mapped_column(primary_key=True)
    video_name: Mapped[str] = mapped_column(String(250))
    video_thumbnail: Mapped[str] = mapped_column(String(250))

    # Foreign key to the owning channel, plus the ORM link.
    channel_id: Mapped[str] = mapped_column(ForeignKey("youtube_channel.channel_id"))
    channel: Mapped["Channel"] = relationship(back_populates="videos")

    # Comments that point back at this video through Comment.video.
    comments: Mapped[List["Comment"]] = relationship(
        back_populates="video", cascade="all, delete-orphan")

    def __repr__(self) -> str:
        # !r already adds quotes; the extra literal quotes that used to wrap
        # each value produced output like video_id=''abc''.
        return f"Video(video_id={self.video_id!r}, video_name={self.video_name!r})"

In [312]:
Base.metadata.create_all(engine)

2023-04-04 17:00:07,487 INFO sqlalchemy.engine.Engine select pg_catalog.version()
2023-04-04 17:00:07,489 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-04-04 17:00:07,493 INFO sqlalchemy.engine.Engine select current_schema()
2023-04-04 17:00:07,495 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-04-04 17:00:07,499 INFO sqlalchemy.engine.Engine show standard_conforming_strings
2023-04-04 17:00:07,501 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-04-04 17:00:07,505 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:00:07,511 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class.relname = %(table_name)s AND pg_catalog.pg_class.relkind = ANY (ARRAY[%(param_1)s, %(param_2)s, %(param_3)s, %(param_4)s, %(param_5)s]) AND pg_catalog.pg_table_is_visible(pg_catalog.pg_class.oid) AND pg_catalog.pg_namespace.nspname != %(nspname

In [313]:
# Authenticate against the YouTube API. The client-secrets path defaults to
# the original machine-specific location and can be overridden through the
# YOUTUBE_CLIENT_SECRETS_FILE environment variable, so the notebook can run
# elsewhere without editing this cell.
youtube = YouTube()
client_secrets_file = os.environ.get(
    'YOUTUBE_CLIENT_SECRETS_FILE',
    '/home/lyle/Downloads/python_learning_site.json',
)
youtube_client = youtube.authenticate_from_client_secrets_file(client_secrets_file)

In [98]:
videos = youtube.search_videos('intermediate python programming')

In [99]:
videos

[YouTubeVideo(id=HGOBQPFzWKo, video_title=Intermediate Python Programming Course),
 YouTubeVideo(id=p15xzjzR9j0, video_title=Mastering Python - Everything You Need To Know To Become a Python Master),
 YouTubeVideo(id=2S7Xxz9PhaU, video_title=Python Intermediate Tutorial #1 - Classes and Objects),
 YouTubeVideo(id=qeOcZiSp3tE, video_title=Learn 8 Python Important Concepts Simplified [ Intermediate Python ]),
 YouTubeVideo(id=txKBWtvV99Y, video_title=3 Mini Python Projects - For Intermediates),
 YouTubeVideo(id=th4OBktqK1I, video_title=Learn Python With This ONE Project!),
 YouTubeVideo(id=ePSQXjIMDho, video_title=Go from BEGINNER to INTERMEDIATE programmer with this 👩‍💻 #technology #programming #career),
 YouTubeVideo(id=_uQrJ0TkZlc, video_title=Python Tutorial - Python Full Course for Beginners),
 YouTubeVideo(id=XKHEtdqhLK8, video_title=Python Full Course for free 🐍),
 YouTubeVideo(id=VchuKL44s6E, video_title=Python As Fast as Possible - Learn Python in ~75 Minutes)]

In [100]:
videos[0].get_video_channel()

YouTubeChannel(id=UC8butISFwT-Wl7EV0hUK0BQ, channel_title=freeCodeCamp.org)

In [101]:
freecodecamp = videos[0].get_video_channel()

In [170]:
related_videos = youtube.find_related_videos(videos[0])

In [171]:
related_videos

[YouTubeVideo(id=vEQ8CXFWLZU, video_title=3 PYTHON AUTOMATION PROJECTS FOR BEGINNERS),
 YouTubeVideo(id=8mAITcNt710, video_title=Harvard CS50 – Full Computer Science University Course),
 YouTubeVideo(id=7eh4d6sabA0, video_title=Python Machine Learning Tutorial (Data Science)),
 YouTubeVideo(id=JeznW_7DlB0, video_title=Python Object Oriented Programming (OOP) - For Beginners),
 YouTubeVideo(id=dam0GPOAvVI, video_title=Python Website Full Tutorial - Flask, Authentication, Databases & More),
 YouTubeVideo(id=ZDa-Z5JzLYM, video_title=Python OOP Tutorial 1: Classes and Instances),
 YouTubeVideo(id=p15xzjzR9j0, video_title=Mastering Python - Everything You Need To Know To Become a Python Master),
 YouTubeVideo(id=th4OBktqK1I, video_title=Learn Python With This ONE Project!),
 YouTubeVideo(id=qUeud6DvOWI, video_title=25 nooby Python habits you need to ditch)]

In [318]:
from sqlalchemy.orm import Session
from psycopg2.errors import UniqueViolation
from sqlalchemy import exc

In [319]:
# Store the channel of every video, committing one row at a time.
with Session(engine) as session:
    for video in videos:
        video_channel = video.get_video_channel()
        channel = Channel(
            channel_id=video_channel.get_channel_id(),
            channel_name=video_channel.get_channel_title(),
            channel_thumbnail=video_channel.get_channel_thumbnail(),
        )
        session.add(channel)
        try:
            session.commit()
        except exc.IntegrityError:
            # The insert was rejected by a database constraint; roll back so
            # the session can be reused for the next channel.
            session.rollback()

2023-04-04 17:33:48,096 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:48,108 INFO sqlalchemy.engine.Engine INSERT INTO youtube_channel (channel_id, channel_name, channel_thumbnail) VALUES (%(channel_id)s, %(channel_name)s, %(channel_thumbnail)s)
2023-04-04 17:33:48,111 INFO sqlalchemy.engine.Engine [cached since 2011s ago] {'channel_id': 'UC8butISFwT-Wl7EV0hUK0BQ', 'channel_name': 'freeCodeCamp.org', 'channel_thumbnail': 'https://yt3.ggpht.com/ytc/AL5GRJXPR4dSz0wwP-elkeiWUMnfZlCtNZP8Rd-tmFOZYg=s88-c-k-c0x00ffffff-no-rj'}
2023-04-04 17:33:48,122 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:48,129 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:48,131 INFO sqlalchemy.engine.Engine INSERT INTO youtube_channel (channel_id, channel_name, channel_thumbnail) VALUES (%(channel_id)s, %(channel_name)s, %(channel_thumbnail)s)
2023-04-04 17:33:48,134 INFO sqlalchemy.engine.Engine [cached since 2011s ago] {'channel_id': 'UC4JX40jDee_tINbkjycV4Sg', 'channel

In [320]:
# Store every video, committing one row at a time so that a rejected insert
# does not discard the rows that were accepted.
with Session(engine) as session:
    for video in videos:
        # Use a separate name for the ORM row: rebinding the loop variable
        # `video` to a `Video` row would make one name refer to two different
        # kinds of object and leave an ORM instance in the notebook namespace.
        video_row = Video(
            video_id=video.get_video_id(),
            video_name=video.get_video_title(),
            video_thumbnail=video.get_video_thumbnail(),
            channel_id=video.get_channel_id(),
        )
        session.add(video_row)
        try:
            session.commit()
        except exc.IntegrityError:
            # The insert was rejected by a database constraint; roll back so
            # the session can be reused for the next video.
            session.rollback()

2023-04-04 17:33:53,170 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:53,176 INFO sqlalchemy.engine.Engine INSERT INTO youtube_video (video_id, video_name, video_thumbnail, channel_id) VALUES (%(video_id)s, %(video_name)s, %(video_thumbnail)s, %(channel_id)s)
2023-04-04 17:33:53,179 INFO sqlalchemy.engine.Engine [cached since 2011s ago] {'video_id': 'HGOBQPFzWKo', 'video_name': 'Intermediate Python Programming Course', 'video_thumbnail': 'https://i.ytimg.com/vi/HGOBQPFzWKo/default.jpg', 'channel_id': 'UC8butISFwT-Wl7EV0hUK0BQ'}
2023-04-04 17:33:53,188 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:53,194 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:53,196 INFO sqlalchemy.engine.Engine INSERT INTO youtube_video (video_id, video_name, video_thumbnail, channel_id) VALUES (%(video_id)s, %(video_name)s, %(video_thumbnail)s, %(channel_id)s)
2023-04-04 17:33:53,198 INFO sqlalchemy.engine.Engine [cached since 2011s ago] {'video_id': 'p15xzjzR9j0', 'v

In [321]:
# Store the comments of every video, committing one row at a time.
with Session(engine) as session:
    for video in videos:
        for comment in video.get_video_comments():
            # The comment payload is read by key below.
            video_comment = comment.get_comment()
            cm = Comment(
                comment_id=video_comment['id'],
                comment_text=video_comment['textDisplay'],
                comment_author_name=video_comment['authorDisplayName'],
                comment_author_thumbnail=video_comment['authorProfileImageUrl'],
                video_id=video.get_video_id(),
            )
            session.add(cm)
            try:
                session.commit()
            except exc.IntegrityError:
                # The insert was rejected by a database constraint; roll back
                # so the session can be reused for the next comment.
                session.rollback()

2023-04-04 17:33:58,118 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:58,120 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:33:58,126 INFO sqlalchemy.engine.Engine [cached since 2010s ago] {'comment_id': 'UgwGzOJgkxqw5brI4tR4AaABAg', 'comment_text': 'Comprehensive tutorial.  The only problem I see is the black background and the gray text makes it very difficult to follow along.  I do notice that most of the &quot;CODE&quot; presentation on YouTube use the same format &quot;Black Background and a non-emphasis on the text color.', 'comment_author_name': 'Max Tuck', 'comment_author_thumbnail': 'https://yt3.ggpht.com/ytc/AL5GRJU4TsulCnNTSHPregX1eEXfhbquhOao03vBSQ=s48-c-k-c0x00ffffff-no-rj', 'video_id': 'HGOBQPFzWKo'}
2023-04-04 17:33:58,131 INFO sqlalchem

2023-04-04 17:33:58,262 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:58,268 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:58,270 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:33:58,272 INFO sqlalchemy.engine.Engine [cached since 2010s ago] {'comment_id': 'UgzJtNQPGQMbjAvINUB4AaABAg', 'comment_text': 'Is there an expert level tutorial series?', 'comment_author_name': 'Sheiphan Joseph', 'comment_author_thumbnail': 'https://yt3.ggpht.com/ytc/AL5GRJWBwxugNqRZoUeSBS1FC5YfcQwsX7gJomv2v-mSJaOY_m8xiMndF1hES1gJ1_Xm=s48-c-k-c0x00ffffff-no-rj', 'video_id': 'HGOBQPFzWKo'}
2023-04-04 17:33:58,276 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:58,282 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:58,284 INFO sqlalchemy.engine.Engine

2023-04-04 17:33:58,411 INFO sqlalchemy.engine.Engine [cached since 2010s ago] {'comment_id': 'UgxLr8TB40xjTxGXOzt4AaABAg', 'comment_text': 'This course is hella underrated. Deserves many more views!', 'comment_author_name': 'ParadymShift, Plant-BASED Ⓥegan Jedi', 'comment_author_thumbnail': 'https://yt3.ggpht.com/iemT51hQ1TUzrfUhP3S0DDAadwwWTn3UebhZW7PPwXjJc16LcrfHE-c3e1VQS_yALd_ifpuJDw=s48-c-k-c0x00ffffff-no-rj', 'video_id': 'HGOBQPFzWKo'}
2023-04-04 17:33:58,415 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:58,420 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:58,422 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:33:58,425 INFO sqlalchemy.engine.Engine [cached since 2010s ago] {'comment_id': 'UgwYyqvGg_QZmItnDZJ4AaABAg', 'comment_text': 'Star

2023-04-04 17:33:58,554 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:58,557 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:33:58,558 INFO sqlalchemy.engine.Engine [cached since 2010s ago] {'comment_id': 'UgzqjdnlPp7tpwVUGXJ4AaABAg', 'comment_text': 'thank uuuuu!', 'comment_author_name': 'Nunuyeaa Sj', 'comment_author_thumbnail': 'https://yt3.ggpht.com/ytc/AL5GRJWHbkzdW6su6TUy6h2o62C5kDjAnmmolCom82DuUg=s48-c-k-c0x00ffffff-no-rj', 'video_id': 'p15xzjzR9j0'}
2023-04-04 17:33:58,560 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:58,564 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:58,566 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VAL

2023-04-04 17:33:58,646 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:58,651 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:58,655 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:33:58,658 INFO sqlalchemy.engine.Engine [cached since 2010s ago] {'comment_id': 'Ugx5h4hjV6DPeGsBeE14AaABAg', 'comment_text': 'I used to read solo learn tutorial while travelling by train. My brain melted at generators of lambda functions. But it was fun :D', 'comment_author_name': 'Cookie Monster', 'comment_author_thumbnail': 'https://yt3.ggpht.com/ytc/AL5GRJVk1bFfjBOXdvVeFD-HwOG97n_8RJnK3wiOdY7q=s48-c-k-c0x00ffffff-no-rj', 'video_id': 'p15xzjzR9j0'}
2023-04-04 17:33:58,662 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:58,667 INFO sqlalchemy.engine.Engine BEGIN 

2023-04-04 17:33:58,777 INFO sqlalchemy.engine.Engine [cached since 2010s ago] {'comment_id': 'UgxxixsZZuvoFKKDxa14AaABAg', 'comment_text': 'I wish I could see what you were doing, but the text is way too small.<br><br>Hopefully, future videos in 2022/2023 will show a full screen IDE with larger font.', 'comment_author_name': 'Simon Williamson', 'comment_author_thumbnail': 'https://yt3.ggpht.com/ytc/AL5GRJVsnBqECt8F-aNgtGSk46QzJaSvdudjj3ovBHuInxc=s48-c-k-c0x00ffffff-no-rj', 'video_id': '2S7Xxz9PhaU'}
2023-04-04 17:33:58,780 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:58,785 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:58,788 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:33:58,789 INFO sqlalchemy.engine.Engine [cached since 2010s ago] {'comm

2023-04-04 17:33:58,895 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:58,900 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:58,902 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:33:58,903 INFO sqlalchemy.engine.Engine [cached since 2010s ago] {'comment_id': 'Ugzk0f1Q0C6BYJbaJQB4AaABAg', 'comment_text': 'Looks like a good tutorial. But please make the text larger', 'comment_author_name': 'martin', 'comment_author_thumbnail': 'https://yt3.ggpht.com/ytc/AL5GRJVAJQQDtgllndghTeDBub7SsrcWBx5xgpubQQ=s48-c-k-c0x00ffffff-no-rj', 'video_id': '2S7Xxz9PhaU'}
2023-04-04 17:33:58,906 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:58,911 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:58,912 INFO sqlalchemy.engine.Engine INSERT INTO yout

2023-04-04 17:33:59,017 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:59,023 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:59,025 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:33:59,026 INFO sqlalchemy.engine.Engine [cached since 2011s ago] {'comment_id': 'UgxpAiHgdCDrYn4f2IF4AaABAg', 'comment_text': 'ThNk you fr this great video, I agree with your intro very true', 'comment_author_name': 'Leon Kennedy', 'comment_author_thumbnail': 'https://yt3.ggpht.com/ytc/AL5GRJXnz0qBc5wAWMLDa4BLlK30Nqr7H7PFJiVlV4hl=s48-c-k-c0x00ffffff-no-rj', 'video_id': 'qeOcZiSp3tE'}
2023-04-04 17:33:59,029 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:59,034 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:59,036 INFO sqlalchemy.engine.Engine INSE

2023-04-04 17:33:59,145 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:59,152 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:59,154 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:33:59,157 INFO sqlalchemy.engine.Engine [cached since 2011s ago] {'comment_id': 'Ugwoy12icDpZPInGkf94AaABAg', 'comment_text': 'I did the first project exactly how you did, but when I made a bigger maze, &#39;stdscr.addstr(i, j*2, value, RED)&#39; in print_maze(maze, stdscr, path=[]) doesn&#39;t work.', 'comment_author_name': 'i dont care', 'comment_author_thumbnail': 'https://yt3.ggpht.com/6Rqn4JNNNOjjPMlkItI6Wqe0JQPP-i7WRs4ZiGeGSg-dBbVPYD0RfWMxbQnqWDj_MGsnFEfGOw=s48-c-k-c0x00ffffff-no-rj', 'video_id': 'txKBWtvV99Y'}
2023-04-04 17:33:59,160 INFO sqlalchemy.engine.Engine 

2023-04-04 17:33:59,268 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:59,271 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:59,274 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:33:59,276 INFO sqlalchemy.engine.Engine [cached since 2011s ago] {'comment_id': 'UgyMvUhnko2Gl8YvEVh4AaABAg', 'comment_text': 'what is the name of this vscode theme?', 'comment_author_name': 'thiagodfs81', 'comment_author_thumbnail': 'https://yt3.ggpht.com/ytc/AL5GRJU8JoFAKDXmTiTJboHaJYZj4F8u3VeRTUN-chcV=s48-c-k-c0x00ffffff-no-rj', 'video_id': 'txKBWtvV99Y'}
2023-04-04 17:33:59,280 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:59,285 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:59,287 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (c

2023-04-04 17:33:59,368 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:59,371 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:59,372 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:33:59,374 INFO sqlalchemy.engine.Engine [cached since 2011s ago] {'comment_id': 'UgwyxpOC-Ir4HP_-hLl4AaABAg', 'comment_text': 'loved this. really helps get in to python.  Thanks.', 'comment_author_name': 'Jay Lee', 'comment_author_thumbnail': 'https://yt3.ggpht.com/ytc/AL5GRJU0zLvWBHyELiYOZwIIxQRr11SdNbl7U-PuGhAP29E=s48-c-k-c0x00ffffff-no-rj', 'video_id': 'th4OBktqK1I'}
2023-04-04 17:33:59,382 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:59,386 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:59,388 INFO sqlalchemy.engine.Engine INSERT INTO youtub

2023-04-04 17:33:59,485 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:59,489 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:59,491 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:33:59,493 INFO sqlalchemy.engine.Engine [cached since 2011s ago] {'comment_id': 'Ugxsy9ihBgHaaGVbdaZ4AaABAg', 'comment_text': 'Thank you very much!! The for else thing was new to me ;) Realy neat feature :)', 'comment_author_name': 'STFU665', 'comment_author_thumbnail': 'https://yt3.ggpht.com/ytc/AL5GRJUYK8F9FYKTcNa03ZcYRUjqKdR0LK2Q3cvyzw=s48-c-k-c0x00ffffff-no-rj', 'video_id': 'th4OBktqK1I'}
2023-04-04 17:33:59,499 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:59,504 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:59,507 INFO sqlalchemy.engine.En

2023-04-04 17:33:59,628 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:59,633 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:59,635 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:33:59,636 INFO sqlalchemy.engine.Engine [cached since 2011s ago] {'comment_id': 'UgwxxTUFoR4J2sKYyyN4AaABAg', 'comment_text': 'Where to find it ?', 'comment_author_name': 'Arham Khalid', 'comment_author_thumbnail': 'https://yt3.ggpht.com/ytc/AL5GRJW7tU9P1t6UN_BN5K1Z72sknjyK9Sj9UYrTwuu9zcs=s48-c-k-c0x00ffffff-no-rj', 'video_id': 'ePSQXjIMDho'}
2023-04-04 17:33:59,639 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:59,647 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:59,649 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comme

2023-04-04 17:33:59,797 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:59,802 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:59,803 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:33:59,805 INFO sqlalchemy.engine.Engine [cached since 2011s ago] {'comment_id': 'UgzJET5We6HS5GOIJPp4AaABAg', 'comment_text': '10th TODO app😂😂😂🤣🤣🤣', 'comment_author_name': 'Sandeep Kaur', 'comment_author_thumbnail': 'https://yt3.ggpht.com/ytc/AL5GRJUia4qpLTVU_f2_lcRzItKILiL6NVqZGg0U7NUmqXCVJ_bxU6Gswy5jwuVWxRGl=s48-c-k-c0x00ffffff-no-rj', 'video_id': 'ePSQXjIMDho'}
2023-04-04 17:33:59,808 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:59,819 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:59,821 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comm

2023-04-04 17:33:59,951 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:33:59,959 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:33:59,961 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:33:59,962 INFO sqlalchemy.engine.Engine [cached since 2011s ago] {'comment_id': 'Ugw2TQw2X3w4q0j6F1Z4AaABAg', 'comment_text': '🔥 Want to master Python? Get my Python mastery course: <a href="http://bit.ly/35BLHHP">http://bit.ly/35BLHHP</a><br>👍 Subscribe for more Python tutorials like this: <a href="https://goo.gl/6PYaGF">https://goo.gl/6PYaGF</a>', 'comment_author_name': 'Programming with Mosh', 'comment_author_thumbnail': 'https://yt3.ggpht.com/tBEPr-zTNXEeae7VZKSZYfiy6azzs9OHowq5ZvogJeHoVtKtEw2PXSwzMBKVR7W0MI7gyND8=s48-c-k-c0x00ffffff-no-rj', 'video_id': '_uQrJ0TkZlc'

2023-04-04 17:34:00,074 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:34:00,079 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:34:00,082 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:34:00,084 INFO sqlalchemy.engine.Engine [cached since 2012s ago] {'comment_id': 'UgyqS34zqBheIcpoR8V4AaABAg', 'comment_text': 'Hi, my story i want to become a digital nomad. <br>I&#39;d like to travel and still earn. I an retired and I&#39;m soon to be 63 yrs new.<br>I took a ... (1729 characters truncated) ...  But thanks for the free course its much appreciated and I can learn from you.  Which is a compliment because I dont learn easily from just anyone. 😂', 'comment_author_name': 'John Lombardi', 'comment_author_thumbnail': 'https://yt3.ggpht.com/ytc/AL5GRJUPUhMqF20T7

2023-04-04 17:34:00,204 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:34:00,209 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:34:00,212 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:34:00,214 INFO sqlalchemy.engine.Engine [cached since 2012s ago] {'comment_id': 'Ugy8oUDDX5AtwbCK3iB4AaABAg', 'comment_text': '<a href="https://www.youtube.com/watch?v=_uQrJ0TkZlc&amp;t=21m20s">21:20</a>', 'comment_author_name': 'Soulfire', 'comment_author_thumbnail': 'https://yt3.ggpht.com/_CJA_J3b3LXOrDA_rWfZoFuJLm6k95nyh4HYnqzbWfJ2zzQNHaXkgfa771vgSistNk-2PSmUdw=s48-c-k-c0x00ffffff-no-rj', 'video_id': '_uQrJ0TkZlc'}
2023-04-04 17:34:00,219 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:34:00,224 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:34:00,227

2023-04-04 17:34:00,333 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:34:00,337 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:34:00,339 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:34:00,342 INFO sqlalchemy.engine.Engine [cached since 2012s ago] {'comment_id': 'Ugxc_c5nvq7jD2Z02nF4AaABAg', 'comment_text': 'I completed the full course in 7 days. What should I do next?', 'comment_author_name': 'hoho', 'comment_author_thumbnail': 'https://yt3.ggpht.com/UnaGYAeoE447oSKUlKStc7qheT1_lDZhpPaEow4ZJ3JMQjVG4TwcBEU94YppRJG8QXwBoRjt=s48-c-k-c0x00ffffff-no-rj', 'video_id': 'XKHEtdqhLK8'}
2023-04-04 17:34:00,347 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:34:00,352 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:34:00,354 INFO sqlalchemy.engi

2023-04-04 17:34:00,440 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:34:00,442 INFO sqlalchemy.engine.Engine [cached since 2012s ago] {'comment_id': 'Ugx2z-dQgDnRMLgJlc14AaABAg', 'comment_text': 'Day1 <a href="https://www.youtube.com/watch?v=XKHEtdqhLK8&amp;t=1h32m23s">1:32:23</a>', 'comment_author_name': '1 sddy', 'comment_author_thumbnail': 'https://yt3.ggpht.com/ytc/AL5GRJUgXm3VxmUBmAC5U51ng8DLzJTbgU4CorD6VeIoJjQO5Q=s48-c-k-c0x00ffffff-no-rj', 'video_id': 'XKHEtdqhLK8'}
2023-04-04 17:34:00,445 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:34:00,448 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:34:00,450 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VA

2023-04-04 17:34:00,560 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:34:00,565 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:34:00,566 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:34:00,567 INFO sqlalchemy.engine.Engine [cached since 2012s ago] {'comment_id': 'Ugx6A83MfEhgldhkCS14AaABAg', 'comment_text': 'Oh man you are my hero. This is the best video for me to fast review Python. Thanks a lots!', 'comment_author_name': 'Ryo', 'comment_author_thumbnail': 'https://yt3.ggpht.com/X_5-IYfQm1sRYFCNWIy3CxmjY9EO1W-1n4cG0IX1UIhkwD5rPkD6y-u_PmlEWMtR5T6Jk4jorw=s48-c-k-c0x00ffffff-no-rj', 'video_id': 'VchuKL44s6E'}
2023-04-04 17:34:00,571 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:34:00,576 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17

2023-04-04 17:34:00,687 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:34:00,692 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-04-04 17:34:00,693 INFO sqlalchemy.engine.Engine INSERT INTO youtube_comment (comment_id, comment_text, comment_author_name, comment_author_thumbnail, video_id) VALUES (%(comment_id)s, %(comment_text)s, %(comment_author_name)s, %(comment_author_thumbnail)s, %(video_id)s)
2023-04-04 17:34:00,694 INFO sqlalchemy.engine.Engine [cached since 2012s ago] {'comment_id': 'UgyqiS1kCwUGpqtHuax4AaABAg', 'comment_text': 'In school (US) we learned PEMDAS rather than BEDMAS. It&#39;s concerning that multiplication and divsion are flipped..', 'comment_author_name': 'Benjamin Davis', 'comment_author_thumbnail': 'https://yt3.ggpht.com/ytc/AL5GRJXnvgk4lxWVwrTHs74cNJBKi1x65WttfxE_=s48-c-k-c0x00ffffff-no-rj', 'video_id': 'VchuKL44s6E'}
2023-04-04 17:34:00,697 INFO sqlalchemy.engine.Engine COMMIT
2023-04-04 17:34:00,702 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-

In [None]:
class YouTubeVideoCollection:
    """An in-memory list of videos with helpers to export them.

    Every export relies on each stored video exposing a ``to_dict()`` method.
    """

    def __init__(self):
        self.__youtube_videos = []
        # Created lazily on the first call to ``save_to_elasticsearch``.
        self.__es_client = None

    def get_youtube_videos(self):
        """Return the internal list of videos (not a copy)."""
        return self.__youtube_videos

    def add_video(self, video: 'YouTubeVideo'):
        """Append a single video to the collection."""
        self.__youtube_videos.append(video)

    def add_videos(self, videos: list['YouTubeVideo']):
        """Append every video from ``videos`` to the collection, in order."""
        self.__youtube_videos.extend(videos)

    def save_to_database(self):
        """Placeholder: not implemented yet, does nothing."""
        pass

    def save_to_elasticsearch(self, index_name, elastic_search_host):
        """Index every video into Elasticsearch, using its ``video_id`` as document id.

        Args:
            index_name: Name of the target index.
            elastic_search_host: Host passed to the ``Elasticsearch`` client.
                Only used when no client has been created yet; an existing
                client is reused as is.
        """
        if not self.__es_client:
            self.__create_es_client(elastic_search_host)
        for video in self.__youtube_videos:
            video_details = video.to_dict()
            self.__es_client.index(index=index_name, document=video_details, id=video_details['video_id'])

    def __delete_index(self, index_name):
        """Delete ``index_name``, ignoring HTTP 400 and 404 responses."""
        self.__es_client.indices.delete(index=index_name, ignore=[400, 404])

    def __create_index(self, index_name):
        """Create the index ``index_name``."""
        self.__es_client.indices.create(index=index_name)

    def __create_es_client(self, es_host):
        """Create the Elasticsearch client for ``es_host`` unless one already exists."""
        if not self.__es_client:
            self.__es_client = Elasticsearch(hosts=[es_host])

    def save_to_csv(self):
        """Placeholder: not implemented yet, does nothing."""
        pass

    def save_to_json(self, file=''):
        """Write all videos, as a JSON array of their ``to_dict()`` output, to ``file``.

        Args:
            file: Destination path. Falls back to ``'file.json'`` when empty.
        """
        if not file:
            file = 'file.json'
        videos = [video.to_dict() for video in self.__youtube_videos]
        with open(file, 'w', encoding='utf-8') as file_path:
            # ``json.dump`` needs the open file object as its second argument;
            # without it the call raises ``TypeError`` and nothing is written.
            json.dump(videos, file_path)

    def to_pandas(self):
        """Return the videos as a ``pandas.DataFrame`` with one row per video.

        The columns are ``video_id``, ``video_title``, ``video_description``,
        ``video_thumbnail``, ``video_tags``, ``channel_id`` and
        ``channel_title``, each read from the matching key of ``to_dict()``.

        Raises:
            KeyError: If a video's ``to_dict()`` lacks one of those keys.
        """
        columns = (
            'video_id',
            'video_title',
            'video_description',
            'video_thumbnail',
            'video_tags',
            'channel_id',
            'channel_title',
        )
        records = [video.to_dict() for video in self.__youtube_videos]
        data = {
            column: [record[column] for record in records]
            for column in columns
        }
        return pd.DataFrame(data)

In [None]:
# Display the first entry in `videos`.
videos[0]