Go to the given URL and answer the following questions.

URL: https://www.youtube.com/@PW-Foundation/videos

## Q1. Write a python program to extract the video URL of the first five videos.

In [7]:
import requests
from bs4 import BeautifulSoup
from pandas import DataFrame
from json import loads

In [8]:
url = 'https://www.youtube.com/@PW-Foundation/videos'

# Download the HTML of the channel's videos page with a GET request.
# requests applies no timeout by default, so one is passed explicitly to keep
# the cell from hanging forever on a stalled connection.
r = requests.get(url, timeout=30)
r


<Response [200]>

In [9]:
# Parse the downloaded page with Python's built-in HTML parser.
soup = BeautifulSoup(markup=r.text, features='html.parser')

In [10]:
# Collect every <script> tag on the page; one of them embeds the page data.
# find_all is the supported name; findAll is only a legacy alias for it.
all_script_tags = soup.find_all('script')

In [13]:
def script_tag_to_json(tags: list) -> dict:
    """Return the ``ytInitialData`` JSON object embedded in one of *tags*.

    The tags are scanned from last to first, and the first one whose text
    contains ``ytInitialData = {"responseContext"`` is decoded.

    Args:
        tags: Objects exposing a ``text`` attribute (e.g. ``<script>`` tags).

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If no tag contains the assignment, or if the embedded
            JSON is malformed (``json.JSONDecodeError`` is a ``ValueError``).
    """
    # Local import: the file itself only imports ``loads`` from json.
    from json import JSONDecoder

    assignment = 'ytInitialData = '
    marker = assignment + '{"responseContext"'

    for tag in reversed(tags):
        text: str = tag.text
        position = text.find(marker)
        if position == -1:
            continue

        # Decode from the opening brace and let the decoder find where the
        # object ends. This avoids assuming a fixed-length prefix and a
        # single trailing character around the JSON payload.
        json_start = position + len(assignment)
        payload, _ = JSONDecoder().raw_decode(text, json_start)
        return payload

    raise ValueError('Required script tag not found in the given tags.')

In [14]:
# Find the script tag that carries ytInitialData and decode it.
data = script_tag_to_json(all_script_tags)

In [15]:
def get_contents_dict(data):
    """Return the rich-grid ``contents`` entry of the second tab in *data*.

    Walks a fixed key path through the decoded page data. A missing key
    raises ``KeyError`` and a missing second tab raises ``IndexError``.
    """
    path = (
        'contents',
        'twoColumnBrowseResultsRenderer',
        'tabs',
        1,  # second entry of the tabs list
        'tabRenderer',
        'content',
        'richGridRenderer',
        'contents',
    )

    node = data
    for step in path:
        node = node[step]
    return node

## Get video URLs

In [16]:
def get_videoUrl(data: dict, n: int = 5):
    """Build the watch-page URLs of the first *n* grid entries.

    Args:
        data: Decoded page data, as accepted by ``get_contents_dict``.
        n: Number of entries to read (default 5).

    Returns:
        A list of ``https://www.youtube.com/watch?v=<videoId>`` strings.

    Raises:
        ValueError: If *n* is greater than 30.
    """
    entries = get_contents_dict(data)

    if n > 30:
        raise ValueError('Max Limit is 30.')

    watch_prefix = 'https://www.youtube.com/watch?v='
    return [
        watch_prefix
        + entries[idx]['richItemRenderer']['content']['videoRenderer']['videoId']
        for idx in range(n)
    ]

# Show the URLs of the first five videos (n defaults to 5).
get_videoUrl(data)

['https://www.youtube.com/watch?v=bnVfypUYGDA',
 'https://www.youtube.com/watch?v=oRBVGPNSubE',
 'https://www.youtube.com/watch?v=mWCG1rQt4vM',
 'https://www.youtube.com/watch?v=xAEX042GdEs',
 'https://www.youtube.com/watch?v=nInh_Y87Kpo']

## Q2. Write a python program to extract the URL of the video thumbnails of the first five videos.

## Get video thumbnails

In [17]:
def get_thumbnails(data: dict, n: int = 5):
    """Collect one thumbnail URL for each of the first *n* grid entries.

    For every entry the last item of its ``thumbnails`` list is used.

    Args:
        data: Decoded page data, as accepted by ``get_contents_dict``.
        n: Number of entries to read (default 5).

    Returns:
        A list of thumbnail URL strings.

    Raises:
        ValueError: If *n* is greater than 30.
    """
    entries = get_contents_dict(data)

    if n > 30:
        raise ValueError('Max Limit is 30.')

    urls = []
    for idx in range(n):
        video = entries[idx]['richItemRenderer']['content']['videoRenderer']
        candidates = video['thumbnail']['thumbnails']
        urls.append(candidates[-1]['url'])
    return urls

# Show the thumbnail URLs of the first five videos (n defaults to 5).
get_thumbnails(data)

['https://i.ytimg.com/vi/bnVfypUYGDA/hqdefault.jpg?sqp=-oaymwEjCNACELwBSFryq4qpAxUIARUAAAAAGAElAADIQj0AgKJDeAE=&rs=AOn4CLDCEHdnwgSzgHNYvox6swyrC4YEmQ',
 'https://i.ytimg.com/vi/oRBVGPNSubE/hqdefault.jpg?sqp=-oaymwEjCNACELwBSFryq4qpAxUIARUAAAAAGAElAADIQj0AgKJDeAE=&rs=AOn4CLDYQix8jbkIvqT0sMx6dBsK-oJuiw',
 'https://i.ytimg.com/vi/mWCG1rQt4vM/hqdefault.jpg?sqp=-oaymwEjCNACELwBSFryq4qpAxUIARUAAAAAGAElAADIQj0AgKJDeAE=&rs=AOn4CLD0KWK3ObCUPTAHPVtX1TdSJSLorw',
 'https://i.ytimg.com/vi/xAEX042GdEs/hqdefault.jpg?sqp=-oaymwEjCNACELwBSFryq4qpAxUIARUAAAAAGAElAADIQj0AgKJDeAE=&rs=AOn4CLAaelIvpSMOAehX4XXRn2dnIGfNkw',
 'https://i.ytimg.com/vi/nInh_Y87Kpo/hqdefault.jpg?sqp=-oaymwEjCNACELwBSFryq4qpAxUIARUAAAAAGAElAADIQj0AgKJDeAE=&rs=AOn4CLA699gHQoZOp7uYfz5_LjdTVijqHQ']

## Q3. Write a python program to extract the title of the first five videos.

## Get video title

In [18]:
def get_title(data: dict, n: int = 5):
    """Collect the titles of the first *n* grid entries.

    The title is read from the last item of each entry's ``title.runs`` list.

    Args:
        data: Decoded page data, as accepted by ``get_contents_dict``.
        n: Number of entries to read (default 5).

    Returns:
        A list of title strings.

    Raises:
        ValueError: If *n* is greater than 30.
    """
    entries = get_contents_dict(data)

    if n > 30:
        raise ValueError('Max Limit is 30.')

    return [
        entries[idx]['richItemRenderer']['content']['videoRenderer']['title']['runs'][-1]['text']
        for idx in range(n)
    ]

# Show the titles of the first five videos (n defaults to 5).
get_title(data)

['Complete 1 Chapter in 1 Hour || Get 90% in 10 Days !! 🔥 Launching Exam Buddy BOT 🙌',
 'Class 10th Science: Most Important 100 Questions for Board Exams | BOARD Exam 2024 @Class10th-UDAAN',
 'Complete MATHS in Just 3 hours | LAST Minute Revision | Class 10th CBSE Boards',
 'Class 10th BIOLOGY All Diagrams in 1 Shot | Full Syllabus Covered CBSE Board',
 'Complete HINDI Course B in Just 3 hours | LAST Minute Revision | Class 10th CBSE Boards']

## Q4. Write a python program to extract the number of views of the first five videos.

In [19]:
def _parse_view_count(text: str) -> int:
    """Convert a view-count label such as ``'63,659 views'`` to an int."""
    # The number is the first space-separated token. Reading it that way
    # works for both "... views" and "1 view", whereas cutting a fixed six
    # characters off the end breaks on the singular form.
    count = text.split(' ', 1)[0].replace(',', '')
    if count.lower() == 'no':
        # Presumably the label used for a video without views ("No views")
        # -- TODO confirm against real page data.
        return 0
    return int(count)


def get_viwes(data: dict, n: int = 5):
    """Collect the view counts of the first *n* grid entries.

    Args:
        data: Decoded page data, as accepted by ``get_contents_dict``.
        n: Number of entries to read (default 5).

    Returns:
        A list of view counts as integers.

    Raises:
        ValueError: If *n* is greater than 30, or if a view-count label does
            not start with a number.
    """
    contents = get_contents_dict(data)

    if n > 30:
        raise ValueError('Max Limit is 30.')

    return [
        _parse_view_count(
            contents[i]['richItemRenderer']['content']['videoRenderer']['viewCountText']['simpleText']
        )
        for i in range(n)
    ]

# Show the view counts of the first five videos (n defaults to 5).
get_viwes(data)

[63659, 40888, 33977, 31875, 145484]

## Q5. Write a python program to extract the posting time of the first five videos.

## Get time of posting of video

In [20]:
def get_time_of_posting(data: dict, n: int = 5):
    """Collect the published-time labels of the first *n* grid entries.

    Args:
        data: Decoded page data, as accepted by ``get_contents_dict``.
        n: Number of entries to read (default 5).

    Returns:
        A list with each entry's ``publishedTimeText.simpleText`` string.

    Raises:
        ValueError: If *n* is greater than 30.
    """
    entries = get_contents_dict(data)

    if n > 30:
        raise ValueError('Max Limit is 30.')

    labels = []
    for idx in range(n):
        video = entries[idx]['richItemRenderer']['content']['videoRenderer']
        labels.append(video['publishedTimeText']['simpleText'])
    return labels

# Show the posting-time labels of the first five videos (n defaults to 5).
get_time_of_posting(data)

['20 hours ago', '23 hours ago', '1 day ago', '2 days ago', '3 days ago']

# Note: Save all the data scraped in the above questions in a CSV file.
# Save data in CSV format

In [21]:
def get_channel_video_details(data: dict, n: int = 5):
    """Gather URL, title, thumbnail and posting time of the first *n* videos.

    Args:
        data: Decoded page data, as accepted by the individual getters.
        n: Number of videos to include (default 5, matching the getters).

    Returns:
        A DataFrame with one row per video and the columns ``video_urls``,
        ``title``, ``thumbnail_url`` and ``time_of_posting``. The columns are
        present even when there are no rows.

    Raises:
        ValueError: Propagated from the getters when *n* is greater than 30.
    """
    thumbnails = get_thumbnails(data, n)
    time_of_posting = get_time_of_posting(data, n)
    titles = get_title(data, n)
    video_urls = get_videoUrl(data, n)

    # NOTE(review): view counts (get_viwes) are not part of this table --
    # confirm whether they should be added as a column.
    rows = list(zip(video_urls, titles, thumbnails, time_of_posting))

    # Name the columns at construction time instead of building an
    # integer-labelled frame and renaming it afterwards.
    return DataFrame(
        rows,
        columns=['video_urls', 'title', 'thumbnail_url', 'time_of_posting'],
    )

In [22]:
# Build the table for 30 videos, the largest n the getters accept.
channel_data = get_channel_video_details(data, 30)

In [23]:
# Write the table to PW-Foundation.csv; index=False leaves out the row index.
channel_data.to_csv('PW-Foundation.csv', index=False)

# I don't have a credit card, so I could not deploy this on AWS.