## Go to the given URL and answer the following questions.

> URL: https://www.youtube.com/@PW-Foundation/videos

In [1]:
import requests
from bs4 import BeautifulSoup
from pandas import DataFrame
from json import loads

In [2]:
url = 'https://www.youtube.com/@PW-Foundation/videos'

# Download the page with a GET request. The timeout stops the cell from
# hanging indefinitely on a stalled connection, and raise_for_status() fails
# here on a 4xx/5xx reply instead of letting later cells parse an error page.
r = requests.get(url, timeout=30)
r.raise_for_status()
r

<Response [200]>

In [3]:
# Parse the downloaded page text using the 'html.parser' backend
soup = BeautifulSoup(markup=r.text, features='html.parser')

In [4]:
# Collect every <script> element; find_all is the current name of the
# legacy findAll alias.
all_script_tags = soup.find_all('script')

In [5]:
def script_tag_to_json(tags: list) -> dict:
    """Decode the ``ytInitialData`` JSON object embedded in one of ``tags``.

    The tags are scanned from last to first; the first one whose text contains
    ``ytInitialData = {"responseContext"`` is decoded and returned.

    Args:
        tags: Objects exposing a ``.text`` string (the notebook passes the
            ``<script>`` tags found in the page).

    Returns:
        The decoded JSON object that follows ``ytInitialData = ``.

    Raises:
        ValueError: If no tag contains the marker, or (as the
            ``json.JSONDecodeError`` subclass) if the object is malformed.
    """
    # Imported locally so this cell does not depend on editing the import cell.
    from json import JSONDecoder

    needle = 'ytInitialData = {"responseContext"'
    brace_offset = needle.index('{')

    for tag in reversed(tags):
        text: str = tag.text
        pos = text.find(needle)
        if pos != -1:
            # Decode starting at the opening brace and stop at the matching
            # closing brace, so the result does not depend on the exact
            # prefix length or on what follows the object (';', whitespace,
            # further statements).
            payload, _ = JSONDecoder().raw_decode(text, pos + brace_offset)
            return payload

    raise ValueError('Required script tag not found in the given tags.')

In [6]:
# Decode the page's embedded ytInitialData object once; later cells read from it
data = script_tag_to_json(tags=all_script_tags)

In [7]:
def get_contents_dict(data):
    """Return the rich-grid ``contents`` stored under the second tab of ``data``.

    Follows the fixed path ``contents -> twoColumnBrowseResultsRenderer ->
    tabs -> [1] -> tabRenderer -> content -> richGridRenderer -> contents``.
    Despite the function name, callers in this notebook index the result by
    position. A missing key or index raises the usual lookup error.
    """
    path = (
        'contents',
        'twoColumnBrowseResultsRenderer',
        'tabs',
        1,  # second tab
        'tabRenderer',
        'content',
        'richGridRenderer',
        'contents',
    )

    node = data
    for step in path:
        node = node[step]
    return node

### Q1. Write a python program to extract the video URL of the first five videos. 


#### Get video URLs

In [8]:
def get_videoUrl(data:dict, n: int = 5):
    """Build watch-page URLs for the first ``n`` entries of the video grid.

    Args:
        data: Decoded page data, as accepted by ``get_contents_dict``.
        n: Number of leading grid entries to read (default 5).

    Returns:
        A list of ``https://www.youtube.com/watch?v=<videoId>`` strings.

    Raises:
        ValueError: If ``n`` is greater than 30.
    """
    grid_items = get_contents_dict(data)

    if n > 30:
        raise ValueError('Max Limit is 30.')

    prefix = 'https://www.youtube.com/watch?v='
    return [
        prefix + grid_items[idx]['richItemRenderer']['content']['videoRenderer']['videoId']
        for idx in range(n)
    ]

# Q1 answer: URLs of the first five videos
get_videoUrl(data, n=5)

['https://www.youtube.com/watch?v=AM2Dt7cNebw',
 'https://www.youtube.com/watch?v=7nMJVhey9TM',
 'https://www.youtube.com/watch?v=FSVVlcFUCMk',
 'https://www.youtube.com/watch?v=vKxdTuOirnI',
 'https://www.youtube.com/watch?v=hC86u4g6QPk']

### Q2. Write a python program to extract the URL of the video thumbnails of the first five videos. 


#### Get video thumbnails

In [9]:
def get_thumbnails(data: dict, n: int = 5):
    """Collect one thumbnail URL for each of the first ``n`` grid entries.

    For every entry the last element of its ``thumbnails`` list is used.

    Args:
        data: Decoded page data, as accepted by ``get_contents_dict``.
        n: Number of leading grid entries to read (default 5).

    Returns:
        A list of thumbnail URL strings.

    Raises:
        ValueError: If ``n`` is greater than 30.
    """
    grid_items = get_contents_dict(data)

    if n > 30:
        raise ValueError('Max Limit is 30.')

    def last_thumbnail_url(entry):
        renderer = entry['richItemRenderer']['content']['videoRenderer']
        return renderer['thumbnail']['thumbnails'][-1]['url']

    return [last_thumbnail_url(grid_items[idx]) for idx in range(n)]

# Q2 answer: thumbnail URLs of the first five videos
get_thumbnails(data, n=5)

['https://i.ytimg.com/vi/AM2Dt7cNebw/hqdefault.jpg?sqp=-oaymwEjCNACELwBSFryq4qpAxUIARUAAAAAGAElAADIQj0AgKJDeAE=&rs=AOn4CLDcx2UK_HLUzfaPFlhUyTJgu2fIBw',
 'https://i.ytimg.com/vi/7nMJVhey9TM/hqdefault.jpg?sqp=-oaymwEjCNACELwBSFryq4qpAxUIARUAAAAAGAElAADIQj0AgKJDeAE=&rs=AOn4CLAA1_yMeJ7aezayVqtRUmtj_G__VA',
 'https://i.ytimg.com/vi/FSVVlcFUCMk/hqdefault.jpg?sqp=-oaymwEjCNACELwBSFryq4qpAxUIARUAAAAAGAElAADIQj0AgKJDeAE=&rs=AOn4CLARIR0hJMKXJX8SLLg3G8zDQdZX2Q',
 'https://i.ytimg.com/vi/vKxdTuOirnI/hqdefault.jpg?sqp=-oaymwEjCNACELwBSFryq4qpAxUIARUAAAAAGAElAADIQj0AgKJDeAE=&rs=AOn4CLCEm4kbyUZA7HIrFfBvf91CQytL8Q',
 'https://i.ytimg.com/vi/hC86u4g6QPk/hqdefault.jpg?sqp=-oaymwEjCNACELwBSFryq4qpAxUIARUAAAAAGAElAADIQj0AgKJDeAE=&rs=AOn4CLDYYTtZrlgJ4SZzl20cXwKrC6sw7A']

### Q3. Write a python program to extract the title of the first five videos. 


#### Get video title

In [10]:
def get_title(data: dict, n:int = 5):
    """Return the title text of the first ``n`` grid entries.

    The text is taken from the last element of each entry's title ``runs``.

    Args:
        data: Decoded page data, as accepted by ``get_contents_dict``.
        n: Number of leading grid entries to read (default 5).

    Returns:
        A list of title strings.

    Raises:
        ValueError: If ``n`` is greater than 30.
    """
    grid_items = get_contents_dict(data)

    if n > 30:
        raise ValueError('Max Limit is 30.')

    titles = []
    for position in range(n):
        video = grid_items[position]['richItemRenderer']['content']['videoRenderer']
        final_run = video['title']['runs'][-1]
        titles.append(final_run['text'])
    return titles

# Q3 answer: titles of the first five videos
get_title(data, n=5)

['Best technique to attempt SST paper in Board exam || Class 10th',
 'Last Minute Strategy To Score More Than 98% || ICSE Boards || Class-10th',
 'Why You Should Choose Commerce After 10th? || Complete Information💯',
 "Follow This Plan To Score More Than 95% in Boards || Topper's Strategy",
 'Everything About COMMERCE Stream !! Which Stream to Choose After Class 10th?']

### Q4. Write a python program to extract the number of views of the first five videos. 


#### Get video views

In [11]:
def _parse_view_count(text: str) -> int:
    """Convert a view-count label such as '23,422 views' or '1 view' to an int."""
    # Split off the trailing unit word instead of slicing a fixed six
    # characters, so the singular label '1 view' parses as well.
    number, _, _unit = text.strip().rpartition(' ')
    number = number.replace(',', '').strip()

    # Presumably YouTube labels zero-view videos 'No views'; treat that as 0.
    if number.lower() == 'no':
        return 0

    # Anything else that is not a plain integer still raises ValueError.
    return int(number)


def get_viwes(data: dict, n: int = 5):
    """Return the view counts of the first ``n`` grid entries as integers.

    Args:
        data: Decoded page data, as accepted by ``get_contents_dict``.
        n: Number of leading grid entries to read (default 5).

    Returns:
        A list of ints parsed from each entry's ``viewCountText`` label.

    Raises:
        ValueError: If ``n`` is greater than 30, or a label cannot be parsed
            as an integer.
    """
    contents = get_contents_dict(data)

    if n > 30:
        raise ValueError('Max Limit is 30.')

    return [
        _parse_view_count(
            contents[i]['richItemRenderer']['content']['videoRenderer']['viewCountText']['simpleText']
        )
        for i in range(n)
    ]

# Q4 answer: view counts of the first five videos
get_viwes(data, n=5)

[23422, 19162, 26577, 299757, 25723]

### Q5. Write a Python program to extract the posting time of each of the first five videos.

#### Get time of posting of video

In [12]:
def get_time_of_posting(data: dict, n: int = 5):
    """Return the published-time label of the first ``n`` grid entries.

    The labels are returned exactly as stored in ``publishedTimeText``
    (the notebook output shows values like '4 hours ago').

    Args:
        data: Decoded page data, as accepted by ``get_contents_dict``.
        n: Number of leading grid entries to read (default 5).

    Returns:
        A list of label strings.

    Raises:
        ValueError: If ``n`` is greater than 30.
    """
    grid_items = get_contents_dict(data)

    if n > 30:
        raise ValueError('Max Limit is 30.')

    def published_label(position):
        video = grid_items[position]['richItemRenderer']['content']['videoRenderer']
        return video['publishedTimeText']['simpleText']

    return list(map(published_label, range(n)))

# Q5 answer: posting-time labels of the first five videos
get_time_of_posting(data, n=5)

['4 hours ago', '1 day ago', '2 days ago', '7 days ago', '13 days ago']

# `Note:` Save all the data scraped in the above questions in a CSV file.


## Save data in `CSV` format.

In [13]:
def get_channel_video_details(data: dict, n: int):
    """Assemble a DataFrame describing the first ``n`` videos.

    Args:
        data: Decoded page data, as accepted by the ``get_*`` helpers.
        n: Number of leading grid entries to include.

    Returns:
        A DataFrame with the columns ``video_urls``, ``title``,
        ``thumbnail_url`` and ``time_of_posting``, one row per video.

    Raises:
        ValueError: If ``n`` is greater than 30 (raised by the helpers).
    """
    # NOTE(review): the view counts from get_viwes are not part of this table,
    # although the notebook asks for all scraped data to be saved. Confirm
    # whether a 'views' column should be added.
    thumbnails = get_thumbnails(data, n)
    time_of_posting = get_time_of_posting(data, n)
    titles = get_title(data, n)
    video_urls = get_videoUrl(data, n)

    rows = list(zip(video_urls, titles, thumbnails, time_of_posting))

    # Name the columns at construction time instead of building an
    # integer-labelled frame and renaming it in place. This also keeps the
    # column names when there are no rows (n == 0).
    return DataFrame(
        rows,
        columns=['video_urls', 'title', 'thumbnail_url', 'time_of_posting'],
    )

In [14]:
# Build the table for 30 videos, the maximum the helper functions accept
channel_data = get_channel_video_details(data, n=30)

In [16]:
# Write the table to disk without the DataFrame's row index
channel_data.to_csv(path_or_buf='PW-Foundation.csv', index=False)