# 1. Connecting to YouTube using a Python client

- We enabled the API in the Google Cloud console and generated an API key.
- To connect to YouTube, we enabled access to public data only, so we don't have to deal with OAuth, since we don't need any personal data of the users.
- After generating the API key, we need to install the dependencies via pip.

We used the following command to install them into our virtualenv with pip.

```
pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib
```

In [1]:
import csv
import json
import os
from pprint import pprint

import pandas as pd
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# API key used for public-data access to the YouTube Data API.
# SECURITY: a key committed to source should be treated as leaked and be
# revoked/rotated in the Google Cloud console. Supply the key through the
# YOUTUBE_API_KEY environment variable; the literal below is kept only as a
# fallback so that existing runs of this notebook keep working.
DEVELOPER_KEY = (
    os.environ.get('YOUTUBE_API_KEY')
    or 'AIzaSyDgc2_04BksQXvYmNgNPqCZwEvMRaZ85fI'
)
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'

# Shared API client used by the helper functions defined further down.
youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)

# 2. Search and extract the data
### a) Search videos related to query string "avatar movie"
- (For this part, choose/search one video of your choice and perform data collection steps on that specific video )


We will only create the collection for videos, as we don't care about playlists and channels for now;
otherwise those could also be fetched by requesting the corresponding `type`.


In [2]:
def youtube_search(search_query, max_results):
    """Search YouTube for videos matching a query string.

    A new API client is built on every call from the module-level service
    name, API version and developer key.

    Args:
        search_query: Text passed as the ``q`` parameter of ``search.list``.
        max_results: Value passed as ``maxResults`` to ``search.list``.

    Returns:
        A list with the raw result items whose ``id.kind`` is
        ``youtube#video``; empty when the response carries no ``items``.
    """
    client = build(
        YOUTUBE_API_SERVICE_NAME,
        YOUTUBE_API_VERSION,
        developerKey=DEVELOPER_KEY,
    )

    # search.list restricted to videos, returning the id and snippet parts.
    request = client.search().list(
        q=search_query,
        part='id,snippet',
        type="video",
        maxResults=max_results,
    )
    response = request.execute()

    # Keep only the items that are tagged as videos.
    return [
        item
        for item in response.get('items', [])
        if item['id']['kind'] == 'youtube#video'
    ]

def extract_data(video_data):
    """Print the channel and video details of one search result.

    Reads the ``snippet`` section of ``video_data`` and prints the channel
    ID, video description, channel title and video title, each as a
    ``label ->  value`` entry followed by a blank line.

    Args:
        video_data: A search result item containing a ``snippet`` dict with
            ``channelId``, ``description``, ``channelTitle`` and ``title``.

    Returns:
        None; the details are only printed.
    """
    snippet = video_data["snippet"]

    # (printed label, snippet key) pairs, in output order.
    fields = (
        ("channel ID", 'channelId'),
        ("video Description", 'description'),
        ("Channel Title", 'channelTitle'),
        ("Video Title", 'title'),
    )
    # Resolve every value up front so a missing key fails before any output.
    summary = {label: snippet[key] for label, key in fields}

    print("\n")
    for label, value in summary.items():
        print(label, "-> ", value, "\n")

# We will fetch only 5 results
videos = youtube_search("avatar movie", 5)
if videos:
    # Show the details of the first hit only.
    extract_data(videos[0])
else:
    # Indexing an empty result list would fail with IndexError.
    print("No videos found for the query.")




channel ID ->  UC0A86RKLCqTEUna3hPlEpzg 

video Description ->  AVATAR Full Movie 2023: Fallen Kingdom | Superhero FXL Action Movies 2023 in English (Game Movie). Best Action Game ... 

Channel Title ->  Superhero FXL Games 

Video Title ->  AVATAR Full Movie 2023: Fallen Kingdom | Superhero FXL Action Movies 2023 in English (Game Movie) 



### 2.b  Provide the following statistics for query string “avatar movie” of top 50 videos sorted by relevance in the US region.

> Output expected: video ID, title, no of views, no of likes,no of comments exported to CSV file


In [3]:
def youtube_search_with_region(search_query, max_results, region_code):
    """Search YouTube for videos matching a query within a region.

    Uses the module-level ``youtube`` client.

    Args:
        search_query: Text passed as the ``q`` parameter of ``search.list``.
        max_results: Value passed as ``maxResults`` to ``search.list``.
        region_code: Value passed as ``regionCode`` (e.g. ``"US"``).

    Returns:
        A list with the raw result items whose ``id.kind`` is
        ``youtube#video``; empty when the response carries no ``items``.
    """
    request = youtube.search().list(
        q=search_query,
        part='id,snippet',
        type="video",
        regionCode=region_code,
        maxResults=max_results,
    )
    response = request.execute()

    # Keep only the items that are tagged as videos.
    return [
        item
        for item in response.get('items', [])
        if item['id']['kind'] == 'youtube#video'
    ]

def video_id_list(video_list):
    """Return the ``videoId`` of every search result in ``video_list``.

    Args:
        video_list: Iterable of search result items, each holding the video
            ID under ``item['id']['videoId']``.

    Returns:
        A list of video ID values in the same order as the input.
    """
    return [entry['id']['videoId'] for entry in video_list]


def write_csv_file(data, file_name):
    """Write ``data`` to ``file_name`` as CSV through a pandas DataFrame.

    Args:
        data: Anything ``pd.DataFrame`` accepts; in this notebook a list of
            per-video dicts.
        file_name: Path of the CSV file to create or overwrite.

    Returns:
        None.
    """
    # Build the frame from the argument so the function writes what the
    # caller passed in, rather than depending on a module-level variable.
    df = pd.DataFrame(data)
    df.to_csv(file_name)


def extract_video_information(video):
    """Flatten one video resource into the fields exported to CSV.

    Args:
        video: A video resource dict with ``id`` and ``snippet['title']``,
            and normally a ``statistics`` dict.

    Returns:
        A dict with the keys ``video_id``, ``no_of_likes``, ``no_of_views``,
        ``title`` and ``no_of_comments``. Counter values are passed through
        as found in ``statistics``; a counter that is absent falls back
        to ``0``.

    Raises:
        KeyError: If ``id`` or ``snippet['title']`` is missing.
    """
    # A missing statistics section is treated as "no counters available".
    statistics = video.get('statistics', {})
    # Echo the raw counters so progress is visible while the cell runs.
    print(statistics)
    return {
        "video_id": video['id'],
        "no_of_likes": statistics.get('likeCount', 0),
        "no_of_views": statistics.get('viewCount', 0),
        "title": video['snippet']['title'],
        "no_of_comments": statistics.get('commentCount', 0),
    }
    
        
def video_information(video_ids):
    """Fetch details for the given video IDs and flatten them.

    Calls ``videos.list`` on the module-level ``youtube`` client, requesting
    the ``id``, ``snippet``, ``statistics`` and ``contentDetails`` parts.

    Args:
        video_ids: Video IDs passed as the ``id`` parameter of the request.

    Returns:
        A list with one ``extract_video_information`` result per response
        item whose ``kind`` is ``youtube#video``.
    """
    request = youtube.videos().list(
        part='id,snippet,statistics,contentDetails',
        id=video_ids,
    )
    response = request.execute()

    return [
        extract_video_information(item)
        for item in response.get('items', [])
        if item['kind'] == 'youtube#video'
    ]

videos = youtube_search_with_region("avatar movie", 50, "US")
# Bind the IDs to a name distinct from the video_id_list() helper: reusing
# the helper's name would replace the function with a list, so running this
# cell a second time would fail with "'list' object is not callable".
video_ids = video_id_list(videos)
video_data = video_information(video_ids)
write_csv_file(video_data, "./2b-data.csv")

{'viewCount': '2422461', 'likeCount': '18583', 'favoriteCount': '0', 'commentCount': '479'}
{'viewCount': '58123548', 'likeCount': '1042782', 'favoriteCount': '0', 'commentCount': '43090'}
{'viewCount': '13673995', 'likeCount': '338836', 'favoriteCount': '0', 'commentCount': '31472'}
{'viewCount': '475907', 'likeCount': '3761', 'favoriteCount': '0', 'commentCount': '612'}
{'viewCount': '12790401', 'likeCount': '81208', 'favoriteCount': '0', 'commentCount': '8939'}
{'viewCount': '218736', 'likeCount': '2872', 'favoriteCount': '0', 'commentCount': '64'}
{'viewCount': '827420', 'likeCount': '7016', 'favoriteCount': '0', 'commentCount': '424'}
{'viewCount': '1178087', 'likeCount': '4339', 'favoriteCount': '0', 'commentCount': '123'}
{'viewCount': '506553', 'likeCount': '5013', 'favoriteCount': '0', 'commentCount': '209'}
{'viewCount': '27757967', 'likeCount': '682437', 'favoriteCount': '0', 'commentCount': '29018'}
{'viewCount': '12023943', 'likeCount': '78511', 'favoriteCount': '0', 'comm

# 3. Analyze the exported data obtained in 2.b and carry out the following tasks 

### a) Sort the data 2.b by top 10 comments in descending order and consider the video IDs and Titles of top 10 videos which have highest comments.

In [4]:
# Load the collected per-video records into a DataFrame and make the
# comment counts numeric so that sorting is by value.
df = pd.DataFrame(video_data)
df['no_of_comments'] = df['no_of_comments'].astype(int)

# Most-commented videos first; keep the ten leading rows.
df_sorted = df.sort_values('no_of_comments', ascending=False)
top_10_sorted = df_sorted.iloc[:10]

# The task only asks for IDs and titles; the comment count is shown as well
# because it makes the ordering easy to verify.
selected_columns = ["video_id", "title", "no_of_comments"]
data = top_10_sorted[selected_columns]

pprint(data)

       video_id                                              title  \
1   d9MyW72ELq0        Avatar: The Way of Water | Official Trailer   
2   waJKJW_XU90  Avatar: The Last Airbender | Official Teaser |...   
9   a8Gx8wiNbs8  Avatar: The Way of Water | Official Teaser Tra...   
42  2r71I8lvTIA  The Last Airbender Film: How it Disrespected a...   
4   5PSNL1qE6VY  Avatar | Official Trailer (HD) | 20th Century FOX   
40  p_GgAHd5siE                           TOPH: An Avatar Fan Film   
36  T5vdPy7nbRQ                         Avatar Element Animation 2   
35  RGx8rYbRVR4  Why People Hate Avatar: A Lesson In Lazy Comme...   
25  f5Zx8iPek5I  Avatar 3 Will Introduce The Dark Side🔥 Of Na'v...   
32  QOg9LUIvaig  AVATAR: THE LAST AIRBENDER | Water, Earth, Fir...   

    no_of_comments  
1            43090  
2            31472  
9            29018  
42           27173  
4             8939  
40            5399  
36            4628  
35            4044  
25            3997  
32            3976 

### b). Use a suitable method to retrieve comments of those top 10 videos from 3.a. 
> For doing this, write a program to loop through each video id from 3.a and pass in the part parameter set to "snippet", to retrieve basic details about the comments. Execute this request and print the response using the pprint() method.

#### Note: I am not fetching every comment of each video, only the first page of top-level comment threads returned by default, to show that more could be retrieved the same way

In [5]:
# Call the API's commentThreads.list method to list the existing comments.

def get_comments(video_id):
    """Fetch and pretty-print the comment threads response for one video.

    Calls ``commentThreads.list`` on the module-level ``youtube`` client with
    ``part="snippet"`` and plain-text formatting. No ``maxResults`` or page
    token is passed, so only the response of this single request is printed.

    Args:
        video_id: ID of the video whose comment threads are requested.

    Returns:
        None; the raw response is printed with ``pprint``.
    """
    request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_id,
        textFormat="plainText",
    )
    response = request.execute()

    pprint(response)

def print_video_comments(video_list):
    """Print the comment threads response for every video ID given.

    Args:
        video_list: Iterable of video IDs; each is passed to
            ``get_comments`` in order.
    """
    for current_id in video_list:
        get_comments(current_id)
        
# Print the comment threads for each video ID in the `data` frame.
print_video_comments(data["video_id"])

{'etag': 'Go5Y5VHkymTBjI0EH5Z3CVygN2g',
 'items': [{'etag': 'XsCS-xW3bre2AtAidI5ZudN6cAc',
            'id': 'Ugy9R8bfFFOJbZWLMY54AaABAg',
            'kind': 'youtube#commentThread',
            'snippet': {'canReply': True,
                        'channelId': 'UCgjxQJ6TlKqhHax8742ZMdA',
                        'isPublic': True,
                        'topLevelComment': {'etag': 'IUN4pJhnNNF0jPN74vd4UK0NmLg',
                                            'id': 'Ugy9R8bfFFOJbZWLMY54AaABAg',
                                            'kind': 'youtube#comment',
                                            'snippet': {'authorChannelId': {'value': 'UCvLUhqiJ307WlvVbZSPzlEQ'},
                                                        'authorChannelUrl': 'http://www.youtube.com/channel/UCvLUhqiJ307WlvVbZSPzlEQ',
                                                        'authorDisplayName': 'IT '
                                                                             'Ian',
                 

{'etag': 'bnPGy_FPQJCZU21pONAt8aagtrw',
 'items': [{'etag': '-77Gg_txex4NdHvLAIsKpqnA3Y4',
            'id': 'Ugxj1nqJl9gl2zpgQKF4AaABAg',
            'kind': 'youtube#commentThread',
            'snippet': {'canReply': True,
                        'channelId': 'UCgjxQJ6TlKqhHax8742ZMdA',
                        'isPublic': True,
                        'topLevelComment': {'etag': 'CT_2tTAmC7BY4Z9qIzyTiuXuhbU',
                                            'id': 'Ugxj1nqJl9gl2zpgQKF4AaABAg',
                                            'kind': 'youtube#comment',
                                            'snippet': {'authorChannelId': {'value': 'UCxYjlPaaEnANNRSWZcRd7VQ'},
                                                        'authorChannelUrl': 'http://www.youtube.com/channel/UCxYjlPaaEnANNRSWZcRd7VQ',
                                                        'authorDisplayName': 'John '
                                                                             'Moore',
             

{'etag': 'RjwvHk2vymOjqiZg-OUfgPG9Pr8',
 'items': [{'etag': 'IMYP49F8NTBGrGAlK20My4w7O3w',
            'id': 'UgzbWmKLUrCFv57GFqd4AaABAg',
            'kind': 'youtube#commentThread',
            'snippet': {'canReply': True,
                        'channelId': 'UCOajpsI8t3Eg-u-s2j_c-cQ',
                        'isPublic': True,
                        'topLevelComment': {'etag': 'nVL8dXLkjOOAu94_zdAfz5c8mQw',
                                            'id': 'UgzbWmKLUrCFv57GFqd4AaABAg',
                                            'kind': 'youtube#comment',
                                            'snippet': {'authorChannelId': {'value': 'UCUTI-7uqT3qU7yz1GCzoTXQ'},
                                                        'authorChannelUrl': 'http://www.youtube.com/channel/UCUTI-7uqT3qU7yz1GCzoTXQ',
                                                        'authorDisplayName': 'Uriah '
                                                                             'Gonzales',
         

{'etag': 'cthiChtLGEiq0i7Rfbw3sTjLhHk',
 'items': [{'etag': 'oNVL0iMDgR73sXUddwxj05XGG18',
            'id': 'UgxiaRPULwlPZJZdYD14AaABAg',
            'kind': 'youtube#commentThread',
            'snippet': {'canReply': True,
                        'channelId': 'UCzHSU7TR2xrXZTBR4Ogv_SQ',
                        'isPublic': True,
                        'topLevelComment': {'etag': 'j3CqGEFqGDqVflBMgUNasmqJyHA',
                                            'id': 'UgxiaRPULwlPZJZdYD14AaABAg',
                                            'kind': 'youtube#comment',
                                            'snippet': {'authorChannelId': {'value': 'UCc1_XeA-4ssiy3k1yyp2-0w'},
                                                        'authorChannelUrl': 'http://www.youtube.com/channel/UCc1_XeA-4ssiy3k1yyp2-0w',
                                                        'authorDisplayName': 'Mr '
                                                                             'Lusk',
                

{'etag': '8qRb86iRvrIK-_gqhbgB_AzzC9w',
 'items': [{'etag': 'pagd7qrd1UD0tQ8T20NQmMCf0LA',
            'id': 'UgyGn8e7SsOO91xPX0V4AaABAg',
            'kind': 'youtube#commentThread',
            'snippet': {'canReply': True,
                        'channelId': 'UCI9DUIgtRGHNH_HmSTcfUbA',
                        'isPublic': True,
                        'topLevelComment': {'etag': 'DYibMmSQkoMLs575WV_70mGa6gE',
                                            'id': 'UgyGn8e7SsOO91xPX0V4AaABAg',
                                            'kind': 'youtube#comment',
                                            'snippet': {'authorChannelId': {'value': 'UCI9DUIgtRGHNH_HmSTcfUbA'},
                                                        'authorChannelUrl': 'http://www.youtube.com/channel/UCI9DUIgtRGHNH_HmSTcfUbA',
                                                        'authorDisplayName': 'The '
                                                                             'Closer '
             

{'etag': 'fB-2dyb--OSDVSSl-6zkTVtoFK8',
 'items': [{'etag': 'DxqWPwMJ4BdtIp9dB01sh1yr1Ik',
            'id': 'UgwgTJOoY2Jl5wwFD2R4AaABAg',
            'kind': 'youtube#commentThread',
            'snippet': {'canReply': True,
                        'channelId': 'UCkAhyVzx6AyyjD3nCaLG52w',
                        'isPublic': True,
                        'topLevelComment': {'etag': 'ngfzLCsLeiFM3QThsAA5c6GzjEE',
                                            'id': 'UgwgTJOoY2Jl5wwFD2R4AaABAg',
                                            'kind': 'youtube#comment',
                                            'snippet': {'authorChannelId': {'value': 'UCkAhyVzx6AyyjD3nCaLG52w'},
                                                        'authorChannelUrl': 'http://www.youtube.com/channel/UCkAhyVzx6AyyjD3nCaLG52w',
                                                        'authorDisplayName': 'Nerdy '
                                                                             'Minutes ',
         

 'items': [{'etag': 'PYJiZdNrOHpVJIZBJouqk8PmLUI',
            'id': 'UgyzSC9jUUYw_NMKJD94AaABAg',
            'kind': 'youtube#commentThread',
            'snippet': {'canReply': True,
                        'channelId': 'UCWOA1ZGywLbqmigxE4Qlvuw',
                        'isPublic': True,
                        'topLevelComment': {'etag': 'oYOyZEGAEzHyMtHBnpr-H0DOTs8',
                                            'id': 'UgyzSC9jUUYw_NMKJD94AaABAg',
                                            'kind': 'youtube#comment',
                                            'snippet': {'authorChannelId': {'value': 'UC073qCDxkozaCkakqcySBzA'},
                                                        'authorChannelUrl': 'http://www.youtube.com/channel/UC073qCDxkozaCkakqcySBzA',
                                                        'authorDisplayName': 'T;',
                                                        'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/APkrFKZbpuSOz_9kjTTpcT4T6B43OsA6t