# Práctica - Analítica de Datos Social y Web - Análisis de Canal YouTube de empresa
* Empresa: The Browser Company of New York 

* Autor: Yago Tobio (201802168)

Librerías

In [42]:
from dotenv import load_dotenv
from googleapiclient.discovery import build
from pymongo import MongoClient
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

import os 
import requests
import pandas as pd
load_dotenv()


True

----

Comentarios de la práctica: 
* Extraer videos de la marca permitidos por la API de YouTube 
* Extraer los metadatos de cada video, numero de likes, shares si es posible, numero de comentarios.
* Extrae los comentarios de cada video y sus metadatos. 
* Almacena los datos en MongoDB
* Procesar los posts para obtener un número
significativo de palabras clave, bigramas y hashtags
* Procesar los comentarios con una API (por ejemplo
Alchemy) para obtener las emociones asociadas
* Preparar una presentación de conclusiones

In [2]:
api_key = 'AIzaSyDkpmuUEMaWrocDXcdCNLf6ZuEHQls3FDY'
youtube = build('youtube', 'v3', developerKey=api_key)
# * Netflix Channel ID
channel_id = 'UCT5qXmLacW_a4DE-3EgeOiQ'

## Funciones para obtener metadatos de sus videos - The Browser Company

In [13]:
def get_channel_videos(channel_id):
    # Get the Uploads playlist ID
    response = youtube.channels().list(id=channel_id, part='contentDetails').execute()
    playlist_id = response['items'][0]['contentDetails']['relatedPlaylists']['uploads']

    videos = []
    next_page_token = None

    while True:
        # Get videos from the playlist
        pl_response = youtube.playlistItems().list(
            playlistId=playlist_id,
            part='snippet',
            maxResults=50,
            pageToken=next_page_token
        ).execute()

        videos += pl_response['items']
        next_page_token = pl_response.get('nextPageToken')

        if next_page_token is None:
            break

    return videos

In [32]:
def get_category_name(category_id):
    response = youtube.videoCategories().list(
        id=category_id,
        part='snippet',
        regionCode='US'  # You can change this to the appropriate region if needed
    ).execute()
    
    if response['items']:
        # Assumes category_id exists and is valid
        return response['items'][0]['snippet']['title']
    else:
        return None  # or some default value like "Unknown Category"

In [29]:

def get_video_details(video_id):
    # Fetch video details
    response = youtube.videos().list(
        part='snippet,statistics',
        id=video_id,
    ).execute()

    details = response['items'][0]
    
    # * Available keys to extract: dict_keys(['publishedAt', 'channelId', 'title', 'description', 'thumbnails', 'channelTitle', 'tags', 'categoryId', 'liveBroadcastContent', 'localized', 'defaultAudioLanguage'])
    return {
        'video_id': video_id,
        'title': details['snippet']['title'],
        'description': details['snippet']['description'],
        'tags': details['snippet'].get('tags', []),
        'categoryId': details['snippet']['categoryId'],
        'publish_time': details['snippet']['publishedAt'],
        'views': details['statistics']['viewCount'],
        'likes': details['statistics']['likeCount'],
        'comments': details['statistics'].get('commentCount'),
    }

In [30]:
videos = get_channel_videos(channel_id)
video_details = [get_video_details(video['snippet']['resourceId']['videoId']) for video in videos]

print(video_details)

[{'title': 'Get the gist, in a pinch. Pinch to Summarize, now available in Arc Search #arcbrowser #ios #tech', 'description': '', 'tags': ['arc', 'arc browser', 'the browser company', 'browser'], 'categoryId': '28', 'publish_time': '2024-02-22T18:56:13Z', 'views': '3264', 'likes': '312', 'comments': '13'}, {'title': 'We added your top requests to our new mobile browser, Arc Search', 'description': "We're back with our first What's New video of 2024! Hear from Nate, Jane, and Samir about everything that's new in Arc this week. \n\n🛠️ Just updated? v1.31.1 and v1.31.2 contain tiny but mighty hotfixes!\n\n00:30 Arc Search updates\n00:38 Your #1 request\n1:13 Introducing Incognito\n1:33 Pinch to Summarize\n2:20 Smarter Pinned Tabs\n2:40 Perf as a Priority\n3:07 Better Web Rendering\n3:26 It's your anniversary?!\n\n-- \n\nGet Arc on Mac 🖥️  https://arc.net\n\nTell us how you use Arc Search! 2 min survey here 👉 https://browserco.typeform.com/to/MM4HpZgu\n\nDon't have the app yet? Download Ar

# Funciones para obtener los comentarios y sub-comentarios de cada video

In [33]:
def fetch_replies(parent_id):
    replies = []

    response = youtube.comments().list(
        parentId=parent_id,
        part='snippet',
        maxResults=100,  # Adjust based on your needs
        textFormat="plainText"
    ).execute()

    for item in response['items']:
        reply = item['snippet']['textDisplay']
        replies.append(reply)

    return replies

In [36]:
def fetch_video_comments(video_id):
    all_comments = []

    # Fetch top-level comments
    response = youtube.commentThreads().list(
        videoId=video_id,
        part='snippet',
        maxResults=100,  # Adjust based on your needs
        textFormat="plainText"
    ).execute()

    for item in response['items']:
        top_level_comment = item['snippet']['topLevelComment']['snippet']['textDisplay']
        all_comments.append({'comment': top_level_comment, 'replies': []})

        # Check if there are replies to the comment
        if item['snippet']['totalReplyCount'] > 0:
            all_comments[-1]['replies'] = fetch_replies(item['snippet']['topLevelComment']['id'])

    return all_comments

## Final function to obtain all channel video data and comments

In [38]:
videos = get_channel_videos(channel_id)
video_details = [get_video_details(video['snippet']['resourceId']['videoId']) for video in videos]
video_comments = [fetch_video_comments(video['snippet']['resourceId']['videoId']) for video in videos]
print(video_details)
print(video_comments)

#for comment in video_comments: 
#    print("Comment:", comment['comment'])
#    for reply in comment['replies']:
#        print("Reply:", reply)

[{'title': 'Get the gist, in a pinch. Pinch to Summarize, now available in Arc Search #arcbrowser #ios #tech', 'description': '', 'tags': ['arc', 'arc browser', 'the browser company', 'browser'], 'categoryId': '28', 'publish_time': '2024-02-22T18:56:13Z', 'views': '3264', 'likes': '312', 'comments': '13'}, {'title': 'We added your top requests to our new mobile browser, Arc Search', 'description': "We're back with our first What's New video of 2024! Hear from Nate, Jane, and Samir about everything that's new in Arc this week. \n\n🛠️ Just updated? v1.31.1 and v1.31.2 contain tiny but mighty hotfixes!\n\n00:30 Arc Search updates\n00:38 Your #1 request\n1:13 Introducing Incognito\n1:33 Pinch to Summarize\n2:20 Smarter Pinned Tabs\n2:40 Perf as a Priority\n3:07 Better Web Rendering\n3:26 It's your anniversary?!\n\n-- \n\nGet Arc on Mac 🖥️  https://arc.net\n\nTell us how you use Arc Search! 2 min survey here 👉 https://browserco.typeform.com/to/MM4HpZgu\n\nDon't have the app yet? Download Ar

# Connection to MongoDB database to save all of the data

In [52]:
password_mongo = 'VpKKfKRnq5q07tMU'
uri = f"mongodb+srv://ytobio66:{password_mongo}@cluster-0.eq8w6mm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster-0"

# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))

# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)

ac-d1cgnzg-shard-00-01.eq8w6mm.mongodb.net:27017: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1000) (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms),ac-d1cgnzg-shard-00-02.eq8w6mm.mongodb.net:27017: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1000) (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms),ac-d1cgnzg-shard-00-00.eq8w6mm.mongodb.net:27017: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1000) (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms), Timeout: 30s, Topology Description: <TopologyDescription id: 65eddaac55352275040b672f, topology_type: ReplicaSetNoPrimary, servers: [<ServerDescription ('ac-d1cgnzg-shard-00-00.eq8w6mm.mongodb.net', 27017) server_type: Unknown, rtt: None, error=AutoReconnect('ac-d1cgnzg-sh

In [50]:
videos_collection = client['video_data']

#* """I want the structure of my database data to be the following:
#* 
#* {
#*   "video_id": "xxxx",
#*   "title": "Sample Title",
#*   "description": 'video description'
#*   "tags": ["tag1", "tag2"],
#*   "publish_time": "2020-01-01T00:00:00Z",
#*   "views": 1000,
#*   "likes": 500,
#*   "comments": 100,
#*   "comments_data": [
#*     {
#*       "comment": "Great video!",
#*       "replies": ["Thank you!", "Glad you liked it!"]
#*     },
#*   ]
#* }
#* """

Insertion of the video data, including the comments, into the MongoDB database: 

In [51]:
for index, video_detail in enumerate(video_details):
    # Incorporate comments into video_detail
    video_detail['comments_data'] = video_comments[index]
    videos_collection.insert_one(video_detail)

TypeError: 'Collection' object is not callable. If you meant to call the 'insert_one' method on a 'Database' object it is failing because no such method exists.