In [16]:
import os
import heapq
import googleapiclient.discovery

# YouTube Data API key, read from the YOUTUBE_API_KEY environment variable.
# os.getenv returns None when the variable is unset, so API_KEY may be None here.
API_KEY = os.getenv("YOUTUBE_API_KEY")

In [17]:
import pandas as pd

In [18]:
# Load the news-sentiment JSON export from the working directory into a DataFrame,
# then preview the first three rows as the cell output.
news_titles = pd.read_json("./news_sentiment_data_20250220_115715.json")
news_titles.head(3)

Unnamed: 0,ticker,title,url,time_published,authors,summary,source,overall_sentiment_score,overall_sentiment_label,ticker_sentiment,topics,collection_timestamp
0,AAPL,Can the Latest iPhone 16e Push the Apple Stock...,https://www.zacks.com/stock/news/2418900/can-t...,20250220T163200,[Aniruddha Ganguly],Although AAPL's AI push and growing Services b...,Zacks Commentary,0.296749,Somewhat-Bullish,"[{'ticker': 'MSFT', 'relevance_score': '0.0870...","[{'topic': 'Retail & Wholesale', 'relevance_sc...",2025-02-20T19:57:09.571019
1,AAPL,Apple Stock Set For Streaming Lift? 'Severance...,https://www.benzinga.com/general/entertainment...,20250220T161828,[Chris Katje],"AppleTV+ has a streaming hit with ""Severance.""...",Benzinga,0.219397,Somewhat-Bullish,"[{'ticker': 'NFLX', 'relevance_score': '0.0525...","[{'topic': 'Technology', 'relevance_score': '1...",2025-02-20T19:57:09.571019
2,AAPL,"LKQ Q4 Earnings Surpass Expectations, Revenues...",https://www.zacks.com/stock/news/2418843/lkq-q...,20250220T155300,[Zacks Equity Research],LKQ reports mixed fourth-quarter results and e...,Zacks Commentary,0.056637,Neutral,"[{'ticker': 'DAN', 'relevance_score': '0.18625...","[{'topic': 'Earnings', 'relevance_score': '0.9...",2025-02-20T19:57:09.571019


In [19]:
# Distinct ticker symbols present in the loaded news data.
news_titles["ticker"].unique()

array(['AAPL', 'NVDA', 'MSFT', 'AMZN', 'GOOG'], dtype=object)

In [20]:
def youtube_search(query, max_results=10):
    """Search YouTube for videos matching ``query``.

    The search.list endpoint only accepts ``maxResults`` values up to 50 per
    request, so larger ``max_results`` values are collected by following
    ``nextPageToken`` across several requests.

    Args:
        query: Free-text search string sent as the ``q`` parameter.
        max_results: Maximum number of videos to return.

    Returns:
        A list of dicts with the keys ``video_id``, ``title`` and
        ``published_at``, in the order the API returned them. The list holds
        fewer than ``max_results`` entries when the search runs out of results.
    """
    youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=API_KEY)

    videos = []
    next_page_token = None

    while len(videos) < max_results:
        # Call the search.list method to retrieve one page of results
        request = youtube.search().list(
            part="snippet",
            q=query,
            type="video",
            # Never ask for more than the per-request cap or than is still needed.
            maxResults=min(50, max_results - len(videos)),
            pageToken=next_page_token
        )
        response = request.execute()

        # Extract video IDs, titles, and publication dates from the response
        items = response.get("items", [])
        for item in items:
            videos.append({
                "video_id": item["id"]["videoId"],
                "title": item["snippet"]["title"],
                "published_at": item["snippet"]["publishedAt"]
            })

        next_page_token = response.get("nextPageToken")
        # Stop when the API has nothing more to give; the empty-page check
        # prevents spinning on a token that keeps returning no items.
        if not items or not next_page_token:
            break

    return videos[:max_results]


def get_video_comments(video_id, max_results=100):
    """Collect top-level comments for one video, paging until the cap is hit.

    Args:
        video_id: ID of the video whose comment threads are listed.
        max_results: Upper bound on the number of comments returned.

    Returns:
        A list of dicts with the keys ``author``, ``text``, ``likes`` and
        ``published_at``, one per top-level comment, in API order.
    """
    client = googleapiclient.discovery.build("youtube", "v3", developerKey=API_KEY)

    collected = []
    page_token = None
    remaining = max_results

    while remaining > 0:
        # Each request asks for at most 100 threads, and never more than are still needed.
        page = client.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=min(100, remaining),
            textFormat="plainText",
            pageToken=page_token
        ).execute()

        for thread in page.get("items", []):
            top_level = thread["snippet"]["topLevelComment"]["snippet"]
            collected.append({
                "author": top_level["authorDisplayName"],
                "text": top_level["textDisplay"],
                "likes": top_level["likeCount"],
                "published_at": top_level["publishedAt"]
            })

        page_token = page.get("nextPageToken")
        if not page_token:
            # No further pages are available.
            break

        remaining = max_results - len(collected)

    return collected

def _is_comments_disabled_error(error) -> bool:
    """Return True when ``error`` looks like YouTube's 403 ``commentsDisabled`` reply."""
    status = getattr(getattr(error, "resp", None), "status", None)
    content = getattr(error, "content", b"")
    if isinstance(content, bytes):
        content = content.decode("utf-8", errors="replace")
    return str(status) == "403" and "commentsDisabled" in str(content)


def get_yt_comments_for_all_tickers(
    tickers: list[str] = ["AAPL", "MSFT", "NVDA", "AMZN", "GOOG/GOOGL", "META", "TSLA"],
    max_video_results: int = 35,
    max_comments: int = 100
) -> list[dict]:
    """Search YouTube for each ticker and gather each video's most-liked comments.

    For every ticker, the query ``"latest news for {ticker} stock"`` is passed
    to ``youtube_search``; each returned video then has its comments fetched
    with ``get_video_comments`` and ordered by like count, highest first.

    A video whose comments are disabled is kept in the output with an empty
    ``top_comments`` list instead of aborting the whole run. Any other API
    error is re-raised unchanged.

    Args:
        tickers: Ticker strings interpolated verbatim into the search query.
            The default list is only read, never mutated.
        max_video_results: Maximum number of videos requested per ticker.
        max_comments: Maximum number of comments requested per video, and the
            cap on ``top_comments``.

    Returns:
        One dict per video with the keys ``ticker``, ``video_id``,
        ``video_title``, ``published_at`` and ``top_comments``.
    """
    # Local import keeps this block self-contained; the package is already a
    # dependency of this notebook through googleapiclient.discovery.
    from googleapiclient.errors import HttpError

    parsed_video_data = []
    search_template = """latest news for {ticker} stock"""

    for ticker in tickers:
        for ticker_data in youtube_search(
                query=search_template.format(ticker=ticker),
                max_results=max_video_results
            ):

            try:
                comments = get_video_comments(ticker_data["video_id"], max_results=max_comments)
            except HttpError as error:
                # Only the "comments disabled" case is tolerated; quota, auth
                # and other failures must still surface to the caller.
                if not _is_comments_disabled_error(error):
                    raise
                comments = []

            # At most max_comments were requested, so this acts as a sort by
            # likes in descending order.
            top_comments = heapq.nlargest(max_comments, comments, key=lambda item: item["likes"])

            parsed_video_data.append({
                "ticker": ticker,
                "video_id": ticker_data["video_id"],
                "video_title": ticker_data["title"],
                "published_at": ticker_data["published_at"],
                "top_comments": top_comments
            })

    return parsed_video_data

In [22]:
# Example live call; uncommenting it sends real YouTube Data API requests:
# get_yt_comments_for_all_tickers(tickers=["NVDA"], max_video_results=2, max_comments=10)

In [29]:
import pprint

# Two hand-written records in the same shape as the items returned by
# get_yt_comments_for_all_tickers, used to try out the ordering below.
comments = [
    {
        "ticker": "AAPL",
        "video_id": "XYZ123",
        "video_title": "Apple Stock Latest News!",
        "published_at": "2024-03-05T10:00:00Z",
        "top_comments": [
            {"author": "John Doe", "text": "Great analysis!", "likes": 500,
             "published_at": "2024-03-05T11:00:00Z"},
        ],
    },
    {
        "ticker": "AAPL",
        "video_id": "ZZZZZ",
        "video_title": "Apple Stock Latest News!",
        "published_at": "2024-03-05T10:00:00Z",
        "top_comments": [
            {"author": "John Doe", "text": "Great analysis!", "likes": 7000,
             "published_at": "2024-03-05T11:00:00Z"},
        ],
    },
]


def _most_liked(video):
    """Highest like count among a video's top comments, or 0 when it has none."""
    if not video["top_comments"]:
        return 0
    return max(entry["likes"] for entry in video["top_comments"])


# Videos ordered by their single most-liked comment, highest first.
sorted_comments = sorted(comments, key=_most_liked, reverse=True)

pprint.pprint(sorted_comments)

[{'published_at': '2024-03-05T10:00:00Z',
  'ticker': 'AAPL',
  'top_comments': [{'author': 'John Doe',
                    'likes': 7000,
                    'published_at': '2024-03-05T11:00:00Z',
                    'text': 'Great analysis!'}],
  'video_id': 'ZZZZZ',
  'video_title': 'Apple Stock Latest News!'},
 {'published_at': '2024-03-05T10:00:00Z',
  'ticker': 'AAPL',
  'top_comments': [{'author': 'John Doe',
                    'likes': 500,
                    'published_at': '2024-03-05T11:00:00Z',
                    'text': 'Great analysis!'}],
  'video_id': 'XYZ123',
  'video_title': 'Apple Stock Latest News!'}]


## Example item in the output list


{
    "ticker": "AAPL",
    "video_id": "XYZ123",
    "video_title": "Apple Stock Latest News!",
    "published_at": "2024-03-05T10:00:00Z",
    "top_comments": [
        {
            "author": "John Doe",
            "text": "Great analysis!",
            "likes": 500,
            "published_at": "2024-03-05T11:00:00Z"
        }
    ]
}
