In [3]:
#Imports
from transformers import pipeline
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np


# Sentiment classifier: a Hugging Face `pipeline` for the "text-classification" task,
# built from the ProsusAI/finbert checkpoint. It is called on a sample article further down.
pipe = pipeline("text-classification", model="ProsusAI/finbert")


In [5]:
# Load the ProsusAI/finbert checkpoint directly as a tokenizer plus a
# sequence-classification model (instead of going through `pipeline`).
# NOTE(review): `tokenizer` and `model` are not referenced by any other cell visible
# here, and the `pipe` object is built from the same checkpoint name — confirm both
# loads are needed.
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

In [19]:
## Data cleaning (steps listed as notes; none are implemented in the visible cells) ##
# - Remove brackets
# - Remove URLs
# - Remove HTML tags
# - Remove new lines? (open question)

## Model performance metrics ##
# TP / TN = true positives / negatives, FP / FN = false positives / negatives
# Accuracy  = (TP + TN) / (TP + TN + FP + FN)
# Precision = TP / (TP + FP)
# Recall    = TP / (TP + FN)
# F1 score  = 2 * (Precision * Recall) / (Precision + Recall)

# Classify one sample earnings article with `pipe`; the text is passed as-is,
# without any of the cleaning steps listed above.
# NOTE(review): the recorded output for this cell shows the label 'NEGATIVE' in upper
# case — verify it was produced by the FinBERT pipeline and not by a stale run.
pipe("Alphabet Reports Q4 Earnings, Faces Slower Growth in Key Areas March 8, 2025 – Mountain View, CA – Alphabet Inc. (NASDAQ: GOOGL), the parent company of Google, released its financial results for the fourth quarter of 2024, showing a year-over-year revenue increase of 5%, reaching $82.1 billion. While the company reported growth, the results came in slightly below analyst expectations of $84.5 billion. Net income for the quarter was $14.1 billion, or $20.25 per share, compared to $15.7 billion, or $22.50 per share, in the same quarter of 2023. The adjusted operating income totaled $19.3 billion, reflecting a year-over-year change of -6%. Sundar Pichai, CEO of Alphabet, mentioned, “We are continuing to make progress, though we are seeing some evolving trends in our key business segments. We remain committed to adapting and optimizing our strategies to ensure long-term growth as we address the dynamics in the market.” Alphabet's advertising business generated $67.5 billion in the quarter, growing 3% compared to the prior year, while YouTube's advertising revenue saw a more modest increase of 2%. Google Cloud, while still showing growth, reported a slower pace with a 7% increase in revenue, totaling $7.5 billion. Looking ahead, Alphabet has provided guidance for the first quarter of 2025, with revenue expected in the range of $80 to $82 billion, and earnings per share forecasted between $19.50 and $20.00. Shares of Alphabet moved lower by 5% in after-hours trading following the earnings release.")

[{'label': 'NEGATIVE', 'score': 0.9787818789482117}]

In [1]:
# Import necessary libraries
import requests
import pandas as pd
import json
import time
from datetime import datetime, timedelta
import os
from dotenv import load_dotenv
from Data.db import Database
# Shared database handle; store_articles_in_db writes every article through it.
db = Database()

# Define base URL for Alpha Vantage API
ALPHA_VANTAGE_BASE_URL = 'https://www.alphavantage.co/query'

# The API key is a credential and must not live in the notebook source.
# load_dotenv() copies variables from a local .env file (if one exists) into the
# process environment, so the key can be supplied either through .env or through
# a real environment variable named ALPHA_VANTAGE_API_KEY.
# NOTE(review): a key was previously committed in this cell — rotate it.
load_dotenv()
API_KEY = os.getenv('ALPHA_VANTAGE_API_KEY', '')
if not API_KEY:
    print("Warning: ALPHA_VANTAGE_API_KEY is not set; Alpha Vantage requests will be rejected")

# Function to fetch news from Alpha Vantage API
def fetch_news(ticker=None, topics=None, days_offset=0, chunk_size=30, timeout=30):
    """
    Fetch news articles from Alpha Vantage NEWS_SENTIMENT endpoint

    The requested window is [now - days_offset - chunk_size, now - days_offset].
    Both bounds are formatted with a fixed 00:00 time of day, so with
    days_offset=0 the window ends at today's midnight and excludes anything
    published later today.

    Args:
        ticker (str): Stock ticker symbol to filter news by
        topics (str): Topic to filter news by
        days_offset (int): Days offset from current date (end of the window)
        chunk_size (int): Size of each chunk in days (typically 30 for monthly)
        timeout (float): Seconds to wait for the HTTP request before giving up

    Returns:
        dict: Parsed API response. On a transport/HTTP/JSON failure an empty
        feed ({"feed": []}) is returned. When the API answers with HTTP 200 but
        no "feed" key, the response is returned unchanged and any message found
        under "Error Message", "Note" or "Information" is printed.
    """
    def _redact(text):
        # Request URLs (and therefore error messages) contain the API key as a
        # query parameter; mask it before it reaches notebook output.
        text = str(text)
        return text.replace(API_KEY, '***') if API_KEY else text

    # End date is now minus the offset; start date is one chunk earlier
    end_date = datetime.now() - timedelta(days=days_offset)
    start_date = end_date - timedelta(days=chunk_size)

    # Format dates as required by Alpha Vantage (YYYYMMDDTHHMM)
    from_date = start_date.strftime('%Y%m%dT0000')
    to_date = end_date.strftime('%Y%m%dT0000')

    print(f"Fetching news from {from_date} to {to_date}")

    # Parameters for API request
    params = {
        'function': 'NEWS_SENTIMENT',
        'time_from': from_date,
        'time_to': to_date,
        'limit': 1000,  # Maximum number of news items to retrieve
        'apikey': API_KEY,
    }

    # Add optional parameters if provided
    if ticker:
        params['tickers'] = ticker
    if topics:
        params['topics'] = topics

    try:
        # Without a timeout a stalled connection would block the cell forever
        response = requests.get(ALPHA_VANTAGE_BASE_URL, params=params, timeout=timeout)
        response.raise_for_status()  # Raise exception for HTTP errors
        result = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching news: {_redact(e)}")
        return {"feed": []}
    except ValueError as e:
        # Some requests versions raise a plain ValueError for a non-JSON body
        print(f"Error decoding news response: {_redact(e)}")
        return {"feed": []}

    # Rate-limit and parameter errors arrive as HTTP 200 with a message instead
    # of a feed; surface them so they are not mistaken for "no articles".
    if isinstance(result, dict) and "feed" not in result:
        for message_key in ("Error Message", "Note", "Information"):
            if message_key in result:
                print(f"Alpha Vantage returned no feed ({message_key}): {_redact(result[message_key])}")

    return result

# Function to store articles in the database
def store_articles_in_db(news_data):
    """
    Persist every article of an Alpha Vantage response through the module-level `db`.

    Each article is written with `db.store_live_news_articles`. A failure on one
    article is printed together with the article and does not stop the loop.

    Args:
        news_data (dict): News data from Alpha Vantage API; articles are read
            from its "feed" entry
    Returns:
        int: Number of articles stored (0 when there is no "feed" entry)
    """
    if not news_data or "feed" not in news_data:
        print("No news feed data found in the response")
        return 0

    saved = 0
    for item in news_data["feed"]:
        try:
            # Authors arrive as a list; store them as one comma-separated string
            author_list = item.get('authors')
            authors_text = ", ".join(author_list) if author_list else ""

            # Lists of topics / per-ticker sentiment are stored as JSON text
            topics_json = json.dumps(item.get('topics', []))
            ticker_sentiment_json = json.dumps(item.get('ticker_sentiment', []))

            # Turn YYYYMMDDTHHMMSS into ISO-8601; keep the raw value when it
            # does not match that layout
            published = item.get('time_published', '')
            if published and len(published) >= 8:
                try:
                    published = datetime.strptime(published, '%Y%m%dT%H%M%S').isoformat()
                except ValueError:
                    pass

            record = {
                'date': published,
                'authors': authors_text,
                'source_domain': item.get('source_domain', ''),
                'source_name': item.get('source', ''),
                'title': item.get('title', ''),
                'summary': item.get('summary', ''),
                'url': item.get('url', ''),
                'topics': topics_json,
                'ticker_sentiment': ticker_sentiment_json,
                'overall_sentiment_label': item.get('overall_sentiment_label', ''),
                'overall_sentiment_score': item.get('overall_sentiment_score', 0.0),
                # The three fields below are written as fixed placeholders
                'event_type': '',
                'sentiment_label': '',
                'sentiment_score': 0.0,
                'fetch_timestamp': datetime.now().isoformat(),
            }
            db.store_live_news_articles(**record)
        except Exception as e:
            print(f"Error storing article: {e}")
            print(f"Article data: {item}")
        else:
            saved += 1

    return saved

# Function to check for duplicate articles
def is_duplicate_article(url, title):
    """
    Report whether an article is already stored (placeholder implementation).

    Intended to look up existing articles by URL or by a very similar title.
    No lookup is performed yet: both arguments are ignored and every article
    is treated as new.

    Args:
        url (str): URL of the article
        title (str): Title of the article

    Returns:
        bool: Always False until the database check is implemented
    """
    # TODO: query the database for an existing article with this URL or title
    return False

# Function to fetch and save news for multiple tickers or topics
def fetch_and_save_news_monthly(tickers=None, topics=None, starting_month=0, months_back=12,
                                request_delay=12):
    """
    Fetch news for multiple tickers or topics in monthly chunks and save to database

    Months are approximated as 30 days. One 30-day window is requested for each
    month offset m in range(starting_month, months_back); the window for offset m
    ends m * 30 days before now. Within a window all tickers are processed first,
    then all topics, with one API request per ticker/topic.

    Args:
        tickers (list): List of ticker symbols
        topics (list): List of topics
        starting_month (int): First month offset to fetch (in months, 0 = the
            most recent 30 days)
        months_back (int): Month offset at which to stop (exclusive)
        request_delay (float): Seconds to sleep after every API request; the
            default of 12 matches a limit of 5 requests per minute
    """
    def _fetch_and_store(label, value, days_offset, filter_kwargs):
        # Run one request for a single ticker/topic and return how many articles were stored
        print(f"Fetching news for {label} {value}...")
        news = fetch_news(days_offset=days_offset, chunk_size=30, **filter_kwargs)

        if news and "feed" in news and news["feed"]:
            stored = store_articles_in_db(news)
            print(f"Saved {stored} articles to database for {label} {value}")
        else:
            stored = 0
            print(f"No articles found for {label} {value}")

        # Respect API rate limits
        time.sleep(request_delay)
        return stored

    total_stored = 0

    # Walk backwards in 30-day steps, expressed as a day offset from now
    for day_offset in range(starting_month * 30, months_back * 30, 30):
        print(f"\n======= Fetching month {day_offset // 30 + 1} of {months_back} =======")

        for ticker in tickers or []:
            total_stored += _fetch_and_store("ticker", ticker, day_offset, {"ticker": ticker})

        for topic in topics or []:
            total_stored += _fetch_and_store("topic", topic, day_offset, {"topics": topic})

    print(f"### News fetching and storage completed. Total articles stored: {total_stored} ###")

# Function to parse the Alpha Vantage response and print summary stats
def print_news_summary(news_data):
    """
    Print summary statistics of the news data retrieved

    Prints the number of items, a per-source article count and the distribution
    of `overall_sentiment_label` over the five known labels. Labels are compared
    after replacing "_" with "-", so "Somewhat_Bullish" and "Somewhat-Bullish"
    are counted together. Labels outside the five known ones are not counted.

    Args:
        news_data (dict): News data from Alpha Vantage API
    """
    if not news_data or "feed" not in news_data:
        print("No news data found")
        return

    # Treat an explicit null feed like an empty one
    feed = news_data.get("feed") or []
    print(f"Retrieved {len(feed)} news items")

    # Count by source
    sources = {}
    for item in feed:
        source = item.get("source", "Unknown")
        sources[source] = sources.get(source, 0) + 1

    print("\nSources breakdown:")
    for source, count in sources.items():
        print(f"- {source}: {count} articles")

    # Sentiment distribution; keys consistently use the hyphenated spelling
    sentiment_counts = {
        "Bullish": 0,
        "Somewhat-Bullish": 0,
        "Neutral": 0,
        "Somewhat-Bearish": 0,
        "Bearish": 0
    }

    for item in feed:
        sentiment = item.get("overall_sentiment_label", "Unknown")
        if isinstance(sentiment, str):
            # Accept either separator so no spelling variant is silently dropped
            sentiment = sentiment.replace("_", "-")
        if sentiment in sentiment_counts:
            sentiment_counts[sentiment] += 1

    print("\nSentiment distribution:")
    for sentiment, count in sentiment_counts.items():
        print(f"- {sentiment}: {count} articles")

# Example usage
if __name__ == "__main__":
    # Candidate tickers and topics to track.
    # NOTE(review): neither list is passed to the call below, which requests
    # 'AAPL' only — confirm whether they were meant to be used.
    target_tickers = ['AAPL', 'MSFT', 'TSLA', 'AMZN', 'GOOGL', 'CRYPTO:BTC']
    target_topics = ['Economy', 'Technology', 'Finance']

    # Fetch and save AAPL news for month offsets 21, 22 and 23
    # (30-day windows ending 630, 660 and 690 days before now).
    fetch_and_save_news_monthly(tickers=['AAPL'], topics=None, starting_month=21, months_back=24)

    # Example: fetch and print summary for Bitcoin news over a 7-day window
    # (fetch_news takes days_offset / chunk_size; it has no days_back parameter)
    # btc_news = fetch_news(ticker='CRYPTO:BTC', days_offset=0, chunk_size=7)
    # print_news_summary(btc_news)

Connected to database: ../Data/data.db
Using database at: C:\Users\kemoo\PycharmProjects\Stock_AI_Predictor\Data\data.db

Fetching news for ticker AAPL...
Fetching news from 20230523T0000 to 20230622T0000
No articles found for ticker AAPL

Fetching news for ticker AAPL...
Fetching news from 20230423T0000 to 20230523T0000
No articles found for ticker AAPL
### News fetching and storage completed. Total articles stored: 0 ###
