In [None]:
import requests
import pandas as pd
from keybert import KeyBERT
from finbert_embedding.embedding import FinbertEmbedding
from sentence_transformers import SentenceTransformer

# Initialize models (constructed once at import time and shared by the helpers below)
kw_model = KeyBERT("all-MiniLM-L6-v2")  # Keyword extractor used by extract_keywords()
finbert = FinbertEmbedding()  # NOTE(review): not referenced anywhere else in the visible code; sentiment is computed via sentiment_pipeline -- confirm whether this is still needed

# Your NewsAPI key (interpolated into the request made by fetch_news())
NEWS_API_KEY = "-"
# Terms that is_related_to_ndx() looks for among the extracted keywords
NASDAQ_KEYWORDS = ["Nasdaq", "tech stocks", "NDX", "Apple", "Microsoft", "Google", "Amazon", "Tesla"]




def fetch_news():
    """Fetch English-language "stock market" articles from NewsAPI.

    Queries the ``/v2/everything`` endpoint, authenticating with the
    module-level ``NEWS_API_KEY``.

    Returns:
        The list found under the ``"articles"`` key of the JSON response, or
        an empty list when that key is missing or null.

    Raises:
        requests.exceptions.RequestException: On connection failures or when
            the server does not answer within the timeout.
    """
    # Pass the query as ``params`` so requests handles the encoding of the
    # space in "stock market" instead of relying on a hand-built URL.
    params = {
        "q": "stock market",
        "language": "en",
        "apiKey": NEWS_API_KEY,
    }
    # Without an explicit timeout requests can wait on the server forever.
    response = requests.get(
        "https://newsapi.org/v2/everything",
        params=params,
        timeout=30,
    )
    data = response.json()
    # ``or []`` also covers a payload where "articles" is present but null.
    return data.get("articles") or []

def extract_keywords(text):
    """Return the top five one- or two-word keyphrases KeyBERT finds in *text*.

    English stop words are excluded. The result is passed through unchanged
    from ``kw_model.extract_keywords``; callers in this file unpack each
    entry as a ``(keyphrase, score)`` pair.
    """
    extraction_options = {
        "keyphrase_ngram_range": (1, 2),
        "stop_words": 'english',
        "top_n": 5,
    }
    return kw_model.extract_keywords(text, **extraction_options)

def is_related_to_ndx(keywords, terms=None):
    """Report whether any extracted keyword is a Nasdaq-related term.

    The comparison is case-insensitive. The extracted keyphrases come back
    lowercased (e.g. "tech stocks", "nyse tsm"), so a case-sensitive test
    against entries such as "Nasdaq" or "NDX" could never succeed.

    Args:
        keywords: Iterable of ``(keyphrase, score)`` pairs, as produced by
            ``extract_keywords``. Only the keyphrase is inspected.
        terms: Optional iterable of terms to match against. Defaults to the
            module-level ``NASDAQ_KEYWORDS``.

    Returns:
        True if at least one keyphrase equals one of the terms, ignoring
        case; False otherwise (including when *keywords* is empty).
    """
    if terms is None:
        terms = NASDAQ_KEYWORDS
    # Normalise once into a set so each keyword check is a single lookup.
    wanted = {term.casefold() for term in terms}
    return any(keyword.casefold() in wanted for keyword, _ in keywords)

from transformers import pipeline

# Load the FinBERT sentiment model as a Hugging Face text-classification
# pipeline. Built once at module level and reused by analyze_sentiment().
sentiment_pipeline = pipeline("text-classification", model="ProsusAI/finbert")

def analyze_sentiment(text):
    """Classify the sentiment of *text* with the FinBERT pipeline.

    Args:
        text: The text to classify.

    Returns:
        The predicted label string of the first (only) result, i.e.
        'positive', 'negative', or 'neutral'. Note this is a label, not a
        numeric score.
    """
    # Ask the tokenizer to clip over-long input to the model's maximum
    # length; without this, text longer than the model accepts raises an
    # error inside the forward pass instead of being classified.
    result = sentiment_pipeline(text, truncation=True)[0]
    return result['label']


def main():
    """Fetch news, keep Nasdaq-related articles, and save them with sentiment.

    For every article returned by ``fetch_news`` the title, description and
    content are combined, keywords are extracted, and articles whose
    keywords pass ``is_related_to_ndx`` are recorded together with their
    sentiment label. The result is written to ``filtered_news_ndx.csv`` in
    the current working directory and printed.
    """
    news_data = fetch_news()
    filtered_news = []

    for article in news_data:
        # Fields can be missing or explicitly null. ``.get(key, "")`` only
        # covers the missing case, so a null value would otherwise end up as
        # the literal text "None" inside the analysed string.
        title = article.get("title") or ""
        description = article.get("description") or ""
        content = article.get("content") or ""

        # Combine title and content for better keyword extraction
        full_text = f"{title}. {description} {content}"
        keywords = extract_keywords(full_text)

        # Skip anything that is not related to Nasdaq-100
        if not is_related_to_ndx(keywords):
            continue

        filtered_news.append({
            "title": title,
            "keywords": [phrase for phrase, _ in keywords],
            # Column name kept for compatibility; the value is a label string.
            "sentiment_score": analyze_sentiment(full_text),
        })

    # Convert to DataFrame and save
    df = pd.DataFrame(filtered_news)
    df.to_csv("filtered_news_ndx.csv", index=False)
    print(df)

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


Device set to use cpu


                                               title  \
0  Is Taiwan Semiconductor Manufacturing Company ...   

                                            keywords sentiment_score  
0  [tech stocks, tech stock, nyse tsm, taiwan sem...         neutral  
