In [1]:
import feedparser
import transformers
import pandas as pd
from transformers import pipeline
from datetime import datetime, timedelta, timezone

# Initialize sentiment pipeline
pipe = pipeline(task='text-classification', model='ProsusAI/finbert')

# Ticker and keyword
ticker = 'AAPL'
keyword = 'apple'

# RSS Feed URL
rss_url = f'https://feeds.finance.yahoo.com/rss/2.0/headline?s={ticker}'
feed = feedparser.parse(rss_url)

# Data storage
data = []

# Time filter (past 5 years)
cutoff_date = (datetime.now().replace(tzinfo=timezone.utc) - timedelta(days=5*365))

for i, entry in enumerate(feed.entries):
    # Parse publication date
    published_date = datetime.strptime(entry.published, '%a, %d %b %Y %H:%M:%S %z')
    
    # Filter articles older than 5 years
    if published_date < cutoff_date:
        continue
    
    if keyword.lower() not in entry.summary.lower():
        continue
    
    # Perform sentiment analysis
    sentiment = pipe(entry.summary)[0]
    
    # Store in list
    data.append({
        'title': entry.title,
        'link': entry.link,
        'published': entry.published,
        'summary': entry.summary,
        'sentiment': sentiment['label'],
        'score': sentiment['score']
    })

# Convert to DataFrame
df = pd.DataFrame(data)

# Save as CSV
df.to_csv(fr'C:\Users\ivane\Dissertation\data\{ticker}_sentiment_dataset.csv', index=False)

print("Dataset saved as apple_sentiment_dataset.csv")


  from .autonotebook import tqdm as notebook_tqdm


Dataset saved as apple_sentiment_dataset.csv
