In [1]:
import feedparser
import pandas as pd
from urllib.parse import quote
from bs4 import BeautifulSoup
import requests
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


# Raise pandas' display limits so wide frames and long headlines are shown in full
# instead of being truncated in the notebook output.
pd.set_option("display.max_columns", 250)   # maximum number of columns displayed
pd.set_option("display.max_rows", 250)      # maximum number of rows displayed
pd.set_option("display.max_colwidth", 250)  # maximum number of characters shown per cell

In [32]:
# Source - https://news.google.com/
# XML - https://news.google.com/rss?hl=cs&gl=CZ&ceid=CZ:cs
# Query - https://stackoverflow.com/questions/51537063/url-format-for-google-news-rss-feed

In [2]:
def analyze_sentiment(text):
    """Return the VADER compound sentiment score for ``text``.

    The analyzer is created on the first call and cached on the function
    object. This function is called once per headline, and building a new
    ``SentimentIntensityAnalyzer`` each time repeats its setup work for no
    benefit.

    Args:
        text: The string to score (for example a news headline).

    Returns:
        The ``'compound'`` entry of ``polarity_scores(text)``.
    """
    analyzer = getattr(analyze_sentiment, "_analyzer", None)
    if analyzer is None:
        # First call (or the cell was re-run and the function redefined).
        analyzer = SentimentIntensityAnalyzer()
        analyze_sentiment._analyzer = analyzer
    return analyzer.polarity_scores(text)['compound']

In [7]:
def parseRSS(rss_url):
    """Parse the feed at ``rss_url`` with feedparser and return the parsed result."""
    parsed_feed = feedparser.parse(rss_url)
    return parsed_feed

def getContent(rss_url, query):
    """Fetch an RSS feed and return the entries whose title contains ``query``.

    The match is a case-insensitive substring test against each entry's title.
    Only headline metadata is collected; article bodies are not fetched.

    Entries missing ``published`` or ``source`` no longer raise
    ``AttributeError``; the corresponding cell is left as ``None``. An entry
    without a title is treated as having an empty title.

    Args:
        rss_url: URL of the RSS feed to parse.
        query: Text that must appear in an entry's title for it to be kept.

    Returns:
        A ``pandas.DataFrame`` with columns ``Date``, ``Source``, ``Title`` and
        ``Sentiment`` (the headline's sentiment score), one row per matching
        entry. The columns are present even when nothing matches.
    """
    columns = ['Date', 'Source', 'Title', 'Sentiment']
    needle = query.lower()  # loop-invariant, so computed once
    records = []

    feed = parseRSS(rss_url)
    for item in feed.entries:
        title = item.get('title') or ''
        if needle not in title.lower():
            continue

        # Not every feed entry carries a <source> element.
        source = item.get('source') or {}
        records.append({
            'Date': item.get('published'),
            'Source': source.get('title'),
            'Title': title,
            'Sentiment': analyze_sentiment(title),  # sentiment of the headline only
        })

    # Passing `columns` keeps the frame's schema stable when `records` is empty.
    return pd.DataFrame(records, columns=columns)

def get_article_content(article_url, timeout=10):
    """Download a web page and return its text with the HTML markup removed.

    Args:
        article_url: URL of the article to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on an unresponsive
            host.

    Returns:
        The text of the page as produced by ``BeautifulSoup.get_text()``.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status, so an error page is not mistaken for article content.
        requests.exceptions.RequestException: On connection failures or timeouts.
    """
    response = requests.get(article_url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    return soup.get_text()

# Search term: sent to Google News in the URL and also used by getContent
# to filter the returned headlines.
query = "Bitcoin"
encoded_query = quote(query)  # percent-encode the term so it is safe inside the URL

# Google News locale query-string fragments, keyed by a short language code.
lang = dict(
    cz='hl=cs&gl=CZ&ceid=CZ:cs',
    en='hl=en&gl=US&ceid=US:en',
)

rss_url = (
    "https://news.google.com/rss/search"
    f"?q={encoded_query}&{lang['en']}"
)

# Fetch the feed and keep only the headlines that mention the query.
df = getContent(rss_url, query)

display(df)



Unnamed: 0,Date,Source,Title,Sentiment
0,"Mon, 10 Jul 2023 17:43:08 GMT",Bloomberg,"Bitcoin (BTC) May Reach $120000 by End of 2024, Standard Chartered Says - Bloomberg",0.0258
1,"Mon, 10 Jul 2023 13:54:00 GMT",Fortune,"Robert F. Kennedy Jr. said he ‘was not an investor’ in Bitcoin. Financial disclosure form reveals he holds north of $100,000 in the cryptocurrency - Fortune",0.0
2,"Tue, 11 Jul 2023 11:40:20 GMT",Cointelegraph,Mt. Gox repayment date looming: Is Bitcoin in trouble? - Cointelegraph,-0.4939
3,"Tue, 11 Jul 2023 11:45:00 GMT",CoinDesk,"Marex Unveils Bitcoin, Ether-Linked Long Strategy With Dollar Index as Hedge - CoinDesk",0.0
4,"Tue, 11 Jul 2023 10:20:13 GMT",Benzinga,"If You Invested $100 In Bitcoin When Snoop Dogg Called Crypto A 'Beautiful Industry', Here's How Much You'd Have Now - Benzinga",0.5994
5,"Sun, 09 Jul 2023 11:45:25 GMT",Forbes,"‘Global Governance’—Leak Reveals ‘Unprecedented’ Plan For Crypto That Could Play Havoc With The Price Of Bitcoin, Ethereum, BNB, XRP, Cardano, Dogecoin, Solana, Tron And Litecoin - Forbes",-0.3612
6,"Mon, 10 Jul 2023 15:32:52 GMT",Finbold - Finance in Bold,"Crypto community with 84% historical accuracy sets Bitcoin price for July 31, 2023 - Finbold - Finance in Bold",0.3818
7,"Tue, 11 Jul 2023 00:57:23 GMT",Cointelegraph,Bitcoin halving bullish for Saylor’s MicroStrategy: Berenberg Capital - Cointelegraph,0.0
8,"Tue, 11 Jul 2023 00:04:59 GMT",Decrypt,Grayscale Cries Foul Over SEC Approval of a Different Kind of Bitcoin ETF - Decrypt,0.1027
9,"Mon, 10 Jul 2023 22:50:00 GMT",CoinDesk,Bitcoin Falls Back Below $31K After Late Monday Surge - CoinDesk,0.0
