In [91]:
import json
from datetime import *

import dateutil
import dateutil.parser
import feedparser as fp
import newspaper
import nltk
import pandas as pd
from IPython.display import display
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# nltk.download('punkt')

# Notebook-wide pandas display settings.
pd.set_option("display.max_columns", 20)    # number of columns displayed (default is 20)
pd.set_option("display.max_rows", 60)       # number of rows displayed (default is 60)
pd.set_option("display.max_colwidth", 500)  # number of characters shown per cell (default is 50)

In [92]:
def load_sources(file):
    """Load the news-source definitions from a JSON file.

    Parameters
    ----------
    file : str or path-like
        Path of the JSON source database to read.

    Returns
    -------
    object
        The deserialized JSON document (whatever top-level value the file holds).

    Raises
    ------
    Exception
        If the file cannot be opened/read or does not contain valid JSON.
        The underlying error is attached as ``__cause__`` so the traceback
        still shows what actually went wrong.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
        with open(file, encoding="utf-8") as handle:
            sources = json.load(handle)
    except (OSError, ValueError) as exc:
        # OSError: missing/unreadable file. ValueError: covers json.JSONDecodeError
        # and UnicodeDecodeError.
        raise Exception(f'Error in "load_sources()": could not load "{file}" ({exc})') from exc
    print(f'INFO: Using custom "{file}" as source file.')
    return sources

# Path of the JSON source database, and its parsed contents.
sources = "sources.json"
data = load_sources(file=sources)

INFO: Using custom "sources.json" as source file.


In [93]:
def analyze_sentiment(text):
    """Return the VADER compound polarity score for ``text``.

    Parameters
    ----------
    text : str
        The text to score.

    Returns
    -------
    The ``'compound'`` entry of ``SentimentIntensityAnalyzer.polarity_scores(text)``.
    """
    # Constructing the analyzer is the expensive step and this function is called
    # for every title and body, so build it once on first use and reuse it afterwards.
    analyzer = getattr(analyze_sentiment, "_analyzer", None)
    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()
        analyze_sentiment._analyzer = analyzer
    return analyzer.polarity_scores(text)['compound']

In [94]:
#print(data['Reuters']['rss'][0])
def print_scrape_status(count):
    """Print a one-line progress message with the number of articles scraped.

    The line ends with a carriage return instead of a newline, so the next
    status message is written over the previous one.
    """
    status_line = f"Scraped {count} articles"
    print(status_line, end="\r")

def _parse_published(entry):
    """Return the entry's ``published`` value parsed to a datetime, or None if missing or unparseable."""
    published = getattr(entry, 'published', None)
    if not published:
        return None
    try:
        return dateutil.parser.parse(published)
    except (ValueError, OverflowError) as e:
        # One malformed timestamp must not abort the whole scrape.
        print(e)
        print('continuing...')
        return None


def _build_article(source, entry, article_date):
    """Download, parse and score one feed entry; return its record as a dict."""
    page = newspaper.Article(entry.link)
    page.download()
    page.parse()
    page.nlp()
    return {
        'source': source,
        'url': entry.link,
        'date': article_date.strftime('%Y-%m-%d'),
        # %Z prints the timezone name carried by the feed timestamp; no conversion is applied.
        'time': article_date.strftime('%H:%M:%S %Z'),
        'title': page.title,
        'title sentiment': analyze_sentiment(page.title),
        'summary': page.summary,
        # NOTE(review): this score is computed from the full article text, not from
        # the summary stored above -- confirm that is intended.
        'summary sentiment': analyze_sentiment(page.text),
        'keywords': page.keywords,
        'image_url': page.top_image,
    }


def scrape(data,news_date):
    """Scrape the articles published on ``news_date`` from every RSS feed in ``data``.

    Parameters
    ----------
    data : mapping
        Maps a source name to a dict whose ``'rss'`` entry is an iterable of feed URLs.
    news_date : date, datetime or str
        Day to keep. Objects with ``strftime`` are formatted as ``YYYY-MM-DD``;
        anything else is compared via ``str(news_date)``.

    Returns
    -------
    list of dict
        One record per scraped article with the keys ``source``, ``url``, ``date``,
        ``time``, ``title``, ``title sentiment``, ``summary``, ``summary sentiment``,
        ``keywords`` and ``image_url``.

    Raises
    ------
    Exception
        If iterating the sources/feeds fails (for example a source without an
        ``'rss'`` key). The original error is attached as ``__cause__``.
        Failures on individual entries are printed and skipped instead.

    Notes
    -----
    The entry's date is taken in whatever timezone the feed reports; it is not
    converted before being compared with ``news_date``.
    """
    if hasattr(news_date, 'strftime'):
        # Also drops the time part when a datetime is passed.
        target_day = news_date.strftime('%Y-%m-%d')
    else:
        target_day = str(news_date)

    articles_list = []
    try:
        for source, feeds in data.items():
            for url in feeds['rss']:
                for entry in fp.parse(url).entries:
                    article_date = _parse_published(entry)
                    if article_date is None:
                        continue
                    if article_date.strftime('%Y-%m-%d') != target_day:
                        continue
                    try:
                        article = _build_article(source, entry, article_date)
                    except Exception as e:
                        # Best effort: report the failing article and move on.
                        print(e)
                        print('continuing...')
                        continue
                    articles_list.append(article)
                    print_scrape_status(len(articles_list))
    except Exception as exc:
        # Keep the original generic error type/message, but preserve the cause.
        raise Exception('Error in "Scraper.scrape()"') from exc
    return articles_list

# Collect the records for every article dated today (date.today() uses the local clock).
output = scrape(data, news_date=date.today())

Scraped 19 articles

In [95]:
# One row per scraped article; show the overview columns, most negative score first.
df = pd.DataFrame(output)
display(
    df.loc[:, ['source', 'time', 'title', 'title sentiment', 'summary', 'summary sentiment']]
    .sort_values(by='summary sentiment')
)

Unnamed: 0,source,time,title,title sentiment,summary,summary sentiment
4,New York Times,00:28:29 UTC,TikTok Pushes Back Against Claims It Fuels Antisemitism,0.0,"TikTok pushed back on Thursday against growing claims in recent weeks that it had failed to protect Jewish users and had pushed pro-Palestinian content in the United States.\nMany social networks have come under criticism for spreading misinformation and hate speech during the Israel-Hamas war.\nTikTok has the added scrutiny of being owned by the Chinese company ByteDance.\nWhile Beijing has pitched itself as a neutral broker in the dispute, a surge of antisemitism and anti-Israeli sentiment...",-0.9872
8,New York Times,12:11:32 UTC,"Guilty on All Counts, Bankman-Fried Completes a Fall From Grace",0.0,"From crypto wunderkind to felonIt took jurors just four-and-a-half hours yesterday to reach a verdict in the monthlong fraud trial of Sam Bankman-Fried, the founder of the fallen cryptocurrency exchange FTX.\nThe result was unanimous: guilty on all counts.\nBankman-Fried has now completed a narrative arc from whiz-kid founder of a crypto empire to fraudster who stole billions in customer funds.\nProsecutors moved to paint the FTX founder — who appeared on magazine covers and hobnobbed with f...",-0.9766
17,New York Times,04:12:13 UTC,Microsoft Criticized for Embedding ‘Crass’ A.I. Poll Beside News Article,-0.3612,"An auto-generated poll that Microsoft embedded on its news aggregating platform alongside a Guardian article was “crass” and caused The Guardian significant reputation damage, the newspaper said on Thursday.\nThe poll, which was posted last week next to an article about a woman who was found dead in a school bathroom in Australia, asked readers to speculate on the cause of the woman’s death.\nIt gave three choices: murder, accident or suicide.\nThe Guardian said the poll was created using ge...",-0.9712
3,New York Times,01:08:14 UTC,Sam Bankman-Fried Is Found Guilty of 7 Counts of Fraud and Conspiracy,-0.875,"Sam Bankman-Fried, the tousle-haired mogul who founded the FTX cryptocurrency exchange, was convicted on Thursday of seven charges of fraud and conspiracy after a monthlong trial that laid bare the rampant hubris and risk-taking across the crypto industry.\nA jury of nine women and three men took just over four hours of deliberation on Thursday to reach a verdict, convicting Mr. Bankman-Fried of wire fraud, conspiracy and money laundering.\nMr. Bankman-Fried, 31, is expected to appeal.\nBefo...",-0.9666
6,New York Times,13:00:23 UTC,Open Enrollment Begins for Affordable Care Act Health Plans,0.4939,"Unsubsidized monthly premiums — the “sticker” price — for a benchmark silver plan are rising 4.5 percent on average as a result of inflation and greater use of health care services since the pandemic, according to a KFF analysis.\n(Plans are grouped by metal levels, ranging from bronze plans, which have low premiums but higher out-of-pocket costs, to gold and platinum plans, which have higher premiums and lower out-of-pocket costs.)\nThe average monthly premium for a benchmark silver plan is...",-0.6059
11,New York Times,04:01:26 UTC,A Giant Leap for the Leap Second. Is Humankind Ready?,0.3612,"For 50 years, the international community has carefully and precariously balanced two different ways of keeping time.\nThe trouble is that the times on these clocks diverge.\nThe astronomical time, called Universal Time, or UT1, has tended to fall a few clicks behind the atomic one, called International Atomic Time, or TAI.\nSo every few years since 1972, the two times have been synced by the insertion of leap seconds — pausing the atomic clocks briefly to let the astronomic one catch up.\nB...",-0.3099
13,New York Times,00:23:30 UTC,Silicon Valley Shrugs Off Sam Bankman-Fried’s Trial,0.0,"In the four weeks that Sam Bankman-Fried, the founder of the FTX cryptocurrency exchange, was on trial on fraud charges, the tech industry:Reacted to the war in Israel and Gaza, including protesting a tech conference organizer’s social media posts about the conflict.\nBuzzed over a manifesto from a top venture capitalist outlining a list of enemies to technological progress.\nScrambled to invest money in the hottest artificial intelligence company, OpenAI, at triple its valuation earlier thi...",-0.2228
7,New York Times,12:40:30 UTC,New Phase of Gateway Tunnel Project in Hudson River to Begin,0.0,"Construction of the long-delayed rail tunnel under the Hudson River is about to speed up, as the project gets an additional injection of $3.8 billion in federal funding.\nSenator Chuck Schumer, Democrat of New York and the majority leader, will announce the latest grant from Washington on Friday, just before he and Pete Buttigieg, the transportation secretary, are scheduled to proclaim the start of work this month on the next phase of the $16.1 billion tunnel on Manhattan’s West Side, known ...",0.0516
0,New York Times,13:10:53 UTC,"Electric Planes, Once a Fantasy, Start to Take to the Skies",0.0,"Chris Caputo stood on the tarmac at Burlington International Airport in Vermont in early October and looked to the clouds in the distance.\nHe had piloted military and commercial aircraft over a long career, racking up thousands of flight hours, but the trip he was about to take would be very different.\nOver the next 16 days, he and his colleagues flew the plane, a CX300 built by their employer, Beta Technologies, down the East Coast.\nThey would make nearly two dozen stops to rest and rech...",0.1901
10,New York Times,11:15:42 UTC,Jeff Bezos Says He Is Leaving Seattle for Miami,0.0,"Jeff Bezos, who founded Amazon out of his Seattle garage in 1994 and plowed billions of dollars into transforming the city into a tech boomtown, said on Thursday that he was leaving his home of three decades and moving to Miami.\nMr. Bezos, 59, announced his move in an Instagram post on Thursday night.\nHe said his parents had recently moved back to Miami, where he attended high school, and that he wanted to be closer to them and to his partner, Lauren Sánchez.\nBloomberg News reported last ...",0.3818


In [96]:
# Score next to the summary text, ordered from most negative to most positive.
display(
    df.loc[:, ['summary sentiment', 'summary']]
    .sort_values(by='summary sentiment')
)

Unnamed: 0,summary sentiment,summary
4,-0.9872,"TikTok pushed back on Thursday against growing claims in recent weeks that it had failed to protect Jewish users and had pushed pro-Palestinian content in the United States.\nMany social networks have come under criticism for spreading misinformation and hate speech during the Israel-Hamas war.\nTikTok has the added scrutiny of being owned by the Chinese company ByteDance.\nWhile Beijing has pitched itself as a neutral broker in the dispute, a surge of antisemitism and anti-Israeli sentiment..."
8,-0.9766,"From crypto wunderkind to felonIt took jurors just four-and-a-half hours yesterday to reach a verdict in the monthlong fraud trial of Sam Bankman-Fried, the founder of the fallen cryptocurrency exchange FTX.\nThe result was unanimous: guilty on all counts.\nBankman-Fried has now completed a narrative arc from whiz-kid founder of a crypto empire to fraudster who stole billions in customer funds.\nProsecutors moved to paint the FTX founder — who appeared on magazine covers and hobnobbed with f..."
17,-0.9712,"An auto-generated poll that Microsoft embedded on its news aggregating platform alongside a Guardian article was “crass” and caused The Guardian significant reputation damage, the newspaper said on Thursday.\nThe poll, which was posted last week next to an article about a woman who was found dead in a school bathroom in Australia, asked readers to speculate on the cause of the woman’s death.\nIt gave three choices: murder, accident or suicide.\nThe Guardian said the poll was created using ge..."
3,-0.9666,"Sam Bankman-Fried, the tousle-haired mogul who founded the FTX cryptocurrency exchange, was convicted on Thursday of seven charges of fraud and conspiracy after a monthlong trial that laid bare the rampant hubris and risk-taking across the crypto industry.\nA jury of nine women and three men took just over four hours of deliberation on Thursday to reach a verdict, convicting Mr. Bankman-Fried of wire fraud, conspiracy and money laundering.\nMr. Bankman-Fried, 31, is expected to appeal.\nBefo..."
6,-0.6059,"Unsubsidized monthly premiums — the “sticker” price — for a benchmark silver plan are rising 4.5 percent on average as a result of inflation and greater use of health care services since the pandemic, according to a KFF analysis.\n(Plans are grouped by metal levels, ranging from bronze plans, which have low premiums but higher out-of-pocket costs, to gold and platinum plans, which have higher premiums and lower out-of-pocket costs.)\nThe average monthly premium for a benchmark silver plan is..."
11,-0.3099,"For 50 years, the international community has carefully and precariously balanced two different ways of keeping time.\nThe trouble is that the times on these clocks diverge.\nThe astronomical time, called Universal Time, or UT1, has tended to fall a few clicks behind the atomic one, called International Atomic Time, or TAI.\nSo every few years since 1972, the two times have been synced by the insertion of leap seconds — pausing the atomic clocks briefly to let the astronomic one catch up.\nB..."
13,-0.2228,"In the four weeks that Sam Bankman-Fried, the founder of the FTX cryptocurrency exchange, was on trial on fraud charges, the tech industry:Reacted to the war in Israel and Gaza, including protesting a tech conference organizer’s social media posts about the conflict.\nBuzzed over a manifesto from a top venture capitalist outlining a list of enemies to technological progress.\nScrambled to invest money in the hottest artificial intelligence company, OpenAI, at triple its valuation earlier thi..."
7,0.0516,"Construction of the long-delayed rail tunnel under the Hudson River is about to speed up, as the project gets an additional injection of $3.8 billion in federal funding.\nSenator Chuck Schumer, Democrat of New York and the majority leader, will announce the latest grant from Washington on Friday, just before he and Pete Buttigieg, the transportation secretary, are scheduled to proclaim the start of work this month on the next phase of the $16.1 billion tunnel on Manhattan’s West Side, known ..."
0,0.1901,"Chris Caputo stood on the tarmac at Burlington International Airport in Vermont in early October and looked to the clouds in the distance.\nHe had piloted military and commercial aircraft over a long career, racking up thousands of flight hours, but the trip he was about to take would be very different.\nOver the next 16 days, he and his colleagues flew the plane, a CX300 built by their employer, Beta Technologies, down the East Coast.\nThey would make nearly two dozen stops to rest and rech..."
10,0.3818,"Jeff Bezos, who founded Amazon out of his Seattle garage in 1994 and plowed billions of dollars into transforming the city into a tech boomtown, said on Thursday that he was leaving his home of three decades and moving to Miami.\nMr. Bezos, 59, announced his move in an Instagram post on Thursday night.\nHe said his parents had recently moved back to Miami, where he attended high school, and that he wanted to be closer to them and to his partner, Lauren Sánchez.\nBloomberg News reported last ..."
