In [1]:
pip install newsapi-python

Collecting newsapi-python
  Using cached newsapi_python-0.2.7-py2.py3-none-any.whl.metadata (1.2 kB)
Using cached newsapi_python-0.2.7-py2.py3-none-any.whl (7.9 kB)
Installing collected packages: newsapi-python
Successfully installed newsapi-python-0.2.7
Note: you may need to restart the kernel to use updated packages.


In [2]:
from datetime import datetime
import pytz

# function to convert a UTC timestamp string into a US/Eastern time string
def convert_utc_timestamp(utc_string):
    """Re-express a UTC timestamp string as US/Eastern wall-clock time.

    Parameters
    ----------
    utc_string : str
        Timestamp formatted as ``YYYY-MM-DDTHH:MM:SSZ``.

    Returns
    -------
    str
        The same instant in the ``US/Eastern`` zone, formatted as
        ``YYYY-MM-DD HH:MM:SS``.

    Raises
    ------
    ValueError
        If ``utc_string`` does not match the expected input format.
    """
    # strptime yields a naive datetime, so UTC has to be attached explicitly
    # before the value can be converted to another zone.
    naive_utc = datetime.strptime(utc_string, "%Y-%m-%dT%H:%M:%SZ")
    aware_utc = pytz.utc.localize(naive_utc)

    eastern_zone = pytz.timezone('US/Eastern')
    return aware_utc.astimezone(eastern_zone).strftime("%Y-%m-%d %H:%M:%S")
    

In [3]:
# Read the NewsAPI key from the NEWSAPI_KEY environment variable so the secret
# is never stored in the notebook; falls back to an empty string when unset.
import os

API_KEY = os.environ.get("NEWSAPI_KEY", "")

In [7]:
from newsapi import NewsApiClient
import pandas as pd


# using newsapi client to get data (ref: https://newsapi.org/docs/client-libraries/python)
newsapi = NewsApiClient(api_key=API_KEY)

# parallel lists, one per output column, later used to create our data frame
sources = list()
authors = list()
urls = list()
descriptions = list()
titles = list()
contents = list()
published_at_dates = list()

# one request per calendar day from 9th May to 30th May, 2025 (inclusive);
# range() excludes its stop value, so the stop must be 31 to reach the 30th
for day in range(9, 31):
    # zero-pad the day so single-digit days become e.g. 2025-05-09
    start_date = f'2025-05-{day:02d}'

    # get articles for a single date
    # (the query previously ended with a stray '&', removed here)
    all_articles = newsapi.get_everything(q='finance OR stocks OR markets',
                                          from_param=start_date,
                                          to=start_date,
                                          language='en',
                                          sort_by='relevancy',
                                          page=1,
                                          page_size=100)

    # after getting the data we iterate over each article and append the data into their respective list
    for article in all_articles['articles']:
        # reset per article: when 'source' is missing the row must get None,
        # not the value left over from the previous article
        source = None
        is_source = article.get('source')
        if is_source:
            source = str({"id": is_source.get("id"), "name": is_source.get("name")})
        author = article.get('author')
        url = article.get('url')
        description = article.get('description')
        title = article.get('title')
        content = article.get('content')
        published_at = article.get('publishedAt')
        if published_at:
            # publishedAt is a UTC string; store it as US/Eastern local time
            published_at = convert_utc_timestamp(published_at)

        sources.append(source)
        authors.append(author)
        urls.append(url)
        descriptions.append(description)
        titles.append(title)
        contents.append(content)
        published_at_dates.append(published_at)


# creating a dictionary to convert the data into a data frame
data = {
    "source": sources,
    "author": authors,
    "url": urls,
    "description": descriptions,
    "title": titles,
    "content": contents,
    "published_at": published_at_dates
}
df = pd.DataFrame(data=data)

In [24]:
# Preview the first five rows of the collected articles.
# NOTE(review): the recorded output shows 2024-10-01 timestamps while the fetch
# cell requests May 2025 dates, so it appears to come from an earlier run --
# confirm by re-running the notebook top to bottom.
df.head()

Unnamed: 0,source,author,url,description,title,content,published_at
0,"{'id': None, 'name': 'Gizmodo.com'}",Joe Tilleli,https://gizmodo.com/wall-street-in-your-pocket...,Save 83% and an additional $30 when you sign u...,Wall Street in Your Pocket: The Tykr Stock Scr...,Just starting out with stock trading can be an...,2024-10-01 14:18:47
1,"{'id': 'business-insider', 'name': 'Business I...",Alex Nicoll,https://www.businessinsider.com/apollo-global-...,"Banks are slowing their lending, opening doors...",Apollo just set a goal to manage $1.2 trillion...,"Marc Rowan, CEO of ApolloArturo Holmes / Getty...",2024-10-01 18:01:22
2,"{'id': None, 'name': 'Yahoo Entertainment'}",editorial-team@simplywallst.com (Simply Wall St),https://finance.yahoo.com/news/3-top-dividend-...,With global markets reaching record highs on t...,3 Top Dividend Stocks Yielding 5.6%,With global markets reaching record highs on t...,2024-10-01 09:09:17
3,"{'id': 'business-insider', 'name': 'Business I...",jsor@businessinsider.com (Jennifer Sor),https://markets.businessinsider.com/news/stock...,"""Do I think that stocks could go up more? The ...",Investing in China's stock market is like 'pic...,Investing in China over the long term is nothi...,2024-10-01 14:30:44
4,"{'id': None, 'name': 'Yahoo Entertainment'}",Fatima Farooq,https://finance.yahoo.com/news/alphabet-inc-go...,We recently published a list of 15 AI Stocks T...,Alphabet Inc. (GOOG): How AI Expansion and Fin...,We recently published a list of 15 AI Stocks T...,2024-10-01 06:52:39


In [25]:
# (row count, column count) of the collected data frame
df.shape

(1500, 7)

In [7]:
# Show the 'content' value of the first row to inspect what the API returned.
df['content'].iloc[0]

'Just starting out with stock trading can be an overwhelming experience. Knowing what stocks are good and bad and knowing when to buy or sell doesn’t come naturally. You can work with a financial advi… [+2251 chars]'

We retrieved 1500 records successfully. From the cell above we can see that the data in the content column is incomplete (it is truncated). Our next step is therefore to scrape the full text using the URL provided in every row and replace the content with the scraped content.

In [8]:
# Write the data frame to a CSV file in the working directory; index=False
# leaves the row index out of the file.
df.to_csv('financial_news_data.csv', index=False) 

In [9]:
# Preview the first five rows again.
# NOTE(review): the recorded output shows plain source names (e.g. Gizmodo.com),
# but the fetch cell stores a stringified {'id', 'name'} dict and no visible cell
# rewrites the column -- the transforming step seems to be missing; confirm.
df.head()

Unnamed: 0,source,author,url,description,title,content,published_at
0,Gizmodo.com,Joe Tilleli,https://gizmodo.com/wall-street-in-your-pocket...,Save 83% and an additional $30 when you sign u...,Wall Street in Your Pocket: The Tykr Stock Scr...,Just starting out with stock trading can be an...,2024-10-01 14:18:47
1,Business Insider,Alex Nicoll,https://www.businessinsider.com/apollo-global-...,"Banks are slowing their lending, opening doors...",Apollo just set a goal to manage $1.2 trillion...,"Marc Rowan, CEO of ApolloArturo Holmes / Getty...",2024-10-01 18:01:22
2,Yahoo Entertainment,editorial-team@simplywallst.com (Simply Wall St),https://finance.yahoo.com/news/3-top-dividend-...,With global markets reaching record highs on t...,3 Top Dividend Stocks Yielding 5.6%,With global markets reaching record highs on t...,2024-10-01 09:09:17
3,Business Insider,jsor@businessinsider.com (Jennifer Sor),https://markets.businessinsider.com/news/stock...,"""Do I think that stocks could go up more? The ...",Investing in China's stock market is like 'pic...,Investing in China over the long term is nothi...,2024-10-01 14:30:44
4,Yahoo Entertainment,Fatima Farooq,https://finance.yahoo.com/news/alphabet-inc-go...,We recently published a list of 15 AI Stocks T...,Alphabet Inc. (GOOG): How AI Expansion and Fin...,We recently published a list of 15 AI Stocks T...,2024-10-01 06:52:39


In [10]:
# df['source'].nunique()

In [26]:
# List the distinct values stored in the 'source' column.
df['source'].unique()

array(["{'id': None, 'name': 'Gizmodo.com'}",
       "{'id': 'business-insider', 'name': 'Business Insider'}",
       "{'id': None, 'name': 'Yahoo Entertainment'}",
       "{'id': None, 'name': 'NPR'}",
       "{'id': None, 'name': 'Biztoc.com'}",
       "{'id': None, 'name': 'Forbes'}",
       "{'id': None, 'name': 'Theregister.com'}",
       "{'id': None, 'name': 'CNET'}",
       "{'id': 'the-times-of-india', 'name': 'The Times of India'}",
       "{'id': None, 'name': 'Quartz India'}",
       "{'id': None, 'name': 'Torrentfreak.com'}",
       "{'id': None, 'name': 'Skift'}",
       '{\'id\': None, \'name\': "Investor\'s Business Daily"}',
       "{'id': None, 'name': 'Japan Today'}",
       "{'id': 'cbc-news', 'name': 'CBC News'}",
       "{'id': None, 'name': 'CoinDesk'}",
       "{'id': 'nbc-news', 'name': 'NBC News'}",
       "{'id': None, 'name': 'International Business Times'}",
       "{'id': None, 'name': 'Phandroid - News for Android'}",
       "{'id': 'fortune', 'name': 'Fo

In [None]:
# Comma-separated source ids kept for reference. Stored as a string because the
# bare text is parsed as subtraction of undefined names and raises NameError
# when the cell is run.
# NOTE(review): presumably meant for the `sources=` request parameter -- confirm.
SOURCE_IDS = "al-jazeera-english,the-times-of-india,fortune,espn"