In [19]:
# Import Modules
import os
from datetime import datetime, timedelta

from newsapi import NewsApiClient
import pandas as pd

from dotenv import load_dotenv

In [None]:
# Initialise News API
# Load variables from a local .env file (if any), then read the API token from
# the environment. Fail fast with a clear message when the token is missing
# rather than passing api_key=None to the client.
load_dotenv()
news_api_token = os.getenv('NEWS_API_TOKEN')
if not news_api_token:
    raise RuntimeError(
        "NEWS_API_TOKEN is not set; add it to your environment or .env file."
    )
newsapi = NewsApiClient(api_key=news_api_token)

In [25]:
# Select tech news sources
# Ask the API for its source catalogue, then keep only the entries that are
# both English-language and categorised as 'technology'.
news_sources = newsapi.get_sources()
tech_news = [
    candidate
    for candidate in news_sources['sources']
    if candidate['language'] == 'en' and candidate['category'] == 'technology'
]

print(tech_news)

[{'id': 'ars-technica', 'name': 'Ars Technica', 'description': "The PC enthusiast's resource. Power users and the tools they love, without computing religion.", 'url': 'https://arstechnica.com', 'category': 'technology', 'language': 'en', 'country': 'us'}, {'id': 'crypto-coins-news', 'name': 'Crypto Coins News', 'description': 'Providing breaking cryptocurrency news - focusing on Bitcoin, Ethereum, ICOs, blockchain technology, and smart contracts.', 'url': 'https://www.ccn.com', 'category': 'technology', 'language': 'en', 'country': 'us'}, {'id': 'engadget', 'name': 'Engadget', 'description': 'Engadget is a web magazine with obsessive daily coverage of everything new in gadgets and consumer electronics.', 'url': 'https://www.engadget.com', 'category': 'technology', 'language': 'en', 'country': 'us'}, {'id': 'hacker-news', 'name': 'Hacker News', 'description': 'Hacker News is a social news website focusing on computer science and entrepreneurship. It is run by Paul Graham\'s investment 

In [43]:
# Date periods for News API
LOOKBACK_DAYS = 30  # Free tier has a limit of 30 days

# Read the clock once so both ends of the window derive from the same instant;
# two separate today() calls differ by a few microseconds and could even fall
# on different calendar days around midnight.
next_date = datetime.today()
prev_date = next_date - timedelta(days=LOOKBACK_DAYS)

# Calendar-date strings (YYYY-MM-DD) for the same window.
prev_date_str = prev_date.date().isoformat()
next_date_str = next_date.date().isoformat()

# Retrieve news articles from the sources
def get_news(sourceId, page=1):
    """Fetch one page of English articles for a News API source.

    Calls ``newsapi.get_everything`` for the window between the module-level
    ``prev_date`` and ``next_date``, sorted by relevancy.

    Args:
        sourceId: Source identifier passed to the API as ``sources``
            (for example the ``'id'`` field of an entry in ``tech_news``).
        page: Result page to request. Defaults to 1, so existing
            single-argument calls behave as before.

    Returns:
        A list containing one ``[source name, title, content]`` list per
        article, in the order the API returned them.
    """
    response = newsapi.get_everything(sources=sourceId,
                                      from_param=prev_date,
                                      to=next_date,
                                      language='en',
                                      sort_by='relevancy',
                                      page=page)

    # Build the rows in a single pass. No local is named `list`, so the
    # builtin is not shadowed inside this function.
    return [
        [article['source']['name'], article['title'], article['content']]
        for article in response['articles']
    ]

In [44]:
# Create news dataframe
# The column names are passed to from_records instead of being assigned to
# df.columns afterwards. If no articles come back, the empty record list then
# yields an empty frame with these columns rather than a length-mismatch
# ValueError.
df = pd.DataFrame.from_records(
    get_news(tech_news[0]['id']),
    columns=['source', 'title', 'text'],
)
print(df.head())

           name                                              title  \
0  Ars Technica  FDA’s review of MDMA for PTSD highlights study...   
1  Ars Technica  Gaming historians preserve what’s likely Ninte...   
2  Ars Technica  Ancient Maya DNA shows male kids were sacrific...   
3  Ars Technica  Report: Apple isn’t paying OpenAI for ChatGPT ...   
4  Ars Technica  May contain nuts: Precautionary allergen label...   

                                                text  
0  Enlarge/ MDMA is now in the FDA's hands.\r\n10...  
1  Enlarge/ "So slim you can play it anywhere." \...  
2  Enlarge/ Detail from the reconstructed stone t...  
3  80\r\nOn Monday, Apple announced it would be i...  
4  128\r\nWhen Ina Chung, a Colorado mother, firs...  
