In [8]:
#Libraries
import requests
import pandas as pd
import time
import xml.etree.ElementTree as ET

from datetime import datetime, timedelta

# Date strings (YYYY-MM-DD) used to build the "after:"/"before:" search filters.
# All three are derived from a single timestamp so they can never straddle
# midnight and disagree with each other.
_NOW = datetime.today()
TODAY = _NOW.strftime('%Y-%m-%d')
YESTERDAY = (_NOW - timedelta(days=1)).strftime('%Y-%m-%d')
THIS_WEEK = (_NOW - timedelta(days=7)).strftime('%Y-%m-%d')

def generate_search_url(search_term, data_filter=None):
    """
    Generates a Google News RSS search URL for the given search term and data filter.

    :param search_term: str, free-text query; it is URL-encoded before use
    :param data_filter: selects the date restriction appended to the query:
        'today'     -> articles after YESTERDAY
        'this_week' -> articles after THIS_WEEK and before TODAY
        'this_year' -> ``after:<current year - 1>``
        int         -> articles from that many days ago up to TODAY
        anything else (including None) -> no date restriction
    :return: str, the complete RSS search URL
    """
    # Local import: the notebook's import cell does not bring urllib into scope.
    from urllib.parse import quote_plus

    if data_filter == 'today':
        time_filter = f'after:{YESTERDAY}'
    elif data_filter == 'this_week':
        time_filter = f'after:{THIS_WEEK} before:{TODAY}'
    elif data_filter == 'this_year':
        # pd.datetime was removed in pandas 2.0; use the stdlib datetime instead.
        time_filter = f'after:{datetime.today().year - 1}'
    elif isinstance(data_filter, int):
        temp_time = (datetime.today() - timedelta(days=data_filter)).strftime('%Y-%m-%d')
        time_filter = f'after:{temp_time} before:{TODAY}'
    else:
        time_filter = ''

    # strip() drops the trailing separator when there is no date filter.
    query = f'{search_term} {time_filter}'.strip()
    # Encode spaces as '+' and escape characters such as '&' or '#' that would
    # otherwise truncate the query; ':' stays literal for the after:/before: operators.
    encoded_query = quote_plus(query, safe=':')
    url = f'https://news.google.com/rss/search?q={encoded_query}&hl=en-US&gl=US&ceid=US:en'
    return url

def get_text(description):
    """
    Extracts the text of the first ``<p>...</p>`` element from a description string.

    :param description: str or None, raw description markup
    :return: str, the text between the first ``<p>`` and the following ``</p>``;
        everything after ``<p>`` when the closing tag is missing; the input
        unchanged when there is no ``<p>`` at all; '' for None or empty input
    """
    if not description:
        return ''
    open_tag, close_tag = '<p>', '</p>'
    start = description.find(open_tag)
    if start == -1:
        # str.find returns -1 when absent; slicing with it would chop characters
        # off both ends of the text, so hand the description back untouched.
        return description
    start += len(open_tag)
    # Only accept a closing tag that comes after the opening one.
    end = description.find(close_tag, start)
    if end == -1:
        return description[start:]
    return description[start:end]

def parse_news_items(root):
    """
    Parses the news items from the XML root element.

    :param root: xml.etree.ElementTree.Element, root of the parsed RSS document
    :return: list of dicts with the keys 'title', 'link', 'description',
        'pub_date' and 'source', one dict per ``channel/item`` element.
        Text fields default to '' and 'pub_date' to ``pd.NaT`` when the
        corresponding child element is missing or empty.
    """
    news_items = []
    for item in root.findall('.//channel/item'):
        # findtext() tolerates a missing child element, whereas find().text
        # raises AttributeError on None.
        raw_pub_date = item.findtext('pubDate', default='')
        news_items.append({
            'title': item.findtext('title', default=''),
            'link': item.findtext('link', default=''),
            'description': get_text(item.findtext('description', default='')),
            'pub_date': pd.to_datetime(raw_pub_date) if raw_pub_date else pd.NaT,
            'source': item.findtext('source', default=''),
        })
    return news_items

def get_news(search_term, data_filter=None):
    """
    Searches Google News for the given search term and data filter, and returns
    a DataFrame containing the news items.

    The DataFrame is also written to ``<search_term>_news.csv`` in the current
    working directory.

    :param search_term: str, the query passed to generate_search_url
    :param data_filter: optional date filter passed to generate_search_url
    :return: pandas.DataFrame built from the parsed news items
    :raises Exception: when the server answers with a non-200 status code
    """
    url = generate_search_url(search_term, data_filter)
    # requests has no default timeout; without one a stalled connection
    # blocks the notebook indefinitely.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        raise Exception('Failed to fetch news items.')
    # Parse the raw bytes so the XML parser applies the encoding declared in
    # the document rather than the one requests guessed for .text.
    root = ET.fromstring(response.content)
    news_items = parse_news_items(root)
    df = pd.DataFrame(news_items)
    df.to_csv(f'{search_term}_news.csv', encoding='utf-8-sig', index=False)
    return df

# if __name__ == '__main__':
#     start_time = time.time()
#     search_term = input('Enter your search term here: ')
#     data_filter = int(input('Enter number of days ago or leave blank for all data: ')) or None
#     data = get_news(search_term, data_filter)
#     end_time = time.time()
#     print(f'Execution time: {end_time - start_time:.2f} seconds')

if __name__ == '__main__':
    # Smoke test: fetch the unfiltered feed for a fixed query and print every
    # raw <item> element found under <channel>.
    data_filter = None
    search_term = "northeastern university"
    url = generate_search_url(search_term, data_filter)
    response = requests.get(url)
    root = ET.fromstring(response.text)
    for item in root.iterfind('.//channel/item'):
        print(item)

{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}


Sentiment
Polarity
Subjectivity

This script does the following:

It prompts the user to input a search term and a number of past days to search. The number of past days acts as a filter to retrieve only the recent news related to the given search term.

Using the get_news function, it makes a request to Google News RSS feed with the specified search term and filters the articles based on the provided date.

The get_news function parses the XML response from the request, extracts useful information from each news article, and stores it in a list of dictionaries.

The script then uses the get_sentiment function to analyze the sentiment of the description of each news article using TextBlob. It computes the polarity and subjectivity of the text. The polarity score is a float between -1.0 and 1.0 where -1.0 means a negative sentiment and 1.0 means a positive sentiment. The subjectivity is a float between 0.0 and 1.0 where 0.0 is very objective and 1.0 is very subjective.

Finally, it prints the title, description, and sentiment (polarity and subjectivity) of each news article.

In [3]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
# Fetch the 'vader_lexicon' NLTK data package. Presumably this is for the
# SentimentIntensityAnalyzer imported above, which the visible cells never use.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/luisali/nltk_data...


True

In [None]:
import requests
import xml.etree.ElementTree as ET
import pandas as pd
import time
from textblob import TextBlob


def generate_search_url(search_term, data_filter=None):
    """
    Generates the URL for the news search API request.

    :param search_term: str, the term to search for in the news
    :param data_filter: int, the number of days ago to filter the news by;
        None or 0 means no date restriction
    :return: str, the URL for the API request
    """
    # Local import: this cell's import list does not bring urllib into scope.
    from urllib.parse import quote_plus

    base_url = 'https://news.google.com/rss/search?q='
    # quote_plus turns spaces into '+' and also escapes characters such as
    # '&', '#' and '+' that would otherwise corrupt the query string.
    search_url = base_url + quote_plus(search_term)
    if data_filter:
        # pd.datetime was removed in pandas 2.0; pd.Timestamp.now() replaces it.
        temp_time = (pd.Timestamp.now() - pd.Timedelta(days=data_filter)).strftime('%Y-%m-%d')
        search_url += f'+after:{temp_time}'
    return search_url


def get_news(search_term, data_filter=None):
    """
    Makes an API request to Google News and parses the XML response.

    :param search_term: str, the term to search for in the news
    :param data_filter: int, the number of days ago to filter the news by
    :return: list of dicts, each dict containing information about a news article
        under the keys 'title', 'link', 'description', 'pubDate' and 'source';
        a field is '' when its element is missing or empty
    :raises requests.exceptions.RequestException: on connection problems,
        timeouts or a non-success HTTP status
    """
    url = generate_search_url(search_term, data_filter)
    # requests has no default timeout; without one a stalled connection
    # blocks the notebook indefinitely.
    response = requests.get(url, timeout=30)
    # Fail with a clear HTTP error instead of feeding an error page to the XML parser.
    response.raise_for_status()
    root = ET.fromstring(response.content)
    fields = ('title', 'link', 'description', 'pubDate', 'source')
    # findtext() returns the default for a missing child instead of raising
    # AttributeError the way find().text does.
    return [
        {field: item.findtext(field, default='') for field in fields}
        for item in root.findall('./channel/item')
    ]


def get_sentiment(text):
    """
    Analyzes the sentiment of the text using TextBlob.

    :param text: str or None, the text to analyze
    :return: tuple, the sentiment polarity and subjectivity; None or empty
        text is treated as neutral and yields (0.0, 0.0)
    """
    # An XML element without text yields None, which TextBlob rejects with a
    # TypeError; there is nothing to score in that case.
    if not text:
        return 0.0, 0.0
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity, sentiment.subjectivity


if __name__ == '__main__':
    # Interactive driver: ask for a query and an optional look-back window,
    # fetch the matching news and print the sentiment of each description.
    start_time = time.time()
    search_term = input('Enter your search term here: ')
    # The prompt invites a blank answer, but int('') raises ValueError, so
    # convert only when something was actually typed. 0 still maps to None.
    days_text = input('Enter number of days ago or leave blank for all data: ').strip()
    data_filter = (int(days_text) or None) if days_text else None
    news_list = get_news(search_term, data_filter)

    for news in news_list:
        title = news['title']
        # An empty <description> element comes back as None; score it as empty text.
        description = news['description'] or ''
        polarity, subjectivity = get_sentiment(description)
        print(f"Title: {title}")
        print(f"Description: {description}")
        print(f"Sentiment: Polarity={polarity:.2f}, Subjectivity={subjectivity:.2f}\n")
    end_time = time.time()
    print(f'Execution time: {end_time - start_time:.2f} seconds')


**Title & Description: Sentiment Polarity and Subjectivity**

Title Polarity: This refers to the sentiment expressed in the title of the news article. The polarity is a score between -1.0 and 1.0. If the score is close to 1.0, it indicates that the title has a positive sentiment (positive words or phrases are used). If it's close to -1.0, the title has a negative sentiment (negative words or phrases are used). A score close to 0 suggests that the title is neutral.

Title Subjectivity: This refers to the subjectivity of the title of the news article. The subjectivity is a score between 0.0 and 1.0. If the score is close to 1.0, it indicates that the title is highly subjective and might contain personal opinions or feelings. If it's close to 0, the title is more objective and focuses on factual information.

Description Polarity: Similar to the Title Polarity, this refers to the sentiment expressed in the description of the news article (the summary text supplied by the RSS feed; the script does not analyze the full article body). Again, a score close to 1.0 indicates positive sentiment, close to -1.0 indicates negative sentiment, and around 0 is neutral.

Description Subjectivity: This refers to the subjectivity of the description of the news article (again, the RSS summary text rather than the full article). Like Title Subjectivity, a score close to 1.0 means the description is highly subjective, and a score close to 0 means it is objective or factual.

These scores can give you an understanding of both the emotional tone (polarity) and the presence of factual information vs. personal opinion (subjectivity) in news articles.

In [None]:
import requests
import xml.etree.ElementTree as ET
import pandas as pd
import time
from textblob import TextBlob

def generate_search_url(search_term, data_filter=None):
    """
    Build the Google News RSS search URL for a term and an optional look-back window.

    :param search_term: str, the term to search for; it is URL-encoded before use
    :param data_filter: int, number of days to look back; None or 0 disables
        the date restriction
    :return: str, the RSS search URL
    """
    # Local import: this cell's import list does not bring urllib into scope.
    from urllib.parse import quote_plus

    base_url = 'https://news.google.com/rss/search?q='
    # Replacing only spaces leaves '&', '#' and '+' unescaped, which corrupts
    # the query; quote_plus handles spaces and those characters alike.
    search_url = base_url + quote_plus(search_term)
    if data_filter:
        temp_time = (pd.Timestamp.now() - pd.Timedelta(days=data_filter)).strftime('%Y-%m-%d')
        search_url += f'+after:{temp_time}'
    return search_url


def get_news(search_term, data_filter=None):
    """
    Fetch the Google News RSS feed for a search and return its items.

    Failures are reported on stdout and result in an empty list rather than
    an exception.

    :param search_term: str, the term to search for in the news
    :param data_filter: int, the number of days ago to filter the news by
    :return: list of dicts with the keys 'title', 'link', 'description',
        'pubDate' and 'source'; a field is '' when its element is missing or
        empty. The list is empty when the request or the XML parsing fails.
    """
    url = generate_search_url(search_term, data_filter)
    try:
        # requests has no default timeout; a timeout surfaces as a
        # RequestException and is handled below like any other network error.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.HTTPError as err:
        print(f"HTTP error occurred: {err}")
        return []
    except requests.exceptions.RequestException as err:
        print(f"Error occurred: {err}")
        return []

    try:
        root = ET.fromstring(response.content)
    except ET.ParseError as err:
        # Keep the "report and return []" contract for malformed responses too.
        print(f"Could not parse the feed XML: {err}")
        return []

    fields = ('title', 'link', 'description', 'pubDate', 'source')
    # findtext() covers both a missing child and a child without text with a
    # single lookup, so no field is ever None.
    return [
        {field: item.findtext(field, default='') for field in fields}
        for item in root.findall('./channel/item')
    ]


def get_sentiment(text):
    """
    Score a piece of text with TextBlob.

    :param text: str or None, the text to analyze
    :return: tuple (polarity, subjectivity); None or empty text is treated as
        neutral and yields (0.0, 0.0)
    """
    # TextBlob raises TypeError for None, which is what an XML element
    # without text produces; there is nothing to score in that case.
    if not text:
        return 0.0, 0.0
    scores = TextBlob(text).sentiment
    return scores.polarity, scores.subjectivity


if __name__ == '__main__':
    # Interactive driver: ask for a query and an optional look-back window,
    # score the title and description of every article, print the scores and
    # save them to news_sentiment_analysis.csv.
    start_time = time.time()
    search_term = input('Enter your search term here: ')
    # The prompt invites a blank answer, but int('') raises ValueError, so
    # convert only when something was actually typed. 0 still maps to None.
    days_text = input('Enter number of days ago or leave blank for all data: ').strip()
    data_filter = (int(days_text) or None) if days_text else None
    news_list = get_news(search_term, data_filter)
    results = []

    for news in news_list:
        # An element without text comes back as None; score it as empty text.
        title = news['title'] or ''
        description = news['description'] or ''
        polarity_title, subjectivity_title = get_sentiment(title)
        polarity_desc, subjectivity_desc = get_sentiment(description)
        print(f"Title: {title}")
        print(f"Description: {description}")
        print(f"Sentiment: Polarity={polarity_title:.2f}, Subjectivity={subjectivity_title:.2f} for the title")
        print(f"Sentiment: Polarity={polarity_desc:.2f}, Subjectivity={subjectivity_desc:.2f} for the description\n")

        # Saving results in a list
        results.append([title, description, polarity_title, subjectivity_title, polarity_desc, subjectivity_desc])

    # Converting the list to a DataFrame
    df = pd.DataFrame(results, columns=['Title', 'Description', 'Title Polarity', 'Title Subjectivity', 'Description Polarity', 'Description Subjectivity'])

    # Saving DataFrame to a CSV file
    df.to_csv('news_sentiment_analysis.csv', index=False)

    end_time = time.time()
    print(f'Execution time: {end_time - start_time:.2f} seconds')
