In [50]:
from datetime import datetime, timedelta
from urllib.parse import quote_plus
from xml.etree.ElementTree import ElementTree, fromstring

import numpy
import requests
from textblob import TextBlob

from article import ArticleParser
from search import SearchEngine

In [26]:
def generate_search_urls(
    search_term: str,
    start: datetime,
    end: datetime) -> list[str]:
    """
    Generate one Google News RSS search URL per day of a date range.

    Each URL limits the search to a single day via ``after:``/``before:``
    filters, so the results can be used for sentiment analysis over time.

    Args:
        search_term: Raw search text. It is URL-encoded here, so pass it
            un-encoded (an already-encoded term would be encoded twice).
        start: Beginning of the range. If falsy (e.g. ``None``), defaults to
            10 days before ``end``.
        end: End of the range. If falsy (e.g. ``None``), defaults to 10 days
            after ``start``, or to today when ``start`` is missing as well.

    Returns:
        One URL for each whole day between ``start`` and ``end``, oldest
        first. The list is empty when ``end`` is less than one whole day
        after ``start``.
    """
    default_span = timedelta(days=10)
    if not end:
        # No end date: anchor on start if we have one, otherwise on today.
        end = start + default_span if start else datetime.today()
    if not start:
        start = end - default_span

    one_day = timedelta(days=1)
    urls = []

    # One URL per whole day in the range, each covering [day, day + 1).
    for offset in range((end - start).days):
        from_date = start + offset * one_day
        to_date = from_date + one_day
        # Encode the whole query so spaces, '&', '#', etc. in the search term
        # cannot break the URL; ':' stays literal for the date operators.
        query = quote_plus(
            f'{search_term} after:{from_date:%Y-%m-%d} before:{to_date:%Y-%m-%d}',
            safe=':')
        urls.append(f'https://news.google.com/rss/search?q={query}&hl=en-GB&gl=GB&ceid=GB:en')

    return urls

In [28]:
# Search window: the 20 days leading up to now, one search URL per day.
window_end = datetime.today()
window_start = window_end - timedelta(days=20)
urls = generate_search_urls("balls", window_start, window_end)

In [45]:
def get_x_news(
    urls: list[str],
    items: int) -> list[list[ArticleParser]]:
    """
    Download each Google News RSS search URL and wrap its first entries.

    Args:
        urls: RSS search URLs to fetch.
        items: Maximum number of news items to keep per URL.

    Returns:
        One inner list per URL, in the same order as ``urls``, holding up to
        ``items`` ArticleParser objects. An inner list is shorter (possibly
        empty) when the feed contains fewer entries than requested.

    Raises:
        requests.HTTPError: If a feed responds with an HTTP error status.
        requests.Timeout: If a feed does not respond within the timeout.
    """
    articles = []
    for url in urls:
        # Bound the wait so one stalled request cannot hang the whole run.
        response = requests.get(url, timeout=10)
        # Fail loudly on an HTTP error instead of trying to parse an error page.
        response.raise_for_status()
        # Parse the raw bytes so the XML parser applies the encoding declared
        # in the document itself.
        root = fromstring(response.content)
        feed_items = root.findall('.//channel/item')
        # Slicing (instead of indexing 0..items-1) tolerates feeds that return
        # fewer than `items` entries; max() keeps a negative count meaning "none".
        news_items = [ArticleParser(item) for item in feed_items[:max(items, 0)]]
        articles.append(news_items)
    return articles

In [46]:
# Fetch the first 5 news items for every search URL.
articles = get_x_news(urls=urls, items=5)

In [47]:
# Inspect the nested result: one inner list of ArticleParser objects per search URL.
articles

[[<article.ArticleParser at 0x144674950>,
  <article.ArticleParser at 0x1445164d0>,
  <article.ArticleParser at 0x144798250>,
  <article.ArticleParser at 0x144798590>,
  <article.ArticleParser at 0x144799e90>],
 [<article.ArticleParser at 0x1447de990>,
  <article.ArticleParser at 0x1447dead0>,
  <article.ArticleParser at 0x144775990>,
  <article.ArticleParser at 0x144602590>,
  <article.ArticleParser at 0x1447eda10>],
 [<article.ArticleParser at 0x14476da90>,
  <article.ArticleParser at 0x144674090>,
  <article.ArticleParser at 0x1447c5a90>,
  <article.ArticleParser at 0x1447c6010>,
  <article.ArticleParser at 0x1447c75d0>],
 [<article.ArticleParser at 0x1445e0bd0>,
  <article.ArticleParser at 0x1445e3210>,
  <article.ArticleParser at 0x1447beb10>,
  <article.ArticleParser at 0x1447bec90>,
  <article.ArticleParser at 0x1447be750>],
 [<article.ArticleParser at 0x144655e90>,
  <article.ArticleParser at 0x14476fb50>,
  <article.ArticleParser at 0x144601dd0>,
  <article.ArticleParser at 0x

In [52]:
def analyse_polarity(
        articles: list[list[ArticleParser]]) -> list[float]:
    """
    Average the first sentiment component of each group of articles.

    Args:
        articles: One inner list of articles per day. Each article must expose
            a ``sentiment`` sequence whose first element is numeric
            (presumably the polarity score -- confirm against ArticleParser).

    Returns:
        One mean value per inner list, in input order, usable as data points
        for plotting. A group with no articles yields ``nan``.
    """
    sentiments = []
    for day_articles in articles:
        scores = [a.sentiment[0] for a in day_articles]
        if scores:
            # Convert numpy.float64 to a plain float to match the annotation.
            sentiments.append(float(numpy.mean(scores)))
        else:
            # numpy.mean([]) would also give nan, but with a RuntimeWarning;
            # make the "no data for this day" marker explicit and silent.
            sentiments.append(float('nan'))
    return sentiments

In [53]:
# One averaged value per inner list of `articles`, in the same order.
daily_polarity = analyse_polarity(articles)
print(daily_polarity)

AttributeError: 'ArticleParser' object has no attribute '_link'

1. Generate a * b search URLs for a given search term, where a is the number of days we span and b is the number of articles per day. 2. Perform sentiment analysis on all articles. 3. Average the sentiment for each day in the period. 4. Plot a graph that shows the progression of sentiment over time.