# In [None]:
import requests
import time
import pandas as pd
from collections import Counter
from urllib.parse import urlparse

def fetch_citywise_news(api_key, city, keyword, from_date, to_date,
                        max_retries=3, timeout=30):
    """Search the NewsCatcher v3 API for articles matching a keyword and city.

    The query is sent as ``"<keyword> AND <city>"`` restricted to country
    ``GB`` and language ``en``, with a page size of 50.

    Args:
        api_key: Value sent in the ``x-api-token`` request header.
        city: City name combined with ``keyword`` in the search query.
        keyword: Search phrase combined with ``city`` in the search query.
        from_date: Value sent as the ``from_`` query parameter.
        to_date: Value sent as the ``to_`` query parameter.
        max_retries: How many times to retry after an HTTP 429 response
            before giving up. Negative values are treated as 0.
        timeout: Seconds to wait for the server before ``requests`` aborts
            the call; without it a stalled connection would hang forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the final response has a 4xx/5xx status
            (including a 429 that persisted through every retry).
    """
    url = "https://v3-api.newscatcherapi.com/api/search"
    headers = {'x-api-token': api_key, 'User-agent': 'your bot 0.1'}
    params = {
        'q': f"{keyword} AND {city}",
        'from_': from_date,
        'to_': to_date,
        'countries': 'GB',
        'lang': ['en'],
        'page_size': 50
    }

    attempts = max(0, max_retries) + 1
    for attempt in range(attempts):
        response = requests.get(url, headers=headers, params=params,
                                timeout=timeout)
        # Rate limiting must be handled *before* raise_for_status(), which
        # would otherwise turn the 429 into an exception immediately.
        if response.status_code != 429 or attempt == attempts - 1:
            break
        try:
            delay = max(0, int(response.headers.get("Retry-After", "")))
        except ValueError:
            # Header missing or given as an HTTP-date rather than seconds.
            delay = 1
        time.sleep(delay)

    response.raise_for_status()  # Surface any remaining HTTP error
    return response.json()

def extract_domain(url):
    """Return the ``scheme://netloc`` prefix of *url*.

    The path, query string and fragment are discarded. When *url* has no
    scheme or network location, those parts are empty strings, so the
    result degenerates to ``"://"``.
    """
    # ParseResult is a 6-field tuple; only the first two fields are needed.
    scheme, netloc, *_ = urlparse(url)
    return "{}://{}".format(scheme, netloc)

def fetch_and_process_news():
    """Tally link domains across city-wise news searches.

    Runs one search per configured city for the fixed keyword and date
    range, collects the domain of every entry in each article's
    ``all_links`` field, and returns the ten most frequent domains.

    Returns:
        A list of ``{"Domain": ..., "Count": ...}`` dicts, most frequent
        first, containing at most ten entries.
    """
    api_key = 'yourkeyhere'
    keyword = 'disease outbreak'
    from_date, to_date = '2023-10-01', '2024-04-01'
    cities = [
        "London", "Edinburgh", "Cardiff", "Belfast", "Birmingham",
        "Manchester", "Liverpool", "Bristol", "Glasgow", "Sheffield",
        "Leeds", "Newcastle", "Nottingham", "Leicester",
    ]

    # Counting incrementally keeps first-seen order, so ties in the final
    # ranking resolve the same way as counting one flat list of domains.
    tally = Counter()

    for city in cities:
        payload = fetch_citywise_news(api_key, city, keyword, from_date, to_date)
        if payload.get('status') != 'ok':
            continue
        for article in payload['articles']:
            # Skip articles whose link list is absent or empty.
            if 'all_links' not in article or not article['all_links']:
                continue
            tally.update(extract_domain(link) for link in article['all_links'])

    # Keep only the ten most frequent domains.
    return [
        {"Domain": domain, "Count": count}
        for domain, count in tally.most_common(10)
    ]

def save_domains_to_csv(domain_links):
    """Write *domain_links* to ``top_10_frequent_domains.csv``.

    The records are loaded into a DataFrame and written to the current
    working directory without the index column.
    """
    output_path = 'top_10_frequent_domains.csv'
    pd.DataFrame(domain_links).to_csv(output_path, index=False)

def main():
    """Fetch the top domains, save them to CSV, and report completion."""
    save_domains_to_csv(fetch_and_process_news())
    print("Saved the top 10 most frequent domains to top_10_frequent_domains.csv")

# Run the pipeline only when this file is executed as a script, so that
# importing it does not trigger network requests or file writes.
if __name__ == "__main__":
    main()

