# Import Required Libraries
Import the necessary libraries for file handling, HTTP requests, and the Gemini API.

In [None]:
!pip install google-generativeai

In [None]:
import os
import requests
import google.generativeai as genai

from urllib.parse import urlparse


# Define Helper Functions and News URLs
Define the list of 20 German/European news sources and create helper functions for fetching content and generating filenames.

In [None]:
# The 20 German/European news sources, listed by bare domain.
# Each one is expanded to its "https://www.<domain>" homepage URL below.
_NEWS_DOMAINS = (
    "spiegel.de",
    "sueddeutsche.de",
    "faz.net",
    "welt.de",
    "tagesschau.de",
    "n-tv.de",
    "handelsblatt.com",
    "dw.com",
    "bild.de",
    "heise.de",
    "lemonde.fr",
    "lefigaro.fr",
    "corriere.it",
    "repubblica.it",
    "theguardian.com",
    "elpais.com",
    "lavanguardia.com",
    "euronews.com",
    "ft.com",
    "diepresse.com",
)
NEWS_URLS = [f"https://www.{domain}" for domain in _NEWS_DOMAINS]

# Directory in which the fetched markdown snapshots are stored
out_dir = "data/misc/webpgs"


# Helper function to fetch content from a URL
def fetch_md(url):
    """Fetch the markdown rendering of *url* through the r.jina.ai reader.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The page content as a string, or "" when the response carries no
        content field.

    Raises:
        requests.HTTPError: If the reader answers with an error status.
        requests.RequestException: On connection problems or timeout (15 s).
    """
    # The target URL is appended verbatim. The previous "{{{url}}}" rendered
    # literal braces around it ("https://r.jina.ai/{https://...}").
    r = requests.get(
        f"https://r.jina.ai/{url}",
        # Ask for a JSON envelope so that r.json() below can succeed.
        headers={"Accept": "application/json"},
        timeout=15,
    )
    r.raise_for_status()

    try:
        payload = r.json()
    except ValueError:
        # Body is not JSON; presumably the reader returned the markdown as
        # plain text, so hand that back instead of failing.
        return r.text

    if not isinstance(payload, dict):
        return ""

    # NOTE(review): the JSON envelope appears to nest the page under "data"
    # ({"data": {"content": ...}}) -- confirm against the reader docs. The
    # top-level "content" lookup is kept as a fallback for the old behavior.
    data = payload.get("data")
    if isinstance(data, dict) and "content" in data:
        return data.get("content") or ""
    return payload.get("content", "")


# Helper function to generate a filename from a URL
def fname(url):
    """Return a flat ``.md`` filename derived from *url*.

    The scheme and a leading "www." are dropped, and remaining "/" characters
    become "_" so the result contains no path separators.

    Args:
        url: URL (with or without a scheme) to derive the name from.

    Returns:
        A filename such as "spiegel.de.md" or "dw.com_en_top.md".
    """
    # Split on the FIRST "//" only: a later "//" inside the path must not
    # discard the host. Trailing slashes are stripped so that
    # "https://host/" and "https://host" map to the same file.
    d = url.split("//", 1)[-1].rstrip("/")
    if d.startswith("www."):
        d = d[4:]
    return f"{d.replace('/', '_')}.md"

# Fetch and Save News Articles
Loop through the news URLs, fetch content for each one, and save the retrieved markdown content to files.

In [None]:
# Loop through the news URLs, fetch content for each one, and save the retrieved markdown content to files.
# Files go into out_dir itself (the same directory the load cell reads) and are
# named with fname(), which drops the "www." prefix so that the stems match the
# "spiegel.de" / "dw.com" keys used by the later queries.
os.makedirs(out_dir, exist_ok=True)  # The directory may not exist on a first run
for url in NEWS_URLS:
    try:
        md = fetch_md(url)  # Fetch markdown content for the URL
    except (requests.RequestException, ValueError) as exc:
        # One unreachable or misbehaving site should not abort the whole run
        print(f"Skipping {url}: {exc}")
        continue
    if not md:  # Nothing usable came back for this URL
        continue
    with open(os.path.join(out_dir, fname(url)), "w", encoding="utf-8") as f:
        f.write(md)  # Save the content to a file in the output directory

# Load Saved Articles
Read the saved markdown files and load their contents into a dictionary for further processing.

In [None]:
# Map each saved snapshot's filename (without the ".md" extension) to its text
articles = {}
for md_name in os.listdir(out_dir):
    if not md_name.endswith(".md"):
        continue  # Ignore anything that is not a markdown snapshot
    key = os.path.splitext(md_name)[0]
    with open(os.path.join(out_dir, md_name), encoding="utf-8") as fh:
        articles[key] = fh.read()

# Initialize Gemini Client
Set up the Gemini client with an API key read from an environment variable, and choose which model to use.

In [None]:
# Read the API key from the environment rather than hard-coding a secret in the
# notebook. GOOGLE_API_KEY is tried first, then GEMINI_API_KEY.
API_KEY = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
if not API_KEY:
    # Surface the problem here instead of as an opaque failure on the first query
    print("Warning: no API key found; set GOOGLE_API_KEY or GEMINI_API_KEY.")

# Name of the Gemini model used for every query in this notebook
MODEL = "gemini-2.0-flash"


In [None]:
# Hand the key held in API_KEY to the SDK
genai.configure(api_key=API_KEY)

# Model handle for the model named by MODEL; gemini_query() calls it
model = genai.GenerativeModel(model_name=MODEL)

# Create Gemini Query Function
Create a function to send prompts to the Gemini API and retrieve responses with appropriate configuration.

In [None]:
def gemini_query(prompt, max_output_tokens=500, temperature=0.3):
    """Send *prompt* to the module-level ``model`` and return the reply text.

    Args:
        prompt: Prompt text passed straight to ``model.generate_content``.
        max_output_tokens: Upper bound on the length of the generated reply.
        temperature: Sampling temperature forwarded in the generation config.

    Returns:
        The response text on success. On any failure the function does not
        raise; it returns a string of the form ``"Error: <message>"`` so that
        callers which simply print the result keep running.
    """
    try:
        # The model instance is created once at module level as 'model'
        response = model.generate_content(
            prompt,
            generation_config={
                "max_output_tokens": max_output_tokens,
                "temperature": temperature,
            },
        )
        # Accessing .text stays inside the try so that a failure there is
        # reported the same way as a failed request
        return response.text
    except Exception as e:
        # Deliberately broad: every failure becomes a printable message
        return f"Error: {e}"

# Execute Different Gemini Queries
Run five different types of queries on the collected news data: individual summaries, trend extraction from a specific source, creative summary, overall news trends analysis, and technology/AI coverage analysis.

In [None]:
# Dump every loaded article (its key, then the full text) for manual inspection
for key, body in articles.items():
    print(f"{key}:", body, "\n\n", sep="\n")

In [None]:
# Query 1: one summary request per loaded page, printed under the page's key
print("=== Query 1: Summaries for individual pages ===")
summary_instruction = (
    "Summarize the main headlines and key news on the following page\n\n"
)
for source, page_text in articles.items():
    reply = gemini_query(summary_instruction + page_text)
    print(f"\n[{source}]")
    print(reply)

# Query 2: Extract emerging trends from a specific source (e.g., "spiegel.de")
# Saved files may be keyed with or without the "www." prefix depending on how
# the filename was derived, so accept either spelling instead of silently
# skipping the query.
spiegel_key = next(
    (k for k in ("spiegel.de", "www.spiegel.de") if k in articles), None
)
if spiegel_key is not None:
    # Only the first 1500 characters of the page are sent
    prompt = f"From the following content from spiegel.de, list 3 emerging news trends:\n\n{articles[spiegel_key][:1500]}"
    print("\n=== Query 2: Emerging trends on spiegel.de ===")
    print(gemini_query(prompt))
else:
    print("\nQuery 2 skipped: no spiegel.de article loaded")

# Query 3: Provide a creative summary for a specific source (e.g., "dw.com")
# Saved files may be keyed with or without the "www." prefix depending on how
# the filename was derived, so accept either spelling instead of silently
# skipping the query.
dw_key = next((k for k in ("dw.com", "www.dw.com") if k in articles), None)
if dw_key is not None:
    # Only the first 1500 characters of the page are sent
    prompt = f"Provide a creative summary of today's news as presented on dw.com:\n\n{articles[dw_key][:1500]}"
    print("\n=== Query 3: Creative summary for dw.com ===")
    print(gemini_query(prompt))
else:
    print("\nQuery 3 skipped: no dw.com article loaded")

# Queries 4 and 5 both analyse a combined excerpt of ALL loaded sources.
# Slicing the joined text (full_text[:5000]) kept only the start of the first
# article(s), so the "across all sources" prompts never saw most sources.
# Instead, the same overall character budget is split evenly per source and
# each excerpt is labelled with its key. Raise the budget if the per-source
# excerpts turn out too short to be useful.
COMBINED_CHAR_BUDGET = 5000
chars_per_source = max(1, COMBINED_CHAR_BUDGET // max(1, len(articles)))
full_text = "\n\n".join(
    f"[{name}]\n{content[:chars_per_source]}" for name, content in articles.items()
)

# Query 4: Analyze major news trends across all sources
prompt = (
    "Based on the following combined news articles from 20 major European news sources, "
    "list 5 major news trends and summarize the overall current news landscape:\n\n"
    + full_text
)
print("\n=== Query 4: Overall European news trends ===")
print(gemini_query(prompt))

# Query 5: Analyze how technology and AI are covered across all sources
prompt = (
    "Using the following combined text from various European news sources, "
    "analyze and summarize how technology and AI are being covered in today's news. "
    "Highlight any emerging themes or concerns:\n\n" + full_text
)
print("\n=== Query 5: Technology and AI coverage analysis ===")
print(gemini_query(prompt))