In [1]:
# Set the working folder (current working directory) for the notebook.
# The project directory can be overridden without editing this cell by setting
# the MONITORING_BERITA_DIR environment variable (for example on Colab:
# '/content/drive/MyDrive/Monitoring Berita'). When the variable is not set the
# local OneDrive path below is used.
import os
cwd = os.environ.get(
    "MONITORING_BERITA_DIR",
    '/Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Pekerjaan BMN/05. 2025/98_monitoring_berita/monitoring-berita',
)
os.chdir(cwd)

In [2]:
# Check and install required libraries only if not already installed
import importlib
import subprocess
import sys

def check_and_install_package(package_name, import_name=None):
    """Ensure a package is importable, installing it with pip when it is missing.

    Args:
        package_name: Distribution name handed to ``pip install``.
        import_name: Module name used for the import check. Defaults to
            ``package_name`` when omitted.

    Returns:
        True when the module can be imported (either it was already present or
        it was installed and is importable afterwards). False when pip fails
        or the module still cannot be imported after installation.
    """
    if import_name is None:
        import_name = package_name

    try:
        importlib.import_module(import_name)
        print(f"✓ {package_name} sudah terinstall")
        return True
    except ImportError:
        print(f"⚠ {package_name} belum terinstall, menginstall...")

    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
    except subprocess.CalledProcessError:
        print(f"✗ Gagal menginstall {package_name}")
        return False

    # The import system caches directory listings; refresh them so a package
    # installed while this interpreter is running becomes visible.
    importlib.invalidate_caches()
    try:
        importlib.import_module(import_name)
    except ImportError:
        # pip succeeded but the module name is wrong or the install is broken.
        print(f"✗ {package_name} terinstall tetapi modul '{import_name}' tidak bisa diimport")
        return False

    print(f"✓ {package_name} berhasil diinstall")
    return True

# Dependencies to verify, as (pip distribution name, import name) pairs
packages_to_check = [
    ("googlenewsdecoder", "googlenewsdecoder"),
    ("tqdm", "tqdm"),
]

print("Mengecek dependencies...")
all_installed = True
for package, import_name in packages_to_check:
    # Call the checker first so every package is processed even after a failure
    all_installed = check_and_install_package(package, import_name) and all_installed

# Final summary for the whole dependency check
print("\n🎉 Semua dependencies siap!" if all_installed else "\n❌ Ada masalah dengan instalasi dependencies")

Mengecek dependencies...
✓ googlenewsdecoder sudah terinstall
✓ tqdm sudah terinstall

🎉 Semua dependencies siap!


In [3]:
"""Tarik daftar berita via Google News RSS.

Menghasilkan DataFrame dengan kolom sama seperti scraper lain:
    query, judul_berita, tanggal_berita, penulis_berita, url_berita

Sumber: Google News RSS (hl=id, gl=ID)
Catatan:
 - Google News tidak selalu menyediakan penulis, hanya sumber (media). Itu kita mapping ke penulis_berita.
 - Tanggal di <pubDate> adalah GMT. Kita konversi ke zona Asia/Jakarta dan format "%Y-%m-%d %H:%M:%S".
 - Filter tanggal: jika config['search_date'] tidak kosong, rentang tanggal dikirim ke Google News lewat operator after:/before: pada query; tidak ada lagi filter tanggal ketat di sisi klien.
 - Pembatas jumlah item per query (max_items) diterapkan pada hasil RSS yang sudah dibatasi rentang tanggal tersebut.

Pemakaian:
    python list_berita_google_news_rss.py  # hasil akan tersimpan ke daftar_berita/google_news_rss.xlsx

Opsi lingkungan (opsional melalui variabel environment):
    GNEWS_TIME_WINDOW_DAYS  (default 30) -> batas pencarian relatif (when:Nd) yang dipakai jika search_date kosong.
"""

from __future__ import annotations

import os
import json
import time
import random
from typing import List, Dict, Optional
import datetime as dt
import zoneinfo
import re
import html
import urllib.parse as urlparse
import xml.etree.ElementTree as ET
from concurrent.futures import ThreadPoolExecutor, as_completed

import pandas as pd
import requests
from tqdm import tqdm

from googlenewsdecoder import gnewsdecoder

# --------------------------------------------------
# Parameter umum
# --------------------------------------------------
# Time zone that parsed pubDate values are converted to before formatting.
JAKARTA_TZ = zoneinfo.ZoneInfo("Asia/Jakarta")
# Pool of browser User-Agent strings; fetch_url picks one at random per request.
USER_AGENTS = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36",
]
# (min, max) seconds for the random pause taken after a query has been scraped.
REQUEST_DELAY_RANGE = (0.8, 1.6)
# Number of attempts fetch_url makes before giving up.
RETRY_TOTAL = 3
# Relative search window in days (sent as "when:<N>d" when no explicit dates
# are given); can be overridden with the GNEWS_TIME_WINDOW_DAYS env variable.
TIME_WINDOW_DAYS = int(os.environ.get("GNEWS_TIME_WINDOW_DAYS", "30"))  # Increase default to 30 days

# Parallelization parameters
MAX_WORKERS_RSS = 5  # For RSS fetching - keep this modest to avoid rate limiting
MAX_WORKERS_DECODE = 10  # For URL decoding - can be higher because it hits a different service

# Debug options
# NOTE(review): DEBUG_RSS_URLS is not referenced anywhere in the visible code.
DEBUG_RSS_URLS = os.environ.get("DEBUG_RSS_URLS", "0") == "1"  # Set to "1" to debug the URLs being used

# --------------------------------------------------
# Util: HTTP fetch dengan retry sederhana
# --------------------------------------------------

def fetch_url(url: str, timeout: float = 15.0) -> str:
    """Download ``url`` and return the response body as text, with simple retries.

    Each attempt uses a random User-Agent from ``USER_AGENTS``. An HTTP status
    of 400 or above is treated as a failure. Between attempts the function
    waits ``0.5 * attempt`` seconds; no wait happens after the final attempt.

    Args:
        url: Address to fetch.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Returns:
        The response text of the first successful attempt.

    Raises:
        RuntimeError: after ``RETRY_TOTAL`` failed attempts; the last
            underlying error is attached as ``__cause__``.
    """
    last_err: Optional[Exception] = None
    for attempt in range(1, RETRY_TOTAL + 1):
        try:
            headers = {"User-Agent": random.choice(USER_AGENTS)}
            r = requests.get(url, headers=headers, timeout=timeout)
            if r.status_code >= 400:
                raise RuntimeError(f"Status {r.status_code}")
            return r.text
        except Exception as e:  # noqa: BLE001
            last_err = e
            # Back off only when another attempt will follow; sleeping after
            # the last failure would just delay the error.
            if attempt < RETRY_TOTAL:
                time.sleep(0.5 * attempt)
    raise RuntimeError(f"Gagal fetch setelah {RETRY_TOTAL} percobaan: {last_err}") from last_err

# --------------------------------------------------
# Build URL Google News RSS
# --------------------------------------------------

def build_google_news_rss_url(query: str, date_filters: Optional[List[str]] = None, time_window_days: int = TIME_WINDOW_DAYS) -> str:
    """Build a Google News RSS search URL with an absolute or relative date filter.

    Args:
        query: Search query; surrounding whitespace is stripped.
        date_filters: Dates formatted as YYYY-MM-DD. When non-empty, the
            earliest and latest entries define an ``after:``/``before:`` range.
        time_window_days: Used as ``when:<N>d`` when ``date_filters`` is empty
            or None.

    Returns:
        The RSS search URL with ``hl=id&gl=ID&ceid=ID:id``.

    Raises:
        ValueError: if the latest entry of ``date_filters`` is not a valid
            YYYY-MM-DD date.
    """
    q = query.strip()

    if date_filters:
        # ISO-formatted dates order correctly as plain strings.
        start_date = min(date_filters)
        end_date = max(date_filters)

        # "before:" is exclusive, so move the end one day forward to keep the
        # last requested date inside the range.
        end_dt = dt.datetime.strptime(end_date, "%Y-%m-%d") + dt.timedelta(days=1)
        end_date_exclusive = end_dt.strftime("%Y-%m-%d")

        query_with_date = f"{q} after:{start_date} before:{end_date_exclusive}"
    else:
        # No explicit dates: fall back to the relative "when:" operator.
        query_with_date = f"{q} when:{time_window_days}d"

    encoded = urlparse.quote_plus(query_with_date)
    base = "https://news.google.com/rss/search"
    return f"{base}?q={encoded}&hl=id&gl=ID&ceid=ID:id"

# --------------------------------------------------
# Parse RSS XML ke list item
# --------------------------------------------------

def parse_rss_items(xml_text: str) -> List[Dict[str, str]]:
    """Turn Google News RSS XML into a list of article dictionaries.

    Control characters that are illegal in XML are removed before parsing.
    Each ``<item>`` under ``<channel>`` yields a dict with the keys
    ``judul_berita``, ``url_berita``, ``pub_raw`` and ``penulis_berita``;
    missing or empty elements become empty strings. An empty list is returned
    when the document has no ``<channel>`` element.
    """
    sanitized = re.sub(r"[\x00-\x08\x0b-\x0c\x0e-\x1f]", "", xml_text)
    channel = ET.fromstring(sanitized).find("channel")
    if channel is None:
        return []

    def _stripped_text(parent, tag: str) -> str:
        # Text of the first matching child, trimmed; "" when absent or empty.
        child = parent.find(tag)
        if child is not None and child.text:
            return child.text.strip()
        return ""

    parsed_items: List[Dict[str, str]] = []
    for entry in channel.findall("item"):
        headline = html.unescape(_stripped_text(entry, "title"))
        raw_link = _stripped_text(entry, "link")
        published = _stripped_text(entry, "pubDate")
        outlet = _stripped_text(entry, "source")

        parsed_items.append({
            "judul_berita": headline,
            "url_berita": resolve_final_article_url(raw_link),
            "pub_raw": published,
            "penulis_berita": outlet,
        })
    return parsed_items

# --------------------------------------------------
# Resolve final URL dari link news.google.com jika ada parameter url=...
# --------------------------------------------------

def resolve_final_article_url(link: str) -> str:
    """Return the target of a news.google.com link that carries a ``url=`` parameter.

    Falsy input and links that do not contain both ``news.google.com`` and
    ``url=`` are returned unchanged, as is any input that fails to parse.
    """
    if not link:
        return link
    try:
        looks_like_redirect = "news.google.com" in link and "url=" in link
        targets = (
            urlparse.parse_qs(urlparse.urlparse(link).query).get("url")
            if looks_like_redirect
            else None
        )
    except Exception:  # noqa: BLE001
        return link
    # Use the first "url" value when present, otherwise keep the original link
    return targets[0] if targets else link

# --------------------------------------------------
# Convert pubDate -> datetime lokal & format string
# --------------------------------------------------
# Accepted pubDate layouts: time zone given as a name (%Z) or as an offset (%z)
RFC_PARSE_FORMATS = [
    "%a, %d %b %Y %H:%M:%S %Z",
    "%a, %d %b %Y %H:%M:%S %z",
]


def parse_pubdate(pub_raw: str) -> Optional[dt.datetime]:
    """Parse an RSS pubDate string and convert it to ``JAKARTA_TZ``.

    Each layout in ``RFC_PARSE_FORMATS`` is tried in order. A result without
    time zone information is treated as UTC. Returns None for empty input or
    when no layout matches.
    """
    if not pub_raw:
        return None
    for layout in RFC_PARSE_FORMATS:
        try:
            moment = dt.datetime.strptime(pub_raw, layout)
            aware = moment if moment.tzinfo is not None else moment.replace(tzinfo=dt.timezone.utc)
            localized = aware.astimezone(JAKARTA_TZ)
        except Exception:  # noqa: BLE001
            continue
        return localized
    return None

# --------------------------------------------------
# Ambil berita untuk satu query (filter tanggal + batasi max item)
# --------------------------------------------------

def scrape_google_news_query(query: str, max_items: int, date_filters: List[str], delay_range=REQUEST_DELAY_RANGE, pbar=None) -> pd.DataFrame:
    """Fetch the Google News RSS feed for one query and return it as a DataFrame.

    The date range is applied by Google through the ``after:``/``before:``
    operators that ``build_google_news_rss_url`` puts in the query, so no
    additional client-side date filtering is done here.

    Args:
        query: Search keyword.
        max_items: Maximum number of rows to return; 0 or a negative value
            means no limit. The limit is applied after URL de-duplication.
        date_filters: Dates (YYYY-MM-DD) forwarded to the URL builder.
        delay_range: (min, max) seconds of random pause after a successful
            scrape.
        pbar: Optional progress bar whose description is updated.

    Returns:
        DataFrame with columns query, judul_berita, tanggal_berita,
        penulis_berita, url_berita. An empty frame with those columns is
        returned when the feed cannot be fetched or parsed; a warning is
        written in that case.
    """
    columns = ["query", "judul_berita", "tanggal_berita", "penulis_berita", "url_berita"]

    url = build_google_news_rss_url(query, date_filters)
    if pbar:
        pbar.set_description(f"Fetching: {query}")

    try:
        xml_text = fetch_url(url)
        # Malformed XML is handled the same way as a failed download.
        raw_items = parse_rss_items(xml_text)
    except Exception as e:  # noqa: BLE001
        # tqdm.write keeps the message from breaking active progress bars.
        tqdm.write(f"⚠ Gagal mengambil RSS untuk query '{query}': {e}")
        return pd.DataFrame(columns=columns)

    rows: List[Dict[str, str]] = []
    for it in raw_items:
        pub_dt = parse_pubdate(it.get("pub_raw", ""))
        rows.append({
            "query": query,
            "judul_berita": it.get("judul_berita", ""),
            "tanggal_berita": pub_dt.strftime("%Y-%m-%d %H:%M:%S") if pub_dt else "",
            "penulis_berita": it.get("penulis_berita", ""),
            "url_berita": it.get("url_berita", ""),
        })

    df = pd.DataFrame(rows, columns=columns)
    if not df.empty:
        # Basic per-query de-duplication (identical URL only).
        df = df.drop_duplicates(subset=["url_berita"]).reset_index(drop=True)

    # Truncate after de-duplication so the limit counts unique articles.
    if max_items > 0:
        df = df.head(max_items)

    time.sleep(random.uniform(*delay_range))
    return df

# --------------------------------------------------
# Ambil berita untuk banyak query (PARALLEL VERSION)
# --------------------------------------------------

def scrape_google_news_queries_parallel(queries: List[str], max_items: int, date_filters: List[str]) -> pd.DataFrame:
    """Scrape several queries concurrently and concatenate the results.

    Every query is submitted to a thread pool of ``MAX_WORKERS_RSS`` workers
    and scraped without a per-query progress bar. A query whose task raises
    contributes an empty frame. Rows appear in completion order.
    """
    columns = ["query", "judul_berita", "tanggal_berita", "penulis_berita", "url_berita"]
    frames: List[pd.DataFrame] = []

    with ThreadPoolExecutor(max_workers=MAX_WORKERS_RSS) as executor:
        # One task per query
        pending = [
            executor.submit(
                scrape_google_news_query,
                query,
                max_items=max_items,
                date_filters=date_filters,
                pbar=None,
            )
            for query in queries
        ]

        # Single shared progress bar, advanced as tasks finish
        with tqdm(total=len(queries), desc="Scraping queries", unit="query") as pbar:
            for finished in as_completed(pending):
                try:
                    frames.append(finished.result())
                    pbar.set_postfix({'Articles': sum(len(frame) for frame in frames)})
                except Exception:
                    # Failed query: keep an empty placeholder frame
                    frames.append(pd.DataFrame(columns=columns))

                pbar.update(1)

    if not frames:
        return pd.DataFrame(columns=columns)

    return pd.concat(frames, ignore_index=True)

def scrape_google_news_queries(queries: List[str], max_items: int, date_filters: List[str]) -> pd.DataFrame:
    """Scrape all queries, sequentially for up to two and in parallel otherwise."""
    if len(queries) > 2:
        # Enough queries to justify the thread pool
        return scrape_google_news_queries_parallel(queries, max_items, date_filters)

    # Few queries: the overhead of parallel execution is not worth it
    frames: List[pd.DataFrame] = []
    with tqdm(total=len(queries), desc="Scraping queries", unit="query") as pbar:
        for q in queries:
            frames.append(
                scrape_google_news_query(q, max_items=max_items, date_filters=date_filters, pbar=pbar)
            )
            pbar.update(1)
            pbar.set_postfix({'Articles': sum(len(frame) for frame in frames)})

    if not frames:
        return pd.DataFrame(columns=["query", "judul_berita", "tanggal_berita", "penulis_berita", "url_berita"])
    return pd.concat(frames, ignore_index=True)

# --------------------------------------------------
# Fungsi untuk menghapus duplikat menyeluruh
# --------------------------------------------------

def remove_duplicates_comprehensive(df: pd.DataFrame) -> pd.DataFrame:
    """Remove duplicate articles by URL and by normalized title.

    Two rules are applied, keeping the first occurrence each time:
      1. Rows with the same ``url_berita``.
      2. Rows whose title is identical after normalization (lower-cased,
         punctuation replaced by spaces, whitespace collapsed). Rows with a
         blank title are never merged by this rule, because an empty title
         says nothing about whether two articles are the same.

    Args:
        df: DataFrame with columns query, judul_berita, tanggal_berita,
            penulis_berita, url_berita.

    Returns:
        A new de-duplicated DataFrame with the same columns and a fresh index.
        An empty input is returned as-is. The input frame is not modified.
    """
    if df.empty:
        return df

    initial_count = len(df)

    with tqdm(total=3, desc="Removing duplicates", unit="step") as pbar:
        # 1. Identical URLs (cheapest and most reliable signal)
        pbar.set_postfix_str("URLs")
        df_cleaned = df.drop_duplicates(subset=["url_berita"], keep='first').reset_index(drop=True)
        pbar.update(1)

        # 2. Normalize titles in a standalone Series so no helper columns are
        #    added to the frame. fillna/astype guard against missing or
        #    non-string titles, on which the .str accessor would fail.
        pbar.set_postfix_str("Titles")
        judul_normalized = (
            df_cleaned['judul_berita']
            .fillna("")
            .astype(str)
            .str.lower()
            .str.replace(r'[^\w\s]', ' ', regex=True)
            .str.replace(r'\s+', ' ', regex=True)
            .str.strip()
        )
        pbar.update(1)

        # 3. Drop repeated normalized titles. Once titles are unique, a further
        #    pass keyed on author + date + title cannot match anything, so a
        #    single pass is sufficient.
        pbar.set_postfix_str("Complex")
        is_repeat = judul_normalized.duplicated(keep='first') & judul_normalized.ne("")
        df_cleaned = df_cleaned.loc[~is_repeat].reset_index(drop=True)
        pbar.update(1)

    final_count = len(df_cleaned)
    total_removed = initial_count - final_count

    if total_removed > 0:
        print(f"Removed {total_removed} duplicates: {initial_count} → {final_count} articles")

    return df_cleaned

# --------------------------------------------------
# Main CLI (aman untuk notebook & script)
# --------------------------------------------------

def main():  # noqa: D401
    """Scrape Google News RSS for every configured keyword and de-duplicate the result.

    Reads ``config.json`` from the project directory (the parent of this file
    when ``__file__`` is defined, otherwise the current working directory) and
    uses its ``keywords`` and ``search_date`` entries. When ``search_date`` is
    non-empty every (keyword, date) pair is scraped on its own; otherwise
    ``scrape_google_news_queries`` is used with the relative time window.
    Tasks that raise are skipped and listed once scraping has finished.

    Returns:
        DataFrame with columns query, judul_berita, tanggal_berita,
        penulis_berita, url_berita after ``remove_duplicates_comprehensive``.

    Raises:
        SystemExit: when ``config.json`` is missing or has no ``keywords``.
    """
    start_time = time.time()
    columns = ["query", "judul_berita", "tanggal_berita", "penulis_berita", "url_berita"]

    if "__file__" in globals():
        base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    else:
        base_dir = os.getcwd()

    config_path = os.path.join(base_dir, "config.json")
    if not os.path.exists(config_path):
        raise SystemExit(f"config.json tidak ditemukan di {base_dir}")
    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)

    queries = config.get("keywords", [])
    date_filters = config.get("search_date", [])  # list of dates (YYYY-MM-DD)
    # Per-query article cap; 0 means "no limit". Change for the final result.
    max_items = 0
    if not queries:
        raise SystemExit("keywords kosong di config.json")

    # 0 disables the cap, so do not report it as "max 0 articles".
    limit_label = f"max {max_items} articles per query" if max_items > 0 else "no per-query article limit"
    print(f"Scraping Google News: {len(queries)} queries, {limit_label}")
    if date_filters:
        # Show the date range instead of every individual date
        sorted_dates = sorted(date_filters)
        if len(sorted_dates) == 1:
            print(f"Date filter: {sorted_dates[0]}")
        elif len(sorted_dates) <= 2:
            print(f"Date filter: {', '.join(sorted_dates)}")
        else:
            print(f"Date filter: {sorted_dates[0]} to {sorted_dates[-1]} ({len(sorted_dates)} days)")

        # Scrape each date separately so that every article is captured
        total_tasks = len(queries) * len(date_filters)
        print(f"Using individual date scraping: {len(queries)} × {len(date_filters)} = {total_tasks} tasks")

        all_results = []
        failed_tasks = []
        with tqdm(total=total_tasks, desc="Scraping", unit="task") as pbar:
            for query in queries:
                for single_date in date_filters:
                    try:
                        df_single = scrape_google_news_query(
                            query=query,
                            max_items=max_items,
                            date_filters=[single_date],  # single date only
                            pbar=None
                        )

                        if len(df_single) > 0:
                            all_results.append(df_single)

                        pbar.set_postfix({
                            'Articles': sum(len(df) for df in all_results)
                        })

                    except Exception as e:  # noqa: BLE001
                        # Keep going with the remaining tasks, but remember
                        # the failure so it can be reported afterwards.
                        failed_tasks.append((query, single_date, e))

                    pbar.update(1)

        if failed_tasks:
            print(f"⚠ {len(failed_tasks)} of {total_tasks} tasks failed:")
            for failed_query, failed_date, err in failed_tasks:
                print(f"  - '{failed_query}' @ {failed_date}: {err}")

        # Combine results
        if all_results:
            df = pd.concat(all_results, ignore_index=True)
        else:
            df = pd.DataFrame(columns=columns)
    else:
        # No date filters: use the multi-query scraper with the relative window
        df = scrape_google_news_queries(queries, max_items=max_items, date_filters=date_filters)

    # Deduplication
    df = remove_duplicates_comprehensive(df)

    total_duration = time.time() - start_time
    print(f"Completed in {total_duration:.1f}s")

    return df


def decode_single_url(url_with_index):
    """Decode one Google News URL; designed to be run from a worker thread.

    Args:
        url_with_index: ``(index, source_url)`` pair.

    Returns:
        ``(index, url, success)``. On success ``url`` is the decoded URL;
        when the decoder reports failure or raises, the original URL is
        returned with ``success`` set to False.
    """
    index, source_url = url_with_index
    fallback = (index, source_url, False)  # keep the original URL on any failure

    try:
        # Short interval because many of these run concurrently
        outcome = gnewsdecoder(source_url, interval=0.1)
        if outcome.get("status"):
            return index, outcome["decoded_url"], True
    except Exception:
        return fallback
    return fallback

def convert_link_parallel(df):
    """Decode every ``url_berita`` value concurrently and write the results back.

    URLs are decoded with ``decode_single_url`` on a pool of
    ``MAX_WORKERS_DECODE`` threads. Row order is preserved; a URL that cannot
    be decoded keeps its original value. The ``url_berita`` column of ``df`` is
    overwritten in place and the same frame is returned. An empty frame is
    returned untouched.
    """
    source_urls = df['url_berita'].tolist()
    if not source_urls:
        return df

    # Pre-filled with the originals, so a failed task simply keeps its URL
    resolved = list(source_urls)
    tally = {'Success': 0, 'Failed': 0}

    with ThreadPoolExecutor(max_workers=MAX_WORKERS_DECODE) as executor:
        # Remember each task's row position so results land in the right slot
        position_of = {
            executor.submit(decode_single_url, (position, url)): position
            for position, url in enumerate(source_urls)
        }

        with tqdm(total=len(source_urls), desc="Decoding URLs", unit="url") as pbar:
            for finished in as_completed(position_of):
                try:
                    position, final_url, succeeded = finished.result()
                except Exception:
                    position = position_of[finished]
                    final_url, succeeded = source_urls[position], False

                resolved[position] = final_url
                tally['Success' if succeeded else 'Failed'] += 1
                pbar.set_postfix(dict(tally))
                pbar.update(1)

    # Replace the column with the decoded URLs
    df['url_berita'] = resolved

    return df

def convert_link():
    """Run the scraper, decode the Google News URLs and save the result to Excel.

    Up to 10 articles are decoded one by one with a 1-second decoder interval;
    larger results go through ``convert_link_parallel``. The output is written
    to ``daftar_berita/google_news_rss.xlsx`` under the project directory.
    """
    # Project directory: parent of this file as a script, cwd in a notebook
    base_dir = (
        os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
        if "__file__" in globals()
        else os.getcwd()
    )

    df = main()

    if len(df) > 10:
        # Many URLs: decode in parallel
        df = convert_link_parallel(df)
    else:
        # Few URLs: decode sequentially
        source_urls = df['url_berita']

        print(f"\nMemulai decode {len(source_urls)} Google News URLs...")

        resolved = []
        tally = {'✓': 0, '✗': 0}

        with tqdm(total=len(source_urls), desc="Decode URLs", unit="url") as pbar:
            for source_url in source_urls:
                try:
                    outcome = gnewsdecoder(source_url, interval=1)
                    succeeded = bool(outcome.get("status"))
                    final_url = outcome["decoded_url"] if succeeded else source_url
                except Exception:
                    # Any decoder error: keep the original URL
                    succeeded, final_url = False, source_url

                resolved.append(final_url)
                tally['✓' if succeeded else '✗'] += 1
                pbar.set_postfix(dict(tally))
                pbar.update(1)

        df['url_berita'] = resolved

    # Save the result
    out_dir = os.path.join(base_dir, "daftar_berita")
    os.makedirs(out_dir, exist_ok=True)
    out_xlsx = os.path.join(out_dir, "google_news_rss.xlsx")
    df.to_excel(out_xlsx, index=False)

    print(f"\nCompleted! File saved: {out_xlsx}")
    print(f"Total articles: {len(df)}")


# Run only if this has not been executed before in the current kernel/process.
# The flag is kept in this module's globals, which is the same namespace as the
# attributes of sys.modules[__name__], so the guard does not rely on `sys`
# having been imported by an earlier cell.
if '_conversion_done' not in globals():
    if __name__ == "__main__":
        convert_link()
        # Set only after a successful run so a failed run can be retried.
        _conversion_done = True
else:
    print("Sudah dijalankan sebelumnya. Restart kernel jika ingin menjalankan ulang.")

Scraping Google News: 2 queries, max 0 articles per query
Date filter: 2025-09-28 to 2025-10-02 (5 days)
Using individual date scraping: 2 × 5 = 10 tasks


Scraping: 100%|██████████| 10/10 [00:17<00:00,  1.79s/task, Articles=800]
Scraping: 100%|██████████| 10/10 [00:17<00:00,  1.79s/task, Articles=800]
Removing duplicates: 100%|██████████| 3/3 [00:00<00:00, 221.55step/s, Complex]



Removed 197 duplicates: 800 → 603 articles
Completed in 17.9s


Decoding URLs: 100%|██████████| 603/603 [01:21<00:00,  7.43url/s, Success=603, Failed=0]


Completed! File saved: /Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Pekerjaan BMN/05. 2025/98_monitoring_berita/monitoring-berita/daftar_berita/google_news_rss.xlsx
Total articles: 603



