# Setting Environment

In [13]:
"""# mount the colab with google drive
from google.colab import drive
drive.mount('/content/drive')"""

"# mount the colab with google drive\nfrom google.colab import drive\ndrive.mount('/content/drive')"

In [14]:
# Working directory for the whole notebook: config.json is read from here and
# outputs are written below it.
# The location can be overridden without editing the notebook by setting the
# MONITORING_BERITA_DIR environment variable; when it is unset the original
# author-specific path is used, so existing runs behave exactly as before.
import os
cwd = os.environ.get(
    "MONITORING_BERITA_DIR",
    "/Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Pekerjaan BMN/05. 2025/98_monitoring_berita",
)
# cwd = '/content/drive/MyDrive/Monitoring Berita'
os.chdir(cwd)

In [15]:
# %%
import re
import unicodedata
import pandas as pd
from typing import Dict, List, Tuple
import html
import dateparser

# Display option: let pandas show up to 200 characters per cell so long text
# values are not cut off when a DataFrame is rendered.
pd.set_option("display.max_colwidth", 200)


# Proses Utama

### Membaca Config

In [16]:
import json
from pathlib import Path

# Load config.json from the working directory.
config_path = Path(cwd + "/config.json")
with config_path.open("r", encoding="utf-8") as f:
    config = json.load(f)

# CSV path stored under 'last_output_path'; it is read with pd.read_csv below.
csv_path = config['last_output_path']

# The most recent (last) entry of 'search_date' is the date-filter spec.
# It may be a str, a list or a dict.
date_filter_spec = config['search_date'][-1]



### Membaca Output Daftar Berita

In [17]:
# Read the news list from the CSV path taken from the config.
df = pd.read_csv(csv_path)

# Add `tanggal_berita_norm`: each raw `tanggal_berita` value parsed to a
# datetime.date, or NaT when the value is missing or cannot be parsed.
# This happens before labeling so the rows can be filtered by date first.
if 'tanggal_berita' not in df.columns:
    # No source column at all: every row gets NaT.
    df['tanggal_berita_norm'] = pd.NaT
else:
    def _parse_tgl(x):
        """Parse one raw date cell; return a date, or NaT on missing/unparseable/error."""
        try:
            if pd.isna(x):
                return pd.NaT
            parsed = dateparser.parse(str(x))
            if parsed:
                return parsed.date()
            return pd.NaT
        except Exception:
            # Any parser failure is treated the same as an unparseable value.
            return pd.NaT

    df['tanggal_berita_norm'] = df['tanggal_berita'].apply(_parse_tgl)

# Normalisation quality report: how many rows parsed and how many did not.
total = len(df)
na_count = df['tanggal_berita_norm'].isna().sum()
print(f"Normalisasi tanggal_berita: total={total}, sukses={total-na_count}, gagal={na_count} ({(na_count/total if total else 0):.1%})")

Normalisasi tanggal_berita: total=277, sukses=277, gagal=0 (0.0%)


In [18]:
# Keep only rows whose normalized date is present (drops NaT rows), so the
# date-window comparison that follows only sees real dates.
if 'tanggal_berita_norm' in df.columns:
    df = df.loc[df['tanggal_berita_norm'].notna()]

In [19]:
# Restrict `df` to the date window described by `date_filter_spec` (best effort).
#
# Supported spec shapes:
#   - list/tuple of dates  -> window runs from the earliest to the latest parseable entry
#   - dict                 -> keys 'start'/'from' and 'end'/'to'; a single bound
#                             collapses the window to that one day
#   - string 'last_<N>d'   -> from N days before today up to today
#   - any other string     -> parsed as a single date (one-day window)
#
# The window is inclusive on both ends. Any failure is reported with a warning
# and leaves `df` unfiltered instead of stopping the notebook.
try:
    start_date, end_date = None, None

    if isinstance(date_filter_spec, (list, tuple)):
        # Unparseable entries are ignored rather than failing the whole spec.
        parsed = [d for d in (dateparser.parse(str(x)) for x in date_filter_spec) if d]
        if parsed:
            start_date = min(parsed).date()
            end_date = max(parsed).date()

    elif isinstance(date_filter_spec, dict):
        start = date_filter_spec.get('start') or date_filter_spec.get('from')
        end = date_filter_spec.get('end') or date_filter_spec.get('to')
        ps = dateparser.parse(str(start)) if start else None
        pe = dateparser.parse(str(end)) if end else None
        if ps and pe:
            # Tolerate reversed bounds.
            start_date, end_date = min(ps, pe).date(), max(ps, pe).date()
        elif ps:
            start_date = end_date = ps.date()
        elif pe:
            start_date = end_date = pe.date()

    else:
        # A string such as '2025-09-20' or the relative form 'last_7d'.
        spec = str(date_filter_spec).strip()
        m = re.fullmatch(r'last_(\d+)d', spec, flags=re.IGNORECASE)
        if m:
            days = int(m.group(1))
            # Take one snapshot of "now" so both ends of the window come from
            # the same instant (two separate calls could straddle midnight).
            now = pd.Timestamp.now()
            end_date = now.date()
            start_date = (now - pd.Timedelta(days=days)).date()
        else:
            p = dateparser.parse(spec)
            if p:
                start_date = end_date = p.date()

    if start_date:
        if end_date is None:
            end_date = start_date
        before = len(df)
        df = df[(df['tanggal_berita_norm'] >= start_date) & (df['tanggal_berita_norm'] <= end_date)]
        print(f"Filter tanggal: {start_date} s.d {end_date} (dari {before} -> {len(df)})")
    else:
        print("Tidak menerapkan filter tanggal karena start_date tidak terdefinisi.")
except Exception as e:
    print(f"Peringatan: gagal menerapkan filter tanggal: {e}")

Filter tanggal: 2025-09-26 s.d 2025-09-26 (dari 277 -> 97)


### Weak Labelling

In [20]:
# Helper cleaning & date utilities (ditambahkan untuk mencegah NameError)
import unicodedata, html, re
import dateparser

def basic_clean(text):
    """Return `text` as a single-line, whitespace-normalised string.

    Steps, in order:
      1. Missing values (None, NaN, pd.NA, pd.NaT) become "".
      2. Non-string values are converted with str().
      3. HTML entities are unescaped (e.g. "&amp;" -> "&").
      4. Unicode is normalised to NFKC.
      5. Every run of whitespace (including CR, LF and TAB) collapses to one
         space, and leading/trailing whitespace is stripped.

    Parameters:
        text: any scalar; typically a str or a missing value from a DataFrame cell.

    Returns:
        str: the cleaned text, possibly empty.
    """
    # Treat every scalar "missing" marker as empty. Checking only for float NaN
    # would let pd.NA / pd.NaT through and turn them into the literal strings
    # "<NA>" / "NaT". is_scalar() keeps pd.isna() from being called on
    # list-likes, where it would return an array.
    if text is None or (pd.api.types.is_scalar(text) and pd.isna(text)):
        return ""
    if not isinstance(text, str):
        text = str(text)
    text = html.unescape(text)
    text = unicodedata.normalize("NFKC", text)
    # \s already covers \r, \n and \t, so one pass handles both line breaks
    # and repeated spaces.
    text = re.sub(r"\s+", " ", text).strip()
    return text

def clean_title(text):
    """Clean a news title; currently delegates unchanged to basic_clean."""
    return basic_clean(text)

def clean_article(text):
    """Clean a news article body; currently delegates unchanged to basic_clean."""
    return basic_clean(text)

def normalize_date(val):
    """Parse a date in any format dateparser understands.

    Returns the parsed datetime, or None when `val` is None, a float NaN,
    cannot be parsed, or the parser raises. When the input has no day
    component, dateparser is told to prefer the first day of the month.
    """
    is_missing = val is None or (isinstance(val, float) and pd.isna(val))
    if is_missing:
        return None
    try:
        return dateparser.parse(str(val), settings={"PREFER_DAY_OF_MONTH": "first"})
    except Exception:
        return None

In [21]:
# %%
# ... existing helpers above ...

def parse_date_filter(spec):
    """Turn a date-filter spec into an inclusive (start_date, end_date) pair.

    Both items are datetime.date objects (not datetime), or (None, None) when
    the spec is missing or nothing in it can be parsed. Dates are parsed with
    normalize_date. Reversed bounds are swapped so start <= end.

    Accepted specs:
      - None                      -> (None, None)
      - list / tuple of dates     -> (earliest, latest) of the parseable, truthy items
      - dict                      -> keys "start"/"from" and "end"/"to"; when only
                                     one bound is given it is used for both ends.
                                     Example: {"start": "2025-09-01", "end": "2025-09-26"}
      - "last_<N>d" (any case)    -> (today - N days, today), e.g. "last_7d"
      - range string              -> two dates separated by " to ", " - ", an en dash,
                                     an em dash or ":"; e.g. "2025-09-20 to 2025-09-26",
                                     "20/09/2025 - 26/09/2025", "2025-09-20:2025-09-26"
      - single date string        -> (date, date), e.g. "2025-09-26" or "26/09/2025"
    """
    if spec is None:
        return None, None

    if isinstance(spec, (list, tuple)):
        # List of dates: take min/max of whatever could be parsed.
        parsed = (normalize_date(x) for x in spec if x)
        dates = [d.date() for d in parsed if d is not None]
        if not dates:
            return None, None
        return min(dates), max(dates)

    if isinstance(spec, dict):
        # "from"/"to" are accepted as aliases so this helper understands the
        # same dict keys as the notebook's inline date-filter cell.
        raw_start = spec.get("start") or spec.get("from")
        raw_end = spec.get("end") or spec.get("to")
        d1 = normalize_date(raw_start) if raw_start else None
        d2 = normalize_date(raw_end) if raw_end else None
        if d1 is None and d2 is None:
            return None, None
        # A single bound collapses the window to that one day.
        if d1 is None:
            d1 = d2
        if d2 is None:
            d2 = d1
        d1d, d2d = sorted((d1.date(), d2.date()))
        return d1d, d2d

    # Everything else is handled as a string.
    s = str(spec).strip()

    # Relative form: last_<N>d
    m = re.fullmatch(r"last_(\d+)d", s, flags=re.IGNORECASE)
    if m:
        # One snapshot of "now" so both ends come from the same instant.
        now = pd.Timestamp.now()
        start = (now - pd.Timedelta(days=int(m.group(1)))).date()
        return start, now.date()

    # Range with a common separator; the first separator that yields two
    # parseable dates wins.
    # NOTE(review): ":" is tried last, but a single date that carries a time
    # ("2025-09-26 10:30") also contains ":" — confirm such specs are never used.
    for sep in [" to ", " - ", "–", "—", ":"]:
        if sep not in s:
            continue
        left, right = (part.strip() for part in s.split(sep, 1))
        d1 = normalize_date(left)
        d2 = normalize_date(right)
        if d1 is not None and d2 is not None:
            d1d, d2d = sorted((d1.date(), d2.date()))
            return d1d, d2d

    # Single date
    d = normalize_date(s)
    if d is not None:
        return d.date(), d.date()

    return None, None


In [22]:
# %%
# The pattern sources in this notebook are written as regexes with explicit
# word-boundary anchors so that they do not match inside longer words.
def kw(words: List[str]) -> List[re.Pattern]:
    """Compile each regex source in `words` case-insensitively, keeping order.

    The sources are compiled exactly as given; no word-boundary anchors are
    added here, so callers must include them in the pattern text.
    """
    compiled = []
    for source in words:
        compiled.append(re.compile(source, flags=re.IGNORECASE))
    return compiled

# --- CATEGORY: KEMENKEU (Ministry of Finance) ISSUES ---
# Maps a regex source to its weight. score_by_patterns searches each key
# case-insensitively and adds the weight once per key that matches, so a
# phrase covered by several keys accumulates all of their weights.
KEMENKEU_PATTERNS: Dict[str, int] = {
    # Core institutions & offices
    r"\bkementerian keuangan\b": 6,
    r"\bkemenkeu\b": 6,
    r"\bmenkeu\b": 6,
    r"\bpurbaya yudhi\b|\bpurbaya\b": 6,
    r"\bmenteri keuangan\b": 6,
    r"\bbadan kebijakan fiskal\b|\bbkf\b": 5,
    r"\bdirektorat jenderal pajak\b|\bdjp\b": 6,
    r"\bditjen bea cukai\b|\bdirektorat jenderal bea dan cukai\b|\bdjbc\b|\bbea\s*cukai\b": 6,
    r"\bdirektorat jenderal kekayaan negara\b|\bdjkn\b": 6,
    r"\bdirektorat jenderal perbendaharaan\b|\bdjpbn?\b": 5,
    r"\bdirektorat jenderal pengelolaan pembiayaan dan risiko\b|\bdjppr\b": 6,
    r"\blembaga pembiayaan ekspor indonesia\b|\blpei\b|\beximbank\b": 4,
    r"\bpusat investasi pemerintah\b|\bpip\b": 4,
    r"\blkpp\b": 2,  # not under Kemenkeu, but often procurement-related (low weight)
    r"\bbi\b": 5,
    r"\bojk\b": 5,

    # Fiscal / budget / treasury policy terms
    r"\bapbn\b": 6,
    r"\b(apbd)\b": 2,  # regional budget: lower weight (can also be a national issue)
    r"\banggaran\b": 3,
    # NOTE(review): "pbb" is also an alternative in INTERNASIONAL_PATTERNS, so
    # that abbreviation scores in both categories.
    r"\bpajak\b|\bppn\b|\bpajak penghasilan\b|\bpbb\b|\bpajak daerah\b": 5,
    r"\bkepabeanan\b|\bcukai\b": 5,
    r"\b(fiskal|kebijakan fiskal)\b": 5,
    r"\bpenerimaan negara\b|\bpnbp\b": 5,
    r"\bperbendaharaan\b|\bkas negara\b|\bspm\b|\bsp2d\b": 4,
    r"\bsurat berharga negara\b|\bsbn\b|\bsun\b|\bsbsn\b|\bsukuk\b": 5,
    r"\bpembiayaan utang\b|\butang negara\b|\bdefisit\b|\bdefisit anggaran\b": 4,
    r"\b(bmn|barang milik negara|psp|lelang negara)\b": 5,
    r"\btransfer ke daerah\b|\b(dau|dak|dbh)\b": 3,
    r"\bsubsidi\b|\bkompensasi\b": 3,
    # NOTE(review): `bpj?sk` matches "bpsk"/"bpjsk" but not "bpjs" — confirm intent.
    r"\b(asuransi sosial)\b|\bbpj?sk\b": 2,  # ambiguous, low weight

    # Typical fiscal programmes / actions
    r"\binsentif fiskal\b|\btax holiday\b|\btax allowance\b": 5,
    r"\bamnesty\b|\bpengampunan pajak\b": 5,
    r"\b(bea masuk|bea keluar)\b": 4,
    r"\b(impor|ekspor)\b.*\b(pajak|bea|cukai)\b": 4,
    r"\b(200 triliun|200t|rp200t)\b": 4,
    r"\bgaji asn\b": 4,
    r"\bbantuan sosial\b": 2,
    # Fixed: this key used to start with "\t" (a TAB in regex syntax) instead
    # of the word boundary "\b", so it could never match cleaned text.
    # It adds to the combined "transfer ke daerah|dau|dak|dbh" key above:
    # the phrase "transfer ke daerah" now scores 3 + 2.
    r"\btransfer ke daerah\b": 2,
}

# --- CATEGORY: NATIONAL ISSUES (non-Kemenkeu) ---
# Maps a regex source to its weight; each key that matches adds its weight once
# (see score_by_patterns).
NASIONAL_PATTERNS: Dict[str, int] = {
    # Central executive branch
    r"\bpresiden\b|\bwapres\b|\bwakil presiden\b|\bistana\b|\bsekretariat negara\b": 5,
    r"\bprabowo\b|\bgibran\b": 5,
    # Generic terms. They also match inside "menteri keuangan" /
    # "kementerian keuangan", so Kemenkeu text accrues this weight too.
    r"\bkabinet\b|\bmenteri\b|\bkementerian\b": 3,  # generic (when not Kemenkeu)
    # "kominfo" is listed twice in the alternation (redundant but harmless).
    r"\bkemen(terian)? (esdm|pupr|kesehatan|perhubungan|kominfo|kumham|perdagangan|pertanian|perindustrian|bumn|sosial|kominfo)\b": 4,

    # Politics & legislature
    r"\bdpr\b|\bdpd\b|\bmpr\b": 4,
    r"\bmk\b|\bmahkamah konstitusi\b|\bma\b|\bmahkamah agung\b|\bbawaslu\b|\bkpu\b": 4,
    r"\bpemilu\b|\bpilkada\b|\bpilpres\b|\bpileg\b": 4,

    # Central law / national-level law enforcement
    r"\bkpk\b|\bott\b|\bkejagung\b|\bkejaksaan agung\b|\bpolri\b|\bbnn\b": 5,
    r"\bkapolri\b|\bkapuspen\b": 4,
    r"\bperppu\b|\bpp\b|\bperpres\b|\binpres\b": 3,

    # Central-government issues / programmes (non-fiscal)
    r"\bibukota nusantara\b|\bikn\b": 4,
    r"\bpertamina\b|\bpln\b|\btelkom\b|\bbumn\b": 3,
    r"\bharga bbm\b|\bsubsidi energi\b|\bketahanan pangan\b": 3,
    # NOTE(review): "naturalization" is the English spelling; Indonesian text
    # would presumably use "naturalisasi" — confirm.
    r"\bimigrasi\b|\bwna\b|\bwni\b|\bnaturalization\b": 2,
    r"\bpenanganan bencana\b|\bbnpb\b": 3,
}

# --- CATEGORY: INTERNATIONAL ISSUES ---
# Maps a regex source to its weight; each key that matches adds its weight once
# (see score_by_patterns).
INTERNASIONAL_PATTERNS: Dict[str, int] = {
    # International organisations & forums
    r"\basean\b|\bapec\b|\bg20\b|\bg7\b|\boecd\b|\brcep\b": 4,
    # NOTE(review): "pbb" is also an alternative in KEMENKEU_PATTERNS, so it
    # scores in both categories. The spelled-out names use a space between
    # "bangsa bangsa"; the hyphenated form "bangsa-bangsa" would not match
    # because basic_clean keeps hyphens — confirm this is intended.
    r"\b(pbb|perserikatan bangsa bangsa|persatuan bangsa bangsa|united nations|dewan keamanan pbb|un security council)\b": 6,
    r"\b(who|wto|imf|international monetary fund|world bank|bank dunia|adb|asian development bank)\b": 5,
    r"\b(uni eropa|european union|eu)\b": 5,

    # Countries and regions (a representative selection; ambiguous words avoided)
    r"\bamerika serikat\b|\busa\b|\bu\.s\.a\b|\bu\.s\.?\b": 5,
    r"\bchina\b|\btiongkok\b|\brusia\b|\bjepang\b": 4,
    r"\bkorea selatan\b|\bkorsel\b|\bkorea utara\b|\bkorut\b": 4,
    r"\bindia\b|\bsingapura\b|\bmalaysia\b|\bthailand\b|\bvietnam\b|\bfilipina\b|\bbrunei\b|\blaos\b|\bmyanmar\b": 4,
    r"\baustralia\b|\bselandia baru\b|\bnew zealand\b": 4,
    r"\binggris\b|\bbritania raya\b|\bunited kingdom\b|\buk\b": 4,
    r"\bpalestina\b|\bisrael\b|\biran\b|\birak\b|\bsaudi arabia\b|\barab saudi\b|\buni emirat arab\b|\buea\b|\bqatar\b|\bturki\b|\bmesir\b": 4,
}

# --- CATEGORY: MARKERS FOR "OTHER ISSUES" ---
# Hints for sports, celebrity, crime, viral and promo content. classify_item
# reports the resulting score as `score_lainnya_hint` but does not use it when
# choosing the category.
LAINNYA_HINTS: Dict[str, int] = {
    r"\bolahraga\b|\bsport(s)?\b|\bbola\b|\bfifa\b|\bmotogp\b|\bf1\b": 3,
    r"\bseleb\b|\bselebriti\b|\bgosip\b|\bartis\b|\bkorea\b|\bkpop\b": 3,
    r"\bkriminal\b|\bpencurian\b|\bperampokan\b|\bpenganiayaan\b": 2,
    r"\bviral\b|\btiktok\b|\binstagram\b": 2,
    r"\bpromo\b|\bdiskon\b|\bflash sale\b": 1,
}

# Decision thresholds: the minimum combined (weighted title + article) score a
# category needs before classify_item will assign it (compared with >=).
THRESH_KEMENKEU = 5
THRESH_NASIONAL = 4
THRESH_INTERNASIONAL = 4


In [23]:
# %%
def score_by_patterns(text: str, patterns: Dict[str, int]) -> Tuple[int, List[str], Dict[str, int]]:
    """
    Score `text` against a {regex source: weight} table.

    Each pattern is searched case-insensitively and contributes its weight at
    most once, no matter how many times it occurs in the text.

    Returns a 3-tuple:
    - the total score (sum of the weights of the matching patterns)
    - the matching pattern sources, in table order (for auditing)
    - a {pattern: weight} dict restricted to the matching patterns
    """
    hits = {
        source: weight
        for source, weight in patterns.items()
        if re.search(source, text, flags=re.IGNORECASE)
    }
    return sum(hits.values()), list(hits), hits


def classify_item(title_raw: str, article_raw: str = "", alpha_title: float = 1.2) -> Dict[str, object]:
    """
    Classify one news item from its title and (optional) article text.

    Scoring:
    - Title and article are cleaned, then scored separately against each
      category's pattern table.
    - Per category, combined score = round(alpha_title * title_score + article_score),
      so with alpha_title > 1 a title match counts more than an article match.

    Decision order:
    1. "Isu Kemenkeu" when its score reaches THRESH_KEMENKEU and is not lower
       than both the national and the international score.
    2. "Isu Internasional" when its score reaches THRESH_INTERNASIONAL and is
       not lower than the national score.
    3. "Isu Nasional" when its score reaches THRESH_NASIONAL.
    4. "Isu Lainnya" otherwise.
    The "lainnya_hint" score is reported but plays no part in the decision.

    Returns a dict with:
    - cleaned_title, cleaned_article
    - kategori_isu
    - score_kemenkeu, score_nasional, score_internasional, score_lainnya_hint
    - matched_terms: per category, the de-duplicated patterns hit in title or article
    - score_detail: per category, {pattern: weight} merged over title and article
    - reason: short explanation of the decision
    """
    title_text = clean_title(title_raw)
    article_text = clean_article(article_raw)

    pattern_tables = {
        "kemenkeu": KEMENKEU_PATTERNS,
        "nasional": NASIONAL_PATTERNS,
        "internasional": INTERNASIONAL_PATTERNS,
        "lainnya_hint": LAINNYA_HINTS,
    }

    combined = {}
    matched_terms = {}
    score_detail = {}
    for category, table in pattern_tables.items():
        title_score, title_hits, title_detail = score_by_patterns(title_text, table)
        body_score, body_hits, body_detail = score_by_patterns(article_text, table)
        # The title is up-weighted before the article score is added.
        combined[category] = int(round(alpha_title * title_score + body_score))
        matched_terms[category] = list(set(title_hits + body_hits))
        score_detail[category] = {**title_detail, **body_detail}

    sk_kem = combined["kemenkeu"]
    sk_nas = combined["nasional"]
    sk_int = combined["internasional"]

    if sk_kem >= THRESH_KEMENKEU and sk_kem >= max(sk_nas, sk_int):
        kategori = "Isu Kemenkeu"
        reason = "Indikasi kuat fiskal/Kemenkeu dari judul/artikel."
    elif sk_int >= THRESH_INTERNASIONAL and sk_int >= sk_nas:
        kategori = "Isu Internasional"
        reason = "Indikasi kuat hubungan/organisasi/isu lintas negara."
    elif sk_nas >= THRESH_NASIONAL:
        kategori = "Isu Nasional"
        reason = "Indikasi kuat isu pemerintah pusat/penegakan hukum/politik nasional."
    else:
        kategori = "Isu Lainnya"
        reason = "Tidak ada indikasi kuat isu Kemenkeu/Nasional/Internasional."

    return {
        "cleaned_title": title_text,
        "cleaned_article": article_text,
        "kategori_isu": kategori,
        "score_kemenkeu": sk_kem,
        "score_nasional": sk_nas,
        "score_internasional": sk_int,
        "score_lainnya_hint": combined["lainnya_hint"],
        "matched_terms": matched_terms,
        "score_detail": score_detail,
        "reason": reason,
    }


In [24]:
# %%

def label_dataframe(df: pd.DataFrame,
                    title_col: str = "judul_berita",
                    article_col: str = "artikel_berita",
                    alpha_title: float = 1.2) -> pd.DataFrame:
    """
    Label every row of `df` with classify_item, using the title plus article.

    Parameters:
    - df: input frame; it is copied and never modified
    - title_col: name of the title column (required)
    - article_col: name of the article column (optional; when the column is
      absent only the title is scored)
    - alpha_title: weight applied to the title score

    Returns a copy of `df` with these columns appended, in order:
    judul_berita_bersih, artikel_berita_bersih (only when the article column
    exists), kategori_isu, score_kemenkeu, score_nasional, score_internasional,
    score_lainnya_hint, matched_terms, score_detail, label_reason.

    Raises:
    - TypeError when `df` is not a DataFrame
    - KeyError when `title_col` is missing

    Missing (NaN/None) titles and articles are treated as empty strings. A row
    whose classification raises is labelled "Isu Lainnya" with zero scores and
    a reason naming the exception class.
    """
    if not isinstance(df, pd.DataFrame):
        raise TypeError("Input 'df' harus berupa pandas.DataFrame.")
    if title_col not in df.columns:
        raise KeyError(f"Kolom '{title_col}' tidak ditemukan di df. Kolom tersedia: {list(df.columns)}")

    has_article = article_col in df.columns
    df_out = df.copy()

    results = []
    for _, row in df_out.iterrows():
        title_value = row[title_col]
        raw_title = str(title_value) if not pd.isna(title_value) else ""

        if has_article and not pd.isna(row.get(article_col, "")):
            raw_article = str(row[article_col])
        else:
            raw_article = ""

        try:
            outcome = classify_item(raw_title, raw_article, alpha_title=alpha_title)
        except Exception as e:
            # One bad row must not abort the run: fall back to a neutral label.
            outcome = {
                "cleaned_title": clean_title(raw_title),
                "cleaned_article": clean_article(raw_article),
                "kategori_isu": "Isu Lainnya",
                "score_kemenkeu": 0,
                "score_nasional": 0,
                "score_internasional": 0,
                "score_lainnya_hint": 0,
                "matched_terms": {"kemenkeu": [], "nasional": [], "internasional": [], "lainnya_hint": []},
                "score_detail": {"kemenkeu": {}, "nasional": {}, "internasional": {}, "lainnya_hint": {}},
                "reason": f"Fallback labeling karena error: {e.__class__.__name__}",
            }
        results.append(outcome)

    # (output column, key in the per-row result), in the order they are appended.
    column_sources = [("judul_berita_bersih", "cleaned_title")]
    if has_article:
        column_sources.append(("artikel_berita_bersih", "cleaned_article"))
    column_sources.extend([
        ("kategori_isu", "kategori_isu"),
        ("score_kemenkeu", "score_kemenkeu"),
        ("score_nasional", "score_nasional"),
        ("score_internasional", "score_internasional"),
        ("score_lainnya_hint", "score_lainnya_hint"),
        ("matched_terms", "matched_terms"),
        ("score_detail", "score_detail"),
        ("label_reason", "reason"),
    ])
    for column, key in column_sources:
        df_out[column] = [r[key] for r in results]

    return df_out


In [25]:
# Run the labeling on the date-filtered rows. The title weight is set to 1.3
# here instead of label_dataframe's default of 1.2. Any failure is re-raised
# as a RuntimeError so the notebook stops instead of continuing without labels.
try:
    df_labeled = label_dataframe(df, title_col="judul_berita", article_col="artikel_berita", alpha_title=1.3)
except Exception as e:
    raise RuntimeError(f"Gagal melabeli dataframe: {e}")
else:
    print(f"Labeling selesai. Jumlah baris: {len(df_labeled)}")

# Summary: number of rows per assigned category.
if df_labeled.empty or 'kategori_isu' not in df_labeled.columns:
    print("Data labeled kosong atau kolom kategori_isu tidak ditemukan.")
else:
    summary = (
        df_labeled["kategori_isu"]
        .value_counts(dropna=False)
        .rename_axis("kategori_isu")
        .reset_index(name="jumlah")
    )
    display(summary)

Labeling selesai. Jumlah baris: 97


Unnamed: 0,kategori_isu,jumlah
0,Isu Lainnya,39
1,Isu Nasional,25
2,Isu Internasional,17
3,Isu Kemenkeu,16


### Langkah Selanjutnya

In [26]:
# %%
# Save the labelled data as CSV and Excel for downstream integration.
import os

# A timestamp in the file names keeps earlier exports from being overwritten.
DATE_TAG = pd.Timestamp.now().strftime("%Y%m%d_%H%M%S")
OUTPUT_DIR = f"{cwd}/hasil_labeling"
OUTPUT_CSV = OUTPUT_DIR + f"/hasil_labeling_{DATE_TAG}.csv"
OUTPUT_XLSX = OUTPUT_DIR + f"/hasil_labeling_{DATE_TAG}.xlsx"

# Make sure the output folder exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Best effort: a write failure is reported but does not stop the notebook.
try:
    df_labeled.to_csv(OUTPUT_CSV, index=False, encoding="utf-8")
    df_labeled.to_excel(OUTPUT_XLSX, index=False)
except Exception as e:
    print(f"Gagal menyimpan output: {e}")
else:
    print(f"Tersimpan: {OUTPUT_CSV} & {OUTPUT_XLSX}")


Tersimpan: /Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Pekerjaan BMN/05. 2025/98_monitoring_berita/hasil_labeling/hasil_labeling_20250926_144446.csv & /Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Pekerjaan BMN/05. 2025/98_monitoring_berita/hasil_labeling/hasil_labeling_20250926_144446.xlsx


In [27]:
# update config
# %%
import json
from pathlib import Path

# Path of config.json inside the working directory `cwd`; passed to
# update_config below.
CONFIG_PATH = Path(f"{cwd}/config.json")


def update_config(path: Path, new_values: dict):
    """Merge `new_values` into the JSON object stored at `path`.

    Only the given keys are added or replaced; every other key already in the
    file is kept. A missing file is created containing just `new_values`.

    The existing file is never destroyed by a failed update:
    - if it exists but cannot be read or parsed, or its top level is not a
      JSON object, the update is cancelled and the file is left untouched;
    - the merged content is serialised before the file is opened for writing,
      so a value that cannot be serialised does not leave a truncated file.

    Parameters:
        path: location of the config file.
        new_values: keys and values to add or replace.

    Returns:
        None. The outcome is reported with print(); no exception is raised for
        read, serialisation or write failures.
    """
    data = {}
    if path.exists():
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except Exception as e:
            # Falling back to {} here would wipe every existing key on write.
            print(f"Gagal membaca config.json, update dibatalkan agar isi lama tidak tertimpa: {e}")
            return
        if not isinstance(data, dict):
            print("Gagal update config.json: isi file bukan objek JSON, update dibatalkan.")
            return

    # Update only the keys that were given.
    data.update(new_values)

    try:
        # Serialise first: opening with "w" truncates the file immediately.
        payload = json.dumps(data, indent=4, ensure_ascii=False)
        with open(path, "w", encoding="utf-8") as f:
            f.write(payload)
        print(f"Berhasil update config.json di {path}")
    except Exception as e:
        print(f"Gagal menyimpan config.json: {e}")

# Record the labelled-output paths in config.json under explicit key names.
# The save step only prints a message when writing fails, so a path is recorded
# only if its file actually exists; this keeps config.json from pointing at
# outputs that were never written.
_written_outputs = {
    key: out_path
    for key, out_path in {
        "labelled_data_csv": OUTPUT_CSV,
        "labelled_data_xlsx": OUTPUT_XLSX,
    }.items()
    if Path(out_path).is_file()
}
if _written_outputs:
    update_config(CONFIG_PATH, _written_outputs)
else:
    print("Config tidak diperbarui: file hasil labeling tidak ditemukan.")


Berhasil update config.json di /Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Pekerjaan BMN/05. 2025/98_monitoring_berita/config.json
