# SETTING ENVIRONMENT


In [47]:
"""# mount the colab with google drive
from google.colab import drive
drive.mount('/content/drive')"""

"# mount the colab with google drive\nfrom google.colab import drive\ndrive.mount('/content/drive')"

In [48]:
# set folder tempat kerja (current working directory)
import os
cwd = "/Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Pekerjaan BMN/05. 2025/98_monitoring_berita/monitoring-berita"
#cwd = '/content/drive/MyDrive/Monitoring Berita'
os.chdir(cwd)

In [49]:
# API Keys - diganti dengan konfigurasi lengkap di cell selanjutnya
DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [50]:
# Membaca AI model yang dipilih user melalui config.json ['AI_name']
# Kemudian mengaplikasikan pilihan tersebut (openai/deepseek) ke seluruh sel utama

import os
import json
import logging
from pathlib import Path

# Setup logging
logger = logging.getLogger(__name__)

def load_ai_configuration():
    """
    Memuat konfigurasi AI model dari config.json dan setup environment
    
    Returns:
        dict: Dictionary berisi konfigurasi AI yang telah diload
    """
    try:
        # Baca config.json
        with open('config.json', 'r', encoding='utf-8') as f:
            config = json.load(f)
        
        # Ambil AI configuration
        ai_name = config.get('AI_name', 'openai').lower()
        
        # Validasi AI name
        supported_ai = ['openai', 'deepseek']
        if ai_name not in supported_ai:
            logger.warning(f"AI model '{ai_name}' tidak didukung. Menggunakan 'openai' sebagai default.")
            ai_name = 'openai'
        
        # Setup environment variables berdasarkan pilihan AI
        if ai_name == 'openai':
            api_key = os.getenv("OPENAI_API_KEY")
            if not api_key:
                raise ValueError("OPENAI_API_KEY tidak ditemukan di environment variables")
            
            ai_config = {
                'provider': 'openai',
                'model': config.get('openai_model', 'gpt-4o-mini'),
                'api_key': api_key,
                'base_url': None,
                'temperature': config.get('temperature', 0.2),
                'max_tokens': config.get('max_tokens', 500)
            }
            
        elif ai_name == 'deepseek':
            api_key = os.getenv("DEEPSEEK_API_KEY")
            if not api_key:
                raise ValueError("DEEPSEEK_API_KEY tidak ditemukan di environment variables")
                
            ai_config = {
                'provider': 'deepseek',
                'model': config.get('deepseek_model', 'deepseek-chat'),
                'api_key': api_key,
                'base_url': 'https://api.deepseek.com/v1',
                'temperature': config.get('temperature', 0.2),
                'max_tokens': config.get('max_tokens', 500)
            }
        
        # Set global environment variable untuk digunakan di sel lain
        os.environ['MODEL_ANALISIS'] = ai_name
        os.environ['AI_MODEL_NAME'] = ai_config['model']
        os.environ['AI_TEMPERATURE'] = str(ai_config['temperature'])
        os.environ['AI_MAX_TOKENS'] = str(ai_config['max_tokens'])
        
        if ai_config['base_url']:
            os.environ['AI_BASE_URL'] = ai_config['base_url']
        
        logger.info(f"✅ AI Configuration loaded: {ai_name.upper()}")
        logger.info(f"   Model: {ai_config['model']}")
        logger.info(f"   Temperature: {ai_config['temperature']}")
        logger.info(f"   Max Tokens: {ai_config['max_tokens']}")
        
        return ai_config
        
    except FileNotFoundError:
        logger.error("❌ File config.json tidak ditemukan!")
        raise
    except json.JSONDecodeError as e:
        logger.error(f"❌ Error parsing config.json: {e}")
        raise
    except Exception as e:
        logger.error(f"❌ Error loading AI configuration: {e}")
        raise

def setup_ai_client(ai_config):
    """
    Setup AI client berdasarkan konfigurasi yang dipilih
    
    Args:
        ai_config (dict): Konfigurasi AI dari load_ai_configuration()
    
    Returns:
        object: AI client object (OpenAI atau Deepseek compatible)
    """
    try:
        if ai_config['provider'] == 'openai':
            from openai import OpenAI
            
            client = OpenAI(
                api_key=ai_config['api_key']
            )
            
        elif ai_config['provider'] == 'deepseek':
            from openai import OpenAI  # Deepseek menggunakan OpenAI compatible API
            
            client = OpenAI(
                api_key=ai_config['api_key'],
                base_url=ai_config['base_url']
            )
        
        # Test connection dengan simple call
        test_response = client.chat.completions.create(
            model=ai_config['model'],
            messages=[{"role": "user", "content": "Test connection. Respond with 'OK'."}],
            max_tokens=10,
            temperature=0
        )
        
        if test_response.choices[0].message.content:
            logger.info(f"✅ {ai_config['provider'].upper()} API connection successful")
            return client
        else:
            raise Exception("API test failed - empty response")
            
    except ImportError as e:
        logger.error(f"❌ Missing required library: {e}")
        raise
    except Exception as e:
        logger.error(f"❌ Error setting up {ai_config['provider']} client: {e}")
        raise

def get_ai_call_function(ai_config, client):
    """
    Mengembalikan function untuk memanggil AI yang sudah dikonfigurasi
    
    Args:
        ai_config (dict): Konfigurasi AI
        client (object): AI client object
    
    Returns:
        function: Function untuk memanggil AI dengan parameter standar
    """
    def call_ai_model(prompt, temperature=None, max_tokens=None):
        """
        Function wrapper untuk memanggil AI model dengan konfigurasi yang sudah diset
        
        Args:
            prompt (str): Prompt untuk AI
            temperature (float, optional): Temperature override
            max_tokens (int, optional): Max tokens override
            
        Returns:
            str: Response dari AI model
        """
        try:
            response = client.chat.completions.create(
                model=ai_config['model'],
                messages=[{"role": "user", "content": prompt}],
                temperature=temperature or ai_config['temperature'],
                max_tokens=max_tokens or ai_config['max_tokens']
            )
            return response.choices[0].message.content
            
        except Exception as e:
            logger.error(f"Error calling {ai_config['provider']} API: {e}")
            raise
    
    return call_ai_model

# ===== EKSEKUSI KONFIGURASI AI ===== #
print("=== KONFIGURASI AI MODEL ===")

try:
    # Load AI configuration dari config.json
    ai_config = load_ai_configuration()
    
    # Setup AI client
    ai_client = setup_ai_client(ai_config)
    
    # Buat function wrapper untuk memanggil AI
    call_ai_model = get_ai_call_function(ai_config, ai_client)
    
    # Set sebagai global variables untuk digunakan di sel lain
    globals()['AI_CONFIG'] = ai_config
    globals()['AI_CLIENT'] = ai_client 
    globals()['CALL_AI_MODEL'] = call_ai_model
    
    print(f"🎯 AI Model: {ai_config['provider'].upper()} ({ai_config['model']})")
    print(f"🔧 Temperature: {ai_config['temperature']}")
    print(f"📝 Max Tokens: {ai_config['max_tokens']}")
    print(f"🔑 API Key: {'*' * 20}...{ai_config['api_key'][-4:]}")
    
    # Test simple call
    print(f"\n🧪 Testing AI connection...")
    test_result = call_ai_model("Respond with 'AI Ready!'", temperature=0, max_tokens=10)
    print(f"✅ Test Response: {test_result}")
    
    print(f"\n✅ AI configuration completed successfully!")
    print(f"💡 Gunakan CALL_AI_MODEL(prompt) untuk memanggil AI di sel lain.")
    
except Exception as e:
    print(f"❌ Error dalam setup AI configuration: {e}")
    print(f"🔧 Pastikan:")
    print(f"   1. File config.json ada dan berisi 'AI_name': 'openai' atau 'deepseek'")
    print(f"   2. Environment variable API key sudah diset (OPENAI_API_KEY atau DEEPSEEK_API_KEY)")
    print(f"   3. Library openai sudah terinstall")
    
    # Set fallback ke dummy mode
    os.environ['DUMMY_MODE'] = '1'
    globals()['AI_CONFIG'] = {'provider': 'dummy', 'model': 'dummy'}
    globals()['CALL_AI_MODEL'] = lambda prompt, **kwargs: '{"dummy": "response"}'
    print(f"🔄 Fallback ke DUMMY MODE untuk development")

print(f"\n📋 Environment Variables yang diset:")
print(f"   MODEL_ANALISIS = {os.getenv('MODEL_ANALISIS', 'not set')}")
print(f"   AI_MODEL_NAME = {os.getenv('AI_MODEL_NAME', 'not set')}")
print(f"   DUMMY_MODE = {os.getenv('DUMMY_MODE', '0')}")

2025-10-01 08:29:22,555 - INFO - ✅ AI Configuration loaded: DEEPSEEK
2025-10-01 08:29:22,556 - INFO -    Model: deepseek-chat
2025-10-01 08:29:22,556 - INFO -    Temperature: 0.2
2025-10-01 08:29:22,556 - INFO -    Model: deepseek-chat
2025-10-01 08:29:22,556 - INFO -    Temperature: 0.2


2025-10-01 08:29:22,556 - INFO -    Max Tokens: 500
2025-10-01 08:29:22,734 - INFO - HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-01 08:29:22,734 - INFO - HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"


=== KONFIGURASI AI MODEL ===


2025-10-01 08:29:23,853 - INFO - ✅ DEEPSEEK API connection successful
2025-10-01 08:29:23,976 - INFO - HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-01 08:29:23,976 - INFO - HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"


🎯 AI Model: DEEPSEEK (deepseek-chat)
🔧 Temperature: 0.2
📝 Max Tokens: 500
🔑 API Key: ********************...8d74

🧪 Testing AI connection...
✅ Test Response: AI Ready!

✅ AI configuration completed successfully!
💡 Gunakan CALL_AI_MODEL(prompt) untuk memanggil AI di sel lain.

📋 Environment Variables yang diset:
   MODEL_ANALISIS = deepseek
   AI_MODEL_NAME = deepseek-chat
   DUMMY_MODE = 0
✅ Test Response: AI Ready!

✅ AI configuration completed successfully!
💡 Gunakan CALL_AI_MODEL(prompt) untuk memanggil AI di sel lain.

📋 Environment Variables yang diset:
   MODEL_ANALISIS = deepseek
   AI_MODEL_NAME = deepseek-chat
   DUMMY_MODE = 0


# MAIN

In [51]:
# Langkah pertama membaca file csv hasil analisis AI sebelumnya
# file terletak di config.json "analisis_ai_output"
# Filter out berita dengan topik_llm "Lainnya"
# Filter out berita dengan importance < 50

import pandas as pd
import json
import logging
from pathlib import Path

# Setup logging untuk error handling
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def load_berita_penting():
    """
    Memuat dan memfilter berita penting dari file hasil analisis AI
    
    Returns:
        pandas.DataFrame: DataFrame berisi berita yang sudah difilter
    """
    try:
        # Baca konfigurasi
        with open('config.json', 'r', encoding='utf-8') as f:
            config = json.load(f)
        
        # Path file analisis AI
        analisis_file = config.get('analisis_ai_output')
        if not analisis_file:
            raise ValueError("analisis_ai_output tidak ditemukan dalam config.json")
        
        # Periksa apakah file ada
        if not Path(analisis_file).exists():
            raise FileNotFoundError(f"File analisis AI tidak ditemukan: {analisis_file}")
        
        # Baca file CSV
        logger.info(f"Membaca file analisis AI: {analisis_file}")
        df = pd.read_csv(analisis_file)
        
        # Filter berita penting
        # 1. Exclude topik_llm "Lainnya"
        # 2. Include importance >= 70
        df_filtered = df[
            (df['topik_llm'] != 'Lainnya') & 
            (df['importance'] >= 70)
        ].copy()
        
        logger.info(f"Total berita: {len(df)}")
        logger.info(f"Berita penting (filtered): {len(df_filtered)}")
        
        if df_filtered.empty:
            logger.warning("Tidak ada berita penting yang memenuhi kriteria!")
            return pd.DataFrame()
        
        # Urutkan berdasarkan importance (descending)
        df_filtered = df_filtered.sort_values('importance', ascending=False)
        
        return df_filtered
        
    except Exception as e:
        logger.error(f"Error dalam load_berita_penting: {str(e)}")
        raise

# Load data berita penting
df_berita_penting = load_berita_penting()
print(f"Berhasil memuat {len(df_berita_penting)} berita penting")
if not df_berita_penting.empty:
    print("\nSample berita penting:")
    print(df_berita_penting[['judul_berita', 'topik_llm', 'importance', 'sentimen']].head())

2025-10-01 08:29:32,833 - INFO - Membaca file analisis AI: 00_hasil_analisis/seluruh_berita/analisis_ai_20250930_deepseek_default.csv
2025-10-01 08:29:32,857 - INFO - Total berita: 225
2025-10-01 08:29:32,858 - INFO - Berita penting (filtered): 107
2025-10-01 08:29:32,857 - INFO - Total berita: 225
2025-10-01 08:29:32,858 - INFO - Berita penting (filtered): 107


Berhasil memuat 107 berita penting

Sample berita penting:
                                          judul_berita topik_llm  importance  \
94   Menkeu Purbaya Sidak ke Kantor Pusat BNI, Ada ...  Kemenkeu        85.0   
118    Cukai Rokok Tak Naik, Penerimaan Turun - KONTAN  Kemenkeu        85.0   
116  Pemerhati Sayangkan Penundaan Kenaikan Cukai R...  Kemenkeu        85.0   
114  Prabowo Perintahkan Bea Cukai Gandeng Ahli Kim...  Kemenkeu        85.0   
113  Saham WIIM, HMSP, GGRM Rontok Usai Menkeu Purb...  Kemenkeu        85.0   

    sentimen  
94   positif  
118  positif  
116  negatif  
114  positif  
113  positif  


In [52]:
# Unkomen sel ini untuk testing.
df_berita_penting = df_berita_penting.sample(10)

In [53]:
# SEL 1 - Analisis Berita Penting (Parallel) - Updated with Dynamic AI Configuration
import os
import re
import json
import time
import traceback
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Dict, Any, List
from pathlib import Path
import pandas as pd

try:
    from tqdm import tqdm
except ImportError:
    tqdm = lambda x, **_: x

# Konfigurasi dari AI setup
MAX_WORKERS = 3
MODEL_PILIHAN = os.getenv("MODEL_ANALISIS", "openai").lower()
AI_MODEL_NAME = os.getenv("AI_MODEL_NAME", "gpt-4o-mini")
DUMMY_MODE = os.getenv("DUMMY_MODE", "0") == "1"

# Deteksi kolom
CANDIDATE_TEXT_COLS = ["isi_berita", "content", "artikel_berita_bersih", "isi", "full_text", "body"]
TEXT_COL = None
for c in CANDIDATE_TEXT_COLS:
    if c in df_berita_penting.columns:
        TEXT_COL = c
        break

JUDUL_COL = 'judul_berita'
print(f"Kolom teks: {TEXT_COL}, Kolom judul: {JUDUL_COL}")
print(f"AI Model: {MODEL_PILIHAN.upper()} ({AI_MODEL_NAME})")

# Setup AI client menggunakan konfigurasi global
_ai_client = None
_call_ai_model = None

if not DUMMY_MODE:
    try:
        # Gunakan konfigurasi global yang sudah disetup di cell sebelumnya
        if 'AI_CLIENT' in globals() and 'CALL_AI_MODEL' in globals():
            _ai_client = AI_CLIENT
            _call_ai_model = CALL_AI_MODEL
            print(f"✅ Using pre-configured {MODEL_PILIHAN.upper()} client")
        else:
            raise Exception("AI configuration not found. Please run AI configuration cell first.")
    except Exception as e:
        print(f"❌ Error accessing AI client: {e}")
        print("🔄 Switching to DUMMY mode")
        DUMMY_MODE = True

def build_prompt(judul: str, isi: str) -> str:
    return f"""Analisis berita ini dan buat JSON dengan format tepat:

Judul: {judul[:200]}
Isi: {isi[:3000]}

Buat JSON dengan 4 field:
- resume: ringkasan singkat (maks 60 kata)  
- dampak_kemenkeu: Positif/Negatif/Netral (untuk Kementerian Keuangan)
- alasan_dampak: alasan singkat (maks 40 kata)
- hal_menarik: array 1-3 poin menarik

Contoh format:
{{"resume": "Menteri melakukan sidak...", "dampak_kemenkeu": "Positif", "alasan_dampak": "Meningkatkan transparansi", "hal_menarik": ["Kunjungan mendadak", "Fokus kredit"]}}"""

def call_model(prompt: str) -> str:
    if DUMMY_MODE:
        return '{"resume": "Dummy analisis berita", "dampak_kemenkeu": "Netral", "alasan_dampak": "Mode dummy testing", "hal_menarik": ["Test mode", "Dummy data"]}'
    
    try:
        # Gunakan wrapper function yang sudah dikonfigurasi
        response = _call_ai_model(
            prompt, 
            temperature=float(os.getenv('AI_TEMPERATURE', '0.2')),
            max_tokens=int(os.getenv('AI_MAX_TOKENS', '400'))
        )
        return response
    except Exception as e:
        raise Exception(f"API call failed: {str(e)}")

def parse_response(raw: str) -> Dict[str, Any]:
    if not raw:
        raise ValueError("Empty response")
    
    # Extract JSON
    json_match = re.search(r'\{[^{}]*"resume"[^{}]*\}', raw)
    if json_match:
        candidate = json_match.group()
    else:
        candidate = raw.strip()
    
    try:
        data = json.loads(candidate)
    except Exception as e:
        raise ValueError(f"JSON parse error: {e}")
    
    # Normalize
    result = {
        'resume': str(data.get('resume', '')),
        'dampak_kemenkeu': str(data.get('dampak_kemenkeu', 'Netral')),
        'alasan_dampak': str(data.get('alasan_dampak', '')),
        'hal_menarik': data.get('hal_menarik', [])
    }
    
    if isinstance(result['hal_menarik'], str):
        result['hal_menarik'] = [result['hal_menarik']]
    
    # Standardize dampak
    dk = result['dampak_kemenkeu'].lower()
    if 'pos' in dk:
        result['dampak_kemenkeu'] = 'Positif'
    elif 'neg' in dk:
        result['dampak_kemenkeu'] = 'Negatif'  
    else:
        result['dampak_kemenkeu'] = 'Netral'
        
    return result

def analyze_row(idx: int, row: pd.Series) -> Dict[str, Any]:
    try:
        judul = str(row.get(JUDUL_COL, ''))[:300]
        isi = str(row.get(TEXT_COL, ''))[:5000]
        
        # Clean text dari karakter bermasalah
        judul = judul.encode('utf-8', 'ignore').decode('utf-8')
        isi = isi.encode('utf-8', 'ignore').decode('utf-8')
        
        prompt = build_prompt(judul, isi)
        raw = call_model(prompt)
        parsed = parse_response(raw)
        
        parsed['__status'] = 'ok'
        return parsed
        
    except Exception as e:
        return {
            '__status': 'error',
            '__error': str(e)[:200]
        }

# Execute parallel analysis
start_time = time.time()
rows_df = df_berita_penting.reset_index(drop=True)
results = [None] * len(rows_df)
errors = 0

print(f"Starting analysis of {len(rows_df)} articles using {MODEL_PILIHAN.upper()}...")

with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    futures = {executor.submit(analyze_row, i, row): i for i, row in rows_df.iterrows()}
    
    for fut in tqdm(as_completed(futures), total=len(futures)):
        idx = futures[fut]
        result = fut.result()
        results[idx] = result
        
        if result.get('__status') != 'ok':
            errors += 1

# Compile results
col_resume = []
col_dampak = []
col_alasan = []  
col_hal = []
col_status = []
col_error = []

for res in results:
    if res and res.get('__status') == 'ok':
        col_resume.append(res.get('resume', ''))
        col_dampak.append(res.get('dampak_kemenkeu', ''))
        col_alasan.append(res.get('alasan_dampak', ''))
        col_hal.append(' | '.join(res.get('hal_menarik', [])))
        col_status.append('ok')
        col_error.append('')
    else:
        col_resume.append('')
        col_dampak.append('')
        col_alasan.append('')
        col_hal.append('')
        col_status.append('error')
        col_error.append(res.get('__error', 'unknown') if res else 'unknown')

# Add results to dataframe
df_out = rows_df.copy()
df_out['resume_ai'] = col_resume
df_out['dampak_kemenkeu_ai'] = col_dampak  
df_out['alasan_dampak_ai'] = col_alasan
df_out['hal_menarik_ai'] = col_hal
df_out['analisis_status'] = col_status
df_out['analisis_error'] = col_error

proc_time = time.time() - start_time
success_count = len(df_out) - errors

print(f"Analysis complete: {len(df_out)} articles in {proc_time:.1f}s")
print(f"Success: {success_count}, Errors: {errors}")
print(f"AI Provider: {MODEL_PILIHAN.upper()}")

# Show successful samples
success_rows = df_out[df_out['analisis_status'] == 'ok']
if not success_rows.empty:
    print(f"\nSample results (showing {min(3, len(success_rows))}):")
    for i, (_, row) in enumerate(success_rows.head(3).iterrows()):
        print(f"{i+1}. {row['judul_berita'][:60]}...")
        print(f"   Resume: {row['resume_ai'][:70]}...")
        print(f"   Dampak: {row['dampak_kemenkeu_ai']}")

# Save results  
out_dir = Path('00_hasil_analisis/berita_penting')
out_dir.mkdir(parents=True, exist_ok=True)
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
outfile = out_dir / f"analisis_berita_penting_{MODEL_PILIHAN}_{timestamp}.csv"
df_out.to_csv(outfile, index=False)
print(f"\nFile saved: {outfile}")

analisis_berita_penting = df_out

Kolom teks: artikel_berita_bersih, Kolom judul: judul_berita
AI Model: DEEPSEEK (deepseek-chat)
✅ Using pre-configured DEEPSEEK client
Starting analysis of 10 articles using DEEPSEEK...


  0%|          | 0/10 [00:00<?, ?it/s]2025-10-01 08:29:47,981 - INFO - HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-01 08:29:47,981 - INFO - HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-01 08:29:47,995 - INFO - HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-01 08:29:47,995 - INFO - HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-01 08:29:48,011 - INFO - HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-01 08:29:48,011 - INFO - HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"
 10%|█         | 1/10 [00:07<01:06,  7.39s/it]2025-10-01 08:29:55,378 - INFO - HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-01 08:29:55,378 - INFO - HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 

Analysis complete: 10 articles in 32.2s
Success: 10, Errors: 0
AI Provider: DEEPSEEK

Sample results (showing 3):
1. Omongan Purbaya Sukses Bangkitkan Rupiah, Ini Buktinya! - CN...
   Resume: Rupiah menguat 0,51% ke Rp16.640/US$ setelah klarifikasi Menteri Keuan...
   Dampak: Positif
2. Purbaya Umumkan Cukai Rokok 2026 Tak Naik, Airlangga: Bagus!...
   Resume: Menteri Keuangan Purbaya Yudhi Sadewa memastikan tidak akan menaikkan ...
   Dampak: Positif
3. Menkeu Purbaya Sidak Kantor BNI Saat Direksi Sedang Rapat, A...
   Resume: Menteri Keuangan Purbaya melakukan inspeksi mendadak ke kantor BNI saa...
   Dampak: Netral

File saved: 00_hasil_analisis/berita_penting/analisis_berita_penting_deepseek_20251001_083020.csv





In [54]:
# SEL 2 - Generator Laporan "Daftar Berita & Konten" (Fixed Version)
import os
import json
from datetime import datetime, timedelta
from pathlib import Path

def get_sentiment_emoji(sentimen):
    """Convert sentimen to emoji atau tag"""
    if not sentimen:
        return "🟡"
    sentimen_lower = str(sentimen).lower()
    if 'pos' in sentimen_lower:
        return "🟢"
    elif 'neg' in sentimen_lower:
        return "🔴"
    else:
        return "🟡"

def clean_text(text):
    """Clean text dari karakter bermasalah"""
    if not text:
        return ""
    # Gunakan replace untuk karakter bermasalah umum
    clean = str(text).replace('\udcca', '').replace('\udccb', '').replace('\x00', '')
    return clean.encode('utf-8', 'ignore').decode('utf-8')

def format_indonesian_date():
    """Format tanggal hari ini dalam bahasa Indonesia"""
    today = datetime.now()
    days = ['Senin', 'Selasa', 'Rabu', 'Kamis', 'Jumat', 'Sabtu', 'Minggu']
    months = ['Januari', 'Februari', 'Maret', 'April', 'Mei', 'Juni',
              'Juli', 'Agustus', 'September', 'Oktober', 'November', 'Desember']
    
    day_name = days[today.weekday()]
    day = today.day
    month = months[today.month - 1]
    year = today.year
    
    return f"{day_name}, {day} {month} {year}"

def generate_daftar_berita_konten(df_data):
    """Generate laporan Daftar Berita & Konten"""
    
    # Header laporan
    today = datetime.now()
    yesterday = today - timedelta(days=1)
    tanggal_laporan = format_indonesian_date()
    
    lines = []
    lines.append("Daftar Berita & Konten")
    lines.append(tanggal_laporan)
    lines.append(f"Periode pantauan tanggal {yesterday.day}-{today.day} September 2025 (pukul 14.00 s.d. 06.00 WIB)")
    lines.append("")
    lines.append("Media Online")
    lines.append("===========")
    lines.append("")
    
    # Filter data yang berhasil dianalisis
    success_data = df_data[df_data['analisis_status'] == 'ok'].copy()
    
    if success_data.empty:
        lines.append("Tidak ada berita yang berhasil dianalisis.")
        return "\n".join(lines)
    
    print(f"Memproses {len(success_data)} berita yang berhasil dianalisis...")
    
    # Sort berdasarkan sentimen: positif dulu
    sentimen_order = {'Positif': 1, 'Netral': 2, 'Negatif': 3}
    if 'sentimen' in success_data.columns:
        success_data['sentimen_score'] = success_data['sentimen'].map(sentimen_order).fillna(4)
        success_data = success_data.sort_values('sentimen_score')
    
    # Generate entry untuk setiap berita
    for idx, row in success_data.iterrows():
        # Ambil data
        judul_raw = row.get('judul_berita', 'Judul tidak tersedia')
        url = row.get('url_berita', row.get('link', ''))
        sentimen = row.get('sentimen', 'Netral')
        
        # Clean text
        judul_clean = clean_text(judul_raw)
        if len(judul_clean.strip()) < 10:  # Jika terlalu banyak karakter hilang
            judul_clean = "Berita Terkait Kementerian Keuangan"
        
        # Format emoji sentimen
        emoji = get_sentiment_emoji(sentimen)
        
        # Format entry
        berita_line = f"{emoji} {judul_clean}"
        lines.append(berita_line)
        
        if url and url.strip() and url != '#':
            url_clean = clean_text(url)
            if url_clean.strip():
                lines.append(url_clean)
        lines.append("")  # Baris kosong pemisah
    
    return "\n".join(lines)

def save_laporan_txt(content, filename_prefix="daftar_berita"):
    """Simpan konten laporan ke file txt"""
    # Buat direktori output
    output_dir = Path("00_laporan_cetak")
    output_dir.mkdir(exist_ok=True)
    
    # Generate filename dengan timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{filename_prefix}_{timestamp}.txt"
    filepath = output_dir / filename
    
    # Tulis file dengan encoding yang aman
    with open(filepath, 'w', encoding='utf-8', errors='ignore') as f:
        f.write(content)
    
    return filepath

# ===== EKSEKUSI GENERATOR ===== #
print("=== GENERATOR DAFTAR BERITA & KONTEN ===")

# Check data availability
if 'analisis_berita_penting' not in globals():
    print("❌ Data analisis_berita_penting tidak tersedia. Jalankan SEL 1 dulu.")
elif analisis_berita_penting.empty:
    print("❌ DataFrame analisis_berita_penting kosong.")
else:
    print(f"📊 Data tersedia: {len(analisis_berita_penting)} berita")
    
    # Generate laporan
    try:
        laporan_content = generate_daftar_berita_konten(analisis_berita_penting)
        
        # Simpan ke file
        saved_file = save_laporan_txt(laporan_content, "daftar_berita")
        
        print(f"✅ Laporan disimpan di {saved_file}")
        print(f"📄 Total baris: {len(laporan_content.splitlines())}")
        
        # Preview (10 baris pertama)
        preview_lines = laporan_content.splitlines()[:12]
        print("\n📋 Preview laporan:")
        print("-" * 60)
        for line in preview_lines:
            if line.strip():
                print(line)
            else:
                print("")  # Baris kosong
        if len(laporan_content.splitlines()) > 12:
            print("...")
            print(f"[{len(laporan_content.splitlines()) - 12} baris lainnya]")
        print("-" * 60)
        
    except Exception as e:
        print(f"❌ ERROR: {str(e)}")
        # Buat fallback laporan minimal
        fallback_content = f"""Daftar Berita & Konten
{format_indonesian_date()}

Media Online
===========

Total berita: {len(analisis_berita_penting)}
Berhasil dianalisis: {(analisis_berita_penting['analisis_status'] == 'ok').sum()}

[Detail laporan tidak dapat dibuat - silakan cek file CSV]
"""
        
        saved_file = save_laporan_txt(fallback_content, "daftar_berita_fallback")
        print(f"📄 Laporan fallback disimpan di {saved_file}")

print("\n🎯 Selesai! File tersedia di folder: 00_laporan_cetak/")

=== GENERATOR DAFTAR BERITA & KONTEN ===
📊 Data tersedia: 10 berita
Memproses 10 berita yang berhasil dianalisis...
✅ Laporan disimpan di 00_laporan_cetak/daftar_berita_20251001_083028.txt
📄 Total baris: 36

📋 Preview laporan:
------------------------------------------------------------
Daftar Berita & Konten
Rabu, 1 Oktober 2025
Periode pantauan tanggal 30-1 September 2025 (pukul 14.00 s.d. 06.00 WIB)

Media Online

🟢 Omongan Purbaya Sukses Bangkitkan Rupiah, Ini Buktinya! - CNBC Indonesia
https://www.cnbcindonesia.com/news/20250929141335-4-671135/omongan-purbaya-sukses-bangkitkan-rupiah-ini-buktinya

🟢 Purbaya Umumkan Cukai Rokok 2026 Tak Naik, Airlangga: Bagus! - CNBC Indonesia
https://www.cnbcindonesia.com/news/20250929201058-4-671319/purbaya-umumkan-cukai-rokok-2026-tak-naik-airlangga-bagus
...
[24 baris lainnya]
------------------------------------------------------------

🎯 Selesai! File tersedia di folder: 00_laporan_cetak/


In [58]:
# SEL 3 - Generator News Update dengan AI Analysis
# Gabungkan ringkasan berita dari proses di sel menjadi satu paragraf panjang. 
# Buat prompt untuk membuat maksimal 10 headline untuk mengisi sorotan_media_online
# Tambahkan Tautan media online yang sesuai headline tersebut dari database.

import os
import json
import re
from datetime import datetime, timedelta
from pathlib import Path

# Setup AI client untuk headline generation - menggunakan konfigurasi dinamis
_client_ai = None
_call_ai_model = None

if 'AI_CLIENT' in globals() and 'CALL_AI_MODEL' in globals():
    _client_ai = AI_CLIENT
    _call_ai_model = CALL_AI_MODEL
    ai_provider = os.getenv('MODEL_ANALISIS', 'openai').upper()
    print(f"✅ Using {ai_provider} for headline generation")
else:
    print("⚠️ AI configuration not found. Headlines will use fallback method.")

def load_config():
    """Load config.json untuk mendapat topic keywords"""
    try:
        with open('config.json', 'r', encoding='utf-8') as f:
            return json.load(f)
    except Exception as e:
        print(f"Warning: Tidak dapat memuat config.json: {e}")
        return {}

def clean_text_safe(text):
    """Safe text cleaning"""
    if not text:
        return ""
    return str(text).encode('utf-8', 'ignore').decode('utf-8').strip()

def combine_resumes(df_data):
    """Gabungkan semua resume berita menjadi satu paragraf panjang"""
    success_data = df_data[df_data['analisis_status'] == 'ok']
    
    if success_data.empty:
        return "Tidak ada resume berita yang tersedia."
    
    # Gabungkan semua resume
    all_resumes = []
    for _, row in success_data.iterrows():
        resume = clean_text_safe(row.get('resume_ai', ''))
        if resume and len(resume) > 10:
            all_resumes.append(resume)
    
    if not all_resumes:
        return "Resume berita tidak tersedia."
    
    # Gabungkan dengan connector yang natural
    combined = ". ".join(all_resumes)
    return combined

def count_sentiment_stats(df_data):
    """Hitung statistik sentimen"""
    success_data = df_data[df_data['analisis_status'] == 'ok']
    
    if success_data.empty:
        return 0, 0, 0, 0
    
    total = len(success_data)
    positif = len(success_data[success_data['sentimen'].str.contains('Positif', na=False)])
    negatif = len(success_data[success_data['sentimen'].str.contains('Negatif', na=False)])
    netral = total - positif - negatif
    
    return total, positif, negatif, netral

def generate_headlines_with_ai(combined_resumes, topic_keywords, df_data):
    """Generate headlines menggunakan AI"""
    try:
        if not _call_ai_model:
            ai_provider = os.getenv('MODEL_ANALISIS', 'unknown').upper()
            print(f"Warning: {ai_provider} tidak tersedia, menggunakan fallback headlines")
            return generate_fallback_headlines(df_data)
        
        # Build prompt untuk AI
        keywords_str = ", ".join(topic_keywords) if topic_keywords else "Kementerian Keuangan, ekonomi, fiskal"
        
        prompt = f"""Berdasarkan ringkasan berita berikut, buatlah maksimal 8 poin sorotan media online yang menarik dan informatif.

RINGKASAN GABUNGAN BERITA:
{combined_resumes[:2000]}

TOPIK YANG DIPANTAU: {keywords_str}

TUGAS:
1. Buat 5-8 poin sorotan yang merangkum isu-isu utama, setiap poin sorotan harus berbeda satu dengan yang lain.
2. Fokus pada aspek Kementerian Keuangan, ekonomi, dan kebijakan fiskal
3. Setiap poin maksimal 25 kata
4. Gunakan bahasa Indonesia yang profesional
5. Format: satu poin per baris, dimulai dengan "• "

CONTOH FORMAT:
• Menkeu melakukan inspeksi mendadak ke kantor pusat BNI untuk memantau penyaluran kredit perbankan.
• Kebijakan cukai tembakau 2026 tidak mengalami kenaikan untuk melindungi industri dan pekerja.

Buat poin sorotan sekarang:"""

        ai_response = _call_ai_model(prompt, temperature=0.3, max_tokens=600)
        
        # Parse response menjadi list headlines
        headlines = []
        for line in ai_response.split('\n'):
            line = line.strip()
            if line.startswith('•'):
                headline = line[1:].strip()
                if len(headline) > 10:  # Filter headline yang terlalu pendek
                    headlines.append(headline)
        
        return headlines[:8]  # Maksimal 8 headlines
        
    except Exception as e:
        print(f"Error generating AI headlines: {e}")
        return generate_fallback_headlines(df_data)

def generate_fallback_headlines(df_data):
    """Generate headlines fallback tanpa AI"""
    success_data = df_data[df_data['analisis_status'] == 'ok']
    
    headlines = []
    for _, row in success_data.head(6).iterrows():
        resume = clean_text_safe(row.get('resume_ai', ''))
        if resume and len(resume) > 15:
            # Potong di titik atau koma pertama untuk jadi headline
            headline = resume.split('.')[0].split(',')[0]
            if len(headline) > 20 and len(headline) < 100:
                headlines.append(headline.strip())
    
    return headlines

def get_main_topic_from_data(df_data, config):
    """Tentukan topik utama berdasarkan data berita"""
    success_data = df_data[df_data['analisis_status'] == 'ok']
    
    if success_data.empty:
        return "Monitoring Berita Kementerian Keuangan"
    
    # Ambil kata kunci dari judul-judul berita
    all_titles = " ".join([clean_text_safe(row.get('judul_berita', '')) for _, row in success_data.iterrows()])
    
    # Cari keyword yang sering muncul
    common_words = ['Menkeu', 'Purbaya', 'BNI', 'Sidak', 'Cukai', 'Bank', 'Kredit', 'Ekonomi']
    word_counts = {word: all_titles.upper().count(word.upper()) for word in common_words}
    
    # Ambil kata dengan frekuensi tertinggi
    most_common = max(word_counts.items(), key=lambda x: x[1])
    if most_common[1] > 0:
        if 'SIDAK' in all_titles.upper() and 'BNI' in all_titles.upper():
            return "Menkeu Sidak BNI"
        elif 'CUKAI' in all_titles.upper():
            return "Kebijakan Cukai Tembakau"
        elif 'MENKEU' in all_titles.upper() or 'PURBAYA' in all_titles.upper():
            return "Aktivitas Menteri Keuangan"
    
    return "Monitoring Berita Kementerian Keuangan"

def get_related_links(df_data, max_links=8):
    """Ambil link berita yang relevan untuk tautan media online"""
    success_data = df_data[df_data['analisis_status'] == 'ok']
    
    if success_data.empty:
        return []
    
    links = []
    for idx, row in success_data.head(max_links).iterrows():
        judul = clean_text_safe(row.get('judul_berita', ''))
        url = clean_text_safe(row.get('url_berita', row.get('link', '')))
        
        if judul and url and url != '#':
            # Potong judul jika terlalu panjang
            if len(judul) > 80:
                judul = judul[:80] + "..."
            
            links.append({
                'judul': judul,
                'url': url
            })
    
    return links

def generate_news_update(df_data, config=None):
    """Generate News Update format lengkap"""
    
    if config is None:
        config = {}
    
    # Ambil data statistik
    total, positif, negatif, netral = count_sentiment_stats(df_data)
    
    if total == 0:
        return "News Update tidak dapat dibuat: tidak ada berita yang berhasil dianalisis."
    
    # Header informasi
    today = datetime.now()
    hari_indo = ['Senin', 'Selasa', 'Rabu', 'Kamis', 'Jumat', 'Sabtu', 'Minggu'][today.weekday()]
    tanggal_indo = f"{today.day} Oktober {today.year}"
    waktu_laporan = f"{hari_indo}, {tanggal_indo} (Pukul {today.hour:02d}.00 WIB)"
    
    # Tentukan topik utama
    main_topic = get_main_topic_from_data(df_data, config)
    
    # Gabungkan resume
    combined_resumes = combine_resumes(df_data)
    
    # Generate headlines dengan AI
    topic_keywords = config.get('topic_keywords', [])
    headlines = generate_headlines_with_ai(combined_resumes, topic_keywords, df_data)
    
    # Ambil tautan terkait
    related_links = get_related_links(df_data)
    
    # Build content
    lines = []
    lines.append("News Update")
    lines.append(main_topic)
    lines.append(f"Jakarta, {waktu_laporan}")
    lines.append("")
    
    # Statistik berita
    stats_text = f"Pemberitaan terkait {main_topic.lower()} hari ini tercatat terdapat {total} berita"
    if positif > 0 or negatif > 0 or netral > 0:
        detail_stats = []
        if positif > 0:
            detail_stats.append(f"{positif} positif")
        if netral > 0:
            detail_stats.append(f"{netral} netral")
        if negatif > 0:
            detail_stats.append(f"{negatif} negatif")
        
        if detail_stats:
            stats_text += f" ({', '.join(detail_stats)})"
    
    stats_text += " di media online."
    lines.append(stats_text)
    lines.append("")
    
    # Sorotan Media Online
    lines.append("Sorotan Media Online")
    if headlines:
        for headline in headlines:
            lines.append(f"• {headline}")
    else:
        lines.append("• Tidak ada sorotan khusus tersedia.")
    
    lines.append("")
    
    # Tautan Media Online
    lines.append("Tautan Media Online:")
    if related_links:
        for i, link in enumerate(related_links, 1):
            lines.append(f" {i}. {link['judul']}")
            lines.append(f"    {link['url']}")
    else:
        lines.append(" 1. Tidak ada tautan tersedia")
    
    return "\n".join(lines)

def save_news_update(content, filename_prefix="news_update"):
    """Simpan News Update ke file txt"""
    output_dir = Path("00_laporan_cetak")
    output_dir.mkdir(exist_ok=True)
    
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{filename_prefix}_{timestamp}.txt"
    filepath = output_dir / filename
    
    with open(filepath, 'w', encoding='utf-8', errors='ignore') as f:
        f.write(content)
    
    return filepath

# ===== EKSEKUSI SEL 3 ===== #
print("=== GENERATOR NEWS UPDATE ===")

# Check data
if 'analisis_berita_penting' not in globals() or analisis_berita_penting.empty:
    print("❌ Data analisis_berita_penting tidak tersedia. Jalankan SEL 1 terlebih dahulu.")
else:
    print(f"📊 Data tersedia: {len(analisis_berita_penting)} berita")
    
    # Load config
    config = load_config()
    
    # Preview gabungan resume
    combined_resumes = combine_resumes(analisis_berita_penting)
    print(f"\n📝 Gabungan resume ({len(combined_resumes)} karakter):")
    print(f"Preview: {combined_resumes[:200]}...")
    
    # Generate news update
    try:
        news_update_content = generate_news_update(analisis_berita_penting, config)
        
        # Simpan file
        saved_file = save_news_update(news_update_content, "news_update_general")
        
        print(f"\n✅ News Update disimpan di: {saved_file}")
        print(f"📄 Total baris: {len(news_update_content.splitlines())}")
        
        # Preview hasil
        preview_lines = news_update_content.splitlines()[:15]
        print(f"\n📋 Preview News Update:")
        print("-" * 60)
        for line in preview_lines:
            print(line)
        if len(news_update_content.splitlines()) > 15:
            print("...")
            print(f"[{len(news_update_content.splitlines()) - 15} baris lainnya]")
        print("-" * 60)
        
    except Exception as e:
        print(f"❌ Error generating news update: {e}")
        import traceback
        traceback.print_exc()

print(f"\n🎯 Selesai! File tersedia di: 00_laporan_cetak/")

2025-10-01 08:33:43,098 - INFO - HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"


✅ Using DEEPSEEK for headline generation
=== GENERATOR NEWS UPDATE ===
📊 Data tersedia: 10 berita

📝 Gabungan resume (2868 karakter):
Preview: Rupiah menguat 0,51% ke Rp16.640/US$ setelah klarifikasi Menteri Keuangan Purbaya mengenai kebijakan deposito valas 4%. Pasar merespons positif penjelasan bahwa rencana kenaikan deposito valas dibatal...

✅ News Update disimpan di: 00_laporan_cetak/news_update_general_20251001_083353.txt
📄 Total baris: 33

📋 Preview News Update:
------------------------------------------------------------
News Update
Menkeu Sidak BNI
Jakarta, Rabu, 1 Oktober 2025 (Pukul 08.00 WIB)

Pemberitaan terkait menkeu sidak bni hari ini tercatat terdapat 10 berita (10 netral) di media online.

Sorotan Media Online
• Rupiah menguat 0,51% ke Rp16.640/US$ setelah pemerintah batalkan rencana kenaikan deposito valas.
• Menkeu pastikan tidak ada kenaikan cukai rokok pada 2026, fokus dialihkan ke pemberantasan rokok ilegal.
• Menkeu lakukan sidak mendadak ke kantor BNI untuk pan

In [60]:
# SEL 4 - Generator Laporan Analisis Media Online dan Media Sosial (Fixed Version)
# Format Laporan Analisis Berita
# Lengkapi sel ini dengan cara untuk memanggil open AI dengan feeding data berupa:
# 1. Gabungan dari resume berita yang ada dikelompokkan dalam topik terpisah sesuai config.json 'topic_keywords'
# 2. Topik Berita di dokumen diambil dari config.json 'topic_keywords', masing-masing topic harus ada resume berita singkat. 
# 3. Masing-masing topic harus diberikan poin 2-3 poin penjelasan.

import os
import json
import re
from datetime import datetime
from pathlib import Path
from collections import defaultdict, Counter

# Setup AI client - menggunakan konfigurasi dinamis
_client_ai = None
_call_ai_model = None

if 'AI_CLIENT' in globals() and 'CALL_AI_MODEL' in globals():
    _client_ai = AI_CLIENT
    _call_ai_model = CALL_AI_MODEL
    ai_provider = os.getenv('MODEL_ANALISIS', 'openai').upper()
    print(f"✅ Using {ai_provider} for topic analysis")
else:
    print("⚠️ AI configuration not found. Analysis will use fallback method.")

def load_config_keywords():
    """Load config.json untuk mendapat topic keywords"""
    try:
        with open('config.json', 'r', encoding='utf-8') as f:
            config = json.load(f)
        return config.get('topic_keywords', []), config
    except Exception as e:
        print(f"Warning: Tidak dapat memuat config.json: {e}")
        return [], {}

def clean_text_safe(text):
    """Safe text cleaning"""
    if not text:
        return ""
    return str(text).encode('utf-8', 'ignore').decode('utf-8').strip()

def format_indonesian_datetime():
    """Format tanggal dan waktu dalam bahasa Indonesia"""
    today = datetime.now()
    days = ['Senin', 'Selasa', 'Rabu', 'Kamis', 'Jumat', 'Sabtu', 'Minggu']
    months = ['Januari', 'Februari', 'Maret', 'April', 'Mei', 'Juni',
              'Juli', 'Agustus', 'September', 'Oktober', 'November', 'Desember']
    
    day_name = days[today.weekday()]
    day = today.day
    month = months[today.month - 1]
    year = today.year
    
    return f"{day_name}, {day} {month} {year}"

def group_news_by_topics(df_data, topic_keywords):
    """Kelompokkan berita berdasarkan topic keywords dari config.json - FIXED VERSION"""
    success_data = df_data[df_data['analisis_status'] == 'ok'].copy()
    
    if success_data.empty:
        return {}
    
    # Kelompokkan berita berdasarkan topik
    topic_groups = defaultdict(list)
    
    # Jika tidak ada topic_keywords, gunakan topik_llm yang ada
    if not topic_keywords:
        for _, row in success_data.iterrows():
            topik = clean_text_safe(row.get('topik_llm', 'Lainnya'))
            if topik != 'Lainnya':
                topic_groups[topik].append(row)
    else:
        # Gunakan topic_keywords dari config - HANYA yang cocok dengan keywords
        for _, row in success_data.iterrows():
            judul = clean_text_safe(row.get('judul_berita', '')).upper()
            resume = clean_text_safe(row.get('resume_ai', '')).upper()
            
            # Cek apakah berita cocok dengan keywords
            for topic in topic_keywords:
                topic_upper = topic.upper()
                if topic_upper in judul or topic_upper in resume:
                    topic_groups[topic].append(row)
                    break  # Berita hanya masuk ke satu topik
        
        # TIDAK membuat kategori "Isu Lainnya" otomatis
        # Hanya topik yang ada beritanya yang akan ditampilkan
    
    return dict(topic_groups)

def analyze_sentiment_by_topic(topic_groups):
    """Analisis sentimen per topik"""
    topic_sentiments = {}
    
    for topic, news_list in topic_groups.items():
        sentiments = []
        for news in news_list:
            sentiment = clean_text_safe(news.get('sentimen', 'Netral'))
            sentiments.append(sentiment)
        
        # Hitung distribusi sentimen
        sentiment_counts = Counter(sentiments)
        total = len(sentiments)
        
        # Tentukan sentimen dominan
        if sentiment_counts.get('Positif', 0) > total * 0.5:
            dominant = 'Positif'
        elif sentiment_counts.get('Negatif', 0) > total * 0.3:
            dominant = 'Negatif'
        else:
            dominant = 'Netral'
        
        topic_sentiments[topic] = {
            'dominant': dominant,
            'distribution': dict(sentiment_counts),
            'total': total
        }
    
    return topic_sentiments

def generate_topic_analysis_with_ai(topic, news_list, topic_keywords):
    """Generate analisis untuk satu topik menggunakan AI"""
    
    if not _call_ai_model:
        return generate_fallback_topic_analysis(topic, news_list)
    
    # Gabungkan semua resume untuk topik ini
    resumes = []
    for news in news_list[:5]:  # Maksimal 5 berita per topik
        resume = clean_text_safe(news.get('resume_ai', ''))
        if resume:
            resumes.append(resume)
    
    combined_resumes = ". ".join(resumes)
    
    # Build prompt untuk AI
    prompt = f"""Analisis topik berita berikut dan buat analisis profesional:

TOPIK: {topic}
RINGKASAN BERITA: {combined_resumes[:1500]}

Tugas:
1. Buat ringkasan singkat topik ini (maksimal 40 kata)
2. Buat 2-3 poin analisis utama (masing-masing maksimal 25 kata)
3. Fokus pada dampak untuk Kementerian Keuangan atau kebijakan ekonomi

Format output:
RINGKASAN: [ringkasan singkat]
POIN 1: [analisis poin 1]
POIN 2: [analisis poin 2]
POIN 3: [analisis poin 3 jika ada]

Contoh:
RINGKASAN: Menteri Keuangan melakukan inspeksi ke BNI untuk memantau kinerja
POIN 1: Transparansi sektor perbankan menjadi fokus utama
POIN 2: Pengawasan kredit perbankan diperkuat"""

    try:
        ai_response = _call_ai_model(prompt, temperature=0.3, max_tokens=500)
        
        # Parse response
        lines = ai_response.strip().split('\n')
        result = {
            'ringkasan': '',
            'poin_analisis': []
        }
        for line in lines:
            line = line.strip()
            if line.startswith('RINGKASAN:'):
                result['ringkasan'] = line.replace('RINGKASAN:', '').strip()
            elif line.startswith('POIN'):
                poin_text = re.sub(r'^POIN \d+:', '', line).strip()
                if poin_text:
                    result['poin_analisis'].append(poin_text)
        
        return result
        
    except Exception as e:
        print(f"Error generating AI analysis for topic {topic}: {e}")
        return generate_fallback_topic_analysis(topic, news_list)

def generate_fallback_topic_analysis(topic, news_list):
    """Generate analisis fallback tanpa AI"""
    # Ambil resume pertama sebagai ringkasan
    ringkasan = "Tidak ada analisis tersedia."
    if news_list:
        first_resume = clean_text_safe(news_list[0].get('resume_ai', ''))
        if first_resume:
            ringkasan = first_resume[:100] + "..." if len(first_resume) > 100 else first_resume
    
    poin_analisis = [
        f"Terdapat {len(news_list)} berita terkait topik {topic}",
        "Memerlukan pemantauan lebih lanjut dari perspektif kebijakan fiskal"
    ]
    
    return {
        'ringkasan': ringkasan,
        'poin_analisis': poin_analisis
    }

def categorize_topics(topic_groups, topic_keywords):
    """Kategorikan topik menjadi ISU KEMENKEU vs ISU NASIONAL/INTERNASIONAL"""
    kemenkeu_keywords = [
        'Menkeu', 'Kementerian Keuangan', 'Pajak', 'Cukai', 'APBN', 'Fiskal', 
        'Bea Cukai', 'DJP', 'Purbaya', 'Sidak', 'Bank', 'Kredit', 'Ekonomi'
    ]
    
    isu_kemenkeu = {}
    isu_nasional = {}
    
    for topic, news_list in topic_groups.items():
        # Cek apakah topik terkait Kemenkeu
        is_kemenkeu = False
        topic_upper = topic.upper()
        
        for keyword in kemenkeu_keywords:
            if keyword.upper() in topic_upper:
                is_kemenkeu = True
                break
        
        # Jika tidak jelas dari nama topik, cek dari isi berita
        if not is_kemenkeu and news_list:
            sample_text = " ".join([
                clean_text_safe(news.get('judul_berita', '')).upper() + " " +
                clean_text_safe(news.get('resume_ai', '')).upper()
                for news in news_list[:3]
            ])
            
            for keyword in kemenkeu_keywords:
                if keyword.upper() in sample_text:
                    is_kemenkeu = True
                    break
        
        if is_kemenkeu:
            isu_kemenkeu[topic] = news_list
        else:
            isu_nasional[topic] = news_list
    
    return isu_kemenkeu, isu_nasional

def extract_narasumber(df_data):
    """Extract narasumber utama dari berita"""
    success_data = df_data[df_data['analisis_status'] == 'ok']
    
    # Cari nama-nama yang sering muncul (kemungkinan narasumber)
    all_text = ""
    
    for _, row in success_data.iterrows():
        judul = clean_text_safe(row.get('judul_berita', ''))
        resume = clean_text_safe(row.get('resume_ai', ''))
        all_text += f" {judul} {resume}"
    
    # Cari nama-nama pejabat yang umum
    known_officials = [
        'Purbaya Yudhi Sadewa', 'Menkeu Purbaya', 'Menteri Keuangan',
        'Dirjen Pajak', 'Dirjen Bea Cukai', 'Kepala Bappenas',
        'Gubernur BI', 'Direktur BNI', 'Presiden Jokowi'
    ]
    
    found_narasumber = []
    for official in known_officials:
        if official.upper() in all_text.upper():
            found_narasumber.append(official)
    
    return found_narasumber[:3] if found_narasumber else ["Belum ada narasumber"]

def generate_laporan_analisis_media(df_data, config):
    """Generate laporan analisis media lengkap"""
    
    # Load topic keywords
    topic_keywords, _ = load_config_keywords()
    
    # Kelompokkan berita berdasarkan topik - HANYA yang cocok dengan keywords
    topic_groups = group_news_by_topics(df_data, topic_keywords)
    
    # Jika tidak ada yang cocok dengan keywords, buat pesan informasi
    if not topic_groups:
        return f"""**Laporan Analisis Media Online dan Media Sosial**
{format_indonesian_datetime()}

**EXECUTIVE SUMMARY**
==================================================
Periode pemantauan ini tidak menemukan berita yang sesuai dengan topic keywords yang telah ditentukan dalam config.json.
Mungkin perlu review atau penyesuaian keywords untuk menangkap lebih banyak berita yang relevan.

**MEDIA ONLINE**
**Topik Berita:** Tidak ada topik yang cocok dengan keywords
**Tonasi Berita:** -

**Kegiatan yang dirujuk:** Pemantauan Berkelanjutan
**Narasumber utama yang dirujuk:** Belum ada narasumber

Silakan periksa kembali topic_keywords di config.json atau data berita yang tersedia."""
    
    # Analisis sentimen per topik
    topic_sentiments = analyze_sentiment_by_topic(topic_groups)
    
    # Kategorikan topik
    isu_kemenkeu, isu_nasional = categorize_topics(topic_groups, topic_keywords)
    
    # Extract narasumber
    narasumber_list = extract_narasumber(df_data)
    
    # Build laporan
    lines = []
    
    # ===== HEADER ===== #
    lines.append("===== Page 1 =====")
    lines.append("")
    lines.append("**Laporan Analisis Media Online dan Media Sosial**")
    lines.append(format_indonesian_datetime())
    lines.append("")
    
    # ===== EXECUTIVE SUMMARY ===== #
    lines.append("**EXECUTIVE SUMMARY**")
    lines.append("=" * 50)
    
    # Generate executive summary dengan AI jika tersedia
    total_berita = len(df_data[df_data['analisis_status'] == 'ok'])
    total_relevan = sum(len(news_list) for news_list in topic_groups.values())
    main_topics = list(topic_groups.keys())[:3]
    
    lines.append(f"Periode pemantauan media online menunjukkan {total_berita} berita penting yang berhasil dianalisis.")
    lines.append(f"Dari jumlah tersebut, {total_relevan} berita sesuai dengan topik yang dipantau: {', '.join(main_topics)}.")
    
    if isu_kemenkeu:
        lines.append("Fokus pemerintah terutama pada transparansi dan pengawasan sektor keuangan.")
    
    lines.append("Pernyataan dan kebijakan penting dari pejabat terkait terus dipantau secara intensif.")
    lines.append("")
    
    # ===== MEDIA ONLINE ===== #
    lines.append("**MEDIA ONLINE**")
    lines.append("")
    
    # Topik Berita
    lines.append(f"**Topik Berita:** {', '.join(topic_groups.keys())}")
    
    # Tonasi Berita Overall - FIXED VERSION
    all_sentiments = []
    for sentiment_data in topic_sentiments.values():
        for sentiment, count in sentiment_data['distribution'].items():
            # Extend dengan string sentimen sebanyak count-nya
            all_sentiments.extend([sentiment] * count)
            
    sentiment_counter = Counter(all_sentiments) if all_sentiments else Counter(['Netral'])
    dominant_sentiment = sentiment_counter.most_common(1)[0][0] if sentiment_counter else 'Netral'
    lines.append(f"**Tonasi Berita:** {dominant_sentiment}")
    lines.append("")
    
    # ===== PESAN KUNCI DAN ANALISIS ===== #
    lines.append("**Pesan Kunci dan Analisis:**")
    lines.append("")
    
    # ISU KEMENKEU
    if isu_kemenkeu:
        lines.append("**ISU KEMENKEU**")
        for i, (topic, news_list) in enumerate(isu_kemenkeu.items(), 1):
            # Generate AI analysis untuk topik ini
            analysis = generate_topic_analysis_with_ai(topic, news_list, topic_keywords)
            
            lines.append(f"{i}. **{topic}**")
            lines.append(f"   Ringkasan: {analysis['ringkasan']}")
            for j, poin in enumerate(analysis['poin_analisis'], 1):
                lines.append(f"   - {poin}")
            lines.append("")
    
    # ISU NASIONAL DAN INTERNASIONAL
    if isu_nasional:
        lines.append("**ISU NASIONAL DAN INTERNASIONAL**")
        for i, (topic, news_list) in enumerate(isu_nasional.items(), 1):
            # Generate AI analysis untuk topik ini
            analysis = generate_topic_analysis_with_ai(topic, news_list, topic_keywords)
            
            lines.append(f"{i}. **{topic}**")
            lines.append(f"   Ringkasan: {analysis['ringkasan']}")
            for j, poin in enumerate(analysis['poin_analisis'], 1):
                lines.append(f"   - {poin}")
            lines.append("")
    
    # Jika tidak ada isu kemenkeu atau nasional, beri informasi
    if not isu_kemenkeu and not isu_nasional:
        lines.append("**ISU KEMENKEU**")
        lines.append("Tidak ada berita yang cocok dengan kategori isu Kemenkeu pada periode ini.")
        lines.append("")
        lines.append("**ISU NASIONAL DAN INTERNASIONAL**")
        lines.append("Tidak ada berita yang cocok dengan kategori isu nasional/internasional pada periode ini.")
        lines.append("")
    
    # ===== KEGIATAN & NARASUMBER ===== #
    lines.append("**Kegiatan yang dirujuk:** Kegiatan Baru, Pemantauan Berkelanjutan")
    lines.append(f"**Narasumber utama yang dirujuk:** {', '.join(narasumber_list)}")
    lines.append("")
    
    # ===== DAFTAR BERITA ===== #
    lines.append("===== Page 2 =====")
    lines.append("")
    lines.append("**Daftar Berita:**")
    
    # Hanya tampilkan berita yang masuk dalam topic groups
    displayed_count = 0
    for topic, news_list in topic_groups.items():
        for news in news_list:
            displayed_count += 1
            judul = clean_text_safe(news.get('judul_berita', 'Judul tidak tersedia'))
            url = clean_text_safe(news.get('url_berita', news.get('link', '#')))
            
            lines.append(f"{displayed_count}. {judul}")
            if url and url != '#':
                lines.append(f"[{url}]")
            lines.append("")
    
    if displayed_count == 0:
        lines.append("Tidak ada berita yang sesuai dengan topic keywords untuk ditampilkan.")
    
    return "\n".join(lines)

def save_laporan_analisis(content, filename_prefix="laporan_analisis_media"):
    """Simpan laporan analisis ke file txt"""
    output_dir = Path("00_laporan_cetak")
    output_dir.mkdir(exist_ok=True)
    
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{filename_prefix}_{timestamp}.txt"
    filepath = output_dir / filename
    
    with open(filepath, 'w', encoding='utf-8', errors='ignore') as f:
        f.write(content)
    
    return filepath

# ===== EKSEKUSI SEL 4 ===== #
print("=== GENERATOR LAPORAN ANALISIS MEDIA (FIXED VERSION) ===")

# Check data
if 'analisis_berita_penting' not in globals() or analisis_berita_penting.empty:
    print("❌ Data analisis_berita_penting tidak tersedia. Jalankan SEL 1 terlebih dahulu.")
else:
    print(f"📊 Data tersedia: {len(analisis_berita_penting)} berita")
    
    # Load config dan analisis topik
    topic_keywords, config = load_config_keywords()
    print(f"📋 Topic keywords dari config: {topic_keywords}")
    
    # Preview pengelompokan topik
    topic_groups = group_news_by_topics(analisis_berita_penting, topic_keywords)

    if topic_groups:
        print(f"🏷️  Berita dikelompokkan dalam {len(topic_groups)} topik:")
        for topic, news_list in topic_groups.items():
            print(f"   - {topic}: {len(news_list)} berita")
    else:
        print("⚠️  Tidak ada berita yang cocok dengan topic keywords dari config.json")
        print("   Laporan akan dibuat dengan informasi bahwa tidak ada topik yang cocok")
    
    # Generate laporan lengkap
    try:
        laporan_content = generate_laporan_analisis_media(analisis_berita_penting, config)
        
        # Simpan file
        saved_file = save_laporan_analisis(laporan_content, "laporan_analisis_media")
        
        print(f"\n✅ Laporan Analisis Media disimpan di: {saved_file}")
        print(f"📄 Total baris: {len(laporan_content.splitlines())}")
        
        # Preview hasil (20 baris pertama)
        preview_lines = laporan_content.splitlines()[:20]
        print(f"\n📋 Preview Laporan Analisis Media:")
        print("-" * 70)
        for line in preview_lines:
            print(line)
        if len(laporan_content.splitlines()) > 20:
            print("...")
            print(f"[{len(laporan_content.splitlines()) - 20} baris lainnya]")
        print("-" * 70)
        
    except Exception as e:
        print(f"❌ Error generating laporan analisis: {e}")
        import traceback
        traceback.print_exc()

print(f"\n🎯 Selesai! File tersedia di: 00_laporan_cetak/")

✅ Using DEEPSEEK for topic analysis
=== GENERATOR LAPORAN ANALISIS MEDIA (FIXED VERSION) ===
📊 Data tersedia: 10 berita
📋 Topic keywords dari config: ['rokok ilegal', 'makan bergizi gratis', 'tax amnesty', 'sidak BNI']
🏷️  Berita dikelompokkan dalam 2 topik:
   - rokok ilegal: 2 berita
   - makan bergizi gratis: 1 berita


2025-10-01 08:38:02,777 - INFO - HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-01 08:38:10,271 - INFO - HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-01 08:38:10,271 - INFO - HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"



✅ Laporan Analisis Media disimpan di: 00_laporan_cetak/laporan_analisis_media_20251001_083816.txt
📄 Total baris: 46

📋 Preview Laporan Analisis Media:
----------------------------------------------------------------------
===== Page 1 =====

**Laporan Analisis Media Online dan Media Sosial**
Rabu, 1 Oktober 2025

**EXECUTIVE SUMMARY**
Periode pemantauan media online menunjukkan 10 berita penting yang berhasil dianalisis.
Dari jumlah tersebut, 3 berita sesuai dengan topik yang dipantau: rokok ilegal, makan bergizi gratis.
Fokus pemerintah terutama pada transparansi dan pengawasan sektor keuangan.
Pernyataan dan kebijakan penting dari pejabat terkait terus dipantau secara intensif.

**MEDIA ONLINE**

**Topik Berita:** rokok ilegal, makan bergizi gratis
**Tonasi Berita:** positif

**Pesan Kunci dan Analisis:**

**ISU KEMENKEU**
...
[26 baris lainnya]
----------------------------------------------------------------------

🎯 Selesai! File tersedia di: 00_laporan_cetak/


In [None]:
# output path /Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Pekerjaan BMN/05. 2025/98_monitoring_berita/monitoring-berita/00_laporan_cetak