In [None]:
import requests
from bs4 import BeautifulSoup
import csv
import time

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
}

def get_article_content(link):
    """Download an article page and return the text of its body paragraphs.

    The page at ``link`` is fetched with the module-level ``headers`` and a
    15-second timeout, then parsed with the ``lxml`` parser. The text of every
    ``<p>`` inside the ``div`` with class
    ``article-content-body__item-content`` is stripped and joined with single
    spaces.

    Args:
        link: URL of the article page to fetch.

    Returns:
        The joined paragraph text; the placeholder string
        ``"Konten tidak dapat ditemukan."`` when the content ``div`` is not
        present; or an empty string when the request or the parsing raised
        (a message is printed in that case).
    """
    try:
        response = requests.get(link, headers=headers, timeout=15)
        # Turn HTTP 4xx/5xx responses into a RequestException subclass.
        response.raise_for_status()

        page = BeautifulSoup(response.text, 'lxml')
        body = page.find('div', class_='article-content-body__item-content')

        # Guard clause: no content container on this page.
        if not body:
            return "Konten tidak dapat ditemukan."

        return ' '.join(par.get_text(strip=True) for par in body.find_all('p'))

    except requests.exceptions.RequestException as err:
        # Network / HTTP level failure.
        print(f"❌ Error saat mengakses artikel: {link} | {err}")
        return ''
    except Exception as err:
        # Anything else raised while parsing; reported the same best-effort way.
        print(f"❌ Error saat parsing konten artikel: {link} | {err}")
        return ''


# Listing page to scrape and the CSV file that receives the results.
url = 'https://www.liputan6.com/bola/liga-nasional'
output_filename = 'liputan6_timnas_indonesia.csv'

# The file is opened in 'w' mode and the header row is written before any
# request is made, so the header is present even if the listing fetch fails.
with open(output_filename, 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['Judul', 'Link', 'Waktu', 'Konten'])

    print(f"📄 Melakukan scraping dari halaman: {url}")

    try:
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'lxml')

        articles = soup.find_all('article', class_='articles--iridescent-list--item')

        if articles:
            print(f"🔎 Ditemukan {len(articles)} artikel. Memulai proses...")
        else:
            print("Tidak ada artikel yang ditemukan di halaman ini.")

        for item in articles:
            try:
                # Title link lives inside the item's <h4>; a missing <h4> or
                # <a> raises here and is reported by the handler below.
                heading = item.find('h4', class_='articles--iridescent-list--text-item__title')
                anchor = heading.find('a')
                link = anchor['href']
                judul = anchor.get_text(strip=True)

                # Publication time is optional; fall back to a placeholder.
                time_tag = item.find('time', class_='articles--iridescent-list--text-item__time')
                if time_tag:
                    waktu = time_tag.get_text(strip=True)
                else:
                    waktu = 'Tidak ada waktu'

                print(f"\n📝 Mengambil konten dari: '{judul}'")
                konten = get_article_content(link)

                # Pause one second after each article request.
                time.sleep(1)

                writer.writerow([judul, link, waktu, konten])
                print(f"✅ Berhasil disimpan: {judul}")

            except Exception as err:
                # A single bad entry is reported and skipped; the loop goes on.
                print(f"❌ Gagal memproses satu item artikel: {err}")

    except requests.exceptions.RequestException as err:
        # Only request-level failures of the listing page are handled here.
        print(f"❌ Gagal total mengakses URL utama: {url} | {err}")

# Printed once the block above finishes, including after a handled listing
# request failure.
print(f"\n🎉 Proses scraping selesai. Hasil disimpan di file '{output_filename}'.")

📄 Melakukan scraping dari halaman: https://www.liputan6.com/bola/liga-nasional
🔎 Ditemukan 57 artikel. Memulai proses...

📝 Mengambil konten dari: 'Kapan Semifinal Piala AFF U-23?'
✅ Berhasil disimpan: Kapan Semifinal Piala AFF U-23?

📝 Mengambil konten dari: 'Bertemu di Semifinal AFF U-23 2025: Jadwal Indonesia vs Thailand U-23 Kapan?'
✅ Berhasil disimpan: Bertemu di Semifinal AFF U-23 2025: Jadwal Indonesia vs Thailand U-23 Kapan?

📝 Mengambil konten dari: 'Intip Bagan Semifinal Piala AFF U-23 2025: Ujian Berat Pertama Indonesia Lawan Thailand'
✅ Berhasil disimpan: Intip Bagan Semifinal Piala AFF U-23 2025: Ujian Berat Pertama Indonesia Lawan Thailand

📝 Mengambil konten dari: 'Jadwal Indonesia vs Thailand di Semifinal Piala AFF U-23 2025'
✅ Berhasil disimpan: Jadwal Indonesia vs Thailand di Semifinal Piala AFF U-23 2025

📝 Mengambil konten dari: 'Timnas Indonesia Bertemu Siapa di Semifinal Piala AFF U-23?'
✅ Berhasil disimpan: Timnas Indonesia Bertemu Siapa di Semifinal Piala AFF U-