In [14]:
# pip install tokopaedi pandas
from tokopaedi import search, get_product
import pandas as pd
import time, random

# Search terms; main() runs one search per entry.
KEYWORDS = [
    "elektronik",
    "handphone",
    "komputer",
    "fashion pria",
    "fashion wanita",
    "ibu dan bayi",
    "kecantikan",
    "kesehatan",
    "rumah tangga",
    "olahraga",
    "hobi",
    "makanan",
    "minuman",
    "sekolah",
]

# Safe target number of results per category.
MAX_PER_KEYWORD = 100
# (low, high) bounds in seconds for the random pause between keywords (slow).
SLEEP_SEARCH = (3.0, 5.0)
# (low, high) bounds in seconds for the random pause between products (slow).
SLEEP_DETAIL = (1.2, 2.0)
# Light retry count used when a request fails.
RETRY_PER_ITEM = 2

def extract_description(detail) -> str:
    """Return the stripped description text found on a product detail object.

    The ``"description"`` entry of ``detail.product_detail`` is used when that
    attribute is a dict and the entry is truthy. Otherwise the function falls
    back to ``detail.description``. A missing or falsy fallback yields ``""``.
    """
    nested = getattr(detail, "product_detail", None)
    from_nested = nested.get("description") if isinstance(nested, dict) else None
    if from_nested:
        return from_nested.strip()

    fallback = getattr(detail, "description", "")
    if not fallback:
        return ""
    return fallback.strip()

def get_detail_safe(pid):
    """Call ``get_product`` for ``pid`` with a light retry and a short pause.

    Up to ``RETRY_PER_ITEM + 1`` attempts are made. After a failed attempt
    that is not the last one, the function sleeps ``1.5 + attempt`` seconds
    (a small, growing backoff). The exception from the final attempt is
    re-raised to the caller.
    """
    last_attempt = RETRY_PER_ITEM
    attempt = 0
    while attempt <= last_attempt:
        try:
            return get_product(product_id=pid, debug=False)
        except Exception as err:
            if attempt >= last_attempt:
                raise err
            time.sleep(1.5 + attempt)  # small backoff
        attempt += 1

# Column order of the output CSV. Passing it to the DataFrame constructor also
# guarantees these columns exist when no rows were collected at all.
_CSV_COLUMNS = [
    "product_id", "product_name", "url", "description", "price",
    "sold_count", "rating", "total_stock", "category",
]


def _save_rows(rows, path):
    """Write ``rows`` to ``path`` as CSV, de-duplicated by ``product_id``.

    Returns the number of rows written.
    """
    df = pd.DataFrame(rows, columns=_CSV_COLUMNS)
    # Keep the first occurrence of every product_id, but never collapse rows
    # whose id is missing: duplicate detection treats all missing ids as equal.
    keep = df["product_id"].isna() | ~df.duplicated(subset=["product_id"])
    df = df[keep].reset_index(drop=True)
    df.to_csv(path, index=False, encoding="utf-8-sig")
    return len(df)


def _collect_row(prod, tag):
    """Build one CSV row for a search hit, enriched with detail data if possible.

    ``tag`` is the progress prefix used in the failure message. Any failure
    while fetching or reading the detail is reported and the row falls back to
    the metadata from the search result.
    """
    pid = getattr(prod, "product_id", None)
    row = {
        "product_id": pid,
        # The attribute may exist but hold None; normalise so slicing is safe.
        "product_name": getattr(prod, "product_name", None) or "",
        "url": getattr(prod, "url", None),
        "description": None,
        "price": getattr(prod, "price", None),
        "sold_count": None,
        "rating": getattr(prod, "rating", None),
        "total_stock": None,
        "category": getattr(prod, "category", None),
    }

    if pid is None:
        # Without an id there is nothing to request; skip the retries.
        print(f"{tag} detail gagal (product_id kosong) — simpan meta search saja.")
        return row

    try:
        det = get_detail_safe(pid)
        row["description"] = extract_description(det)

        data = det.json()  # numeric fields come from the detail payload
        row["sold_count"] = data.get("sold_count")
        row["total_stock"] = data.get("total_stock")
        row["category"] = data.get("category") or row["category"]
    except Exception as e:
        # Deliberate best-effort: keep the search metadata for this product.
        print(f"{tag} detail gagal ({e}) — simpan meta search saja.")
    return row


def main(output_path="tokopedia_scrape_all.csv"):
    """Scrape every keyword in ``KEYWORDS`` and save the products to a CSV.

    For each keyword a search is run, then the detail of every hit is fetched
    with random pauses between requests. The CSV at ``output_path`` is
    rewritten after each keyword as a checkpoint and once more at the end.

    Args:
        output_path: Destination CSV file. Defaults to the original
            hard-coded name, so calling ``main()`` behaves as before.
    """
    all_rows = []
    # Ids already collected. A repeat under a later keyword would be dropped
    # by the de-duplication anyway, so its detail request is skipped.
    seen_ids = set()

    for kw in KEYWORDS:
        print(f"\n=== Keyword: {kw} (max {MAX_PER_KEYWORD}) ===")
        try:
            results = search(kw, max_result=MAX_PER_KEYWORD, debug=False)
        except Exception as e:
            print(f"search gagal untuk '{kw}': {e}")
            time.sleep(random.uniform(*SLEEP_SEARCH))
            continue
        if results is None:
            results = []

        time.sleep(random.uniform(*SLEEP_SEARCH))  # slow down between keywords
        total = len(results)

        for i, prod in enumerate(results, 1):
            tag = f"[{kw} {i}/{total}]"
            pid = getattr(prod, "product_id", None)
            if pid is not None:
                if pid in seen_ids:
                    print(f"{tag} duplikat product_id {pid} — dilewati.")
                    continue
                seen_ids.add(pid)

            row = _collect_row(prod, tag)
            all_rows.append(row)

            print(f"{tag} {row['product_name'][:60]}")
            time.sleep(random.uniform(*SLEEP_DETAIL))  # slow down between products

        # Checkpoint after every keyword so an interrupted run keeps its data.
        saved = _save_rows(all_rows, output_path)
        print(f"↳ checkpoint saved ({saved} rows)")

    # Final save (redundant but safe); also covers the case of no checkpoint.
    saved = _save_rows(all_rows, output_path)
    print(f"\n✅ Done. Saved {saved} rows → {output_path}")

# Start the scrape only when this code runs as the main module, not on import.
if __name__ == "__main__":
    main()



=== Keyword: elektronik (max 100) ===
[elektronik 1/83] Piano Elektronik Payment Mr. D 2
[elektronik 2/83] mesin cuci lg 15kg f2515stgw(GARANSI RESMI)
[elektronik 3/83] Smart TV XIAOMI L32M8-A2ID 32 A Pro series
[elektronik 4/83] AQUA Elektronik AQR-D225(MDS) 1 pintu 180L
[elektronik 5/83] GARANSI 1 TAHUN! BARDI ZigBee Smart Gateway
[elektronik 6/83] TOSHIBA LED FHD SMART TV 43V31LP GARANSI RESMI NEGO
[elektronik 7/83] SAMSUNG MESIN CUCI WD21T6500GV / SE
[elektronik 8/83] Piano elektronik, piano elektronik 88 tombol, piano elektrik
[elektronik 9/83] ENOVE - Device Stylish 3 Warna | Bisa Dijeda | Compact & Ele
[elektronik 10/83] SMART TV LG 32 INCH 32LR600 MAGIC REMOTE GARANSI RESMI
[elektronik 11/83] SMART TV POLYTRON PLD 32CV2269 NEGO PLD32CV2269 GARANSI RESM
[elektronik 12/83] BARDI Smart Siren Alarm - Alarm Pendeteksi Otomatis Pintar W
[elektronik 13/83] LG Smart TV Oled Evo 83" 83C3PSA 4K UHD 120Hz OLED83C3
[elektronik 14/83] Power Station 1200W JETE PS1200 , 7 Multiple Output wit

In [15]:
# Reload the CSV written by main() and show it as the cell's result.
df_kolom = pd.read_csv("tokopedia_scrape_all.csv")
df_kolom

Unnamed: 0,product_id,product_name,url,description,price,sold_count,rating,total_stock,category
0,2618801377,Piano Elektronik Payment Mr. D 2,https://www.tokopedia.com/miracle-piano-maestr...,pengiriman seluruh Indonesia Jakarta Merupak...,20300000,1,,9,Speaker
1,1418259858,mesin cuci lg 15kg f2515stgw(GARANSI RESMI),https://www.tokopedia.com/memory-elektronik-ja...,Tanyakan ketersediaan stock terlebih dahulu\nF...,11100000,1,5.0,5,Mesin Cuci
2,12881922184,Smart TV XIAOMI L32M8-A2ID 32 A Pro series,https://www.tokopedia.com/blessingcombali/smar...,WAJIB Membaca CATATAN TOKO\nWAJIB Video Unboxi...,1729800,215,5.0,38,Monitor Tabung
3,11108003873,AQUA Elektronik AQR-D225(MDS) 1 pintu 180L,https://www.tokopedia.com/rossielektroni/aqua-...,AQUA Elektronik AQR-D225 1Door 180 L\n\nAQR-D2...,1933000,11,5.0,11,Kulkas
4,100413508806,GARANSI 1 TAHUN! BARDI ZigBee Smart Gateway,https://www.tokopedia.com/technobitzzz/garansi...,BARDI Zigbee Gateway adalah jembatan antara pe...,469000,2,,398,Remote IO
...,...,...,...,...,...,...,...,...,...
1138,100727166226,Ransel Anak - Backpack Sekolah PX 2053 - Ranse...,https://www.tokopedia.com/vintage-fusion/ranse...,Model : PX 2053\n \nBerat : 600gr\n \nSize : 4...,214945,0,,1399986,Tas Ransel Anak
1139,100563426583,Tas Ransel Wanita BackPack Waterproof Hitam Se...,https://www.tokopedia.com/shoppingstorm/tas-ra...,> Bahan : Ransel wanita terdiri dari kain Oxfo...,115000,91,4.6,79,Tas Ransel Wanita
1140,100316116566,FLATE Tas Sekolah Anak SMP/SMA Perempuan Tas R...,https://www.tokopedia.com/flate/flate-tas-seko...,Selamat datang di toko FLATE\nKami memproduksi...,170100,6042,4.8,713,Tas Ransel Wanita
1141,100464677416,TAS RANSEL SEKOLAH UNTUK ANAK LAKI-LAKI DAN PE...,https://www.tokopedia.com/wawan-vsmivvodjpnizs...,BAHAN: KANVAS\nUNKURAN 30X17X45\nFOTO HANYA RE...,147400,1,,249,Tas Ransel Pria
