# Instalasi dan Scraping

In [1]:
from google_play_scraper import reviews, Sort
import pandas as pd
from tqdm import tqdm
import time

# Daftar aplikasi yang akan di-scrape
app_ids = {
    "Netflix": "com.netflix.mediaclient",
    "Disney+ Hotstar": "in.startv.hotstar",
    "Amazon Prime Video": "com.amazon.avod.thirdpartyclient",
    "Viu": "com.viu.android",
    "WeTV": "com.tencent.qqlivei18n",
    "Vidio": "com.vidio.android",
    "HBO GO": "id.hbo.hbogo"
}

In [2]:
def get_reviews(app_id, app_name, total_count=3000):
    all_reviews = []
    count = 0
    next_token = None

    with tqdm(total=total_count, desc=f"Scraping {app_name}") as pbar:
        while count < total_count:
            try:
                result, next_token = reviews(
                    app_id,
                    lang='id',
                    country='id',
                    sort=Sort.MOST_RELEVANT,
                    count=200,
                    continuation_token=next_token
                )
            except Exception as e:
                print(f"[!] Error saat scrape {app_name}: {e}")
                break

            if not result:
                print(f"[i] Tidak ada hasil, kemungkinan review habis untuk {app_name}.")
                break

            all_reviews.extend(result)
            count += len(result)
            pbar.update(len(result))

            if not next_token:
                break

            time.sleep(0.5)  # jeda agar tidak terlalu cepat

    df = pd.DataFrame(all_reviews)
    if df.empty:
        print(f"[i] Tidak ada data review untuk {app_name}")
        return pd.DataFrame(columns=['app', 'content', 'score'])

    df['app'] = app_name
    print(f"[✓] Jumlah review terkumpul dari {app_name}: {len(df)}")
    return df[['app', 'content', 'score']]

In [3]:
all_data = pd.DataFrame()

for name, app_id in app_ids.items():
    df = get_reviews(app_id, name, total_count=3000)
    all_data = pd.concat([all_data, df], ignore_index=True)

    # Opsional: simpan per aplikasi
    # df.to_csv(f"{name}_reviews.csv", index=False)

# Simpan semua data ke satu file
all_data.to_csv("all_streaming-platform_reviews.csv", index=False)
print(f"[✓] Total semua review: {len(all_data)}")

Scraping Netflix:   0%|          | 0/3000 [00:00<?, ?it/s]

Scraping Netflix: 100%|██████████| 3000/3000 [00:24<00:00, 121.29it/s]


[✓] Jumlah review terkumpul dari Netflix: 3000


Scraping Disney+ Hotstar:  12%|█▏        | 349/3000 [00:03<00:25, 103.22it/s]


[i] Tidak ada hasil, kemungkinan review habis untuk Disney+ Hotstar.
[✓] Jumlah review terkumpul dari Disney+ Hotstar: 349


Scraping Amazon Prime Video: 100%|██████████| 3000/3000 [00:22<00:00, 136.09it/s]


[✓] Jumlah review terkumpul dari Amazon Prime Video: 3000


Scraping Viu:   0%|          | 0/3000 [00:00<?, ?it/s]


[i] Tidak ada hasil, kemungkinan review habis untuk Viu.
[i] Tidak ada data review untuk Viu


Scraping WeTV: 100%|██████████| 3000/3000 [00:18<00:00, 159.68it/s]


[✓] Jumlah review terkumpul dari WeTV: 3000


Scraping Vidio: 100%|██████████| 3000/3000 [00:19<00:00, 154.03it/s]


[✓] Jumlah review terkumpul dari Vidio: 3000


Scraping HBO GO:  21%|██        | 635/3000 [00:04<00:16, 147.14it/s]

[i] Tidak ada hasil, kemungkinan review habis untuk HBO GO.
[✓] Jumlah review terkumpul dari HBO GO: 635
[✓] Total semua review: 12984



