In [1]:
# Import required libraries
import json
import os

import pandas as pd
from google_play_scraper import Sort, reviews, app
from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers import JsonLexer
from tqdm import tqdm

In [2]:
# Setup output directory for local machine
# `os` is imported in the notebook's first (imports) cell.
# Relative path: the directory is created under the notebook's working directory.
output_dir = "output"

# exist_ok=True keeps this cell safe to re-run: an already existing directory is not an error.
os.makedirs(output_dir, exist_ok=True)

In [3]:
# Define app packages to scrape
# Single source of truth: Android package name -> human-readable description.
# Keeping both in one mapping guarantees the preview table below never hits a
# length mismatch when another app is added.
app_descriptions = {
    'blibli.mobile.commerce': 'Blibli - Indonesian e-commerce platform',
}

# List of Android app package names for Indonesian e-commerce apps
# (insertion order of the mapping is preserved).
app_pack = list(app_descriptions)

# Display as DataFrame for better visualization
pd.DataFrame(list(app_descriptions.items()), columns=['App Package', 'Description'])

Unnamed: 0,App Package,Description
0,blibli.mobile.commerce,Blibli - Indonesian e-commerce platform


In [4]:
# Scrape app information
# Get basic app information from Google Play Store: one dict per package in
# `app_pack`, collected into `app_info` and shown as a DataFrame.
app_info = []

for package_name in tqdm(app_pack, desc="Getting app info"):
    # Get app details in Indonesian language for Indonesian market
    info = app(package_name, lang='id', country='id')

    # Remove comments section to keep only basic app info.
    # pop() with a default instead of `del`: a result without a 'comments'
    # key must not abort the whole run with a KeyError.
    info.pop('comments', None)
    app_info.append(info)

# Display app info as DataFrame
app_info_df = pd.DataFrame(app_info)
app_info_df

Getting app info:   0%|          | 0/1 [00:00<?, ?it/s]

Getting app info: 100%|██████████| 1/1 [00:00<00:00,  2.89it/s]


Unnamed: 0,title,description,descriptionHTML,summary,installs,minInstalls,realInstalls,score,ratings,reviews,...,contentRating,contentRatingDescription,adSupported,containsAds,released,lastUpdatedOn,updated,version,appId,url
0,Blibli,Kamu bisa pakai promonya buat belanja online m...,Kamu bisa pakai promonya buat belanja online m...,"100% Orisinil, Gratis Ongkir, Pengiriman Cepat...",10.000.000+,10000000,46048944,4.739756,662803,175830,...,Rating 3+,,False,False,9 Des 2013,2 Agu 2025,1754074989,12.5.5,blibli.mobile.commerce,https://play.google.com/store/apps/details?id=...


In [5]:
# Scrape app reviews
# For every package, fetch reviews for each star rating (1-5) under both sort
# orders, tag each review with the sort order and package it came from, and
# collect everything in `app_revue`. The same review can be returned by both
# sort orders, so this list may contain duplicates.
print("Scraping app reviews...")
app_revue = []

# Sort order -> label stored in the 'sortOrder' field (iterated in this order).
sort_labels = {
    Sort.MOST_RELEVANT: 'most_relevant',
    Sort.NEWEST: 'newest',
}

for package_name in tqdm(app_pack, desc="Scraping reviews"):
    for star_rating in range(1, 6):
        # Request more reviews for the neutral rating (3 stars) than for the others.
        fetch_count = 9000 if star_rating == 3 else 5000

        for sort_mode, sort_label in sort_labels.items():
            # The second return value (continuation token) is not needed here.
            batch, _continuation = reviews(
                package_name,
                lang='id',                      # Indonesian language
                country='id',                   # Indonesian market
                sort=sort_mode,
                count=fetch_count,
                filter_score_with=star_rating,
            )

            # Attach metadata while appending to the main list.
            app_revue.extend(
                {**review, 'sortOrder': sort_label, 'appId': package_name}
                for review in batch
            )

print(f"Successfully scraped {len(app_revue)} reviews")

Scraping app reviews...


Scraping reviews: 100%|██████████| 1/1 [00:44<00:00, 44.49s/it]

Successfully scraped 58000 reviews





In [6]:
# Display sample reviews
# Preview the first three scraped records to sanity-check the fields
# (including the 'sortOrder' and 'appId' metadata added during scraping).
first_reviews = app_revue[:3]
sample_df = pd.DataFrame(first_reviews)
sample_df

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion,sortOrder,appId
0,34c0068e-9464-4eb6-91a6-0b0413f496c3,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,"saya order barang helm, saya tidak ada cancel ...",1,17,12.5.5,2025-07-25 18:28:42,Halo kak Frans mohon maaf terkait proses order...,2025-07-30 15:38:44,12.5.5,most_relevant,blibli.mobile.commerce
1,4040d0cc-0c4a-48c5-be52-971781b97990,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,"alamat tidak bisa disesuaikan dengan aslinya ,...",1,67,12.4.5,2025-07-05 19:20:58,"Halo kak, mohon maaf atas pengalaman belanja y...",2025-07-05 19:40:27,12.4.5,most_relevant,blibli.mobile.commerce
2,6315b567-3118-4458-bb13-7efd75d8306d,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,"Saya heran, sudah nonaktifkan notifikasi dari ...",1,21,12.5.5,2025-07-17 07:38:24,Halo kak Ridwan mohon maaf terkait notifikasi ...,2025-07-20 14:36:13,12.5.5,most_relevant,blibli.mobile.commerce


In [7]:
# Convert to DataFrame and sort by date
# Build the frame from the list of review dicts and order it by the review
# timestamp column 'at', newest first. The original row labels are kept.
data_review = pd.DataFrame(app_revue).sort_values(by='at', ascending=False)

data_review

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion,sortOrder,appId
42159,ffca2a84-9212-41f9-b25d-896d57a0bc8b,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,aplikasi nya kurang ini masa paylater saya d t...,4,0,12.5.5,2025-08-02 15:21:53,,NaT,12.5.5,most_relevant,blibli.mobile.commerce
43000,ffca2a84-9212-41f9-b25d-896d57a0bc8b,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,aplikasi nya kurang ini masa paylater saya d t...,4,0,12.5.5,2025-08-02 15:21:53,,NaT,12.5.5,newest,blibli.mobile.commerce
53000,3cf56805-2a66-46c9-ab92-7bdf1624d72c,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,mantap ini aplikasi,5,0,12.5.5,2025-08-02 10:56:00,Halo kak Daamxyz. mantap! Makasih banyak buat ...,2025-08-02 12:50:35,12.5.5,newest,blibli.mobile.commerce
53001,3fbb7d3f-dba6-4fa3-90cb-87c160ad182e,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,mantab,5,0,12.5.5,2025-08-02 06:56:00,Halo kak mantap! Makasih banyak buat review ke...,2025-08-02 08:20:04,12.5.5,newest,blibli.mobile.commerce
5000,e77d54c6-f431-44e3-9865-50ad67113895,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,mau log in D aplikasi ini susah padah sinyal b...,1,0,12.5.5,2025-08-02 04:52:36,Halo kak Asep mohon maaf terkait terkait kenda...,2025-08-02 06:24:28,12.5.5,newest,blibli.mobile.commerce
...,...,...,...,...,...,...,...,...,...,...,...,...,...
27833,538ed255-8837-48df-9e9b-e1a5a53c1a11,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,Keren,3,0,1.4.3,2014-05-16 21:41:32,Thank you blibli friend for good review :) hop...,2020-02-15 08:45:27,1.4.3,most_relevant,blibli.mobile.commerce
27835,aef97273-7329-4221-aba4-4064e2678789,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,Mantap..,3,0,1.4.2,2014-05-15 08:33:35,Thank you blibli friend for good review :) hop...,2020-02-15 08:45:32,1.4.2,most_relevant,blibli.mobile.commerce
14321,f2489bff-f3c2-4e97-b68b-6c2c9fcfac36,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,Jelek banget susah masuknya,2,0,1.4.2,2014-05-08 01:00:28,Sangat disayangkan sekali dengan kondisinya :(...,2020-02-29 16:59:17,1.4.2,most_relevant,blibli.mobile.commerce
27834,c357a52f-4e00-45d8-9c52-bf61f0dfe6a1,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,"Mantap,up 2 date trus ya..",3,0,1.4.2,2014-05-02 21:36:16,terima kasih untuk reviewnya yang keren banget...,2020-02-29 17:03:22,1.4.2,most_relevant,blibli.mobile.commerce


In [8]:
# Clean and prepare data for export
# Columns kept for analysis, mapped to the snake_case names used in the export.
# The dict order defines the column order of the result.
column_names = {
    'reviewId': 'review_id',
    'userName': 'username',
    'content': 'content',
    'score': 'score',
    'at': 'created_at',
    'replyContent': 'reply_content',
    'repliedAt': 'replied_at',
    'appId': 'app_id',
}

# One pipeline that yields a new frame and leaves `data_review` untouched:
#   1. select the relevant columns
#   2. parse both timestamp columns and strip any timezone info
#   3. rename to descriptive names
#   4. keep the first row per review_id (duplicates arise because each rating
#      is scraped under two sort orders)
#   5. renumber the index from 0
refine_data = (
    data_review[list(column_names)]
    .assign(
        at=lambda frame: pd.to_datetime(frame['at']).dt.tz_localize(None),
        repliedAt=lambda frame: pd.to_datetime(frame['repliedAt']).dt.tz_localize(None),
    )
    .rename(columns=column_names)
    .drop_duplicates(subset=['review_id'])
    .reset_index(drop=True)
)

refine_data

Unnamed: 0,review_id,username,content,score,created_at,reply_content,replied_at,app_id
0,ffca2a84-9212-41f9-b25d-896d57a0bc8b,Pengguna Google,aplikasi nya kurang ini masa paylater saya d t...,4,2025-08-02 15:21:53,,NaT,blibli.mobile.commerce
1,3cf56805-2a66-46c9-ab92-7bdf1624d72c,Pengguna Google,mantap ini aplikasi,5,2025-08-02 10:56:00,Halo kak Daamxyz. mantap! Makasih banyak buat ...,2025-08-02 12:50:35,blibli.mobile.commerce
2,3fbb7d3f-dba6-4fa3-90cb-87c160ad182e,Pengguna Google,mantab,5,2025-08-02 06:56:00,Halo kak mantap! Makasih banyak buat review ke...,2025-08-02 08:20:04,blibli.mobile.commerce
3,e77d54c6-f431-44e3-9865-50ad67113895,Pengguna Google,mau log in D aplikasi ini susah padah sinyal b...,1,2025-08-02 04:52:36,Halo kak Asep mohon maaf terkait terkait kenda...,2025-08-02 06:24:28,blibli.mobile.commerce
4,9be6ab30-0b54-41f5-afab-aef3c68a858d,Pengguna Google,ok,5,2025-08-02 04:36:55,Halo kak Maulana mantap! Makasih banyak buat r...,2025-08-02 06:21:50,blibli.mobile.commerce
...,...,...,...,...,...,...,...,...
40815,538ed255-8837-48df-9e9b-e1a5a53c1a11,Pengguna Google,Keren,3,2014-05-16 21:41:32,Thank you blibli friend for good review :) hop...,2020-02-15 08:45:27,blibli.mobile.commerce
40816,aef97273-7329-4221-aba4-4064e2678789,Pengguna Google,Mantap..,3,2014-05-15 08:33:35,Thank you blibli friend for good review :) hop...,2020-02-15 08:45:32,blibli.mobile.commerce
40817,f2489bff-f3c2-4e97-b68b-6c2c9fcfac36,Pengguna Google,Jelek banget susah masuknya,2,2014-05-08 01:00:28,Sangat disayangkan sekali dengan kondisinya :(...,2020-02-29 16:59:17,blibli.mobile.commerce
40818,c357a52f-4e00-45d8-9c52-bf61f0dfe6a1,Pengguna Google,"Mantap,up 2 date trus ya..",3,2014-05-02 21:36:16,terima kasih untuk reviewnya yang keren banget...,2020-02-29 17:03:22,blibli.mobile.commerce


In [9]:
# Save data to CSV file and display summary statistics
# Write the cleaned reviews to <output_dir>/blibli_app_review.csv (header row,
# no index column), then show headline metrics and the per-rating breakdown.
output_file = os.path.join(output_dir, "blibli_app_review.csv")
refine_data.to_csv(output_file, index=False, header=True)

print(f"✅ Data exported successfully to: {output_file}")
print(f"📊 Total records saved: {len(refine_data)}")

# Headline metrics, computed up front so the table definition stays readable.
total_reviews = len(refine_data)
earliest_review = refine_data['created_at'].min()
latest_review = refine_data['created_at'].max()
average_rating = f"{refine_data['score'].mean():.2f}"

summary_stats = {
    'Metric': ['Total Reviews', 'Date Range (From)', 'Date Range (To)', 'Average Rating'],
    'Value': [total_reviews, earliest_review, latest_review, average_rating],
}
summary_df = pd.DataFrame(summary_stats)

# Rating distribution: review count per star rating (ascending), plus its share
# of all exported reviews as a percentage rounded to one decimal.
rating_dist = (
    refine_data['score']
    .value_counts()
    .sort_index()
    .rename_axis('Rating (Stars)')
    .reset_index(name='Count')
    .assign(Percentage=lambda dist: (dist['Count'] / len(refine_data) * 100).round(1))
)

print("\n📈 Summary Statistics:")
display(summary_df)

print("\n⭐ Rating Distribution:")
display(rating_dist)

✅ Data exported successfully to: output/blibli_app_review.csv
📊 Total records saved: 40820

📈 Summary Statistics:


Unnamed: 0,Metric,Value
0,Total Reviews,40820
1,Date Range (From),2014-03-19 16:46:04
2,Date Range (To),2025-08-02 15:21:53
3,Average Rating,3.10



⭐ Rating Distribution:


Unnamed: 0,Rating (Stars),Count,Percentage
0,1,7951,19.5
1,2,5562,13.6
2,3,11347,27.8
3,4,6467,15.8
4,5,9493,23.3
