In [1]:
import pandas as pd
import os

The raw Shopee data is scraped with a third-party application. This notebook then takes that data through the cleaning, sentiment, and workload processes.

## Multiple File

In [None]:
# Folder holding the scraped CSV parts, and the five part names (suffixes 1..5).
folder_path = r"dir"
file_names = list(f"komentar_avero_{part}.csv" for part in range(1, 6))

In [3]:
# Read every CSV part, in file_names order, into its own DataFrame.
# A comprehension keeps scratch names (the per-file path and frame) out of the
# notebook namespace, so nothing stale is left behind for later cells.
dataframes = [
    pd.read_csv(os.path.join(folder_path, part_name))
    for part_name in file_names
]

In [4]:
# Stack the per-file frames row-wise into one table with a fresh 0..n-1 index.
df_comment = pd.concat(objs=dataframes, axis=0, ignore_index=True)

## Single File

In [None]:
# Single-file alternative to the multi-file load: uncomment and set the CSV path.
# file_path = r"dir"

In [17]:
# Uncomment to read one CSV straight into df_comment (file_path must be set first).
# df_comment = pd.read_csv(file_path)

## Cleaning

In [6]:
# Remove rows that are empty in every column, then exact duplicate rows.
df_comment = df_comment.dropna(how='all').drop_duplicates()

In [None]:
# Show the frame after the empty-row and duplicate-row removal.
df_comment

## Preparation

In [8]:
from deep_translator import GoogleTranslator

In [9]:
def translate_text(text):
    """Translate ``text`` to English, falling back to the input on any failure.

    Parameters
    ----------
    text : str
        Comment to translate; the source language is auto-detected.

    Returns
    -------
    The English translation, or ``text`` unchanged when it is not a string,
    is blank, the translator raises, or the translator returns a non-string.
    """
    # Nothing to translate: skip the translator call for non-strings and blanks.
    if not isinstance(text, str) or not text.strip():
        return text

    try:
        translated = GoogleTranslator(source='auto', target='en').translate(text)
    except Exception as e:
        # Deliberate best-effort: report the failure and keep the original text.
        print(f"Terjadi error saat translate: {e}")
        return text

    # Guard against a non-string result (e.g. None) so the later .lower() step
    # on the translated comments cannot crash.
    return translated if isinstance(translated, str) else text

In [10]:
# Take an independent copy of the comment column for the text preparation steps.
sa_comments = df_comment.loc[:, 'Comment'].copy()

In [11]:
# Drop missing comments first: astype(str) would otherwise turn NaN into the
# literal text "nan", which would then be translated, scored and word-clouded
# as if it were a real comment.
sa_comments = sa_comments.dropna().astype(str)

# Collapse line breaks (with surrounding whitespace) into sentence separators,
# trim the ends, and replace colons with spaces.
sa_comments = sa_comments.str.replace(r'\s*\n\s*', '. ', regex=True).str.strip()
sa_comments = sa_comments.str.replace(":", " ", regex=False)

In [12]:
# Translate each comment individually; translate_text returns the input on failure.
sa_comments = sa_comments.map(translate_text)

In [13]:
# Lower-case every comment.
sa_comments = sa_comments.map(str.lower)

In [None]:
# Show the prepared (translated, lower-cased) comments.
sa_comments

In [15]:
# NOTE(review): absolute, machine-specific output location. This cell also
# rebinds folder_path / file_name / full_path, names the CSV-loading cells use
# for their input, so re-running the load afterwards reads from this folder.
folder_path = r"C:\Users\linkc\py\QOAR\SHOPEE\frescent"
file_name = "komentar_avero_fix.csv"
full_path = os.path.join(folder_path, file_name)

# Write the prepared comments to CSV without the index column.
sa_comments.to_csv(path_or_buf=full_path, index=False)

## Sentiment Analysis

In [16]:
import nltk
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
from nltk.sentiment import SentimentIntensityAnalyzer

In [None]:
# Download NLTK's 'vader_lexicon' resource, then create the analyzer instance
# that get_sentiment_label reads as the module-level name `sia`.
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

In [18]:
def get_sentiment_label(text):
    """Label ``text`` from the compound polarity score of the shared ``sia``.

    Returns 'positive' for a compound score of at least 0.05, 'negative' for
    a score of at most -0.05, and 'neutral' for anything in between.
    """
    compound = sia.polarity_scores(text)['compound']

    if compound >= 0.05:
        return 'positive'
    if compound <= -0.05:
        return 'negative'
    return 'neutral'

In [None]:
# Pair every prepared comment with the label from get_sentiment_label.
sa_df = pd.DataFrame({'comment': sa_comments}).assign(
    sentiment=lambda frame: frame['comment'].apply(get_sentiment_label)
)

print(sa_df.head())

In [None]:
# Count comments per label in a fixed display order. fill_value=0 gives a
# label with no comments an explicit zero instead of the NaN that a plain
# reindex would insert.
sentiment_counts = (
    sa_df['sentiment']
    .value_counts()
    .reindex(['positive', 'neutral', 'negative'], fill_value=0)
)

# Bar chart of the number of comments per sentiment label.
plt.figure(figsize=(8, 5))
sns.barplot(x=sentiment_counts.index, y=sentiment_counts.values, palette='Set2')
plt.title("Jumlah Komentar Berdasarkan Sentimen", fontsize=14)
plt.xlabel("Label Sentimen")
plt.ylabel("Jumlah Komentar")
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [None]:
# Join every comment into one space-separated corpus for the word cloud.
all_text = " ".join(sa_df['comment'].tolist())

wordcloud = WordCloud(
    width=800,
    height=400,
    background_color='white',
).generate(all_text)

# Render the cloud as an image on its own axes, with the axis frame hidden.
fig, ax = plt.subplots(figsize=(12, 6))
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis("off")
ax.set_title("Word Cloud Semua Komentar", fontsize=16)
plt.show()