In [10]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from google_play_scraper import reviews, Sort
from pandas import DataFrame
from string import punctuation
from nltk.tokenize import word_tokenize
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from nlp_id.lemmatizer import Lemmatizer
from requests import get
from io import StringIO

import nltk, re, json, csv

nltk.download("punkt")
nltk.download("stopwords")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
minecraft_reviews = reviews(
    'com.mojang.minecraftpe',
    lang = 'id',
    country = 'id',
    sort = Sort.MOST_RELEVANT,
    count = 15000
)

In [4]:
minecraft_df = DataFrame(minecraft_reviews[0])

x_review, y_review = minecraft_df.shape
print("Record :", x_review, "baris")
print("Field  :", y_review, "kolom")

Record : 15000 baris
Field  : 11 kolom


In [5]:
# REMOVE SPECIAL CHARACTERS & CASE FOLDING
def cleaning(text):
    text = re.sub(r'@[A-Za-z0-9]+', '', str(text)) # menghapus mention
    text = re.sub(r'#[A-Za-z0-9]+', '', text) # menghapus hashtag
    text = re.sub(r'RT[\s]', '', text) # menghapus RT
    text = re.sub(r"http\S+", '', text) # menghapus link
    text = re.sub(r'[0-9]+', '', text) # menghapus angka
    text = re.sub(r'[^\w\s]', '', text) # menghapus karakter selain huruf dan angka
    text = text.replace('\n', ' ') # mengganti baris baru dengan spasi
    text = text.translate(str.maketrans('', '', punctuation)) # menghapus semua tanda baca
    text = text.strip(' ') # menghapus karakter spasi dari kiri dan kanan teks
    return text.lower()

# REMOVE SLANG WORDS
def fixSlangwords(text):
    with open('./slangwords.txt', 'r') as file :
        slangwords = file.readlines()

    for slang in slangwords:
        slang = slang.replace('\n', '')
        slang = slang.split('\t')
        # print(slang)

    words = text.split()
    fixed_words = []

    for word in words:
        if word.lower() in slangwords:
            fixed_words.append(slang[word.lower()])
        else :
            fixed_words.append(word)

    return ' '.join(fixed_words)

# TOKENIZING
def tokenizer(text):
    return word_tokenize(text)

# REMOVE STOPWORDS
def stopwordsRemove(text):
    with open('stopwords.txt', 'r') as file:
        stopwords = file.readlines()
    stopwordsDict =[]
    for word in stopwords:
        word = word.replace('\n', '')
        stopwordsDict.append(word)
    # print(stopwordsDict)
    # print('every' in stopwordsDict)
    # text = word.tokenize(text)
    # return [word not in stopwordsDict for word in text]

    fix_words = []
    for txt in text:
        if txt not in stopwordsDict:
            fix_words.append(txt)
    return fix_words

# LEMMATIZING
def lemmatizerWord(text):
    lemmatizer = Lemmatizer()
    return [lemmatizer.lemmatize(word.lower()) for word in text]

# STEMMING
def stemmerWord(text):
    stemmer = StemmerFactory().create_stemmer()
    return [stemmer.stem(word) for word in text]

def toSentence(text):
    return ' '.join(word for word in text)

In [7]:
print("Proses Cleaning     : START")
minecraft_df['text_clean'] = minecraft_df.content.apply(cleaning)
print("Proses Cleaning     : DONE\n")

print("Proses Slangword    : START")
minecraft_df['text_slangwords'] = minecraft_df['text_clean'].apply(fixSlangwords)
print("Proses Slangword    : DONE\n")

print("Proses Tokenizing   : START")
minecraft_df['text_tokenizingText'] = minecraft_df['text_slangwords'].apply(tokenizer)
print("Proses Tokenizing   : DONE\n")

print("Proses Stopword     : START")
minecraft_df['text_stopword'] = minecraft_df['text_tokenizingText'].apply(stopwordsRemove)
print("Proses Stopword     : DONE\n")

print("Proses Stemming     : START")
minecraft_df['text_stemming'] = minecraft_df['text_stopword'].apply(stemmerWord)
print("Proses Stemming     : DONE\n")

print("Proses Final        : START")
minecraft_df['text_akhir'] = minecraft_df['text_stemming'].apply(toSentence)
print("Proses Final        : DONE\n")

Proses Cleaning     : START
Proses Cleaning     : DONE

Proses Slangword    : START
Proses Slangword    : DONE

Proses Tokenizing   : START
Proses Tokenizing   : DONE

Proses Stopword     : START
Proses Stopword     : DONE

Proses Stemming     : START
Proses Stemming     : DONE



In [12]:
lexicon_positive, lexicon_negative = {}, {}

def sentiment_analysis_lexicon_indonesia(text):
    score = 0

    for word in text:
        if (word in lexicon_positive):
            score = score + lexicon_positive[word]

    for word in text:
        if (word in lexicon_negative):
            score = score + lexicon_negative[word]

    sentiment=''

    if (score > 0) :sentiment = 'positive'
    elif (score < 0) :sentiment = 'negative'
    else: sentiment = 'neutral'

    return score, sentiment

response = get(url='https://raw.githubusercontent.com/angelmetanosaa/dataset/main/lexicon_positive.csv')
def create_dict(url, dictType):
    response = get(url=url)
    if response.status_code == 200:
        reader = csv.reader(StringIO(response.text), delimiter=',')
        if dictType == 'positive':
            print("Fetching {} lexicon data".format(dictType))
            for row in reader:
                lexicon_positive[row[0]] = int(row[1])
        else:
            print("Fetching {} lexicon data".format(dictType))
            for row in reader:
                lexicon_negative[row[0]] = int(row[1])
    else:
        print("Failed to fetch lexicon data")

create_dict('https://raw.githubusercontent.com/angelmetanosaa/dataset/main/lexicon_positive.csv', "positive")
create_dict('https://raw.githubusercontent.com/angelmetanosaa/dataset/main/lexicon_negative.csv', "negative")
print("")

results = minecraft_df['text_stemming'].apply(sentiment_analysis_lexicon_indonesia)
results = list(zip(*results))
minecraft_df['polarity_score'] = results[0]
minecraft_df['sentiment'] = results[1]
print(minecraft_df['sentiment'].value_counts())

Fetching positive lexicon data
Fetching negative lexicon data

sentiment
positive    7159
negative    6219
neutral     1622
Name: count, dtype: int64


In [13]:
minecraft_df.head()

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion,text_clean,text_slangwords,text_tokenizingText,text_stopword,text_stemming,text_akhir,polarity_score,sentiment
0,9d03bd81-f407-44e2-af1d-f12aac9806e3,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,"Gameplay sudah bagus, tapi ada sedikit bug pad...",1,941,1.21.2.02,2024-07-26 11:44:38,,,1.21.2.02,gameplay sudah bagus tapi ada sedikit bug pada...,gameplay sudah bagus tapi ada sedikit bug pada...,"[gameplay, sudah, bagus, tapi, ada, sedikit, b...","[gameplay, bagus, bug, controlernya, controler...","[gameplay, bagus, bug, controlernya, controler...",gameplay bagus bug controlernya controler kere...,-1,negative
1,52773c82-9074-4570-a95b-676c42bb8708,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,"Bagus sih,cuma tolong fix di bagian freeze. Ma...",5,6748,1.21.2.02,2024-07-12 09:34:28,,,1.21.2.02,bagus sihcuma tolong fix di bagian freeze maks...,bagus sihcuma tolong fix di bagian freeze maks...,"[bagus, sihcuma, tolong, fix, di, bagian, free...","[bagus, sihcuma, fix, bagian, freeze, maksud, ...","[bagus, sihcuma, fix, bagi, freeze, maksud, sa...",bagus sihcuma fix bagi freeze maksud sayalayar...,-1,negative
2,3686bb42-88d3-4727-8236-d42474f705d9,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,"Sudah sangat bagus, saya sudah bermain sangat ...",5,1686,1.21.1.03,2024-07-21 03:35:45,,,1.21.1.03,sudah sangat bagus saya sudah bermain sangat l...,sudah sangat bagus saya sudah bermain sangat l...,"[sudah, sangat, bagus, saya, sudah, bermain, s...","[bagus, bermain, frezze, game, pengaturan, fps...","[bagus, main, frezze, game, atur, fps, batas, ...",bagus main frezze game atur fps batas bagi atu...,8,positive
3,395b6e10-4aa7-435c-8d32-abc534c71657,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,Daripada menambah mob atau Creature baru. Menu...,3,7,1.21.1.03,2024-07-26 05:12:58,,,1.21.1.03,daripada menambah mob atau creature baru menur...,daripada menambah mob atau creature baru menur...,"[daripada, menambah, mob, atau, creature, baru...","[menambah, mob, creature, sayayang, dipentingk...","[tambah, mob, creature, sayayang, penting, min...",tambah mob creature sayayang penting minecraft...,-16,negative
4,7b20e5b0-5251-4a0e-9a90-d58b0a93039a,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,"Untuk pengalaman bermain kurang, sering terjad...",5,3988,1.21.1.03,2024-07-03 03:52:05,,,1.21.1.03,untuk pengalaman bermain kurang sering terjadi...,untuk pengalaman bermain kurang sering terjadi...,"[untuk, pengalaman, bermain, kurang, sering, t...","[pengalaman, bermain, frame, drop, detik, game...","[alam, main, frame, drop, detik, game, minecra...",alam main frame drop detik game minecraft nya ...,-5,negative


In [15]:
# Saving daataset
minecraft_df.to_csv("dataset_minecraft.csv", index=False)

# dataset.head()