Redes Sociais: [GitHub](https://github.com/wendelfrota) | [LinkedIn](https://www.linkedin.com/in/wendel-frota-11649b279)

In [None]:
pip install datasets

In [None]:
from keras.models import Sequential
from keras.layers import Embedding, Dense, GlobalAveragePooling1D
from keras.preprocessing.text import Tokenizer
from keras.callbacks import History
from keras.utils import pad_sequences
from sklearn.model_selection import train_test_split
from datasets import load_dataset
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Dataset: load 'app_reviews' and take the review texts and their 'star'
# values from its 'train' split.
dataset = load_dataset('app_reviews')
train_split = dataset['train']
reviews, stars = train_split['review'], train_split['star']

In [None]:
# Tokenizer: fit the vocabulary on the raw review texts, then replace
# `reviews` with fixed-length (300) index sequences, padded/truncated at the end.
# NOTE(review): '<00V>' is spelled with zeros; it looks like a typo of '<OOV>',
# but it is only a placeholder token, so it is kept unchanged here.
tokenizer = Tokenizer(oov_token='<00V>', num_words=80000)
tokenizer.fit_on_texts(reviews)
reviews = pad_sequences(
    tokenizer.texts_to_sequences(reviews),
    maxlen=300,
    padding='post',
    truncating='post',
)

In [None]:
# Train & Test Set
# Hold out 25% of the samples for evaluation and fit on the remaining 75%.
# The previous test_size=0.75 inverted that ratio, leaving only a quarter of
# the data for training.
train_review, test_review, train_star, test_star = (
    train_test_split(reviews, stars, test_size=0.25, random_state=42)
)

train_review = np.array(train_review)
test_review = np.array(test_review)

# Shift the star labels down by one so they start at 0: the model is compiled
# with sparse_categorical_crossentropy, which takes zero-based class indices.
train_star = np.array(train_star) - 1
test_star = np.array(test_star) - 1

# The held-out split doubles as the validation data passed to fit().
val_data = (test_review, test_star)

In [None]:
# Model: embedding -> average pooling -> dense classifier over 5 classes.
model = Sequential()
# NOTE(review): this callback instance is never passed to fit(); the history
# used by the later cells comes from fit()'s return value (`result`).
history = History()

# Size the embedding table to the indices the tokenizer can actually emit.
# When num_words is set, texts_to_sequences maps every rarer word to the OOV
# index, so no index >= num_words reaches the model; rows beyond that would
# be dead parameters.
vocab = len(tokenizer.word_index) + 1
if tokenizer.num_words:
    vocab = min(vocab, tokenizer.num_words)

model.add(Embedding(input_dim=vocab, output_dim=220, input_length=reviews.shape[1]))
model.add(GlobalAveragePooling1D())
model.add(Dense(64, activation='relu'))
model.add(Dense(5, activation='softmax'))

# NOTE(review): with integer labels and a softmax output, the 'mae' metric
# compares label ids against class probabilities, so it does not look like an
# error measured in stars -- confirm this is the intended metric.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy', 'mae'])
result = model.fit(train_review, train_star, epochs=12, batch_size=48, validation_data=val_data)

In [None]:
# Model - summary & save
model.summary()
model.save('./model_trained_v1.keras')


# Log: one "<metric>: <per-epoch values>" line for each entry in the history.
with open('history_log.txt', 'w') as log_file:
    log_file.writelines(
        f"{metric}: {values}\n" for metric, values in result.history.items()
    )

In [None]:
# Matplotlib: one figure per tracked metric, each plotting the training curve
# against the validation ('val_*') curve over the epochs.
# Each entry: (history key, train label, validation label, title, y-axis label)
curves = (
    ('loss', 'Erro treino', 'Erro teste',
     'Histórico de Treinamento - Função de custo', 'Função de custo'),
    ('accuracy', 'Acurácia treino', 'Acurácia teste',
     'Histórico de Treinamento - Acurácia', 'Acurácia'),
    ('mae', 'MAE Treino', 'MAE Teste', 'Training - MAE', 'MAE'),
)

for metric, train_label, val_label, title, y_label in curves:
    plt.plot(result.history[metric], label=train_label)
    plt.plot(result.history['val_' + metric], label=val_label)
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Épocas de treinamento')
    plt.legend()
    plt.show()

In [None]:
# Sample phrases for a manual check (the trailing comment is the rating the
# author expects for each phrase).
test_phrases = [
    "This app is amazing, I love it!",  # 5 stars
    "The worst app ever, don't download it!",  # 1 star
    "This app is terrible, don't waste your time!",  # 1 star
    "Average app, nothing special.",  # 3 stars
    "It's okay, could be better.",  # 3 stars
    "Very useful, highly recommend it.",  # 5 stars
    "I don't like this app, very disappointing.",  # 2 stars
    "Amazing app! Couldn't live without it.",  # 5 stars
    "Needs improvement, but has potential.",  # 3 stars
    "The best app I've ever used!",  # 5 stars
    "An essential app for everyday use.",  # 5 stars
    "Not bad, but could use some updates.",  # 3 stars
    "Avoid this app at all costs, it's a scam!",  # 1 star
    "Incredible features, exceeded my expectations.",  # 5 stars
    "Unreliable app, crashes frequently.",  # 2 stars
    "Decent app, gets the job done.",  # 4 stars
    "Too many ads, very annoying.",  # 2 stars
    "Great app, easy to use interface.",  # 5 stars
    "Very disappointed, not what I expected.",  # 2 stars
    "Love this app, use it every day.",  # 5 stars
    "Could be better, needs more features.",  # 3 stars
    "Hate this app, waste of money.",  # 1 star
    "Exceptional performance, worth every penny."  # 5 stars
]



# Pre-process the phrases with the same tokenizer and padding settings that
# were applied to the review texts.
test_sequences = pad_sequences(
    tokenizer.texts_to_sequences(test_phrases),
    maxlen=300,
    padding='post',
    truncating='post',
)

# Run the model on the padded phrases.
predictions = model.predict(test_sequences)

# Report the per-star probabilities and the most likely star for each phrase.
for phrase, probabilities in zip(test_phrases, predictions):
    print(f"Phrase: {phrase}")
    print("Predictions:")
    # Output position k corresponds to star k + 1 (labels were shifted to 0-4).
    for star, probability in enumerate(probabilities, start=1):
        print(f"   Star {star}: Probability = {probability*100:.2f}%")
    predicted_class = np.argmax(probabilities) + 1
    print(f"Predicted Class: Star {predicted_class}\n")