In [6]:
!pip install tensorflow-addons

Collecting tensorflow-addons
  Downloading tensorflow_addons-0.20.0-cp39-cp39-win_amd64.whl (746 kB)
     -------------------------------------- 746.7/746.7 kB 4.7 MB/s eta 0:00:00
Collecting typeguard<3.0.0,>=2.7
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow-addons
Successfully installed tensorflow-addons-0.20.0 typeguard-2.13.3


In [4]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense
from tensorflow.keras.optimizers import Adam

# 1. Pré-processar os dados
# Carregue seus datasets balanceados
train_data = pd.read_csv('../data/preprocessed/movies_genres_train_preprocessed.csv')
test_data = pd.read_csv('../data/preprocessed/movies_genres_test_preprocessed.csv')

category_columns = train_data.columns.drop(['Name', 'Description', 'Combined'])

# Defina os parâmetros de pré-processamento
max_features = 10000  # Número máximo de palavras a serem usadas (palavras mais frequentes)
maxlen = 200  # Número máximo de palavras no texto

# Tokenize os textos
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(train_data['Combined'])

X_train = tokenizer.texts_to_sequences(train_data['Combined'])
X_test = tokenizer.texts_to_sequences(test_data['Combined'])

# Padronize os textos
X_train = pad_sequences(X_train, maxlen=maxlen)
X_test = pad_sequences(X_test, maxlen=maxlen)

# Obtenha os labels one-hot encoded
y_train = train_data[category_columns].values
y_test = test_data[category_columns].values

# 2. Construir o modelo
embedding_dim = 128  # Dimensão do vetor de embedding
lstm_units = 64  # Unidades LSTM

model = Sequential([
    Embedding(max_features, embedding_dim, input_length=maxlen),
    Bidirectional(LSTM(lstm_units, return_sequences=True)),
    Bidirectional(LSTM(lstm_units)),
    Dense(len(category_columns), activation='sigmoid')
])

# 3. Treinar o modelo
optimizer = Adam(learning_rate=0.001)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

batch_size = 32
epochs = 10

model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)

# 4. Avaliar o modelo
loss, accuracy = model.evaluate(X_test, y_test)
print('Loss:', loss)
print('Accuracy:', accuracy)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.23454779386520386
Accuracy: 0.34215956926345825


In [5]:
from sklearn.metrics import f1_score, average_precision_score, precision_recall_curve

y_pred = model.predict(X_test)
y_pred = np.round(y_pred)
f1_micro = f1_score(y_test, y_pred, average='micro')
f1_macro = f1_score(y_test, y_pred, average='macro')
precision_micro = precision_score(y_test, y_pred, average='micro')
precision_macro = precision_score(y_test, y_pred, average='macro')
recall_micro = recall_score(y_test, y_pred, average='micro')
recall_macro = recall_score(y_test, y_pred, average='macro')
print("F1-score (micro): {:.2f}".format(f1_micro))
print("F1-score (macro): {:.2f}".format(f1_macro))
print("Precision (micro): {:.2f}".format(precision_micro))
print("Precision (macro): {:.2f}".format(precision_macro))
print("Recall (micro): {:.2f}".format(recall_micro))
print("Recall (macro): {:.2f}".format(recall_macro))

f1_scores_per_class = f1_score(y_test, np.round(y_pred), average=None)
auc_pr_per_class = average_precision_score(y_test, y_pred, average=None)
for i, category in enumerate(category_columns):
    print("Class: {}".format(category))
    print("F1-score: {:.2f}".format(f1_scores_per_class[i]))
    print("AUC-PR: {:.2f}".format(auc_pr_per_class[i]))
    print("\n")

F1-score (micro): 0.41
F1-score (macro): 0.15
Precision (micro): 0.62
Precision (macro): 0.25
Recall (micro): 0.31
Recall (macro): 0.12


  _warn_prf(average, modifier, msg_start, len(result))


NameError: name 'average_precision_score' is not defined