Montiamo Google Drive

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Lavoriamo sul dataset (Dataset già preprocessato e settato)

In [17]:
import pandas as pd
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

# Leggiamo il nostro dataset (già pulito)
df = pd.read_csv('/content/drive/MyDrive/Database/filtered_mt.csv');

# Estrazione delle descrizioni dei film e dei tag corrispondenti
texts = df['plot_synopsis'].tolist()
labels = df['tags'].tolist()

# Inizializzazione del Tokenizer
tokenizer = Tokenizer(num_words=5000)
# Inizializzazione del Tokenizer e adattamento ai tuoi dati
tokenizer.fit_on_texts(texts)

# Trasformazione del testo in sequenze di interi
sequences = tokenizer.texts_to_sequences(texts)

# Padding delle sequenze per garantire la stessa lunghezza
X = pad_sequences(sequences, maxlen=500)

# Codifica dei tag come interi
encoder = LabelEncoder()
y = encoder.fit_transform(labels)

In [19]:
# Importazione delle librerie necessarie
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.callbacks import ModelCheckpoint

# Definizione dei tag
tags = ["murder", "romantic", "violence", "psychedelic", "comedy"]

# Creazione del modello
model = Sequential()
model.add(Embedding(5000, 64, input_length=500))
model.add(LSTM(64))
model.add(Dense(5, activation='softmax'))

# Compilazione del modello
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

checkpoint = ModelCheckpoint('best_model.h5', monitor='accuracy', verbose=1, save_best_only=True, mode='max')

# Addestramento del modello
model.fit(X, y, epochs=10, validation_split=0.2, callbacks=[checkpoint])


Epoch 1/10
Epoch 1: accuracy improved from -inf to 0.34764, saving model to best_model.h5
Epoch 2/10
 1/73 [..............................] - ETA: 1s - loss: 1.4441 - accuracy: 0.2812

  saving_api.save_model(


Epoch 2: accuracy improved from 0.34764 to 0.39914, saving model to best_model.h5
Epoch 3/10
Epoch 3: accuracy improved from 0.39914 to 0.59313, saving model to best_model.h5
Epoch 4/10
Epoch 4: accuracy improved from 0.59313 to 0.72618, saving model to best_model.h5
Epoch 5/10
Epoch 5: accuracy improved from 0.72618 to 0.83476, saving model to best_model.h5
Epoch 6/10
Epoch 6: accuracy improved from 0.83476 to 0.88627, saving model to best_model.h5
Epoch 7/10
Epoch 7: accuracy improved from 0.88627 to 0.93906, saving model to best_model.h5
Epoch 8/10
Epoch 8: accuracy improved from 0.93906 to 0.96652, saving model to best_model.h5
Epoch 9/10
Epoch 9: accuracy improved from 0.96652 to 0.98112, saving model to best_model.h5
Epoch 10/10
Epoch 10: accuracy did not improve from 0.98112


<keras.src.callbacks.History at 0x7c4f192fad40>

Testing model

In [27]:
from keras.models import load_model
import numpy as np

# Caricamento del modello
model = load_model('best_model.h5')

# Creazione della sequenza di testo
test_text1 = ["\"Rocky\" is a 1976 American sports drama film written and starring Sylvester Stallone. The film follows the story of Rocky Balboa, a small-time boxer from Philadelphia, who gets a shot at the world heavyweight championship. Despite being an underdog, Rocky seizes the opportunity to train rigorously and face the reigning champion, Apollo Creed, in a match that becomes a symbol of determination and the human spirit."]
test_text2 = ["“La Dolce Vita” is an iconic masterpiece that takes you back to 1960’s Rome, exploring the sweet and sinful life of the city’s high society1. The film stars the dashing Marcello Mastroianni and the captivating Anita Ekberg1. The story follows Marcello Rubini, a jaded journalist seeking to uncover the true essence of love and happiness amidst the glamour and sensuality of the Italian capital1. The film is a cinematic marvel that earned the prestigious Palme d’Or at the 1960 Cannes Film Festival and helped to popularize the term “paparazzi” in global culture1. As you watch, you’ll be drawn into the hedonistic world of Italy’s elite and the heart of Rome’s enchanting nightlife1."]
# Trasformazione del testo in una sequenza di interi
test_seq1 = tokenizer.texts_to_sequences(test_text1)
test_seq2 = tokenizer.texts_to_sequences(test_text2)

# Applicazione del padding alla sequenza
test_seq1 = pad_sequences(test_seq1, maxlen=500)
test_seq2 = pad_sequences(test_seq2, maxlen=500)

# Ottenimento delle previsioni dal modello
predictions1 = model.predict(test_seq1)
predictions2 = model.predict(test_seq2)

# Ottenimento dell'indice del tag con la probabilità più alta
predicted_index1 = np.argmax(predictions1, axis=-1)
predicted_index2 = np.argmax(predictions2, axis=-1)

# Decodifica dell'indice per ottenere il tag corrispondente
predicted_tag1 = encoder.inverse_transform(predicted_index1)
print(test_text1)
print(f"Tag: {predicted_tag1}")
predicted_tag2 = encoder.inverse_transform(predicted_index2)
print(test_text2)
print(f"Tag: {predicted_tag2}")

['"Rocky" is a 1976 American sports drama film written and starring Sylvester Stallone. The film follows the story of Rocky Balboa, a small-time boxer from Philadelphia, who gets a shot at the world heavyweight championship. Despite being an underdog, Rocky seizes the opportunity to train rigorously and face the reigning champion, Apollo Creed, in a match that becomes a symbol of determination and the human spirit.']
Tag: ['violence']
['“La Dolce Vita” is an iconic masterpiece that takes you back to 1960’s Rome, exploring the sweet and sinful life of the city’s high society1. The film stars the dashing Marcello Mastroianni and the captivating Anita Ekberg1. The story follows Marcello Rubini, a jaded journalist seeking to uncover the true essence of love and happiness amidst the glamour and sensuality of the Italian capital1. The film is a cinematic marvel that earned the prestigious Palme d’Or at the 1960 Cannes Film Festival and helped to popularize the term “paparazzi” in global cult