In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# End-to-end sentiment classifier: load the review CSV, tokenize the text,
# train a small LSTM, report held-out metrics, and save the trained model.

# Hyperparameters are named once so that values which must agree (the
# tokenizer vocabulary cap and the embedding table size) cannot drift apart.
VOCAB_SIZE = 5000       # Tokenizer `num_words` and Embedding `input_dim`
MAX_SEQUENCE_LEN = 100  # every review is padded/truncated to this many tokens
EMBEDDING_DIM = 128
LSTM_UNITS = 64
DENSE_UNITS = 64
DROPOUT_RATE = 0.5
EPOCHS = 5
BATCH_SIZE = 32
MODEL_PATH = 'text_classification_model.h5'

df = pd.read_csv('IMDB Dataset.csv')
# Quick preview of the data; presumably only rendered by the notebook when it
# is the last expression of a cell, so it is a no-op here.
df.head()

# Words outside the vocabulary cap are mapped to the '<OOV>' token instead of
# being dropped from the sequence.
tokenizer = Tokenizer(num_words=VOCAB_SIZE, oov_token='<OOV>')
tokenizer.fit_on_texts(df['review'])

sequences = tokenizer.texts_to_sequences(df['review'])
padded = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LEN)

# Encode labels as one-hot rows (one column per distinct sentiment value).
# An explicit float dtype is requested because newer pandas versions return
# boolean dummies by default, and the loss below expects numeric targets.
labels = pd.get_dummies(df['sentiment'], dtype='float32').values
X_train, X_test, y_train, y_test = train_test_split(
    padded, labels, test_size=0.2, random_state=42
)

model = Sequential([
    Embedding(input_dim=VOCAB_SIZE, output_dim=EMBEDDING_DIM),
    # Only the final LSTM state is passed on to the classifier head.
    LSTM(LSTM_UNITS, return_sequences=False),
    Dropout(DROPOUT_RATE),
    Dense(DENSE_UNITS, activation='relu'),
    # One output unit per label column, so the head follows the data.
    Dense(labels.shape[1], activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# A further 20% of the training split is held out for per-epoch validation.
history = model.fit(
    X_train,
    y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=0.2,
)

# Convert one-hot targets and softmax outputs back to class indices.
y_pred = model.predict(X_test)
print(classification_report(y_test.argmax(axis=1), y_pred.argmax(axis=1)))

# Call `save` to write the model to disk. The previous code assigned the
# filename to `model.save`, which saved nothing and shadowed the method.
model.save(MODEL_PATH)



Epoch 1/5
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 42ms/step - accuracy: 0.7332 - loss: 0.5011 - val_accuracy: 0.8441 - val_loss: 0.3594
Epoch 2/5
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 43ms/step - accuracy: 0.8816 - loss: 0.2915 - val_accuracy: 0.8612 - val_loss: 0.3368
Epoch 3/5
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 49ms/step - accuracy: 0.9130 - loss: 0.2232 - val_accuracy: 0.8620 - val_loss: 0.3442
Epoch 4/5
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 73ms/step - accuracy: 0.9333 - loss: 0.1758 - val_accuracy: 0.8587 - val_loss: 0.3841
Epoch 5/5
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 88ms/step - accuracy: 0.9479 - loss: 0.1438 - val_accuracy: 0.8559 - val_loss: 0.4407
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step
              precision    recall  f1-score   support

           0       0.89      0.81      0.84 