In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt


In [3]:

data = pd.read_csv("SMSSpamCollection.txt", sep="\t", header=None, names=["label", "message"])
df = pd.DataFrame(data)
df['label'] = df['label'].map({'ham': 0, 'spam': 1})
df['message'] = df['message'].str.lower()


X_train, X_test, y_train, y_test = train_test_split(df['message'], df['label'], test_size=0.2, random_state=42)


In [4]:

vectorize = TfidfVectorizer(stop_words='english')
X_train_vec = vectorize.fit_transform(X_train)
X_test_vec = vectorize.transform(X_test)

model = Sequential([
    Embedding(input_dim=10000, output_dim=64, input_length=X_train_vec.shape[1]),
    LSTM(64),
    Dense(1, activation='sigmoid')
])
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])





In [None]:
history = model.fit(X_train_vec.toarray(), y_train, epochs=5, batch_size=32, validation_data=(X_test_vec.toarray(), y_test))


Epoch 1/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m516s[0m 4s/step - accuracy: 0.8590 - loss: 0.4428 - val_accuracy: 0.8664 - val_loss: 0.3965
Epoch 2/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m614s[0m 4s/step - accuracy: 0.8633 - loss: 0.4017 - val_accuracy: 0.8664 - val_loss: 0.3960
Epoch 3/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m610s[0m 4s/step - accuracy: 0.8557 - loss: 0.4145 - val_accuracy: 0.8664 - val_loss: 0.3934
Epoch 4/5
[1m107/140[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m3:06:14[0m 339s/step - accuracy: 0.8677 - loss: 0.3943

In [None]:
plt.figure(figsize=(12, 6))

# Plot training & validation loss values
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Loss over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Plot training & validation accuracy values
plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Accuracy over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

# Show the plot
plt.show()
model.save('spam_checker_1.0-100.h5')

# Step 6: Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test_vec.toarray(), y_test)
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')