In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Toy corpus kept as (text, label) pairs so each label sits next to its
# message; label 1 marks spam, 0 marks not spam.
_labelled_emails = [
    ("Get free tickets now!", 1),
    ("Limited offer, buy now!", 1),
    ("Your wing is waiting", 1),
    ("Not spam", 0),
    ("Free trial, sign up now!", 1),
]

# Split the pairs into the parallel structures used by the rest of the cell.
emails = [text for text, _ in _labelled_emails]
labels = np.array([flag for _, flag in _labelled_emails], dtype=np.int32)

# Every sequence is padded (at the end) to this many token ids.
max_length = 10

# Build the word index from the corpus. The OOV token gives words that were
# not seen during fitting a dedicated index.
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(emails)

# Embedding needs one row per word-index entry plus one for index 0,
# which is the value used for padding.
vocab_size = 1 + len(tokenizer.word_index)

# Turn each email into a list of token ids, then pad to a fixed width.
sequences = tokenizer.texts_to_sequences(emails)
padded_sequences = pad_sequences(sequences, padding="post", maxlen=max_length)

# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation
# (and any training run after it) is repeatable on a fresh kernel.
tf.keras.utils.set_random_seed(42)

# Define model: embedding -> flatten -> small dense classifier head.
# The input shape is declared with an explicit Input layer rather than the
# Embedding `input_length` argument, which Keras 3 deprecates and ignores.
# Declaring the shape up front also builds the model immediately, so
# model.summary() below reports real output shapes and parameter counts.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(max_length,), dtype="int32"),  # padded token ids
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=10),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')  # single sigmoid unit: spam probability
])

# Binary target with a sigmoid output, hence binary cross-entropy.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# Fit the classifier on the padded corpus; verbose=2 logs one line per epoch.
model.fit(
    x=padded_sequences,
    y=labels,
    epochs=20,
    verbose=2,
)

# Score one unseen message, using the same tokenizer and padding settings
# that were applied to the training emails.
test_email = ["Get free stuff now!"]
test_sequence = tokenizer.texts_to_sequences(test_email)
test_padded_sequence = pad_sequences(test_sequence, padding="post", maxlen=max_length)
prediction = model.predict(test_padded_sequence)  # one row, one sigmoid output

# Pull the single probability out as a plain Python float and threshold at 0.5.
prob = float(prediction[0, 0])
verdict = "Spam" if prob > 0.5 else "Not Spam"
print(f"Spam probability: {prob:.4f}")
print(verdict)




Epoch 1/20
1/1 - 1s - 1s/step - accuracy: 0.6000 - loss: 0.6899
Epoch 2/20
1/1 - 0s - 45ms/step - accuracy: 0.8000 - loss: 0.6820
Epoch 3/20
1/1 - 0s - 46ms/step - accuracy: 0.8000 - loss: 0.6747
Epoch 4/20
1/1 - 0s - 50ms/step - accuracy: 0.8000 - loss: 0.6675
Epoch 5/20
1/1 - 0s - 55ms/step - accuracy: 0.8000 - loss: 0.6603
Epoch 6/20
1/1 - 0s - 58ms/step - accuracy: 0.8000 - loss: 0.6531
Epoch 7/20
1/1 - 0s - 49ms/step - accuracy: 0.8000 - loss: 0.6458
Epoch 8/20
1/1 - 0s - 46ms/step - accuracy: 0.8000 - loss: 0.6384
Epoch 9/20
1/1 - 0s - 46ms/step - accuracy: 0.8000 - loss: 0.6308
Epoch 10/20
1/1 - 0s - 47ms/step - accuracy: 0.8000 - loss: 0.6230
Epoch 11/20
1/1 - 0s - 50ms/step - accuracy: 0.8000 - loss: 0.6151
Epoch 12/20
1/1 - 0s - 50ms/step - accuracy: 0.8000 - loss: 0.6069
Epoch 13/20
1/1 - 0s - 47ms/step - accuracy: 0.8000 - loss: 0.5984
Epoch 14/20
1/1 - 0s - 47ms/step - accuracy: 0.8000 - loss: 0.5896
Epoch 15/20
1/1 - 0s - 46ms/step - accuracy: 0.8000 - loss: 0.5805
Epoch 