In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, confusion_matrix, classification_report, recall_score, f1_score

# Load the dataset .txt file. Column names are passed explicitly, so
# read_csv treats the first line of the file as data rather than a header.
df = pd.read_csv('SMSSPamCollection', sep='\t', names=["Label", "SMS"])

# Split the data into training and test sets (20% held out, fixed seed so the
# split is the same on every run)
X_train, X_test, y_train, y_test = train_test_split(df['SMS'], df['Label'], test_size=0.2, random_state=69)

# Vectorize the text data. The vocabulary is fitted on the training split only;
# the test split is encoded with that same vocabulary.
vectorizer = CountVectorizer()
X_train_transformed = vectorizer.fit_transform(X_train)
X_test_transformed = vectorizer.transform(X_test)

# Train a Naive Bayes classifier
classifier = MultinomialNB()
classifier.fit(X_train_transformed, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test_transformed)

# 'spam' is the positive class, matching pos_label in the metrics below.
# Passing labels=['ham', 'spam'] pins the row/column order, so the flattened
# matrix is (true ham -> ham, true ham -> spam, true spam -> ham,
# true spam -> spam), i.e. (TN, FP, FN, TP). The matrix is computed once
# instead of once per printed cell.
tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=['ham', 'spam']).ravel()
print('True negative: ', tn)
print('False positive: ', fp)
print('True positive: ', tp)
print('False negative: ', fn, '\n')
print(classification_report(y_test, y_pred))
print("Accuracy:", round(accuracy_score(y_test, y_pred), 4))
print('Precision:', round(precision_score(y_test, y_pred, pos_label='spam'), 4))
print('Recall:', round(recall_score(y_test, y_pred, pos_label='spam'), 4))
print('F1 Score:', round(f1_score(y_test, y_pred, pos_label='spam'), 4))

True negative:  961
False positive:  3
True positive:  139
False negative:  12 

              precision    recall  f1-score   support

         ham       0.99      1.00      0.99       964
        spam       0.98      0.92      0.95       151

    accuracy                           0.99      1115
   macro avg       0.98      0.96      0.97      1115
weighted avg       0.99      0.99      0.99      1115

Accuracy: 0.9865
Precision: 0.9789
Recall: 0.9205
F1 Score: 0.9488


In [6]:
# Emit a recommendation depending on whether a message was classified as spam
def generar_alerta_spam(prediccion):
    """Print a fraud warning for a 'spam' prediction, a reassurance otherwise.

    Args:
        prediccion: Predicted label; it is compared against the string 'spam'.

    Returns:
        None. The message is written to standard output.
    """
    es_spam = prediccion == 'spam'
    aviso = 'Alerta: Este mensaje puede ser un fraude' if es_spam else 'Este mensaje parece ser seguro'
    print(aviso)

# Apply the alert system to a sample message
mensaje = 'You have won a free ticket to the Bahamas. Call now!'
# transform() takes an iterable of documents, so the single message is wrapped in a list
mensaje_transformado = vectorizer.transform([mensaje])
prediccion = classifier.predict(mensaje_transformado)
# Only one message was passed in, so the first entry is its predicted label
etiqueta_predicha = prediccion[0]
generar_alerta_spam(etiqueta_predicha)

Alerta: Este mensaje puede ser un fraude
