In [1]:
import pandas as pd
import numpy as np
import joblib
import os
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.metrics import accuracy_score, f1_score, classification_report

# Fetch the NLTK data packages this script relies on: the stop-word lists,
# the WordNet corpus and the Punkt tokenizer data.
for _resource in ('stopwords', 'wordnet', 'punkt'):
    nltk.download(_resource)

# Module-level helpers shared by clean_text(): the lemmatizer instance and the
# English stop-word list, stored as a set for fast membership tests.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def clean_text(text):
    """Normalize a raw message into a space-joined string of lemmatized tokens.

    The input is converted with ``str()`` and lowercased, then HTML-like tags,
    URLs, e-mail-like tokens and digit runs are substituted (in that order),
    ASCII punctuation is stripped, and the remainder is tokenized, filtered
    against ``stop_words`` and lemmatized.

    Args:
        text: The message to clean; any object is accepted because it is
            passed through ``str()`` first.

    Returns:
        The surviving lemmatized tokens joined by single spaces.
    """
    normalized = str(text).lower()

    # Applied sequentially: each pattern sees the output of the previous one,
    # so URLs are replaced before the e-mail and digit patterns run.
    substitutions = (
        (r'<.*?>', ' '),
        (r'http\S+|www\S+|https\S+', ' [LINK] '),
        (r'\S+@\S+', ' [EMAIL] '),
        (r'\d+', ' [NUMBER] '),
    )
    for pattern, replacement in substitutions:
        normalized = re.sub(pattern, replacement, normalized)

    # The square brackets around the placeholders are punctuation as well, so
    # this step leaves bare upper-case LINK / EMAIL / NUMBER tokens.
    punctuation_table = str.maketrans('', '', string.punctuation)
    normalized = normalized.translate(punctuation_table)

    kept_tokens = []
    for token in nltk.word_tokenize(normalized):
        if token in stop_words:
            continue
        kept_tokens.append(lemmatizer.lemmatize(token))
    return ' '.join(kept_tokens)

def get_stats(text):
    """Compute hand-crafted numeric features for one message.

    The input is coerced with ``str()`` (matching ``clean_text``), so
    non-string cells such as NaN, ``None`` or numbers no longer raise
    ``TypeError`` when the function is applied to a DataFrame column.

    Args:
        text: The raw (uncleaned) message; any object is accepted.

    Returns:
        A ``pd.Series`` with, in this order:
        ``msg_length`` (character count), ``word_count`` (whitespace-separated
        tokens), ``capital_ratio`` (upper-case characters divided by
        ``len(text) + 1``), ``excl_count`` (number of ``'!'``),
        ``digit_count`` (digit characters) and ``spam_word_count``
        (occurrences of the spam keywords in the lowercased text).
    """
    text = str(text)
    # Lowercase once instead of once per keyword.
    lowered = text.lower()
    spam_words = ['free', 'win', 'winner', 'cash', 'prize', 'urgent', 'claim']

    stats = {
        'msg_length': len(text),
        'word_count': len(text.split()),
        # The +1 in the denominator avoids division by zero on empty input.
        'capital_ratio': sum(1 for c in text if c.isupper()) / (len(text) + 1),
        'excl_count': text.count('!'),
        'digit_count': sum(1 for c in text if c.isdigit()),
        # Substring counts, not whole-word counts: "winner" contributes to
        # both 'win' and 'winner'. Kept unchanged so the feature values stay
        # the same as before for string input.
        'spam_word_count': sum(lowered.count(word) for word in spam_words),
    }
    return pd.Series(stats)

from scipy.sparse import hstack

# Hand-written sample messages used as a quick smoke test for the saved models.
test_msgs = [
    "Congratulations! You've won a $1000 Walmart gift card. Go to http://bit.ly/123 to claim now.",
    "Hey, what time are we meeting at the library tomorrow?",
    "URGENT! Your account is compromised. Send us your password immediately!",
    "Let's catch up over coffee tomorrow.",
]

# Columns returned by get_stats(), in the order they are appended to the
# TF-IDF matrix below.
stat_columns = [
    'msg_length',
    'word_count',
    'capital_ratio',
    'excl_count',
    'digit_count',
    'spam_word_count',
]

# One row per message: the raw text, its cleaned form, and the numeric stats
# computed from the raw (uncleaned) text.
df_test = pd.DataFrame({'message': test_msgs})
df_test['clean_text'] = df_test['message'].apply(clean_text)
stats = df_test['message'].apply(get_stats)
df_test = pd.concat([df_test, stats], axis=1)

# Vectorize the cleaned text with the previously saved TF-IDF vectorizer and
# place the hand-crafted stats to the right of the TF-IDF columns.
tfidf = joblib.load("models/sms_tfidf_vectorizer.pkl")
X_tfidf = tfidf.transform(df_test['clean_text'])
extra_features = df_test[stat_columns].values
X_test = hstack([X_tfidf, extra_features])

# File names of the saved classifiers, looked up under the models/ directory.
model_names = [
    "sms_RandomForest.pkl",
    "sms_AdaBoost.pkl",
    "sms_KNN.pkl",
    "sms_LinearSVC.pkl",
    "sms_MLP.pkl",
    "sms_ensemble.pkl",
]

# One row per sample message; each successfully loaded model adds a column
# named after its file (without the ".pkl" part).
results = pd.DataFrame({'message': test_msgs})
for model_name in model_names:
    model_path = f"models/{model_name}"
    if os.path.exists(model_path):
        model = joblib.load(model_path)
        preds = model.predict(X_test)
        results[model_name.replace(".pkl", "")] = preds

        print(f"\nModel: {model_name}")
        for msg, label in zip(test_msgs, preds):
            # A prediction equal to 1 is reported as SPAM; anything else as HAM.
            verdict = 'SPAM' if label == 1 else 'HAM'
            print(f"Mesaj: {msg}\nTahmin: {verdict}\n")
    else:
        # Missing model file: print a warning (in Turkish) and move on.
        print(f"UYARI: {model_name} modeli bulunamadı, atlanıyor.")

# Turn the numeric labels into readable ones before printing the summary
# table of every model's predictions.
results.replace(to_replace={1: "SPAM", 0: "HAM"}, inplace=True)
print("\nBütün modellerin tahmin tablosu:")
print(results)



[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\yiit_\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\yiit_\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\yiit_\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!



Model: sms_RandomForest.pkl
Mesaj: Congratulations! You've won a $1000 Walmart gift card. Go to http://bit.ly/123 to claim now.
Tahmin: SPAM

Mesaj: Hey, what time are we meeting at the library tomorrow?
Tahmin: HAM

Mesaj: URGENT! Your account is compromised. Send us your password immediately!
Tahmin: HAM

Mesaj: Let's catch up over coffee tomorrow.
Tahmin: HAM


Model: sms_AdaBoost.pkl
Mesaj: Congratulations! You've won a $1000 Walmart gift card. Go to http://bit.ly/123 to claim now.
Tahmin: SPAM

Mesaj: Hey, what time are we meeting at the library tomorrow?
Tahmin: HAM

Mesaj: URGENT! Your account is compromised. Send us your password immediately!
Tahmin: SPAM

Mesaj: Let's catch up over coffee tomorrow.
Tahmin: HAM


Model: sms_KNN.pkl
Mesaj: Congratulations! You've won a $1000 Walmart gift card. Go to http://bit.ly/123 to claim now.
Tahmin: SPAM

Mesaj: Hey, what time are we meeting at the library tomorrow?
Tahmin: HAM

Mesaj: URGENT! Your account is compromised. Send us your pas