In [9]:
import warnings
import re, numpy as np, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from lime.lime_text import LimeTextExplainer

warnings.filterwarnings("ignore", message = ".*matmul.*")
np.seterr(all = "ignore")

CSV_PATH = "/Users/merterol/Desktop/UZH/CompLing:CompSci/CL/Sem 5/Bachelors Thesis/VSCode/Bachelors-Thesis/data/merged.csv"
RANDOM_STATE = 42
TEST_SIZE = 0.2

In [10]:
def extract_sender_domain(sender: str) -> str:
    if not isinstance(sender, str) or sender.strip() == "":
        return "no_sender"
    m = re.search(r'@([a-zA-Z0-9.-]+)', sender)
    
    return m.group(1).lower() if m else "unknown_format"

# --- Loader ---
df = pd.read_csv(CSV_PATH).sample(frac=1, random_state=RANDOM_STATE).reset_index(drop=True)
df["sender"] = df["sender"].fillna("")
df["text"] = df["text"].fillna("")
df["sender_domain"] = df["sender"].apply(extract_sender_domain)

df["joined_text"] = "sender=" + df["sender_domain"] + " " + df["text"]

X = df["joined_text"]
y = df["label"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y if np.unique(y).size > 1 else None)

In [11]:
# --- Pipeline ---
if len(np.unique(y_train)) < 2:
    print("Error: Training data contains only one class. Please check your data or label creation process.")
    print("Unique classes in y_train:", np.unique(y_train))
else:
    text_clf = Pipeline([
        ("tfidf", TfidfVectorizer(max_features=3000, min_df = 2, stop_words="english", lowercase=True, token_pattern=r'(?u)\b[\w@.\-=:]+\b')),
        ("lr", LogisticRegression(solver="saga", max_iter=1000, n_jobs=1, random_state=RANDOM_STATE)),
    ])

    text_clf.fit(X_train, y_train)
    print("\n=== Classification Report (Text Only for LIME) ===")
    print(classification_report(y_test, text_clf.predict(X_test)))


=== Classification Report (Text Only for LIME) ===
              precision    recall  f1-score   support

           0       0.98      0.97      0.98     12150
           1       0.98      0.98      0.98     13530

    accuracy                           0.98     25680
   macro avg       0.98      0.98      0.98     25680
weighted avg       0.98      0.98      0.98     25680



In [12]:
def predict_proba_safe(texts):
    P = text_clf.predict_proba(list(texts))
    return np.nan_to_num(P, nan=0.5, posinf=1.0, neginf=0.0)

In [38]:
# --- LIME ---
from lime.lime_text import LimeTextExplainer

# LIME likes class names as strings
class_names = [str(c) for c in np.unique(y_train)]
explainer = LimeTextExplainer(class_names=class_names)

# Map the positive class index robustly (supports 'spam'/1)
classes = text_clf.classes_
if any(isinstance(c, str) and c.lower() == "spam" for c in classes):
    spam_idx = int(np.where(np.char.lower(classes.astype(str)) == "spam")[0][0])
elif 1 in classes:
    spam_idx = int(np.where(classes == 1)[0][0])
else:
    spam_idx = 1 if len(classes) > 1 else 0

# Pick any test row by POSITION
i = 7 
instance_text = X_test[i]

exp = explainer.explain_instance(
    instance_text,
    predict_proba_safe,
    num_features=15,
    num_samples=2000,
    labels=[spam_idx]
)

import sys
from termcolor import colored

def weight_to_text(word, weight):
    if weight > 0 and weight <= 1:
        w = colored(word, "red")
        s = colored("Contributed to a higher spam prediciton factor", "red")
    else:
        w = colored(word, "green")
        s = colored("Contributed to a lower spam prediction factor", "green")
    
    return w, s

print("\n=== ORIGINAL EMAIL (joined with sender) ===\n")
print(instance_text[:1000])

print("\nLIME explanation (top tokens)")
print(exp.as_list(label=spam_idx))

pred_label = text_clf.predict([instance_text])[0]
pred_probs = text_clf.predict_proba([instance_text])[0]
print(f"\nPredicted label: {"Spam" if pred_label == 1 else "Ham"} | Probabilities: {dict(zip(classes, pred_probs))}")

print("\n\n")
for word, weight in exp.as_list(label=spam_idx):
    w, s = weight_to_text(word, weight)
    print(w, ":", s)
    #print(weight)


=== ORIGINAL EMAIL (joined with sender) ===

sender=no_sender http myprint hk don't miss this unique escapelong pills x escapenumbermg escapenumber escapenumberonly escapenumber escapenumber per pil lescapenumber pills x escapenumbermg escapenumber escapenumberonly escapenumber escapenumber per pillescapenumber pills x escapenumbermg escapenumber escapenumberonly escapenumber escapenumber per pillescapenumber pills x escapenumbermg escapenumber escapenumberonly escapenumber escapenumber per pillescapenumber pills x escapenumbermg escapenumber escapenumberonly escapenumber escapenumber per pillescapenumber pills x escapenumbermg escapenumber escapenumberonly escapenumber escapenumber per pillescapenumber pills x escapenumbermg escapenumber escapenumberonly escapenumber escapenumber per pill cialisescapenumber pills x escapenumbermg escapenumber escapenumberonly escapenumber per pillescapenumber pills x escapenumbermg escapenumber escapenumberonly escapenumber escapenumber per pillescap