In [None]:
%%capture
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
import string
import nltk
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import warnings
warnings.filterwarnings('ignore')

# Download required NLTK data silently
import ssl

# The download is attempted with certificate verification relaxed (presumably a
# workaround for hosts whose certificate store cannot validate the NLTK
# download server -- TODO confirm it is still needed). The relaxed context is
# installed only for the duration of the download and the previous factory is
# restored afterwards; leaving it in place would disable HTTPS verification for
# every later request made from this kernel.
_verified_https_context = getattr(ssl, '_create_default_https_context', None)
_unverified_https_context = getattr(ssl, '_create_unverified_context', None)

if _unverified_https_context is not None:
    ssl._create_default_https_context = _unverified_https_context
try:
    nltk.download('stopwords', quiet=True)
finally:
    # Restore verification even if the download raised.
    if _verified_https_context is not None:
        ssl._create_default_https_context = _verified_https_context

In [None]:
%%capture
# Load and prepare the dataset
try:
    # Read the corpus and discard the helper columns when they are present
    # (errors='ignore' makes the drop a no-op for columns that do not exist).
    df = (
        pd.read_csv("spam_ham_dataset.csv")
        .drop(columns=["Unnamed: 0", "label_num"], errors='ignore')
    )
    # Binary target: 1 where the label is exactly 'spam', 0 for anything else.
    df['spam'] = np.where(df['label'] == 'spam', 1, 0)
    
    # Text cleaning function
    # The stemmer, stop-word set and punctuation table do not depend on the
    # input, so they are built once here rather than on every call (the
    # function is applied to each row of the corpus and to every user query).
    _STEMMER = SnowballStemmer('english')
    _STOP_WORDS = frozenset(stopwords.words('english'))
    _PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

    def clean_text(text):
        """Normalise raw text into a space-joined string of stemmed tokens.

        Steps, in order: lowercase, delete every character in
        ``string.punctuation``, split on whitespace, drop English stop words,
        stem the remaining words with the English Snowball stemmer.

        Args:
            text: Raw text. ``None`` and float NaN (e.g. an empty CSV cell)
                are treated as empty text; any other non-string value is
                converted with ``str()`` first.

        Returns:
            str: The cleaned tokens joined by single spaces; ``''`` when
            nothing survives cleaning.
        """
        # NaN is the only float that is not equal to itself.
        if text is None or (isinstance(text, float) and text != text):
            return ''
        if not isinstance(text, str):
            text = str(text)

        stripped = text.lower().translate(_PUNCTUATION_TABLE)
        return ' '.join(
            _STEMMER.stem(word)
            for word in stripped.split()
            if word not in _STOP_WORDS
        )
    
    # Apply text cleaning
    df['clean_text'] = df['text'].apply(clean_text)
    df = df.drop(columns=['text'])

    # Prepare features and target
    X = df['clean_text']
    y = df['spam']

    # Split the cleaned text BEFORE vectorizing. Fitting the vectorizer on the
    # full corpus lets words that occur only in the held-out rows shape the
    # vocabulary, which leaks test information into training. The same
    # test_size/random_state are used, so the row partition should be unchanged.
    X_train_text, X_test_text, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Vectorize the text: vocabulary learned from the training rows only
    vectorizer = CountVectorizer()
    X_train = vectorizer.fit_transform(X_train_text)
    X_test = vectorizer.transform(X_test_text)

    # Kept so that any code still referring to X_vectorized keeps working; it
    # now uses the training-only vocabulary.
    X_vectorized = vectorizer.transform(X)
    
    # Train models
    models = {
        "Naive Bayes": MultinomialNB(),
        # probability=True enables predict_proba; random_state pins the data
        # shuffling used for those probability estimates so that confidence
        # values are reproducible between runs.
        "SVM": SVC(probability=True, random_state=42),
    }

    # Fit every candidate and record its accuracy on the held-out split.
    model_results = {}
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        model_results[name] = {
            'model': model,
            'accuracy': accuracy
        }

    # Choose the winner after the loop so that best_model, best_model_name and
    # best_accuracy are always defined together (previously best_model_name was
    # only assigned inside a conditional and could be left undefined).
    # max() returns the first maximal entry, so ties still go to the model
    # listed first, as with the original strict '>' comparison.
    best_model_name = max(
        model_results,
        key=lambda candidate: model_results[candidate]['accuracy'],
    )
    best_model = model_results[best_model_name]['model']
    best_accuracy = model_results[best_model_name]['accuracy']

    # Signals the later cells that the model pipeline is ready to use.
    data_loaded = True
    
except Exception as e:
    # Any failure in the try body above (reading the CSV, cleaning,
    # vectorizing or training) ends up here. Instead of raising, the outcome is
    # recorded in two variables: later cells check `data_loaded` before building
    # the UI and show `error_message` in the error panel.
    data_loaded = False
    error_message = str(e)

In [None]:
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output

# Create the main title
# Page banner: a static HTML snippet (gradient background, app name, tagline)
# wrapped in IPython's HTML display object and rendered once in this cell's
# output. It has no dependency on the dataset or the trained models.
title = HTML("""
<div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 30px;">
    <h1 style="margin: 0; font-size: 2.5em; font-weight: bold;">🛡️ Email Spam Detection System</h1>
    <p style="margin: 10px 0 0 0; font-size: 1.2em; opacity: 0.9;">AI-Powered Email Security Analysis</p>
</div>
""")

display(title)

In [None]:
from html import escape

# Renders one of two panels depending on whether the training cell succeeded:
# an accuracy bar per trained model, or an error box with the failure reason.
if data_loaded:
    # Display model performance
    performance_html = """
    <div style="background: #f8f9fa; padding: 20px; border-radius: 10px; margin: 20px 0; border-left: 5px solid #28a745;">
        <h3 style="color: #155724; margin-top: 0;">📊 Model Performance Results</h3>
    """
    
    for name, result in model_results.items():
        accuracy_percentage = result['accuracy'] * 100
        # The selected model is highlighted in green with a trophy icon;
        # the others are shown in grey.
        status_color = "#28a745" if name == best_model_name else "#6c757d"
        status_icon = "🏆" if name == best_model_name else "📊"
        
        performance_html += f"""
        <div style="background: white; padding: 15px; margin: 10px 0; border-radius: 8px; border: 1px solid #dee2e6;">
            <h4 style="color: {status_color}; margin: 0 0 10px 0;">{status_icon} {name}</h4>
            <div style="background: #e9ecef; height: 20px; border-radius: 10px; overflow: hidden;">
                <div style="background: {status_color}; height: 100%; width: {accuracy_percentage:.1f}%; border-radius: 10px; transition: width 0.3s ease;"></div>
            </div>
            <p style="margin: 5px 0 0 0; font-weight: bold; color: {status_color};">Accuracy: {accuracy_percentage:.2f}%</p>
        </div>
        """
    
    performance_html += "</div>"
    display(HTML(performance_html))
else:
    # The exception text is arbitrary (it may contain '<', '>' or '&', e.g.
    # "<class 'float'>"), so it is escaped before being placed into markup;
    # otherwise part of the message could be swallowed as an HTML tag.
    error_detail = escape(str(globals().get('error_message', 'Unknown error')))
    display(HTML(f"""
    <div style="background: #f8d7da; color: #721c24; padding: 20px; border-radius: 10px; margin: 20px 0; border-left: 5px solid #dc3545;">
        <h3 style="margin-top: 0;">⚠️ Data Loading Error</h3>
        <p>Could not load the spam_ham_dataset.csv file. Please ensure the dataset is available.</p>
        <p>Error: {error_detail}</p>
    </div>
    """))

In [None]:
if data_loaded:
    # --- Input controls -----------------------------------------------------
    # Free-text box for the email under test.
    email_input = widgets.Textarea(
        value="",
        placeholder="Enter your email content here to check if it's spam...",
        description="",
        layout=widgets.Layout(width='100%', height='120px'),
    )

    # Action buttons: run the classifier / reset the form.
    predict_button = widgets.Button(
        description="🔍 Analyze Email", button_style='primary',
        layout=widgets.Layout(width='200px', height='40px'),
    )
    clear_button = widgets.Button(
        description="🗑️ Clear", button_style='info',
        layout=widgets.Layout(width='100px', height='40px'),
    )

    # Container into which the analysis result is rendered.
    output_area = widgets.Output()

    # --- Canned examples ----------------------------------------------------
    # Display name -> email body; offered through the dropdown below.
    sample_emails = {
        "Legitimate Email": "Subject: Meeting tomorrow\n\nHi John,\n\nJust wanted to confirm our meeting scheduled for tomorrow at 2 PM. Please let me know if you need to reschedule.\n\nBest regards,\nSarah",
        "Promotional Email": "Subject: Limited time offer!\n\nDear valued customer,\n\nWe have an exclusive 50% discount on all our products. This offer is valid only for the next 24 hours. Click here to shop now!",
        "Suspicious Email": "Subject: Urgent! Your account will be closed\n\nDear user,\n\nYour account will be permanently closed within 24 hours unless you verify your information immediately. Click this link and enter your password and credit card details to keep your account active. Act now!"
    }

    # The first entry is a placeholder whose value is the empty string; every
    # other entry carries the sample body as its value.
    sample_dropdown = widgets.Dropdown(
        options=[("Select a sample email...", ""), *sample_emails.items()],
        description="",
        layout=widgets.Layout(width='300px'),
    )
    
    def on_sample_change(change):
        """Copy the newly selected sample body into the email text box.

        ``change`` is the change dictionary passed by the dropdown observer;
        only its ``'new'`` entry is read. An empty selection (the placeholder
        option) leaves the text box untouched.
        """
        selected_body = change['new']
        if not selected_body:
            return
        email_input.value = selected_body
    
    def clean_and_predict(text):
        """Classify one email with the best-scoring model.

        Args:
            text: Raw email text entered by the user.

        Returns:
            tuple: ``(prediction, confidence, cleaned)`` where
            ``prediction`` is the label returned by ``best_model.predict``
            (1 means spam, matching the training target), ``confidence`` is
            the estimated probability of that predicted label as a percentage
            (``None`` when the model cannot provide probabilities), and
            ``cleaned`` is the normalised text that was vectorized.
            All three are ``None`` when ``text`` is empty or whitespace only.
        """
        if not text or not text.strip():
            return None, None, None

        # Same preprocessing and vocabulary as used for training.
        cleaned = clean_text(text)
        text_vectorized = vectorizer.transform([cleaned])

        # Predict with best model
        prediction = best_model.predict(text_vectorized)[0]

        # Report the probability of the label that was actually predicted.
        # Taking max(probability) is not equivalent: for SVC the probability
        # estimates can disagree with predict(), in which case the maximum
        # belongs to the other class and would be shown as confidence in a
        # verdict the model did not give.
        try:
            probability = best_model.predict_proba(text_vectorized)[0]
            # predict_proba columns follow the order of classes_.
            class_index = list(best_model.classes_).index(prediction)
            confidence = float(probability[class_index]) * 100
        except Exception:
            # Best effort: a model without usable probability estimates still
            # yields a prediction, just without a confidence figure.
            confidence = None

        return prediction, confidence, cleaned
    
    def on_predict_click(b):
        """Button callback: classify the text box content and render the verdict.

        Everything is drawn inside ``output_area``, replacing what was shown
        before. Empty or whitespace-only input produces a warning banner
        instead of a result. ``b`` (the clicked button) is not used.
        """
        with output_area:
            clear_output(wait=True)

            email_body = email_input.value
            if not email_body.strip():
                display(HTML("""
                <div style="background: #fff3cd; color: #856404; padding: 15px; border-radius: 8px; border-left: 5px solid #ffc107;">
                    <strong>⚠️ Please enter an email to analyze</strong>
                </div>
                """))
                return

            # The cleaned text is returned as well but is not shown anywhere.
            prediction, confidence, _cleaned = clean_and_predict(email_body)

            if prediction is not None:
                # Presentation for each verdict: (colour, icon, headline, description).
                verdict_styles = {
                    True: (
                        "#dc3545",
                        "🚨",
                        "SPAM DETECTED",
                        "This email appears to be spam and should be treated with caution.",
                    ),
                    False: (
                        "#28a745",
                        "✅",
                        "LEGITIMATE EMAIL",
                        "This email appears to be legitimate.",
                    ),
                }
                result_color, result_icon, result_text, result_desc = verdict_styles[bool(prediction == 1)]

                # The confidence line is omitted when no value is available.
                if confidence:
                    confidence_display = f"<p><strong>Confidence:</strong> {confidence:.1f}%</p>"
                else:
                    confidence_display = ""

                result_html = f"""
                <div style="background: white; padding: 20px; border-radius: 10px; border: 2px solid {result_color}; margin: 20px 0;">
                    <div style="text-align: center; margin-bottom: 15px;">
                        <h2 style="color: {result_color}; margin: 0; font-size: 1.8em;">{result_icon} {result_text}</h2>
                        <p style="color: #6c757d; margin: 5px 0 0 0;">{result_desc}</p>
                    </div>
                    
                    <div style="background: #f8f9fa; padding: 15px; border-radius: 8px; margin-top: 15px;">
                        <h4 style="margin: 0 0 10px 0; color: #495057;">📋 Analysis Details</h4>
                        <p><strong>Model Used:</strong> {best_model_name}</p>
                        <p><strong>Model Accuracy:</strong> {best_accuracy*100:.2f}%</p>
                        {confidence_display}
                    </div>
                </div>
                """

                display(HTML(result_html))
    
    def on_clear_click(b):
        """Button callback: reset the form to its initial, empty state.

        Blanks the email text box, returns the sample dropdown to its
        placeholder entry and wipes the result area. ``b`` is not used.
        """
        for field in (email_input, sample_dropdown):
            field.value = ""
        with output_area:
            clear_output()
    
    # Hook the callbacks up: dropdown selection changes, then the two buttons.
    sample_dropdown.observe(on_sample_change, names='value')
    predict_button.on_click(on_predict_click)
    clear_button.on_click(on_clear_click)

    # Static heading shown above the interactive controls.
    interface_html = HTML("""
    <div style="background: #f8f9fa; padding: 25px; border-radius: 10px; margin: 20px 0; border: 1px solid #dee2e6;">
        <h3 style="color: #495057; margin: 0 0 20px 0;">✉️ Email Analysis Interface</h3>
        <p style="color: #6c757d; margin-bottom: 20px;">Enter an email or select a sample below to test the spam detection system:</p>
    </div>
    """)

    # Render the page top to bottom: heading, sample picker row, text box
    # label and text box, button row, then the result area.
    display(
        interface_html,
        widgets.HBox([widgets.Label("Sample Emails:"), sample_dropdown]),
        widgets.Label("Email Content:"),
        email_input,
        widgets.HBox([predict_button, clear_button], layout=widgets.Layout(margin='10px 0')),
        output_area,
    )

In [None]:
# Page footer: a static HTML snippet with the credits line and a one-sentence
# description, rendered once in this cell's output. It does not depend on the
# dataset or the trained models, so it is shown whether or not loading succeeded.
footer_html = HTML("""
<div style="text-align: center; padding: 20px; margin-top: 40px; border-top: 2px solid #dee2e6; color: #6c757d;">
    <p style="margin: 0;">🔒 Built with Machine Learning • Powered by Voilà • Secure Email Analysis</p>
    <p style="margin: 5px 0 0 0; font-size: 0.9em;">This system uses trained AI models to detect potentially harmful email content</p>
</div>
""")

display(footer_html)