In [1]:
# %pip (rather than !pip) installs into the environment of the running kernel,
# not whichever pip happens to be first on the shell PATH.
%pip install transformers torch


Collecting transformers
  Downloading transformers-5.1.0-py3-none-any.whl.metadata (31 kB)
Collecting torch
  Downloading torch-2.10.0-2-cp313-none-macosx_11_0_arm64.whl.metadata (31 kB)
Collecting huggingface-hub<2.0,>=1.3.0 (from transformers)
  Downloading huggingface_hub-1.4.1-py3-none-any.whl.metadata (13 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Downloading tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl.metadata (7.3 kB)
Collecting typer-slim (from transformers)
  Downloading typer_slim-0.23.1-py3-none-any.whl.metadata (4.2 kB)
Collecting safetensors>=0.4.3 (from transformers)
  Downloading safetensors-0.7.0-cp38-abi3-macosx_11_0_arm64.whl.metadata (4.1 kB)
Collecting hf-xet<2.0.0,>=1.2.0 (from huggingface-hub<2.0,>=1.3.0->transformers)
  Downloading hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl.metadata (4.9 kB)
Collecting typer>=0.23.1 (from typer-slim->transformers)
  Downloading typer-0.23.1-py3-none-any.whl.metadata (16 kB)
Collecting annotated-doc>=

In [2]:
from transformers import BertForSequenceClassification, BertTokenizer
import torch

# Hub identifier of the phishing-detection checkpoint used by this notebook
model_name = 'ElSlay/BERT-Phishing-Email-Model'

# Load the classifier and put it in evaluation mode in one expression
# (eval() returns the module it was called on), then load the tokenizer
# published under the same identifier.
model = BertForSequenceClassification.from_pretrained(model_name).eval()
tokenizer = BertTokenizer.from_pretrained(model_name)

print("BERT model loaded successfully!")


config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]



special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

BERT model loaded successfully!


In [5]:
def predict_email(email_text, max_length=512):
    """Classify a single email as phishing or legitimate.

    Args:
        email_text: Text of the email to classify.
        max_length: Maximum number of tokens passed to the model; longer
            inputs are truncated. Defaults to 512.

    Returns:
        "Phishing" when the model's highest-scoring class index is 1,
        otherwise "Legitimate".
    """
    # Only one sequence is scored per call, so no padding is requested.
    # Padding to max_length made every forward pass process 512 tokens no
    # matter how short the email was; padded positions are masked out by the
    # attention mask, so they never contributed to the prediction.
    inputs = tokenizer(
        email_text,
        return_tensors="pt",
        truncation=True,
        max_length=max_length,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Index of the highest-scoring class for the single input sequence.
    predicted_class = torch.argmax(logits, dim=-1).item()

    return "Phishing" if predicted_class == 1 else "Legitimate"


In [4]:
# Two hand-written examples: one suspicious message, one ordinary message
sample_email_1 = "Your bank account has been blocked. Click this link to verify immediately!"
sample_email_2 = "Meeting scheduled at 10 AM tomorrow with the team."

# Classify both samples and print each verdict with its 1-based position
for sample_number, sample_text in enumerate((sample_email_1, sample_email_2), start=1):
    print(f"Email {sample_number} Prediction:", predict_email(sample_text))


Email 1 Prediction: Phishing
Email 2 Prediction: Legitimate


In [6]:
# Ask the user for an email, classify it, and show the verdict
new_email = input("Paste email text: ")
new_email_verdict = predict_email(new_email)
print("Prediction:", new_email_verdict)


Paste email text:  Dear Customer,  We have detected suspicious activity in your account.   Your account has been temporarily suspended for security reasons.    To reactivate your account immediately, click the link below and verify your details:   http://secure-bank-verification.com/login  Failure to verify within 24 hours will result in permanent account closure.  Sincerely,   Security Team   [Your Bank Name]


Prediction: Phishing


In [7]:
# Sample messages for testing, kept in two groups and then combined.

# Fake / scam messages
scam_messages = [
    "Your bank account has been blocked. Click here to verify: http://fakebank-login.com",
    "Congratulations! You have won ₹50,000. Claim your prize now: http://scam-link.com",
    "Your UPI transaction failed. Login immediately to avoid account suspension.",
    "Alert: Someone is trying to hack your account. Verify now to secure it!",
    "You received a gift card worth $1000. Redeem it immediately: http://phishing-link.com",
]

# Normal messages
normal_messages = [
    "Hey, are we meeting for lunch today?",
    "Reminder: Your electricity bill is due tomorrow.",
    "Can you send me the report by 5 PM?",
    "Happy Birthday! Wishing you a wonderful day.",
    "Don't forget the meeting with the team at 10 AM.",
]

# Scam examples come first, followed by the normal ones
test_messages = [*scam_messages, *normal_messages]


In [8]:
# Classify every test message and print it next to its predicted label,
# numbering the messages from 1 and separating entries with a dashed rule.
for number, message in enumerate(test_messages, start=1):
    label = predict_email(message)
    print(f"Message {number}: {message}")
    print(f"Prediction: {label}")
    print("-" * 60)


Message 1: Your bank account has been blocked. Click here to verify: http://fakebank-login.com
Prediction: Phishing
------------------------------------------------------------
Message 2: Congratulations! You have won ₹50,000. Claim your prize now: http://scam-link.com
Prediction: Phishing
------------------------------------------------------------
Message 3: Your UPI transaction failed. Login immediately to avoid account suspension.
Prediction: Phishing
------------------------------------------------------------
Message 4: Alert: Someone is trying to hack your account. Verify now to secure it!
Prediction: Phishing
------------------------------------------------------------
Message 5: You received a gift card worth $1000. Redeem it immediately: http://phishing-link.com
Prediction: Phishing
------------------------------------------------------------
Message 6: Hey, are we meeting for lunch today?
Prediction: Legitimate
------------------------------------------------------------
Mes

In [9]:
# Step 4: Paste teacher's email here (one list entry per line of the email)
teacher_email = "\n".join([
    "Dear Customer,",
    "Your bank account has been temporarily blocked. Please click the link below to verify:",
    "http://fakebank-login.com",
    "Failure to do so will result in permanent suspension.",
])


In [10]:
# Classify the teacher's email; the label is kept in prediction_result
prediction_result = predict_email(teacher_email)
print(f"Prediction: {prediction_result}")


Prediction: Phishing


In [11]:
# Probability the model assigns to class index 1 (reported as "Phishing") for teacher_email.
# A single sequence needs no padding, so the text is only truncated to at most
# 512 tokens instead of being padded up to 512 tokens on every call.
inputs = tokenizer(teacher_email, return_tensors="pt", truncation=True, max_length=512)
with torch.no_grad():
    outputs = model(**inputs)
    # Softmax over the class dimension turns the logits into probabilities.
    probs = torch.softmax(outputs.logits, dim=-1)

# Row 0 is the only sequence in the batch; .item() yields a plain Python float
# so the percentage formatting does not depend on tensor formatting.
phishing_prob = probs[0, 1].item()
print(f"Prediction: {prediction_result} | Phishing Probability: {phishing_prob * 100:.2f}%")


Prediction: Phishing | Phishing Probability: 99.97%
