In [1]:
# ===============================================================================
# FAKE NEWS DETECTION GRADIO WEB APPLICATION
# ===============================================================================
# This script loads the trained models and creates a professional web interface
# for real-time fake news classification.

# --- Core and ML Libraries ---
import gradio as gr
import joblib
import pandas as pd
import numpy as np
import re
import string
import os

# --- Handle potential numpy/scikit-learn compatibility issues ---
import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

# --- TensorFlow for Deep Learning Model ---
# Sets TensorFlow logging to a less verbose level.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
from tensorflow.keras.utils import pad_sequences

# --- NLTK for Text Preprocessing ---
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# ==============================================================================
# 1. INITIAL SETUP AND MODEL LOADING
# ==============================================================================
print("--- Initializing Application ---")

# --- Download NLTK data (if not already present) ---
# Each corpus is probed on its own. Checking only the stopword list would
# skip the WordNet download on machines that already have stopwords, and the
# lemmatizer would then fail with LookupError on the first prediction.
_nltk_probes = {
    'stopwords': lambda: stopwords.words('english'),
    'wordnet': lambda: WordNetLemmatizer().lemmatize('articles'),
}

_missing_nltk_resources = []
for _resource_name, _probe in _nltk_probes.items():
    try:
        _probe()
    except LookupError:
        _missing_nltk_resources.append(_resource_name)

if _missing_nltk_resources:
    print("Downloading NLTK data...")
    for _resource_name in _missing_nltk_resources:
        nltk.download(_resource_name, quiet=True)

# --- Define paths to saved models ---
MODEL_DIR = "production_models"

# --- Load all models and necessary components ---
print("Loading all trained models into memory...")
# If you see 'Models are not available. Please check the console.' error:
# Ensure all required model files exist in the 'production_models' directory and are named correctly.


def _load_artifact(filename, loader, description):
    """Load one saved artifact from MODEL_DIR.

    Args:
        filename: File name inside MODEL_DIR.
        loader: Callable taking the full path and returning the loaded object.
        description: Human-readable name used in the console messages.

    Returns:
        The loaded object, or None when the file is absent or fails to load.
        Failures are reported on the console and never propagate, so one
        broken file cannot prevent the remaining models from loading.
    """
    path = os.path.join(MODEL_DIR, filename)
    if not os.path.exists(path):
        return None
    try:
        artifact = loader(path)
    except FileNotFoundError as e:
        print(f"Error: A model file was not found. {e}")
        print("Please ensure the 'production_models' directory is in the same folder as this script and contains all required model files.")
        return None
    except Exception as e:
        # Top-level boundary: report the problem and keep the app usable with
        # whichever other models load successfully.
        print(f"An unexpected error occurred during model loading ({filename}): {e}")
        return None
    print(f"✓ {description} loaded")
    return artifact


# SECURITY NOTE: joblib.load unpickles the file and can execute arbitrary code.
# Only place model files from a trusted source in MODEL_DIR.

# Load Traditional ML Model Pipelines (TF-IDF + Classifier)
model_pac = _load_artifact('passive_aggressive_pipeline.pkl', joblib.load, "Passive Aggressive model")
model_rf = _load_artifact('random_forest_pipeline.pkl', joblib.load, "Random Forest model")
model_lr = _load_artifact('logistic_regression_pipeline.pkl', joblib.load, "Logistic Regression model")

# Load Bi-LSTM Model and its Tokenizer
model_bilstm = _load_artifact('bilstm_model.h5', tf.keras.models.load_model, "Bi-LSTM model")
tokenizer_bilstm = _load_artifact('bilstm_tokenizer.pkl', joblib.load, "Bi-LSTM tokenizer")

# Check if at least one model is actually usable. The Bi-LSTM only counts when
# its tokenizer is present too, because prediction needs both.
models_available = [model_pac, model_rf, model_lr, model_bilstm]
_bilstm_ready = model_bilstm is not None and tokenizer_bilstm is not None
MODELS_LOADED = _bilstm_ready or any(
    model is not None for model in (model_pac, model_rf, model_lr)
)

if MODELS_LOADED:
    print("All available models loaded successfully.")
else:
    print(f"No usable models could be loaded from the '{MODEL_DIR}' directory.")

# ===============================================================================
# 2. TEXT PREPROCESSING FUNCTION
# ===============================================================================
# This function is used by all models to clean the input text.
# Shared preprocessing resources, built once at import time.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def clean_text(text):
    """
    Normalise raw article text into the token string fed to the models.

    Steps, in order: lowercase, drop URLs, delete ASCII punctuation, delete
    digit runs, split on whitespace, discard English stopwords, lemmatize the
    remaining words, and re-join them with single spaces.
    """
    lowered = str(text).lower()
    without_urls = re.sub(r'https?://\S+|www\.\S+', '', lowered)

    # Punctuation is deleted, not replaced by a space.
    punctuation_table = str.maketrans('', '', string.punctuation)
    without_punctuation = without_urls.translate(punctuation_table)

    without_digits = re.sub(r'\d+', '', without_punctuation)

    kept_words = []
    for word in without_digits.split():
        if word in stop_words:
            continue
        kept_words.append(lemmatizer.lemmatize(word))
    return ' '.join(kept_words)

def get_available_models():
    """
    List the display names of the models that can currently be used.

    The Bi-LSTM entry is offered only when both the network and its tokenizer
    are loaded. When nothing is usable, a single placeholder entry is returned
    so the list is never empty.
    """
    candidates = (
        ("Passive Aggressive", model_pac is not None),
        ("Random Forest", model_rf is not None),
        ("Logistic Regression", model_lr is not None),
        ("Bi-LSTM (with GloVe)", model_bilstm is not None and tokenizer_bilstm is not None),
    )
    usable = [display_name for display_name, is_loaded in candidates if is_loaded]

    # Fall back to a placeholder entry when no model could be loaded.
    return usable or ["No models available"]

# ===============================================================================
# 3. MASTER PREDICTION FUNCTION
# ===============================================================================
# This function orchestrates the prediction process based on the user's choice.

def _margin_to_probability(margin):
    """Map a signed decision margin to (0, 1) with a numerically stable logistic."""
    margin = float(margin)
    if margin >= 0:
        return float(1.0 / (1.0 + np.exp(-margin)))
    # For negative margins exponentiate the margin itself to avoid overflow.
    exp_margin = np.exp(margin)
    return float(exp_margin / (1.0 + exp_margin))


def classify_news(model_name, article_text):
    """
    Classify an article with the selected model.

    Args:
        model_name: Display name of the model, as listed by
            get_available_models().
        article_text: Raw article text entered by the user.

    Returns:
        A tuple ``(confidence_scores, result_text)``. ``confidence_scores``
        maps a label to a float score; on success the labels are
        "REAL News" and "FAKE News", on failure a single "Error" or
        "Input Error" entry. ``result_text`` is a Markdown summary or an
        error message.
    """
    if not MODELS_LOADED:
        return {"Error": 1.0}, "Models are not available. Please check the console for details."

    if not article_text or not article_text.strip():
        return {"Input Error": 1.0}, "Please enter some text to analyze."

    # Step 1: Clean the input text
    cleaned_article = clean_text(article_text)

    if not cleaned_article.strip():
        return {"Input Error": 1.0}, "The text appears to be empty after preprocessing. Please enter meaningful content."

    # Step 2: Use the selected model to predict
    if model_name == "Bi-LSTM (with GloVe)":
        if model_bilstm is None or tokenizer_bilstm is None:
            return {"Error": 1.0}, "Bi-LSTM model or tokenizer is not available. Please check if the model files exist."

        # Preprocessing for Bi-LSTM
        # NOTE(review): maxlen/padding presumably mirror the training setup - confirm.
        sequence = tokenizer_bilstm.texts_to_sequences([cleaned_article])
        padded_sequence = pad_sequences(sequence, maxlen=256, padding='post')

        # The single output unit is treated as the probability of the FAKE class.
        probability_fake = float(model_bilstm.predict(padded_sequence, verbose=0)[0][0])
        probability_real = 1 - probability_fake

        if probability_fake > 0.5:
            prediction_label = "FAKE News"
            confidence_scores = {"FAKE News": probability_fake, "REAL News": probability_real}
        else:
            prediction_label = "REAL News"
            confidence_scores = {"REAL News": probability_real, "FAKE News": probability_fake}

    else:  # For traditional ML models
        model_map = {
            "Passive Aggressive": model_pac,
            "Random Forest": model_rf,
            "Logistic Regression": model_lr
        }

        model_pipeline = model_map.get(model_name)

        if model_pipeline is None:
            return {"Error": 1.0}, f"Model '{model_name}' is not available. Please check if the model file exists."

        # The pipeline handles both TF-IDF vectorization and prediction
        prediction = model_pipeline.predict([cleaned_article])[0]
        prediction_label = "FAKE News" if prediction == 1 else "REAL News"

        # Get probability scores for confidence
        try:
            probabilities = model_pipeline.predict_proba([cleaned_article])[0]
            confidence_scores = {"REAL News": float(probabilities[0]), "FAKE News": float(probabilities[1])}
        except AttributeError:  # Passive Aggressive doesn't have predict_proba
            # Derive a score from the signed distance to the decision boundary
            # instead of reporting a made-up constant. The value is an
            # uncalibrated logistic squashing of the margin, not a true
            # probability. It assumes a positive margin means class 1 (FAKE).
            try:
                margin = model_pipeline.decision_function([cleaned_article])[0]
                probability_fake = _margin_to_probability(margin)
            except (AttributeError, TypeError, ValueError):
                # No usable margin either: keep the previous fixed default.
                probability_fake = 0.85 if prediction == 1 else 0.15
            confidence_scores = {"REAL News": 1.0 - probability_fake, "FAKE News": probability_fake}

    # Step 3: Format the output
    result_text = f"## Analysis Result\n\n**Model Used:** {model_name}\n\n**Classification:** {prediction_label}\n\n---\n\n*The confidence scores below indicate the model's certainty in its prediction.*"
    return confidence_scores, result_text

# ===============================================================================
# 4. GRADIO INTERFACE DEFINITION
# ===============================================================================
# Defines the layout and components of the web application.

# --- Enhanced Dark Mode CSS with #E3256B accent color ---
# Raw stylesheet handed to gr.Blocks(css=...). Everything between the triple
# quotes is runtime text sent to the browser, so the /* ... */ comments inside
# are CSS comments, not Python comments.
# NOTE(review): a rule only takes effect when the rendered page contains a
# matching id/class (for example `#header` needs a component created with
# elem_id="header"). The `.gradio-*` class names should be verified against
# the DOM produced by the installed Gradio version.
custom_css = """
/* Import Google Fonts */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');

/* Global Dark Theme */
* {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif !important;
}

body, .gradio-container {
    background: linear-gradient(135deg, #0f0f23 0%, #1a1a2e 50%, #16213e 100%) !important;
    color: #e0e0e0 !important;
    min-height: 100vh;
}

.gradio-container {
    border-radius: 20px !important;
    box-shadow: 0 8px 32px rgba(227, 37, 107, 0.1), 0 4px 16px rgba(0, 0, 0, 0.3) !important;
    backdrop-filter: blur(10px) !important;
    border: 1px solid rgba(227, 37, 107, 0.1) !important;
}

/* Header Styling */
#header {
    text-align: center;
    padding: 40px 20px 30px 20px;
    background: linear-gradient(135deg, rgba(227, 37, 107, 0.1) 0%, rgba(26, 26, 46, 0.8) 100%);
    border-radius: 20px 20px 0 0;
    margin-bottom: 20px;
}

#header h1 {
    background: linear-gradient(135deg, #E3256B 0%, #ff6b9d 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    font-size: 3.2em !important;
    font-weight: 700 !important;
    margin-bottom: 10px !important;
    letter-spacing: -1px;
    text-shadow: 0 4px 8px rgba(227, 37, 107, 0.3);
}

#header p {
    color: #b0b0c0 !important;
    font-size: 1.2em !important;
    font-weight: 400 !important;
    margin-bottom: 0 !important;
    opacity: 0.9;
}

/* Section Headers */
.gradio-markdown h3 {
    color: #E3256B !important;
    font-weight: 600 !important;
    font-size: 1.3em !important;
    margin-bottom: 15px !important;
    padding-bottom: 8px;
    border-bottom: 2px solid rgba(227, 37, 107, 0.3);
}

/* Input Elements */
.gradio-textbox, .gradio-dropdown {
    background: rgba(30, 30, 50, 0.7) !important;
    border: 2px solid rgba(227, 37, 107, 0.3) !important;
    border-radius: 15px !important;
    color: #e0e0e0 !important;
    transition: all 0.3s ease !important;
}

.gradio-textbox:focus, .gradio-dropdown:focus {
    border-color: #E3256B !important;
    box-shadow: 0 0 20px rgba(227, 37, 107, 0.2) !important;
    transform: translateY(-2px);
}

.gradio-textbox textarea {
    background: transparent !important;
    color: #e0e0e0 !important;
    border: none !important;
}

/* Buttons */
.gradio-button {
    border-radius: 12px !important;
    font-weight: 600 !important;
    font-size: 1em !important;
    padding: 12px 24px !important;
    transition: all 0.3s ease !important;
    border: none !important;
    text-transform: uppercase;
    letter-spacing: 0.5px;
}

.gradio-button.primary {
    background: linear-gradient(135deg, #E3256B 0%, #ff4081 100%) !important;
    color: white !important;
    box-shadow: 0 4px 15px rgba(227, 37, 107, 0.3) !important;
}

.gradio-button.primary:hover {
    transform: translateY(-3px) !important;
    box-shadow: 0 8px 25px rgba(227, 37, 107, 0.4) !important;
}

.gradio-button.secondary {
    background: rgba(60, 60, 80, 0.8) !important;
    color: #e0e0e0 !important;
    border: 2px solid rgba(227, 37, 107, 0.3) !important;
}

.gradio-button.secondary:hover {
    background: rgba(227, 37, 107, 0.1) !important;
    border-color: #E3256B !important;
    transform: translateY(-2px) !important;
}

/* Output Elements */
.gradio-label {
    background: rgba(30, 30, 50, 0.8) !important;
    border: 2px solid rgba(227, 37, 107, 0.3) !important;
    border-radius: 15px !important;
    color: #e0e0e0 !important;
    padding: 20px !important;
    margin: 10px 0 !important;
}

.gradio-markdown {
    background: rgba(30, 30, 50, 0.6) !important;
    border-radius: 15px !important;
    padding: 20px !important;
    border: 1px solid rgba(227, 37, 107, 0.2) !important;
    color: #e0e0e0 !important;
}

/* Examples Section */
.gradio-examples {
    background: rgba(25, 25, 40, 0.8) !important;
    border-radius: 15px !important;
    padding: 20px !important;
    border: 1px solid rgba(227, 37, 107, 0.2) !important;
    margin-top: 20px !important;
}

/* Scrollbar */
::-webkit-scrollbar {
    width: 8px;
}

::-webkit-scrollbar-track {
    background: rgba(30, 30, 50, 0.5);
    border-radius: 10px;
}

::-webkit-scrollbar-thumb {
    background: linear-gradient(135deg, #E3256B 0%, #ff4081 100%);
    border-radius: 10px;
}

::-webkit-scrollbar-thumb:hover {
    background: linear-gradient(135deg, #ff4081 0%, #E3256B 100%);
}

/* Animation for results */
.gradio-label, .gradio-markdown {
    animation: slideIn 0.5s ease-out;
}

@keyframes slideIn {
    from {
        opacity: 0;
        transform: translateY(20px);
    }
    to {
        opacity: 1;
        transform: translateY(0);
    }
}

/* Dropdown styling */
.gradio-dropdown select {
    background: rgba(30, 30, 50, 0.9) !important;
    color: #e0e0e0 !important;
    border: none !important;
}

/* Footer removal */
footer {
    display: none !important;
}

/* Loading indicator */
.loading {
    border: 3px solid rgba(227, 37, 107, 0.3);
    border-top: 3px solid #E3256B;
    border-radius: 50%;
    width: 30px;
    height: 30px;
    animation: spin 1s linear infinite;
    margin: 20px auto;
}

@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}
"""

# ===============================================================================
# 5. GRADIO INTERFACE LAYOUT
# ===============================================================================

# --- Building the interface with Gradio Blocks ---

# Text shown in the results panel before the first analysis and after "Clear".
_READY_MESSAGE = "**Ready for Analysis**\n\nSelect a model, enter your news article, and click 'Analyze News' to get started.\n\n---\n\n*Results will appear here with detailed confidence scores.*"

# Shortest input, in whitespace-separated words, accepted for analysis.
_MIN_WORDS = 5

# Dark theme; built separately so the `with` statement below stays readable.
_app_theme = gr.themes.Base(primary_hue="pink", secondary_hue="purple", neutral_hue="slate").set(
    body_background_fill="*neutral_950",
    background_fill_primary="*neutral_900",
    background_fill_secondary="*neutral_800",
    border_color_primary="*primary_500",
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700"
)

with gr.Blocks(css=custom_css, theme=_app_theme) as app:

    # --- Header Section ---
    # elem_id="header" gives the `#header` rules in custom_css an element to
    # match; without it those rules are never applied.
    with gr.Row(elem_id="header"):
        with gr.Column():
            gr.Markdown("# Fake News Detection System")

    gr.Markdown("---")

    # --- Main Interface Section ---
    with gr.Row():
        # --- Input Column ---
        with gr.Column(scale=2):
            gr.Markdown("### 1. Select Classification Model")

            available_models = get_available_models()
            default_model = available_models[0] if available_models else "No models available"

            model_selector = gr.Dropdown(
                label="Choose AI Model",
                choices=available_models,
                value=default_model,
                info="Each model uses different algorithms for classification"
            )

            gr.Markdown("### 2. Enter News Article Text")
            article_input = gr.Textbox(
                label="Article Content",
                placeholder="Paste the complete news article text here for analysis...\n\nTip: Include headlines, body text, and any relevant details for better accuracy.",
                lines=16,
                max_lines=20,
                info="Minimum 10 words recommended for accurate classification"
            )

            with gr.Row():
                clear_button = gr.Button("Clear", variant="secondary", size="lg")
                submit_button = gr.Button("Analyze News", variant="primary", size="lg")

        # --- Output Column ---
        with gr.Column(scale=1):
            gr.Markdown("### 3. Classification Results")

            with gr.Column():
                output_text = gr.Markdown(
                    value=_READY_MESSAGE,
                    elem_classes="result-container"
                )
                output_label = gr.Label(
                    label="Confidence Scores",
                    show_label=True
                )

            gr.Markdown("### Pro Tips")
            gr.Markdown(
                """
                • **Better Results**: Use complete articles with headlines
                • **Model Selection**: Try different models for comparison  
                • **Confidence**: Higher scores indicate more certainty
                • **Speed**: Passive Aggressive is fastest, Bi-LSTM most accurate
                """,
                elem_classes="tips-container"
            )

    # --- Enhanced Button Logic ---
    def analyze_with_loading(model_name, article_text):
        """Reject inputs shorter than _MIN_WORDS words, else run classify_news.

        Returns the same ``(confidence_scores, message)`` pair as
        classify_news so both outcomes can feed the same output components.
        """
        if not article_text or len(article_text.split()) < _MIN_WORDS:
            return {"Input Error": 1.0}, f"Please enter at least {_MIN_WORDS} words for meaningful analysis."
        return classify_news(model_name, article_text)

    def reset_interface():
        """Empty the article box and scores and restore the ready message."""
        return None, _READY_MESSAGE, None

    submit_button.click(
        fn=analyze_with_loading,
        inputs=[model_selector, article_input],
        outputs=[output_label, output_text],
        api_name="classify"
    )

    clear_button.click(
        reset_interface,
        outputs=[article_input, output_text, output_label]
    )

# ===============================================================================
# 6. LAUNCH THE APPLICATION  
# ===============================================================================
if __name__ == "__main__":
    # Announce startup and report whether any model is ready.
    for banner_line in (
        "\n--- Launching Fake News Detection App ---",
        "Open the URL below in your browser to access the application:",
    ):
        print(banner_line)

    if MODELS_LOADED:
        models_status = "Loaded"
    else:
        models_status = "Failed"
    print("Models Status:", models_status)

    # Serve on the loopback interface only, with no public share link.
    launch_options = {
        "share": False,
        "server_name": "127.0.0.1",
        "server_port": 7010,
        "show_error": True,
        "quiet": False,
    }
    app.launch(**launch_options)

  from .autonotebook import tqdm as notebook_tqdm


--- Initializing Application ---
Loading all trained models into memory...
✓ Passive Aggressive model loaded
✓ Random Forest model loaded
✓ Logistic Regression model loaded




✓ Bi-LSTM model loaded
✓ Bi-LSTM tokenizer loaded
All available models loaded successfully.

--- Launching Fake News Detection App ---
Open the URL below in your browser to access the application:
Models Status: Loaded
* Running on local URL:  http://127.0.0.1:7010
* To create a public link, set `share=True` in `launch()`.
