# In [None]:
# 📊 Advanced Classification System

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.calibration import CalibratedClassifierCV
import numpy as np

class AdvancedNewsClassifier:
    """
    TF-IDF + calibrated logistic-regression news classifier.

    Features:
    - Confidence scoring (calibrated class probabilities)
    - Ranked alternative categories (the model itself is single-label)
    - Basic explainability (article terms that pushed towards the prediction)
    """

    def __init__(self):
        """
        Initializes classification components.
        You can expand this with more models or embeddings (e.g., BERT).
        """
        self.vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
        base = LogisticRegression(max_iter=1000, solver='liblinear')
        try:
            # scikit-learn >= 1.2 calls the wrapped model ``estimator``; the
            # legacy ``base_estimator`` keyword was removed in 1.4.
            self.model = CalibratedClassifierCV(estimator=base, cv=5)
        except TypeError:
            # Older releases only understand the legacy keyword.
            self.model = CalibratedClassifierCV(base_estimator=base, cv=5)
        self.pipeline = Pipeline([
            ('vectorizer', self.vectorizer),
            ('classifier', self.model)
        ])
        self.is_trained = False

    def train(self, X_train, y_train):
        """
        Trains the classifier on provided text and labels.

        Args:
            X_train (list of str): News article texts.
            y_train (list of str): Corresponding category labels.

        Note:
            Calibration uses 5-fold cross-validation, so scikit-learn needs
            at least 5 examples of every category and raises otherwise.
        """
        self.pipeline.fit(X_train, y_train)
        self.is_trained = True
        print("✅ Classifier trained successfully.")

    def predict_with_confidence(self, article_text):
        """
        Predicts the category of a news article with confidence scores.

        Args:
            article_text (str): Text of a single article.

        Returns:
            dict: {
                'primary_category': str,
                'confidence': float,
                'alternatives': List[Tuple[str, float]]
            }
            Alternatives are the remaining categories, most probable first.

        Raises:
            ValueError: If called before ``train``.
        """
        if not self.is_trained:
            raise ValueError("Classifier must be trained before prediction.")

        probs = self.pipeline.predict_proba([article_text])[0]
        labels = self.pipeline.classes_

        ranked = sorted(zip(labels, probs), key=lambda pair: pair[1], reverse=True)
        best_label, best_score = ranked[0]

        return {
            'primary_category': best_label,
            # float() keeps the payload made of plain Python numbers.
            'confidence': round(float(best_score), 4),
            'alternatives': [
                (label, round(float(score), 4)) for label, score in ranked[1:]
            ]
        }

    def explain_prediction(self, article_text, top_n=10):
        """
        Provides a basic explanation of what influenced the prediction.

        Terms that occur in the article are ranked by
        ``tf-idf value * logistic-regression weight`` for the predicted
        category; the weight is averaged over the fitted calibration folds.
        Only terms with a positive contribution are reported, so the list may
        hold fewer than ``top_n`` entries (or be empty when the article shares
        no vocabulary with the training data).

        Args:
            article_text (str): Text of a single article.
            top_n (int): Maximum number of terms to return.

        Returns:
            dict: {
                'predicted_category': label,
                'influential_features': List[str]
            }

        Raises:
            ValueError: If called before ``train``.
        """
        if not self.is_trained:
            raise ValueError("Classifier must be trained before explanation.")

        row = self.vectorizer.transform([article_text])
        feature_names = self.vectorizer.get_feature_names_out()

        predicted_class = self.pipeline.predict([article_text])[0]
        class_index = list(self.model.classes_).index(predicted_class)

        # The estimator handed to CalibratedClassifierCV is only a template;
        # the fitted copies live in ``calibrated_classifiers_`` (one per fold).
        fold_weights = []
        for calibrated in self.model.calibrated_classifiers_:
            fitted = getattr(calibrated, 'estimator', None)
            if fitted is None:
                # Attribute name used before scikit-learn 1.2.
                fitted = calibrated.base_estimator
            fold_weights.append(self._class_weights(fitted.coef_, class_index))
        mean_weights = np.mean(fold_weights, axis=0)

        return {
            'predicted_category': predicted_class,
            'influential_features': self._top_contributions(
                feature_names, mean_weights, row.toarray().ravel(), top_n
            )
        }

    @staticmethod
    def _class_weights(coef, class_index):
        """Return the per-feature weight vector in favour of ``class_index``."""
        coef = np.atleast_2d(np.asarray(coef, dtype=float))
        if coef.shape[0] == 1:
            # Binary models store one row that favours classes_[1]; its
            # negation is the evidence for classes_[0].
            return coef[0] if class_index == 1 else -coef[0]
        return coef[class_index]

    @staticmethod
    def _top_contributions(feature_names, class_weights, tfidf_row, top_n=10):
        """Return up to ``top_n`` feature names with the largest positive weight * value."""
        contributions = (
            np.asarray(class_weights, dtype=float) * np.asarray(tfidf_row, dtype=float)
        )
        # Stable sort on the negated scores gives a deterministic descending order.
        order = np.argsort(-contributions, kind='stable')[:top_n]
        return [str(feature_names[i]) for i in order if contributions[i] > 0]


# Runs whenever this cell/module is executed: prints a banner once the class
# definition has been loaded (no model is built or trained here).
print("📊 Advanced classification system ready for implementation!")