# Softmax vs One-vs-Rest (OVR) Comparison

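This notebook compares multinomial (softmax) logistic regression with one-vs-rest (OVR) logistic regression on two datasets:
- A dataset where OVR performs better (20 Newsgroups)
- A dataset where Softmax performs better (Digits)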


In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score, log_loss
from sklearn.model_selection import train_test_split

In [12]:
import warnings

# Hide only sklearn's transition warnings (keeps other important warnings visible)
warnings.filterwarnings(
    "ignore",
    message=".*'penalty' was deprecated.*",
    category=FutureWarning,
    module=r"sklearn\.linear_model\._logistic"
)

warnings.filterwarnings(
    "ignore",
    message=".*Setting penalty=None will ignore the C and l1_ratio parameters.*",
    category=UserWarning,
    module=r"sklearn\.linear_model\._logistic"
)


In [13]:
def eval_model(name, model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    name : str
        Label stored under the ``'model'`` key of the result.
    model : estimator
        Classifier exposing ``fit``, ``predict_proba`` and ``classes_``.
        It is fitted in place.
    X_train, y_train
        Training features and labels passed to ``model.fit``.
    X_test, y_test
        Held-out features and labels used for scoring.

    Returns
    -------
    dict
        ``{'model': name, 'accuracy': float, 'log_loss': float}``.
    """
    model.fit(X_train, y_train)
    proba = model.predict_proba(X_test)

    # Column j of `proba` corresponds to classes_[j], so the argmax is a column
    # index, not a label. Map it back through classes_ so accuracy is correct
    # even when the labels are not exactly 0..K-1 (strings, gaps, offsets).
    classes = np.asarray(model.classes_)
    y_pred = classes[np.argmax(proba, axis=1)]

    return {
        'model': name,
        'accuracy': accuracy_score(y_test, y_pred),
        # Pass the label order explicitly so the probability columns stay
        # aligned even if y_test happens to miss one of the training classes.
        'log_loss': log_loss(y_test, proba, labels=classes),
    }

# Digits Dataset (Softmax better)

In [14]:
from sklearn.datasets import load_digits

X, y = load_digits(return_X_y=True)
# Fixed seed: without it every run draws a different split and the table
# below is not reproducible on Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=0
)

# Same settings for both models so only the multiclass strategy differs.
# `penalty` is left at its default (L2); with C=np.inf the penalty term has
# zero weight, so both fits are effectively unregularized.
lr_params = dict(solver='lbfgs', C=np.inf, max_iter=2000)
softmax = LogisticRegression(**lr_params)                    # one joint multinomial model
ovr = OneVsRestClassifier(LogisticRegression(**lr_params))   # one binary model per class

results = [
    eval_model('Softmax', softmax, X_train, X_test, y_train, y_test),
    eval_model('OVR', ovr, X_train, X_test, y_train, y_test),
]
pd.DataFrame(results)

Unnamed: 0,model,accuracy,log_loss
0,Softmax,0.95,0.835231
1,OVR,0.919444,1.552873


# 20 Newsgroups (OVR better)

In [15]:
from sklearn.datasets import fetch_20newsgroups_vectorized

X, y = fetch_20newsgroups_vectorized(subset='train', return_X_y=True)
# Only the first 2000 documents are used -- presumably to keep the
# unregularized fits quick; TODO confirm the intended subset size.
X, y = X[:2000], y[:2000]
# Fixed seed: without it every run draws a different split and the table
# below is not reproducible on Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=0
)

# Same settings for both models so only the multiclass strategy differs.
# `penalty` is left at its default (L2); with C=np.inf the penalty term has
# zero weight, so both fits are effectively unregularized.
lr_params = dict(solver='lbfgs', C=np.inf, max_iter=2000)
softmax = LogisticRegression(**lr_params)                    # one joint multinomial model
ovr = OneVsRestClassifier(LogisticRegression(**lr_params))   # one binary model per class

results = [
    eval_model('Softmax', softmax, X_train, X_test, y_train, y_test),
    eval_model('OVR', ovr, X_train, X_test, y_train, y_test),
]
pd.DataFrame(results)

Unnamed: 0,model,accuracy,log_loss
0,Softmax,0.675,2.692767
1,OVR,0.7025,2.213393
