In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis 
from sklearn.linear_model import LogisticRegression 
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

url = "https://hastie.su.domains/ElemStatLearn/datasets/spam.data"
df = pd.read_csv(url, sep=" ", header=None)

X = df.iloc[:, :-1]  # features (all but last)
y = df.iloc[:, -1]   # target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2
)

# first LDA to classify
lda = LinearDiscriminantAnalysis()
lda.fit(X_train, y_train)
y_pred_lda = lda.predict(X_test)

# then use QDA classification
qda = QuadraticDiscriminantAnalysis()
qda.fit(X_train, y_train)
y_pred_qda = qda.predict(X_test)

# then logistic regression
log_reg = LogisticRegression(solver="lbfgs", max_iter=1000)
log_reg.fit(X_train, y_train)
y_pred_log = log_reg.predict(X_test)

# and finally support vector machine
svm = SVC()
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

# confusion matrix and error rates
def report_results(name, model, X_train, y_train, X_test, y_test):
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)
    
    print(f"{name} - Train")
    cm_train = confusion_matrix(y_train, y_pred_train)
    acc_train = accuracy_score(y_train, y_pred_train)
    print("Confusion Matrix:")
    print(cm_train)
    print(f"Accuracy: {acc_train:.4f}")
    print(f"Misclassification Rate: {1 - acc_train:.4f}")
    print("")

    print(f"{name} - Test")
    cm_test = confusion_matrix(y_test, model.predict(X_test))
    acc_test = accuracy_score(y_test, y_pred_test)
    print("Confusion Matrix:")
    print(cm_test)
    print(f"Accuracy: {acc_test:.4f}")
    print(f"Misclassification Rate: {1 - acc_test:.4f}")
    print("")

# prints
report_results("LDA", lda, X_train, y_train, X_test, y_test)
report_results("QDA", qda, X_train, y_train, X_test, y_test)
report_results("Logistic Regression", log_reg, X_train, y_train, X_test, y_test)
report_results("Support Vector Machines", svm, X_train, y_train, X_test, y_test)

  X = (
  X = (
  X = (
  self.scalings_ = scalings @ Vt.T[:, :rank]
  self.scalings_ = scalings @ Vt.T[:, :rank]
  self.scalings_ = scalings @ Vt.T[:, :rank]
  ret = a @ b
  ret = a @ b
  ret = a @ b
  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ 

LDA - Train
Confusion Matrix:
[[2111  104]
 [ 295 1170]]
Accuracy: 0.8916
Misclassification Rate: 0.1084

LDA - Test
Confusion Matrix:
[[548  25]
 [ 74 274]]
Accuracy: 0.8925
Misclassification Rate: 0.1075

QDA - Train
Confusion Matrix:
[[1553  662]
 [  47 1418]]
Accuracy: 0.8073
Misclassification Rate: 0.1927

QDA - Test
Confusion Matrix:
[[391 182]
 [ 15 333]]
Accuracy: 0.7861
Misclassification Rate: 0.2139

Logistic Regression - Train
Confusion Matrix:
[[2109  106]
 [ 157 1308]]
Accuracy: 0.9285
Misclassification Rate: 0.0715

Logistic Regression - Test
Confusion Matrix:
[[548  25]
 [ 34 314]]
Accuracy: 0.9359
Misclassification Rate: 0.0641

Support Vector Machines - Train
Confusion Matrix:
[[1956  259]
 [ 802  663]]
Accuracy: 0.7117
Misclassification Rate: 0.2883

Support Vector Machines - Test
Confusion Matrix:
[[490  83]
 [197 151]]
Accuracy: 0.6960
Misclassification Rate: 0.3040

