In [6]:
from sklearn.datasets import load_breast_cancer
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from scipy.special import expit, softmax

In [7]:
# Load the breast-cancer dataset and split the bunch into its feature matrix
# (`data`) and label vector (`target`).
data = load_breast_cancer()
X, y = data['data'], data['target']

In [8]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Fit LDA on the training split. `store_covariance=True` is requested so that
# `lda.covariance_` can be inspected in a later cell.
lda = LinearDiscriminantAnalysis(store_covariance=True)
# Left as the cell's last expression so the fitted estimator's repr is displayed.
lda.fit(X_train, y_train)

LinearDiscriminantAnalysis(store_covariance=True)

In [12]:
# Hard class labels and class probabilities for the held-out samples; the
# probabilities are reused by the `predict_proba` check further down.
pred, pred_proba = lda.predict(X_test), lda.predict_proba(X_test)

`priors_`

In [25]:
# Check that `priors_` equals the empirical class frequencies of the training labels.
y_train_counts = np.unique(y_train, return_counts=True)[1]
# `np.alltrue` was removed in NumPy 2.0; `np.allclose` works on every NumPy
# version and does not depend on bit-exact floating-point equality.
np.allclose(lda.priors_, y_train_counts / y_train_counts.sum())

True

`covariance_`

In [124]:
# Rebuild `covariance_` as the prior-weighted sum of the per-class covariance
# matrices (ddof=0, i.e. normalised by the class sample count).
# Iterating over `lda.classes_` keeps the cell valid for any number of classes.
covariance = sum(
    prior * np.cov(X_train[y_train == label], rowvar=False, ddof=0)
    for label, prior in zip(lda.classes_, lda.priors_)
)
# `np.alltrue` was removed in NumPy 2.0; compare with a tolerance instead of
# relying on bit-exact floating-point equality.
np.allclose(lda.covariance_, covariance)

True

`means_`

In [131]:
# Rebuild `means_`: one row per class holding the feature-wise mean of that
# class's training samples. Iterating over `lda.classes_` avoids hard-coding
# the two labels.
means = np.array([X_train[y_train == label].mean(axis=0) for label in lda.classes_])
# `np.alltrue` was removed in NumPy 2.0; compare with a tolerance instead of
# relying on bit-exact floating-point equality.
np.allclose(lda.means_, means)

True

`xbar_`

In [140]:
# `xbar_` is checked two ways: against the overall training mean, and against
# the prior-weighted average of the class means.
# Both checks are printed because a cell only displays its last expression.
print(np.allclose(lda.xbar_, X_train.mean(axis=0)))
# `np.alltrue` was removed in NumPy 2.0; use the same tolerant comparison as above.
print(np.allclose(lda.xbar_, np.dot(lda.priors_, lda.means_)))

True
True


`predict_proba`

In [13]:
# For the two-class problem, the probability of class 1 is the logistic
# sigmoid (`expit`) of the decision function.
# `np.alltrue` was removed in NumPy 2.0; `np.allclose` also avoids depending on
# bit-exact floating-point equality.
np.allclose(pred_proba[:, 1], expit(lda.decision_function(X_test)))

True

`decision_function`

In [22]:
# The decision function is the affine map X @ coef_.T + intercept_; in the
# binary case `coef_` has a single row, so the result is flattened to 1-D.
manual_decision = (np.dot(X_test, lda.coef_.T) + lda.intercept_).ravel()
# `np.alltrue` was removed in NumPy 2.0; compare with a tolerance instead of
# relying on bit-exact floating-point equality.
np.allclose(lda.decision_function(X_test), manual_decision)

True

`coef_`
1. when `solver='svd'`

In [53]:
# Re-derive `coef_` (and the per-class intercepts) with plain NumPy, following
# the steps of the SVD-based solution, using only the fitted `means_`,
# `priors_`, `xbar_`, `classes_` and `tol` from `lda`.

# Within-class deviations: centre every training sample on its own class mean.
Xc = np.concatenate(
    [
        X_train[y_train == group, :] - lda.means_[idx]
        for idx, group in enumerate(lda.classes_)
    ],
    axis=0,
)

n_samples, n_features = X_train.shape
n_classes = len(lda.classes_)

# 1) Per-feature scaling by the within-class standard deviation.
std = Xc.std(axis=0)
std[std == 0] = 1.0  # constant features: avoid dividing by zero
fac = 1.0 / (n_samples - n_classes)

# 2) Within-class variance scaling: SVD of the centred, scaled data.
X_train_ = np.sqrt(fac) * (Xc / std)
U, S, Vt = np.linalg.svd(X_train_, full_matrices=False)

# Keep only directions whose singular value exceeds the estimator's tolerance.
rank = np.sum(S > lda.tol)
scalings = (Vt[:rank] / std).T / S[:rank]

# 3) Between-class variance scaling of the prior-weighted class centres.
# The centres are normalised by 1 / (n_classes - 1), not by the within-class
# factor `fac` (NOTE(review): this mirrors scikit-learn's SVD solver; confirm
# against the installed version). A positive constant only rescales the
# singular values, and the rank test below is relative to S[0], so `coef_` is
# the same either way.
fac_between = 1.0 if n_classes == 1 else 1.0 / (n_classes - 1)
X_train_0 = np.dot(
    (
        np.sqrt((n_samples * lda.priors_) * fac_between)
        * (lda.means_ - lda.xbar_).T
    ).T,
    scalings,
)

# SVD of the scaled centres gives the projection onto the space they span.
_, S, Vt = np.linalg.svd(X_train_0, full_matrices=False)

rank = np.sum(S > lda.tol * S[0])
scalings_ = np.dot(scalings, Vt.T[:, :rank])

# Per-class coefficients and intercepts in the original feature space.
coef = np.dot(lda.means_ - lda.xbar_, scalings_)
intercept_ = -0.5 * np.sum(coef ** 2, axis=1) + np.log(lda.priors_)
coef_ = np.dot(coef, scalings_.T)
intercept_ -= np.dot(lda.xbar_, coef_.T)

# Binary problems keep a single coefficient row: class 1 minus class 0.
if n_classes == 2:
    coef_ = np.array(coef_[1, :] - coef_[0, :], ndmin=2, dtype=X_train.dtype)

In [54]:
# The hand-computed coefficients should match the fitted estimator's `coef_`
# up to floating-point tolerance.
np.allclose(a=lda.coef_, b=coef_)

True

`intercept_`
1. when `solver = 'svd'`

In [55]:
# Binary problems keep a single intercept: class 1 minus class 0.
# The shape guard makes the cell idempotent: after the first run `intercept_`
# has shape (1,), and re-running without the guard raises IndexError on
# `intercept_[1]`.
if n_classes == 2 and np.shape(intercept_) == (n_classes,):
    intercept_ = np.array(intercept_[1] - intercept_[0], ndmin=1, dtype=X_train.dtype)

In [56]:
# The hand-computed intercept should match the fitted estimator's `intercept_`
# up to floating-point tolerance.
np.allclose(a=lda.intercept_, b=intercept_)

True