In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
class FDA():
    """Two-class Fisher discriminant analysis for labels in {+1, -1}.

    ``fit`` learns a projection vector ``w`` and a scalar ``threshold``;
    a sample ``x`` is assigned to class ``sign(x . w - threshold)``.
    """

    def __init__(self):
        # Projection vector; replaced by a 1-D array (one entry per feature) in fit().
        self.w = 0
        # Decision threshold on the projected value x . w; set in fit().
        self.threshold = 0

    def fit(self, Xtrain, ytrain):
        """Estimate the projection vector and the decision threshold.

        Parameters
        ----------
        Xtrain : array-like, shape (n_samples, n_features)
            Training samples, one per row.
        ytrain : array-like, shape (n_samples,) or (n_samples, 1)
            Labels. Entries equal to 1 form the positive class; every other
            entry is treated as the negative class.

        Raises
        ------
        ValueError
            If either class has no samples (the class means would be NaN).
        numpy.linalg.LinAlgError
            If the within-class scatter matrix is singular.
        """
        Xtrain = np.asarray(Xtrain)
        index = (np.asarray(ytrain) == 1).ravel()
        if not index.any() or index.all():
            raise ValueError('fit() needs at least one sample of each class')

        X_pos = Xtrain[index, :]
        X_neg = Xtrain[~index, :]
        mu_pos = np.mean(X_pos, axis=0)
        mu_neg = np.mean(X_neg, axis=0)

        # Within-class scatter: sum of the (unnormalised) scatter of each class.
        centered_pos = X_pos - mu_pos
        centered_neg = X_neg - mu_neg
        Sw = centered_pos.T.dot(centered_pos) + centered_neg.T.dot(centered_neg)

        # Solve Sw w = (mu_pos - mu_neg) directly instead of forming inv(Sw):
        # same solution, fewer operations, better numerical behaviour.
        self.w = np.linalg.solve(Sw, mu_pos - mu_neg)
        # Threshold is the projection of the midpoint between the class means.
        self.threshold = self.w.dot(mu_pos + mu_neg) / 2

    def predict(self, X):
        """Return sign(X . w - threshold) for each row of X.

        The result contains +1.0 / -1.0, or 0.0 for a sample lying exactly
        on the decision boundary.
        """
        return np.sign(self.distance(X) - self.threshold)

    def score(self, Xtest, ytest):
        """Print and return the fraction of correctly classified samples.

        Parameters
        ----------
        Xtest : array-like, shape (n_samples, n_features)
        ytest : array-like, shape (n_samples,) or (n_samples, 1)
            True labels in {+1, -1}.

        Returns
        -------
        float
            Accuracy in [0, 1]. A sample exactly on the boundary (predicted
            0) counts as misclassified.

        Raises
        ------
        ValueError
            If ``ytest`` is empty.
        """
        labels = np.asarray(ytest).ravel()
        n = labels.shape[0]
        if n == 0:
            raise ValueError('score() needs at least one test sample')
        # Vectorised mismatch count; int() keeps the return a plain Python float.
        count = int(np.count_nonzero(self.predict(Xtest) != labels))
        accuracy = (n - count) / n
        print('FDA正确率为' + str(accuracy)[:6])
        return accuracy

    def distance(self, X):
        """Return the projection X . w of the sample(s) in X onto w."""
        return np.dot(X, self.w)

In [None]:
# Synthetic two-class data: two unit-covariance Gaussians in the plane.
# Samples drawn around mean1 are labelled +1, those around mean2 are labelled -1.
SEED = 0            # fixes both the sampling and the train/test split
N_PER_CLASS = 200   # number of samples drawn for each class
np.random.seed(SEED)  # makes Restart & Run All reproduce the same data

mean1 = [-5, 0]
s1 = [[1, 0], [0, 1]]
data1 = np.random.multivariate_normal(mean1, s1, N_PER_CLASS)
mean2 = [0, 5]
s2 = [[1, 0], [0, 1]]
data2 = np.random.multivariate_normal(mean2, s2, N_PER_CLASS)
X = np.vstack((data1, data2))
# Column vector of labels, in the same order as the rows of X.
ones = np.ones(N_PER_CLASS).reshape(-1, 1)
y = np.vstack((ones, -1*ones))
# 80/20 split; random_state pins the shuffle so the reported accuracies are stable.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=SEED)

In [None]:
# Fit the discriminant on the training split, print the learned projection
# vector and threshold, then score the train and test splits
# (score() prints each accuracy itself; the trailing ';' hides the cell output).
model = FDA()
model.fit(Xtrain, ytrain)
print('最佳投影向量为：', model.w, sep='')
print('分类阈值为：', model.threshold, sep='')
model.score(Xtrain, ytrain)
model.score(Xtest, ytest);

In [None]:
# Scatter the train/test samples, then overlay the FDA decision boundary
# (solid), the line x . w = 0 through the origin (dashed) and an arrow
# showing the projection direction w.
ARROW_LENGTH = 3.0  # length, in data units, of the arrow that shows w

train_index = (ytrain==1).ravel()
test_index = (ytest==1).ravel()
fig, ax = plt.subplots(figsize=(5, 5))

# Square limits with a margin of 1 around every plotted point; the test
# split is included so its markers cannot fall outside the axes.
points = np.vstack((Xtrain, Xtest))
axis_min = points.min() - 1
axis_max = points.max() + 1
ax.set_xlim(axis_min, axis_max)
ax.set_ylim(axis_min, axis_max)

ax.scatter(Xtrain[train_index, 0], Xtrain[train_index, 1], marker='.', c='b', s=100,
           label='train +1')
ax.scatter(Xtrain[~train_index, 0], Xtrain[~train_index, 1], marker='.', c='k', s=100,
           label='train -1')
ax.scatter(Xtest[test_index, 0], Xtest[test_index, 1], marker='D', c='r', label='test +1')
ax.scatter(Xtest[~test_index, 0], Xtest[~test_index, 1], marker='D', c='y', label='test -1')

# Evaluate the projection x . w on a 30x30 grid spanning the axes so that
# its level sets can be drawn as contour lines.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
axisx = np.linspace(xlim[0], xlim[1], 30)
axisy = np.linspace(ylim[0], ylim[1], 30)
axisx, axisy = np.meshgrid(axisx, axisy)
xy = np.vstack([axisx.ravel(), axisy.ravel()]).T
z = model.distance(xy).reshape(axisx.shape)
ax.contour(axisx, axisy, z, levels=[model.threshold], linestyles=['-'])
ax.contour(axisx, axisy, z, levels=[0], linestyles=['--'], alpha=0.4)

# Scale w to a fixed length. Dividing by min(w), as before, reversed the
# arrow when the smallest component was negative and blew up near zero.
w_norm = np.linalg.norm(model.w)
alpha = ARROW_LENGTH / w_norm if w_norm > 0 else 0.0
if w_norm > 0:
    ax.arrow(0, 0, alpha*model.w[0], alpha*model.w[1], head_width=0.3)

ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.legend()
ax.set_title('FDA')
plt.show();