-
Notifications
You must be signed in to change notification settings - Fork 2
/
LDA.py
100 lines (64 loc) · 2.36 KB
/
LDA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from preprocess import loadData
from preprocess import cross_10folds
# In[2]:
def func(x, w):
    """Return the dot product of ``x`` and ``w``.

    Thin wrapper around ``np.dot``; both arguments are forwarded unchanged,
    so anything ``np.dot`` accepts (scalars, vectors, matrices) works here.
    """
    projection = np.dot(x, w)
    return projection
# In[3]:
# The textbook formulation of two-class LDA.
def handw_LDA1(X, y):
    """Compute the two-class LDA projection direction for labels 0 and 1.

    Parameters
    ----------
    X : array-like
        Sample matrix with one row per sample.
    y : array-like
        Labels, one per row of ``X`` (a flat sequence or a column vector).
        Rows labelled ``0`` form the first class and rows labelled ``1`` the
        second; rows with any other label are ignored.

    Returns
    -------
    numpy.matrix
        Column vector of shape ``(n_features, 1)`` equal to
        ``Sw^-1 (mean_0 - mean_1)``, where ``Sw`` is the sum of the two
        (unnormalised) within-class scatter matrices.

    Raises
    ------
    ValueError
        If either class has no samples.
    numpy.linalg.LinAlgError
        If ``Sw`` is singular.
    """
    X = np.asarray(X)
    labels = np.asarray(y).ravel()

    # Boolean masks select each class in one vectorised step.
    class0 = X[labels == 0]
    class1 = X[labels == 1]
    if len(class0) == 0 or len(class1) == 0:
        raise ValueError("handw_LDA1 needs at least one sample of each class (0 and 1)")

    # Class centroids.
    mean0 = np.mean(class0, axis=0)
    mean1 = np.mean(class1, axis=0)

    # Within-class scatter: sum of outer products of the centred samples.
    centered0 = class0 - mean0
    centered1 = class1 - mean1
    Sw = np.dot(centered0.T, centered0) + np.dot(centered1.T, centered1)

    # Solve Sw * w = (mean0 - mean1) rather than forming the inverse
    # explicitly; the result is the same direction, computed more stably.
    mean_diff = (mean0 - mean1).reshape(-1, 1)
    w = np.linalg.solve(Sw, mean_diff)

    # np.mat no longer exists in NumPy 2.0; np.asmatrix keeps the historical
    # np.matrix return type for existing callers.
    return np.asmatrix(w)
# In[4]:
def calculate_covariance_matrix(X, Y=None):
    """Return the sample (cross-)covariance matrix of ``X`` and ``Y``.

    Parameters
    ----------
    X : array-like
        Data with one row per sample.
    Y : array-like, optional
        Second data set with the same number of rows as ``X``. When omitted
        (``None``) or empty, the covariance of ``X`` with itself is returned.

    Returns
    -------
    numpy.ndarray
        Float array equal to ``(X - mean(X)).T @ (Y - mean(Y)) / (n - 1)``,
        where ``n`` is the number of rows of ``X``.

    Raises
    ------
    ZeroDivisionError
        If ``X`` has exactly one row, because of the ``n - 1`` normalisation.
    """
    X = np.asarray(X, dtype=float)
    # "Not supplied" is decided by None / emptiness only. Testing the values
    # (``Y.any()``) would wrongly discard a legitimate all-zero Y.
    if Y is None or np.size(Y) == 0:
        Y = X
    else:
        Y = np.asarray(Y, dtype=float)

    n_samples = np.shape(X)[0]
    x_centered = X - X.mean(axis=0)
    y_centered = Y - Y.mean(axis=0)
    # Unbiased estimate: normalise by n - 1.
    covariance_matrix = (1 / (n_samples - 1)) * x_centered.T.dot(y_centered)
    return np.array(covariance_matrix, dtype=float)
def handw_LDA2(train_x, train_y):
    """Compute the two-class LDA projection direction for labels 1 and -1.

    Parameters
    ----------
    train_x : array-like
        2-D sample matrix with one row per sample.
    train_y : array-like
        Labels, one per row of ``train_x`` (a flat sequence or a column
        vector). Rows labelled ``1`` form the first class and rows labelled
        ``-1`` the second; rows with any other label are ignored.

    Returns
    -------
    numpy.ndarray
        Column vector of shape ``(n_features, 1)`` equal to
        ``pinv(Sw) @ (mean_1 - mean_-1)``, where ``Sw`` is the sum of the two
        per-class covariance matrices from ``calculate_covariance_matrix``.

    Raises
    ------
    ValueError
        If either class has no samples (previously this silently produced a
        NaN direction).

    Notes
    -----
    Prints the shape of each class subset to stdout. A class with a single
    sample makes the helper's ``1 / (n - 1)`` normalisation divide by zero.
    """
    train_x = np.asarray(train_x)
    labels = np.asarray(train_y).ravel()

    # Boolean masks select each class in one vectorised step.
    x1 = train_x[labels == 1]
    x2 = train_x[labels == -1]
    print(x1.shape)
    print(x2.shape)
    if x1.shape[0] == 0 or x2.shape[0] == 0:
        raise ValueError("handw_LDA2 needs at least one sample of each class (1 and -1)")

    # Covariance matrix of each subset; their sum is the within-class matrix.
    S1 = calculate_covariance_matrix(x1)
    S2 = calculate_covariance_matrix(x2)
    Sw = S1 + S2

    # Mean of each subset, and their difference as a column vector.
    mu1 = x1.mean(axis=0)
    mu2 = x2.mean(axis=0)
    mean_diff = np.atleast_1d(mu1 - mu2)
    mean_diff = mean_diff.reshape(train_x.shape[1], -1)

    # The pseudo-inverse keeps this well-defined even when Sw is singular.
    w = np.linalg.pinv(Sw).dot(mean_diff)
    return w
# In[5]:
def sklearn_LDA(totdata_x, totdata_y):
    """Evaluate scikit-learn's LDA classifier over 10 cross-validation folds.

    For each fold index 0..9 the data is split with ``cross_10folds``, a
    fresh ``LinearDiscriminantAnalysis`` is fitted on the training part, and
    its accuracy on the test part is printed. The mean accuracy over the ten
    folds is printed at the end.

    Parameters
    ----------
    totdata_x, totdata_y
        Full feature and label data, forwarded unchanged to
        ``cross_10folds`` (presumably it returns the train/test split for the
        given fold index; confirm against ``preprocess``).

    Returns
    -------
    float
        Mean accuracy over the ten folds. Earlier versions returned nothing,
        so callers that ignore the result are unaffected.
    """
    n_folds = 10
    total_accuracy = 0.0
    for fold in range(n_folds):
        train_x, train_y, test_x, test_y = cross_10folds(totdata_x, totdata_y, fold)
        clf = LinearDiscriminantAnalysis()
        clf.fit(train_x, train_y)

        # Predict the whole test fold at once instead of one sample per call;
        # ravel() keeps the comparison element-wise if labels are a column.
        predictions = clf.predict(test_x)
        n_correct = int(np.sum(predictions == np.ravel(test_y)))

        accuracy = n_correct / test_y.shape[0]
        total_accuracy += accuracy
        print("第 %d 次的准确率为 %f" % (fold, accuracy))

    mean_accuracy = total_accuracy / n_folds
    print("最后的准确率为 %f" % mean_accuracy)
    return mean_accuracy
# In[ ]: