# 加载数据并降维

In [1]:
from tqdm.notebook import tqdm
import torch, torchvision
from torchvision.datasets import FashionMNIST
import numpy as np

In [2]:
# Load the FashionMNIST training and test splits, stored under the current directory.
# download=True fetches the files on first use, so the notebook also runs on a
# machine where the dataset is not present yet; files that already exist are reused.
trainset = FashionMNIST('.', train=True, download=True)
testset = FashionMNIST('.', train=False, download=True)

In [3]:
# Training and test sets as NumPy arrays:
# each image is flattened into one row of 28*28 pixel values,
# and the labels come straight from the dataset's targets.
X_train, Y_train = (
    trainset.data.numpy().reshape(-1, 28 * 28),
    trainset.targets.numpy(),
)
X_test, Y_test = (
    testset.data.numpy().reshape(-1, 28 * 28),
    testset.targets.numpy(),
)

In [4]:
# Dimensionality reduction with LDA (down to 9 dimensions, per the variable names).
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# The projection is fitted on the training data only and then applied to both splits.
lda = LinearDiscriminantAnalysis().fit(X_train, Y_train)
X_train_9d, X_test_9d = lda.transform(X_train), lda.transform(X_test)

# 逻辑回归

In [5]:
from sklearn.linear_model import LogisticRegression

In [6]:
# One-vs-rest logistic regression on the LDA features, using the newton-cg solver.
# NOTE(review): recent scikit-learn releases reportedly deprecate `multi_class`;
# confirm against the installed version before upgrading.
lr = LogisticRegression(solver='newton-cg', multi_class='ovr')
# Kept as the last expression so the cell still displays the fitted estimator.
lr.fit(X=X_train_9d, y=Y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='ovr', n_jobs=None, penalty='l2',
                   random_state=None, solver='newton-cg', tol=0.0001, verbose=0,
                   warm_start=False)

In [7]:
# Report the logistic-regression accuracy on the training and test splits.
print(
    "train average accuracy:",
    lr.score(X=X_train_9d, y=Y_train),
)
print(
    "test average accuracy:",
    lr.score(X=X_test_9d, y=Y_test),
)

train average accuracy: 0.8373166666666667
test average accuracy: 0.8169


# 决策树

In [8]:
from sklearn.tree import DecisionTreeClassifier, export_graphviz

In [9]:
# Search for the decision-tree depth (1..19) with the best validation accuracy.
#
# The depth is selected by 5-fold cross-validation on the TRAINING data only.
# Scoring candidates on the test set would leak test information into model
# selection and make the test accuracy reported afterwards optimistically biased.
from sklearn.model_selection import cross_val_score

best_score = 0      # best mean cross-validated accuracy seen so far
best_max_depth = 0  # depth that achieved best_score
for max_depth in tqdm(range(1, 20)):
    tree = DecisionTreeClassifier(max_depth=max_depth, random_state=0)
    # cross_val_score fits clones of `tree` on each training fold and returns
    # one accuracy per held-out fold; the mean is the selection criterion.
    score = cross_val_score(tree, X_train_9d, Y_train, cv=5).mean()
    # Strict '>' keeps the shallowest depth when several depths tie.
    if score > best_score:
        best_score = score
        best_max_depth = max_depth

HBox(children=(IntProgress(value=0, max=19), HTML(value='')))




In [10]:
# Show the depth chosen by the search loop.
print('best_max_depth:', best_max_depth, sep=' ')

best_max_depth: 11


In [11]:
# Refit a decision tree at the selected depth on the whole training set,
# then report its accuracy on the training and test splits.
tree = DecisionTreeClassifier(random_state=0, max_depth=best_max_depth)
tree.fit(X=X_train_9d, y=Y_train)
print(
    "train average accuracy:",
    tree.score(X=X_train_9d, y=Y_train),
)
print(
    "test average accuracy:",
    tree.score(X=X_test_9d, y=Y_test),
)

train average accuracy: 0.8560833333333333
test average accuracy: 0.8133


# SVM

In [12]:
from sklearn.svm import SVC

In [13]:
# Support vector classifier on the LDA features; only gamma is set explicitly,
# every other hyperparameter keeps its library default.
svm = SVC(
    gamma='auto',
)
# Kept as the last expression so the cell still displays the fitted estimator.
svm.fit(X=X_train_9d, y=Y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [14]:
# Report the SVM accuracy on the training and test splits.
print(
    "train average accuracy:",
    svm.score(X=X_train_9d, y=Y_train),
)
print(
    "test average accuracy:",
    svm.score(X=X_test_9d, y=Y_test),
)

train average accuracy: 0.8654166666666666
test average accuracy: 0.837


# AdaBoost

In [15]:
from sklearn.ensemble import AdaBoostClassifier

In [16]:
# AdaBoost (SAMME variant) over decision trees, trained on the LDA features.
# - The base-tree depth reuses best_max_depth from the depth search instead of
#   repeating its result as a literal, so the two values cannot drift apart.
# - random_state is pinned, as in the decision-tree cells, so repeated runs
#   build the same ensemble and report the same accuracies.
adaboost = AdaBoostClassifier(DecisionTreeClassifier(max_depth=best_max_depth),
                             algorithm="SAMME",
                             n_estimators=200,
                             learning_rate=0.8,
                             random_state=0)
# Kept as the last expression so the cell still displays the fitted estimator.
adaboost.fit(X_train_9d, Y_train)

AdaBoostClassifier(algorithm='SAMME',
                   base_estimator=DecisionTreeClassifier(class_weight=None,
                                                         criterion='gini',
                                                         max_depth=11,
                                                         max_features=None,
                                                         max_leaf_nodes=None,
                                                         min_impurity_decrease=0.0,
                                                         min_impurity_split=None,
                                                         min_samples_leaf=1,
                                                         min_samples_split=2,
                                                         min_weight_fraction_leaf=0.0,
                                                         presort=False,
                                                         random_state=None,
                              

In [17]:
# Report the AdaBoost accuracy on the training and test splits.
print(
    "train average accuracy:",
    adaboost.score(X=X_train_9d, y=Y_train),
)
print(
    "test average accuracy:",
    adaboost.score(X=X_test_9d, y=Y_test),
)

train average accuracy: 0.9953
test average accuracy: 0.8263


# 高斯朴素贝叶斯

In [18]:
from sklearn.naive_bayes import GaussianNB

In [19]:
# Gaussian naive Bayes on the LDA features, with default settings.
nb = GaussianNB()
# Kept as the last expression so the cell still displays the fitted estimator.
nb.fit(X=X_train_9d, y=Y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [20]:
# Report the naive-Bayes accuracy on the training and test splits.
print(
    "train average accuracy:",
    nb.score(X=X_train_9d, y=Y_train),
)
print(
    "test average accuracy:",
    nb.score(X=X_test_9d, y=Y_test),
)

train average accuracy: 0.8259
test average accuracy: 0.8109
