In [None]:
import os
import gc
import random
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
import seaborn as sns
from collections import Counter
from tqdm import tqdm, trange

from skopt import BayesSearchCV
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV

from utils import Cifar10

import warnings
warnings.filterwarnings("ignore")

# Location of the dataset directory; handed to Cifar10(path) when the loader is built.
# NOTE(review): this is a relative path — presumably resolved against the notebook's
# working directory; confirm before running from another location.
path = "../data/cifar-10-batches-py"

# Data processing

## Data loading

In [None]:
# Build the project-local Cifar10 helper (imported from utils) pointed at `path`.
# The rest of the notebook uses its get_train / get_test / label_to_name methods.
dataset = Cifar10(path)

In [None]:
# Pull both splits from the loader, requesting the flattened representation.
X_train, y_train = dataset.get_train(flatten=True)
X_test, y_test = dataset.get_test(flatten=True)

# Cell output: the four shapes, in (X_train, y_train, X_test, y_test) order.
tuple(split.shape for split in (X_train, y_train, X_test, y_test))

## Dimensionality reduction (PCA)

In [None]:
# Number of principal components to keep; passed to PCA(n_components=...) in the next step.
n_components = 128

In [None]:
# Fit PCA on the training features only (the test split is merely transformed
# afterwards, so no test information leaks into the projection).
#
# random_state is pinned because, with the default svd_solver="auto",
# scikit-learn may choose the randomized SVD solver for large inputs; without a
# seed the components — and every score computed downstream — can differ
# between kernel restarts.
pca = PCA(n_components=n_components, random_state=42)
pca.fit(X_train)

# Cell output: fraction of the total variance retained by the kept components.
pca.explained_variance_ratio_.sum()

In [None]:
# Project both splits with the already-fitted PCA (train first, then test).
X_train_pca, X_test_pca = (pca.transform(split) for split in (X_train, X_test))

# Cell output: shapes of the projected train and test matrices.
X_train_pca.shape, X_test_pca.shape

## Standardization

In [None]:
# Learn the per-feature scaling statistics from the training projection only,
# then apply that same scaler to both splits.
std = StandardScaler().fit(X_train_pca)
X_train_pca_std, X_test_pca_std = (
    std.transform(split) for split in (X_train_pca, X_test_pca)
)

# Cell output: shapes of the standardized train and test matrices.
X_train_pca_std.shape, X_test_pca_std.shape

# Classification

In [None]:
# Count samples per class in each split. `labels` keeps the order in which the
# classes first appear in y_train, and both count lists follow that order.
counter = Counter(y_train)
labels = list(counter)
train_counts = [counter[label] for label in labels]
counter = Counter(y_test)
test_counts = [counter[label] for label in labels]

# Grouped bars centred on each class id: training set shifted left by 0.2,
# test set shifted right by 0.2 (legend entries read "training set" / "test set").
plt.bar(np.asarray(labels) - 0.2, train_counts, width=0.4, label="训练集")
plt.bar(np.asarray(labels) + 0.2, test_counts, width=0.4, label="测试集")

# Replace the numeric class ids on the x axis with their names from the loader.
plt.xticks(labels, list(map(dataset.label_to_name, labels)), rotation=30)
plt.legend()
plt.show()

## Logistic regression

In [None]:
# L2-regularised logistic regression trained on the standardized PCA features.
#
# The SAGA solver shuffles the training data internally, so random_state is
# pinned to make the fitted coefficients and the reported score reproducible.
#
# NOTE(review): max_iter is left at the library default and the notebook
# silences all warnings globally, so a ConvergenceWarning would not be shown —
# confirm the solver actually converges at tol=1e-3.
lr = LogisticRegression(
    tol=1e-3,
    C=1,
    solver="saga",
    penalty="l2",
    random_state=42,
)
lr.fit(X_train_pca_std, y_train)

# Cell output: mean accuracy on the test split.
lr.score(X_test_pca_std, y_test)

## Decision tree

In [None]:
# Single decision tree with default hyper-parameters on the standardized PCA features.
# The tree builder permutes features at every split, so the seed is pinned to
# keep the fitted tree and the reported accuracy reproducible across runs.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train_pca_std, y_train)

# Cell output: mean accuracy on the test split.
dt.score(X_test_pca_std, y_test)

## Extra trees

In [None]:
# Extremely-randomized-trees ensemble with default hyper-parameters on the
# standardized PCA features. Split selection is randomized by design, so the
# seed is pinned to make the ensemble and its accuracy reproducible.
et = ExtraTreesClassifier(random_state=42)
et.fit(X_train_pca_std, y_train)

# Cell output: mean accuracy on the test split.
et.score(X_test_pca_std, y_test)

In [None]:
# Unbind the raw, PCA-projected and standardized feature matrices together with
# the label vectors, then ask the garbage collector for a full pass.
# Re-running this cell raises NameError because the names no longer exist.
del (
    X_train, X_test,
    X_train_pca, X_test_pca,
    X_train_pca_std, X_test_pca_std,
    y_train, y_test,
)
# Cell output: the number of unreachable objects found by the collector.
gc.collect()