In [12]:
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

def load_batch(file):
    """Load one pickled batch file and return its data and labels.

    Parameters
    ----------
    file : str or path-like
        Path to a pickle file whose top-level object is a mapping containing
        the byte-string keys ``b'data'`` and ``b'labels'``.

    Returns
    -------
    tuple
        ``(data, labels)`` exactly as stored under those two keys.

    Raises
    ------
    KeyError
        If either key is missing from the unpickled mapping.
    """
    # NOTE(security): pickle.load can execute arbitrary code while loading;
    # only point this at batch files obtained from a trusted source.
    with open(file, 'rb') as fo:
        # encoding='bytes' is why the keys below are looked up as b'...'.
        batch = pickle.load(fo, encoding='bytes')
    return batch[b'data'], batch[b'labels']

def load_meta(file='batches.meta'):
    """Load the list of label names from a pickled metadata file.

    Parameters
    ----------
    file : str or path-like, optional
        Path to the metadata pickle. Defaults to ``'batches.meta'`` in the
        current working directory, which is what this function always read
        before the parameter existed. Pass an explicit path when the file
        lives elsewhere (for example next to the data batches).

    Returns
    -------
    object
        Whatever is stored under the ``b'label_names'`` key of the unpickled
        mapping.

    Raises
    ------
    KeyError
        If the ``b'label_names'`` key is missing.
    """
    # NOTE(security): pickle.load can execute arbitrary code while loading;
    # only use this on a metadata file obtained from a trusted source.
    with open(file, 'rb') as fo:
        # encoding='bytes' is why the key below is looked up as b'...'.
        meta = pickle.load(fo, encoding='bytes')
    return meta[b'label_names']

# Load the five training batches (data_batch_1 .. data_batch_5).
x = []
y = []
for i in range(1, 6):
    data, labels = load_batch(f'cifar-10-batches-py/data_batch_{i}')
    x.append(data)
    y.extend(labels)

# Concatenate the per-batch arrays into a single training set.
x_train = np.concatenate(x)
y_train = np.array(y)
# Cheap sanity check: every training row must have exactly one label.
assert len(x_train) == len(y_train), "training features and labels differ in length"

x_test, y_test = load_batch('cifar-10-batches-py/test_batch')
assert len(x_test) == len(y_test), "test features and labels differ in length"

# Baseline: random forest with default hyperparameters on the unscaled features.
# random_state is fixed so the printed report is reproducible across runs;
# n_jobs=-1 only parallelizes tree building and does not change the result.
model = RandomForestClassifier(random_state=42, n_jobs=-1)
model.fit(x_train, y_train)
y_predict = model.predict(x_test)
print(classification_report(y_test, y_predict))

(10000, 3072)
              precision    recall  f1-score   support

           0       0.54      0.56      0.55      1000
           1       0.53      0.56      0.54      1000
           2       0.37      0.33      0.35      1000
           3       0.33      0.27      0.30      1000
           4       0.39      0.39      0.39      1000
           5       0.42      0.38      0.40      1000
           6       0.46      0.56      0.51      1000
           7       0.52      0.47      0.49      1000
           8       0.58      0.61      0.59      1000
           9       0.48      0.55      0.51      1000

    accuracy                           0.47     10000
   macro avg       0.46      0.47      0.46     10000
weighted avg       0.46      0.47      0.46     10000



In [13]:
# Standardize the features: the scaler is fit on the training split only and
# the same transform is then applied to the test split.
# The results are bound to new names instead of overwriting x_train / x_test,
# so the cell's inputs stay available and re-running the cell gives the same
# result.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Same default random forest as before, now trained on the scaled features.
# random_state is fixed so the printed report is reproducible; n_jobs=-1 only
# parallelizes tree building and does not change the result.
model = RandomForestClassifier(random_state=42, n_jobs=-1)
model.fit(x_train_scaled, y_train)
y_predict = model.predict(x_test_scaled)
print(classification_report(y_test, y_predict))

              precision    recall  f1-score   support

           0       0.53      0.56      0.54      1000
           1       0.51      0.54      0.53      1000
           2       0.37      0.32      0.34      1000
           3       0.35      0.29      0.32      1000
           4       0.39      0.39      0.39      1000
           5       0.43      0.40      0.42      1000
           6       0.48      0.56      0.52      1000
           7       0.52      0.46      0.49      1000
           8       0.59      0.61      0.60      1000
           9       0.48      0.55      0.52      1000

    accuracy                           0.47     10000
   macro avg       0.47      0.47      0.47     10000
weighted avg       0.47      0.47      0.47     10000



In [14]:
# Standardize x_train / x_test as they currently stand: fit on the training
# split only, then apply the same transform to the test split.
# The results are bound to new names instead of overwriting the inputs, so
# re-running this cell does not stack another round of scaling onto them.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

model = RandomForestClassifier(
    n_estimators=200,  # increase the number of trees
    max_depth=20,      # limit the depth of each tree
    random_state=42,   # fixed seed so the printed report is reproducible
    n_jobs=-1,         # build trees in parallel; does not change the result
)
model.fit(x_train_scaled, y_train)
y_predict = model.predict(x_test_scaled)
print(classification_report(y_test, y_predict))

              precision    recall  f1-score   support

           0       0.55      0.55      0.55      1000
           1       0.54      0.55      0.54      1000
           2       0.41      0.33      0.36      1000
           3       0.35      0.26      0.30      1000
           4       0.40      0.42      0.41      1000
           5       0.45      0.41      0.43      1000
           6       0.47      0.61      0.53      1000
           7       0.52      0.49      0.50      1000
           8       0.58      0.62      0.60      1000
           9       0.47      0.57      0.52      1000

    accuracy                           0.48     10000
   macro avg       0.47      0.48      0.47     10000
weighted avg       0.47      0.48      0.47     10000

