In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random

from os import listdir, path
from sklearn.model_selection import train_test_split

from data_utils import PCA, RandomForestClassifier, StandardScaler, SVC
from data_utils import classification_error, display_confusion_matrix
from data_utils import LFWUtils

from image_utils import make_image, open_image

from PIL import Image

In [2]:
# Root folder that holds one sub-folder of images per category
data_dir = "data/image"

# Sub-folder names to load, one per class
categories = [
    'family',
    'hello',
    'help',
    'house',
    'i_love_you',
    'no',
    'please',
    'sorry',
    'thankyou',
    'yes',
]

In [3]:
# Load data
def load_images(data_dir, categories, image_size=(64, 64)):
    """Load the images of every category as flattened grayscale vectors.

    Args:
        data_dir: Root directory containing one sub-directory per category.
        categories: Sequence of sub-directory names. The position of a name in
            this sequence is used as the integer label of its images.
        image_size: ``(width, height)`` every image is resized to before it is
            flattened. Defaults to ``(64, 64)``.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays: one flattened pixel row per
        successfully loaded image, and the matching integer labels.

    Entries that cannot be opened or converted are reported on stdout and
    skipped, so one bad file does not abort the whole load.
    """
    data = []
    labels = []
    for label, category in enumerate(categories):
        category_path = os.path.join(data_dir, category)
        # os.listdir returns entries in arbitrary order; sorting keeps the row
        # order reproducible across machines and runs.
        for img_name in sorted(os.listdir(category_path)):
            img_path = os.path.join(category_path, img_name)
            try:
                # The context manager closes the underlying file handle as soon
                # as the pixels have been copied out.
                with Image.open(img_path) as img:
                    # Grayscale, resize to a fixed size, then flatten to 1-D
                    gray = img.convert('L').resize(image_size)
                    pixels = np.array(gray).flatten()
            except Exception as e:
                print(f"Error loading {img_path}: {e}")
                continue
            data.append(pixels)
            labels.append(label)
    return np.array(data), np.array(labels)

In [11]:
# Read every image as a flattened pixel vector plus its integer label,
# then wrap both arrays in DataFrames.
X_np, y_np = load_images(data_dir, categories)
X = pd.DataFrame(data=X_np)
y = pd.DataFrame({'label': y_np})

# Standardize the features.
# NOTE(review): the scaler and PCA below are fit on the full dataset; if a
# train/test split is applied later, consider fitting on the training part only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Dimensionality reduction: n_components is the number of principal
# components kept after the projection.
pca = PCA(n_components=50)
X_pca = pca.fit_transform(X_scaled)

In [12]:
# Display the label DataFrame (single 'label' column)
y

Unnamed: 0,label
0,0
1,0
2,0
3,0
4,0
...,...
98,9
99,9
100,9
101,9


In [None]:
# Display the PCA-transformed features returned by pca.fit_transform
X_pca


Unnamed: 0,PC0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,...,PC40,PC41,PC42,PC43,PC44,PC45,PC46,PC47,PC48,PC49
0,47.540748,4.888209,6.924885,38.385177,-22.354920,7.974485,-24.475519,0.552878,-9.482676,6.826649,...,1.570714,-3.963675,-6.958291,0.007492,1.640817,-0.046271,1.880899,-0.839461,0.380903,-3.265490
1,60.158729,5.089598,21.754982,-10.651661,7.779653,-2.261833,7.301299,-7.545008,5.045839,0.915457,...,-1.837123,0.521926,3.239567,-2.178683,-1.524578,2.716432,0.847224,1.022084,0.536077,-3.252607
2,-31.610831,12.479342,0.750718,-14.013141,13.613487,6.236399,4.504561,5.790669,-5.227950,-10.158999,...,0.954562,1.003626,-3.120619,-3.951612,2.980242,-1.190887,-2.114405,-0.810374,2.807030,-2.349751
3,50.586047,-23.969289,27.311226,1.977539,13.079465,3.743626,-1.518113,1.803836,2.741236,2.498836,...,2.261869,2.426685,0.376067,2.973439,-0.838841,-7.149685,1.472273,1.131658,-2.157289,-1.512455
4,-76.114405,-10.026138,0.329946,-7.850632,10.217982,-13.679613,-5.677672,1.308870,-9.027713,-7.616882,...,-2.594040,5.061041,0.775774,-4.579605,-3.429032,-1.680082,2.549184,-0.033548,0.630684,-1.213748
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,-65.654641,-7.355422,-1.116914,-1.627512,5.292537,3.892864,-1.561305,12.406829,3.284898,-0.066842,...,-5.148705,-3.979458,-4.436866,-3.277646,2.840640,3.659860,-3.512440,-0.729849,0.954476,5.429907
99,-9.058135,-49.577080,-0.969567,-0.009596,-17.275299,-1.174468,2.186823,-8.428934,3.627781,3.815240,...,1.692685,-0.638174,2.425962,3.455963,-6.001058,2.642654,-5.576805,-3.261831,5.789958,-0.381687
100,47.035838,20.344979,-4.285227,-26.353319,2.126070,6.959924,14.476557,8.427870,1.961053,1.914890,...,5.013706,-0.102558,-1.980310,5.109454,3.172786,1.958027,-1.617724,2.935607,-4.876473,-2.534072
101,-40.532260,-14.899306,-2.860690,4.294184,10.507512,12.048698,-2.364850,8.403951,-3.384073,-7.262069,...,-3.078501,-2.280262,9.069843,-4.126647,2.146087,-1.674074,-3.386201,-4.091891,-2.374611,-1.209491
