In [22]:
from PIL import Image
import os
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

def extract_pixel_features(image_path, resize_shape=(64, 64), mode="RGB"):
    """Return the image at *image_path* as a flat vector of raw pixel values.

    The image is converted to a single colour mode, resized to
    *resize_shape* and flattened, so that every image yields a vector of
    the same length and the vectors can be stacked into one 2-D matrix.

    Args:
        image_path: Path of the image file to read.
        resize_shape: ``(width, height)`` passed to ``Image.resize``.
        mode: Pillow mode every image is converted to before resizing.
            Pass ``None`` to keep each file's native mode (the previous
            behaviour), in which case vector lengths may differ per file.

    Returns:
        A 1-D numpy array holding the resized image's pixel values.
    """
    with Image.open(image_path) as img:
        # Grayscale, palette and RGBA files have a different channel count
        # than RGB ones; without normalising the mode the flattened vectors
        # would have different lengths and could not be stacked.
        needs_conversion = mode is not None and img.mode != mode
        normalised = img.convert(mode) if needs_conversion else img
        img_resized = normalised.resize(resize_shape)
        return np.asarray(img_resized).flatten()

def load_images_and_labels(data_dir):
    """Load every image stored as ``data_dir/<label>/<image file>``.

    Each immediate sub-directory of *data_dir* is treated as one class and
    its name is used as the label of the images it contains.

    Args:
        data_dir: Directory whose sub-directories each hold one class.

    Returns:
        A ``(features, labels)`` tuple of numpy arrays: one feature vector
        from ``extract_pixel_features`` per image, and the matching
        sub-directory name for each vector. Both arrays are empty when no
        image file is found.
    """
    features = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting makes the
    # sample order (and therefore the results) reproducible across runs.
    for label in sorted(os.listdir(data_dir)):
        label_dir = os.path.join(data_dir, label)
        if not os.path.isdir(label_dir):
            continue
        for img_name in sorted(os.listdir(label_dir)):
            img_path = os.path.join(label_dir, img_name)
            # Nested folders are not images; passing them to the image
            # loader would abort the whole load.
            if not os.path.isfile(img_path):
                continue
            features.append(extract_pixel_features(img_path))
            labels.append(label)
    return np.array(features), np.array(labels)

# End-to-end pipeline: load the images, encode the labels, reduce the pixel
# features with PCA, classify with k-nearest neighbours and print the
# evaluation on the test set.

# Dataset locations; each directory is passed to load_images_and_labels,
# which uses the sub-directory names as class labels.
train_data_dir = 'C:/Users/Administrator/Desktop/melanoma_cancer_dataset/train'
test_data_dir = 'C:/Users/Administrator/Desktop/melanoma_cancer_dataset/test'

# Load the training and test sets
train_features, train_labels = load_images_and_labels(train_data_dir)
test_features, test_labels = load_images_and_labels(test_data_dir)

# Encode the labels: the encoder is fitted on the training labels only and
# the same mapping is then reused for the test labels.
le = LabelEncoder()
train_labels_encoded = le.fit_transform(train_labels)
test_labels_encoded = le.transform(test_labels)

# PCA dimensionality reduction: fitted on the training features only, then
# the same projection is applied to the test features.
pca = PCA(n_components=0.95)  # keep 95% of the variance
train_features_pca = pca.fit_transform(train_features)
test_features_pca = pca.transform(test_features)

# Apply the KNN classifier (3 nearest neighbours) to the PCA-reduced features
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(train_features_pca, train_labels_encoded)

# Predict the encoded labels of the test set
test_pred = knn.predict(test_features_pca)

# Decode the predicted and the true labels back to their original names so
# the report below shows class names instead of integers.
test_pred_decoded = le.inverse_transform(test_pred)
test_labels_decoded = le.inverse_transform(test_labels_encoded)

# Evaluate: first argument is the true labels, second the predictions
print("Confusion Matrix:\n", confusion_matrix(test_labels_decoded, test_pred_decoded))
print("\nClassification Report:\n", classification_report(test_labels_decoded, test_pred_decoded))

Confusion Matrix:
 [[467  33]
 [ 68 432]]

Classification Report:
               precision    recall  f1-score   support

      benign       0.87      0.93      0.90       500
   malignant       0.93      0.86      0.90       500

    accuracy                           0.90      1000
   macro avg       0.90      0.90      0.90      1000
weighted avg       0.90      0.90      0.90      1000

