## Prepare labeled data

In [None]:
import numpy as np


# Feature families whose pre-computed arrays are read from ./saved_features.
methods = ['lbp', 'glcm', 'hog', 'laws']
# Maps method name -> {'features': array, 'labels': array}.
loaded = {}

for method in methods:
    # np.load on an .npz returns a lazy NpzFile that keeps the archive open;
    # the context manager closes it once both arrays have been read into memory.
    with np.load(f'./saved_features/{method}_features.npz') as data:
        loaded[method] = {
            'features': data['features'],
            'labels': data['labels'],
        }

# Every method must describe the same samples in the same order, otherwise the
# column-wise concatenation below would pair features with the wrong labels.
ref_method = methods[0]
# np.array_equal returns False on a shape mismatch, whereas `==` could
# broadcast silently or fail with an unrelated error before the message shows.
assert all(
    np.array_equal(loaded[m]['labels'], loaded[ref_method]['labels'])
    for m in methods
), "Label mismatch"

# Full design matrix: all feature families side by side, one row per sample.
X = np.concatenate([loaded[m]['features'] for m in methods], axis=1)
y = loaded[ref_method]['labels']

### Feature Selection

In [20]:
def load_and_concat_features(method_list, source=None):
    """Concatenate the cached feature matrices of several methods column-wise.

    Despite the name, nothing is read from disk here: arrays are looked up in
    an in-memory mapping shaped like
    ``{method: {'features': array, 'labels': array}}``.

    Args:
        method_list: Iterable of method names to combine, in column order.
        source: Optional mapping to look the methods up in. When omitted, the
            notebook-level ``loaded`` dict is used, so existing calls keep
            working unchanged.

    Returns:
        tuple: ``(X_concat, labels)`` where ``X_concat`` is the features of
        all requested methods joined along axis 1 and ``labels`` is the label
        array of the first method in ``method_list``.

    Raises:
        ValueError: If ``method_list`` is empty.
        KeyError: If a requested method is missing from the mapping.
    """
    feature_source = loaded if source is None else source
    # Materialise once so generators can be both checked and iterated.
    selected = list(method_list)
    if not selected:
        raise ValueError("method_list must contain at least one method")

    feature_list = [feature_source[name]['features'] for name in selected]
    # Labels are taken from the first method only; no cross-method check is
    # performed here.
    labels = feature_source[selected[0]]['labels']
    X_concat = np.concatenate(feature_list, axis=1)
    return X_concat, labels

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score


def evaluate_knn(X, y, k=3, folds=5, scale=False):
    """Cross-validate a k-nearest-neighbours classifier and average the scores.

    The data is split with a shuffled ``StratifiedKFold`` using a fixed
    ``random_state`` of 42, so repeated calls on the same data use the same
    folds.

    Args:
        X: Feature matrix, indexable by integer index arrays (one row per sample).
        y: Label array, indexable by integer index arrays.
        k: Number of neighbours passed to ``KNeighborsClassifier``.
        folds: Number of cross-validation splits.
        scale: When True, standardise the features with a ``StandardScaler``
            fitted on each training fold only (so no test-fold statistics leak
            into training) before the classifier sees them. KNN is
            distance-based, so without scaling the features with the largest
            numeric range can dominate the neighbour search. Defaults to
            False, which reproduces the unscaled behaviour.

    Returns:
        tuple: ``(mean_accuracy, mean_macro_f1)`` averaged over the folds.
    """
    if scale:
        # Imported lazily so the unscaled default path needs nothing extra.
        from sklearn.pipeline import make_pipeline
        from sklearn.preprocessing import StandardScaler

    skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=42)
    acc_scores = []
    f1_scores = []

    for train_idx, test_idx in skf.split(X, y):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        clf = KNeighborsClassifier(n_neighbors=k)
        if scale:
            # The pipeline re-fits the scaler inside fit(), i.e. per fold.
            clf = make_pipeline(StandardScaler(), clf)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        acc_scores.append(accuracy_score(y_test, y_pred))
        # Macro averaging weights every class equally regardless of its size.
        f1_scores.append(f1_score(y_test, y_pred, average='macro'))

    return np.mean(acc_scores), np.mean(f1_scores)

In [25]:
from itertools import combinations

# Score every non-empty subset of the feature families with 3-NN
# cross-validation. Each entry of `results` is (combo, accuracy, macro_f1).
# NOTE(review): features are concatenated without scaling; the recorded scores
# for combinations containing 'glcm' are almost identical to 'glcm' alone,
# which may mean one family dominates the KNN distances — confirm.
results = []
for r in range(1, len(methods)+1):
    for combo in combinations(methods, r):
        try:
            # Dedicated names so the notebook-level X / y built from all
            # methods are not rebound on every iteration.
            X_combo, y_combo = load_and_concat_features(combo)
            acc, f1 = evaluate_knn(X_combo, y_combo, k=3)
            results.append((combo, acc, f1))
            print(f"{combo} → Acc: {acc:.4f}, Macro-F1: {f1:.4f}")
        except Exception as e:
            # Best effort: report the failing combination and keep going.
            print(f"{combo} 조합에서 오류 발생: {e}")

# Report the best-performing combination, ranked by macro-F1 (index 2).
if results:
    best = max(results, key=lambda x: x[2])
    print("\n✅ Best combination:", best[0])
    print(f"   Accuracy: {best[1]:.4f}, Macro-F1: {best[2]:.4f}")
else:
    print("유효한 feature 조합 결과 없음")

('lbp',) → Acc: 0.3805, Macro-F1: 0.2632
('glcm',) → Acc: 0.2370, Macro-F1: 0.1263
('hog',) → Acc: 0.3153, Macro-F1: 0.1986
('laws',) → Acc: 0.3150, Macro-F1: 0.1978
('lbp', 'glcm') → Acc: 0.2377, Macro-F1: 0.1267
('lbp', 'hog') → Acc: 0.3205, Macro-F1: 0.2044
('lbp', 'laws') → Acc: 0.4022, Macro-F1: 0.2731
('glcm', 'hog') → Acc: 0.2913, Macro-F1: 0.1542
('glcm', 'laws') → Acc: 0.2373, Macro-F1: 0.1268
('hog', 'laws') → Acc: 0.3275, Macro-F1: 0.2059
('lbp', 'glcm', 'hog') → Acc: 0.2913, Macro-F1: 0.1543
('lbp', 'glcm', 'laws') → Acc: 0.2377, Macro-F1: 0.1268
('lbp', 'hog', 'laws') → Acc: 0.3298, Macro-F1: 0.2090
('glcm', 'hog', 'laws') → Acc: 0.2911, Macro-F1: 0.1541
('lbp', 'glcm', 'hog', 'laws') → Acc: 0.2913, Macro-F1: 0.1542

✅ Best combination: ('lbp', 'laws')
   Accuracy: 0.4022, Macro-F1: 0.2731


### Feature Vector Extraction

In [None]:
import os
import numpy as np


# Directory holding the per-method `<method>_features.npz` archives.
FEATURE_PATH = './saved_features'

def load_feature(method):
    """Read the stored feature matrix and labels for one method.

    Args:
        method: Method name; the archive read is
            ``FEATURE_PATH/<method>_features.npz``.

    Returns:
        tuple: ``(features, labels)`` — the arrays stored under the
        ``'features'`` and ``'labels'`` keys of the archive.

    Raises:
        FileNotFoundError: If the archive does not exist.
        KeyError: If the archive lacks one of the two expected keys.
    """
    path = os.path.join(FEATURE_PATH, f"{method}_features.npz")
    # np.load returns a lazy NpzFile that keeps the file open; the context
    # manager closes it after both arrays have been read into memory.
    with np.load(path) as data:
        return data['features'], data['labels']

# Selected combination: LBP + Laws features
(X_lbp, y_lbp), (X_laws, y_laws) = load_feature('lbp'), load_feature('laws')

# Both archives must carry identical labels before their columns are joined
assert np.array_equal(y_lbp, y_laws), "라벨이 일치하지 않습니다."

# Labels come from the LBP archive; features are joined column-wise
y_combined = y_lbp
X_combined = np.concatenate((X_lbp, X_laws), axis=1)

# Persist both arrays under ./prepared, creating the directory if needed
os.makedirs("./prepared", exist_ok=True)
np.save('./prepared/X_train_lbp_laws.npy', X_combined)
np.save('./prepared/y_train_lbp_laws.npy', y_combined)

# Summary of what was written
print("Feature vector saved.")
print("X shape:", X_combined.shape)
print("y shape:", y_combined.shape)

Feature vector saved.
X shape: (10292, 73)
y shape: (10292,)
