In [60]:
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt
from skimage.feature import local_binary_pattern, hog, graycomatrix, graycoprops
from scipy import signal as sg
from tqdm import tqdm

## Prepare dataset

### feature extraction

In [61]:
def preprocess_image(image_path):
    image = cv2.imread(image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)

    gray = cv2.medianBlur(gray, 3)

    edges = cv2.Canny(gray, 100, 200)
    edges = cv2.GaussianBlur(edges, (3, 3), 0)

    combined = cv2.addWeighted(gray, 0.7, edges, 0.3, 0)

    return image, combined

def extract_lbp(gray):
    # lbp = local_binary_pattern(gray, P=8, R=1)
    lbp = local_binary_pattern(gray, P=8, R=1, method='uniform')
    # hist, _ = np.histogram(lbp.ravel(), bins=64, range=(0, 256))
    hist, _ = np.histogram(lbp.ravel(), bins=60, range=(0, 60))
    hist = hist.astype("float")
    hist /= hist.sum()
    return hist

def extract_glcm(gray):
    a_gray = gray.astype(np.uint8)
    glcm = graycomatrix(gray, distances=[1], angles=[0], levels=256, symmetric=False, normed=True)
    features = [
        graycoprops(glcm, 'contrast')[0, 0],
        graycoprops(glcm, 'dissimilarity')[0, 0],
        graycoprops(glcm, 'homogeneity')[0, 0],
        graycoprops(glcm, 'energy')[0, 0],
        graycoprops(glcm, 'correlation')[0, 0]
    ]
    return np.array(features)

def extract_hog(gray):
    resized = cv2.resize(gray, (128, 128))
    features = hog(resized,
                   orientations=8,
                   pixels_per_cell=(16, 16),
                   cells_per_block=(1, 1),
                   visualize=False,
                   channel_axis=None)
    return features

def extract_laws(gray):
    (rows, cols) = gray.shape[:2]
    smooth_kernel = (1/25)*np.ones((5,5))
    gray_smooth = sg.convolve(gray, smooth_kernel,"same")
    gray_processed = np.abs(gray - gray_smooth)
    
    filter_vectors = np.array([[ 1,  4,  6,  4, 1],
                               [-1, -2,  0,  2, 1],
                               [-1,  0,  2,  0, 1],
                               [ 1, -4,  6, -4, 1]])

    filters = [np.matmul(fv1.reshape(5, 1), fv2.reshape(1, 5))
               for fv1 in filter_vectors for fv2 in filter_vectors]

    conv_maps = np.zeros((rows, cols, 16))
    for i in range(len(filters)):
        conv_maps[:, :, i] = sg.convolve(gray_processed, filters[i], 'same')

    texture_maps = [
        (conv_maps[:, :, 1]+conv_maps[:, :, 4])//2,
        (conv_maps[:, :, 2]+conv_maps[:, :, 8])//2,
        (conv_maps[:, :, 3]+conv_maps[:, :, 12])//2,
        (conv_maps[:, :, 7]+conv_maps[:, :, 13])//2,
        (conv_maps[:, :, 6]+conv_maps[:, :, 9])//2,
        (conv_maps[:, :, 11]+conv_maps[:, :, 14])//2,
        conv_maps[:, :, 10],
        conv_maps[:, :, 5],
        conv_maps[:, :, 15]
    ]
    norm_map = conv_maps[:, :, 0]
    TEM = [np.abs(tm).sum() / np.abs(norm_map).sum() for tm in texture_maps]
    TEM = np.array(TEM)
    TEM = TEM / np.linalg.norm(TEM)
    return TEM

### process dataset

In [10]:
def process_dataset(dataset_path, method):
    features = []
    labels = []

    class_dirs = sorted([d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))])
    
    for cls in class_dirs:
        class_path = os.path.join(dataset_path, cls)
        image_files = sorted([f for f in os.listdir(class_path) if f.endswith('.png')])
        
        for img_file in tqdm(image_files, desc=f"{cls:12s} - {method.upper()}"):
            img_path = os.path.join(class_path, img_file)
            image, gray = preprocess_image(img_path)

            if method == 'lbp':
                feature = extract_lbp(gray)
            elif method == 'glcm':
                feature = extract_glcm(gray)
            elif method == 'hog':
                feature = extract_hog(gray)
            elif method == 'laws':
                feature = extract_laws(gray)
            else:
                continue

            features.append(feature)
            labels.append(cls)

    return np.array(features), np.array(labels)

## Prepare labeled data

In [11]:
from sklearn.decomposition import PCA

dataset_path = "./recaptcha-dataset/Large"
methods = ["lbp", "glcm", "hog", "laws"]

loaded = {}

for method in methods:
    features, labels = process_dataset(dataset_path, method)
    loaded[method] = {
        'features': features,
        'labels': labels
    }

ref_method = methods[0]
assert all((loaded[m]['labels'] == loaded[ref_method]['labels']).all() for m in methods), "Label mismatch"

X_raw = np.concatenate([loaded[m]['features'] for m in methods], axis=1)
y = loaded[ref_method]['labels']

pca = PCA(n_components=100)
X = pca.fit_transform(X_raw)

Bicycle      - LBP: 100%|██████████| 800/800 [00:02<00:00, 310.86it/s]
Bridge       - LBP: 100%|██████████| 553/553 [00:02<00:00, 269.42it/s]
Bus          - LBP: 100%|██████████| 1229/1229 [00:03<00:00, 335.85it/s]
Car          - LBP: 100%|██████████| 3578/3578 [00:11<00:00, 318.76it/s]
Chimney      - LBP: 100%|██████████| 56/56 [00:00<00:00, 290.43it/s]
Crosswalk    - LBP: 100%|██████████| 1260/1260 [00:04<00:00, 286.11it/s]
Hydrant      - LBP: 100%|██████████| 972/972 [00:02<00:00, 337.25it/s]
Motorcycle   - LBP: 100%|██████████| 101/101 [00:00<00:00, 304.69it/s]
Palm         - LBP: 100%|██████████| 932/932 [00:02<00:00, 341.44it/s]
Traffic Light - LBP: 100%|██████████| 811/811 [00:02<00:00, 298.86it/s]
Bicycle      - GLCM: 100%|██████████| 800/800 [00:02<00:00, 331.23it/s]
Bridge       - GLCM: 100%|██████████| 553/553 [00:02<00:00, 258.26it/s]
Bus          - GLCM: 100%|██████████| 1229/1229 [00:04<00:00, 296.06it/s]
Car          - GLCM: 100%|██████████| 3578/3578 [00:11<00:00, 305.2

### feature selection

In [62]:
def load_and_concat_features(method_list):
    feature_list = []
    labels = None
    for method in method_list:
        data = loaded[method]
        feature_list.append(data['features'])
        if labels is None:
            labels = data['labels']
    X_concat = np.concatenate(feature_list, axis=1)
    return X_concat, labels

In [63]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score


def evaluate_knn(X, y, k=3, folds=5):
    skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=42)
    acc_scores = []
    f1_scores = []

    for train_idx, test_idx in skf.split(X, y):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        clf = KNeighborsClassifier(n_neighbors=k)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        acc_scores.append(accuracy_score(y_test, y_pred))
        f1_scores.append(f1_score(y_test, y_pred, average='macro'))

    return np.mean(acc_scores), np.mean(f1_scores)

In [64]:
from itertools import combinations

results = []
for r in range(1, len(methods)+1):
    for combo in combinations(methods, r):
        try:
            X, y = load_and_concat_features(combo)
            acc, f1 = evaluate_knn(X, y, k=3)
            results.append((combo, acc, f1))
            print(f"{combo} → Acc: {acc:.4f}, Macro-F1: {f1:.4f}")
        except Exception as e:
            print(f"{combo} 조합에서 오류 발생: {e}")

# 최고 성능 조합 출력
if results:
    best = max(results, key=lambda x: x[2])
    print("\nBest combination:", best[0])
    print(f"   Accuracy: {best[1]:.4f}, Macro-F1: {best[2]:.4f}")
else:
    print("유효한 feature 조합 결과 없음")

('lbp',) → Acc: 0.3167, Macro-F1: 0.2039
('glcm',) → Acc: 0.2484, Macro-F1: 0.1363
('hog',) → Acc: 0.3117, Macro-F1: 0.1924
('laws',) → Acc: 0.2858, Macro-F1: 0.1747
('lbp', 'glcm') → Acc: 0.2495, Macro-F1: 0.1368
('lbp', 'hog') → Acc: 0.3130, Macro-F1: 0.1932
('lbp', 'laws') → Acc: 0.3495, Macro-F1: 0.2292
('glcm', 'hog') → Acc: 0.2935, Macro-F1: 0.1686
('glcm', 'laws') → Acc: 0.2493, Macro-F1: 0.1366
('hog', 'laws') → Acc: 0.3130, Macro-F1: 0.1938
('lbp', 'glcm', 'hog') → Acc: 0.2932, Macro-F1: 0.1683
('lbp', 'glcm', 'laws') → Acc: 0.2503, Macro-F1: 0.1368
('lbp', 'hog', 'laws') → Acc: 0.3149, Macro-F1: 0.1946
('glcm', 'hog', 'laws') → Acc: 0.2934, Macro-F1: 0.1685
('lbp', 'glcm', 'hog', 'laws') → Acc: 0.2936, Macro-F1: 0.1686

Best combination: ('lbp', 'laws')
   Accuracy: 0.3495, Macro-F1: 0.2292


### Feature Vector Extraction

In [65]:
import os
import numpy as np


X_lbp = loaded['lbp']['features']
X_laws = loaded['laws']['features']
y_lbp = loaded['lbp']['labels']
y_laws = loaded['laws']['labels']

# 라벨 검증
assert np.array_equal(y_lbp, y_laws), "라벨이 일치하지 않습니다."

# feature vector combination
X_combined = np.concatenate([X_lbp, X_laws], axis=1)
y_combined = y_lbp

# 저장
os.makedirs("./prepared_feature", exist_ok=True)
np.save('./prepared_feature/X_train_lbp_laws_a3.npy', X_combined)
np.save('./prepared_feature/y_train_lbp_laws_a3.npy', y_combined)

print("Feature vector saved.")
print("X shape:", X_combined.shape)
print("y shape:", y_combined.shape)

Feature vector saved.
X shape: (10292, 69)
y shape: (10292,)


## Prepare test data

In [66]:
# !git clone https://github.com/hbcbh1999/recaptcha-dataset.git

### Preprocessing

In [67]:
image_dir = "./testset"
image_list = []

for fname in sorted(os.listdir(image_dir)):
    if fname.endswith(".png"):
        img_path = os.path.join(image_dir, fname)
        image = cv2.imread(img_path)

        if image is None:
            print(f"이미지 로딩 실패: {img_path}")
            continue 
        
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.equalizeHist(gray)

        gray = cv2.medianBlur(gray, 3)

        edges = cv2.Canny(gray, 100, 200)

        combined = cv2.addWeighted(gray, 0.7, edges, 0.3, 0)

        image_list.append((fname, combined))

## Feature extraction

In [68]:
def norm_hist(hist):
    # Normalize the histogram
    hist = hist.astype('float')
    hist /= hist.sum()
    return hist

### LBP

In [69]:
lbp_features = []

# LBP
for fname, gray in image_list:
    lbp = local_binary_pattern(gray, P=8, R=1, method='uniform')

    hist_lbp, bin_lbp = np.histogram(lbp.ravel(), bins=60, range=(0, 60))
    hist_lbp = norm_hist(hist_lbp)    # 64-d
    lbp_features.append((fname, hist_lbp))

### Law's texture

In [70]:
# Law's texture
def laws_texture(gray):
    (rows, cols) = gray.shape[:2]

    smooth_kernel = (1/25)*np.ones((5,5))
    gray_smooth = sg.convolve(gray, smooth_kernel,"same")
    gray_processed = np.abs(gray - gray_smooth)

    filter_vectors = np.array([[ 1,  4,  6,  4, 1],    # L5
                               [-1, -2,  0,  2, 1],    # E5
                               [-1,  0,  2,  0, 1],    # S5
                               [ 1, -4,  6, -4, 1]])   # R5

    # 0:L5L5, 1:L5E5, 2:L5S5, 3:L5R5,
    # 4:E5L5, 5:E5E5, 6:E5S5, 7:E5R5,
    # 8:S5L5, 9:S5E5, 10:S5S5, 11:S5R5,
    # 12:R5L5, 13:R5E5, 14:R5S5, 15:R5R5
    filters = list()
    for i in range(4):
        for j in range(4):
            filters.append(np.matmul(filter_vectors[i][:].reshape(5,1),
                                     filter_vectors[j][:].reshape(1,5)))

    conv_maps = np.zeros((rows, cols,16))
    for i in range(len(filters)):
        conv_maps[:, :, i] = sg.convolve(gray_processed,
                                         filters[i],'same')

    texture_maps = list()
    texture_maps.append((conv_maps[:, :, 1]+conv_maps[:, :, 4])//2)     # L5E5 / E5L5
    texture_maps.append((conv_maps[:, :, 2]+conv_maps[:, :, 8])//2)     # L5S5 / S5L5
    texture_maps.append((conv_maps[:, :, 3]+conv_maps[:, :, 12])//2)    # L5R5 / R5L5
    texture_maps.append((conv_maps[:, :, 7]+conv_maps[:, :, 13])//2)    # E5R5 / R5E5
    texture_maps.append((conv_maps[:, :, 6]+conv_maps[:, :, 9])//2)     # E5S5 / S5E5
    texture_maps.append((conv_maps[:, :, 11]+conv_maps[:, :, 14])//2)   # S5R5 / R5S5
    texture_maps.append(conv_maps[:, :, 10])                            # S5S5
    texture_maps.append(conv_maps[:, :, 5])                             # E5E5
    texture_maps.append(conv_maps[:, :, 15])                            # R5R5
    texture_maps.append(conv_maps[:, :, 0])                             # L5L5 (use to norm TEM)

    TEM = list()
    for i in range(9):
        TEM.append(np.abs(texture_maps[i]).sum() / np.abs(texture_maps[9]).sum())

    return TEM


laws_feature = []

for fname, gray in image_list:
    laws = laws_texture(gray)    # 9-d
    laws_feature.append((fname, np.array(laws)))

### Load .npy Files

In [71]:
used_features = ['lbp', 'laws']
feature_dir = "./prepared_feature"

X_train_raw = np.load(os.path.join(feature_dir, "X_train_lbp_laws_a3.npy"))
y_train = np.load(os.path.join(feature_dir, "y_train_lbp_laws_a3.npy"))

pca = PCA(n_components=50)
X_train = pca.fit_transform(X_train_raw)

### Combine feature vectors

In [72]:
test_features = []
test_filenames = []

for (fname1, lbp), (fname2, laws) in zip(lbp_features, laws_feature):
    assert fname1 == fname2, f"{fname1} != {fname2}"
    combined = np.concatenate([lbp, laws])
    test_features.append(combined)
    test_filenames.append(fname1)

X_test_raw = np.array(test_features)

X_test = pca.transform(X_test_raw)

### KNN

In [73]:
from sklearn.neighbors import KNeighborsClassifier

classifier = KNeighborsClassifier(n_neighbors = 3)

classifier.fit(X_train, y_train)

### Task 1: Classification

In [74]:
predict_labels = classifier.predict(X_test)
print(predict_labels)    # array(100)

['Crosswalk' 'Crosswalk' 'Crosswalk' 'Palm' 'Hydrant' 'Crosswalk' 'Palm'
 'Crosswalk' 'Car' 'Car' 'Car' 'Palm' 'Crosswalk' 'Car' 'Crosswalk' 'Bus'
 'Bus' 'Chimney' 'Crosswalk' 'Palm' 'Crosswalk' 'Bus' 'Crosswalk'
 'Crosswalk' 'Palm' 'Crosswalk' 'Bus' 'Chimney' 'Crosswalk' 'Crosswalk'
 'Chimney' 'Crosswalk' 'Chimney' 'Crosswalk' 'Car' 'Crosswalk' 'Crosswalk'
 'Car' 'Bus' 'Crosswalk' 'Crosswalk' 'Traffic Light' 'Crosswalk'
 'Crosswalk' 'Car' 'Bus' 'Car' 'Car' 'Palm' 'Car' 'Crosswalk' 'Bridge'
 'Crosswalk' 'Crosswalk' 'Crosswalk' 'Bridge' 'Crosswalk' 'Chimney' 'Car'
 'Crosswalk' 'Chimney' 'Crosswalk' 'Car' 'Palm' 'Bridge' 'Car' 'Crosswalk'
 'Car' 'Crosswalk' 'Crosswalk' 'Crosswalk' 'Chimney' 'Car' 'Crosswalk'
 'Hydrant' 'Crosswalk' 'Palm' 'Crosswalk' 'Crosswalk' 'Car' 'Palm'
 'Traffic Light' 'Crosswalk' 'Car' 'Crosswalk' 'Bridge' 'Crosswalk'
 'Crosswalk' 'Crosswalk' 'Bridge' 'Crosswalk' 'Crosswalk' 'Crosswalk'
 'Crosswalk' 'Crosswalk' 'Palm' 'Crosswalk' 'Car' 'Palm' 'Crosswalk']


In [75]:
import csv

# with open('c1_t1_a1.csv','w') as file :
#     write = csv.writer(file)
#     for i, predict_label in enumerate(predict_labels):
#         write.writerow([f'query{i+1:03}.png', predict_label])

with open('c1_t1_a3.csv','w') as file :
    writer = csv.writer(file)
    for fname, pred in zip(test_filenames, predict_labels):
        writer.writerow([fname, pred])

### Task 2: Retrieval

In [76]:
neigh_ind = classifier.kneighbors(X=X_test, n_neighbors=10, return_distance=False) # Top-10 results
neigh_labels = np.array(y_train)[neigh_ind]

In [77]:
print(neigh_labels)    # array(100x10)

[['Crosswalk' 'Hydrant' 'Crosswalk' 'Crosswalk' 'Crosswalk' 'Palm'
  'Crosswalk' 'Crosswalk' 'Palm' 'Crosswalk']
 ['Crosswalk' 'Crosswalk' 'Hydrant' 'Crosswalk' 'Crosswalk' 'Crosswalk'
  'Crosswalk' 'Car' 'Car' 'Crosswalk']
 ['Crosswalk' 'Crosswalk' 'Chimney' 'Crosswalk' 'Car' 'Crosswalk'
  'Hydrant' 'Crosswalk' 'Crosswalk' 'Crosswalk']
 ['Palm' 'Palm' 'Chimney' 'Car' 'Palm' 'Traffic Light' 'Palm'
  'Traffic Light' 'Palm' 'Crosswalk']
 ['Hydrant' 'Hydrant' 'Crosswalk' 'Car' 'Hydrant' 'Crosswalk'
  'Motorcycle' 'Hydrant' 'Car' 'Hydrant']
 ['Crosswalk' 'Palm' 'Crosswalk' 'Palm' 'Crosswalk' 'Crosswalk'
  'Crosswalk' 'Traffic Light' 'Palm' 'Crosswalk']
 ['Palm' 'Palm' 'Car' 'Traffic Light' 'Car' 'Hydrant' 'Car' 'Crosswalk'
  'Car' 'Hydrant']
 ['Crosswalk' 'Crosswalk' 'Palm' 'Crosswalk' 'Crosswalk' 'Car' 'Car'
  'Crosswalk' 'Palm' 'Hydrant']
 ['Car' 'Car' 'Palm' 'Car' 'Chimney' 'Hydrant' 'Car' 'Car' 'Crosswalk'
  'Car']
 ['Hydrant' 'Car' 'Crosswalk' 'Bridge' 'Crosswalk' 'Crosswalk' 'Chimney

In [78]:
import csv

with open('c1_t2_a3.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    for fname, neigh_label in zip(test_filenames, neigh_labels):
        writer.writerow([fname] + list(neigh_label))