In [2]:
import json
import os
import pickle

import numpy as np
from skimage import feature, io, transform, util, filters, morphology
from sklearn import model_selection

In [3]:
# Root folder of the dataset; every other location below is derived from it.
DATA_DIRECTORY = "data"
# Per-group sub-folders inside the dataset root.
MEN_DIRECTORY, WOMEN_DIRECTORY = (
    os.path.join(DATA_DIRECTORY, group) for group in ("men", "women")
)
# JSON Lines file holding one labelled entry per line.
LABELS_FILENAME = os.path.join(DATA_DIRECTORY, "labels.jsonl")

In [4]:
# Parallel lists: paths[i] is the "image_url" of entry i and labels[i] is its
# "label" converted to int.
paths: list[str] = []
labels: list[int] = []
# Pin the encoding so reading the JSON Lines file does not depend on the
# platform's locale default.
with open(LABELS_FILENAME, "r", encoding="utf-8") as f:
    for line in f:
        # Tolerate blank lines (e.g. a trailing newline at the end of the file),
        # which json.loads would otherwise reject with a JSONDecodeError.
        if not line.strip():
            continue
        entry = json.loads(line)
        path = entry["image_url"]
        label = int(entry["label"])
        paths.append(path)
        labels.append(label)

In [5]:
import numpy as np
from skimage import color, filters, io, morphology, measure
from utils import get_features_from_path

# Peek at the first parsed sample as a quick sanity check of the labels file.
path, label = paths[0], labels[0]
print(f"{path} {label}")

data/men/3/3_men (125).JPG 3


In [None]:
from skimage import img_as_float

# NOTE(review): `hull` and `hand` are not defined in any cell visible here, so
# this cell fails on a fresh kernel unless they come from elsewhere — TODO confirm.
# Presumably `hand` is a boolean mask and `hull` an image of the same shape; verify.
# Convert a copy of `hull` to floating point, then overwrite the entries
# selected by `hand` with 0.5.
hull_diff = img_as_float(hull.copy())
hull_diff[hand] = 0.5

# Display the resulting image.
io.imshow(hull_diff)
io.show()

In [39]:
# hull_diff = hand ^ hull

# io.imshow(hull_diff)
# io.show()

# Label `hand`, compute region properties, and keep the first region returned.
regions = measure.regionprops(measure.label(hand))
props = regions[0]

# Property names left out of the printout below.
SKIP = {
    "coords",
    "image",
    "image_convex",
    "image_filled",
    "inertia_tensor",
    "inertia_tensor_eigvals",
    "moments",
    "moments_central",
    "moments_hu",
    "moments_normalized",
}

# Print every remaining property as "name: value".
for name in props:
    if name in SKIP:
        continue
    print(f"{name}: {props[name]}")

area: 5598300.0
area_bbox: 8914664.0
area_convex: 6340244.0
area_filled: 5598300.0
axis_major_length: 4302.945240222048
axis_minor_length: 1770.5333315446928
bbox: (118, 596, 2340, 4608)
centroid: (1255.3290411374883, 2733.779756354608)
centroid_local: [1137.32904114 2137.77975635]
eccentricity: 0.9114232564282464
equivalent_diameter_area: 2669.8271373425773
euler_number: -24
extent: 0.6279877738521609
feret_diameter_max: 4188.332842552034
label: 1
orientation: -1.39768219690805
perimeter: 15067.052988109353
perimeter_crofton: 14736.387083105503
slice: (slice(118, 2340, None), slice(596, 4608, None))
solidity: 0.8829786361534351


In [None]:
N = 20  # maximum number of images kept per class
# Sample up to N images from each class (the first N in file order).
classes = np.unique(labels)
sampled_paths = []
sampled_labels = []
for c in classes:
    class_paths = [p for p, lbl in zip(paths, labels) if lbl == c][:N]
    sampled_paths.extend(class_paths)
    # Emit one label per path actually kept: a class with fewer than N images
    # must not contribute N labels, or paths and labels fall out of alignment.
    sampled_labels.extend([c] * len(class_paths))

# Downstream cells read `paths` and `labels`, so rebind them to the subset.
paths = sampled_paths
labels = sampled_labels

In [None]:
from hog import hog

# Two HOG callables side by side: `myhog` is the one imported from the `hog`
# module, `skhog` is scikit-image's `feature.hog`.
myhog, skhog = hog, feature.hog


def get_features(img_path: str, scale: float = 1 / 16) -> np.ndarray:
    """Compute a HOG descriptor for the image stored at ``img_path``.

    The image is read as grayscale, converted to floating point, rescaled by
    ``scale`` and passed to ``skhog`` with 8 orientations, 16x16-pixel cells
    and 1x1-cell blocks.

    Args:
        img_path: Location of the image file, passed to ``io.imread``.
        scale: Factor passed to ``transform.rescale``; the default keeps the
            original 1/16 downscale.

    Returns:
        The descriptor returned by ``skhog`` for the rescaled image.
    """
    img = util.img_as_float(io.imread(img_path, as_gray=True))

    # rescale() does not anti-alias by default; smoothing before shrinking
    # avoids aliasing artifacts in the small image that HOG is computed on.
    img = transform.rescale(img, scale, anti_aliasing=True)

    # NOTE(review): the descriptor length follows the rescaled image size, so
    # images of different dimensions would give vectors of different lengths —
    # confirm all inputs share one size.
    # features = myhog(img)
    features = skhog(
        img, orientations=8, pixels_per_cell=(16, 16), cells_per_block=(1, 1)
    )

    return features

In [None]:
# One feature vector per image, in the same order as `paths`.
features = list(map(get_features, paths))

In [None]:
# Persist the computed feature list to "features.pkl".
with open("features.pkl", mode="wb") as out_file:
    pickle.dump(features, out_file)

In [None]:
# with open("features.pkl", "rb") as f:
#     features = pickle.load(f)

In [None]:
# Seed shared by the train/test split and the cross-validation splitter.
random_state = 312

# Hold out 20% of the samples as a test set.
split = model_selection.train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=random_state,
)
X_train, X_test, y_train, y_test = split

In [None]:
# Shuffled, stratified 5-fold splitter seeded with `random_state`.
cv = model_selection.StratifiedKFold(
    shuffle=True,
    random_state=random_state,
    n_splits=5,
)

In [None]:
from sklearn.svm import SVC

# Linear-kernel support vector classifier.
clf = SVC(kernel="linear")

# Cross-validate on the training split with the `cv` splitter, scoring both
# plain and balanced accuracy.
result = model_selection.cross_validate(
    estimator=clf,
    X=X_train,
    y=y_train,
    scoring=("accuracy", "balanced_accuracy"),
    cv=cv,
)

result

In [None]:
# Fit the classifier on the whole training split.
clf.fit(X=X_train, y=y_train)

In [None]:
# Persist the classifier object to "model.pkl".
with open("model.pkl", mode="wb") as out_file:
    pickle.dump(clf, out_file)

In [None]:
# with open("model.pkl", "rb") as f:
#     clf = pickle.load(f)

In [None]:
# Score the classifier on the held-out test split.
clf.score(X=X_test, y=y_test)