<a href="https://colab.research.google.com/github/wooihaw/three_shapes_classification/blob/main/three_shapes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Render matplotlib figures inline in the notebook's cell output.
%matplotlib inline

In [None]:
!wget https://raw.githubusercontent.com/wooihaw/datasets/main/three_shapes.zip

In [None]:
!unzip three_shapes.zip

In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
from skimage.io import imread

def collect_labelled_files(top):
    """Walk ``top`` and pair every file found with the name of its folder.

    Args:
        top: Directory to traverse; each file is labelled with the name of
            the folder that directly contains it.

    Returns:
        A ``(paths, names)`` tuple of equal-length lists, where ``names[i]``
        is the containing-folder name of ``paths[i]``.
    """
    paths = []
    names = []
    for root, dirs, files in os.walk(top):
        # os.walk yields entries in arbitrary filesystem order. Sorting `dirs`
        # in place fixes the traversal order and sorting `files` fixes the
        # order within a folder, so the sample order (and any seeded split
        # computed from it) is the same on every machine.
        dirs.sort()
        files = sorted(files)
        print(f'Folder: {root}, sub-folders: {dirs}, number of files: {len(files)}')
        if not files:
            continue
        paths.extend(os.path.join(root, f) for f in files)
        # basename copes with either path separator; using a local name also
        # avoids rebinding the builtin `dir` at notebook scope.
        label = os.path.basename(root)
        names.extend([label] * len(files))
    return paths, names


filelist, labels = collect_labelled_files('three_shapes/')

In [None]:
# Sanity check: read the first collected file as grayscale and display it.
inim0 = imread(filelist[0], as_gray=True)
fig, ax = plt.subplots()
ax.imshow(inim0, cmap='gray')
plt.show()

In [None]:
# Build the feature matrix: one flattened grayscale image per row.
if not filelist:
    raise ValueError('filelist is empty: no images to load')

# Read every file into a list and convert once. Calling np.append inside the
# loop copies the whole accumulated array on each iteration (quadratic cost).
# `images` keeps the (n_images, 1, n_pixels) layout the loop used to produce.
images = np.array([imread(path, as_gray=True).reshape(1, -1) for path in filelist])

# Flatten with an explicit reshape instead of squeeze(): squeeze() would also
# drop the sample axis when there is exactly one image, and previously failed
# outright in that case because `images` was still a plain list.
X = images.reshape(len(filelist), -1)
y = np.array(labels)
print(X.shape, y.shape)

In [None]:
from sklearn.model_selection import train_test_split as split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier

# Hold out a test set, stratified on the labels, with a fixed seed.
X_train, X_test, y_train, y_test = split(X, y, random_state=42, stratify=y)

# Baseline: a default random forest fitted on the unscaled features.
rfc = RandomForestClassifier(random_state=42)
rfc.fit(X_train, y_train)
print(f'rfc score: {rfc.score(X_test, y_test)}')

In [None]:
# Grid-search a scaler -> PCA -> classifier pipeline with stratified 5-fold CV.
pipe1 = Pipeline([('scl', None), ('dr', PCA(random_state=42)), ('clf', KNeighborsClassifier())])
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Each key replaces a whole pipeline step ('scl', 'clf') or one of its
# parameters ('dr__n_components'); every combination is evaluated.
# NOTE(review): y_train holds folder-name strings; recent xgboost releases
# expect integer-encoded class labels, in which case the XGBClassifier
# candidates would fail to fit and be scored as NaN -- confirm against the
# installed xgboost version.
params = {
    'scl': [None, StandardScaler(), MinMaxScaler(), RobustScaler()],
    'dr__n_components': range(50, 151, 10),
    'clf': [
        KNeighborsClassifier(),
        LogisticRegression(random_state=42),
        RandomForestClassifier(random_state=42),
        XGBClassifier(random_state=42),
        MLPClassifier(random_state=42),
    ],
}
gs = GridSearchCV(pipe1, params, cv=folds, n_jobs=-1, verbose=2)
gs.fit(X_train, y_train)
print(gs.best_params_)

# GridSearchCV refits the winning configuration on the full training set by
# default (refit=True), so reuse that fitted pipeline instead of paying for a
# second, identical fit. `pipe1` still ends up as the fitted best pipeline.
pipe1 = gs.best_estimator_
print(f'pipe1 score: {pipe1.score(X_test, y_test)}')

In [None]:
# Report the best cross-validation score recorded by the grid search `gs`.
print(gs.best_score_)