In [1]:
from model import DataLoader, PreProcessor, FeatureExtractor
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import os
import numpy as np

In [2]:
# read data
# Build the image path list and the matching label vector:
# label 0 for every file under ./data/Females, label 1 for the selected
# files under ./data/Males. The male listing is truncated to
# len(females_list) + 30 entries; the rest are left out of this pool.
females_list = os.listdir('./data/Females')
males_list = os.listdir('./data/Males')
males_list = males_list[:len(females_list) + 30]

imgs_list = [f"./data/Females/{name}" for name in females_list]
imgs_list.extend(f"./data/Males/{name}" for name in males_list)

# One label per path, in the same order as imgs_list (females first).
y = np.array([0 for _ in females_list] + [1 for _ in males_list])

In [3]:
# split data
# 80/20 split of the image paths and labels with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    imgs_list,
    y,
    test_size=0.2,
    random_state=109,
)
# Keep a handle on the raw test inputs before X_test is replaced by
# extracted features further down. Note this is an alias, not a copy.
X_test_old = X_test

In [4]:
# data pipeline
# Chains the three project-defined steps imported from `model`, applied in
# this order: DataLoader -> PreProcessor -> FeatureExtractor.
data_pipe = Pipeline(steps=[
    ('Data Loader', DataLoader()),
    ('PreProcessing', PreProcessor()),
    ('Features', FeatureExtractor()),
])

In [5]:
# transforming the data
# Fit the data pipeline on the training inputs only, then apply the fitted
# pipeline to the test inputs. Both names are rebound to the pipeline output.
X_train, X_test = (
    data_pipe.fit_transform(X_train),
    data_pipe.transform(X_test),
)

In [6]:
# model pipeline
# Scale -> PCA (80 components) -> random forest.
# Both PCA (when a randomized SVD solver is selected) and the forest are
# stochastic, so they are seeded to make the reported scores reproducible
# under Restart & Run All. 109 matches the seed used for the train/test split.
pipe = Pipeline([
    ("Standard Scaling", StandardScaler()),
    ("PCA", PCA(n_components=80, random_state=109)),
    ("Estimator", RandomForestClassifier(
        n_estimators=10000,
        random_state=109,
        # 10000 trees are independent of each other; fit/predict them on all
        # available cores instead of one. This does not change the results.
        n_jobs=-1,
    )),
])

In [16]:
# training the model
# Fits every step of the model pipeline on the training features and labels.
pipe.fit(X=X_train, y=y_train)

In [17]:
# testing the model
# Score of the fitted pipeline on the held-out test features.
pipe.score(X=X_test, y=y_test)

0.6949152542372882

In [23]:
print(len(X_test))

# add the new data
# Extend the test set with every male image that was NOT part of the original
# train/test pool. Filtering against `imgs_list` (instead of re-slicing a second
# os.listdir call and relying on identical ordering) guarantees that no image
# used for training can end up in the extended test set.
already_used = set(imgs_list)
males_list = [
    img_path
    for img_path in os.listdir('./data/Males')
    if f"./data/Males/{img_path}" not in already_used
]
extra_male_paths = [f"./data/Males/{img_path}" for img_path in males_list]

# Build fresh objects instead of mutating X_test_old / growing y_test from
# itself: re-running this cell must not append the extra images a second time.
n_original = len(X_test_old)
X_test = list(X_test_old) + extra_male_paths
y_test = list(y_test[:n_original]) + ([1] * len(males_list))

# NOTE(review): only class-1 samples are appended here, so the class balance of
# the extended test set differs from the original split and the accuracy on it
# is not directly comparable with the earlier score.

print(len(X_test))
print(len(y_test))

130
201
201


In [22]:
# The new score
# Store the extracted features under their own name rather than overwriting
# X_test: with `X_test = data_pipe.transform(X_test)` a second run of this cell
# would feed already-extracted features back into the data pipeline.
X_test_features = data_pipe.transform(X_test)
pipe.score(X_test_features, y_test)

0.8076923076923077