In [1]:
import glob

import numpy as np
from skimage import io
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [2]:
tamago_path = 'data/tamagoyaki/*'
dashi_path = 'data/dashimaki/*'

In [3]:
tamago_image_paths = glob.glob(tamago_path)
dashi_image_paths = glob.glob(dashi_path)

In [4]:
tamago_image_paths[:5]

['data/tamagoyaki/tamagoyaki_0081.jpeg',
 'data/tamagoyaki/tamagoyaki_0039.jpeg',
 'data/tamagoyaki/tamagoyaki_0097.jpeg',
 'data/tamagoyaki/tamagoyaki_0078.jpeg',
 'data/tamagoyaki/tamagoyaki_0003.jpeg']

In [5]:
def read_images(image_paths, label):
    X = np.concatenate([
        io.imread(image_path).flatten().reshape(1, -1) for image_path in image_paths
    ])
    y = np.array([label]*X.shape[0])
    return X, y

In [6]:
X_tamago, y_tamago = read_images(tamago_image_paths, 1)

In [7]:
y_tamago

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [8]:
X_tamago

array([[175, 174, 170, ..., 173, 169, 160],
       [ 84,  89,  57, ..., 250, 252, 251],
       [249, 249, 247, ..., 255, 255, 255],
       ...,
       [  1,   1,   1, ..., 254, 252, 253],
       [248, 180,  73, ...,  29,  30,  32],
       [108,  64,  37, ..., 207, 206, 212]], dtype=uint8)

In [9]:
X_dashi, y_dashi = read_images(dashi_image_paths, 0)

In [10]:
y_dashi

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [11]:
X_dashi

array([[136, 140, 126, ..., 129, 141, 129],
       [223, 198, 167, ..., 196, 145,  80],
       [255, 254, 255, ..., 207, 206, 201],
       ...,
       [178, 177, 172, ..., 181, 183, 161],
       [226, 219, 226, ..., 226, 226, 234],
       [253, 252, 255, ..., 115,  78,   0]], dtype=uint8)

In [12]:
X = np.concatenate([X_tamago, X_dashi])
y = np.concatenate([y_tamago, y_dashi])

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [14]:
model = RandomForestClassifier(n_estimators=10)
model.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [15]:
model.score(X_test, y_test)

0.6612903225806451