In [30]:
import collections
import math
import os
import random
from six.moves import urllib

from IPython.display import clear_output, Image, display, HTML

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

import tensorflow_hub as hub

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.metrics as sk_metrics
import time

from sklearn.dummy import DummyClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

Getting the image data and splitting into train and test sets

In [31]:
FLOWERS_DIR = './flower_photos'
TRAIN_FRACTION = 0.50075
RANDOM_SEED = 2018


def download_images():
  """If the images aren't already downloaded, save them to FLOWERS_DIR."""
  if not os.path.exists(FLOWERS_DIR):
    DOWNLOAD_URL = 'http://download.tensorflow.org/example_images/flower_photos.tgz'
    print('Downloading flower images from %s...' % DOWNLOAD_URL)
    urllib.request.urlretrieve(DOWNLOAD_URL, 'flower_photos.tgz')
    !tar xfz flower_photos.tgz
  print('Flower photos are located in %s' % FLOWERS_DIR)


def make_train_and_test_sets():
  """Split the data into train and test sets and get the label classes."""
  train_examples, test_examples = [], []
  shuffler = random.Random(RANDOM_SEED)
  is_root = True
  for (dirname, subdirs, filenames) in tf.gfile.Walk(FLOWERS_DIR):
    # The root directory gives us the classes
    if is_root:
      subdirs = sorted(subdirs)
      classes = collections.OrderedDict(enumerate(subdirs))
      label_to_class = dict([(x, i) for i, x in enumerate(subdirs)])
      is_root = False
    # The sub directories give us the image files for training.
    else:
      filenames.sort()
      shuffler.shuffle(filenames)
      full_filenames = [os.path.join(dirname, f) for f in filenames]
      label = dirname.split('/')[-1]
      label_class = label_to_class[label]
      # An example is the image file and it's label class.
      examples = list(zip(full_filenames, [label_class] * len(filenames)))
      num_train = int(len(filenames) * TRAIN_FRACTION)
      train_examples.extend(examples[:num_train])
      test_examples.extend(examples[num_train:])

  shuffler.shuffle(train_examples)
  shuffler.shuffle(test_examples)
  return train_examples, test_examples, classes

In [32]:
# Download the images and split the images into train and test sets.
download_images()
TRAIN_EXAMPLES, TEST_EXAMPLES, CLASSES = make_train_and_test_sets()
NUM_CLASSES = len(CLASSES)

print('\nThe dataset has %d label classes: %s' % (NUM_CLASSES, CLASSES.values()))
print('There are %d training images' % len(TRAIN_EXAMPLES))
print('there are %d test images' % len(TEST_EXAMPLES))

Flower photos are located in ./flower_photos

The dataset has 5 label classes: odict_values(['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips'])
There are 1835 training images
there are 1835 test images


In [33]:
x = TRAIN_EXAMPLES
y = TEST_EXAMPLES
z = CLASSES

In [34]:
print(z)

OrderedDict([(0, 'daisy'), (1, 'dandelion'), (2, 'roses'), (3, 'sunflowers'), (4, 'tulips')])


Using sklearn stratified Dummy Classifier to predict image classes

In [38]:
dummy_clf = DummyClassifier(strategy = "stratified")
dummy_clf.fit(x, y)
spred = dummy_clf.predict(y)
print(spred)

[['./flower_photos/tulips/4602809199_d3030cef01_m.jpg' '1']
 ['./flower_photos/roses/7820626638_3e2d712303.jpg' '4']
 ['./flower_photos/dandelion/4510938552_6f7bae172a_n.jpg' '3']
 ...
 ['./flower_photos/sunflowers/7270375648_79f0caef42_n.jpg' '3']
 ['./flower_photos/daisy/5135131051_102d4878ca_n.jpg' '1']
 ['./flower_photos/sunflowers/45045005_57354ee844.jpg' '2']]


Stratified usually yields about 1/6 or 16-17% accuracy

Using sklearn uniform Dummy Classifier to predict image classes

In [37]:
dummy_clf = DummyClassifier(strategy = "uniform")
dummy_clf.fit(x, y)
upred = dummy_clf.predict(x)
print(upred)

[['./flower_photos/dandelion/4155914848_3d57f50fc7.jpg' '4']
 ['./flower_photos/dandelion/8980145452_efbd6e3b04.jpg' '1']
 ['./flower_photos/tulips/5574219476_1f46775487_n.jpg' '3']
 ...
 ['./flower_photos/sunflowers/6112510436_9fe06e695a_n.jpg' '1']
 ['./flower_photos/roses/5050969148_a0090f762a.jpg' '2']
 ['./flower_photos/daisy/495098110_3a4bb30042_n.jpg' '0']]


Uniform usually yields anywhere from 16-17% to 50% accuracy

Using sklearn Multi Output Classifier to predict image classes

In [39]:
import numpy as np
from sklearn.datasets import make_multilabel_classification
from sklearn.multioutput import MultiOutputClassifier
from sklearn.neighbors import KNeighborsClassifier

In [41]:
x, y = make_multilabel_classification(n_classes=3, random_state=0)
clf = MultiOutputClassifier(KNeighborsClassifier()).fit(x, y)
clf.predict(x)

clf.score(x, y)

0.68

Yields an accuracy of 68%, which is much higher than the sklearn Dummy Classifier