In [1]:
# OAuth settings for the Azure Data Lake Store service principal.
# The client secret is read from a Databricks secret scope instead of being
# stored in the notebook in plain text.
# NOTE(review): the scope/key names below are placeholders — point them at the
# real secret scope, and rotate the credential that was previously committed
# in this cell.
ADLS_SECRET_SCOPE = "adls"
ADLS_SECRET_KEY = "dog-images-client-secret"

configs = {
  "fs.adl.oauth2.access.token.provider.type": "ClientCredential",
  "fs.adl.oauth2.client.id": "4ab956bb-5a0b-45b3-a87b-22424b89ea36",
  "fs.adl.oauth2.credential": dbutils.secrets.get(scope=ADLS_SECRET_SCOPE, key=ADLS_SECRET_KEY),
  "fs.adl.oauth2.refresh.url": "https://login.microsoftonline.com/5d7e4366-1b9b-45cf-8e79-b14b27df46e1/oauth2/token"}

# dbutils.fs.mount raises when the mount point is already in use, so skip
# mounts that already exist; this keeps the cell safe to re-run.
existing_mount_points = {mount.mountPoint for mount in dbutils.fs.mounts()}

for adl_source, adl_mount_point in (
    ("adl://imagerepo.azuredatalakestore.net/dogImages", "/mnt/dogImages"),
    ("adl://imagerepo.azuredatalakestore.net/cifar-10-batches-py", "/mnt/cifar-10-batches-py"),
):
    if adl_mount_point not in existing_mount_points:
        dbutils.fs.mount(
          source = adl_source,
          mount_point = adl_mount_point,
          extra_configs = configs)

In [2]:
from sklearn.datasets import load_files       
from keras.utils import np_utils
import numpy as np
from glob2 import glob

# define function to load train, test, and validation datasets
def load_dataset(path, num_classes=134):
    """Collect image file paths and one-hot targets from a class-per-folder tree.

    Args:
        path: directory handed to ``sklearn.datasets.load_files``; each
            sub-directory is treated as one class.
        num_classes: width of the one-hot encoding. Defaults to 134, the value
            this notebook has always used.
            NOTE(review): every integer label must be < num_classes — confirm
            134 matches the number of class folders.

    Returns:
        (dog_files, dog_targets): an array of file paths and the matching
        one-hot target matrix from ``np_utils.to_categorical``.
    """
    # Only 'filenames' and 'target' are used, so skip reading every file's
    # bytes into memory (load_files does that by default).
    data = load_files(path, load_content=False)
    dog_files = np.array(data['filenames'])
    dog_targets = np_utils.to_categorical(np.array(data['target']), num_classes)
    return dog_files, dog_targets

# load train, test, and validation datasets
train_files, train_targets = load_dataset('/dbfs/mnt/dogImages/dogImages/train/')
valid_files, valid_targets = load_dataset('/dbfs/mnt/dogImages/dogImages/valid/')
test_files, test_targets = load_dataset('/dbfs/mnt/dogImages/dogImages/test/')

# load list of dog names
# The name is taken from the last path component of each class directory
# rather than from a fixed character offset, so it does not depend on the
# length of the mount prefix.
# NOTE(review): assumes directories are named "<index>.<breed>"; the text
# after the first '.' is kept (the whole name if there is no '.') — confirm.
dog_names = [item.rstrip('/').rsplit('/', 1)[-1].split('.', 1)[-1]
             for item in sorted(glob("/dbfs/mnt/dogImages/dogImages/train/*/"))]

# print statistics about the dataset
total_images = len(train_files) + len(valid_files) + len(test_files)
print('There are %d total dog categories.' % len(dog_names))
print('There are %s total dog images.\n' % total_images)
print('There are %d training dog images.' % len(train_files))
print('There are %d validation dog images.' % len(valid_files))
print('There are %d test dog images.'% len(test_files))

In [3]:
from keras.preprocessing import image                  
from tqdm import tqdm

def path_to_tensor(img_path):
    """Read one image file and return it as a batch containing a single image.

    The image is loaded through Keras with ``target_size=(224, 224)``,
    converted to an array, and given a new leading axis so that several
    results can be stacked along axis 0.
    """
    # PIL image, resized by Keras to 224x224 on load
    pil_image = image.load_img(img_path, target_size=(224, 224))
    # 3D array for the single image
    pixel_array = image.img_to_array(pil_image)
    # prepend the batch axis -> 4D array with a leading dimension of 1
    return pixel_array[np.newaxis, ...]

def paths_to_tensor(img_paths):
    """Convert every path in ``img_paths`` with ``path_to_tensor`` and stack
    the per-image results along the first axis, showing a tqdm progress bar."""
    stacked_inputs = []
    for single_path in tqdm(img_paths):
        stacked_inputs.append(path_to_tensor(single_path))
    return np.vstack(stacked_inputs)

In [4]:
from PIL import ImageFile

# Ask PIL to load truncated image files instead of raising on them.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Turn each split's file paths into one stacked image array for Keras.
train_tensors = paths_to_tensor(img_paths=train_files)
valid_tensors = paths_to_tensor(img_paths=valid_files)
test_tensors = paths_to_tensor(img_paths=test_files)

In [5]:
def extract_VGG16(tensor):
    """Run ``tensor`` through ImageNet-pretrained VGG16 without its top layers.

    Args:
        tensor: batch of images as a NumPy array.
            NOTE(review): presumably the (n, 224, 224, 3) output of
            ``paths_to_tensor`` — confirm for other callers.

    Returns:
        Whatever ``Model.predict`` returns for the preprocessed batch.
    """
    from keras.applications.vgg16 import VGG16, preprocess_input
    # preprocess_input can overwrite a float array in place. Pass a copy so the
    # caller's tensor stays raw and can be reused (re-running the cell, or
    # feeding another extractor) without being preprocessed twice. This costs
    # one extra copy of the batch in memory.
    model_input = preprocess_input(np.copy(tensor))
    # A new model is built on every call.
    return VGG16(weights='imagenet', include_top=False).predict(model_input)

def extract_VGG19(tensor):
    """Run ``tensor`` through ImageNet-pretrained VGG19 without its top layers.

    Args:
        tensor: batch of images as a NumPy array.
            NOTE(review): presumably the (n, 224, 224, 3) output of
            ``paths_to_tensor`` — confirm for other callers.

    Returns:
        Whatever ``Model.predict`` returns for the preprocessed batch.
    """
    from keras.applications.vgg19 import VGG19, preprocess_input
    # preprocess_input can overwrite a float array in place. Pass a copy so the
    # caller's tensor stays raw and can be reused (re-running the cell, or
    # feeding another extractor) without being preprocessed twice. This costs
    # one extra copy of the batch in memory.
    model_input = preprocess_input(np.copy(tensor))
    # A new model is built on every call.
    return VGG19(weights='imagenet', include_top=False).predict(model_input)

def extract_Resnet50(tensor):
    """Run ``tensor`` through ImageNet-pretrained ResNet50 without its top layers.

    Args:
        tensor: batch of images as a NumPy array.
            NOTE(review): presumably the (n, 224, 224, 3) output of
            ``paths_to_tensor`` — confirm for other callers.

    Returns:
        Whatever ``Model.predict`` returns for the preprocessed batch.
    """
    from keras.applications.resnet50 import ResNet50, preprocess_input
    # preprocess_input can overwrite a float array in place. Pass a copy so the
    # caller's tensor stays raw and can be reused (re-running the cell, or
    # feeding another extractor) without being preprocessed twice. This costs
    # one extra copy of the batch in memory.
    model_input = preprocess_input(np.copy(tensor))
    # A new model is built on every call.
    return ResNet50(weights='imagenet', include_top=False).predict(model_input)

def extract_Xception(tensor):
    """Run ``tensor`` through ImageNet-pretrained Xception without its top layers.

    Args:
        tensor: batch of images as a NumPy array.
            NOTE(review): presumably the (n, 224, 224, 3) output of
            ``paths_to_tensor`` — confirm for other callers.

    Returns:
        Whatever ``Model.predict`` returns for the preprocessed batch.
    """
    from keras.applications.xception import Xception, preprocess_input
    # preprocess_input can overwrite a float array in place. Pass a copy so the
    # caller's tensor stays raw and can be reused (re-running the cell, or
    # feeding another extractor) without being preprocessed twice. This costs
    # one extra copy of the batch in memory.
    model_input = preprocess_input(np.copy(tensor))
    # A new model is built on every call.
    return Xception(weights='imagenet', include_top=False).predict(model_input)

def extract_InceptionV3(tensor):
    """Run ``tensor`` through ImageNet-pretrained InceptionV3 without its top layers.

    Args:
        tensor: batch of images as a NumPy array.
            NOTE(review): presumably the (n, 224, 224, 3) output of
            ``paths_to_tensor`` — confirm for other callers.

    Returns:
        Whatever ``Model.predict`` returns for the preprocessed batch.
    """
    from keras.applications.inception_v3 import InceptionV3, preprocess_input
    # preprocess_input can overwrite a float array in place. Pass a copy so the
    # caller's tensor stays raw and can be reused (re-running the cell, or
    # feeding another extractor) without being preprocessed twice. This costs
    # one extra copy of the batch in memory.
    model_input = preprocess_input(np.copy(tensor))
    # A new model is built on every call.
    return InceptionV3(weights='imagenet', include_top=False).predict(model_input)

In [6]:
# ResNet50 features for the training split.
bottleneck_features_train = extract_Resnet50(tensor=train_tensors)

In [7]:
# ResNet50 features for the validation split.
bottleneck_features_valid = extract_Resnet50(tensor=valid_tensors)

In [8]:
# ResNet50 features for the test split.
bottleneck_features_test = extract_Resnet50(tensor=test_tensors)

In [9]:
# Write the ResNet50 features of each split to the mounted store with np.save.
for feature_path, feature_array in (
    ("/dbfs/mnt/dogImages/bottleneck_features/DogResnet50DataTrain", bottleneck_features_train),
    ("/dbfs/mnt/dogImages/bottleneck_features/DogResnet50DataValid", bottleneck_features_valid),
    ("/dbfs/mnt/dogImages/bottleneck_features/DogResnet50DataTest", bottleneck_features_test),
):
    np.save(feature_path, feature_array)

In [10]:
# Write the one-hot targets of each split next to the saved features.
for target_path, target_array in (
    ("/dbfs/mnt/dogImages/bottleneck_features/TrainTargets", train_targets),
    ("/dbfs/mnt/dogImages/bottleneck_features/ValidTargets", valid_targets),
    ("/dbfs/mnt/dogImages/bottleneck_features/TestTargets", test_targets),
):
    np.save(target_path, target_array)

In [11]:
# Write the list of dog names alongside the features and targets.
np.save(file="/dbfs/mnt/dogImages/bottleneck_features/DogNames", arr=dog_names)