In [None]:
!pip install --upgrade --ignore-installed kaggle

In [1]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle

mkdir: cannot create directory ‘/home/student/.kaggle’: File exists


In [2]:
! chmod 600 ~/.kaggle/kaggle.json


In [3]:
!kaggle datasets download -d puneet6060/intel-image-classification

intel-image-classification.zip: Skipping, found more recently modified local copy (use --force to force download)


In [11]:
!ls

 AzureML			  julia
 MMLSpark			  kaggle.json
 SparkML			  pytorch
'Untitled Folder'		  seg_pred
 Untitled.ipynb			  seg_test
 catboost			  seg_train
 h2o				  usamafile.ipynb
 intel-image-classification.zip   vgg_transfer_trained_wts.h5


In [None]:
!unzip -q intel-image-classification.zip

In [1]:
!ls

 AzureML	    h2o				     seg_pred
 MMLSpark	    intel-image-classification.zip   seg_test
 SparkML	    julia			     seg_train
'Untitled Folder'   kaggle.json			     usamafile.ipynb
 catboost	    pytorch


In [13]:
import os
import cv2
import numpy as np
from imutils import paths
from sklearn.preprocessing import LabelBinarizer
from tqdm import tqdm
def load_data(data_dir):
  data = []
  labels = []
  class_dirs = os.listdir(data_dir)
  
  for direc in class_dirs:
    class_dir = os.path.join(data_dir, direc)
    for imagepath in tqdm(list(paths.list_images(class_dir))):
      image = cv2.imread(imagepath)
      image = cv2.resize(image, (150, 150))  # incase images not of same size
      data.append(image)
      labels.append(direc)
  # normalizing and converting to numpy array format
  data = np.array(data, dtype='float')/255.0
  labels = np.array(labels)
  return data, labels

In [28]:
train_dir = "seg_train/seg_train"
test_dir = "seg_test/seg_test"
pred_dir = "data/intel-image-classification-mini/seg_pred/"

print('loading train images')
X_train, y_train = load_data(train_dir)

print('loading test images')
X_test, y_test = load_data(test_dir)

  5%|▍         | 105/2271 [00:00<00:02, 1048.27it/s]

loading train images


100%|██████████| 2271/2271 [00:02<00:00, 985.71it/s]
100%|██████████| 2382/2382 [00:02<00:00, 1093.24it/s]
100%|██████████| 2404/2404 [00:02<00:00, 1124.74it/s]
100%|██████████| 2512/2512 [00:01<00:00, 1278.05it/s]
100%|██████████| 2274/2274 [00:01<00:00, 1239.83it/s]
100%|██████████| 2191/2191 [00:02<00:00, 947.31it/s] 
0it [00:00, ?it/s]
0it [00:00, ?it/s]
 20%|██        | 95/474 [00:00<00:00, 949.62it/s]

loading test images


100%|██████████| 474/474 [00:00<00:00, 934.98it/s]
100%|██████████| 501/501 [00:00<00:00, 1076.37it/s]
100%|██████████| 553/553 [00:00<00:00, 1228.06it/s]
100%|██████████| 525/525 [00:00<00:00, 1144.48it/s]
100%|██████████| 510/510 [00:00<00:00, 1234.30it/s]
100%|██████████| 437/437 [00:00<00:00, 1092.11it/s]


In [4]:
X_train.shape

(14034, 150, 150, 3)

In [15]:
lb = LabelBinarizer()
y_train = lb.fit_transform(y_train)
y_test = lb.fit_transform(y_test)

In [16]:
y_train[0]

array([0, 1, 0, 0, 0, 0])

In [17]:
import keras
from keras.layers import Conv2D, Dense, MaxPooling2D, Flatten, Dropout
from keras.models import Sequential


vgg16 = Sequential()
vgg16.add(Conv2D(64, kernel_size=3, padding='same', activation='relu',
                 input_shape=(150, 150, 3)))
vgg16.add(Conv2D(64, kernel_size=3, padding='same', activation='relu'))
vgg16.add(MaxPooling2D(pool_size=2, strides=2))
vgg16.add(Dropout(0.25))

vgg16.add(Conv2D(128, kernel_size=3, padding='same', activation='relu'))
vgg16.add(Conv2D(128, kernel_size=3, padding='same', activation='relu'))
vgg16.add(MaxPooling2D(pool_size=2, strides=2))
vgg16.add(Dropout(0.25))

vgg16.add(Conv2D(256, kernel_size=3, padding='same', activation='relu'))
vgg16.add(Conv2D(256, kernel_size=3, padding='same', activation='relu'))
vgg16.add(Conv2D(256, kernel_size=3, padding='same', activation='relu'))
vgg16.add(MaxPooling2D(pool_size=2, strides=2))
vgg16.add(Dropout(0.25))

vgg16.add(Conv2D(512, kernel_size=3, padding='same', activation='relu'))
vgg16.add(Conv2D(512, kernel_size=3, padding='same', activation='relu'))
vgg16.add(Conv2D(512, kernel_size=3, padding='same', activation='relu'))
vgg16.add(MaxPooling2D(pool_size=2, strides=2))
vgg16.add(Dropout(0.25))

vgg16.add(Conv2D(512, kernel_size=3, activation='relu'))
vgg16.add(Conv2D(512, kernel_size=3, activation='relu'))
vgg16.add(Conv2D(512, kernel_size=3, activation='relu'))
vgg16.add(MaxPooling2D(pool_size=2, strides=2))
vgg16.add(Dropout(0.25))

vgg16.add(Flatten())

vgg16.add(Dense(4096, activation='relu'))
vgg16.add(Dense(4096, activation='relu'))
vgg16.add(Dense(4096, activation='relu'))

# the data we are using has 3 classes
vgg16.add(Dense(6, activation='softmax'))


In [None]:
vgg16.summary()

In [20]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
(X_train, X_valid, y_train, y_valid)= train_test_split(X_train, y_train, test_size=0.2, random_state=42)

aug = ImageDataGenerator(
		rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)


from keras.optimizers import SGD
sgd = SGD(lr=0.001, decay=1e-7, momentum=.9)
vgg16.compile(loss='categorical_crossentropy', 
              optimizer=sgd,
              metrics=['accuracy'])




In [22]:
H = vgg16.fit(
	x=aug.flow(X_train, y_train, batch_size=128),
	validation_data=(X_valid, y_valid),
	steps_per_epoch=len(X_train) // 128,
	epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [24]:
#save the model's trained weights
vgg16.save_weights('vgg16_transfer_learning.h5')