# Sorting in folders

In [2]:
from sklearn.model_selection import train_test_split

import os
import pandas as pd
from shutil import copyfile
from tqdm.auto import tqdm

def check_path(path):
    os.system("if [ ! -d " + path + " ]; then mkdir -p " + path + "; fi")

In [3]:
labels_path = 'train.csv'
labels = pd.read_csv(labels_path)
display(labels.head(2))

Unnamed: 0,name,category
0,0.jpg,47
1,1.jpg,60


In [51]:
len(labels)

16857

In [6]:
train_X, val_X = train_test_split(labels, test_size=0.1)

In [7]:
train_X.head(2)

Unnamed: 0,name,category
7296,7296.jpg,56
15893,15893.jpg,41


In [15]:
categories_train = train_X.groupby(['category'])['name'].apply(lambda grp: list(grp)).to_dict()
categories_val = val_X.groupby(['category'])['name'].apply(lambda grp: list(grp)).to_dict()

In [16]:
def make_cat(categories,train_path,output_path):
    check_path(output_path)
    for key in tqdm(categories.keys()):
        check_path(output_path+str(key))
        for pict_name in categories[key]:
            try:
                copyfile(train_path+pict_name, output_path+str(key)+"/"+pict_name)
            except Exception as e:
                print("Problem with ", train_path+pict_name)
                print(e)

In [17]:
os.listdir("image_data")

['cat_train', 'train', 'new_cat_train', 'without_back', 'test']

In [18]:
train_path = "image_data/without_back/"
output_train_path = "image_data/new_cat_train/"
output_val_path = "image_data/new_cat_val/"

make_cat(categories_train,train_path,output_train_path)
make_cat(categories_val,train_path,output_val_path)

HBox(children=(IntProgress(value=0), HTML(value='')))

Problem with  image_data/without_back/11450.jpg
[Errno 2] No such file or directory: 'image_data/without_back/11450.jpg'



HBox(children=(IntProgress(value=0), HTML(value='')))




# Augmentation pipeline

In [19]:
import glob
import numpy as np
import matplotlib.pyplot as plt
from pylab import rcParams
import PIL.Image as img

# Uploading dataset
def img_array(path):
    image = img.open(path)
    tmp = np.array(image)
    image.close()
    return tmp

In [23]:
!pip install Augmentor --user

Collecting Augmentor
  Downloading https://files.pythonhosted.org/packages/cb/79/861f38d5830cff631e30e33b127076bfef8ac98171e51daa06df0118c75f/Augmentor-0.2.8-py2.py3-none-any.whl
Installing collected packages: Augmentor
Successfully installed Augmentor-0.2.8
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [26]:
import Augmentor
from Augmentor import Pipeline

def augmentation(path, n):
    p = Augmentor.Pipeline(path) # ensure you press enter after this, don't just c&p this code.
    Pipeline.set_seed(100)
    p.rotate(probability=0.3, max_left_rotation=3, max_right_rotation=3)
    p.random_distortion(probability=0.9, grid_width=20, grid_height=20, magnitude=2)
    p.random_erasing(probability=0.5, rectangle_area=0.2)
    p.zoom(probability=0.5, min_factor=1.1, max_factor=1.5)
    p.sample(n)

path = output_train_path

for i in range(0,100):
    augmentation(path+str(i)+'/', 400 - len(categories_train[i]))

Executing Pipeline:   0%|          | 0/177 [00:00<?, ? Samples/s]

Initialised with 223 image(s) found.
Output directory set to image_data/new_cat_train/0/output.

Processing <PIL.Image.Image image mode=RGB size=576x768 at 0x7FE14147E2B0>: 100%|██████████| 177/177 [00:06<00:00, 28.01 Samples/s]                   
Executing Pipeline:   0%|          | 0/266 [00:00<?, ? Samples/s]

Initialised with 133 image(s) found.
Output directory set to image_data/new_cat_train/1/output.

Processing <PIL.Image.Image image mode=RGB size=600x600 at 0x7FE1419E8FD0>: 100%|██████████| 266/266 [00:07<00:00, 37.27 Samples/s]                   
Executing Pipeline:   0%|          | 0/180 [00:00<?, ? Samples/s]

Initialised with 220 image(s) found.
Output directory set to image_data/new_cat_train/2/output.

Processing <PIL.Image.Image image mode=RGB size=866x1280 at 0x7FE13ACEC2B0>: 100%|██████████| 180/180 [00:06<00:00, 29.29 Samples/s]                   
Executing Pipeline:   0%|          | 0/174 [00:00<?, ? Samples/s]

Initialised with 226 image(s) found.
Output directory set to image_data/new_cat_train/3/output.

Processing <PIL.Image.Image image mode=RGB size=299x299 at 0x7FE10D245860>: 100%|██████████| 174/174 [00:06<00:00, 28.30 Samples/s]                 
Executing Pipeline:   0%|          | 0/203 [00:00<?, ? Samples/s]

Initialised with 197 image(s) found.
Output directory set to image_data/new_cat_train/4/output.

Processing <PIL.Image.Image image mode=RGB size=403x604 at 0x7FE10BC7C2B0>: 100%|██████████| 203/203 [00:06<00:00, 30.29 Samples/s]                    
Executing Pipeline:   0%|          | 0/199 [00:00<?, ? Samples/s]

Initialised with 201 image(s) found.
Output directory set to image_data/new_cat_train/5/output.

Processing <PIL.Image.Image image mode=RGB size=960x1280 at 0x7FE1416CB518>: 100%|██████████| 199/199 [00:07<00:00, 26.88 Samples/s]                   
Executing Pipeline:   0%|          | 0/275 [00:00<?, ? Samples/s]

Initialised with 125 image(s) found.
Output directory set to image_data/new_cat_train/6/output.

Processing <PIL.Image.Image image mode=RGB size=720x1080 at 0x7FE0E89DE710>: 100%|██████████| 275/275 [00:09<00:00, 29.57 Samples/s]                  
Executing Pipeline:   0%|          | 0/236 [00:00<?, ? Samples/s]

Initialised with 164 image(s) found.
Output directory set to image_data/new_cat_train/7/output.

Processing <PIL.Image.Image image mode=RGB size=800x800 at 0x7FE141481A58>: 100%|██████████| 236/236 [00:07<00:00, 30.93 Samples/s]                    
Executing Pipeline:   0%|          | 0/335 [00:00<?, ? Samples/s]

Initialised with 65 image(s) found.
Output directory set to image_data/new_cat_train/8/output.

Processing <PIL.Image.Image image mode=RGB size=600x900 at 0x7FE0C00857B8>: 100%|██████████| 335/335 [00:09<00:00, 35.52 Samples/s]                   
Executing Pipeline:   0%|          | 0/223 [00:00<?, ? Samples/s]

Initialised with 177 image(s) found.
Output directory set to image_data/new_cat_train/9/output.

Processing <PIL.Image.Image image mode=RGB size=1280x960 at 0x7FE10CFCD3C8>: 100%|██████████| 223/223 [00:08<00:00, 98.60 Samples/s]                  
Executing Pipeline:   0%|          | 0/318 [00:00<?, ? Samples/s]

Initialised with 82 image(s) found.
Output directory set to image_data/new_cat_train/10/output.

Processing <PIL.Image.Image image mode=RGB size=920x1280 at 0x7FE0E8B3F208>: 100%|██████████| 318/318 [00:07<00:00, 42.72 Samples/s]                  
Executing Pipeline:   0%|          | 0/237 [00:00<?, ? Samples/s]

Initialised with 163 image(s) found.
Output directory set to image_data/new_cat_train/11/output.

Processing <PIL.Image.Image image mode=RGB size=1280x960 at 0x7FE10C08EB00>: 100%|██████████| 237/237 [00:08<00:00, 28.79 Samples/s]                 
Executing Pipeline:   0%|          | 0/277 [00:00<?, ? Samples/s]

Initialised with 123 image(s) found.
Output directory set to image_data/new_cat_train/12/output.

Processing <PIL.Image.Image image mode=RGB size=1280x1280 at 0x7FE10D030AC8>: 100%|██████████| 277/277 [00:09<00:00, 28.91 Samples/s]                  
Executing Pipeline:   0%|          | 0/247 [00:00<?, ? Samples/s]

Initialised with 153 image(s) found.
Output directory set to image_data/new_cat_train/13/output.

Processing <PIL.Image.Image image mode=RGB size=1280x960 at 0x7FE1419C41D0>: 100%|██████████| 247/247 [00:08<00:00, 30.05 Samples/s]                  
Executing Pipeline:   0%|          | 0/179 [00:00<?, ? Samples/s]

Initialised with 221 image(s) found.
Output directory set to image_data/new_cat_train/14/output.

Processing <PIL.Image.Image image mode=RGB size=794x794 at 0x7FE10BC7C898>: 100%|██████████| 179/179 [00:05<00:00, 30.84 Samples/s]                   
Executing Pipeline:   0%|          | 0/289 [00:00<?, ? Samples/s]

Initialised with 111 image(s) found.
Output directory set to image_data/new_cat_train/15/output.

Processing <PIL.Image.Image image mode=RGB size=400x300 at 0x7FE141A12128>: 100%|██████████| 289/289 [00:09<00:00, 29.88 Samples/s]                   
Executing Pipeline:   0%|          | 0/206 [00:00<?, ? Samples/s]

Initialised with 194 image(s) found.
Output directory set to image_data/new_cat_train/16/output.

Processing <PIL.Image.Image image mode=RGB size=660x524 at 0x7FE10D2456A0>: 100%|██████████| 206/206 [00:07<00:00, 27.19 Samples/s]                   
Executing Pipeline:   0%|          | 0/326 [00:00<?, ? Samples/s]

Initialised with 74 image(s) found.
Output directory set to image_data/new_cat_train/17/output.

Processing <PIL.Image.Image image mode=RGB size=1000x1000 at 0x7FE111DFBB00>: 100%|██████████| 326/326 [00:10<00:00, 30.73 Samples/s]                 
Executing Pipeline:   0%|          | 0/243 [00:00<?, ? Samples/s]

Initialised with 157 image(s) found.
Output directory set to image_data/new_cat_train/18/output.

Processing <PIL.Image.Image image mode=RGB size=800x500 at 0x7FE138C3D908>: 100%|██████████| 243/243 [00:06<00:00, 35.92 Samples/s]                   
Executing Pipeline:   0%|          | 0/302 [00:00<?, ? Samples/s]

Initialised with 98 image(s) found.
Output directory set to image_data/new_cat_train/19/output.

Processing <PIL.Image.Image image mode=RGB size=1280x1280 at 0x7FE111BA0780>: 100%|██████████| 302/302 [00:10<00:00, 28.14 Samples/s]                 
Executing Pipeline:   0%|          | 0/226 [00:00<?, ? Samples/s]

Initialised with 174 image(s) found.
Output directory set to image_data/new_cat_train/20/output.

Processing <PIL.Image.Image image mode=RGB size=1200x800 at 0x7FE10C9B6B00>: 100%|██████████| 226/226 [00:08<00:00, 27.96 Samples/s]                  
Executing Pipeline:   0%|          | 0/254 [00:00<?, ? Samples/s]

Initialised with 146 image(s) found.
Output directory set to image_data/new_cat_train/21/output.

Processing <PIL.Image.Image image mode=RGB size=1280x853 at 0x7FE10C9B6C50>: 100%|██████████| 254/254 [00:08<00:00, 28.26 Samples/s]                  
Executing Pipeline:   0%|          | 0/250 [00:00<?, ? Samples/s]

Initialised with 150 image(s) found.
Output directory set to image_data/new_cat_train/22/output.

Processing <PIL.Image.Image image mode=RGB size=1200x1200 at 0x7FE10BB8AF60>: 100%|██████████| 250/250 [00:09<00:00, 89.72 Samples/s]                
Executing Pipeline:   0%|          | 0/236 [00:00<?, ? Samples/s]

Initialised with 164 image(s) found.
Output directory set to image_data/new_cat_train/23/output.

Processing <PIL.Image.Image image mode=RGB size=683x1024 at 0x7FE111C56630>: 100%|██████████| 236/236 [00:07<00:00, 32.19 Samples/s]                  
Executing Pipeline:   0%|          | 0/192 [00:00<?, ? Samples/s]

Initialised with 208 image(s) found.
Output directory set to image_data/new_cat_train/24/output.

Processing <PIL.Image.Image image mode=RGB size=293x220 at 0x7FE10CFCDD30>: 100%|██████████| 192/192 [00:06<00:00, 28.82 Samples/s]                  
Executing Pipeline:   0%|          | 0/321 [00:00<?, ? Samples/s]

Initialised with 79 image(s) found.
Output directory set to image_data/new_cat_train/25/output.

Processing <PIL.Image.Image image mode=RGB size=616x786 at 0x7FE0E89DEE80>: 100%|██████████| 321/321 [00:10<00:00, 30.56 Samples/s]                   
Executing Pipeline:   0%|          | 0/219 [00:00<?, ? Samples/s]

Initialised with 181 image(s) found.
Output directory set to image_data/new_cat_train/26/output.

Processing <PIL.Image.Image image mode=RGB size=800x800 at 0x7FE1419BA198>: 100%|██████████| 219/219 [00:07<00:00, 28.09 Samples/s]                    
Executing Pipeline:   0%|          | 0/261 [00:00<?, ? Samples/s]

Initialised with 139 image(s) found.
Output directory set to image_data/new_cat_train/27/output.

Processing <PIL.Image.Image image mode=RGB size=800x1200 at 0x7FE10D05A860>: 100%|██████████| 261/261 [00:08<00:00, 31.68 Samples/s]                  
Executing Pipeline:   0%|          | 0/331 [00:00<?, ? Samples/s]

Initialised with 69 image(s) found.
Output directory set to image_data/new_cat_train/28/output.

Processing <PIL.Image.Image image mode=RGB size=618x800 at 0x7FE111DFB198>: 100%|██████████| 331/331 [00:11<00:00, 28.18 Samples/s]                   
Executing Pipeline:   0%|          | 0/203 [00:00<?, ? Samples/s]

Initialised with 197 image(s) found.
Output directory set to image_data/new_cat_train/29/output.

Processing <PIL.Image.Image image mode=RGB size=800x600 at 0x7FE10CFA5080>: 100%|██████████| 203/203 [00:05<00:00, 34.48 Samples/s]                  
Executing Pipeline:   0%|          | 0/316 [00:00<?, ? Samples/s]

Initialised with 84 image(s) found.
Output directory set to image_data/new_cat_train/30/output.

Processing <PIL.Image.Image image mode=RGB size=599x599 at 0x7FE141A12160>: 100%|██████████| 316/316 [00:10<00:00, 30.15 Samples/s]                  
Executing Pipeline:   0%|          | 0/240 [00:00<?, ? Samples/s]

Initialised with 160 image(s) found.
Output directory set to image_data/new_cat_train/31/output.

Processing <PIL.Image.Image image mode=RGB size=600x600 at 0x7FE10D245EF0>: 100%|██████████| 240/240 [00:07<00:00, 30.60 Samples/s]                   
Executing Pipeline:   0%|          | 0/314 [00:00<?, ? Samples/s]

Initialised with 86 image(s) found.
Output directory set to image_data/new_cat_train/32/output.

Processing <PIL.Image.Image image mode=RGB size=840x1200 at 0x7FE0E89E76D8>: 100%|██████████| 314/314 [00:08<00:00, 35.79 Samples/s]                    
Executing Pipeline:   0%|          | 0/205 [00:00<?, ? Samples/s]

Initialised with 195 image(s) found.
Output directory set to image_data/new_cat_train/33/output.

Processing <PIL.Image.Image image mode=RGB size=960x1280 at 0x7FE111DFB9E8>: 100%|██████████| 205/205 [00:08<00:00, 110.29 Samples/s]               
Executing Pipeline:   0%|          | 0/305 [00:00<?, ? Samples/s]

Initialised with 95 image(s) found.
Output directory set to image_data/new_cat_train/34/output.

Processing <PIL.Image.Image image mode=RGB size=853x1280 at 0x7FE10D030B00>: 100%|██████████| 305/305 [00:09<00:00, 33.42 Samples/s]                 
Executing Pipeline:   0%|          | 0/317 [00:00<?, ? Samples/s]

Initialised with 83 image(s) found.
Output directory set to image_data/new_cat_train/35/output.

Processing <PIL.Image.Image image mode=RGB size=720x1280 at 0x7FE141A0B518>: 100%|██████████| 317/317 [00:08<00:00, 35.88 Samples/s]                 
Executing Pipeline:   0%|          | 0/259 [00:00<?, ? Samples/s]

Initialised with 141 image(s) found.
Output directory set to image_data/new_cat_train/36/output.

Processing <PIL.Image.Image image mode=RGB size=854x1280 at 0x7FE10BC7CEF0>: 100%|██████████| 259/259 [00:08<00:00, 29.29 Samples/s]                   
Executing Pipeline:   0%|          | 0/243 [00:00<?, ? Samples/s]

Initialised with 157 image(s) found.
Output directory set to image_data/new_cat_train/37/output.

Processing <PIL.Image.Image image mode=RGB size=1280x847 at 0x7FE0E89DEEB8>: 100%|██████████| 243/243 [00:08<00:00, 28.74 Samples/s]                  
Executing Pipeline:   0%|          | 0/188 [00:00<?, ? Samples/s]

Initialised with 212 image(s) found.
Output directory set to image_data/new_cat_train/38/output.

Processing <PIL.Image.Image image mode=RGB size=1280x1280 at 0x7FE10CFCD898>: 100%|██████████| 188/188 [00:06<00:00, 28.11 Samples/s]                  
Executing Pipeline:   0%|          | 0/186 [00:00<?, ? Samples/s]

Initialised with 214 image(s) found.
Output directory set to image_data/new_cat_train/39/output.

Processing <PIL.Image.Image image mode=RGB size=1280x956 at 0x7FE138C074A8>: 100%|██████████| 186/186 [00:07<00:00, 24.85 Samples/s]                   
Executing Pipeline:   0%|          | 0/196 [00:00<?, ? Samples/s]

Initialised with 204 image(s) found.
Output directory set to image_data/new_cat_train/40/output.

Processing <PIL.Image.Image image mode=RGB size=585x738 at 0x7FE10BC6F550>: 100%|██████████| 196/196 [00:07<00:00, 27.67 Samples/s]                   
Executing Pipeline:   0%|          | 0/250 [00:00<?, ? Samples/s]

Initialised with 150 image(s) found.
Output directory set to image_data/new_cat_train/41/output.

Processing <PIL.Image.Image image mode=RGB size=1280x1280 at 0x7FE1419C4160>: 100%|██████████| 250/250 [00:07<00:00, 34.24 Samples/s]                  
Executing Pipeline:   0%|          | 0/299 [00:00<?, ? Samples/s]

Initialised with 101 image(s) found.
Output directory set to image_data/new_cat_train/42/output.

Processing <PIL.Image.Image image mode=RGB size=500x750 at 0x7FE1417CEBE0>: 100%|██████████| 299/299 [00:05<00:00, 50.67 Samples/s]                  
Executing Pipeline:   0%|          | 0/206 [00:00<?, ? Samples/s]

Initialised with 194 image(s) found.
Output directory set to image_data/new_cat_train/43/output.

Processing <PIL.Image.Image image mode=RGB size=1280x608 at 0x7FE141A0B1D0>: 100%|██████████| 206/206 [00:05<00:00, 34.47 Samples/s]  
Executing Pipeline:   0%|          | 0/209 [00:00<?, ? Samples/s]

Initialised with 191 image(s) found.
Output directory set to image_data/new_cat_train/44/output.

Processing <PIL.Image.Image image mode=RGB size=960x1280 at 0x7FE0E8421518>: 100%|██████████| 209/209 [00:07<00:00, 27.89 Samples/s]                
Executing Pipeline:   0%|          | 0/275 [00:00<?, ? Samples/s]

Initialised with 125 image(s) found.
Output directory set to image_data/new_cat_train/45/output.

Processing <PIL.Image.Image image mode=RGB size=643x1280 at 0x7FE10D0CB6A0>: 100%|██████████| 275/275 [00:08<00:00, 33.42 Samples/s]                   
Executing Pipeline:   0%|          | 0/187 [00:00<?, ? Samples/s]

Initialised with 213 image(s) found.
Output directory set to image_data/new_cat_train/46/output.

Processing <PIL.Image.Image image mode=RGB size=716x640 at 0x7FE111BA0438>: 100%|██████████| 187/187 [00:05<00:00, 32.06 Samples/s]                   
Executing Pipeline:   0%|          | 0/225 [00:00<?, ? Samples/s]

Initialised with 175 image(s) found.
Output directory set to image_data/new_cat_train/47/output.

Processing <PIL.Image.Image image mode=RGB size=431x544 at 0x7FE111BA0C50>: 100%|██████████| 225/225 [00:08<00:00, 27.42 Samples/s]                  
Executing Pipeline:   0%|          | 0/279 [00:00<?, ? Samples/s]

Initialised with 121 image(s) found.
Output directory set to image_data/new_cat_train/48/output.

Processing <PIL.Image.Image image mode=RGB size=1000x750 at 0x7FE1419C4908>: 100%|██████████| 279/279 [00:10<00:00, 25.57 Samples/s]                  
Executing Pipeline:   0%|          | 0/278 [00:00<?, ? Samples/s]

Initialised with 122 image(s) found.
Output directory set to image_data/new_cat_train/49/output.

Processing <PIL.Image.Image image mode=RGB size=800x531 at 0x7FE10D245E80>: 100%|██████████| 278/278 [00:10<00:00, 26.81 Samples/s]                  
Executing Pipeline:   0%|          | 0/272 [00:00<?, ? Samples/s]

Initialised with 128 image(s) found.
Output directory set to image_data/new_cat_train/50/output.

Processing <PIL.Image.Image image mode=RGB size=854x1280 at 0x7FE0E89A2CF8>: 100%|██████████| 272/272 [00:07<00:00, 35.71 Samples/s]                  
Executing Pipeline:   0%|          | 0/199 [00:00<?, ? Samples/s]

Initialised with 201 image(s) found.
Output directory set to image_data/new_cat_train/51/output.

Processing <PIL.Image.Image image mode=RGB size=510x714 at 0x7FE141A07438>: 100%|██████████| 199/199 [00:06<00:00, 29.10 Samples/s]                   
Executing Pipeline:   0%|          | 0/243 [00:00<?, ? Samples/s]

Initialised with 157 image(s) found.
Output directory set to image_data/new_cat_train/52/output.

Processing <PIL.Image.Image image mode=RGB size=991x751 at 0x7FE0E8B3F2B0>: 100%|██████████| 243/243 [00:06<00:00, 35.21 Samples/s]                   
Executing Pipeline:   0%|          | 0/214 [00:00<?, ? Samples/s]

Initialised with 186 image(s) found.
Output directory set to image_data/new_cat_train/53/output.

Processing <PIL.Image.Image image mode=RGB size=450x600 at 0x7FE141A12160>: 100%|██████████| 214/214 [00:06<00:00, 34.57 Samples/s]                  
Executing Pipeline:   0%|          | 0/235 [00:00<?, ? Samples/s]

Initialised with 165 image(s) found.
Output directory set to image_data/new_cat_train/54/output.

Processing <PIL.Image.Image image mode=RGB size=960x1280 at 0x7FE1419C4400>: 100%|██████████| 235/235 [00:07<00:00, 29.89 Samples/s]                  
Executing Pipeline:   0%|          | 0/256 [00:00<?, ? Samples/s]

Initialised with 144 image(s) found.
Output directory set to image_data/new_cat_train/55/output.

Processing <PIL.Image.Image image mode=RGB size=1280x1054 at 0x7FE111DFBC50>: 100%|██████████| 256/256 [00:06<00:00, 38.67 Samples/s]                  
Executing Pipeline:   0%|          | 0/178 [00:00<?, ? Samples/s]

Initialised with 222 image(s) found.
Output directory set to image_data/new_cat_train/56/output.

Processing <PIL.Image.Image image mode=RGB size=1280x960 at 0x7FE138B6FDA0>: 100%|██████████| 178/178 [00:07<00:00, 25.08 Samples/s]                  
Executing Pipeline:   0%|          | 0/274 [00:00<?, ? Samples/s]

Initialised with 126 image(s) found.
Output directory set to image_data/new_cat_train/57/output.

Processing <PIL.Image.Image image mode=RGB size=850x1280 at 0x7FE10BC7C3C8>: 100%|██████████| 274/274 [00:11<00:00, 24.55 Samples/s]                   
Executing Pipeline:   0%|          | 0/300 [00:00<?, ? Samples/s]

Initialised with 100 image(s) found.
Output directory set to image_data/new_cat_train/58/output.

Processing <PIL.Image.Image image mode=RGB size=714x960 at 0x7FE13AE785F8>: 100%|██████████| 300/300 [00:10<00:00, 29.67 Samples/s]                   
Executing Pipeline:   0%|          | 0/344 [00:00<?, ? Samples/s]

Initialised with 56 image(s) found.
Output directory set to image_data/new_cat_train/59/output.

Processing <PIL.Image.Image image mode=RGB size=986x644 at 0x7FE1419C47B8>: 100%|██████████| 344/344 [00:10<00:00, 32.30 Samples/s]                    
Executing Pipeline:   0%|          | 0/246 [00:00<?, ? Samples/s]

Initialised with 154 image(s) found.
Output directory set to image_data/new_cat_train/60/output.

Processing <PIL.Image.Image image mode=RGB size=661x999 at 0x7FE10C9B64A8>: 100%|██████████| 246/246 [00:06<00:00, 36.23 Samples/s]                   
Executing Pipeline:   0%|          | 0/206 [00:00<?, ? Samples/s]

Initialised with 194 image(s) found.
Output directory set to image_data/new_cat_train/61/output.

Processing <PIL.Image.Image image mode=RGB size=853x1280 at 0x7FE0F19CB198>: 100%|██████████| 206/206 [00:07<00:00, 28.39 Samples/s]                  
Executing Pipeline:   0%|          | 0/304 [00:00<?, ? Samples/s]

Initialised with 96 image(s) found.
Output directory set to image_data/new_cat_train/62/output.

Processing <PIL.Image.Image image mode=RGB size=477x637 at 0x7FE1419BADA0>: 100%|██████████| 304/304 [00:10<00:00, 30.06 Samples/s]                    
Executing Pipeline:   0%|          | 0/328 [00:00<?, ? Samples/s]

Initialised with 72 image(s) found.
Output directory set to image_data/new_cat_train/63/output.

Processing <PIL.Image.Image image mode=RGB size=1080x1080 at 0x7FE10D245320>: 100%|██████████| 328/328 [00:10<00:00, 31.09 Samples/s]                  
Executing Pipeline:   0%|          | 0/278 [00:00<?, ? Samples/s]

Initialised with 122 image(s) found.
Output directory set to image_data/new_cat_train/64/output.

Processing <PIL.Image.Image image mode=RGB size=960x960 at 0x7FE138C3D048>: 100%|██████████| 278/278 [00:10<00:00, 27.27 Samples/s]                   
Executing Pipeline:   0%|          | 0/200 [00:00<?, ? Samples/s]

Initialised with 200 image(s) found.
Output directory set to image_data/new_cat_train/65/output.

Processing <PIL.Image.Image image mode=RGB size=609x718 at 0x7FE10CFA5550>: 100%|██████████| 200/200 [00:06<00:00, 28.88 Samples/s]                  
Executing Pipeline:   0%|          | 0/227 [00:00<?, ? Samples/s]

Initialised with 173 image(s) found.
Output directory set to image_data/new_cat_train/66/output.

Processing <PIL.Image.Image image mode=RGB size=640x480 at 0x7FE141A07E48>: 100%|██████████| 227/227 [00:07<00:00, 30.47 Samples/s]   
Executing Pipeline:   0%|          | 0/165 [00:00<?, ? Samples/s]

Initialised with 235 image(s) found.
Output directory set to image_data/new_cat_train/67/output.

Processing <PIL.Image.Image image mode=RGB size=720x1280 at 0x7FE138B6F0F0>: 100%|██████████| 165/165 [00:05<00:00, 29.95 Samples/s]                  
Executing Pipeline:   0%|          | 0/223 [00:00<?, ? Samples/s]

Initialised with 177 image(s) found.
Output directory set to image_data/new_cat_train/68/output.

Processing <PIL.Image.Image image mode=RGB size=1004x821 at 0x7FE1419BA438>: 100%|██████████| 223/223 [00:07<00:00, 29.56 Samples/s]                   
Executing Pipeline:   0%|          | 0/307 [00:00<?, ? Samples/s]

Initialised with 93 image(s) found.
Output directory set to image_data/new_cat_train/69/output.

Processing <PIL.Image.Image image mode=RGB size=178x183 at 0x7FE10BB8A978>: 100%|██████████| 307/307 [00:07<00:00, 39.14 Samples/s]                     
Executing Pipeline:   0%|          | 0/204 [00:00<?, ? Samples/s]

Initialised with 196 image(s) found.
Output directory set to image_data/new_cat_train/70/output.

Processing <PIL.Image.Image image mode=RGB size=768x1024 at 0x7FE10D116438>: 100%|██████████| 204/204 [00:07<00:00, 26.11 Samples/s]                 
Executing Pipeline:   0%|          | 0/244 [00:00<?, ? Samples/s]

Initialised with 156 image(s) found.
Output directory set to image_data/new_cat_train/71/output.

Processing <PIL.Image.Image image mode=RGB size=1280x1280 at 0x7FE0E8B3F828>: 100%|██████████| 244/244 [00:07<00:00, 32.79 Samples/s]                 
Executing Pipeline:   0%|          | 0/193 [00:00<?, ? Samples/s]

Initialised with 207 image(s) found.
Output directory set to image_data/new_cat_train/72/output.

Processing <PIL.Image.Image image mode=RGB size=1280x853 at 0x7FE10D0F6358>: 100%|██████████| 193/193 [00:08<00:00, 23.99 Samples/s]                
Executing Pipeline:   0%|          | 0/211 [00:00<?, ? Samples/s]

Initialised with 189 image(s) found.
Output directory set to image_data/new_cat_train/73/output.

Processing <PIL.Image.Image image mode=RGB size=800x800 at 0x7FE10D0D8DA0>: 100%|██████████| 211/211 [00:06<00:00, 32.22 Samples/s]                   
Executing Pipeline:   0%|          | 0/339 [00:00<?, ? Samples/s]

Initialised with 61 image(s) found.
Output directory set to image_data/new_cat_train/74/output.

Processing <PIL.Image.Image image mode=RGB size=800x1200 at 0x7FE138C3D978>: 100%|██████████| 339/339 [00:11<00:00, 28.31 Samples/s]                   
Executing Pipeline:   0%|          | 0/185 [00:00<?, ? Samples/s]

Initialised with 215 image(s) found.
Output directory set to image_data/new_cat_train/75/output.

Processing <PIL.Image.Image image mode=RGB size=864x1080 at 0x7FE111B9F240>: 100%|██████████| 185/185 [00:08<00:00, 22.85 Samples/s]                   
Executing Pipeline:   0%|          | 0/175 [00:00<?, ? Samples/s]

Initialised with 225 image(s) found.
Output directory set to image_data/new_cat_train/76/output.

Processing <PIL.Image.Image image mode=RGB size=600x528 at 0x7FE141A07518>: 100%|██████████| 175/175 [00:06<00:00, 27.31 Samples/s]  
Executing Pipeline:   0%|          | 0/166 [00:00<?, ? Samples/s]

Initialised with 234 image(s) found.
Output directory set to image_data/new_cat_train/77/output.

Processing <PIL.Image.Image image mode=RGB size=750x1000 at 0x7FE0C00245F8>: 100%|██████████| 166/166 [00:06<00:00, 25.15 Samples/s] 
Executing Pipeline:   0%|          | 0/252 [00:00<?, ? Samples/s]

Initialised with 148 image(s) found.
Output directory set to image_data/new_cat_train/78/output.

Processing <PIL.Image.Image image mode=RGB size=1134x927 at 0x7FE10BC7CBA8>: 100%|██████████| 252/252 [00:07<00:00, 34.16 Samples/s]                  
Executing Pipeline:   0%|          | 0/228 [00:00<?, ? Samples/s]

Initialised with 172 image(s) found.
Output directory set to image_data/new_cat_train/79/output.

Processing <PIL.Image.Image image mode=RGB size=960x1280 at 0x7FE141A124E0>: 100%|██████████| 228/228 [00:08<00:00, 26.11 Samples/s]                   
Executing Pipeline:   0%|          | 0/279 [00:00<?, ? Samples/s]

Initialised with 121 image(s) found.
Output directory set to image_data/new_cat_train/80/output.

Processing <PIL.Image.Image image mode=RGB size=851x1280 at 0x7FE141AF2DA0>: 100%|██████████| 279/279 [00:07<00:00, 36.84 Samples/s]                   
Executing Pipeline:   0%|          | 0/308 [00:00<?, ? Samples/s]

Initialised with 92 image(s) found.
Output directory set to image_data/new_cat_train/81/output.

Processing <PIL.Image.Image image mode=RGB size=1129x1129 at 0x7FE0E89A22E8>: 100%|██████████| 308/308 [00:10<00:00, 28.21 Samples/s]                 
Executing Pipeline:   0%|          | 0/302 [00:00<?, ? Samples/s]

Initialised with 98 image(s) found.
Output directory set to image_data/new_cat_train/82/output.

Processing <PIL.Image.Image image mode=RGB size=1024x1280 at 0x7FE141A07DA0>: 100%|██████████| 302/302 [00:10<00:00, 27.69 Samples/s]                 
Executing Pipeline:   0%|          | 0/240 [00:00<?, ? Samples/s]

Initialised with 160 image(s) found.
Output directory set to image_data/new_cat_train/83/output.

Processing <PIL.Image.Image image mode=RGB size=467x700 at 0x7FE10C948A20>: 100%|██████████| 240/240 [00:06<00:00, 36.81 Samples/s]                   
Executing Pipeline:   0%|          | 0/258 [00:00<?, ? Samples/s]

Initialised with 142 image(s) found.
Output directory set to image_data/new_cat_train/84/output.

Processing <PIL.Image.Image image mode=RGB size=853x1280 at 0x7FE0E89DE0F0>: 100%|██████████| 258/258 [00:09<00:00, 28.49 Samples/s]                  
Executing Pipeline:   0%|          | 0/263 [00:00<?, ? Samples/s]

Initialised with 137 image(s) found.
Output directory set to image_data/new_cat_train/85/output.

Processing <PIL.Image.Image image mode=RGB size=1000x1000 at 0x7FE1419BAE48>: 100%|██████████| 263/263 [00:08<00:00, 30.71 Samples/s]                 
Executing Pipeline:   0%|          | 0/187 [00:00<?, ? Samples/s]

Initialised with 213 image(s) found.
Output directory set to image_data/new_cat_train/86/output.

Processing <PIL.Image.Image image mode=RGB size=640x480 at 0x7FE10D245780>: 100%|██████████| 187/187 [00:06<00:00, 27.64 Samples/s]                  
Executing Pipeline:   0%|          | 0/330 [00:00<?, ? Samples/s]

Initialised with 70 image(s) found.
Output directory set to image_data/new_cat_train/87/output.

Processing <PIL.Image.Image image mode=RGB size=443x543 at 0x7FE10D25D550>: 100%|██████████| 330/330 [00:11<00:00, 28.60 Samples/s]                   
Executing Pipeline:   0%|          | 0/237 [00:00<?, ? Samples/s]

Initialised with 163 image(s) found.
Output directory set to image_data/new_cat_train/88/output.

Processing <PIL.Image.Image image mode=RGB size=960x1280 at 0x7FE10D25DC50>: 100%|██████████| 237/237 [00:06<00:00, 37.77 Samples/s]                   
Executing Pipeline:   0%|          | 0/200 [00:00<?, ? Samples/s]

Initialised with 200 image(s) found.
Output directory set to image_data/new_cat_train/89/output.

Processing <PIL.Image.Image image mode=RGB size=903x1280 at 0x7FE13AE78B70>: 100%|██████████| 200/200 [00:07<00:00, 78.01 Samples/s]                 
Executing Pipeline:   0%|          | 0/290 [00:00<?, ? Samples/s]

Initialised with 110 image(s) found.
Output directory set to image_data/new_cat_train/90/output.

Processing <PIL.Image.Image image mode=RGB size=728x970 at 0x7FE10CFA5B00>: 100%|██████████| 290/290 [00:08<00:00, 34.44 Samples/s]                   
Executing Pipeline:   0%|          | 0/333 [00:00<?, ? Samples/s]

Initialised with 67 image(s) found.
Output directory set to image_data/new_cat_train/91/output.

Processing <PIL.Image.Image image mode=RGB size=600x600 at 0x7FE141AF25F8>: 100%|██████████| 333/333 [00:06<00:00, 47.95 Samples/s]                   
Executing Pipeline:   0%|          | 0/301 [00:00<?, ? Samples/s]

Initialised with 99 image(s) found.
Output directory set to image_data/new_cat_train/92/output.

Processing <PIL.Image.Image image mode=RGB size=700x700 at 0x7FE0E8A2E6D8>: 100%|██████████| 301/301 [00:04<00:00, 66.21 Samples/s]                   
Executing Pipeline:   0%|          | 0/293 [00:00<?, ? Samples/s]

Initialised with 107 image(s) found.
Output directory set to image_data/new_cat_train/93/output.

Processing <PIL.Image.Image image mode=RGB size=668x626 at 0x7FE0C0024F60>: 100%|██████████| 293/293 [00:04<00:00, 59.70 Samples/s]                  
Executing Pipeline:   0%|          | 0/301 [00:00<?, ? Samples/s]

Initialised with 99 image(s) found.
Output directory set to image_data/new_cat_train/94/output.

Processing <PIL.Image.Image image mode=RGB size=1000x1000 at 0x7FE111DFBDA0>: 100%|██████████| 301/301 [00:04<00:00, 64.09 Samples/s]                  
Executing Pipeline:   0%|          | 0/226 [00:00<?, ? Samples/s]

Initialised with 174 image(s) found.
Output directory set to image_data/new_cat_train/95/output.

Processing <PIL.Image.Image image mode=RGB size=800x800 at 0x7FE10BB8A2E8>: 100%|██████████| 226/226 [00:05<00:00, 42.36 Samples/s]                    
Executing Pipeline:   0%|          | 0/246 [00:00<?, ? Samples/s]

Initialised with 154 image(s) found.
Output directory set to image_data/new_cat_train/96/output.

Processing <PIL.Image.Image image mode=RGB size=236x600 at 0x7FE13ADF0128>: 100%|██████████| 246/246 [00:03<00:00, 66.43 Samples/s]                    
Executing Pipeline:   0%|          | 0/249 [00:00<?, ? Samples/s]

Initialised with 151 image(s) found.
Output directory set to image_data/new_cat_train/97/output.

Processing <PIL.Image.Image image mode=RGB size=579x799 at 0x7FE1419BABA8>: 100%|██████████| 249/249 [00:04<00:00, 61.28 Samples/s]                    
Executing Pipeline:   0%|          | 0/250 [00:00<?, ? Samples/s]

Initialised with 150 image(s) found.
Output directory set to image_data/new_cat_train/98/output.

Processing <PIL.Image.Image image mode=RGB size=1081x1080 at 0x7FE13AE78EF0>: 100%|██████████| 250/250 [00:05<00:00, 44.32 Samples/s]                  
Executing Pipeline:   0%|          | 0/178 [00:00<?, ? Samples/s]

Initialised with 222 image(s) found.
Output directory set to image_data/new_cat_train/99/output.

Processing <PIL.Image.Image image mode=RGB size=960x1280 at 0x7FE141AF25C0>: 100%|██████████| 178/178 [00:04<00:00, 40.05 Samples/s]                  


In [27]:
# path = output_val_path

# for i in range(0,100):
#     augmentation(path+str(i)+'/', 400 - len(categories_val[i]))

# Modelling

In [32]:
#!pip install psutil --user

In [35]:
#!pip install humanize --user

In [4]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '2'
os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'

# Standard data science libraries
import psutil
import humanize
import os
from IPython.display import display_html

# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

dataDirectory= "" 

In [5]:
import numpy as np
import keras
from keras import backend as K
from keras.models import Sequential
from keras.models import Model
from sklearn.model_selection import train_test_split
from keras.layers import Activation
from keras.layers.core import Dense, Flatten
from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization
from keras.layers.core import Dropout
from keras.layers.convolutional import *
from keras.callbacks import ModelCheckpoint
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input
from keras.applications.inception_v3 import decode_predictions
from sklearn.metrics import confusion_matrix
from sklearn.metrics import average_precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from keras.models import model_from_json
import itertools
import matplotlib.pyplot as plt
import time
import pandas as pd

Using TensorFlow backend.


In [39]:
train_path = "image_data/without_back/"
output_train_path = "image_data/new_cat_train/"
output_val_path = "image_data/new_cat_val/"

In [40]:
# train_path = dataDirectory+'image_data/train'
# test_path  = dataDirectory+'test/test'
# print(os.listdir(train_path))
# print(os.listdir(test_path))

In [41]:
train_datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        fill_mode='nearest') # set validation split #validation_split=0.2

In [68]:
# Go to label directiory and tranfer labels to pd
labels = pd.read_csv('train.csv').set_index('name')

# Convert pd to dic -> {Image.jpg : label}
labels = labels.to_dict()
labels = labels[list(labels.keys())[0]]

print(f'lenght of dictionary: {len(labels)}')

# Create a PD dataframe with columns Images and Labels
# This DF is needed for the flow_from_dataframe method of the ImageDataGenerator class
df = pd.DataFrame(list(labels.items()))
df.columns = ['images', 'labels']
df = df.astype({'labels': str})

# Display the occurance of each class
df.groupby('labels').count().head()

train_df, validation_df = train_test_split(df, test_size=0.1)

print(f'train_df: {train_df.shape}')
print(f'validation_df: {validation_df.shape}')

lenght of dictionary: 16857
train_df: (15171, 2)
validation_df: (1686, 2)


In [47]:
os.listdir("./image_data/")

['cat_train', 'train', 'new_cat_train', 'without_back', 'new_cat_val', 'test']

In [52]:
train_datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        fill_mode='nearest') # set validation split

In [58]:
test_datagen = ImageDataGenerator(
    rescale = 1./255)

test_generator = test_datagen.flow_from_directory(
        './image_data/test',
        target_size=(256, 256),
        shuffle = False,
        class_mode=None,
        batch_size=1)

Found 0 images belonging to 0 classes.


In [57]:
batchSize=32

selectedClasses = [str(i) for i in range(100)]

train_generator = train_datagen.flow_from_directory(
    output_train_path,
    target_size=(224, 224),
    batch_size=batchSize,
    classes=selectedClasses,
    subset='training') # set as training data

validation_generator = ImageDataGenerator().flow_from_directory(
    output_val_path, # same directory as training data
    target_size=(224, 224),
    batch_size=batchSize,
    classes=selectedClasses) # set as validation data

test_generator = ImageDataGenerator().flow_from_directory(
    './image_data/test', 
    target_size=(224,224), 
    classes=None,
    shuffle= False,
    batch_size = batchSize)# set as test data

Found 60686 images belonging to 100 classes.
Found 1686 images belonging to 100 classes.
Found 0 images belonging to 0 classes.


In [50]:
batchSize=32

selectedClasses = [str(i) for i in range(100)]

train_generator = ImageDataGenerator().flow_from_directory(
    output_train_path,
    target_size=(224, 224),
    batch_size=batchSize,
    classes=selectedClasses,
    subset='training') # set as training data

validation_generator = ImageDataGenerator().flow_from_directory(
    output_val_path, # same directory as training data
    target_size=(224, 224),
    batch_size=batchSize,
    classes=selectedClasses,
    subset='validation') # set as validation data

test_generator = ImageDataGenerator().flow_from_directory(
    'image_data/test', 
    target_size=(224,224), 
    classes=None,
    shuffle= False,
    batch_size = batchSize)# set as test data

Found 60686 images belonging to 100 classes.
Found 0 images belonging to 100 classes.
Found 0 images belonging to 0 classes.


In [6]:
#InceptionV3

base_model = InceptionV3(weights='imagenet', 
                                include_top=False, 
                                input_shape=(224, 224,3))
base_model.trainable = False

x = base_model.output
x = keras.layers.GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = Dropout(0.5)(x)
# and a sofymax/logistic layer -- we have 6 classes
predictions = Dense(100, activation='softmax')(x)

# this is the model we will train
model = Model(input=base_model.input, output=predictions)


model.summary()



Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 111, 111, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 111, 111, 32) 96          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 111, 111, 32) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________

  app.launch_new_instance()


In [None]:
model.load_weights(weights_path)

In [61]:
#Atutomatic rename with epoch number and val accuracy:
#filepath="checkpoints/weights-improvement-epeoch-{epoch:02d}-val_acc-{val_acc:.2f}.hdf5"
modelName= "InceptionTutorial"
#save the best weights over the same file with the model name

#filepath="checkpoints/"+modelName+"_bestweights.hdf5"
filepath=modelName+"_bestweights.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

model.compile(Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

In [62]:
stepsPerEpoch= (train_generator.samples+ (batchSize-1)) // batchSize
print("stepsPerEpoch: ", stepsPerEpoch)

validationSteps=(validation_generator.samples+ (batchSize-1)) // batchSize
print("validationSteps: ", validationSteps)


#validationSteps=(test_generator.samples+ (batchSize-1)) // batchSize
#print("validationSteps: ", validationSteps)

stepsPerEpoch:  1897
validationSteps:  53


In [None]:
train_generator.reset()
validation_generator.reset()

# Fit the model
history = model.fit_generator(
    train_generator, 
    validation_data = validation_generator,
    epochs = 10,
    steps_per_epoch = stepsPerEpoch,
    validation_steps= validationSteps,
    callbacks=callbacks_list,
    verbose=1)


Epoch 1/10
Epoch 2/10
   1/1897 [..............................] - ETA: 5:26 - loss: 1.1284 - accuracy: 0.6562



Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

In [None]:
# list all data in history
print(history.history.keys())
# summarize history for accuracy
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'Validation'], loc='upper left')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
# Predict & submit 
# Uncomment to load previous model
# model = tf.keras.models.load_model('/kaggle/input/ml-model-v2/whole_model_v4.h5')

# Test data generator -> Rescale image size
test_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale = 1./255)

# apply test_datagen to input files
test_generator = test_datagen.flow_from_directory(
        'test/',
        target_size=(224, 224),
        shuffle = False,
        class_mode=None,
        batch_size=1)

# Get the filenames & remove directory specification in front of filename
filenames = [filename for filename in test_generator.filenames]

# Not predicting in batches but each inidividual item, therefore we need to know the amount of predictions
nb_samples = len(filenames)

# Make predictions, returns probabilities for each class
print(f'Making predictions....')
predictions = model.predict_generator(test_generator,steps = nb_samples, verbose=1)

# Assign prediction to class with highest probability
y_pred_labels = np.argmax(predictions, axis = 1)

# Map predictions to the correct labels
labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in y_pred_labels]

# Submit file
ff = [f[5:] for f in filenames]

submission = pd.DataFrame({'name':ff,'category':predictions})
submission.to_csv('submission_augmentation_inception_2_full.csv', index=False)
print("Done!”)