In [1]:
import numpy as np 
import pandas as pd 
import tensorflow as tf
import os
import glob
import csv
import matplotlib.pyplot as plt
import shutil
import math
import datetime

from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing import image
from keras.applications.nasnet import NASNetMobile, NASNetLarge
from keras.layers import Activation, Dense, Input, Dropout, MaxPooling2D, GlobalMaxPooling2D, GlobalAveragePooling2D, GaussianDropout, BatchNormalization, Flatten
from keras.models import Model
from keras.optimizers import Adam, RMSprop, sgd

# Seed NumPy's global random generator so the random choices made later in
# the notebook are repeatable.
np.random.seed(9876)

# Size of the network input (images are INPUT_SIZE x INPUT_SIZE pixels).
INPUT_SIZE = 331
# Number of classes.
nb_classes = 35
# Number of elements in the dataset.
rows = 2205
# Batch size.
bs = 128
# Number of epochs.
nb_epoch = 5

Using TensorFlow backend.


In [0]:
def load_data(username,key):
  """Download and unpack the competition dataset from Kaggle.

  Exports the credentials as the ``KAGGLE_USERNAME`` / ``KAGGLE_KEY``
  environment variables, then runs the ``kaggle`` command-line tool and
  ``unzip``. The ``!`` lines are IPython shell escapes, so this function only
  works when the cell is executed inside a notebook.

  Args:
    username: Kaggle account name.
    key: Kaggle API token for that account.
  """
  # Function that downloads the data from Kaggle.
  os.environ['KAGGLE_USERNAME'] = username
  os.environ['KAGGLE_KEY'] = key
  !kaggle competitions download -c aw18-19
  !ls /content
  # Dataset.zip is looked up relative to the current working directory.
  !unzip Dataset.zip
  !ls /content/Dataset/train
  
def isTrain():
  """Randomly decide whether one sample stays in the training split.

  Returns:
    True with probability 0.80, False otherwise.
  """
  # A single uniform draw in [0, 1) from NumPy's global generator.
  return bool(np.random.random() < 0.80)

    
def data_split():
  """Hold out roughly 20% of every breed's training images for validation.

  Creates ``/content/Dataset/validation`` with one sub-folder per breed found
  in ``/content/Dataset/train`` and moves each ``*.jpg`` for which
  ``isTrain()`` returns False into the matching validation folder. An empty
  ``/content/Dataset/models`` folder is (re)created as well.

  The function can be run more than once: images held out by a previous run
  are moved back into ``train`` before the new split is drawn, so repeated
  runs never shrink the dataset.

  Side effect: the working directory is ``/content`` when the function
  returns.
  """
  dataset_dir = '/content/Dataset'
  train_dir = os.path.join(dataset_dir, 'train')
  validation_dir = os.path.join(dataset_dir, 'validation')
  models_dir = os.path.join(dataset_dir, 'models')

  # Show what the dataset folder contains before touching it.
  print(sorted(os.listdir(dataset_dir)))

  # Give back to the training set whatever an earlier run held out; deleting
  # the old validation folder outright would lose those images for good.
  if os.path.isdir(validation_dir):
    for breed in os.listdir(validation_dir):
      held_out = glob.glob(os.path.join(validation_dir, breed, '*.jpg'))
      if held_out:
        os.makedirs(os.path.join(train_dir, breed), exist_ok=True)
      for filename in held_out:
        shutil.move(filename, os.path.join(train_dir, breed, os.path.basename(filename)))
    shutil.rmtree(validation_dir)
  shutil.rmtree(models_dir, ignore_errors=True)

  os.mkdir(models_dir)
  os.mkdir(validation_dir)

  for breed in os.listdir(train_dir):
    breed_dir = os.path.join(train_dir, breed)
    # Only real class folders get a validation counterpart; a stray file here
    # would otherwise become an empty extra class in the validation set.
    if not os.path.isdir(breed_dir):
      continue
    os.mkdir(os.path.join(validation_dir, breed))
    for filename in glob.glob(os.path.join(breed_dir, '*.jpg')):
      if not isTrain():
        # basename() instead of a fixed path-component index, so the move does
        # not depend on how deep the dataset folder is.
        shutil.move(filename, os.path.join(validation_dir, breed, os.path.basename(filename)))

  # Leave the working directory at /content: later cells write files such as
  # result.csv relative to the current directory.
  os.chdir('/content')
  
  
def createModelNASNetLarge():
  """Build and compile a classifier on top of a frozen NASNetLarge base.

  The base is loaded through Keras with ImageNet weights, no top layers and
  global average pooling, for INPUT_SIZE x INPUT_SIZE RGB inputs. On top of
  it sit two ReLU dense layers (4x and 2x ``nb_classes`` units) and a softmax
  layer with ``nb_classes`` outputs, with Gaussian dropout (rate 0.2) in
  between.

  Returns:
    The model, compiled with the Adam optimizer, categorical cross-entropy
    loss and the accuracy metric.
  """
  # Take the NASNetLarge model from Keras.
  backbone = NASNetLarge(
      weights='imagenet',
      include_top=False,
      input_shape=(INPUT_SIZE, INPUT_SIZE, 3),
      pooling='avg')

  # Classification head: dropout on the pooled features, then each hidden
  # dense layer followed by the same dropout.
  head = GaussianDropout(0.2)(backbone.output)
  for width in (nb_classes * 4, nb_classes * 2):
    head = Dense(width, activation='relu')(head)
    head = GaussianDropout(0.2)(head)
  class_probs = Dense(nb_classes, activation='softmax')(head)

  classifier = Model(inputs=backbone.input, outputs=class_probs)

  # The layers of the base model do not need to be retrained.
  for base_layer in backbone.layers:
    base_layer.trainable = False

  classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
  return classifier

def train(model):
  """Train ``model`` on the notebook's generators and plot its learning curves.

  Reads the module-level ``train_generator``, ``validation_generator`` and
  ``nb_epoch``. After fitting, prints the recorded history keys and shows two
  figures (accuracy and loss), each with the per-epoch training and
  validation curves.

  Args:
    model: compiled Keras model to fit.
  """
  # Early stopping: validation accuracy is monitored (higher is better) and
  # training is terminated once it stops improving.
  early_stopping = EarlyStopping(monitor='val_acc', mode='max')

  # The number of batches per epoch is taken from each generator itself.
  # `rows` is the size of the whole dataset, so once part of it has been moved
  # to the validation folder ceil(rows/bs) overstates both splits and makes
  # every epoch cycle through the (much smaller) validation set several times.
  history = model.fit_generator(train_generator,
                                steps_per_epoch=len(train_generator),
                                epochs=nb_epoch,
                                callbacks=[early_stopping],
                                validation_data=validation_generator,
                                validation_steps=len(validation_generator))

  print(history.history.keys())
  # Per-epoch plots: first accuracy, then loss.
  for metric, label in (('acc', 'accuracy'), ('loss', 'loss')):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + label)
    plt.ylabel(label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

def predictions():
  """Run the module-level ``model`` on every image of the test folder.

  Each file in ``/content/Dataset/test`` is loaded, resized to
  INPUT_SIZE x INPUT_SIZE and divided by 255, the same rescale factor the
  notebook's image generators apply.

  Side effect: stores the image ids (file name up to the first dot) in the
  module-level list ``filenames``, in the same order as the returned rows.
  ``createOutput`` reads that name; building it only as a local made the
  output step fail with a NameError.

  Returns:
    The result of ``model.predict`` on the stacked test images, one row per
    image.
  """
  global filenames

  pathTest = '/content/Dataset/test'
  t_images = []
  ids = []
  for filename in os.listdir(pathTest):
    img = image.load_img(os.path.join(pathTest, filename), target_size=(INPUT_SIZE, INPUT_SIZE))
    img = image.img_to_array(img)
    img /= 255
    t_images.append(img)
    ids.append(filename.split('.')[0])

  x_test1 = np.array(t_images)
  pred = model.predict(x_test1)
  # Publish the ids only once the predictions exist, so the two always match.
  filenames = ids
  return pred

def createOutput(pred, filenames=None):
  """Write the predicted breed of every test image to ``result.csv``.

  The file is written in the current working directory in the format
  ``id,breed``: one header line followed by one line per image.

  Args:
    pred: one row of class scores per test image; the index of the largest
      score selects the breed.
    filenames: image ids, in the same order as the rows of ``pred``. When
      omitted, a module-level ``filenames`` list is used if one exists;
      otherwise the ids are rebuilt from the test folder listing.

  Raises:
    ValueError: if the number of ids differs from the number of predictions.
  """
  if filenames is None:
    # The original body read a global `filenames` that nothing defined, which
    # raised NameError; look it up defensively instead.
    filenames = globals().get('filenames')
  if filenames is None:
    # NOTE(review): this assumes os.listdir returns the test files in the same
    # order as when the predictions were computed - pass `filenames`
    # explicitly whenever that cannot be guaranteed.
    filenames = [name.split('.')[0] for name in os.listdir('/content/Dataset/test')]
  if len(filenames) != len(pred):
    raise ValueError("got %d ids for %d predictions" % (len(filenames), len(pred)))

  # Breed names sorted alphabetically; index i is the breed for class i.
  breed = sorted(os.listdir('/content/Dataset/train'))
  labels = [breed[int(np.argmax(scores))] for scores in pred]
  breed_by_id = dict(zip(filenames, labels))

  # Write the predictions as CSV in the format "id,breed". The header is
  # written explicitly so it is always the first line.
  with open('result.csv', 'w', newline='') as f:
    writer = csv.writer(f, lineterminator='\n')
    writer.writerow(['id', 'breed'])
    writer.writerows(breed_by_id.items())

  # To download the file just saved, open the list of files present on the virtual machine.

**LOAD DATA**

In [0]:
# A Kaggle username and API key must be provided to connect to Kaggle with your own account.
# Replace the two placeholders below; do not share or commit the notebook with a real token in it.
username = "your-kaggle-name"
key = "your-kaggle-token"
load_data(username,key)

In [0]:
# Split the data on disk first, so each generator below reads its own folder.
data_split()

# Pre-processing (both training and validation) and data augmentation
# (training only).
train_datagen = image.ImageDataGenerator(
    horizontal_flip=True,
    height_shift_range=.15,
    width_shift_range=.15,
    rotation_range=15,
    rescale=1./255)

test_datagen = image.ImageDataGenerator(rescale=1./255)

# Images are read from their folders batch by batch, so that they do not fill
# all the available RAM. Both generators share the same reading options.
_flow_kwargs = dict(
    target_size=(INPUT_SIZE, INPUT_SIZE),
    batch_size=bs,
    class_mode='categorical',
    shuffle=True,
    seed=1234)

train_generator = train_datagen.flow_from_directory('/content/Dataset/train', **_flow_kwargs)

validation_generator = test_datagen.flow_from_directory('/content/Dataset/validation', **_flow_kwargs)

model = createModelNASNetLarge()
train(model)


**PREDICT** & **CREATE OUTPUT FILE**

In [0]:
# Run the model on the test images, then write the predicted breeds to result.csv.
pred = predictions()
createOutput(pred)
