In [91]:
import numpy as np 
import tensorflow as tf 
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import train_test_split
import cv2
import PIL

## Goal: Build a custom CNN model that predicts an insect's class

### GPU

In [4]:
# Ask TensorFlow which physical GPU devices it can see and show the result.
gpus = tf.config.list_physical_devices(device_type='GPU')
print("gpus", gpus)

gpus [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


### Settings

In [33]:
# Default image size and batch size used by the dataset class below.
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 8
EPOCHS = 20

VALIDATION_SPLIT = 0.2

# Seed NumPy's global RNG so NumPy-based shuffling is repeatable between runs.
np.random.seed(1)

# Class name -> integer class id. The class name is also the (lower-cased)
# name of the class directory inside the dataset folder.
labels = {
    "coleoptera" : 0,
    "diptera" : 1,
    "hemiptera" : 2,
    "hymenoptera" : 3,
    "lepidoptera" : 4,
    "odonata" : 5
}

# Derived from `labels` so the two can never drift apart (still 6 here).
NB_CLASSES = len(labels)

# Path of the insect dataset. Set the ARTAXOR_DATASET_PATH environment variable
# to run the notebook on another machine; the default is the original location.
DATASET_PATH = os.environ.get(
    "ARTAXOR_DATASET_PATH",
    "/home/valentin/Desktop/deep_learning/practice_learn/datasets/ArTaxOr",
)

# Relative paths of the form "<label>/<file name>" for every image of every class.
# - `sorted` is needed because os.listdir returns entries in arbitrary order,
#   which would make any later seeded shuffle non-reproducible.
# - Only regular files are kept, so sub-directories inside a class folder are
#   not mistaken for images.
ALL_IMAGES = [
    cls + "/" + file_name
    for cls in labels
    for file_name in sorted(os.listdir(os.path.join(DATASET_PATH, cls.capitalize())))
    if os.path.isfile(os.path.join(DATASET_PATH, cls.capitalize(), file_name))
]
print(f"found a total of {len(ALL_IMAGES)} images ")

found a total of 12964 images 


### Dataset

In [110]:
import random
import json


class InsectDataset(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that loads insect images from disk one batch at a time.

    Every entry of ``x_set`` is a relative path of the form
    ``"<label>/<file name>"`` where ``<label>`` is a key of the module-level
    ``labels`` dict. The image itself is read from
    ``DATASET_PATH/<Label>/<file name>`` (class directory capitalized).

    WARNING: shuffle ``x_set`` before building the dataset; batches are cut
    from it in the order given.
    """

    def __init__(self, x_set, image_size=IMAGE_SIZE, batch_size=BATCH_SIZE):
        """
        Args:
            x_set: sequence of relative image paths (``"<label>/<file name>"``).
            image_size: target size handed to ``cv2.resize`` as ``dsize``,
                i.e. ``(width, height)``.
            batch_size: number of images per batch (the last batch may be
                smaller).
        """
        # Initialise the Keras base class; safe on versions where it defines
        # no __init__ of its own.
        super().__init__()
        self.x_set = x_set
        self.image_size = image_size
        self.batch_size = batch_size
        # The label of an image is the first component of its relative path.
        self.y_set = [path.split("/")[0] for path in x_set]

    def __len__(self):
        """Return the number of batches as a plain ``int`` (ceil division)."""
        # len() must return a Python int; a tensor here makes len(ds) raise.
        return (len(self.x_set) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, idx):
        """Load batch number ``idx``.

        Returns:
            A tuple ``(images, targets)`` of NumPy float32 arrays: ``images``
            stacks the resized images scaled to [0, 1], ``targets`` holds one
            one-hot row per image with ``len(labels)`` columns.

        Raises:
            IndexError: if ``idx`` is not a valid batch index.
            FileNotFoundError: if an image cannot be read by OpenCV.
        """
        if not 0 <= idx < len(self):
            raise IndexError(f"batch index {idx} out of range for {len(self)} batches")

        # idx counts batches, not samples: batch k covers samples
        # [k * batch_size, (k + 1) * batch_size).
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_paths = self.x_set[start:stop]
        batch_labels = self.y_set[start:stop]

        images = []
        for path in batch_paths:
            # Capitalize only the class directory: str.capitalize() on the
            # whole path would also lower-case the file name.
            class_dir, _, file_name = path.partition("/")
            image_path = os.path.join(DATASET_PATH, class_dir.capitalize(), file_name)

            # cv2.imread signals failure by returning None instead of raising.
            array = cv2.imread(image_path)
            if array is None:
                raise FileNotFoundError(f"could not read image: {image_path}")

            # Bring every image to a common size so the batch can be stacked.
            # Channel order is left as loaded by OpenCV (BGR).
            array = cv2.resize(array, self.image_size)

            # normalizing : [0-1]
            images.append(array.astype(np.float32) / 255.0)

        # one hot encoding: pick the identity-matrix row of each class id
        class_ids = [labels[name] for name in batch_labels]
        targets = np.eye(len(labels), dtype=np.float32)[class_ids]

        return np.stack(images), targets
            
            