# Modification des paramètres

Ici, nous allons essayer d'optimiser les paramètres d'entraînement ainsi que le learning rate. Voici les caractéristiques du meilleur modèle que nous avons obtenu :

In [4]:
best_config = {'num_convs': (3, 3), 'conv_sizes': (3, 3), 'fc_sizes': (200, 30)}
with_dropout = True

Nous allons maintenant lancer le code ci-dessous pour essayer de trouver les valeurs optimales pour le learning rate et la batch size.

In [2]:
!pip install torch
!pip install torchvision

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0[0m[39;49m -> [0m[32;49m23.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m
Defaulting to user installation because normal site-packages is not writeable
Collecting torchvision
  Downloading torchvision-0.14.1-cp310-cp310-manylinux1_x86_64.whl (24.2 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.2/24.2 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
Installing collected packages: torchvision
Successfully installed torchvision-0.14.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0[0m[39;49m -> [0m[32;49m23.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython

In [1]:
import random
import numpy as np
import os
import time
%matplotlib inline
import matplotlib.pyplot as mp
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as om
import torchvision as tv
import torch.utils.data as dat

if torch.cuda.is_available():     # Make sure GPU is available
    print("Computing with GPU")
    dev = torch.device("cuda:0")
    kwar = {'num_workers': 8, 'pin_memory': True}
    cpu = torch.device("cpu")
else:
    print("Warning: CUDA not found, CPU only.")
    dev = torch.device("cpu")
    kwar = {}
    cpu = torch.device("cpu")

np.random.seed(551)

Computing with GPU


In [2]:
class MedNet(nn.Module):
    def __init__(self,xDim,yDim,numC, num_convs=(5, 10), conv_sizes=(7, 7), fc_sizes=(400, 80), add_dropout=False): # Pass image dimensions and number of labels when initializing a model   
        super(MedNet,self).__init__()  # Extends the basic nn.Module to the MedNet class
        # The parameters here define the architecture of the convolutional portion of the CNN. Each image pixel
        # has numConvs convolutions applied to it, and convSize is the number of surrounding pixels included
        # in each convolution. Lastly, the numNodesToFC formula calculates the final, remaining nodes at the last
        # level of convolutions so that this can be "flattened" and fed into the fully connected layers subsequently.
        # Each convolution makes the image a little smaller (convolutions do not, by default, "hang over" the edges
        # of the image), and this makes the effective image dimension decreases.
        
        self.add_dropout = add_dropout
        
        numConvs1 = num_convs[0]
        convSize1 = conv_sizes[0]
        numConvs2 = num_convs[1]
        convSize2 = conv_sizes[1]
        numNodesToFC = numConvs2*(xDim-(convSize1-1)-(convSize2-1))*(yDim-(convSize1-1)-(convSize2-1))

        # nn.Conv2d(channels in, channels out, convolution height/width)
        # 1 channel -- grayscale -- feeds into the first convolution. The same number output from one layer must be
        # fed into the next. These variables actually store the weights between layers for the model.
        
        self.cnv1 = nn.Conv2d(1, numConvs1, convSize1)
        self.cnv2 = nn.Conv2d(numConvs1, numConvs2, convSize2)

        # These parameters define the number of output nodes of each fully connected layer.
        # Each layer must output the same number of nodes as the next layer begins with.
        # The final layer must have output nodes equal to the number of labels used.
        
        fcSize1 = fc_sizes[0]
        fcSize2 = fc_sizes[1]
        
        # nn.Linear(nodes in, nodes out)
        # Stores the weights between the fully connected layers
        
        self.ful1 = nn.Linear(numNodesToFC,fcSize1)
        if self.add_dropout: self.drop1 = nn.Dropout(0.5)
        self.ful2 = nn.Linear(fcSize1, fcSize2)
        if self.add_dropout: self.drop2 = nn.Dropout(0.5)
        self.ful3 = nn.Linear(fcSize2,numC)
        
    def forward(self,x):
        # This defines the steps used in the computation of output from input.
        # It makes uses of the weights defined in the __init__ method.
        # Each assignment of x here is the result of feeding the input up through one layer.
        # Here we use the activation function elu, which is a smoother version of the popular relu function.
        
        x = F.elu(self.cnv1(x)) # Feed through first convolutional layer, then apply activation
        x = F.elu(self.cnv2(x)) # Feed through second convolutional layer, apply activation
        x = x.view(-1,self.num_flat_features(x)) # Flatten convolutional layer into fully connected layer
        x = F.elu(self.ful1(x)) # Feed through first fully connected layer, apply activation
        if self.add_dropout: x = self.drop1(x)
        x = F.elu(self.ful2(x)) # Feed through second FC layer, apply output
        if self.add_dropout: x = self.drop2(x)
        x = self.ful3(x)        # Final FC layer to output. No activation, because it's used to calculate loss
        return x

    def num_flat_features(self, x):  # Count the individual nodes in a layer
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
    
def train_model(params, 
                trainX, 
                trainY, 
                validX, 
                validY, 
                testX, 
                testY, 
                learnRate=0.01, 
                batchSize = 300,
                verbose=False, 
                add_dropout=False): # params est un dictionnaire qui contient les structures de CNN à essayer
    
    model = MedNet(imageWidth,imageHeight,numClass, add_dropout=add_dropout, **params).to(dev)

    learnRate = learnRate          # Define a learning rate.
    maxEpochs = 200           # Maximum training epochs
    t2vRatio = 1.2            # Maximum allowed ratio of validation to training loss
    t2vEpochs = 3             # Number of consecutive epochs before halting if validation loss exceeds above limit
    batchSize = batchSize          # Batch size. Going too large will cause an out-of-memory error.
    trainBats = nTrain // batchSize       # Number of training batches per epoch. Round down to simplify last batch
    validBats = nValid // batchSize       # Validation batches. Round down
    testBats = -(-nTest // batchSize)     # Testing batches. Round up to include all
    CEweights = torch.zeros(numClass)     # This takes into account the imbalanced dataset.
    for i in trainY.tolist():             #      By making rarer images count more to the loss, 
        CEweights[i].add_(1)              #      we prevent the model from ignoring them.
    CEweights = 1. / CEweights.clamp_(min=1.)                     # Weights should be inversely related to count
    CEweights = (CEweights * numClass / CEweights.sum()).to(dev)  # The weights average to 1
    opti = om.SGD(model.parameters(), lr = learnRate)   # Initialize an optimizer


    for i in range(maxEpochs):
        model.train()                     # Set model to training mode
        epochLoss = 0.
        permute = torch.randperm(nTrain)  # Shuffle data to randomize batches
        trainX = trainX[permute,:,:,:]
        trainY = trainY[permute]
        for j in range(trainBats):        # Iterate over batches
            opti.zero_grad()              # Zero out gradient accumulated in optimizer
            batX = trainX[j*batchSize:(j+1)*batchSize,:,:,:].to(dev)   # Slice shuffled data into batches
            batY = trainY[j*batchSize:(j+1)*batchSize].to(dev)         # .to(dev) moves these batches to the GPU
            yOut = model(batX)            # Evalute predictions
            loss = F.cross_entropy(yOut, batY,weight=CEweights)        # Compute loss
            epochLoss += loss.item()      # Add loss
            loss.backward()               # Backpropagate loss
            opti.step()                   # Update model weights using optimizer
        validLoss = 0.
        permute = torch.randperm(nValid)  # We go through the exact same steps, without backprop / optimization
        validX = validX[permute,:,:,:]    # in order to evaluate the validation loss
        validY = validY[permute]
        model.eval()                      # Set model to evaluation mode
        with torch.no_grad():             # Temporarily turn off gradient descent
            for j in range(validBats):
                opti.zero_grad()
                batX = validX[j*batchSize:(j+1)*batchSize,:,:,:].to(dev)
                batY = validY[j*batchSize:(j+1)*batchSize].to(dev)
                yOut = model(batX)
                validLoss += F.cross_entropy(yOut, batY,weight=CEweights).item()
        epochLoss /= trainBats            # Average loss over batches and print
        validLoss /= validBats
        if verbose: print("Epoch = {:-3}; Training loss = {:.4f}; Validation loss = {:.4f}".format(i,epochLoss,validLoss))
        if validLoss > t2vRatio * epochLoss:
            t2vEpochs -= 1                # Test if validation loss exceeds halting threshold
            if t2vEpochs < 1:
                if verbose: print("Validation loss too high; halting to prevent overfitting")
                break

    confuseMtx = np.zeros((numClass,numClass),dtype=int)    # Create empty confusion matrix
    model.eval()
    with torch.no_grad():
        permute = torch.randperm(nTest)                     # Shuffle test data
        testX = testX[permute,:,:,:]
        testY = testY[permute]
        for j in range(testBats):                           # Iterate over test batches
            batX = testX[j*batchSize:(j+1)*batchSize,:,:,:].to(dev)
            batY = testY[j*batchSize:(j+1)*batchSize].to(dev)
            yOut = model(batX)                              # Pass test batch through model
            pred = yOut.max(1,keepdim=True)[1]              # Generate predictions by finding the max Y values
            for j in torch.cat((batY.view_as(pred), pred),dim=1).tolist(): # Glue together Actual and Predicted to
                confuseMtx[j[0],j[1]] += 1                  # make (row, col) pairs, and increment confusion matrix
    correct = sum([confuseMtx[i,i] for i in range(numClass)])   # Sum over diagonal elements to count correct predictions
    if verbose: 
        print(f"Correct predictions: {correct} of {nTest}. Accuracy: {correct/nTest*100}%")
        print("Confusion Matrix:")
        print(confuseMtx)
        print(classNames)
    return (model, correct/nTest)

  

dataDir = 'resized'               # The main data directory
classNames = os.listdir(dataDir)  # Each type of image can be found in its own subdirectory
numClass = len(classNames)        # Number of types = number of subdirectories
imageFiles = [[os.path.join(dataDir,classNames[i],x) for x in os.listdir(os.path.join(dataDir,classNames[i]))]
              for i in range(numClass)]                     # A nested list of filenames
numEach = [len(imageFiles[i]) for i in range(numClass)]     # A count of each type of image
imageFilesList = []               # Created an un-nested list of filenames
imageClass = []                   # The labels -- the type of each individual image in the list
for i in range(numClass):
    imageFilesList.extend(imageFiles[i])
    imageClass.extend([i]*numEach[i])
numTotal = len(imageClass)        # Total number of images
imageWidth, imageHeight = Image.open(imageFilesList[0]).size         # The dimensions of each image

print("There are",numTotal,"images in",numClass,"distinct categories")
print("Label names:",classNames)
print("Label counts:",numEach)
print("Image dimensions:",imageWidth,"x",imageHeight)

toTensor = tv.transforms.ToTensor()
def scaleImage(x):          # Pass a PIL image, return a tensor
    y = toTensor(x)
    if(y.min() < y.max()):  # Assuming the image isn't empty, rescale so its values run from 0 to 1
        y = (y - y.min())/(y.max() - y.min()) 
    z = y - y.mean()        # Subtract the mean value of the image
    return z

imageTensor = torch.stack([scaleImage(Image.open(x)) for x in imageFilesList])  # Load, scale, and stack image (X) tensor
classTensor = torch.tensor(imageClass)  # Create label (Y) tensor
print("Rescaled min pixel value = {:1.3}; Max = {:1.3}; Mean = {:1.3}"
      .format(imageTensor.min().item(),imageTensor.max().item(),imageTensor.mean().item()))

validFrac = 0.1   # Define the fraction of images to move to validation dataset
testFrac = 0.1    # Define the fraction of images to move to test dataset
validList = []
testList = []
trainList = []

for i in range(numTotal):
    rann = np.random.random() # Randomly reassign images
    if rann < validFrac:
        validList.append(i)
    elif rann < testFrac + validFrac:
        testList.append(i)
    else:
        trainList.append(i)
        
nTrain = len(trainList)  # Count the number in each set
nValid = len(validList)
nTest = len(testList)
print("Training images =",nTrain,"Validation =",nValid,"Testing =",nTest)
trainIds = torch.tensor(trainList)    # Slice the big image and label tensors up into
validIds = torch.tensor(validList)    #       training, validation, and testing tensors
testIds = torch.tensor(testList)
trainX = imageTensor[trainIds,:,:,:]
trainY = classTensor[trainIds]
validX = imageTensor[validIds,:,:,:]
validY = classTensor[validIds]
testX = imageTensor[testIds,:,:,:]
testY = classTensor[testIds]

There are 58954 images in 6 distinct categories
Label names: ['ChestCT', 'CXR', 'BreastMRI', 'Hand', 'HeadCT', 'AbdomenCT']
Label counts: [10000, 10000, 8954, 10000, 10000, 10000]
Image dimensions: 64 x 64
Rescaled min pixel value = -0.786; Max = 0.972; Mean = -3.33e-09
Training images = 47230 Validation = 5878 Testing = 5846


In [5]:
learning_rates = [0.001, 0.01, 0.1]
batch_sizes = [50, 100, 200, 300, 500]
results = []
for lr in learning_rates:
    for batch in batch_sizes:
        res = train_model(best_config, 
                          trainX, 
                          trainY, 
                          validX, 
                          validY, 
                          testX, 
                          testY,
                          learnRate = lr,
                          batchSize = batch,
                          verbose=True, 
                          add_dropout=True)
        results.append(((lr, batch), res[1]))
        print(f"Parameters: {(lr, batch)} // Accuracy: {res*100}%")

        

Epoch =   0; Training loss = 1.7757; Validation loss = 1.7275
Epoch =   1; Training loss = 1.2879; Validation loss = 0.6153
Epoch =   2; Training loss = 0.5264; Validation loss = 0.2926
Epoch =   3; Training loss = 0.3555; Validation loss = 0.2138
Epoch =   4; Training loss = 0.2862; Validation loss = 0.1718
Epoch =   5; Training loss = 0.2368; Validation loss = 0.1419
Epoch =   6; Training loss = 0.1994; Validation loss = 0.1176
Epoch =   7; Training loss = 0.1788; Validation loss = 0.1018
Epoch =   8; Training loss = 0.1558; Validation loss = 0.0874
Epoch =   9; Training loss = 0.1351; Validation loss = 0.0767
Epoch =  10; Training loss = 0.1251; Validation loss = 0.0682
Epoch =  11; Training loss = 0.1149; Validation loss = 0.0619
Epoch =  12; Training loss = 0.1032; Validation loss = 0.0553
Epoch =  13; Training loss = 0.0965; Validation loss = 0.0504
Epoch =  14; Training loss = 0.0867; Validation loss = 0.0461
Epoch =  15; Training loss = 0.0828; Validation loss = 0.0410
Epoch = 

Correct predictions: 5839 of 5846. Accuracy: 99.88026000684228%
Confusion Matrix:
[[ 984    0    0    0    1    1]
 [   0  959    0    0    0    0]
 [   0    1  980    0    0    0]
 [   0    1    0 1012    0    0]
 [   1    0    0    1  972    1]
 [   0    0    0    0    0  932]]
['CXR', 'ChestCT', 'AbdomenCT', 'HeadCT', 'Hand', 'BreastMRI']
Parameters: (0.001, 50) // Accuracy: 99.88026000684228%
Epoch =   0; Training loss = 1.6196; Validation loss = 1.2606
Epoch =   1; Training loss = 0.9556; Validation loss = 0.6029
Epoch =   2; Training loss = 0.6197; Validation loss = 0.4060
Epoch =   3; Training loss = 0.4835; Validation loss = 0.3193
Epoch =   4; Training loss = 0.4083; Validation loss = 0.2726
Epoch =   5; Training loss = 0.3558; Validation loss = 0.2380
Epoch =   6; Training loss = 0.3232; Validation loss = 0.2165
Epoch =   7; Training loss = 0.2956; Validation loss = 0.1987
Epoch =   8; Training loss = 0.2721; Validation loss = 0.1839
Epoch =   9; Training loss = 0.2525; Valid

Epoch = 126; Training loss = 0.0283; Validation loss = 0.0240
Epoch = 127; Training loss = 0.0272; Validation loss = 0.0261
Epoch = 128; Training loss = 0.0284; Validation loss = 0.0259
Epoch = 129; Training loss = 0.0279; Validation loss = 0.0235
Epoch = 130; Training loss = 0.0280; Validation loss = 0.0254
Epoch = 131; Training loss = 0.0272; Validation loss = 0.0234
Epoch = 132; Training loss = 0.0269; Validation loss = 0.0214
Epoch = 133; Training loss = 0.0269; Validation loss = 0.0230
Epoch = 134; Training loss = 0.0269; Validation loss = 0.0224
Epoch = 135; Training loss = 0.0273; Validation loss = 0.0258
Epoch = 136; Training loss = 0.0272; Validation loss = 0.0241
Epoch = 137; Training loss = 0.0268; Validation loss = 0.0241
Epoch = 138; Training loss = 0.0277; Validation loss = 0.0229
Epoch = 139; Training loss = 0.0262; Validation loss = 0.0227
Epoch = 140; Training loss = 0.0242; Validation loss = 0.0218
Epoch = 141; Training loss = 0.0261; Validation loss = 0.0235
Epoch = 

Epoch =  56; Training loss = 0.1045; Validation loss = 0.0690
Epoch =  57; Training loss = 0.1034; Validation loss = 0.0686
Epoch =  58; Training loss = 0.1010; Validation loss = 0.0664
Epoch =  59; Training loss = 0.0983; Validation loss = 0.0659
Epoch =  60; Training loss = 0.0979; Validation loss = 0.0653
Epoch =  61; Training loss = 0.0985; Validation loss = 0.0633
Epoch =  62; Training loss = 0.0962; Validation loss = 0.0623
Epoch =  63; Training loss = 0.0932; Validation loss = 0.0611
Epoch =  64; Training loss = 0.0918; Validation loss = 0.0607
Epoch =  65; Training loss = 0.0905; Validation loss = 0.0598
Epoch =  66; Training loss = 0.0890; Validation loss = 0.0593
Epoch =  67; Training loss = 0.0867; Validation loss = 0.0586
Epoch =  68; Training loss = 0.0870; Validation loss = 0.0567
Epoch =  69; Training loss = 0.0868; Validation loss = 0.0563
Epoch =  70; Training loss = 0.0860; Validation loss = 0.0549
Epoch =  71; Training loss = 0.0837; Validation loss = 0.0553
Epoch = 

Epoch = 189; Training loss = 0.0275; Validation loss = 0.0172
Epoch = 190; Training loss = 0.0272; Validation loss = 0.0173
Epoch = 191; Training loss = 0.0276; Validation loss = 0.0173
Epoch = 192; Training loss = 0.0276; Validation loss = 0.0172
Epoch = 193; Training loss = 0.0271; Validation loss = 0.0170
Epoch = 194; Training loss = 0.0270; Validation loss = 0.0171
Epoch = 195; Training loss = 0.0263; Validation loss = 0.0171
Epoch = 196; Training loss = 0.0261; Validation loss = 0.0173
Epoch = 197; Training loss = 0.0260; Validation loss = 0.0163
Epoch = 198; Training loss = 0.0266; Validation loss = 0.0162
Epoch = 199; Training loss = 0.0260; Validation loss = 0.0165
Correct predictions: 5835 of 5846. Accuracy: 99.8118371536093%
Confusion Matrix:
[[ 985    0    0    0    0    1]
 [   0  958    1    0    0    0]
 [   0    2  979    0    0    0]
 [   0    2    0 1009    2    0]
 [   0    0    0    2  972    1]
 [   0    0    0    0    0  932]]
['CXR', 'ChestCT', 'AbdomenCT', 'HeadC

Epoch = 115; Training loss = 0.1006; Validation loss = 0.0711
Epoch = 116; Training loss = 0.0994; Validation loss = 0.0702
Epoch = 117; Training loss = 0.1001; Validation loss = 0.0699
Epoch = 118; Training loss = 0.1013; Validation loss = 0.0691
Epoch = 119; Training loss = 0.0982; Validation loss = 0.0661
Epoch = 120; Training loss = 0.0961; Validation loss = 0.0678
Epoch = 121; Training loss = 0.0945; Validation loss = 0.0676
Epoch = 122; Training loss = 0.0968; Validation loss = 0.0666
Epoch = 123; Training loss = 0.0965; Validation loss = 0.0671
Epoch = 124; Training loss = 0.0956; Validation loss = 0.0656
Epoch = 125; Training loss = 0.0933; Validation loss = 0.0640
Epoch = 126; Training loss = 0.0941; Validation loss = 0.0627
Epoch = 127; Training loss = 0.0935; Validation loss = 0.0637
Epoch = 128; Training loss = 0.0916; Validation loss = 0.0642
Epoch = 129; Training loss = 0.0915; Validation loss = 0.0632
Epoch = 130; Training loss = 0.0925; Validation loss = 0.0603
Epoch = 

Epoch =  42; Training loss = 0.2919; Validation loss = 0.2058
Epoch =  43; Training loss = 0.2859; Validation loss = 0.1964
Epoch =  44; Training loss = 0.2823; Validation loss = 0.1916
Epoch =  45; Training loss = 0.2753; Validation loss = 0.1865
Epoch =  46; Training loss = 0.2718; Validation loss = 0.1915
Epoch =  47; Training loss = 0.2704; Validation loss = 0.1881
Epoch =  48; Training loss = 0.2618; Validation loss = 0.1835
Epoch =  49; Training loss = 0.2588; Validation loss = 0.1778
Epoch =  50; Training loss = 0.2554; Validation loss = 0.1786
Epoch =  51; Training loss = 0.2513; Validation loss = 0.1799
Epoch =  52; Training loss = 0.2457; Validation loss = 0.1719
Epoch =  53; Training loss = 0.2427; Validation loss = 0.1704
Epoch =  54; Training loss = 0.2421; Validation loss = 0.1690
Epoch =  55; Training loss = 0.2375; Validation loss = 0.1690
Epoch =  56; Training loss = 0.2359; Validation loss = 0.1661
Epoch =  57; Training loss = 0.2310; Validation loss = 0.1597
Epoch = 

Epoch = 175; Training loss = 0.0813; Validation loss = 0.0474
Epoch = 176; Training loss = 0.0784; Validation loss = 0.0463
Epoch = 177; Training loss = 0.0789; Validation loss = 0.0459
Epoch = 178; Training loss = 0.0778; Validation loss = 0.0451
Epoch = 179; Training loss = 0.0763; Validation loss = 0.0457
Epoch = 180; Training loss = 0.0781; Validation loss = 0.0420
Epoch = 181; Training loss = 0.0780; Validation loss = 0.0417
Epoch = 182; Training loss = 0.0758; Validation loss = 0.0441
Epoch = 183; Training loss = 0.0752; Validation loss = 0.0428
Epoch = 184; Training loss = 0.0739; Validation loss = 0.0430
Epoch = 185; Training loss = 0.0738; Validation loss = 0.0433
Epoch = 186; Training loss = 0.0725; Validation loss = 0.0424
Epoch = 187; Training loss = 0.0730; Validation loss = 0.0417
Epoch = 188; Training loss = 0.0727; Validation loss = 0.0409
Epoch = 189; Training loss = 0.0720; Validation loss = 0.0405
Epoch = 190; Training loss = 0.0720; Validation loss = 0.0404
Epoch = 

Epoch =  18; Training loss = 0.0363; Validation loss = 0.0195
Epoch =  19; Training loss = 0.0339; Validation loss = 0.0167
Epoch =  20; Training loss = 0.0307; Validation loss = 0.0172
Epoch =  21; Training loss = 0.0310; Validation loss = 0.0178
Epoch =  22; Training loss = 0.0277; Validation loss = 0.0144
Epoch =  23; Training loss = 0.0247; Validation loss = 0.0151
Epoch =  24; Training loss = 0.0254; Validation loss = 0.0151
Epoch =  25; Training loss = 0.0236; Validation loss = 0.0137
Epoch =  26; Training loss = 0.0247; Validation loss = 0.0141
Epoch =  27; Training loss = 0.0215; Validation loss = 0.0129
Epoch =  28; Training loss = 0.0220; Validation loss = 0.0155
Epoch =  29; Training loss = 0.0208; Validation loss = 0.0158
Epoch =  30; Training loss = 0.0197; Validation loss = 0.0118
Epoch =  31; Training loss = 0.0188; Validation loss = 0.0137
Epoch =  32; Training loss = 0.0187; Validation loss = 0.0128
Epoch =  33; Training loss = 0.0176; Validation loss = 0.0121
Epoch = 

Epoch =  80; Training loss = 0.0156; Validation loss = 0.0141
Epoch =  81; Training loss = 0.0135; Validation loss = 0.0131
Epoch =  82; Training loss = 0.0132; Validation loss = 0.0141
Epoch =  83; Training loss = 0.0127; Validation loss = 0.0139
Epoch =  84; Training loss = 0.0129; Validation loss = 0.0126
Epoch =  85; Training loss = 0.0130; Validation loss = 0.0132
Epoch =  86; Training loss = 0.0126; Validation loss = 0.0159
Epoch =  87; Training loss = 0.0144; Validation loss = 0.0127
Epoch =  88; Training loss = 0.0118; Validation loss = 0.0149
Epoch =  89; Training loss = 0.0121; Validation loss = 0.0137
Epoch =  90; Training loss = 0.0126; Validation loss = 0.0133
Epoch =  91; Training loss = 0.0123; Validation loss = 0.0136
Epoch =  92; Training loss = 0.0121; Validation loss = 0.0139
Epoch =  93; Training loss = 0.0111; Validation loss = 0.0134
Validation loss too high; halting to prevent overfitting
Correct predictions: 5837 of 5846. Accuracy: 99.8460485802258%
Confusion Ma

Epoch = 111; Training loss = 0.0157; Validation loss = 0.0135
Epoch = 112; Training loss = 0.0151; Validation loss = 0.0117
Epoch = 113; Training loss = 0.0147; Validation loss = 0.0137
Epoch = 114; Training loss = 0.0149; Validation loss = 0.0133
Epoch = 115; Training loss = 0.0145; Validation loss = 0.0085
Epoch = 116; Training loss = 0.0146; Validation loss = 0.0125
Epoch = 117; Training loss = 0.0145; Validation loss = 0.0127
Epoch = 118; Training loss = 0.0141; Validation loss = 0.0123
Epoch = 119; Training loss = 0.0134; Validation loss = 0.0127
Epoch = 120; Training loss = 0.0138; Validation loss = 0.0138
Epoch = 121; Training loss = 0.0134; Validation loss = 0.0135
Epoch = 122; Training loss = 0.0137; Validation loss = 0.0132
Epoch = 123; Training loss = 0.0136; Validation loss = 0.0118
Epoch = 124; Training loss = 0.0121; Validation loss = 0.0120
Epoch = 125; Training loss = 0.0129; Validation loss = 0.0121
Epoch = 126; Training loss = 0.0130; Validation loss = 0.0124
Epoch = 

Epoch = 100; Training loss = nan; Validation loss = nan
Epoch = 101; Training loss = nan; Validation loss = nan
Epoch = 102; Training loss = nan; Validation loss = nan
Epoch = 103; Training loss = nan; Validation loss = nan
Epoch = 104; Training loss = nan; Validation loss = nan
Epoch = 105; Training loss = nan; Validation loss = nan
Epoch = 106; Training loss = nan; Validation loss = nan
Epoch = 107; Training loss = nan; Validation loss = nan
Epoch = 108; Training loss = nan; Validation loss = nan
Epoch = 109; Training loss = nan; Validation loss = nan
Epoch = 110; Training loss = nan; Validation loss = nan
Epoch = 111; Training loss = nan; Validation loss = nan
Epoch = 112; Training loss = nan; Validation loss = nan
Epoch = 113; Training loss = nan; Validation loss = nan
Epoch = 114; Training loss = nan; Validation loss = nan
Epoch = 115; Training loss = nan; Validation loss = nan
Epoch = 116; Training loss = nan; Validation loss = nan
Epoch = 117; Training loss = nan; Validation los

Epoch =  12; Training loss = nan; Validation loss = nan
Epoch =  13; Training loss = nan; Validation loss = nan
Epoch =  14; Training loss = nan; Validation loss = nan
Epoch =  15; Training loss = nan; Validation loss = nan
Epoch =  16; Training loss = nan; Validation loss = nan
Epoch =  17; Training loss = nan; Validation loss = nan
Epoch =  18; Training loss = nan; Validation loss = nan
Epoch =  19; Training loss = nan; Validation loss = nan
Epoch =  20; Training loss = nan; Validation loss = nan
Epoch =  21; Training loss = nan; Validation loss = nan
Epoch =  22; Training loss = nan; Validation loss = nan
Epoch =  23; Training loss = nan; Validation loss = nan
Epoch =  24; Training loss = nan; Validation loss = nan
Epoch =  25; Training loss = nan; Validation loss = nan
Epoch =  26; Training loss = nan; Validation loss = nan
Epoch =  27; Training loss = nan; Validation loss = nan
Epoch =  28; Training loss = nan; Validation loss = nan
Epoch =  29; Training loss = nan; Validation los

Epoch = 159; Training loss = nan; Validation loss = nan
Epoch = 160; Training loss = nan; Validation loss = nan
Epoch = 161; Training loss = nan; Validation loss = nan
Epoch = 162; Training loss = nan; Validation loss = nan
Epoch = 163; Training loss = nan; Validation loss = nan
Epoch = 164; Training loss = nan; Validation loss = nan
Epoch = 165; Training loss = nan; Validation loss = nan
Epoch = 166; Training loss = nan; Validation loss = nan
Epoch = 167; Training loss = nan; Validation loss = nan
Epoch = 168; Training loss = nan; Validation loss = nan
Epoch = 169; Training loss = nan; Validation loss = nan
Epoch = 170; Training loss = nan; Validation loss = nan
Epoch = 171; Training loss = nan; Validation loss = nan
Epoch = 172; Training loss = nan; Validation loss = nan
Epoch = 173; Training loss = nan; Validation loss = nan
Epoch = 174; Training loss = nan; Validation loss = nan
Epoch = 175; Training loss = nan; Validation loss = nan
Epoch = 176; Training loss = nan; Validation los

Epoch =  56; Training loss = 1.7948; Validation loss = 1.7922
Epoch =  57; Training loss = 1.7949; Validation loss = 1.7929
Epoch =  58; Training loss = 1.7947; Validation loss = 1.7951
Epoch =  59; Training loss = 1.7946; Validation loss = 1.7951
Epoch =  60; Training loss = 1.7950; Validation loss = 1.7924
Epoch =  61; Training loss = 1.7951; Validation loss = 1.7941
Epoch =  62; Training loss = 1.7949; Validation loss = 1.7950
Epoch =  63; Training loss = 1.7950; Validation loss = 1.7944
Epoch =  64; Training loss = 1.7948; Validation loss = 1.7926
Epoch =  65; Training loss = 1.7950; Validation loss = 1.7918
Epoch =  66; Training loss = 1.7949; Validation loss = 1.7936
Epoch =  67; Training loss = 1.7949; Validation loss = 1.7921
Epoch =  68; Training loss = 1.7946; Validation loss = 1.7919
Epoch =  69; Training loss = 1.7954; Validation loss = 1.7927
Epoch =  70; Training loss = 1.7945; Validation loss = 1.7922
Epoch =  71; Training loss = 1.7950; Validation loss = 1.7929
Epoch = 

Epoch = 189; Training loss = 1.7945; Validation loss = 1.7927
Epoch = 190; Training loss = 1.7949; Validation loss = 1.7953
Epoch = 191; Training loss = 1.7951; Validation loss = 1.7945
Epoch = 192; Training loss = 1.7948; Validation loss = 1.7922
Epoch = 193; Training loss = 1.7948; Validation loss = 1.7960
Epoch = 194; Training loss = 1.7945; Validation loss = 1.7928
Epoch = 195; Training loss = 1.7946; Validation loss = 1.7967
Epoch = 196; Training loss = 1.7946; Validation loss = 1.7934
Epoch = 197; Training loss = 1.7948; Validation loss = 1.7921
Epoch = 198; Training loss = 1.7946; Validation loss = 1.7933
Epoch = 199; Training loss = 1.7947; Validation loss = 1.7939
Correct predictions: 932 of 5846. Accuracy: 15.942524803284297%
Confusion Matrix:
[[   0    0    0    0    0  986]
 [   0    0    0    0    0  959]
 [   0    0    0    0    0  981]
 [   0    0    0    0    0 1013]
 [   0    0    0    0    0  975]
 [   0    0    0    0    0  932]]
['CXR', 'ChestCT', 'AbdomenCT', 'Head

In [6]:
import pandas as pd

results_df = pd.DataFrame(results, columns=["configuration", "accuracy"])
results_df.sort_values(by="accuracy", inplace=True, ascending=False)
for index, row in results_df.iterrows():
    print(f"{row['configuration']} // Accuracy: {row['accuracy']}")

(0.01, 200) // Accuracy: 0.9993157714676703
(0.01, 500) // Accuracy: 0.9989736572015053
(0.001, 50) // Accuracy: 0.9988026000684228
(0.01, 50) // Accuracy: 0.9988026000684228
(0.01, 100) // Accuracy: 0.9988026000684228
(0.1, 300) // Accuracy: 0.9988026000684228
(0.001, 100) // Accuracy: 0.9986315429353404
(0.1, 100) // Accuracy: 0.9986315429353404
(0.01, 300) // Accuracy: 0.998460485802258
(0.001, 200) // Accuracy: 0.9981183715360931
(0.001, 500) // Accuracy: 0.9923024290112897
(0.001, 300) // Accuracy: 0.99161820047896
(0.1, 50) // Accuracy: 0.16866233321929525
(0.1, 200) // Accuracy: 0.16866233321929525
(0.1, 500) // Accuracy: 0.15942524803284297


On constate que notre modèle est plus performant avec un learning rate de 0.01 et une batch size de 200. Enregistrons donc notre modèle entraîné avec ces paramètres sur le disque dur :

In [5]:
# On sauvegarde notre meilleur modèle
import pickle
best_lr = 0.01
best_batch_size = 200
res = train_model(best_config, 
                  trainX, 
                  trainY, 
                  validX, 
                  validY, 
                  testX, 
                  testY,
                  learnRate = best_lr,
                  batchSize = best_batch_size,
                  verbose=True, 
                  add_dropout=True)
pickle.dump(res[0], open("best_model.pkl", "wb"))

Epoch =   0; Training loss = 0.8580; Validation loss = 0.2725
Epoch =   1; Training loss = 0.2852; Validation loss = 0.1540
Epoch =   2; Training loss = 0.1908; Validation loss = 0.0989
Epoch =   3; Training loss = 0.1436; Validation loss = 0.0745
Epoch =   4; Training loss = 0.1133; Validation loss = 0.0587
Epoch =   5; Training loss = 0.0920; Validation loss = 0.0464
Epoch =   6; Training loss = 0.0787; Validation loss = 0.0365
Epoch =   7; Training loss = 0.0673; Validation loss = 0.0308
Epoch =   8; Training loss = 0.0566; Validation loss = 0.0249
Epoch =   9; Training loss = 0.0507; Validation loss = 0.0220
Epoch =  10; Training loss = 0.0452; Validation loss = 0.0195
Epoch =  11; Training loss = 0.0423; Validation loss = 0.0177
Epoch =  12; Training loss = 0.0378; Validation loss = 0.0177
Epoch =  13; Training loss = 0.0357; Validation loss = 0.0145
Epoch =  14; Training loss = 0.0312; Validation loss = 0.0134
Epoch =  15; Training loss = 0.0310; Validation loss = 0.0129
Epoch = 