In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F

import numpy as np
import pandas as pd
import random

import itertools
import time



In [156]:
class ConvNet(nn.Module):
    def __init__(self, input_channels, hidden_channels, num_classes, fc_out=[64], kernel_sizes=None):
        super(ConvNet, self).__init__()

        if kernel_sizes is None:
            k = [[3, 2]]
        else:
            k = kernel_sizes

        self.conv1 = nn.Conv2d(
            input_channels, hidden_channels[0], kernel_size=k[0][0], stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=k[0][1], stride=2)

        self.conv_layers = nn.ModuleList()
        self.pool_layers = nn.ModuleList()  # Add module list for pooling layers

        for i in range(1, len(hidden_channels)):
            self.conv_layers.append(nn.Conv2d(
                hidden_channels[i-1], hidden_channels[i], kernel_size=k[i][0], stride=1, padding=1))
            self.relu = nn.ReLU()
            self.pool_layers.append(nn.MaxPool2d(
                kernel_size=k[i][1], stride=2))  # Add max pooling layer

        self.fc = nn.Linear(hidden_channels[-1], fc_out[0])
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(fc_out[0], num_classes)

    def forward(self, x):
        out = self.conv1(x)
        out = self.relu1(out)
        out = self.pool1(out)

        # Iterate over conv and pool layers
        for conv_layer, pool_layer in zip(self.conv_layers, self.pool_layers):
            out = conv_layer(out)
            out = self.relu(out)
            out = pool_layer(out)  # Apply max pooling

        out = F.avg_pool2d(out, kernel_size=out.size()[
                           2:])  # Global average pooling
        out = out.view(out.size(0), -1)  # Flatten the tensor

        out = self.fc(out)
        out = self.relu3(out)
        out = self.fc2(out)

        if not self.training:
            out = F.softmax(out, dim=1)

        return out

In [32]:
def trainModel(model, data_loader, epochSize=20):
    train_loader, test_loader = data_loader[0], data_loader[1]

    loss_fn = nn.CrossEntropyLoss()
    opt = torch.optim.Adam(model.parameters())

    loss_per_epoch = []
    train_acc_per_epoch = []
    test_acc_per_epoch = []
    total_acc_per_epoch = []
    time_per_epoch = []
    exec_time = []

    start_total_time = time.time()
    for epoch in range(epochSize):

        loss = 0
        start_epoch_time = time.time()

        count = 1
        for input_batch, target_batch in train_loader:
            print(
                f"Epoch: {epoch + 1} => {time.time() - start_epoch_time:.2f}s {(count/count_batch_train)*100:.3f}%", end='')

            # Zero the gradients
            opt.zero_grad()

            # Forward pass
            predict_batch = model(input_batch)

            # Compute loss
            loss_batch = loss_fn(predict_batch, target_batch)

            # Backward pass and update weights
            loss_batch.backward()
            opt.step()

            loss += loss_batch.item()  # store the loss
            count += 1
            print('\r', end='', flush=True)

        loss_per_epoch.append(loss)
        # print(loss)

        # CALCULATE TRAIN ACCURACY
        correct = 0
        total = 0
        train_accuracy = 0

        with torch.no_grad():
            count = 1
            for images, labels in train_loader:
                print(f"Epoch: {epoch + 1} => {time.time() - start_epoch_time:.2f}s || Calculating Training Accuracy... {(count/count_batch_train)*100:.3f}%", end='', flush=True)
                # Forward pass
                outputs = model(images)

                # Get the predicted labels
                _, predicted = torch.max(outputs.data, 1)

                # Update counts
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                count += 1
                print('\r', end='', flush=True)

        # Calculate accuracy
        train_accuracy = correct / total
        train_acc_per_epoch.append(train_accuracy)

        # CALCULATE TEST ACCURACY
        correct = 0
        total = 0
        test_accuracy = 0

        with torch.no_grad():
            count = 1
            for images, labels in test_loader:
                print(f"Epoch: {epoch + 1} => {time.time() - start_epoch_time:.2f}s || Calculating Testing Accuracy... {(count/count_batch_test)*100:.3f}%", end='', flush=True)
                # Forward pass
                outputs = model(images)

                # Get the predicted labels
                _, predicted = torch.max(outputs.data, 1)

                # Update counts
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                count += 1
                print('\r', end='', flush=True)

        # Calculate accuracy
        test_accuracy = correct / total
        test_acc_per_epoch.append(test_accuracy)

        time_epoch = time.time() - start_epoch_time
        time_current = time.time() - start_total_time

        time_per_epoch.append(time_epoch)
        exec_time.append(time_current)

        total_accuracy = 0.3*train_accuracy + 0.7*test_accuracy
        total_acc_per_epoch.append(total_accuracy)

        print(f'Epoch: {epoch+1} || Loss: {loss} || Train Acc: {train_accuracy * 100:.4f}% || Test Acc: {test_accuracy * 100:.4f}% || Total Acc: {total_accuracy * 100:.4f}% || Epoch Time: {time_epoch:.4f} s || Current Runtime: {time_current:.4f} s')

    output = {
        'loss': loss_per_epoch,
        'train_acc': train_acc_per_epoch,
        'test_acc': test_acc_per_epoch,
        'total_acc': total_acc_per_epoch,
        'epoch_time': time_per_epoch,
        'exec_time': exec_time
    }

    return output

In [None]:
a = [1,2]
b = [4]

In [373]:
def fitness(df_individual, dataloader):
    # Create ConvNet instance with the provided individual configuration
    # ConvNet(self, input_channels, hidden_channels, num_classes, fc_out=[64], kernel_sizes=None)
    # ConvNet(1, [16, 32], num_classes=5, kernel_sizes=[[3, 2], [3, 2]])
    # ['Individual', 'conv_h (Layer 1)', 'conv_h (Layer 2)', 'conv_k (Layer 0)', 'pool_k (Layer 0)', 'conv_k (Layer 1)', 'pool_k (Layer 1)', 'conv_k (Layer 2)', 'pool_k (Layer 2)', 'fc_out']
    c = len(df_individual)
    n_layers = int((c - 4)/3) # n = (c - 4)/3 (4 extra columns)

    h = list(df_individual[1:n_layers+1])

    k_0 = list(df_individual[n_layers+1:n_layers + 3]) # initial layer (layer=0)
    k_n = list([list(df_individual[n_layers+3 + i:n_layers+5 + i]) for i in range(0, n_layers+2, 2)]) # next layers

    k = [k_0] + k_n

    model = ConvNet(1, hidden_channels=h, num_classes=5, kernel_sizes=k, fc_out=[df_individual[-1]])
    result = trainModel(model, dataloader, epochSize=10)
    return result['total_acc'][-1]


def define_fitness(df_population, dataloader):
    fitness_per_individual = []
    for _, individual in df_population.iterrows():
        fitness_per_individual.append(fitness(individual, dataloader))

    return fitness_per_individual


def generate_population(population_size, layers, hidden_channels_range, kernel_size_range, fc_out_range):
    np.random.seed(4)
    population = []
    for _ in range(population_size):
        # For each population,
        chromosome_h = []
        for _ in range(layers):
            # create chromosome for the number of hidden channels per layer e.g: (layers = 3) ->[256, 128, 64]
            filters = np.random.randint(
                hidden_channels_range[0], hidden_channels_range[1]+1
            )
            chromosome_h.append(filters)

        chromosome_k = []
        for _ in range(layers + 1):
            # create chromosome for the kernel sizes per layer including the initial layer (2D list, each row represents layer)
            kernel_size = np.random.randint(
                kernel_size_range[0], kernel_size_range[1]+1, (1, 2)
            )
            chromosome_k.append(list(kernel_size[0]))

        chromosome_fc_out = np.random.randint(
            fc_out_range[0], fc_out_range[1]+1

        )
        population.append((chromosome_h, chromosome_k, chromosome_fc_out))

    return population


def DataFrame_Pop(pop_unstructured):
    population = []
    for i in range(len(pop_unstructured)):
        lst1 = np.array(pop_unstructured[i][0])  # number of filters
        lst2 = np.array(pop_unstructured[i][1])  # kernel sizes
        lst3 = np.array(pop_unstructured[i][2])  # fc_out size
        flattened = np.concatenate(([i+1], lst1, lst2.flatten(), [lst3]))

        population.append(flattened.tolist())
    # print(population)
    col_name1 = [
        f"conv_h (Layer {i+1})" for i in range(len(pop_unstructured[0][0]))
    ]
    col_name2 = ["conv_k (Layer 0)", "pool_k (Layer 0)"]
    col_name3 = [
        f"conv_k (Layer {i+1})" for i in range(len(pop_unstructured[0][0]))
    ]
    col_name4 = [
        f"pool_k (Layer {i+1})" for i in range(len(pop_unstructured[0][0]))
    ]
    col_name5 = list(itertools.chain(*zip(col_name3, col_name4)))
    col_name6 = ["fc_out"]

    cols = ['Individual'] + col_name1 + col_name2 + col_name5 + col_name6
    return pd.DataFrame(population, columns=cols, index=None)

In [158]:
# Define the directory path
data_dir = './processed_dataset'

# Create the ImageFolder dataset
dataset = datasets.DatasetFolder(data_dir, loader=torch.load, extensions=".pt")

# Split the dataset into training and testing sets
train_size = int(0.8 * len(dataset))  # 80% for training
test_size = len(dataset) - train_size  # Remaining 20% for testing

train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Set DataLoader
batchSize = 16  # Rule of thumb is to set to the power of 2. In this case 2^7
train_loader = DataLoader(train_dataset, batch_size=batchSize,shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batchSize, shuffle=False) # no need to shuffle test data

count_batch_train, count_batch_test = 0, 0
for xb, yb in train_loader:
  print(count_batch_train, end='', flush=True)
  count_batch_train += 1
  print("\r", end='', flush=True)
print(f'There are {count_batch_train} batches in train_loader')

for xb, yb in test_loader:
  print(count_batch_test, end='', flush=True)
  count_batch_test += 1
  print("\r", end='', flush=True)
print(f'There are {count_batch_test} batches in test_loader')

for i, j in train_loader:
    size = i.shape
    break

print(size)

There are 413 batches in train_loader
There are 104 batches in test_loader
torch.Size([16, 1, 224, 224])


In [365]:
pop = generate_population(population_size=5, layers=2, hidden_channels_range=[4, 64], kernel_size_range=[1, 4], fc_out_range=[32, 64])
df = DataFrame_Pop(pop)
print(pop)
df

[([62, 50], [[4, 2], [2, 1], [4, 1]], 41), ([62, 64], [[4, 2], [3, 4], [4, 2]], 35), ([4, 59], [[2, 2], [2, 3], [2, 3]], 34), ([50, 57], [[3, 1], [2, 3], [3, 4]], 64), ([44, 55], [[3, 2], [2, 1], [3, 2]], 60)]


Unnamed: 0,Individual,conv_h (Layer 1),conv_h (Layer 2),conv_k (Layer 0),pool_k (Layer 0),conv_k (Layer 1),pool_k (Layer 1),conv_k (Layer 2),pool_k (Layer 2),fc_out
0,1,62,50,4,2,2,1,4,1,41
1,2,62,64,4,2,3,4,4,2,35
2,3,4,59,2,2,2,3,2,3,34
3,4,50,57,3,1,2,3,3,4,64
4,5,44,55,3,2,2,1,3,2,60


In [374]:
fitness_scores = define_fitness(df, [train_loader, test_loader])

Epoch: 1 || Loss: 473.8258264064789 || Train Acc: 47.7424% || Test Acc: 47.7576% || Total Acc: 47.7530% || Epoch Time: 330.2717 s || Current Runtime: 330.2717 s
Epoch: 2 || Loss: 406.1062091588974 || Train Acc: 52.0303% || Test Acc: 53.3333% || Total Acc: 52.9424% || Epoch Time: 407.8808 s || Current Runtime: 738.1526 s
Epoch: 3 || Loss: 388.3903695344925 || Train Acc: 52.5000% || Test Acc: 53.5758% || Total Acc: 53.2530% || Epoch Time: 1419.2826 s || Current Runtime: 2157.4364 s
Epoch: 4 || Loss: 359.825410425663 || Train Acc: 56.7121% || Test Acc: 58.4242% || Total Acc: 57.9106% || Epoch Time: 329.2363 s || Current Runtime: 2486.6741 s
Epoch: 5 || Loss: 309.90715235471725 || Train Acc: 68.9394% || Test Acc: 69.0303% || Total Acc: 69.0030% || Epoch Time: 333.7991 s || Current Runtime: 2820.4732 s
Epoch: 6 || Loss: 274.78362768888474 || Train Acc: 67.3636% || Test Acc: 66.9697% || Total Acc: 67.0879% || Epoch Time: 323.3411 s || Current Runtime: 3143.8144 s
Epoch: 7 || Loss: 256.536813

In [375]:
fitness_scores

[0.8050454545454545,
 0.8845454545454545,
 0.7396515151515152,
 0.8691212121212121,
 0.8471818181818181]

In [376]:
df['Fitness'] = fitness_scores
df

Unnamed: 0,Individual,conv_h (Layer 1),conv_h (Layer 2),conv_k (Layer 0),pool_k (Layer 0),conv_k (Layer 1),pool_k (Layer 1),conv_k (Layer 2),pool_k (Layer 2),fc_out,Fitness
0,1,62,50,4,2,2,1,4,1,41,0.805045
1,2,62,64,4,2,3,4,4,2,35,0.884545
2,3,4,59,2,2,2,3,2,3,34,0.739652
3,4,50,57,3,1,2,3,3,4,64,0.869121
4,5,44,55,3,2,2,1,3,2,60,0.847182


In [378]:
def crossover(df_pop, dataloader):
    df_pop_sorted = df_pop.sort_values('Fitness', ascending=False) # Sort the rows by the fitness score in descending order
    c = len(df_pop.columns)
    r = len(df_pop)
    n_layers = int((c - 2)/3) # n = (c - 2)/3

    children = []
    for i in range(r-1):
        parents = df_pop_sorted.iloc[i:i+2] # Get top 2
        parent1 = parents.iloc[0, 1:c-1].to_list()
        parent2 = parents.iloc[1, 1:c-1].to_list()
        offspring = parent1[:n_layers] + parent2[n_layers:n_layers+1] + parent2[n_layers+1:]

        offspring = [int(x) for x in offspring]

        children.append([r + i + 1] + offspring)
    
    print(children)

    df_children = pd.DataFrame(children, columns=df_pop.columns[:-1])
    fitness_scores = []
    for child in children:
        fitness_scores.append(fitness(child, dataloader))

    df_children['Fitness'] = fitness_scores

    return df_children

df_children = crossover(df, [train_loader, test_loader])
    

[[6, 62, 64, 4, 1, 2, 3, 3, 4, 64], [7, 50, 57, 3, 2, 2, 1, 3, 2, 60], [8, 44, 55, 3, 2, 2, 1, 4, 1, 41], [9, 62, 50, 4, 2, 2, 3, 2, 3, 34]]
Epoch: 1 || Loss: 471.77978760004044 || Train Acc: 53.6515% || Test Acc: 52.9697% || Total Acc: 53.1742% || Epoch Time: 386.7324 s || Current Runtime: 386.7324 s
Epoch: 2 || Loss: 353.3882024884224 || Train Acc: 64.0758% || Test Acc: 64.9091% || Total Acc: 64.6591% || Epoch Time: 335.7254 s || Current Runtime: 722.4590 s
Epoch: 3 || Loss: 284.58426281809807 || Train Acc: 67.9394% || Test Acc: 67.2121% || Total Acc: 67.4303% || Epoch Time: 1327.6420 s || Current Runtime: 2050.1010 s
Epoch: 4 || Loss: 244.36315402388573 || Train Acc: 83.3182% || Test Acc: 83.3333% || Total Acc: 83.3288% || Epoch Time: 6193.4777 s || Current Runtime: 8243.5788 s
Epoch: 5 || Loss: 189.68311028182507 || Train Acc: 84.9545% || Test Acc: 86.0606% || Total Acc: 85.7288% || Epoch Time: 5701.7308 s || Current Runtime: 13945.3098 s
Epoch: 6 || Loss: 153.69583271443844 || Tra

In [379]:
df_children

Unnamed: 0,Individual,conv_h (Layer 1),conv_h (Layer 2),conv_k (Layer 0),pool_k (Layer 0),conv_k (Layer 1),pool_k (Layer 1),conv_k (Layer 2),pool_k (Layer 2),fc_out,Fitness
0,6,62,64,4,1,2,3,3,4,64,0.912985
1,7,50,57,3,2,2,1,3,2,60,0.832
2,8,44,55,3,2,2,1,4,1,41,0.828227
3,9,62,50,4,2,2,3,2,3,34,0.857288


In [380]:
df_concat = pd.concat([df, df_children])

In [381]:
df_concat.to_csv('population.csv', index=False)

In [382]:
df_concat

Unnamed: 0,Individual,conv_h (Layer 1),conv_h (Layer 2),conv_k (Layer 0),pool_k (Layer 0),conv_k (Layer 1),pool_k (Layer 1),conv_k (Layer 2),pool_k (Layer 2),fc_out,Fitness
0,1,62,50,4,2,2,1,4,1,41,0.805045
1,2,62,64,4,2,3,4,4,2,35,0.884545
2,3,4,59,2,2,2,3,2,3,34,0.739652
3,4,50,57,3,1,2,3,3,4,64,0.869121
4,5,44,55,3,2,2,1,3,2,60,0.847182
0,6,62,64,4,1,2,3,3,4,64,0.912985
1,7,50,57,3,2,2,1,3,2,60,0.832
2,8,44,55,3,2,2,1,4,1,41,0.828227
3,9,62,50,4,2,2,3,2,3,34,0.857288
