# In [70]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import torch.utils.data as data
import numpy as np
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import euclidean_distances
import math

# Ensure reproducibility: seed PyTorch's and NumPy's global random number
# generators with the same fixed value.
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

# Pick the compute device once: CUDA when torch reports it as available,
# otherwise the CPU. The helpers below read this module-level `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data preprocessing
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets

import torch
from torchvision import datasets, transforms

def prepare_data(dataset_name):
    """Load a dataset and split its training part into a seed set and a pool.

    Args:
        dataset_name: One of 'cifar10', 'cifar100' or 'svhn'.

    Returns:
        A tuple ``(initial_train_set, remainder, test_set)``. The first two
        come from a random split of the training data into 10000 samples and
        everything else; ``test_set`` is the dataset's test split.

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names.

    Side effects:
        Downloads the data into ``./data`` when needed, prints progress and
        re-seeds torch's global RNG with 42 right before the split.
    """
    print('==> Preparing data..')

    # Per-channel (mean, std) handed to Normalize for each supported dataset.
    channel_stats = {
        'cifar10': ((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616)),
        'cifar100': ((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
        'svhn': ((0.4377, 0.4438, 0.4728), (0.1980, 0.2010, 0.1970)),
    }
    if dataset_name not in tuple(channel_stats):
        raise ValueError("Invalid dataset name. Choose from 'cifar10', 'cifar100', or 'svhn'.")
    mean, std = channel_stats[dataset_name]
    is_svhn = dataset_name == 'svhn'

    def tensor_and_normalize():
        # Fresh transform objects for every pipeline, as in a hand-written Compose.
        return [transforms.ToTensor(), transforms.Normalize(mean, std)]

    # Only the CIFAR training pipelines get crop/flip augmentation.
    augmentation = []
    if not is_svhn:
        augmentation = [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
        ]
    transform_train = transforms.Compose(augmentation + tensor_and_normalize())
    transform_test = transforms.Compose(tensor_and_normalize())

    if is_svhn:
        # SVHN selects its partition with `split=` rather than `train=`.
        train_set = datasets.SVHN(root='./data', split='train', download=True, transform=transform_train)
        test_set = datasets.SVHN(root='./data', split='test', download=True, transform=transform_test)
    else:
        dataset_cls = datasets.CIFAR10 if dataset_name == 'cifar10' else datasets.CIFAR100
        train_set = dataset_cls(root='./data', train=True, download=True, transform=transform_train)
        test_set = dataset_cls(root='./data', train=False, download=True, transform=transform_test)

    # Fixed seed so the 10000-sample seed set is the same on every call.
    torch.manual_seed(42)
    split_sizes = [10000, len(train_set) - 10000]
    initial_train_set, remainder = torch.utils.data.random_split(train_set, split_sizes)

    print(f"Size of train_set: {len(train_set)}")
    print(f"Size of test_set: {len(test_set)}")

    return initial_train_set, remainder, test_set


        
def train_model(model, train_loader, epochs, learning_rate):
    """Train ``model`` in place with SGD and a step learning-rate schedule.

    Uses cross-entropy loss, SGD (momentum 0.9, weight decay 5e-4) and a
    StepLR schedule that multiplies the learning rate by 0.1 every 10 epochs.
    A new optimizer and scheduler are created on every call. Batches are
    moved to the module-level ``device``; the model is expected to already
    live there.

    Args:
        model: Network to optimise; it is left in training mode.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        epochs: Number of passes over ``train_loader``.
        learning_rate: Initial SGD learning rate.

    Raises:
        ValueError: If ``train_loader`` yields no batches during an epoch.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        # Count batches ourselves so loaders without __len__ also work and an
        # empty loader is reported clearly instead of dividing by zero.
        num_batches = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_loader produced no batches; nothing to train on.")

        print(f'Epoch {epoch + 1}, Loss: {running_loss / num_batches}')

        # Advance the learning-rate schedule once per epoch.
        scheduler.step()
    


def test_model(model, test_loader):
    """Measure top-1 accuracy of ``model`` over ``test_loader``.

    The model is switched to eval mode and left there. Batches are moved to
    the module-level ``device``.

    Args:
        model: Network to evaluate.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        Accuracy as a fraction in [0, 1] (not a percentage). Returns 0.0 when
        the loader yields no samples instead of dividing by zero.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            # Predicted class = index of the largest output per sample.
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total if total else 0.0
    print(f'Test Accuracy: {accuracy * 100:.2f}%')
    return accuracy

def calculate_cluster_centers(embeddings, num_clusters):
    """Fit k-means on ``embeddings`` and return the fitted centroids.

    Args:
        embeddings: Samples to cluster, in whatever array-like form KMeans
            accepts.
        num_clusters: Number of clusters to fit.

    Returns:
        The ``cluster_centers_`` attribute of the fitted KMeans model.
    """
    # Fixed random_state keeps the clustering repeatable for identical input.
    clusterer = KMeans(n_clusters=num_clusters, random_state=0, n_init=10)
    clusterer.fit(embeddings)
    return clusterer.cluster_centers_

def get_most_diverse_samples(tsne_results, cluster_centers, num_diverse_samples):
    """Rank points by distance to their farthest cluster centre; return the first ones.

    For every point the Euclidean distance to each centre is computed and the
    largest of those distances is kept. Points are then sorted by that value
    in ascending order and the first ``num_diverse_samples`` positions are
    returned.

    NOTE(review): this selects the points whose farthest centre is closest.
    Whether that matches the intended notion of "most diverse" (as opposed
    to, say, the largest distance to the nearest centre) should be confirmed.

    Args:
        tsne_results: Point coordinates, one row per sample.
        cluster_centers: Centre coordinates, one row per cluster.
        num_diverse_samples: How many positions to return; a slice bound, so
            ``None`` returns every position.

    Returns:
        Array of row positions into ``tsne_results``.
    """
    point_to_center = euclidean_distances(tsne_results, cluster_centers)
    farthest_center_distance = point_to_center.max(axis=1)
    ranking = np.argsort(farthest_center_distance)
    return ranking[:num_diverse_samples]

def extract_embeddings(model, test):
    """Run ``model`` over a dataset and collect its flattened outputs.

    The "embedding" of a sample is the model's forward output for it,
    flattened to one dimension. The model is switched to eval mode and left
    there; inputs are moved to the module-level ``device``.

    Args:
        model: Network used to produce the outputs.
        test: Dataset yielding ``(image, target)`` pairs; targets are ignored.

    Returns:
        A list with one list of floats per sample, in dataset order.
    """
    loader = data.DataLoader(test, batch_size=64, shuffle=False)
    model.eval()
    embeddings = []
    with torch.no_grad():
        # Targets are not needed here, so they are neither stored nor moved.
        for images, _ in loader:
            features = model(images.to(device))
            embeddings.extend(torch.flatten(features, start_dim=1).tolist())
    return embeddings

def least_confidence_images(model, test_dataset, k=2500):
    """Select the ``k`` samples the model is least confident about.

    Confidence is the largest softmax probability of the model's output.
    The model is switched to eval mode (and left there) so that scoring does
    not depend on whichever mode the caller last used. Inputs are moved to
    the module-level ``device``.

    Args:
        model: Network used for scoring.
        test_dataset: Dataset yielding ``(image, target)`` pairs.
        k: Number of samples to select; capped at the dataset size.

    Returns:
        A tuple ``(subset, labels)``: a ``data.Subset`` of ``test_dataset``
        holding the selected samples, lowest confidence first, and the list
        of their target labels in the same order. The second element holds
        labels, not dataset positions.
    """
    loader = data.DataLoader(test_dataset, batch_size=64, shuffle=False)
    model.eval()
    confidence_batches = []
    labels = []
    with torch.no_grad():
        for images, targets in loader:
            probs = F.softmax(model(images.to(device)), dim=1)
            confidence_batches.append(probs.max(dim=1).values.cpu())
            labels.extend(targets.tolist())

    # Nothing to rank: return an empty selection instead of calling topk.
    if not confidence_batches:
        return data.Subset(test_dataset, []), []

    confidences = torch.cat(confidence_batches)
    k = min(k, len(confidences))
    # Plain Python ints keep the Subset indices independent of tensor types.
    indices = torch.topk(confidences, k, largest=False).indices.tolist()
    return data.Subset(test_dataset, indices), [labels[i] for i in indices]

def high_confidence_images(model, test_dataset, k=2500):
    """Select the ``k`` samples the model is most confident about.

    Confidence is the largest softmax probability of the model's output.
    The model is switched to eval mode (and left there) so that scoring does
    not depend on whichever mode the caller last used. Inputs are moved to
    the module-level ``device``.

    Args:
        model: Network used for scoring.
        test_dataset: Dataset yielding ``(image, target)`` pairs.
        k: Number of samples to select; capped at the dataset size.

    Returns:
        A tuple ``(subset, labels)``: a ``data.Subset`` of ``test_dataset``
        holding the selected samples, highest confidence first, and the list
        of their target labels in the same order. The second element holds
        labels, not dataset positions.
    """
    loader = data.DataLoader(test_dataset, batch_size=64, shuffle=False)
    model.eval()
    confidence_batches = []
    labels = []
    with torch.no_grad():
        for images, targets in loader:
            probs = F.softmax(model(images.to(device)), dim=1)
            confidence_batches.append(probs.max(dim=1).values.cpu())
            labels.extend(targets.tolist())

    # Nothing to rank: return an empty selection instead of calling topk.
    if not confidence_batches:
        return data.Subset(test_dataset, []), []

    confidences = torch.cat(confidence_batches)
    k = min(k, len(confidences))
    # Plain Python ints keep the Subset indices independent of tensor types.
    indices = torch.topk(confidences, k, largest=True).indices.tolist()
    return data.Subset(test_dataset, indices), [labels[i] for i in indices]
    
def HC_diverse(embed_model, remainder, n=None):
    """Pick ``n`` samples from the high-confidence pool using a t-SNE/k-means ranking.

    The candidate pool is the ``2 * n`` most confident samples of
    ``remainder`` (or all of it when it is smaller). Their model outputs are
    projected to 2-D with t-SNE, clustered with k-means, and ranked by
    ``get_most_diverse_samples``.

    Args:
        embed_model: Network used both for confidence scoring and embeddings.
        remainder: Dataset of candidate samples.
        n: Number of samples to return. ``None`` means no limit: the whole
            confidence-ranked pool is returned.

    Returns:
        A tuple ``(subset, values)``: a ``data.Subset`` of the pool, and for
        each selected sample the matching entry of the second output of
        ``high_confidence_images`` (target labels as that function is written
        in this file).
    """
    pool_size = len(remainder) if n is None else min(2 * n, len(remainder))
    high_conf_images, high_conf_values = high_confidence_images(embed_model, remainder, k=pool_size)

    # When the pool is no larger than the request every member is selected
    # anyway, so the expensive t-SNE step is skipped (it would also fail on
    # very small pools).
    if n is None or len(high_conf_images) <= n:
        return high_conf_images, list(high_conf_values)

    high_conf_embeddings = np.asarray(extract_embeddings(embed_model, high_conf_images))
    num_samples = len(high_conf_embeddings)
    # t-SNE needs perplexity < n_samples and k-means needs n_clusters <=
    # n_samples; clamp both so small pools do not raise.
    # NOTE(review): recent scikit-learn releases rename `n_iter` to
    # `max_iter` - confirm against the installed version.
    tsne = TSNE(n_components=2, perplexity=min(30, num_samples - 1), n_iter=300)
    tsne_results = tsne.fit_transform(high_conf_embeddings)
    cluster_centers = calculate_cluster_centers(tsne_results, min(10, num_samples))
    diverse_indices = get_most_diverse_samples(tsne_results, cluster_centers, n)
    diverse_images = data.Subset(high_conf_images, diverse_indices)
    return diverse_images, [high_conf_values[i] for i in diverse_indices]

def LC_diverse(embed_model, remainder, n=None):
    """Pick ``n`` samples from the low-confidence pool using a t-SNE/k-means ranking.

    The candidate pool is the ``2 * n`` least confident samples of
    ``remainder`` (or all of it when it is smaller). Their model outputs are
    projected to 2-D with t-SNE, clustered with k-means, and ranked by
    ``get_most_diverse_samples``.

    Args:
        embed_model: Network used both for confidence scoring and embeddings.
        remainder: Dataset of candidate samples.
        n: Number of samples to return. ``None`` means no limit: the whole
            confidence-ranked pool is returned.

    Returns:
        A tuple ``(subset, values)``: a ``data.Subset`` of the pool, and for
        each selected sample the matching entry of the second output of
        ``least_confidence_images`` (target labels as that function is
        written in this file).
    """
    pool_size = len(remainder) if n is None else min(2 * n, len(remainder))
    least_conf_images, least_conf_values = least_confidence_images(embed_model, remainder, k=pool_size)

    # When the pool is no larger than the request every member is selected
    # anyway, so the expensive t-SNE step is skipped (it would also fail on
    # very small pools).
    if n is None or len(least_conf_images) <= n:
        return least_conf_images, list(least_conf_values)

    least_conf_embeddings = np.asarray(extract_embeddings(embed_model, least_conf_images))
    num_samples = len(least_conf_embeddings)
    # t-SNE needs perplexity < n_samples and k-means needs n_clusters <=
    # n_samples; clamp both so small pools do not raise.
    # NOTE(review): recent scikit-learn releases rename `n_iter` to
    # `max_iter` - confirm against the installed version.
    tsne = TSNE(n_components=2, perplexity=min(30, num_samples - 1), n_iter=300)
    tsne_results = tsne.fit_transform(least_conf_embeddings)
    cluster_centers = calculate_cluster_centers(tsne_results, min(10, num_samples))
    diverse_indices = get_most_diverse_samples(tsne_results, cluster_centers, n)
    diverse_images = data.Subset(least_conf_images, diverse_indices)
    return diverse_images, [least_conf_values[i] for i in diverse_indices]

def LC_HC(model, remainder, n=5000):
    """Combine the least and the most confident samples of ``remainder``.

    ``n`` is split into two halves (the high-confidence half receives the
    extra sample when ``n`` is odd). With the default ``n=5000`` this is
    2500 least confident plus 2500 most confident samples.

    Both halves are drawn independently from ``remainder``, so when
    ``remainder`` holds fewer than ``n`` samples the same sample can appear
    in both.

    Args:
        model: Network used for confidence scoring.
        remainder: Dataset of candidate samples.
        n: Total number of samples requested.

    Returns:
        A tuple ``(dataset, values)``: a ``data.ConcatDataset`` with the
        low-confidence subset followed by the high-confidence subset, and the
        concatenation of the second outputs of the two selection helpers
        (target labels as those helpers are written in this file).
    """
    n_low = n // 2
    n_high = n - n_low
    least_confident, least_confident_values = least_confidence_images(model, remainder, k=n_low)
    most_confident, most_confident_values = high_confidence_images(model, remainder, k=n_high)
    combined_dataset = data.ConcatDataset([least_confident, most_confident])
    combined_values = least_confident_values + most_confident_values
    return combined_dataset, combined_values

def _pick_diverse(embed_model, pool, n_select):
    """Return positions into ``pool`` chosen by the t-SNE + k-means ranking."""
    # Every member would be picked anyway; skip t-SNE, which also cannot
    # handle very small or empty pools.
    if len(pool) <= n_select:
        return list(range(len(pool)))

    embeddings = np.asarray(extract_embeddings(embed_model, pool))
    num_samples = len(embeddings)
    # t-SNE needs perplexity < n_samples and k-means needs n_clusters <=
    # n_samples; clamp both so small pools do not raise.
    # NOTE(review): recent scikit-learn releases rename `n_iter` to
    # `max_iter` - confirm against the installed version.
    tsne = TSNE(n_components=2, perplexity=min(30, num_samples - 1), n_iter=300)
    tsne_results = tsne.fit_transform(embeddings)
    cluster_centers = calculate_cluster_centers(tsne_results, min(10, num_samples))
    return get_most_diverse_samples(tsne_results, cluster_centers, n_select)


def LC_HC_diverse(embed_model, remainder, n, low_conf_ratio=0.5, high_conf_ratio=0.5):
    """Select ``n`` samples split between low- and high-confidence pools.

    For each side, a pool of twice the requested size is taken from
    ``remainder`` by confidence and then reduced with the t-SNE/k-means
    ranking.

    Args:
        embed_model: Network used for confidence scoring and embeddings.
        remainder: Dataset of candidate samples.
        n: Total number of samples requested.
        low_conf_ratio: Share of ``n`` taken from the low-confidence side.
        high_conf_ratio: Share of ``n`` taken from the high-confidence side.

    Returns:
        A tuple ``(dataset, values)``: a ``data.ConcatDataset`` with the
        low-confidence picks followed by the high-confidence picks, and for
        each pick the matching entry of the second output of the selection
        helpers (target labels as those helpers are written in this file).

    Raises:
        ValueError: If the two ratios do not sum to 1.0.
    """
    # Tolerant comparison: sums such as 0.1 + 0.2 + 0.7 are not exactly 1.0
    # in floating point. Raised explicitly so the check survives `python -O`.
    if not math.isclose(low_conf_ratio + high_conf_ratio, 1.0):
        raise ValueError("The sum of low_conf_ratio and high_conf_ratio must be 1.0")

    n_low = int(n * low_conf_ratio)
    # Give the high-confidence side the rest so the two parts always add up to n.
    n_high = n - n_low

    least_conf_images, least_conf_values = least_confidence_images(
        embed_model, remainder, k=min(2 * n_low, len(remainder)))
    diverse_low_indices = _pick_diverse(embed_model, least_conf_images, n_low)
    diverse_least_conf_images = data.Subset(least_conf_images, diverse_low_indices)

    high_conf_images, high_conf_values = high_confidence_images(
        embed_model, remainder, k=min(2 * n_high, len(remainder)))
    diverse_high_indices = _pick_diverse(embed_model, high_conf_images, n_high)
    diverse_high_conf_images = data.Subset(high_conf_images, diverse_high_indices)

    combined_dataset = data.ConcatDataset([diverse_least_conf_images, diverse_high_conf_images])
    combined_values = (
        [least_conf_values[i] for i in diverse_low_indices]
        + [high_conf_values[i] for i in diverse_high_indices]
    )

    return combined_dataset, combined_values


def _selected_positions(selection, pool):
    """Map every item of ``selection`` back to its position inside ``pool``.

    ``selection`` must have been built from ``pool`` through (possibly
    nested) ``data.Subset`` / ``data.ConcatDataset`` wrappers.
    """
    if selection is pool:
        return list(range(len(pool)))
    if isinstance(selection, data.Subset):
        parent_positions = _selected_positions(selection.dataset, pool)
        # int() accepts list, NumPy and 0-dim tensor index elements alike.
        return [parent_positions[int(i)] for i in selection.indices]
    if isinstance(selection, data.ConcatDataset):
        positions = []
        for part in selection.datasets:
            positions.extend(_selected_positions(part, pool))
        return positions
    raise TypeError("selection is not derived from the given pool")


def train_until_empty(model, initial_train_set, remainder_set, test_set, max_iterations=15, batch_size=64, learning_rate=0.01, method=1):
    """Grow the training set from the pool round by round, retraining each time.

    Every round selects samples from ``remainder_set`` with the chosen
    strategy, moves exactly those samples from the pool into the training
    set, trains for 50 epochs on the enlarged set and records test accuracy.
    The loop ends after ``max_iterations`` rounds or once the pool is empty.

    Args:
        model: Network to train; updated in place.
        initial_train_set: Starting labelled dataset.
        remainder_set: Pool of samples still available for selection.
        test_set: Dataset used to measure accuracy after each round.
        max_iterations: Maximum number of rounds.
        batch_size: Batch size for the training and test loaders.
        learning_rate: Learning rate passed to ``train_model``.
        method: Strategy: 1 = LC_HC, 2 = LC_HC_diverse, 3 = HC_diverse,
            4 = LC_diverse. Any other value prints "Invalid Method" and
            returns the accuracies gathered so far.

    Returns:
        List with the test accuracy measured after each completed round.

    Raises:
        TypeError: If a strategy returns a dataset that is not built from
            ``remainder_set``.
    """
    exp_acc = []

    for iteration in range(max_iterations):
        print(f"Starting Iteration {iteration}")
        print(f"Remaindersize:{len(remainder_set)}")
        if len(remainder_set) == 0:
            print("Dataset is empty. Stopping training.")
            break

        if method == 1:
            train_data, _ = LC_HC(model, remainder_set, n=5000)
        elif method == 2:
            train_data, _ = LC_HC_diverse(model, remainder_set, n=5000)
        elif method == 3:
            train_data, _ = HC_diverse(model, remainder_set, n=5000)
        elif method == 4:
            train_data, _ = LC_diverse(model, remainder_set, n=5000)
        else:
            print("Invalid Method")
            return exp_acc

        # Resolve which pool positions were actually picked. Dropping the
        # first len(train_data) pool entries instead would leave the picked
        # samples in the pool and discard samples that were never used.
        # dict.fromkeys removes repeats while keeping the selection order.
        chosen = list(dict.fromkeys(_selected_positions(train_data, remainder_set)))
        chosen_lookup = set(chosen)
        newly_added = data.Subset(remainder_set, chosen)
        kept = [pos for pos in range(len(remainder_set)) if pos not in chosen_lookup]

        initial_train_set = data.ConcatDataset([initial_train_set, newly_added])
        remainder_set = data.Subset(remainder_set, kept)

        print(f"\nTraining iteration {iteration + 1}")
        print(f"Train Set Size: {len(initial_train_set)}, Remainder Size: {len(remainder_set)}")
        train_loader = data.DataLoader(initial_train_set, batch_size=batch_size, shuffle=True)
        train_model(model, train_loader, epochs=50, learning_rate=learning_rate)

        test_loader = data.DataLoader(test_set, batch_size=batch_size)
        accuracy = test_model(model, test_loader)
        exp_acc.append(accuracy)

        print(f"Iteration {iteration + 1}: Test Accuracy - {accuracy}")

    return exp_acc
    
import copy

def run_all_methods(model, initial_train_set, remainder, test_set):
    """Run every selection strategy from the same starting weights.

    The model's state is captured once and restored before each strategy so
    that one strategy's training does not carry over into the next. After
    the call the model holds the weights produced by the last strategy.

    Args:
        model: Network to train; updated in place.
        initial_train_set: Starting labelled dataset shared by all strategies.
        remainder: Pool of samples available for selection.
        test_set: Dataset used to measure accuracy.

    Returns:
        Dict mapping ``"method_<k>"`` to the accuracy list returned by
        ``train_until_empty`` for strategy ``k``.
    """
    methods = [1, 2, 3, 4]
    results = {}
    # Deep copy: state_dict() returns references to the live parameters.
    initial_model_state = copy.deepcopy(model.state_dict())

    for method in methods:
        print(f"\nStarting training with method {method}")
        model.load_state_dict(initial_model_state)
        initial_train_set_copy = copy.deepcopy(initial_train_set)
        remainder_copy = copy.deepcopy(remainder)
        exp_acc = train_until_empty(model, initial_train_set_copy, remainder_copy, test_set,
                                    max_iterations=15, batch_size=64, learning_rate=0.01, method=method)
        results[f"method_{method}"] = exp_acc

    return results


# In [71]:
import torch
import math

# Compute device for this cell: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 

# NOTE(review): the model classes visible below take their own `num_classes`
# argument and hard-code 100 outputs; none of them reads this module-level
# value - confirm whether it is used elsewhere.
num_classes = 100

class Bottleneck(nn.Module):
    """Dense layer: BN-ReLU-1x1 conv, BN-ReLU-3x3 conv, concatenated with its input.

    The 1x1 convolution widens to ``4 * growth_rate`` channels and the 3x3
    convolution produces ``growth_rate`` new channels, so the output has
    ``growth_rate + in_planes`` channels (new features first).
    """

    def __init__(self, in_planes, growth_rate):
        super().__init__()
        inner_planes = 4 * growth_rate
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, inner_planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(inner_planes)
        self.conv2 = nn.Conv2d(inner_planes, growth_rate, kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        widened = self.conv1(F.relu(self.bn1(x)))
        new_features = self.conv2(F.relu(self.bn2(widened)))
        # Channel-wise concatenation keeps the input available downstream.
        return torch.cat((new_features, x), dim=1)

class Transition(nn.Module):
    """BN-ReLU-1x1 conv followed by 2x2 average pooling.

    Changes the channel count from ``in_planes`` to ``out_planes`` and
    halves the spatial size.
    """

    def __init__(self, in_planes, out_planes):
        super().__init__()
        self.bn = nn.BatchNorm2d(in_planes)
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)

    def forward(self, x):
        activated = F.relu(self.bn(x))
        projected = self.conv(activated)
        return F.avg_pool2d(projected, 2)

class DenseNet(nn.Module):
    """Four dense stages separated by three transition layers and a linear head.

    Args:
        block: Layer class used inside each dense stage; called as
            ``block(in_planes, growth_rate)``.
        nblocks: Sequence of four ints, the number of layers per stage.
        growth_rate: Channels added by every dense layer.
        reduction: Factor applied to the channel count at each transition.
        num_classes: Accepted but not used: the classifier below always has
            100 outputs. NOTE(review): confirm whether this is intentional.
    """

    def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
        super().__init__()
        self.growth_rate = growth_rate

        channels = 2 * growth_rate
        self.conv1 = nn.Conv2d(3, channels, kernel_size=3, padding=1, bias=False)

        # Stage 1 and its transition.
        self.dense1 = self._make_dense_layers(block, channels, nblocks[0])
        channels += nblocks[0] * growth_rate
        reduced = int(math.floor(channels * reduction))
        self.trans1 = Transition(channels, reduced)
        channels = reduced

        # Stage 2 and its transition.
        self.dense2 = self._make_dense_layers(block, channels, nblocks[1])
        channels += nblocks[1] * growth_rate
        reduced = int(math.floor(channels * reduction))
        self.trans2 = Transition(channels, reduced)
        channels = reduced

        # Stage 3 and its transition.
        self.dense3 = self._make_dense_layers(block, channels, nblocks[2])
        channels += nblocks[2] * growth_rate
        reduced = int(math.floor(channels * reduction))
        self.trans3 = Transition(channels, reduced)
        channels = reduced

        # Stage 4 feeds the classifier directly, without a transition.
        self.dense4 = self._make_dense_layers(block, channels, nblocks[3])
        channels += nblocks[3] * growth_rate

        self.bn = nn.BatchNorm2d(channels)
        self.linear = nn.Linear(channels, 100)

    def _make_dense_layers(self, block, in_planes, nblock):
        """Stack ``nblock`` layers, each seeing ``growth_rate`` more input channels."""
        stacked = [
            block(in_planes + index * self.growth_rate, self.growth_rate)
            for index in range(nblock)
        ]
        return nn.Sequential(*stacked)

    def forward(self, x):
        out = self.conv1(x)
        stages = (
            (self.dense1, self.trans1),
            (self.dense2, self.trans2),
            (self.dense3, self.trans3),
        )
        for dense, transition in stages:
            out = transition(dense(out))
        out = self.dense4(out)
        out = F.avg_pool2d(F.relu(self.bn(out)), 4)
        flat = out.view(out.size(0), -1)
        return self.linear(flat)

    
class Block(nn.Module):
    """Depthwise 3x3 convolution followed by a pointwise 1x1 convolution.

    Each convolution is followed by batch normalisation and ReLU. ``stride``
    applies to the depthwise convolution only.
    """

    def __init__(self, in_planes, out_planes, stride=1):
        super().__init__()
        # groups=in_planes gives one filter per input channel (depthwise).
        self.conv1 = nn.Conv2d(
            in_planes, in_planes,
            kernel_size=3, stride=stride, padding=1,
            groups=in_planes, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(in_planes)
        # The 1x1 convolution mixes channels and sets the output width.
        self.conv2 = nn.Conv2d(
            in_planes, out_planes,
            kernel_size=1, stride=1, padding=0, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        depthwise = F.relu(self.bn1(self.conv1(x)))
        return F.relu(self.bn2(self.conv2(depthwise)))

class MobileNet(nn.Module):
    """Stem convolution, a stack of depthwise-separable blocks, and a linear head.

    Args:
        num_classes: Accepted but not used: the classifier below always has
            100 outputs. NOTE(review): confirm whether this is intentional.
    """

    # An int entry is the block's output planes with stride 1;
    # a tuple entry is (output planes, stride).
    cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.linear = nn.Linear(1024, 100)

    def _make_layers(self, in_planes):
        """Build the block stack described by ``cfg``."""
        stack = []
        current_planes = in_planes
        for entry in self.cfg:
            if isinstance(entry, int):
                planes, stride = entry, 1
            else:
                planes, stride = entry[0], entry[1]
            stack.append(Block(current_planes, planes, stride))
            current_planes = planes
        return nn.Sequential(*stack)

    def forward(self, x):
        stem = F.relu(self.bn1(self.conv1(x)))
        features = self.layers(stem)
        pooled = F.avg_pool2d(features, 2)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)

class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    The shortcut is the identity unless the stride or the channel count
    changes, in which case it is a strided 1x1 convolution plus batch norm.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride != 1 or in_planes != out_planes:
            # Project the input so it can be added to the residual branch.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        residual = F.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(residual))
        return F.relu(residual + self.shortcut(x))

class ResNet(nn.Module):
    """Stem convolution, four residual stages (64/128/256/512 planes), linear head.

    Args:
        block: Residual block class; called as
            ``block(in_planes, planes, stride)`` and must expose an
            ``expansion`` attribute.
        num_blocks: Sequence of four ints, the number of blocks per stage.
        num_classes: Accepted but not used: the classifier below always has
            100 outputs. NOTE(review): confirm whether this is intentional.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Running input width, updated by _make_layer as blocks are added.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, 100)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``."""
        per_block_strides = [stride, *([1] * (num_blocks - 1))]
        stage = []
        for block_stride in per_block_strides:
            stage.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        out = F.avg_pool2d(out, 4)
        flat = out.view(out.size(0), -1)
        return self.linear(flat)
        
        
class Bottleneck2(nn.Module):
    """Residual bottleneck: 1x1, 3x3 and 1x1 convolutions with a 4x channel expansion.

    All convolutions here keep their bias terms. The shortcut is the
    identity unless the stride or the channel count changes, in which case
    it is a strided 1x1 convolution plus batch norm.
    """

    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=True)
        self.bn1 = nn.BatchNorm2d(planes)
        # The 3x3 convolution carries the stride.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=True)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=True)
        self.bn3 = nn.BatchNorm2d(out_planes)

        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=True),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        residual = F.relu(self.bn1(self.conv1(x)))
        residual = F.relu(self.bn2(self.conv2(residual)))
        residual = self.bn3(self.conv3(residual))
        return F.relu(residual + self.shortcut(x))
# Layer layout per architecture: an int is the output width of a 3x3
# convolution, 'M' is a 2x2 max-pooling step.
cfg = {
    'VGG16': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    ],
}

class VGG16(nn.Module):
    """Convolutional feature stack built from ``cfg['VGG16']`` plus a linear head.

    Args:
        num_classes: Accepted but not used: the classifier below always has
            100 outputs. NOTE(review): confirm whether this is intentional.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.features = self._make_layers(cfg['VGG16'])
        self.classifier = nn.Linear(512, 100)

    def forward(self, x):
        feature_maps = self.features(x)
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.classifier(flat)

    def _make_layers(self, cfg):
        """Turn a layout list into conv-BN-ReLU groups and pooling layers."""
        stack = []
        in_channels = 3
        for spec in cfg:
            if spec == 'M':
                stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            stack.extend([
                nn.Conv2d(in_channels, spec, kernel_size=3, padding=1),
                nn.BatchNorm2d(spec),
                nn.ReLU(inplace=True),
            ])
            in_channels = spec
        # A 1x1 average pool with stride 1 leaves the tensor unchanged.
        stack.append(nn.AvgPool2d(kernel_size=1, stride=1))
        return nn.Sequential(*stack)


"""vgg16 = VGG16().to(device)
resnet50 = ResNet(Bottleneck2, [3, 4, 6, 3], num_classes=10).to(device)
resnet56 = ResNet(BasicBlock, [9, 9, 9, 9], num_classes=10).to(device)
resnet18 = ResNet(BasicBlock, [2, 2, 2, 2], num_classes=10).to(device)
mobilenet = MobileNet().to(device)
densenet121 = DenseNet(Bottleneck, [6,12,24,16], growth_rate=32).to(device)"""

# Registry of the architectures under test, keyed by a short name and already
# moved to `device`. Entries are created in this fixed order.
# NOTE(review): `num_classes=10` is passed here, but the model classes above
# ignore that argument and always build a 100-output head - confirm intent.
all_models = {}
all_models['resnet18'] = ResNet(BasicBlock, [2, 2, 2, 2], num_classes=10).to(device)
all_models['resnet50'] = ResNet(Bottleneck2, [3, 4, 6, 3], num_classes=10).to(device)
all_models['resnet56'] = ResNet(BasicBlock, [9, 9, 9, 9], num_classes=10).to(device)
all_models['mobilenet'] = MobileNet(num_classes=10).to(device)
all_models['densenet121'] = DenseNet(
    Bottleneck, [6, 12, 24, 16], growth_rate=32, num_classes=10
).to(device)
all_models['vgg16'] = VGG16(num_classes=10).to(device)



# In [None]:
import torch
import os
import copy
import torch.utils.data as data

# Select the compute device (CUDA when available, otherwise CPU) and report
# which one is in use.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def save_model(model, model_name, dataset_name, stage):
    """Write the model's state dict to ``saved_models/<model>_<dataset>_<stage>.pth``.

    Args:
        model: Network whose ``state_dict()`` is saved.
        model_name: Model identifier used in the file name.
        dataset_name: Dataset identifier used in the file name.
        stage: Training-stage tag used in the file name.

    Side effects:
        Creates the ``saved_models`` directory (relative to the current
        working directory) when missing, writes the file and prints its path.
    """
    # exist_ok avoids the check-then-create race of a separate existence test.
    os.makedirs('saved_models', exist_ok=True)

    # Save the model state with the appropriate naming convention
    model_path = f'saved_models/{model_name}_{dataset_name}_{stage}.pth'
    torch.save(model.state_dict(), model_path)
    print(f"Model saved: {model_path}")

def main():
    """Run the whole experiment: every dataset x every model x every strategy.

    For each dataset and each architecture in ``all_models`` a fresh copy of
    the registered model is trained on the initial 10k split, evaluated and
    saved. Each selection strategy then restarts from that saved checkpoint,
    runs ``train_until_empty`` and saves its final model.
    """
    datasets_list = ['cifar100', 'cifar10', 'svhn']
    methods = [1, 2, 3, 4]  # Define all the method numbers

    for dataset_name in datasets_list:
        # Prepare data
        initial_train_set, remainder_set, test_set = prepare_data(dataset_name)
        test_loader = data.DataLoader(test_set, batch_size=64, shuffle=False)
        # The initial 10k images are the same for every model of this dataset.
        train_loader = data.DataLoader(initial_train_set, batch_size=64, shuffle=True)

        # Iterate over models
        for model_name, template in all_models.items():
            print(f"\nTraining {model_name} on {dataset_name}")

            # Train a copy so the registered model stays untrained; otherwise
            # weights learned on one dataset would carry over to the next.
            model = copy.deepcopy(template).to(device)

            # Train the model on the initial 10k images
            train_model(model, train_loader, epochs=50, learning_rate=0.01)

            # test_model returns a fraction, so scale it for the "%" display.
            accuracy = test_model(model, test_loader)
            print(f"Initial accuracy of {model_name} on {dataset_name}: {accuracy * 100:.2f}%")

            # Save the initial trained model after training on 10k images
            save_model(model, model_name, dataset_name, 'initial')
            initial_path = f'saved_models/{model_name}_{dataset_name}_initial.pth'

            # Iterate through each active learning method
            for method in methods:
                print(f"\nApplying method {method} on {model_name} for {dataset_name}")

                # Load the initial trained model for a fresh start
                model_copy = copy.deepcopy(model)
                model_copy.load_state_dict(torch.load(initial_path, map_location=device))

                # Move the copied model to the device
                model_copy = model_copy.to(device)

                # Deep copy of the data to prevent contamination between methods
                initial_train_set_copy = copy.deepcopy(initial_train_set)
                remainder_copy = copy.deepcopy(remainder_set)

                # Run the selected method and save results
                results = train_until_empty(model_copy, initial_train_set_copy, remainder_copy, test_set,
                                            max_iterations=15, batch_size=64, learning_rate=0.01, method=method)

                # Save the retrained model for this method
                save_model(model_copy, model_name, dataset_name, f'method_{method}')

                # Save or print the results
                print(f"Results for {model_name} on {dataset_name} with method {method}: {results}")

# Start the experiment only when this module is the program entry point.
if __name__ == "__main__":
    main()
