# CP Performance Comparison Across Different Data Sizes

## Experiment Setup
+ CP performance: measured by the **coverage rate** and the **average size of the prediction sets**
+ Dataset: CIFAR-100
+ Comparison candidates: different combinations of 4 score functions (`THR`, `APS`, `SAPS(0.2)`, `RAPS(1,0)`) and 3 predictors (`SplitPredictor`, `ClusterPredictor`, `ClassWisePredictor`).
+ Size variation: build 10 nested subsets of CIFAR-100 containing 10, 20, ..., 100 classes (each subset is a prefix of one shuffled class order), then run CP on each subset.

In [109]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torchvision.datasets import CIFAR100
from tqdm import tqdm  # Import tqdm for the progress bar

# Preprocessing applied to every image: upscale to the 224x224 input size
# VGG16 expects, then convert to a tensor
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
)

# Download (if needed) and load both CIFAR-100 splits with identical settings;
# the train split is built first, then the test split
cifar100_trainset, cifar100_testset = (
    CIFAR100(root='./data', train=is_train, transform=transform, download=True)
    for is_train in (True, False)
)

# Everything runs on the CPU here. To use a GPU when one is available, build the
# device from "cuda:0" if torch.cuda.is_available() else "cpu" instead.
device = torch.device("cpu")

Files already downloaded and verified
Files already downloaded and verified


In [110]:
# Re-map the class labels to fit the input format of the loss function 
class RemapCIFAR100(torch.utils.data.Dataset):
    """CIFAR-100 restricted to a subset of classes, with labels renumbered.

    Only samples whose original label is in ``selected_classes`` are kept.
    Each kept label is replaced by the position of its class inside
    ``selected_classes``, so the new labels run from 0 to
    ``len(selected_classes) - 1``.
    """

    def __init__(self, root, train=True, selected_classes=None, transform=None, target_transform=None, download=False):
        """Load CIFAR-100 and keep only the selected classes.

        Args:
            root: Directory passed to ``datasets.CIFAR100``.
            train: Passed to ``datasets.CIFAR100`` to choose the split.
            selected_classes: Original class ids to keep; defaults to 0..9.
            transform: Optional callable applied to each image in ``__getitem__``.
            target_transform: Optional callable applied to each remapped label.
            download: Passed to ``datasets.CIFAR100``.
        """
        # The wrapped dataset is loaded without transforms; they are applied
        # by this class in __getitem__ instead.
        self.cifar100 = datasets.CIFAR100(root, train=train, transform=None, target_transform=None, download=download)

        if selected_classes is None:
            selected_classes = list(range(10))  # Default: classes 0 to 9

        self.selected_classes = selected_classes
        # original class id -> new contiguous label
        self.class_mapping = {class_id: idx for idx, class_id in enumerate(selected_classes)}
        self.transform = transform
        self.target_transform = target_transform

        # Remap labels during initialization
        self.data, self.targets = self.remap_labels(self.cifar100.data, self.cifar100.targets)

    def remap_labels(self, data, targets):
        """Filter ``data``/``targets`` to the selected classes and renumber labels.

        Args:
            data: Indexable collection of samples, aligned with ``targets``.
            targets: Iterable of original class ids.

        Returns:
            A ``(remapped_data, remapped_targets)`` pair of lists, in the
            original sample order.
        """
        # One pass with O(1) dict membership instead of two passes that each
        # scan the selected_classes list for every sample.
        mapping = self.class_mapping
        remapped_data = []
        remapped_targets = []
        for i, target in enumerate(targets):
            if target in mapping:
                remapped_data.append(data[i])
                remapped_targets.append(mapping[target])

        return remapped_data, remapped_targets

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair at ``index`` after optional transforms."""
        img, target = self.data[index], self.targets[index]

        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def __len__(self):
        """Return the number of samples kept after filtering."""
        return len(self.data)


## Build the Subsets Sequence

In [111]:
import random

def gen_subset_sequence(seed = 820, num_subsets=10, classes_per_step=10, batch_size=64):
    """Build a sequence of nested CIFAR-100 class subsets of growing size.

    The 100 class ids are shuffled once, and subset ``k`` (0-based) keeps the
    first ``classes_per_step * (k + 1)`` ids of that order, so every subset
    contains the previous one. With the defaults this gives 10 subsets of
    10, 20, ..., 100 classes.

    Note that this seeds Python's global ``random`` module.

    Args:
        seed: Seed for the class shuffle.
        num_subsets: Number of subsets to build.
        classes_per_step: Number of classes added from one subset to the next.
        batch_size: Batch size of the returned data loaders.

    Returns:
        A list of ``(selected_classes, train_dataset, test_dataset,
        train_loader, test_loader)`` tuples, one per subset, smallest first.
    """
    random.seed(seed)
    class_order = list(range(100))
    random.shuffle(class_order)

    subsets = []
    for step in range(1, num_subsets + 1):
        selected_classes = class_order[:classes_per_step * step]

        train_dataset = RemapCIFAR100(root='./data', train=True, selected_classes=selected_classes, transform=transforms.ToTensor(), download=True)
        test_dataset = RemapCIFAR100(root='./data', train=False, selected_classes=selected_classes, transform=transforms.ToTensor(), download=True)

        # Only the training loader is shuffled; the test order stays fixed.
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

        subsets.append((selected_classes, train_dataset, test_dataset, train_loader, test_loader))

    return subsets

## Fine-tune the VGG16

In [112]:
# Index k (0, 1, ..., 9) of the subset to use; subset k contains 10 * (k + 1) classes.
SUBSET_ID = 9

In [113]:
def train_vgg16(train_loader, selected_classes, num_epochs=1):
    """Fine-tune a pre-trained VGG16 on the given training data.

    The last classifier layer is replaced by a new linear layer with one
    output per selected class, and all parameters are trained with SGD and
    cross-entropy loss on the module-level ``device``.

    Args:
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        selected_classes: Sized collection of the classes in the data; only
            its length is used, to size the output layer.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        The fine-tuned model, on ``device`` and still in training mode.
    """
    # Load the pre-trained VGG16 model
    vgg16_model = models.vgg16(pretrained=True)

    # Replace the final layer so the output size matches the number of classes
    vgg16_model.classifier[-1] = nn.Linear(4096, len(selected_classes))

    # Define the loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(vgg16_model.parameters(), lr=0.001, momentum=0.9)

    vgg16_model = vgg16_model.to(device)

    for epoch in range(num_epochs):
        vgg16_model.train()
        for inputs, labels in tqdm(train_loader, desc=f'Epoch {epoch + 1}/{num_epochs}', dynamic_ncols=True):
            # Batches must live on the same device as the model; without this
            # the forward pass fails as soon as device is not the CPU.
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = vgg16_model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    return vgg16_model

In [114]:
import os

# Build all class subsets and pick the one selected by SUBSET_ID
subsets = gen_subset_sequence()
subset = subsets[SUBSET_ID]

selected_classes, train_dataset, test_dataset, train_loader, test_loader = subset
model = train_vgg16(train_loader, selected_classes)

# Make sure the output directory exists before saving, so a missing folder
# cannot discard the model that was just trained.
os.makedirs("models", exist_ok=True)
torch.save(model.state_dict(), "models/model_{}0.pth".format(SUBSET_ID+1))

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


Epoch 1/1: 100%|██████████| 782/782 [02:01<00:00,  6.44it/s]


## CP and Obtain the CP Measures

### Load the Model If Trained and Saved Previously

In [115]:
# subsets = gen_subset_sequence()
# subset = subsets[SUBSET_ID]
# selected_classes, train_dataset, test_dataset, train_loader, test_loader = subset
# # Load the saved model
# model = models.vgg16(pretrained=False)
# model.classifier[-1] = nn.Linear(4096, (SUBSET_ID+1)*10)  # Adjust the number of classes
# model.load_state_dict(torch.load("models/model_{}0.pth".format(SUBSET_ID+1)))

In [116]:
from torchcp.classification.scores import THR, APS, SAPS, RAPS
from torchcp.classification.predictors import SplitPredictor, ClusterPredictor, ClassWisePredictor

# Score function instances under comparison
SCORE_FUNCTIONS = [
    THR(),
    APS(),
    SAPS(0.2),
    RAPS(1, 0),
]

# Predictor classes (not instances) under comparison
PREDICTORS = [
    SplitPredictor,
    ClusterPredictor,
    ClassWisePredictor,
]

## Record the Experiment Data

In [117]:
import pandas as pd
import os

# Value passed as `alpha` to every calibrate() call and echoed in the log line.
# Presumably the target coverage is 1 - alpha; confirm against the torchcp docs.
alpha = 0.1

# One row per score function and one column per predictor class. The class
# name is read directly instead of instantiating a predictor just to name it.
rows = [type(score).__name__ for score in SCORE_FUNCTIONS]
columns = [predictor_cls.__name__ for predictor_cls in PREDICTORS]

cov_rates = pd.DataFrame(index=rows, columns=columns)
avg_sizes = pd.DataFrame(index=rows, columns=columns)
for score in SCORE_FUNCTIONS:
    score_name = type(score).__name__
    for class_predictor in PREDICTORS:
        predictor = class_predictor(score, model)
        predictor_name = type(predictor).__name__
        # NOTE(review): calibration uses train_loader, i.e. the data the model
        # was fine-tuned on. Conformal calibration normally uses held-out data;
        # this may be why coverage lands below 1 - alpha. Confirm it is intended.
        predictor.calibrate(train_loader, alpha=alpha)
        cp_measures = predictor.evaluate(test_loader)
        print(f"Experiment--Data : CIFAR-100, Score : {score_name}, Predictor : {predictor_name}, Alpha : {alpha}")
        print(cp_measures)
        # Single .loc[row, col] assignment: the chained form .loc[row][col] = v
        # may write to a temporary copy and leave the frame unchanged.
        cov_rates.loc[score_name, predictor_name] = cp_measures["Coverage_rate"]
        avg_sizes.loc[score_name, predictor_name] = cp_measures["Average_size"]

# Make sure the output directory exists before writing the result tables
os.makedirs("results", exist_ok=True)
cov_rates.to_csv("results/cov_rates_{}0.csv".format(SUBSET_ID+1))
avg_sizes.to_csv("results/avg_sizes_{}0.csv".format(SUBSET_ID+1))

Experiment--Data : CIFAR-100, Score : THR, Predictor : SplitPredictor, Alpha : 0.1
{'Coverage_rate': 0.8777, 'Average_size': 6.0148}
Experiment--Data : CIFAR-100, Score : THR, Predictor : ClusterPredictor, Alpha : 0.1
{'Coverage_rate': 0.8757, 'Average_size': 5.9401}
Experiment--Data : CIFAR-100, Score : THR, Predictor : ClassWisePredictor, Alpha : 0.1
{'Coverage_rate': 0.8746, 'Average_size': 6.4652}
Experiment--Data : CIFAR-100, Score : APS, Predictor : SplitPredictor, Alpha : 0.1
{'Coverage_rate': 0.8764, 'Average_size': 9.9199}
Experiment--Data : CIFAR-100, Score : APS, Predictor : ClusterPredictor, Alpha : 0.1
{'Coverage_rate': 0.8841, 'Average_size': 10.1735}
Experiment--Data : CIFAR-100, Score : APS, Predictor : ClassWisePredictor, Alpha : 0.1
{'Coverage_rate': 0.8788, 'Average_size': 9.9169}
Experiment--Data : CIFAR-100, Score : SAPS, Predictor : SplitPredictor, Alpha : 0.1
{'Coverage_rate': 0.8781, 'Average_size': 7.067}
Experiment--Data : CIFAR-100, Score : SAPS, Predictor : 