In [1]:
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import FashionMNIST
from torchvision import transforms
from torchcp.classification.predictors import SplitPredictor, ClusterPredictor, ClassWisePredictor
from torchcp.classification.scores import THR, APS, RAPS, SAPS, Margin
from torchcp.utils import fix_randomness
from torchcp.classification.utils.metrics import Metrics

# Preprocessing for the FashionMNIST cell: convert each image to a tensor, then
# apply a one-channel Normalize with mean 0.5 and std 0.5.
# NOTE: `transform` is a module-level name that the CIFAR-10 and MNIST cells
# rebind later; test_fashionmnist() reads it at call time, so re-run this cell
# before calling that function again after the later cells have executed.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

# CNN model for demonstration purposes
class CNN(torch.nn.Module):
    """Minimal one-convolution classifier for 1x28x28 images.

    Architecture: 3x3 convolution (1 -> 32 channels, padding 1) -> ReLU ->
    2x2 max-pool (28x28 -> 14x14) -> one linear layer producing the logits.

    Args:
        num_classes: Number of output logits. Defaults to 10, so the
            zero-argument call ``CNN()`` builds the same network as before;
            the parameter mirrors the other ``CNN`` definitions in this
            notebook.
    """

    # Width of the flattened feature map after pooling: 32 channels of 14x14.
    _FLAT_FEATURES = 14 * 14 * 32

    def __init__(self, num_classes=10):
        # Zero-argument super() does not look up the global name `CNN`, which
        # later notebook cells rebind to a different class.
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.relu = torch.nn.ReLU()
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc = torch.nn.Linear(self._FLAT_FEATURES, num_classes)

    def forward(self, x):
        """Return the logits for a batch of images.

        Args:
            x: Input tensor; for the flatten below to yield one row per
                sample it must be shaped (batch, 1, 28, 28).

        Returns:
            Tensor with ``num_classes`` logits per row.
        """
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool(x)
        # Flatten every sample's feature map into one row for the linear layer.
        x = x.view(-1, self._FLAT_FEATURES)
        x = self.fc(x)
        return x

def test_fashionmnist():
    """Run the conformal-prediction experiment grid on FashionMNIST.

    The FashionMNIST test set (downloaded to ``./data`` when missing) is split
    randomly 70/30 into a calibration subset and an evaluation subset. A newly
    constructed ``CNN`` is used as the classifier; no training happens in this
    function. For every combination of alpha, score function and predictor
    class, the predictor is calibrated on the calibration loader and the
    result of ``predictor.evaluate`` on the evaluation loader is printed.

    Reads the module-level names ``transform`` and ``CNN`` at call time.

    Returns:
        None. Results are written to stdout.
    """
    fix_randomness(seed=0)
    model_name = 'CNN'

    # Load (downloading if necessary) the FashionMNIST test split.
    fashionmnist_test = FashionMNIST(root='./data', train=False, download=True, transform=transform)

    # 70% of the samples calibrate the predictors, the remaining 30% evaluate them.
    dataset_length = len(fashionmnist_test)
    cal_length = int(0.7 * dataset_length)
    test_length = dataset_length - cal_length
    cal_dataset, test_dataset = torch.utils.data.random_split(fashionmnist_test, [cal_length, test_length])

    model = CNN()
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    model.to(device)

    # Pinned host memory only helps host-to-GPU copies; requesting it without
    # CUDA makes the DataLoader emit a warning, so enable it only with CUDA.
    cal_data_loader = torch.utils.data.DataLoader(cal_dataset, batch_size=64, shuffle=True, pin_memory=use_cuda)
    test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False, pin_memory=use_cuda)

    # A standard process of conformal prediction
    alpha_values = [0.1, 0.5]
    predictors = [SplitPredictor, ClassWisePredictor, ClusterPredictor]
    score_functions = [THR(), APS(), RAPS(1, 0), SAPS(0.2)]

    for alpha in alpha_values:
        for score in score_functions:
            for class_predictor in predictors:
                # A new predictor per combination; the score object and the model are shared.
                predictor = class_predictor(score, model)
                predictor.calibrate(cal_data_loader, alpha)
                print(f"Experiment--Data : FashionMNIST, Model : {model_name}, Score : {score.__class__.__name__}, Predictor : {predictor.__class__.__name__}, Alpha : {alpha}")
                print(predictor.evaluate(test_data_loader))

In [2]:
# Run the FashionMNIST grid. The function reads the globals `CNN` and
# `transform`, which later cells rebind for other datasets, so re-run the
# defining cell first if this cell is executed out of order.
test_fashionmnist()

Experiment--Data : FashionMNIST, Model : CNN, Score : THR, Predictor : SplitPredictor, Alpha : 0.1
{'Coverage_rate': 0.9023333333333333, 'Average_size': 9.367666666666667}
Experiment--Data : FashionMNIST, Model : CNN, Score : THR, Predictor : ClassWisePredictor, Alpha : 0.1
{'Coverage_rate': 0.9023333333333333, 'Average_size': 8.445666666666666}
Experiment--Data : FashionMNIST, Model : CNN, Score : THR, Predictor : ClusterPredictor, Alpha : 0.1
{'Coverage_rate': 0.9023333333333333, 'Average_size': 9.371666666666666}
Experiment--Data : FashionMNIST, Model : CNN, Score : APS, Predictor : SplitPredictor, Alpha : 0.1
{'Coverage_rate': 0.902, 'Average_size': 9.192333333333334}
Experiment--Data : FashionMNIST, Model : CNN, Score : APS, Predictor : ClassWisePredictor, Alpha : 0.1
{'Coverage_rate': 0.9023333333333333, 'Average_size': 8.298}
Experiment--Data : FashionMNIST, Model : CNN, Score : APS, Predictor : ClusterPredictor, Alpha : 0.1
{'Coverage_rate': 0.8973333333333333, 'Average_size': 

In [3]:
from torchvision.datasets import CIFAR10

# Preprocessing for the CIFAR-10 cell: convert each image to a tensor, then
# apply a three-channel Normalize with mean 0.5 and std 0.5 per channel.
# NOTE: this rebinds the module-level `transform` used by the FashionMNIST cell,
# and the MNIST cell rebinds it again; test_cifar10() reads it at call time.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

class CNN(torch.nn.Module):
    """One-convolution classifier for 3x32x32 images.

    Layers: 3x3 convolution (3 -> 32 channels, padding 1), ReLU, 2x2 max-pool
    (32x32 -> 16x16), then a linear layer mapping the flattened map to
    ``num_classes`` logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        nn = torch.nn
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # 32 channels of 16x16 remain after pooling.
        self.fc = nn.Linear(16 * 16 * 32, num_classes)

    def forward(self, x):
        """Compute the logits for the input batch ``x``."""
        pooled = self.pool(self.relu(self.conv1(x)))
        # One row per sample; the row width equals the linear layer's input size.
        flat = pooled.view(-1, self.fc.in_features)
        return self.fc(flat)

def test_cifar10():
    """Run the conformal-prediction experiment grid on CIFAR-10.

    The CIFAR-10 test set (downloaded to ``./data`` when missing) is split
    randomly 70/30 into a calibration subset and an evaluation subset. A newly
    constructed ``CNN`` is used as the classifier; no training happens in this
    function. For every combination of alpha, score function and predictor
    class, the predictor is calibrated on the calibration loader and the
    result of ``predictor.evaluate`` on the evaluation loader is printed.

    Reads the module-level names ``transform`` and ``CNN`` at call time.

    Returns:
        None. Results are written to stdout.
    """
    fix_randomness(seed=0)
    model_name = 'CNN'

    # Load the CIFAR-10 dataset (test split).
    cifar10_test = CIFAR10(root='./data', train=False, download=True, transform=transform)

    # 70% of the samples calibrate the predictors, the remaining 30% evaluate them.
    dataset_length = len(cifar10_test)
    cal_length = int(0.7 * dataset_length)
    test_length = dataset_length - cal_length
    cal_dataset, test_dataset = torch.utils.data.random_split(cifar10_test, [cal_length, test_length])

    model = CNN(num_classes=10)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    model.to(device)

    # Pinned host memory only helps host-to-GPU copies; requesting it without
    # CUDA makes the DataLoader emit a warning, so enable it only with CUDA.
    cal_data_loader = torch.utils.data.DataLoader(cal_dataset, batch_size=64, shuffle=True, pin_memory=use_cuda)
    test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False, pin_memory=use_cuda)

    alpha_values = [0.1, 0.5]
    predictors = [SplitPredictor, ClassWisePredictor, ClusterPredictor]
    score_functions = [THR(), APS(), RAPS(1, 0), SAPS(0.2)]

    for alpha in alpha_values:
        for score in score_functions:
            for class_predictor in predictors:
                # A new predictor per combination; the score object and the model are shared.
                predictor = class_predictor(score, model)
                predictor.calibrate(cal_data_loader, alpha)
                print(f"Experiment--Data : CIFAR-10, Model : {model_name}, Score : {score.__class__.__name__}, Predictor : {predictor.__class__.__name__}, Alpha : {alpha}")
                print(predictor.evaluate(test_data_loader))

In [4]:
# Run the CIFAR-10 grid. The function reads the globals `CNN` and `transform`,
# which the MNIST cell rebinds later, so re-run the CIFAR-10 defining cell
# first if this cell is executed out of order.
test_cifar10()

Files already downloaded and verified
Experiment--Data : CIFAR-10, Model : CNN, Score : THR, Predictor : SplitPredictor, Alpha : 0.1
{'Coverage_rate': 0.9066666666666666, 'Average_size': 8.897333333333334}
Experiment--Data : CIFAR-10, Model : CNN, Score : THR, Predictor : ClassWisePredictor, Alpha : 0.1
{'Coverage_rate': 0.9083333333333333, 'Average_size': 8.701333333333332}
Experiment--Data : CIFAR-10, Model : CNN, Score : THR, Predictor : ClusterPredictor, Alpha : 0.1
{'Coverage_rate': 0.9063333333333333, 'Average_size': 8.897}
Experiment--Data : CIFAR-10, Model : CNN, Score : APS, Predictor : SplitPredictor, Alpha : 0.1
{'Coverage_rate': 0.9056666666666666, 'Average_size': 8.989}
Experiment--Data : CIFAR-10, Model : CNN, Score : APS, Predictor : ClassWisePredictor, Alpha : 0.1
{'Coverage_rate': 0.897, 'Average_size': 8.735333333333333}
Experiment--Data : CIFAR-10, Model : CNN, Score : APS, Predictor : ClusterPredictor, Alpha : 0.1
{'Coverage_rate': 0.907, 'Average_size': 8.994333333

In [5]:
from torchvision.datasets import MNIST

# Preprocessing for the MNIST cell: convert each image to a tensor, then apply
# a one-channel Normalize with mean 0.5 and std 0.5.
# NOTE: this rebinds the module-level `transform` that earlier cells also
# define; test_mnist() reads it at call time.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

class CNN(torch.nn.Module):
    """One-convolution classifier for 1x28x28 images.

    A 3x3 convolution (1 -> 32 channels, padding 1) is followed by ReLU and a
    2x2 max-pool (28x28 -> 14x14); the flattened result feeds one linear layer
    that emits ``num_classes`` logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        flat_dim = 14 * 14 * 32  # 32 channels of 14x14 after pooling
        self.conv1 = torch.nn.Conv2d(
            in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1
        )
        self.relu = torch.nn.ReLU()
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc = torch.nn.Linear(flat_dim, num_classes)

    def forward(self, x):
        """Compute the logits for the input batch ``x``."""
        # Feature extractor: convolution, activation, pooling, in that order.
        for stage in (self.conv1, self.relu, self.pool):
            x = stage(x)
        # Flatten each sample into one row before the linear classifier.
        return self.fc(x.view(-1, 14 * 14 * 32))

def test_mnist():
    """Run the conformal-prediction experiment grid on MNIST.

    The MNIST test set (downloaded to ``./data`` when missing) is split
    randomly 70/30 into a calibration subset and an evaluation subset. A newly
    constructed ``CNN`` is used as the classifier; no training happens in this
    function. For every combination of alpha, score function and predictor
    class, the predictor is calibrated on the calibration loader and the
    result of ``predictor.evaluate`` on the evaluation loader is printed.

    Reads the module-level names ``transform`` and ``CNN`` at call time.

    Returns:
        None. Results are written to stdout.
    """
    fix_randomness(seed=0)
    model_name = 'CNN'

    # Load the MNIST dataset (test split).
    mnist_test = MNIST(root='./data', train=False, download=True, transform=transform)

    # 70% of the samples calibrate the predictors, the remaining 30% evaluate them.
    dataset_length = len(mnist_test)
    cal_length = int(0.7 * dataset_length)
    test_length = dataset_length - cal_length
    cal_dataset, test_dataset = torch.utils.data.random_split(mnist_test, [cal_length, test_length])

    model = CNN(num_classes=10)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    model.to(device)

    # Pinned host memory only helps host-to-GPU copies; requesting it without
    # CUDA makes the DataLoader emit a warning, so enable it only with CUDA.
    cal_data_loader = torch.utils.data.DataLoader(cal_dataset, batch_size=64, shuffle=True, pin_memory=use_cuda)
    test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False, pin_memory=use_cuda)

    alpha_values = [0.1, 0.5]
    predictors = [SplitPredictor, ClassWisePredictor, ClusterPredictor]
    score_functions = [THR(), APS(), RAPS(1, 0), SAPS(0.2)]

    for alpha in alpha_values:
        for score in score_functions:
            for class_predictor in predictors:
                # A new predictor per combination; the score object and the model are shared.
                predictor = class_predictor(score, model)
                predictor.calibrate(cal_data_loader, alpha)
                print(f"Experiment--Data : MNIST, Model : {model_name}, Score : {score.__class__.__name__}, Predictor : {predictor.__class__.__name__}, Alpha : {alpha}")
                print(predictor.evaluate(test_data_loader))

In [6]:
# Run the MNIST grid. The function reads the globals `CNN` and `transform` at
# call time; both names are also bound by earlier cells, so the MNIST defining
# cell must be the most recent one executed.
test_mnist()

Experiment--Data : MNIST, Model : CNN, Score : THR, Predictor : SplitPredictor, Alpha : 0.1
{'Coverage_rate': 0.899, 'Average_size': 8.706666666666667}
Experiment--Data : MNIST, Model : CNN, Score : THR, Predictor : ClassWisePredictor, Alpha : 0.1
{'Coverage_rate': 0.888, 'Average_size': 8.486}
Experiment--Data : MNIST, Model : CNN, Score : THR, Predictor : ClusterPredictor, Alpha : 0.1
{'Coverage_rate': 0.9, 'Average_size': 8.718}
Experiment--Data : MNIST, Model : CNN, Score : APS, Predictor : SplitPredictor, Alpha : 0.1
{'Coverage_rate': 0.906, 'Average_size': 8.771333333333333}
Experiment--Data : MNIST, Model : CNN, Score : APS, Predictor : ClassWisePredictor, Alpha : 0.1
{'Coverage_rate': 0.895, 'Average_size': 8.783333333333333}
Experiment--Data : MNIST, Model : CNN, Score : APS, Predictor : ClusterPredictor, Alpha : 0.1
{'Coverage_rate': 0.906, 'Average_size': 8.789666666666667}
Experiment--Data : MNIST, Model : CNN, Score : RAPS, Predictor : SplitPredictor, Alpha : 0.1
{'Coverag